From 60eba2e4c341b44178295715ff13e6acb3e30e5c Mon Sep 17 00:00:00 2001 From: Tobias Doerffel Date: Mon, 3 Aug 2009 17:14:58 +0200 Subject: [PATCH] CpuX86: added support for memory prefetching Added macros PREFETCH_READ() and PREFETCH_WRITE() which insert proper prefetch instructions. At least on my Intel Atom CPU this improves performance slightly. Probably needs some further tuning. Signed-off-by: Tobias Doerffel --- src/core/CpuX86.c | 75 +++++++++++++++++++++++++++++++++++++++-------- 1 file changed, 62 insertions(+), 13 deletions(-) diff --git a/src/core/CpuX86.c b/src/core/CpuX86.c index 3deb01c90..4c5e7217c 100644 --- a/src/core/CpuX86.c +++ b/src/core/CpuX86.c @@ -24,6 +24,20 @@ #include "Cpu.h" +#define PREFETCH_RW(x,rw) \ + __builtin_prefetch(x,rw,0); \ + __builtin_prefetch(x+128/sizeof(*x),rw,0); \ + __builtin_prefetch(x+256/sizeof(*x),rw,0); \ + __builtin_prefetch(x+384/sizeof(*x),rw,0); + +#if 0 /* for benchmarking only */ +#undef PREFETCH_RW +#define PREFETCH_RW(x,rw) +#endif +#define PREFETCH_READ(x) PREFETCH_RW(x,0) +#define PREFETCH_WRITE(x) PREFETCH_RW(x,1) + + #ifdef X86_OPTIMIZATIONS #ifdef BUILD_MMX @@ -39,14 +53,10 @@ void memCpyMMX( void * RP _dst, const void * RP _src, int _size ) char * RP dst = (char *) _dst; __asm__ __volatile__ ( " fsave %0; fwait\n"::"m"(fpu_save[0]) ); - __asm__ __volatile__ ( - "1: prefetchnta (%0)\n" - " prefetchnta 64(%0)\n" - " prefetchnta 128(%0)\n" - " prefetchnta 192(%0)\n" - " prefetchnta 256(%0)\n" - : : "r" (src) ); - for(i=0; i