[patch] string-486.h modified

From: Petko Manolov (petkan@dce.bg)
Date: Thu Aug 31 2000 - 09:26:57 EDT

  • Next message: Daniel R Risacher: "2.2.[16,17pre20] VM do_try_to_free_pages"

            Hi to all,

    I made this patch because some people have asked to use
    the 486-optimized string routines on older
    (486 and 586) machines.

    Actually, I did two things:
            - replaced the buggy macro definitions for
            memset and memcpy, which caused the compiler
            to break. A paranoia check for (counter == 0) was also added;
            - rewrote these functions in order to speed
            them up and reduce the code size.

    To anyone who is interested - please send me comments, bug reports
    or ideas.
    The goal is to make the kernel use these procedures when running
    on 486 or 586 processors, so please test...

    best,
    Petkan

    --- linux-2.4.0-test7/include/asm-i386/string-486.h.orig Tue Aug 29 11:10:51 2000
    +++ linux/include/asm-i386/string-486.h Thu Aug 31 16:10:11 2000
    @@ -16,10 +16,11 @@
      * Split into 2 CPU specific files by Alan Cox to keep #ifdef noise down.
      *
      * 1999/10/5 Proper register args for newer GCCs and minor bugs
    - * fixed - Petko Manolov (petkan@spct.net)
    + * fixed - Petko Manolov (petkan@dce.bg)
      * 1999/10/14 3DNow memscpy() added - Petkan
      * 2000/05/09 extern changed to static in function definitions
      * and a few cleanups - Petkan
    + * 2000/08/29 memset and memcpy rewritten - Petkan
      */
     
     #define __HAVE_ARCH_STRCPY
    @@ -273,79 +274,60 @@
     /* end of additional stuff */
     
     
    -/*
    - * These ought to get tweaked to do some cache priming.
    - */
    -
    -static inline void * __memcpy_by4(void * to, const void * from, size_t n)
    +static inline void *__memcpy(void * to, const void *from, size_t len)
     {
    -register void *tmp = (void *)to;
    -register int dummy1,dummy2;
    -__asm__ __volatile__ (
    - "\n1:\tmovl (%2),%0\n\t"
    - "addl $4,%2\n\t"
    - "movl %0,(%1)\n\t"
    - "addl $4,%1\n\t"
    - "decl %3\n\t"
    - "jnz 1b"
    - :"=r" (dummy1), "=r" (tmp), "=r" (from), "=r" (dummy2)
    - :"1" (tmp), "2" (from), "3" (n/4)
    - :"memory");
    -return (to);
    -}
    + int d0,d1,d2;
     
    -static inline void * __memcpy_by2(void * to, const void * from, size_t n)
    -{
    -register void *tmp = (void *)to;
    -register int dummy1,dummy2;
    -__asm__ __volatile__ (
    - "shrl $1,%3\n\t"
    - "jz 2f\n" /* only a word */
    - "1:\tmovl (%2),%0\n\t"
    - "addl $4,%2\n\t"
    - "movl %0,(%1)\n\t"
    - "addl $4,%1\n\t"
    - "decl %3\n\t"
    - "jnz 1b\n"
    - "2:\tmovw (%2),%w0\n\t"
    - "movw %w0,(%1)"
    - :"=r" (dummy1), "=r" (tmp), "=r" (from), "=r" (dummy2)
    - :"1" (tmp), "2" (from), "3" (n/2)
    - :"memory");
    -return (to);
    -}
    -
    -static inline void * __memcpy_g(void * to, const void * from, size_t n)
    -{
    -int d0, d1, d2;
    -register void *tmp = (void *)to;
    -__asm__ __volatile__ (
    - "shrl $1,%%ecx\n\t"
    - "jnc 1f\n\t"
    - "movsb\n"
    - "1:\tshrl $1,%%ecx\n\t"
    - "jnc 2f\n\t"
    + __asm__ __volatile__ (
    + "rep; movsl\n\t"
    + "testb $2,%b3\n\t"
    + "jz 1f\n\t"
             "movsw\n"
    - "2:\trep\n\t"
    - "movsl"
    + "1:\t"
    + "testb $1,%b3\n\t"
    + "jz 2f\n\t"
    + "movsb\n"
    + "2:"
             :"=&c" (d0), "=&D" (d1), "=&S" (d2)
    - :"0" (n), "1" ((long) tmp), "2" ((long) from)
    - :"memory");
    -return (to);
    + :"q" (len), "0" (len/4), "1" (to), "2" (from)
    + :"memory"
    + );
    +
    + return to;
     }
     
    -#define __memcpy_c(d,s,count) \
    -((count%4==0) ? \
    - __memcpy_by4((d),(s),(count)) : \
    - ((count%2==0) ? \
    - __memcpy_by2((d),(s),(count)) : \
    - __memcpy_g((d),(s),(count))))
    -
    -#define __memcpy(d,s,count) \
    -(__builtin_constant_p(count) ? \
    - __memcpy_c((d),(s),(count)) : \
    - __memcpy_g((d),(s),(count)))
    -
    +
    +static inline void *__constant_memcpy(void * to, const void *from, size_t len)
    +{
    + int d0,d1,d2;
    + register int tmp;
    +
    +#define MEMCP(x) \
    + __asm__ volatile ( \
    + "\n1:\t" \
    + "movl (%0),%3\n\t" \
    + "movl %3,(%1)\n\t" \
    + "addl $4,%1\n\t" \
    + "addl $4,%0\n\t" \
    + "decl %2\n\t" \
    + "jnz 1b\n\t" \
    + x \
    + :"=r" (d0), "=r" (d1), "=r" (d2),"=q" (tmp)\
    + :"0" (from), "1" (to), "2" (len/4) \
    + :"memory" \
    + )
    +
    + switch ( len % 4 ) {
    + case 0: MEMCP(""); return to;
    + case 1: MEMCP("movb (%0),%b3; movb %b3,(%1)"); return to;
    + case 2: MEMCP("movw (%0),%w3; movw %w3,(%1)"); return to;
    + default: MEMCP("movw (%0),%w3; movw %w3,(%1)\n\t"
    + "movb 2(%0),%b3; movb %b3,2(%1)"); return to;
    + }
    +#undef MEMCP
    +}
    +
    +
     #define __HAVE_ARCH_MEMCPY
     
     #include <linux/config.h>
    @@ -363,24 +345,26 @@
     ** This CPU favours 3DNow strongly (eg AMD K6-II, K6-III, Athlon)
     */
     
    -static inline void * __constant_memcpy3d(void * to, const void * from, size_t len)
    +static inline void *__memcpy3d(void *to, const void *from, size_t len)
     {
             if(len<512 || in_interrupt())
    - return __memcpy_c(to, from, len);
    + return __memcpy(to, from, len);
             return _mmx_memcpy(to, from, len);
     }
     
    -static inline void *__memcpy3d(void *to, const void *from, size_t len)
    +
    +static inline void *__constant_memcpy3d(void *to, const void *from, size_t len)
     {
             if(len<512 || in_interrupt())
    - return __memcpy_g(to, from, len);
    + return __constant_memcpy(to, from, len);
             return _mmx_memcpy(to, from, len);
     }
     
    -#define memcpy(d, s, count) \
    -(__builtin_constant_p(count) ? \
    - __constant_memcpy3d((d),(s),(count)) : \
    - __memcpy3d((d),(s),(count)))
    +
    +#define memcpy(d, s, count) \
    + (__builtin_constant_p(count) && count) ?\
    + __constant_memcpy3d( d, s, count ) : \
    + __memcpy3d( d, s, count )
      
     #else /* CONFIG_X86_USE_3DNOW */
     
    @@ -389,7 +373,10 @@
     */
     
     
    -#define memcpy(d, s, count) __memcpy(d, s, count)
    +#define memcpy(d, s, count) \
    + (__builtin_constant_p(count) && count) ?\
    + __constant_memcpy( d, s, count ) : \
    + __memcpy( d, s, count )
     
     #endif /* CONFIG_X86_USE_3DNOW */
     
    @@ -429,22 +416,7 @@
     }
     
     
    -#define __HAVE_ARCH_MEMCMP
    -static inline int memcmp(const void * cs,const void * ct,size_t count)
    -{
    -int d0, d1, d2;
    -register int __res;
    -__asm__ __volatile__(
    - "repe\n\t"
    - "cmpsb\n\t"
    - "je 1f\n\t"
    - "sbbl %0,%0\n\t"
    - "orb $1,%b0\n"
    - "1:"
    - :"=a" (__res), "=&S" (d0), "=&D" (d1), "=&c" (d2)
    - :"0" (0), "1" (cs), "2" (ct), "3" (count));
    -return __res;
    -}
    +#define memcmp __builtin_memcmp
     
     
     #define __HAVE_ARCH_MEMCHR
    @@ -465,141 +437,57 @@
     return __res;
     }
     
    -#define __memset_cc(s,c,count) \
    -((count%4==0) ? \
    - __memset_cc_by4((s),(c),(count)) : \
    - ((count%2==0) ? \
    - __memset_cc_by2((s),(c),(count)) : \
    - __memset_cg((s),(c),(count))))
    -
    -#define __memset_gc(s,c,count) \
    -((count%4==0) ? \
    - __memset_gc_by4((s),(c),(count)) : \
    - ((count%2==0) ? \
    - __memset_gc_by2((s),(c),(count)) : \
    - __memset_gg((s),(c),(count))))
    -
    -#define __HAVE_ARCH_MEMSET
    -#define memset(s,c,count) \
    -(__builtin_constant_p(c) ? \
    - (__builtin_constant_p(count) ? \
    - __memset_cc((s),(c),(count)) : \
    - __memset_cg((s),(c),(count))) : \
    - (__builtin_constant_p(count) ? \
    - __memset_gc((s),(c),(count)) : \
    - __memset_gg((s),(c),(count))))
     
    -static inline void * __memset_cc_by4(void * s, char c, size_t count)
    +static inline void *__memset_generic( void *s, char c, size_t count )
     {
    -/*
    - * register char *tmp = s;
    - */
    -register char *tmp = (char *)s;
    -register int dummy;
    -__asm__ __volatile__ (
    - "\n1:\tmovl %2,(%0)\n\t"
    - "addl $4,%0\n\t"
    - "decl %1\n\t"
    - "jnz 1b"
    - :"=r" (tmp), "=r" (dummy)
    - :"q" (0x01010101UL * (unsigned char) c), "0" (tmp), "1" (count/4)
    - :"memory");
    -return s;
    -}
    + int d0,d1;
     
    -static inline void * __memset_cc_by2(void * s, char c, size_t count)
    -{
    -register void *tmp = (void *)s;
    -register int dummy;
    -__asm__ __volatile__ (
    - "shrl $1,%1\n\t" /* may be divisible also by 4 */
    - "jz 2f\n"
    - "\n1:\tmovl %2,(%0)\n\t"
    - "addl $4,%0\n\t"
    - "decl %1\n\t"
    - "jnz 1b\n"
    - "2:\tmovw %w2,(%0)"
    - :"=r" (tmp), "=r" (dummy)
    - :"q" (0x01010101UL * (unsigned char) c), "0" (tmp), "1" (count/2)
    - :"memory");
    -return s;
    + __asm__ volatile (
    + "rep\n\t"
    + "stosb\n\t"
    + :"=&c" (d0), "=&D" (d1)
    + :"a" (c), "0" (count), "1" (s)
    + :"memory"
    + );
    +
    + return s;
     }
     
    -static inline void * __memset_gc_by4(void * s, char c, size_t count)
    -{
    -register void *tmp = (void *)s;
    -register int dummy;
    -__asm__ __volatile__ (
    - "movb %b0,%h0\n"
    - "pushw %w0\n\t"
    - "shll $16,%0\n\t"
    - "popw %w0\n"
    - "1:\tmovl %0,(%1)\n\t"
    - "addl $4,%1\n\t"
    - "decl %2\n\t"
    - "jnz 1b\n"
    - :"=q" (c), "=r" (tmp), "=r" (dummy)
    - :"0" ((unsigned) c), "1" (tmp), "2" (count/4)
    - :"memory");
    -return s;
    -}
     
    -static inline void * __memset_gc_by2(void * s, char c, size_t count)
    -{
    -register void *tmp = (void *)s;
    -register int dummy1,dummy2;
    -__asm__ __volatile__ (
    - "movb %b0,%h0\n\t"
    - "shrl $1,%2\n\t" /* may be divisible also by 4 */
    - "jz 2f\n\t"
    - "pushw %w0\n\t"
    - "shll $16,%0\n\t"
    - "popw %w0\n"
    - "1:\tmovl %0,(%1)\n\t"
    - "addl $4,%1\n\t"
    - "decl %2\n\t"
    - "jnz 1b\n"
    - "2:\tmovw %w0,(%1)"
    - :"=q" (dummy1), "=r" (tmp), "=r" (dummy2)
    - :"0" ((unsigned) c), "1" (tmp), "2" (count/2)
    - :"memory");
    -return s;
    +static inline void *__memset_constant( void *s, char c, size_t count )
    +{
    + int d0,d1;
    +
    +#define MEMST(x) \
    + __asm__ volatile ( \
    + "\n1:\t" \
    + "movl %2,(%0)\n\t" \
    + "addl $4,%0\n\t" \
    + "decl %1\n\t" \
    + "jnz 1b\n" \
    + x \
    + :"=r" (d0), "=r" (d1) \
    + :"q" (0x01010101UL * ((unsigned char)c)), "0" (s), "1" (count/4) \
    + :"memory" \
    + )
    +
    + switch ( count % 4 ) {
    + case 0: MEMST(""); return s;
    + case 1: MEMST("\tmovb %b2,(%0)"); return s;
    + case 2: MEMST("\tmovw %w2,(%0)"); return s;
    + default: MEMST("\tmovw %w2,(%0)\n\tmovb %b2,2(%0)"); return s;
    + }
     }
     
    -static inline void * __memset_cg(void * s, char c, size_t count)
    -{
    -int d0, d1;
    -register void *tmp = (void *)s;
    -__asm__ __volatile__ (
    - "shrl $1,%%ecx\n\t"
    - "rep\n\t"
    - "stosw\n\t"
    - "jnc 1f\n\t"
    - "movb %%al,(%%edi)\n"
    - "1:"
    - :"=&c" (d0), "=&D" (d1)
    - :"a" (0x0101U * (unsigned char) c), "0" (count), "1" (tmp)
    - :"memory");
    -return s;
    -}
     
    -static inline void * __memset_gg(void * s,char c,size_t count)
    -{
    -int d0, d1, d2;
    -register void *tmp = (void *)s;
    -__asm__ __volatile__ (
    - "movb %%al,%%ah\n\t"
    - "shrl $1,%%ecx\n\t"
    - "rep\n\t"
    - "stosw\n\t"
    - "jnc 1f\n\t"
    - "movb %%al,(%%edi)\n"
    - "1:"
    - :"=&c" (d0), "=&D" (d1), "=&D" (d2)
    - :"0" (count), "1" (tmp), "2" (c)
    - :"memory");
    -return s;
    -}
    +#define __memset( s, c, count ) \
    + (__builtin_constant_p( count ) && count ) ? \
    + __memset_constant( s, c, count ) : \
    + __memset_generic( s, c, count )
    +
    +#define __HAVE_ARCH_MEMSET
    +#define memset( s, c, count ) __memset( s, c, count )
    +
     
     
     /*

    -
    To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
    the body of a message to majordomo@vger.kernel.org
    Please read the FAQ at http://www.tux.org/lkml/



    This archive was generated by hypermail 2b29 : Thu Aug 31 2000 - 09:28:59 EDT