제 목:[강좌] 인라인 어셈블리를 분석하자. (끝)                    관련자료:없음  [574]
보낸이:한동훈 (ddoch )  1997-02-20 19:25  조회:728

3. 덧붙이는 말

이제 분석할 string.h에 더이상 인라인 어셈블리가 없다. 혹시 의구심이 나는
부분이 있으면 그 부분만 따로 떼어 내어서 함수이름을 적절히 바꾸어서 테스트를
해 볼 수도 있다. 아울러 작성한 인라인 어셈블리가 포함된 소스를 컴파일 할 적에
gcc -S my_memcpy.c 와 같이 하여 순수 어셈블리 루틴만을 구할 수도 있다.

조금은 예외지만 설명하지 않은 것 중에 define 된 것을 살펴보자.

-----------------------------------------------------------------------
/* 1 */
#define memcpy(t, f, n) \
(__builtin_constant_p(n) ? \
 __constant_memcpy((t),(f),(n)) : \
 __memcpy((t),(f),(n)))

#define memcmp __builtin_memcmp

/* 2 */
#define __constant_count_memset(s,c,count) \
 __memset_generic((s),(c),(count))

/* 3 */
#define __constant_c_x_memset(s, c, count) \
(__builtin_constant_p(count) ? \
 __constant_c_and_count_memset((s),(c),(count)) : \
 __constant_c_memset((s),(c),(count)))

/* 4 */
#define __memset(s, c, count) \
(__builtin_constant_p(count) ? \
 __constant_count_memset((s),(c),(count)) : \
 __memset_generic((s),(c),(count)))

/* 5 */
#define memset(s, c, count) \
(__builtin_constant_p(c) ? \
 __constant_c_x_memset((s),(0x01010101UL*(unsigned char)c),(count)) : \
 __memset((s),(c),(count)))
------------------------------------------------------------------------

몇가지 매크로를 정의하고 있는 데, 모두다 __builtin_constant_p()라는 것에
의존하고 있다. 커널소스나 헤더파일에는 아무리 찾아봐도 이런 정의나 선언이
존재하지 않는다. 또 이것은 다른 많은 커널 소스속에서도 나타나고 있다.
컴파일러 소스나, libc속에 있을 법도 한데

아뭏던 첫부분의 메크로 정의(1)에서는 memcpy를 호출하면
__builtin_constant_p(n)이 0이면 __memcpy (뒤에 나올 매크로)가 호출되고
아니면 __constant_memcpy가 호출된다. 전자는 4바이트단위기본복사와 1-3의
나머지 복사를 하는 순수 인라인 어셈블리루틴이고, 후자는 앞전에 보았던
switch 문과 인라인 어셈을 혼용한 루틴이다.
(3)를 보면, __constant_c_x_memset을 호출하면 __builtin_constant_p(count)의
값에 따라서 0이면 __constant_c_memset(4바이트단위복사 인라인어셈루틴)이
사용되고 아니면 __constant_c_and_count_memset(switch와 인라인을 혼용한 루틴)
이 사용된다.

(4)을 보면, __memset을 호출되면 __builtin_constant_p(count)의 값에 따라서
0이면 __memset_generic이 사용되고, 아니면 __constant_count_memset이 사용된다.
(2)의 매크로 정의로 인해 __memset_generic은 __constant_count_memset에 대한
매크로로 같다.

(5)를 보면, memset을 호출하면 __builtin_constant_p(c)의 값에 따라서 0이면
__memset 이 불리워지고 아니면
__constant_c_x_memset((s), (0x01010101UL*(unsigned char)c), (count)); 로
불리워진다. 후자는 또다시 (3) 에 나오는 매크로이다.

매크로를 제외한 memset과 관련된 함수만 정리하자.

__memset_generic              : 1바이트씩 복사 인라인 어셈
__constant_c_memset           : 4바이트씩 복사 인라인 어셈
__constant_c_and_count_memset : switch와 4바이트 복사 어셈 혼용

4. 나오는 말

여기까지 다 보신분에게 박수를 보내드리고 싶다. 이제 AT&T문법의 인라인
어셈에는 어느정도 아실 것이며 여러분들의 프로그램에 필요한 만큼 인라인
어셈을 사용하실 수도 있을 것이다. 커널 소스를 본격적으로 분석하자면 AT&T
문법에 기반한 어셈블리는 필수적으로 알아야 한다. 그렇지 않다 하더라도 빠른
속도처리를 요하는 곳에는 한번 쯤 사용해 볼만하다.

휴.. 마지막으로 메모리 접근과 관련된 벤치마크 프로그램을 아래에 싣겠다.
1바이트,4바이트 단위로 복사를 하거나 memset을 하는 인라인 어셈블리
루틴들이다. string.h 의 제일 첫부분에 토발즈의 언급이 있긴 하지만 과연
어느것이 더 빠를 것인가? 여러분의 시스템에서도 한번 실행해보기 바란다.

그럼, 다음 기회에 또 만날 것을 약속하며....

또치 한동훈
ddoch@hitel.kol.co.kr
ddoch@nownuri.nowcom.co.kr

/* benchmark.c -- memory access speed benchmark test program
 *                between 1 byte copying and 4 byte copying, and
 *                measuring 1/100 second.
 *
 * My system is a 486DX4-100 with gcc 2.7.2. The results follow.
 * I compiled with 'gcc -O2 benchmark.c'.
*
 *   asm memset (byte): 198 (1/100 second)
 *   asm memset (long):  48 (1/100 second)
 *   libc memset      :  48 (1/100 second)
 *
 *   asm memcpy (byte): 341 (1/100 second)
 *   asm memcpy (long): 190 (1/100 second)
 *   libc memcpy      : 190 (1/100 second)
 *
 * by ddoch 1997.2.20 e-mail ddoch@hitel.kol.co.kr
 */

#include <stddef.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>

#if !defined(__i386__) && !defined(__x86_64__)
#error "benchmark.c uses x86 string instructions (rep stos/movs); build it for x86"
#endif

#define MEMORY_ALLOC_SIZE (1024*1024)   /* size in bytes of each test buffer */
#define TEST_NUMBERS (10)               /* repetitions of every measured call */

/*
 * Fill `count` bytes at `s` with the byte `c`, one byte per iteration
 * (rep stosb).  Returns `s`.
 *
 * The destination pointer and the counter are read-write operands because
 * the instruction advances/decrements them; naming an input register in the
 * clobber list instead is rejected by current GCC.
 */
static inline void *byte_memset(void *s, char c, size_t count)
{
    void *dst = s;

    __asm__ __volatile__(
        "cld\n\t"
        "rep\n\t"
        "stosb"
        : "+D" (dst), "+c" (count)
        : "a" (c)
        : "memory", "cc");
    return s;
}

/*
 * Fill `count` bytes at `s`, four bytes per iteration (rep stosl), then
 * store the 2-byte and 1-byte remainder selected by bits 1 and 0 of `count`.
 * Returns `s`.
 *
 * `c` is written as a 32-bit pattern: stosl stores all four low bytes of it,
 * stosw the low two, stosb the low one.  To fill with a single byte value
 * the caller must pass that byte replicated into every byte of `c`
 * (main() only passes 0, where this makes no difference).
 */
static inline void *long_memset(void *s, unsigned long c, size_t count)
{
    void *dst = s;
    size_t words = count / 4;

    __asm__ __volatile__(
        "cld\n\t"
        "rep ; stosl\n\t"
        "testb $2,%b[cnt]\n\t"
        "je 1f\n\t"
        "stosw\n"
        "1:\ttestb $1,%b[cnt]\n\t"
        "je 2f\n\t"
        "stosb\n"
        "2:"
        : "+D" (dst), "+c" (words)
        : "a" (c), [cnt] "q" (count)
        : "memory", "cc");
    return s;
}

/*
 * Copy `n` bytes from `from` to `to`, one byte per iteration (rep movsb).
 * Returns `to`.
 *
 * The "memory" clobber tells the compiler that the asm reads and writes
 * memory it cannot see through the operands, so it must not cache or
 * reorder accesses to the buffers around the statement.
 */
static inline void *byte_memcpy(void *to, const void *from, size_t n)
{
    void *dst = to;
    const void *src = from;

    __asm__ __volatile__(
        "cld\n\t"
        "rep; movsb\n\t"
        : "+D" (dst), "+S" (src), "+c" (n)
        : /* no pure inputs */
        : "memory", "cc");
    return to;
}

/*
 * Copy `n` bytes from `from` to `to`, four bytes per iteration (rep movsl),
 * then copy the 2-byte and 1-byte remainder selected by bits 1 and 0 of `n`.
 * Returns `to`.
 */
static inline void *long_memcpy(void *to, const void *from, size_t n)
{
    void *dst = to;
    const void *src = from;
    size_t words = n / 4;

    __asm__ __volatile__(
        "cld\n\t"
        "rep ; movsl\n\t"
        "testb $2,%b[cnt]\n\t"
        "je 1f\n\t"
        "movsw\n"
        "1:\ttestb $1,%b[cnt]\n\t"
        "je 2f\n\t"
        "movsb\n"
        "2:"
        : "+D" (dst), "+S" (src), "+c" (words)
        : [cnt] "q" (n)
        : "memory", "cc");
    return to;
}

/*
 * Print the CPU time between two clock() samples in hundredths of a second.
 * clock() counts in units of 1/CLOCKS_PER_SEC, which is not 1/100 s on every
 * system, so the difference is scaled explicitly instead of printed raw.
 */
static void report_elapsed(const char *label, clock_t start, clock_t end)
{
    long hundredths =
        (long)((double)(end - start) * 100.0 / (double)CLOCKS_PER_SEC);

    printf("%s: %ld (1/100 second)\n\n", label, hundredths);
}

/*
 * Time TEST_NUMBERS runs of each memset/memcpy variant over
 * MEMORY_ALLOC_SIZE-byte heap buffers and print one result line per variant.
 * Returns 0 on success, EXIT_FAILURE if a buffer cannot be allocated.
 */
int main(void)
{
    void *src, *dest;
    clock_t c1, c2;
    int i;

    /* memory allocation */
    src = malloc(MEMORY_ALLOC_SIZE);
    if (src == NULL) {
        fprintf(stderr, "Memory allocation error!!\n");
        fprintf(stderr, "Be decreased allocation memory size\n\n");
        return EXIT_FAILURE;
    }

    dest = malloc(MEMORY_ALLOC_SIZE);
    if (dest == NULL) {
        fprintf(stderr, "memory allocation error!!\n");
        fprintf(stderr, "Be decreased allocation memory size\n\n");
        free(src);      /* do not leak the first buffer on the error path */
        return EXIT_FAILURE;
    }

    /* memset test */
    printf("\nmemset testing...\n\n");

    /* memset byte assembly */
    c1 = clock();
    for (i = 0; i < TEST_NUMBERS; i++) {
        byte_memset(src, 0, MEMORY_ALLOC_SIZE);
    }
    c2 = clock();
    report_elapsed("asm memset (byte)", c1, c2);

    /* memset long assembly */
    c1 = clock();
    for (i = 0; i < TEST_NUMBERS; i++) {
        long_memset(src, 0, MEMORY_ALLOC_SIZE);
    }
    c2 = clock();
    report_elapsed("asm memset (long)", c1, c2);

    /* memset in libc (the compiler may expand this call as a builtin) */
    c1 = clock();
    for (i = 0; i < TEST_NUMBERS; i++) {
        memset(src, 0, MEMORY_ALLOC_SIZE);
    }
    c2 = clock();
    report_elapsed("libc memset", c1, c2);

    /* memcpy test: src was fully written by the memset runs above */
    printf("\nmemcpy testing...\n\n");

    /* memcpy byte assembly */
    c1 = clock();
    for (i = 0; i < TEST_NUMBERS; i++) {
        byte_memcpy(dest, src, MEMORY_ALLOC_SIZE);
    }
    c2 = clock();
    report_elapsed("asm memcpy (byte)", c1, c2);

    /* memcpy long assembly */
    c1 = clock();
    for (i = 0; i < TEST_NUMBERS; i++) {
        long_memcpy(dest, src, MEMORY_ALLOC_SIZE);
    }
    c2 = clock();
    report_elapsed("asm memcpy (long)", c1, c2);

    /* memcpy in libc (the compiler may expand this call as a builtin) */
    c1 = clock();
    for (i = 0; i < TEST_NUMBERS; i++) {
        memcpy(dest, src, MEMORY_ALLOC_SIZE);
    }
    c2 = clock();
    report_elapsed("libc memcpy", c1, c2);

    free(src);
    free(dest);
    return 0;
}