linux中memcpy实现
1/*
2 * arch/arm/boot/compressed/string.c
3 *
4 * Small subset of simple string routines
5 */
6
7#include <linux/string.h>
8
/*
 * memcpy - copy __n bytes from __src to __dest.
 *
 * The copy is done one byte at a time, so it places no alignment
 * requirement on either pointer.  The main loop is unrolled to move
 * eight bytes per iteration; the remaining 0..7 bytes are handled by
 * testing the low three bits of __n (4, then 2, then 1 bytes).
 *
 * The copy always runs forward from the lowest address, so it does not
 * handle overlapping regions where __dest lies inside the source range.
 *
 * Returns the original __dest pointer.
 */
void *memcpy(void *__dest, const void *__src, size_t __n)
{
	unsigned char *d = __dest;
	const unsigned char *s = __src;	/* keep the source read-only */
	size_t blocks;

	/*
	 * The block counter is a size_t: storing __n >> 3 in an int would
	 * truncate very large lengths and could even yield a negative
	 * count, silently skipping the loop.
	 */
	for (blocks = __n >> 3; blocks > 0; blocks--) {
		*d++ = *s++;
		*d++ = *s++;
		*d++ = *s++;
		*d++ = *s++;
		*d++ = *s++;
		*d++ = *s++;
		*d++ = *s++;
		*d++ = *s++;
	}

	/* Tail: bit 2 of __n set means four more bytes to copy. */
	if (__n & (1u << 2)) {
		*d++ = *s++;
		*d++ = *s++;
		*d++ = *s++;
		*d++ = *s++;
	}

	/* Bit 1 set means two more bytes. */
	if (__n & (1u << 1)) {
		*d++ = *s++;
		*d++ = *s++;
	}

	/* Bit 0 set means one final byte. */
	if (__n & 1u)
		*d = *s;

	return __dest;
}
请问为什么采用这种实现方式,采用这种实现方式会提升效率么?为什么? memcpy 效率 arm
[解决办法]
减少for循环变量i递减和条件判断语句的开销:循环展开后,每复制8个字节才做一次计数和判断,而不是每复制1个字节就做一次。
------解决方案--------------------
循环展开优化,现在很多编译器可以自动做这一优化,不需要再这样写了。
[解决办法]
先
http://www.microsoft.com/visualstudio/chs/downloads#d-2010-express
点开Visual C++ 2010 Express下面的语言选‘简体中文’,再点立即安装
再参考Windows中memcpy实现:
C:\Program Files\Microsoft Visual Studio 10.0\VC\crt\src\intel\memcpy.asm
[解决办法]
/*
 * Baseline form: copy 100 shorts from src to dest, one element per
 * loop iteration, so the counter update and the loop condition are
 * evaluated once for every element copied.
 */
short src[100], dest[100];
for( int i = 0; i < 100; ++ i )
{
dest[i] = src[i];
}
/*
 * Manually unrolled form using array indexing: each iteration copies
 * four consecutive shorts and advances i by 4, so the counter update
 * and loop condition run 25 times instead of 100.  No remainder loop
 * is needed here because 100 is a multiple of 4.
 * NOTE(review): the disassembly that follows this snippet presumably
 * corresponds to this loop -- confirm against the original post.
 */
short src[100], dest[100];
for( int i = 0; i < 100; i += 4 )
{
dest[i] = src[i];
dest[i+1] = src[i+1];
dest[i+2] = src[i+2];
dest[i+3] = src[i+3];
}
0040102C xor eax,eax
0040102E mov edi,edi
00401030 movzx ecx,word ptr [esp+eax+0D0h]
00401038 movzx edx,word ptr [esp+eax+0D2h]
00401040 mov word ptr [esp+eax+8],cx
00401045 movzx ecx,word ptr [esp+eax+0D4h]
0040104D mov word ptr [esp+eax+0Ah],dx
00401052 movzx edx,word ptr [esp+eax+0D6h]
0040105A mov word ptr [esp+eax+0Ch],cx
0040105F mov word ptr [esp+eax+0Eh],dx
00401064 add eax,8
00401067 cmp eax,0C8h
0040106C jl main+20h (401030h)
/*
 * Manually unrolled form using pointers: psrc and pdest walk the two
 * arrays, and each statement copies one short and then post-increments
 * both pointers.  Four elements are copied per iteration; i is used
 * only as the loop counter (25 iterations for 100 elements).
 * NOTE(review): the disassembly that follows this snippet presumably
 * corresponds to this loop -- confirm against the original post.
 */
short src[100], dest[100];
short* psrc = src, *pdest=dest;
for( int i = 0; i < 100; i += 4 )
{
*pdest ++ = *psrc++;
*pdest ++ = *psrc++;
*pdest ++ = *psrc++;
*pdest ++ = *psrc++;
}
00401030 movzx esi,word ptr [ecx]
00401033 mov word ptr [eax],si
00401036 movzx esi,word ptr [ecx+2]
0040103A add ecx,2
0040103D add eax,2
00401040 mov word ptr [eax],si
00401043 movzx esi,word ptr [ecx+2]
00401047 add ecx,2
0040104A add eax,2
0040104D mov word ptr [eax],si
00401050 movzx esi,word ptr [ecx+2]
00401054 add ecx,2
00401057 add eax,2
0040105A mov word ptr [eax],si
0040105D add eax,2
00401060 add ecx,2
00401063 sub edx,1
00401066 jne main+20h (401030h)