大家来看看运算符的效率,如何快速求uint抹去低16位之后的值
// Micro-benchmark: five ways to clear the low 16 bits of a uint.
//
// Every loop feeds its result back into `test`, so the expression operates on a
// variable rather than on literals; an expression made only of literals
// (e.g. 0xffffaaaa & 0xffff0000) is folded to a constant by the C# compiler and
// the loop would then time nothing but the loop counter.
// Timing uses Stopwatch, which is a monotonic high-resolution timer, whereas
// DateTime.Now has coarse resolution and follows wall-clock adjustments.
const int Iterations = 100000000;
uint test;
System.Diagnostics.Stopwatch watch = new System.Diagnostics.Stopwatch();

// 1: bitwise AND with a mask.
test = 0xffffaaaa;
watch.Restart();
for (int i = 0; i < Iterations; i++)
{
    test = test & 0xffff0000;
}
TimeSpan ts = watch.Elapsed;

// 2: shift the low half out and shift zeros back in.
test = 0xffffaaaa;
watch.Restart();
for (int i = 0; i < Iterations; i++)
{
    test = test >> 16 << 16;
}
TimeSpan ts2 = watch.Elapsed;

// 3: subtract the value's own low 16 bits.
// (Subtracting (ushort)0xffffffff, i.e. 0xffff, would give 0xfffeaaab instead.)
test = 0xffffaaaa;
watch.Restart();
unchecked
{
    for (int i = 0; i < Iterations; i++)
    {
        test = test - (ushort)test;
    }
}
TimeSpan ts3 = watch.Elapsed;

// 4: read the upper ushort through a pointer and move it back into place.
// Shifting by 16 equals multiplying by 65536 (not 65535).
// NOTE: "+ 1" selects the upper half only on little-endian hardware.
test = 0xffffaaaa;
watch.Restart();
unsafe
{
    for (int i = 0; i < Iterations; i++)
    {
        test = ((uint)(*(((ushort*)(&test)) + 1))) << 16;
    }
}
TimeSpan ts4 = watch.Elapsed;

// 5: overwrite the lower ushort in place through a pointer.
// NOTE: offset 0 is the lower half only on little-endian hardware.
test = 0xffffaaaa;
watch.Restart();
unsafe
{
    for (int i = 0; i < Iterations; i++)
    {
        *((ushort*)(&test)) = 0;
    }
}
TimeSpan ts5 = watch.Elapsed;

Console.WriteLine("1:" + ts.Ticks + "\r\n2:" + ts2.Ticks + "\r\n3:" + ts3.Ticks + "\r\n4:" + ts4.Ticks + "\r\n5:" + ts5.Ticks);
// Requires: using System.Threading.Tasks;
// The upper bound of Parallel.For is exclusive, so (0, 10) yields 10 partitions.
// 10 partitions x 10,000,000 iterations = 100,000,000 operations, i.e. the same
// amount of work as each sequential loop (with (0, 9) only 90,000,000 were done,
// which made the parallel timing look better than it is).
Parallel.For(0, 10, pos =>
{
    // Operate on a variable: a literal-only expression is folded at compile time.
    uint value = 0xffffaaaa;
    for (int i = 0; i < 10000000; i++)
    {
        value = value >> 16 << 16;
    }
});
[解决办法]
。。。跑了下123一样快。。。
但是并行显然更快:
1:2656250
2:2656250
3:2656250
4:3125000
5:7187500
6:1406250
using System;
using System.Collections.Generic;
using System.Diagnostics;
using System.Linq;
using System.Runtime.InteropServices;
using System.Text;
using System.Threading.Tasks;

namespace ConsoleApplication1
{
    /// <summary>
    /// Micro-benchmark comparing several ways of clearing the low 16 bits of a
    /// <see cref="uint"/>. Must be compiled with unsafe code enabled.
    /// </summary>
    /// <remarks>
    /// Each technique lives in its own method that applies the operation
    /// <c>iterations</c> times, feeding the result back into the operand and
    /// returning it. This keeps the C# compiler from folding the expression into
    /// a constant and keeps the result observable to the caller.
    /// The pointer-based variants assume a little-endian machine.
    /// </remarks>
    class Program
    {
        /// <summary>Number of operations performed by every benchmark.</summary>
        private const int Iterations = 100000000;

        /// <summary>Number of partitions used by the parallel benchmark.</summary>
        private const int ParallelPartitions = 10;

        /// <summary>Value whose low 16 bits are cleared in every benchmark.</summary>
        private const uint Sample = 0xffffaaaa;

        /// <summary>
        /// Runs every benchmark once and prints the elapsed time of each in
        /// <see cref="TimeSpan"/> ticks (100 ns units), one "n:ticks" line per benchmark.
        /// </summary>
        /// <param name="args">Unused command-line arguments.</param>
        static void Main(string[] args)
        {
            long ticks1 = Measure(ClearLow16WithAnd);
            long ticks2 = Measure(ClearLow16WithShifts);
            long ticks3 = Measure(ClearLow16BySubtraction);
            long ticks4 = Measure(ClearLow16ViaHighHalfRead);
            long ticks5 = Measure(ClearLow16ViaLowHalfWrite);
            long ticks6 = Measure(ClearLow16InParallel);

            Console.WriteLine("1:" + ticks1 + "\r\n2:" + ticks2 + "\r\n3:" + ticks3 + "\r\n4:" + ticks4 + "\r\n5:" + ticks5 + "\r\n6:" + ticks6);
        }

        /// <summary>
        /// Times one benchmark with a <see cref="Stopwatch"/> (monotonic and
        /// high-resolution, unlike <c>DateTime.Now</c>).
        /// </summary>
        /// <param name="benchmark">Benchmark taking the start value and the iteration count.</param>
        /// <returns>Elapsed time in <see cref="TimeSpan"/> ticks.</returns>
        private static long Measure(Func<uint, int, uint> benchmark)
        {
            // Warm-up call so that JIT compilation is not part of the measurement.
            benchmark(Sample, 1);

            Stopwatch watch = Stopwatch.StartNew();
            benchmark(Sample, Iterations);
            watch.Stop();
            return watch.Elapsed.Ticks;
        }

        /// <summary>Clears the low 16 bits with a bitwise AND mask.</summary>
        /// <param name="value">Start value.</param>
        /// <param name="iterations">How many times the operation is applied.</param>
        /// <returns>The value after the last iteration.</returns>
        internal static uint ClearLow16WithAnd(uint value, int iterations)
        {
            for (int i = 0; i < iterations; i++)
            {
                value = value & 0xffff0000;
            }
            return value;
        }

        /// <summary>Clears the low 16 bits by shifting right, then left, by 16.</summary>
        /// <param name="value">Start value.</param>
        /// <param name="iterations">How many times the operation is applied.</param>
        /// <returns>The value after the last iteration.</returns>
        internal static uint ClearLow16WithShifts(uint value, int iterations)
        {
            for (int i = 0; i < iterations; i++)
            {
                value = value >> 16 << 16;
            }
            return value;
        }

        /// <summary>Clears the low 16 bits by subtracting the value's own low half.</summary>
        /// <param name="value">Start value.</param>
        /// <param name="iterations">How many times the operation is applied.</param>
        /// <returns>The value after the last iteration.</returns>
        internal static uint ClearLow16BySubtraction(uint value, int iterations)
        {
            unchecked
            {
                for (int i = 0; i < iterations; i++)
                {
                    // Subtract the low half of *this* value; subtracting the
                    // constant (ushort)0xffffffff (= 0xffff) is only correct
                    // when the low half happens to be 0xffff.
                    value = value - (ushort)value;
                }
            }
            return value;
        }

        /// <summary>
        /// Reads the upper 16 bits through a <c>ushort*</c> and shifts them back
        /// into place (equivalent to multiplying by 65536). Little-endian only.
        /// </summary>
        /// <param name="value">Start value.</param>
        /// <param name="iterations">How many times the operation is applied.</param>
        /// <returns>The value after the last iteration.</returns>
        internal static unsafe uint ClearLow16ViaHighHalfRead(uint value, int iterations)
        {
            for (int i = 0; i < iterations; i++)
            {
                value = (uint)(*((ushort*)&value + 1)) << 16;
            }
            return value;
        }

        /// <summary>
        /// Overwrites the lower 16 bits in place through a <c>ushort*</c>.
        /// Little-endian only.
        /// </summary>
        /// <param name="value">Start value.</param>
        /// <param name="iterations">How many times the operation is applied.</param>
        /// <returns>The value after the last iteration.</returns>
        internal static unsafe uint ClearLow16ViaLowHalfWrite(uint value, int iterations)
        {
            for (int i = 0; i < iterations; i++)
            {
                *((ushort*)&value) = 0;
            }
            return value;
        }

        /// <summary>
        /// Applies the shift technique with the work split over
        /// <see cref="ParallelPartitions"/> partitions. All <paramref name="iterations"/>
        /// operations are performed in total, so the timing is comparable with
        /// the sequential benchmarks.
        /// </summary>
        /// <param name="value">Start value.</param>
        /// <param name="iterations">Total number of operations across all partitions.</param>
        /// <returns>The bitwise AND of the start value and every partition's result.</returns>
        internal static uint ClearLow16InParallel(uint value, int iterations)
        {
            uint[] results = new uint[ParallelPartitions];
            int share = iterations / ParallelPartitions;
            int leftover = iterations % ParallelPartitions;

            // The upper bound of Parallel.For is exclusive.
            Parallel.For(0, ParallelPartitions, partition =>
            {
                // The first `leftover` partitions take one extra iteration.
                int count = share + (partition < leftover ? 1 : 0);
                uint local = value;
                for (int i = 0; i < count; i++)
                {
                    local = local >> 16 << 16;
                }
                results[partition] = local;
            });

            uint combined = value;
            foreach (uint partial in results)
            {
                combined &= partial;
            }
            return combined;
        }
    }
}
[解决办法]
// Repeatedly stores a 16-bit zero at the address of `test`, viewed as ushort*.
// On a little-endian machine that is the low half of the uint (assumption, not
// checked here). Needs an unsafe context.
uint test = 0xffffaaaa;
int remaining = 100000000;
while (remaining-- > 0)
{
    ushort* firstHalf = (ushort*)&test;
    *firstHalf = 0;
}
[解决办法]
而且就测试的结果来看,就让程序在一个核心上跑,你的代码也不快
[解决办法]
1:6406250
2:6562500
3:6718750
4:6406250
5:5937500 Parallel 单核心
请按任意键继续. . .
好像确实是快一点啊
睡觉了,版主,咱明天再说
------解决方案--------------------
Debug 单核心
33593750 :*((ushort*)(&test)) = 0
32656250 :*((ushort*)(&test)) = 0
32968750 :*((ushort*)(&test)) = 0
32500000 :test = test & 0xffff0000
32968750 :test = test & 0xffff0000;
32656250 :test = test & 0xffff0000;
42656250 :test = test >> 16 << 16
42656250 :test = test >> 16 << 16
42343750 :test = test >> 16 << 16
32656250 :test = 0xffff0000
32968750 :test = 0xffff0000
32968750 :test = 0xffff0000
38281250 :Parallel: test1 = test1 & 0xffff0000
38125000 :Parallel: test1 = test1 & 0xffff0000
38281250 :Parallel: test1 = test1 & 0xffff0000
请按任意键继续. . .
Release 单核心
11562500 :*((ushort*)(&test)) = 0
13437500 :*((ushort*)(&test)) = 0
9843750 :*((ushort*)(&test)) = 0
9843750 :test = test & 0xffff0000
6718750 :test = test & 0xffff0000;
13281250 :test = test & 0xffff0000;
7343750 :test = test >> 16 << 16
13281250 :test = test >> 16 << 16
6718750 :test = test >> 16 << 16
10000000 :test = 0xffff0000
6718750 :test = 0xffff0000
6718750 :test = 0xffff0000
9218750 :Parallel: test1 = test1 & 0xffff0000
9062500 :Parallel: test1 = test1 & 0xffff0000
8906250 :Parallel: test1 = test1 & 0xffff0000
请按任意键继续. . .
[解决办法]
各位都好厉害啊
[解决办法]
to 18#
你这样是用的格式转换。
而事实上CPU最恨你用比它的寄存器小的大小来操作数据,比方带16bits寄存器的一句指令要比正常的32bits寄存器的指令慢10倍。
但为什么不就这样最简单有效呢:
/* The mask leaves only bits 16-31 set, so the result needs a full 32-bit
 * unsigned variable; a 16-bit short would keep just the (all-zero) low half. */
unsigned int temp_UI = In_Dw & 0xffff0000u;
只有一个寄存器指令,改善成这样可以提高一些效率 /:^]:
unsigned int Buffer_UIarry[100000000] = {0xffffaaaa};
//这儿先判断要处理的数量能不能被6整除,除后得到的余数记录下来,在循环之后把未处理掉的余数那几个处理下就好。代码里没,憋尿急忙忙写的 /:^|
for (unsigned int * tempPtrToBuf_UIarry= Buffer_UIarry, * EndPtrToBuf_UIarry= &Buffer_UIarry[100000000]; tempPtrToBuf_UIarry<EndPtrToBuf_UIarry; tempPtrToBuf_UIarry= tempPtrToBuf_UIarry+ 6)
{
__asm
{
mov eax, [tempPtrToBuf_UIarry];
mov ebx, [tempPtrToBuf_UIarry+ 0x4];
mov ecx, [tempPtrToBuf_UIarry+ 0x8];
mov edx, [tempPtrToBuf_UIarry+ 0xc];
mov esi, [tempPtrToBuf_UIarry+ 0x10];
mov edi, [tempPtrToBuf_UIarry+ 0x14];
and eax, 0xffff0000;
and ebx, 0xffff0000;
and ecx, 0xffff0000;
and edx, 0xffff0000;
and esi, 0xffff0000;
and edi, 0xffff0000;
and [tempPtrToBuf_UIarry], eax;
and [tempPtrToBuf_UIarry+ 0x4], ebx;
and [tempPtrToBuf_UIarry+ 0x8], ecx;
and [tempPtrToBuf_UIarry+ 0xc], edx;
and [tempPtrToBuf_UIarry+ 0x10], edi;
and [tempPtrToBuf_UIarry+ 0x14], esi;
}
}
//把互相之间没有依存关系的指令写到连续的一起,可以被CPU同时载入连续的多条指令同时执行掉。记得现在的指令流水线应该是同时执行5个指令,这样的话性能应该已不错了。
[解决办法]
不知道做异或速度怎么样?应该不会很慢的吧
[解决办法]
第一种方法,应该是最快的。&运算,比加减法还快。
[解决办法]
unsigned int ui=0x12345678u;
ui&=0xffff0000u;
或者
// Inline-assembly alternative (the __asm { } block syntax is MSVC's; the
// registers used are 32-bit x86): clears the low 16 bits of ui.
__asm {
// Load ui into eax.
mov eax,ui
// ax is the low 16 bits of eax; xor with itself sets them to zero and leaves
// the upper 16 bits of eax untouched.
xor ax,ax
// Store the result back into ui.
mov ui,eax
}
[解决办法]
43汇编不错,查表怎么样,不过空间悲剧了
[解决办法]
感谢caozhy的认真详细的回复和解答
[解决办法]
[解决办法]
内容存入剪贴板
[解决办法]
我的写法是
// Take the ADDRESS of test: casting the value of test itself to a pointer
// would store the zero at whatever address that value happens to name.
*((ushort*)&test)=0;
至于你说的慢10倍,那纯属扯淡
第一
上面的语句最终编译成的汇编指令也许是
mov word ptr [test地址], 0 ; 16-bit store: "dword ptr" would clear all 32 bits of test
这样根本就不会把16bit的数放到寄存器里
被放到寄存器里的也只是test变量的地址
第二
就算编译器犯二,编译成
mov ax, 0
mov word ptr [test], ax ; ax is 16 bits wide, so the destination must be "word ptr", not "dword ptr"
其中 mov ax也不会比 mov eax慢,编译器会对齐内存
而且16bit的内存对齐比32bit更容易对齐