求一个函数的优化
发在这个区说明算法层面上的优化我已经想不出来了,只能追求常数上的优化了。瓶颈就一个函数,我想问一下能有什么优化。
先给代码:
typedef unsigned long long uint64;
uint64 dp[33][19];
int n;
uint64 StateToIndex(uint64 state, int blackpos)
{
uint64 result = 0;
int prev = 1;
for(int i=n-1;i>=0;i--)
switch((state >> (2*i)) & 0x3)
{
case 2:
result += dp[i][prev+1] + dp[i][prev];
if(--prev < 0)
return -1; //invalid state
break;
case 1:
result += dp[i][prev];
++prev;
default:
break;
}
return result;
}
dp[0][0] = 1;
for(int i=1;i<=32;i++)
for(int j=0;j<=16;j++)
{
if(j <= i-1)
dp[i][j] += dp[i-1][j]; // current value 0
if(j+1 <= i-1)
dp[i][j] += dp[i-1][j+1]; // current value 1
if(j > 0)
dp[i][j] += dp[i-1][j-1]; // current value 2
}
for(int i=n-1;i>=0;i--)
switch((state >> (2*i)) & 0x3)
{
case 2:
result += dp[i][prev+1] + dp[i][prev];
if(--prev < 0)
return -1; //invalid state
break;
case 1:
result += dp[i][prev];
++prev;
default:
break;
}
[解决办法]
这代码再优化一些是没问题,优化几倍肯定要完全改思路了
[解决办法]
typedef unsigned long long uint64;
uint64 dp[33][19];
int n;
uint64 StateToIndex(uint64 state, int blackpos)
{
uint64 result = 0;
int prev = 1;
for(int i=n-1;i>=0;i--)
switch((state >> (2*i)) & 0x3)
{
case 2:
result += dp[i][prev+1] + dp[i][prev];
if(--prev < 0)
return -1; //invalid state
break;
case 1:
result += dp[i][prev];
++prev;
default:
break;
}
return result;
}
uint64 StateToIndex_24(uint64 state, int blackpos)
{
uint64 result = 0;
int prev = 1;
static uint64 masks[3][24] = {
{
(uint64)0x3<<0, (uint64)0x3<<2, (uint64)0x3<<4, (uint64)0x3<<6, (uint64)0x3<<8, (uint64)0x3<<10,(uint64)0x3<<12,
(uint64)0x3<<14,(uint64)0x3<<16,(uint64)0x3<<18,(uint64)0x3<<20,(uint64)0x3<<22,(uint64)0x3<<24,(uint64)0x3<<26,
(uint64)0x3<<28,(uint64)0x3<<30,(uint64)0x3<<32,(uint64)0x3<<34,(uint64)0x3<<36,(uint64)0x3<<38,(uint64)0x3<<40,
(uint64)0x3<<42,(uint64)0x3<<44,(uint64)0x3<<46
},
{
(uint64)0x2<<0, (uint64)0x2<<2, (uint64)0x2<<4,(uint64)0x2<<6, (uint64)0x2<<8,(uint64)0x2<<10, (uint64)0x2<<12,
(uint64)0x2<<14,(uint64)0x2<<16,(uint64)0x2<<18,(uint64)0x2<<20,(uint64)0x2<<22,(uint64)0x2<<24,(uint64)0x2<<26,
(uint64)0x2<<28,(uint64)0x2<<30,(uint64)0x2<<32,(uint64)0x2<<34,(uint64)0x2<<36,(uint64)0x2<<38,(uint64)0x2<<40,
(uint64)0x2<<42,(uint64)0x2<<44,(uint64)0x2<<46
},
{
(uint64)0x1<<0, (uint64)0x1<<2, (uint64)0x1<<4, (uint64)0x1<<6, (uint64)0x1<<8,(uint64)0x1<<10, (uint64)0x1<<12,
(uint64)0x1<<14,(uint64)0x1<<16,(uint64)0x1<<18,(uint64)0x1<<20,(uint64)0x1<<22,(uint64)0x1<<24,(uint64)0x1<<26,
(uint64)0x1<<28,(uint64)0x1<<30,(uint64)0x1<<32,(uint64)0x1<<34,(uint64)0x1<<36,(uint64)0x1<<38,(uint64)0x1<<40,
(uint64)0x1<<42,(uint64)0x1<<44,(uint64)0x1<<46
}
};
uint64 v;
#define _It_(i) \
v = state & masks[0][i];\
if(masks[1][i] == v)\
{\
result += dp[i][prev+1] + dp[i][prev];\
if(--prev < 0)\
return -1; \
}\
else if(masks[2][i] == v)\
{\
result += dp[i][prev];\
++prev;\
}
_It_(23);
_It_(22);
_It_(21);
_It_(20);
_It_(19);
_It_(18);
_It_(17);
_It_(16);
_It_(15);
_It_(14);
_It_(13);
_It_(12);
_It_(11);
_It_(10);
_It_(9);
_It_(8);
_It_(7);
_It_(6);
_It_(5);
_It_(4);
_It_(3);
_It_(2);
_It_(1);
_It_(0);
return result;
}