SSE2实现HAAR小波变换(dwt2与idwt2)
wiki链接:http://en.wikipedia.org/wiki/Haar_wavelet
可用SSE2实现HAAR小波变换,并达到实时处理的速度;关于HAAR小波的介绍可参考以上维基链接。
参考MATLAB中dwt2与idwt2的函数原型,基于OpenCV的框架进行了汇编优化实现
HAAR小波也可用于图像压缩:将CH、CV、CD中绝对值小于某一阈值的分量置为0,这三个矩阵就会成为稀疏矩阵(Sparse Matrix);反变换后图像的质量取决于所选阈值的大小。
实际实现时,可选择使用浮点数(单精度或双精度)进行矩阵计算;改用整数计算能得到更快的速度,但不能进行完整的压缩与解压缩。
附代码:
inline void dwt2_row(__out double* ca0, __out double* ch0, __out double* cv0, __out double* cd0, __in unsigned char* row0, __in unsigned char* row1, __in int col){__asm{moveax_ptr, ca0;movebx_ptr, ch0;movecx_ptr, cv0;movedx_ptr, cd0;movesi_ptr, row0;movedi_ptr, row1;pxorxmm3, xmm3;movapdxmm7, g_halfd;subcol, 4;jlloop_2;loop_4:movdxmm1, [esi_ptr];movdxmm5, [edi_ptr];punpcklbwxmm1, xmm3;punpcklbwxmm5, xmm3;punpcklwdxmm1, xmm3;punpcklwdxmm5, xmm3;cvtdq2pdxmm0, xmm1;cvtdq2pdxmm4, xmm5;shufpdxmm1, xmm1, 1;shufpdxmm5, xmm5, 1;cvtdq2pdxmm1, xmm1;cvtdq2pdxmm5, xmm5;addpdxmm4, xmm0;addpdxmm5, xmm1;mulpdxmm4, xmm7;mulpdxmm5, xmm7;subpdxmm0, xmm4;subpdxmm1, xmm5;movapdxmm6, xmm4;movapdxmm2, xmm0;shufpdxmm4, xmm5, 0;shufpdxmm6, xmm5, 3;shufpdxmm0, xmm1, 0;shufpdxmm2, xmm1, 3;addpdxmm6, xmm4;addpdxmm2, xmm0;mulpdxmm6, xmm7;mulpdxmm2, xmm7;subpdxmm4, xmm6;subpdxmm0, xmm2;movupd[eax_ptr], xmm6;movupd[ebx_ptr], xmm4;movupd[ecx_ptr], xmm2;movupd[edx_ptr], xmm0;addesi_ptr, 4;addedi_ptr, 4;addeax_ptr, 0x10;addebx_ptr, 0x10;addecx_ptr, 0x10;addedx_ptr, 0x10;subcol, 4;jgeloop_4;loop_2:cmpcol, -2;jlloop_end;pinsrwxmm0, [esi_ptr], 0;pinsrwxmm4, [edi_ptr], 0;punpcklbwxmm0, xmm3;punpcklbwxmm4, xmm3;punpcklwdxmm0, xmm3;punpcklwdxmm4, xmm3;cvtdq2pdxmm0, xmm0;cvtdq2pdxmm4, xmm4;addpdxmm4, xmm0;mulpdxmm4, xmm7;subpdxmm0, xmm4;movapdxmm5, xmm4;shufpdxmm4, xmm0, 0;shufpdxmm5, xmm0, 3;addpdxmm5, xmm4;mulpdxmm5, xmm7;subpdxmm4, xmm5;movsd[eax_ptr], xmm5;shufpdxmm5, xmm5, 1;movsd[ebx_ptr], xmm4;shufpdxmm4, xmm4, 1;movsd[ecx_ptr], xmm5;movsd[edx_ptr], xmm4;loop_end:}}inline void idwt2_row(__out unsigned char* row0, __out unsigned char* row1, __in double* ca0, __in double* ch0, __in double* cv0, __in double* cd0, __in int col){__asm{moveax_ptr, ca0;movebx_ptr, ch0;movecx_ptr, cv0;movedx_ptr, cd0;movesi_ptr, row0;movedi_ptr, row1;movapdxmm7, g_halfd;subcol, 4;jlloop_2;loop_4:movupdxmm0, [eax_ptr];movupdxmm1, [ebx_ptr];movupdxmm4, [ecx_ptr];movupdxmm5, [edx_ptr];addpdxmm1, xmm0;addpdxmm5, 
xmm4;addpdxmm0, xmm0;addpdxmm4, xmm4;subpdxmm0, xmm1;subpdxmm4, xmm5;movapdxmm2, xmm1;movapdxmm6, xmm5;shufpdxmm1, xmm0, 0;shufpdxmm2, xmm0, 3;shufpdxmm5, xmm4, 0;shufpdxmm6, xmm4, 3;addpdxmm5, xmm1;addpdxmm6, xmm2;addpdxmm1, xmm1;addpdxmm2, xmm2;subpdxmm1, xmm5;subpdxmm2, xmm6;addpdxmm5, xmm7;addpdxmm6, xmm7;addpdxmm1, xmm7;addpdxmm2, xmm7;cvttpd2dqxmm5, xmm5;cvttpd2dqxmm6, xmm6;cvttpd2dqxmm1, xmm1;cvttpd2dqxmm2, xmm2;shufpdxmm5, xmm6, 0;shufpdxmm1, xmm2, 0;packssdwxmm5, xmm1;packuswbxmm5, xmm5;pshufdxmm1, xmm5, 1;movd[esi_ptr], xmm5;movd[edi_ptr], xmm1;addesi_ptr, 4;addedi_ptr, 4;addeax_ptr, 0x10;addebx_ptr, 0x10;addecx_ptr, 0x10;addedx_ptr, 0x10;subcol, 4;jgeloop_4;loop_2:cmpcol, -2;jlloop_end;movsdxmm0, [eax_ptr];movsdxmm1, [ebx_ptr];movsdxmm4, [ecx_ptr];movsdxmm5, [edx_ptr];addpdxmm1, xmm0;addpdxmm5, xmm4;addpdxmm0, xmm0;addpdxmm4, xmm4;subpdxmm0, xmm1;subpdxmm4, xmm5;shufpdxmm1, xmm0, 0;shufpdxmm5, xmm4, 0;addpdxmm5, xmm1;addpdxmm1, xmm1;subpdxmm1, xmm5;addpdxmm5, xmm7;addpdxmm1, xmm7;cvttpd2dqxmm5, xmm5;cvttpd2dqxmm1, xmm1;packssdwxmm5, xmm1;packuswbxmm5, xmm5;movdeax_ptr, xmm5;mov[esi_ptr], ax;shreax_ptr, 16;stosw;loop_end:}}inline void dwt2(__out cv::Mat& CA, __out cv::Mat& CH, __out cv::Mat& CV, __out cv::Mat& CD, __in cv::Mat const& I){if(CA.type() != CV_64FC1 || CH.type() != CV_64FC1 || CV.type() != CV_64FC1 || CD.type() != CV_64FC1 || I.channels() != 1)return;double* ca = reinterpret_cast<double*>(CA.data);double* ch = reinterpret_cast<double*>(CH.data);double* cv = reinterpret_cast<double*>(CV.data);double* cd = reinterpret_cast<double*>(CD.data);unsigned char* row = reinterpret_cast<unsigned char*>(I.data);for(int i=0; i<I.rows; i+=2){dwt2_row(ca, ch, cv, cd, row, row+I.cols, I.cols);ca += CA.cols;ch += CH.cols;cv += CV.cols;cd += CD.cols;row += I.cols*2;}}inline void idwt2(__out cv::Mat& I, __in cv::Mat const& CA, __in cv::Mat const& CH, __in cv::Mat const& CV, __in cv::Mat const& CD){if(CA.type() != CV_64FC1 || CH.type() != CV_64FC1 || CV.type() != 
CV_64FC1 || CD.type() != CV_64FC1 || I.channels() != 1)return;double* ca = reinterpret_cast<double*>(CA.data);double* ch = reinterpret_cast<double*>(CH.data);double* cv = reinterpret_cast<double*>(CV.data);double* cd = reinterpret_cast<double*>(CD.data);unsigned char* row = reinterpret_cast<unsigned char*>(I.data);for(int i=0; i<I.rows; i+=2){idwt2_row(row, row+I.cols, ca, ch, cv, cd, I.cols);ca += CA.cols;ch += CH.cols;cv += CV.cols;cd += CD.cols;row += I.cols*2;}}
版权归作者所有,转载请注明出处!