C++字符串解析分享，求优化,该如何处理

2013-08-10

C++字符串解析分享，求优化需求：字符串“#C255 244 233 12##d4##f5 ，将其中的数字和字母分离出来，其中‘#’字

C++字符串解析分享，求优化
需求：字符串“#C255 244 233 12# #d4# #f5”，将其中的数字和字母分离出来，其中的‘#’字符也可能是别的字符

我自己写的比较繁琐，使用了C++的string类和VECTOR，当然使用c可能更高效一些，分享一下，求指教，如何才能写的更简洁。

 
 
 #include "iostream"
 #include "string"
 #include "vector"
 
 using namespace std;
 //using namespace string;
 
 
 // Sample input that main() passes to GetResult(): '#'-delimited segments, each
 // holding one letter followed by one or more space-separated numbers.
 const string const_str = "#C255 244 233 22##H5##a1#";
 
 //检测##的位置以及数目 
 vector<int> GetTagPos(const string& str ,const string& tag)
 {
 vector<int> iPosList;
 
size_t tag_pos = 0 ;
size_t search_pos = 0;
size_t search_length = str.length();

 iPosList.clear();
 iPosList.reserve(32);
 if(tag.empty()  || str.empty())
 goto _return;

while(search_pos <= search_length)   //检测tag位置 
{
if((tag_pos = str.find(tag,search_pos))!= string::npos )
{
iPosList.push_back(tag_pos);
search_pos = tag_pos + 1 ;
}
else
{
break; 
} 
}

if(iPosList.size()%2 != 0 )   //如果不配对，返回空 
{
iPosList.clear();
goto _return;
}

_return:
return iPosList ;
 }
 
 
 void GetResult(const string& str , const string& tag )
 {
 size_t ipos = 0 ;
size_t word_pos = 0 ;
string strNum;
vector<int> itagpos;
 
if(true == str.empty())
return ;

itagpos = GetTagPos(str,tag);
if(itagpos.empty())
return ;

vector<int>::iterator it = itagpos.begin(); 
for( ; it != itagpos.end() ; ++ it)    //首先检测字母 
{
for(ipos = (*it) + 1 ; ipos != *(++it)+1 ; ++ipos)
{
char c = str[ipos] ;
if((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z'))
{
cout << c << endl ;
word_pos = ipos ;
break;
}
else
{
continue ;
}
}
 
 
strNum.clear();
strNum.erase();
for(ipos = word_pos ; ipos != *it+1 ; ++ipos)//检测数字 
{
char c = str[ipos];
if( c >= '0'  && c <= '9')
{
strNum += c ;
}
else if( (c == ' ' ||  ipos == *it ) && !strNum.empty())
{
cout << strNum << "\n"<< endl;   // 输出了字符，可以转换为数字 
strNum.clear();
continue ;
}
else
{
continue ;
}
}
} 
 } 
 
 int main()
 {
 cout << "string parse test\n"<< endl;
 
 string tag = "#" ;
 GetResult(const_str,tag);
 
 return  0 ;
 }

c++  String 性能优化
[解决办法]

引用:

我没有检查你的程式正确与否，只是做了一些小小的改动


#include <iostream>
#include <string>
#include <vector>

//const std::string const_str = "#C255 244 233 22##H5##a1#"; //try your best to avoid global variables; most of the time globals are evil


//检测##的位置以及数目
// Returns the start offset of every occurrence of `tag` inside `str`.
//
// The search resumes one character after each hit, so occurrences of a
// multi-character tag may overlap. Tags are expected to come in open/close
// pairs: an empty vector is returned when `str` or `tag` is empty, when there
// is no occurrence, or when the number of occurrences is odd.
std::vector<int> GetTagPos(const std::string& str ,const std::string& tag)
{
    // Guard clause: nothing can match when either string is empty, so return
    // before doing any other work.
    if (tag.empty() || str.empty()) {
        return std::vector<int>();
    }

    std::vector<int> iPosList; // starts out empty, so no clear() is needed
    iPosList.reserve(32);

    const std::size_t search_length = str.length();
    std::size_t search_pos = 0;
    while (search_pos <= search_length)   // collect tag positions
    {
        const std::size_t tag_pos = str.find(tag, search_pos);
        if (tag_pos == std::string::npos)
        {
            break;
        }
        // The public interface stores offsets as int; narrow explicitly.
        iPosList.push_back(static_cast<int>(tag_pos));
        search_pos = tag_pos + 1;
    }

    if (iPosList.size() % 2 != 0)   // unpaired tag: report nothing
    {
        return std::vector<int>();
    }

    return iPosList;
}


void GetResult(const std::string& str , const std::string& tag )
{
     //check the str is empty or not first, this is one of the technique which called "lazy evaluation"
    if(true == str.empty()){
        return;
    }

    std::vector<int> itagpos = GetTagPos(str, tag); //this way the codes are shorter and compiler has more chances to optimize your codes
    if(itagpos.empty()){
        return ;
    }

    //size_t ipos = 0 ; //don't need to declare your local paorameter at here, you could declare it later on
    size_t word_pos = 0 ; 
 
    std::string strNum;
    auto end = std::end(itagpos); //you could use auto to save you some typing trouble
    for(auto it = std::begin(itagpos); it != end ; ++it)    //首先检测字母
    {
        for(auto ipos = (*it) + 1 ; ipos != *(++it) + 1 ; ++ipos)
        {
            char const c = str[ipos];
            if((c >= 'a' && c <= 'z') 
[解决办法]
 (c >= 'A' && c <= 'Z'))
            {
                std::cout << c << std::endl;
                word_pos = ipos ;
                break;
            }
            /*else
            {
                continue ;
            }*/
        }

        strNum.clear();
        //strNum.erase(); //1 : if you don't want to release the memory but clear the data, theyn you don't need to call erase at here
                          //2 : this is not the correct ways to call erase, please google the proper api or check your c++ primer 5
        for(auto ipos = word_pos ; ipos != *it+1 ; ++ipos)//检测数字
        {
            char const c = str[ipos]; 
 
            if( c >= '0'  && c <= '9')
            {
                strNum += c ;
            }
            else if( (c == ' ' 
[解决办法]
  ipos == *it ) && !strNum.empty())
            {
                std::cout << strNum << "\n"<< std::endl;   // 输出了字符，可以转换为数字
                strNum.clear();
                continue ;
            }
            /*else
            {
                continue ;
            }*/
        }
    }
}

int main()
{    
    std::cout << "string parse test\n"<< std::endl;

    GetResult("#C255 244 233 22##H5##a1#","#");


    return 0;
}

以下是进阶题材，可进修也可忽略
如果你要设计出很好的API，可以参考boost spirit，这个library利用expression template
的威力，在c++内设计出"domain specific language",提供了非常high level，容易维护,而且
效率极高的api（大部分情况下比c的API还要更加快）

其他的还有matrix template library 4,利用expression template消除temporary parameters
运算效率极高，比许多c程序员写的代码都来得更高

再提醒一次，这些是进阶题材，除非你有志设计出非常高质量的libraries(boost, Qt, MTL4等)，否则
你大概一辈子都不用知道什么是expression template

哥是来检测英文的
don't need to clear it since it do not have any data at all
need前面加了don't
[解决办法]

立于楼主代码之外说的：

对于字符串的操作，求优化，

第一：至少用字符串char数组要比string 高效很多的！
第二：尽量少用多重循环；在循环里加判断，会破坏连续性，
第三：能用容器的就用容器

[解决办法]
如果楼主希望让程式更容易维护
可以考虑使用std::regex,这是c++11正式支援的regular expression
至于我之前提到的boost::spirit, 虽然大部分情况下性能比c提供的标准函数还要快
但是他需要你对c++有比较深入的了解和掌握，事后有兴趣再接触也可以

给楼主另外一个版本的实现，已经把template去掉，原版的设计是希望可以支援不同的"string"和"result"
不过楼主大概刚接触c++不久，所以我先介绍你比较简单的


// Splits `contents` at every character that appears in `delimiters` and
// appends each non-empty piece to `result`.
//
// `result` is not cleared first, so pieces accumulate across calls.
// Consecutive, leading and trailing delimiters produce no empty pieces. When
// `delimiters` is empty the whole of `contents` (if non-empty) is one piece.
void split(std::string const& contents, std::vector<std::string> &result, std::string const& delimiters = "\n")
{
    typedef std::string::size_type size_type;

    size_type const total = contents.length();
    size_type token_begin = 0;

    // Each pass handles one piece; stepping past `total` ends the loop once
    // the tail has been handled.
    while (token_begin <= total)
    {
        size_type token_end = contents.find_first_of(delimiters, token_begin);
        if (token_end == std::string::npos)
        {
            token_end = total; // no more delimiters: the tail is the last piece
        }

        if (token_end > token_begin)
        {
            result.emplace_back(contents, token_begin, token_end - token_begin);
        }

        token_begin = token_end + 1;
    }
}

void test_stl()
{
    std::cout<<"get number"<<std::endl;
    std::vector<std::string> strs;
    split("#C255 244 233 22##H5##a1#", strs, "#CHa ");
    for(auto const &data : strs){
        std::cout<<data<<std::endl;
    }
}

output
255
244
233
22
5
1

希望这是楼主想要的输出

我把result设计成parameters而非return by function

是为了让memory(std::vector<std::string>)可以重复利用
如果你需要更简洁的api,大可以改成return by function

热点排行