比较两组字符串差异

2012-12-16

比较两组字符串区别：两组字符串，1万组，如何快速比较这两组的区别，并且把区别全部打印出来？例如：A组：aaa bbb ……

比较两组字符串区别
两组字符串，1万组
如何快速比较这两组的区别。
并且把区别全部打印出来？

例如：
A组：aaa bbb ddd eee ....
B组：aaa ccc ddd fff ....

A组和B组都有的：aaa ddd ...
A组有B组没有的: bbb eee ...
B组有A组没有的：ccc fff ...

[最优解释]


#include <iostream>
#include <iterator>
#include <vector>
#include <string>
#include <algorithm>

using namespace std;


// Writes `label`, then every element of `group` followed by one space,
// then ends the line (std::endl also flushes the stream).
static void print_group(const char *label, const std::vector<std::string> &group)
{
    std::cout << label;
    std::copy(group.begin(), group.end(),
            std::ostream_iterator<std::string>(std::cout, " "));
    std::cout << std::endl;
}

// Demo: builds two small groups of strings, sorts them, and prints
// (1) the strings found in both groups, (2) those only in group A and
// (3) those only in group B. The command-line arguments are not used.
int main(int argc, char *argv[])
{
    static const char *const group_a[] = { "aaa", "bbb", "ddd", "eee" };
    static const char *const group_b[] = { "aaa", "ccc", "ddd", "fff" };

    std::vector<std::string> a_items(group_a,
            group_a + sizeof(group_a) / sizeof(group_a[0]));
    std::vector<std::string> b_items(group_b,
            group_b + sizeof(group_b) / sizeof(group_b[0]));

    // The set algorithms below operate on sorted ranges.
    std::sort(a_items.begin(), a_items.end());
    std::sort(b_items.begin(), b_items.end());

    std::vector<std::string> in_both;
    std::set_intersection(a_items.begin(), a_items.end(),
            b_items.begin(), b_items.end(),
            std::back_inserter(in_both));

    std::vector<std::string> only_a;
    std::set_difference(a_items.begin(), a_items.end(),
            b_items.begin(), b_items.end(),
            std::back_inserter(only_a));

    std::vector<std::string> only_b;
    std::set_difference(b_items.begin(), b_items.end(),
            a_items.begin(), a_items.end(),
            std::back_inserter(only_b));

    print_group("A组和B组都有的: ", in_both);
    print_group("A组有B组没有的: ", only_a);
    print_group("B组有A组没有的: ", only_b);

    return 0;
}

[其他解释]
不知道有什么其他要求木有？

在实际工作中，碰到这样的问题，首先将A和B分别存入两个set对象中（去掉重复的），然后调用algorithm中的sort函数快排，再然后遍历比较即可。
[其他解释]
先对A，B排序，然後调用set家族的算法
因为我不知道你的资料结构，没办法给出更详细的建议


std::vector<std::string> A = {"aaa", "bbb", "ccc"};
std::vector<std::string> B = {"bbb", "ccc", "ddd"};
// Sort A and B with std::sort first: the set algorithms expect sorted ranges.

// NOTE: the three calls below are simplified shorthand, not the literal
// standard signatures. The real algorithms take two iterator ranges plus an
// output iterator, e.g.
//   std::set_intersection(A.begin(), A.end(), B.begin(), B.end(),
//                         std::back_inserter(AB_intersect));

// Elements present in both A and B
std::vector<std::string> AB_intersect;
set_intersection(A, B, AB_intersect); // intersection; set_symmetric_difference would instead yield the elements found in only one group

// Elements in A but not in B
std::vector<std::string> AB_differ;
set_difference(A, B, AB_differ); // simplified here; fill in the real arguments yourself

// Elements in B but not in A
std::vector<std::string> BA_differ;
set_difference(B, A, BA_differ); // same as above

以後要实现算法前，可以先翻翻stl提供了什麽好用的东西
[其他解释]

修改错误，只要调用stl的set家族，答案很快就出来了

A组和B组都有的


// Simplified call: the real std::set_intersection takes the two sorted input
// ranges as iterator pairs plus an output iterator (e.g. std::back_inserter).
std::vector<std::string> AB_intersect;
set_intersection(A, B, AB_intersect);

[其他解释]
用一个map，先遍历a，只要是a中的出现过的，value都设为1，然后遍历b，a和b中的都出现过的设为2，a中没出现过的设为3
[其他解释]
这个好像没通过编译。

引用:

先对A，B排序，然後调用set家族的算法
因为我不知道你的资料结构，没办法给出更详细的建议

C/C++ code：std::vector<std::string> A = {"aaa", "bbb", "ccc"}; std::vector<std::string> B = {"bbb", "ccc", "ddd"}; //……

[其他解释]
用哈希速度快
[其他解释]
拷贝到2个文件，直接用BeyondCompare工具进行比较就行了。
UltraEdit也可以。
[其他解释]
先将A放到set或hash_set中，然后根据B中的每个数据去A中查找下，已经存在，说明A，B都有（同时删除这个），不存在，说明只有B有。最后set或hash_set中仍存在的数据就是只A有的。

set或hash_set可以参考《STL系列之六 set与hash_set》
http://blog.csdn.net/morewindows/article/details/7029587
[其他解释]
>这个好像没通过编译。
1 : 你用的编译器大概还不支援initializer_list，你可以用你的编译器
支持的方法初始化vector
2 : set家族的算法我写的时候经过了一些简化，详细的API请自己去翻书

[其他解释]
[code]
#include <iostream>
#include <iterator>
#include <vector>
#include <string>
#include <algorithm>

using namespace std;

// Demo: fills two groups of strings, sorts them, and prints three lines:
// the strings common to both groups, the strings only in the first group,
// and the strings only in the second group. Each string is followed by a
// single space. The command-line arguments are not used.
int main(int argc, char *argv[])
{
    const char *const first_words[]  = { "aaa", "bbb", "ddd", "eee" };
    const char *const second_words[] = { "aaa", "ccc", "ddd", "fff" };

    std::vector<std::string> lhs;
    std::vector<std::string> rhs;
    for (int k = 0; k < 4; ++k) {
        lhs.push_back(std::string(first_words[k]));
        rhs.push_back(std::string(second_words[k]));
    }

    // The set algorithms below operate on sorted ranges.
    std::sort(lhs.begin(), lhs.end());
    std::sort(rhs.begin(), rhs.end());

    std::vector<std::string> common;
    std::vector<std::string> only_lhs;
    std::vector<std::string> only_rhs;

    std::set_intersection(lhs.begin(), lhs.end(), rhs.begin(), rhs.end(),
            std::back_inserter(common));
    std::set_difference(lhs.begin(), lhs.end(), rhs.begin(), rhs.end(),
            std::back_inserter(only_lhs));
    std::set_difference(rhs.begin(), rhs.end(), lhs.begin(), lhs.end(),
            std::back_inserter(only_rhs));

    // Print the three result lists in the same order, one list per line.
    const std::vector<std::string> *const rows[] = { &common, &only_lhs, &only_rhs };
    for (int r = 0; r < 3; ++r) {
        const std::vector<std::string> &row = *rows[r];
        for (std::vector<std::string>::size_type k = 0; k < row.size(); ++k)
            std::cout << row[k] << ' ';
        std::cout << std::endl;
    }

    return 0;
}
[/code]
[其他解释]
来晚了，只能顶11楼。
[其他解释]
鉴于很多人说C比C++快，楼主不妨试试下面这个：

//输出PROG中有但LIST中没有的文本行，即集合PROG-LIST
#include <ctype.h>
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <search.h>
// Size of the line buffer ln; also the per-line size used when sizing buf.
#define MAXCHARS 512

// Number of lines buf is sized for (main allocates MAXLINES*MAXCHARS bytes).
int MAXLINES=10000;
// NOTE(review): not used in the visible part of the file - presumably the
// counterpart of MAXLINES for buf2; confirm against the rest of main.
int MAXLINES2;

char *buf;
char *buf2;

// File list required by the program (Program); replaced by command-line argument 1.
char PROG[256]="PROG";
// Actual file list (List) as generated by dir /b /s; replaced by command-line argument 2.
char LIST[256]="LIST";

FILE *fp; // stream opened on PROG
FILE *fl; // stream opened on LIST

int i;
int c;  // receives the result of fgetc
int n;
int L;  // index of the last character of the line just read
int hh; // count of lines read so far, reported in the "too long" message

int ignore_case=0; // set to 1 when a third command-line argument is present
char ln[MAXCHARS]; // buffer each line is read into by fgets
// Case-insensitive comparator in the shape expected by qsort/bsearch.
// arg1 and arg2 each point at a NUL-terminated string. Returns a negative
// value, zero, or a positive value when the first string orders before,
// equal to, or after the second, ignoring letter case.
// Implemented with tolower() instead of stricmp(), which is not part of
// standard C and is unavailable on many toolchains.
int icompare(const void *arg1,const void *arg2) {
   // unsigned char keeps the argument of tolower() in its defined range
   const unsigned char *s1=(const unsigned char *)arg1;
   const unsigned char *s2=(const unsigned char *)arg2;
   int c1,c2;
   do {
      c1=tolower(*s1++);
      c2=tolower(*s2++);
   } while (c1==c2 && c1!=0); // stop at the first difference or at the terminator
   return c1-c2;
}
// Case-sensitive comparator in the shape expected by qsort/bsearch:
// treats both arguments as NUL-terminated strings and returns strcmp's result.
int compare(const void *arg1,const void *arg2) {
   const char *left=(const char *)arg1;
   const char *right=(const char *)arg2;
   return strcmp(left,right);
}
int main(int argc,char **argv) {
    if (argc>1) strcpy(PROG,argv[1]);//命令行参数1覆盖PROG
    if (argc>2) strcpy(LIST,argv[2]);//命令行参数2覆盖LIST
    if (argc>3) ignore_case=1;//若存在命令行参数3，忽略大小写
    if ((fl=fopen(LIST,"rt"))==NULL) {
        fprintf(stderr,"Can not open %s\n",LIST);
        fprintf(stderr,"Usage: %s [PROG] [LIST] [-i]\n",argv[0]);
        return 1;
    }
    if ((fp=fopen(PROG,"rt"))==NULL) {
        fclose(fl);
        fprintf(stderr,"Can not open %s\n",PROG);
        fprintf(stderr,"Usage: %s [PROG] [LIST] [-i]\n",argv[0]);
        return 2;
    }
    buf=(char *)malloc(MAXLINES*MAXCHARS);
    if (NULL==buf) {
        fclose(fl);
        fclose(fp);
        fprintf(stderr,"Can not malloc(%d LINES*%d CHARS)!\n",MAXLINES,MAXCHARS);
        return 4;
    }
    n=0;
    hh=0;
    i=0;
    while (1) {
        if (fgets(ln,MAXCHARS,fl)==NULL) break;//
        hh++;
        L=strlen(ln)-1; 
 
        if ('\n'!=ln[L]) {//超长行忽略后面内容
            fprintf(stderr,"%s Line %d too long(>%d),spilth ignored.\n",LIST,hh,MAXCHARS);
            while (1) {
                c=fgetc(fl);
                if ('\n'==c

热点排行