急!请教个文件比较的问题?
有两个文本文件A和B,这两个文件里的内容类似(如AB两文件的数据大部分相同,只有少部分数据不同),要对这两个文件进行比较,
将这两个文件中的不同的数据找出来,然后存到一个新的文本文件里。
注:以A为基准进行比较;即,A里有的而B里没有的就将其不同的数据存到新文件;B里有的而A里没有的就不对其进行存储。
请教大侠们,要如何入手啊?
小弟现在的想法是把两个文件都读到字符串里,然后再进一步操作,不知道这样是否可行啊?此外,还有没有更好的方法啊?
希望大虾们能提供下代码,最好是完整代码!
谢谢啦!!!!!很急的啊!!!!!
[解决办法]
package test;
import java.io.BufferedOutputStream;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileOutputStream;
import java.io.FileReader;
import java.io.IOException;
/**
 * Character-by-character comparison of two text files, using the first file as the baseline.
 *
 * <p>Lines are paired by position (line 1 with line 1, and so on). For every pair, the
 * characters of the baseline line that differ from the character at the same index of the
 * other line are collected. Characters that exist only in the second file are ignored.
 */
public class FileComper {
    String sPath;
    String sFilePath;
    String sContent;

    /** Demo: writes two sample files, compares them, and stores the result in d:\3.txt. */
    public static void main(String[] args) {
        FileComper test = new FileComper();
        String crlf = System.getProperty("line.separator");
        String str1 = "你好这是测试字段xx" + crlf + "abcdefgaigeiieriepty";
        String str2 = "你x这是测y字段yy" + crlf + "dbcdefgaigeiieriy";
        test.writeFilebyExistsOver(str1, "d:\\1.txt");
        test.writeFilebyExistsOver(str2, "d:\\2.txt");
        test.readFile("d:\\1.txt", "d:\\2.txt");
    }

    /**
     * Compares the two files line by line and writes the differing characters of the
     * baseline file to {@code d:\3.txt}.
     *
     * <p>The per-line results are concatenated without any separator, exactly as they are
     * written to the result file.
     *
     * @param Filename  path of the baseline file (A)
     * @param Filename2 path of the file compared against the baseline (B)
     * @return the concatenated differences; an empty string when either file is missing or
     *         cannot be read (in which case no result file is written)
     */
    public String readFile(String Filename, String Filename2) {
        File myFile = new File(Filename);
        File myFile2 = new File(Filename2);
        boolean missing = false;
        if (!myFile.exists()) {
            System.err.println("Can't Find " + Filename);
            missing = true;
        }
        if (!myFile2.exists()) {
            System.err.println("Can't Find " + Filename2);
            missing = true;
        }
        if (missing) {
            return "";
        }

        StringBuilder diff = new StringBuilder();
        BufferedReader in = null;
        BufferedReader in2 = null;
        try {
            in = new BufferedReader(new FileReader(myFile));
            in2 = new BufferedReader(new FileReader(myFile2));
            String lineA;
            while ((lineA = in.readLine()) != null) {
                // lineB becomes null once B has run out of lines; f() then treats the
                // whole baseline line as different instead of failing.
                String lineB = in2.readLine();
                System.out.println("a:" + lineA);
                System.out.println("b:" + lineB);
                diff.append(f(lineA, lineB));
            }
        } catch (IOException e) {
            e.printStackTrace();
            return "";
        } finally {
            closeQuietly(in);
            closeQuietly(in2);
        }

        String result = diff.toString();
        writeFilebyExistsOver(result, "d:\\3.txt");
        return result;
    }

    /**
     * Returns the characters of {@code aStrA} that differ from the character at the same
     * index in {@code aStrB}, followed by the tail of {@code aStrA} that extends beyond the
     * end of {@code aStrB}.
     *
     * @param aStrA baseline text; {@code null} yields an empty result
     * @param aStrB text to compare against; {@code null} is treated as empty
     * @return the differing characters of {@code aStrA}, in order
     */
    String f(String aStrA, String aStrB) {
        if (aStrA == null) {
            return "";
        }
        if (aStrB == null) {
            return aStrA;
        }
        StringBuilder diff = new StringBuilder();
        int common = Math.min(aStrA.length(), aStrB.length());
        for (int i = 0; i < common; i++) {
            char a = aStrA.charAt(i);
            if (a != aStrB.charAt(i)) {
                diff.append(a);
            }
        }
        if (aStrA.length() > common) {
            diff.append(aStrA, common, aStrA.length());
        }
        return diff.toString();
    }

    /**
     * Writes {@code datas} to {@code Filename}, replacing the file if it already exists, and
     * prints the elapsed write time. Failures are reported on stderr and not rethrown.
     *
     * @param datas    text to write (encoded with the platform default charset)
     * @param Filename path of the target file
     */
    public void writeFilebyExistsOver(String datas, String Filename) {
        BufferedOutputStream buff = null;
        try {
            File file = new File(Filename);
            if (file.exists()) {
                delFile(Filename);
            }
            buff = new BufferedOutputStream(new FileOutputStream(file));
            long begin0 = System.currentTimeMillis();
            buff.write(datas.getBytes());
            buff.flush();
            long end0 = System.currentTimeMillis();
            System.out.println("BufferedOutputStream执行耗时:" + (end0 - begin0)
                    + " 豪秒");
        } catch (Exception e) {
            e.printStackTrace();
        } finally {
            // Closing the buffered stream also closes the wrapped FileOutputStream.
            closeQuietly(buff);
        }
    }

    /**
     * Deletes the file at the given path and remembers the path in {@code sFilePath}.
     *
     * @param s path of the file to delete
     * @return {@code true} only if the file existed and was actually deleted
     */
    public boolean delFile(String s) {
        sFilePath = s;
        File dFile = new File(sFilePath);
        if (!dFile.exists()) {
            System.out.print("文件:" + s + "不存在!");
            return false;
        }
        try {
            return dFile.delete();
        } catch (SecurityException e) {
            e.printStackTrace();
            return false;
        }
    }

    /** Closes the resource if it is non-null, reporting (but not propagating) close failures. */
    private static void closeQuietly(java.io.Closeable resource) {
        if (resource == null) {
            return;
        }
        try {
            resource.close();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
}
[解决办法]
import java.util.*;import java.io.*;import java.util.regex.*;public class UnknowFilesCompare{ public static void main(String[] args) throws Exception{ //结果存在a_b_compared.txt中。 unknowFilesComparator("a.txt","b.txt"); } static void unknowFilesComparator(String fileA,String fileB) throws Exception{ ArrayList<String[]> ra=usefulData(fileA); //printList(ra); ArrayList<String[]> rb=usefulData(fileB); //printList(rb); //把两个文件的主文件名提取出来,用来构成结果文件的文件名。 String f1=fileA.split("\\.")[0]; String f2=fileA.split("\\.")[0]; BufferedWriter bw=new BufferedWriter(new OutputStreamWriter(new FileOutputStream(f1+"_"+f2+"_compared.txt"),"UTF-8" )); StringBuilder sb=new StringBuilder(); for(int i=0;i<ra.size();i++){ String[] temp1=ra.get(i); ////found用来标志是否找到相同的号 boolean found=false; sb.append("MSISDN: "+temp1[0]+"\r\n"); for(int j=0;j<rb.size();j++){ String[] temp2=rb.get(j); if(temp1[0].equals(temp2[0])){ Arrays.sort(temp2,1,temp2.length); found=true; //allEqauls用来标志是不是相同的号的数据也全相同。 boolean allEqauls=true; for(int k=1;k<temp1.length;k++){ int index=Arrays.binarySearch(temp2,1,temp2.length,temp1[k]); if(!(index>=1&&index<temp2.length&&temp2[index].equals(temp1[k]))){ allEqauls=false; sb.append(temp1[k]+" "); } }//比较数据 if(allEqauls){ sb.append("___Same___"+fileB); } sb.append("\r\n"); }//有相同的号吗? }//找相同的msisdn号. 
if(!found){ for(int j=1;j<temp1.length;j++){ sb.append(temp1[j]+" "); } sb.append("\r\n---Not found in "+fileB); } bw.write(sb.toString(),0,sb.length()); bw.newLine(); sb.delete(0,sb.length()); }//for:ra bw.flush(); bw.close(); } //把有用的数据提取出来,以ArrayList的形式返回.每一个MSISDN号的数据放在String[]数组中,所有的号放在ArrayList中 public static ArrayList<String[]> usefulData(String fileName){ Scanner scan=null; String temp1=null; ArrayList<String> temp=new ArrayList<String>(); ArrayList<String[]> result=new ArrayList<String[]>(); try{ scan=new Scanner(new File(fileName)); }catch(FileNotFoundException ffe){ ffe.printStackTrace(); } while(scan.hasNext()){ if(scan.findInLine(".+\\=(\\d+)\\,.+")!=null){//找到MSISDN号的那一行。 temp.add(scan.match().group(1)); scan.nextLine(); while(scan.findInLine("SUD")==null) scan.nextLine(); //找到SUD那一行。 scan.nextLine(); while(scan.findInLine("AMSISDN.+")==null){ //只要没有到达AMSISDN那一行。 temp1=scan.nextLine().trim(); if(temp1.length()!=0){ temp.addAll(Arrays.asList(temp1.split("\\s+"))); } } result.add(temp.toArray(new String[temp.size()])); temp.clear(); } scan.nextLine(); } scan.close(); return result; } static void printList(ArrayList<String[]> li){ for(int i=0;i<li.size();i++){ System.out.println(Arrays.toString(li.get(i))); } }}
[解决办法]
import java.util.*;
import java.io.*;
import java.util.regex.*;
public class UnknowFilesCompare{
public static void main(String[] args) throws Exception{
unknowFilesComparator("f:/test/a.txt","f:/test/b.txt");
}
static void unknowFilesComparator(String fileA,String fileB) throws Exception{
File fa=new File(fileA);
File fb=new File(fileB);
String faName=fa.getName(); //get file name
String fbName=fb.getName();
String faa=fa.getAbsolutePath(); //get absolute path with file name
String faPath=faa.substring(0,faa.lastIndexOf("\\")+1); //get absolute path without file name(Windows system)
//create resulte file name, resulte file will save in the same path as fileA
//For example: fileA f:/test/a.txt fileB f:/test/b.txt, resulte file : f:/test/a_b_compared.txt
File fc=new File(faPath+faName.split("\\.")[0]+"_"+fbName.split("\\.")[0]+"_compared.txt");
//count file
////For example: fileA f:/test/a.txt fileB f:/test/b.txt, count file : f:/test/a_b_count.txt
File fd=new File(faPath+faName.split("\\.")[0]+"_"+fbName.split("\\.")[0]+"_count.txt");
ArrayList <String[]> ra=usefulData(fa);
ArrayList <String[]> rb=usefulData(fb);
BufferedWriter bw=new BufferedWriter(new OutputStreamWriter(new FileOutputStream(fc),"UTF-8" ));
BufferedWriter bw2=new BufferedWriter(new OutputStreamWriter(new FileOutputStream(fd),"UTF-8" ));
StringBuilder sb=new StringBuilder();
HashMap <String,Integer> count=new HashMap <String,Integer>();
String myKey=null;
Integer myVal=null;
for(int i=0;i <rb.size();i++){ //preliminary sorting, avert repetitive sort in for(int j=0;j <rb.size();j++)
String[] temp=rb.get(i); //get MSISDN(i)
Arrays.sort(temp,2,temp.length-1); // sort NAM data ,
}
for(int i=0;i <ra.size();i++){
String[] temp1=ra.get(i);
////found : flag found same MSISDN
boolean found=false;
sb.append("MSISDN: "+temp1[0]+"\r\n"); //MSISDN output
for(int j=0;j <rb.size();j++){
String[] temp2=rb.get(j);
if(temp1[0].equals(temp2[0])){
sb.append("NAM:\r\n"+faName+":"+temp1[1]+" "+fbName+":"+temp2[1]+"\r\n"); //NAM data output
found=true;
//allEqauls flag All Equals
boolean allEqauls=true;
sb.append("SUD:\r\n");
for(int k=2;k <temp1.length-1;k++){
int index=Arrays.binarySearch(temp2,1,temp2.length-1,temp1[k]);
if(!(index>=2&&index <temp2.length-1&&temp2[index].equals(temp1[k]))){ //find diffrent SUD data
allEqauls=false;
sb.append(temp1[k]+" "); //diffrent SUD data output
myKey=temp1[k].split("-")[0];
myVal=count.get(myKey); //use HashMap to count
if(myVal==null){
count.put(myKey,1);
}else{
count.put(myKey,myVal+1);
}
}
}//compare NAM data
if(allEqauls){
sb.append("___Same___"+fileB);
}
sb.append("\r\n");
sb.append("VLR ADDRESS:\r\n"+faName+":"+temp1[temp1.length-1]+" "+fbName+":"+temp2[temp2.length-1]+"\r\n"); //VLR ADDRESS output
}//find same MSISDN
}//found msisdn
if(!found){
for(int j=1;j <temp1.length;j++){
sb.append(temp1[j]+" ");
}
sb.append("\r\n---Not found in "+fileB);
}
bw.write(sb.toString(),0,sb.length());
bw.newLine();
sb.delete(0,sb.length());
}//for:ra
bw.flush();
bw.close();
for(String k:count.keySet()){ //count output
String line=k+": "+count.get(k)+"\r\n";
bw2.write(line,0,line.length());
}
bw2.flush();
bw2.close();
}
//retrun :ArrayList.element of ArrayList is a Stiring[].
//for each String[]:[0]:MSISDN [1]:NAM [2~length-2]:SUD Data [length-1]:VLR ADDRESS
public static ArrayList <String[]> usefulData(File myFile){
Scanner scan=null;
String temp1=null;
ArrayList <String> temp=new ArrayList <String>();
ArrayList <String[]> result=new ArrayList <String[]>();
try{
scan=new Scanner(myFile);
}catch(FileNotFoundException ffe){
ffe.printStackTrace();
}
while(scan.hasNext()){
if(scan.findInLine(".+\\=(\\d+)\\,.+")!=null){//find line "MSISDN"
temp.add(scan.match().group(1));
scan.nextLine();
while(scan.findInLine("NAM.*")==null) scan.nextLine(); //find line "NAM"
scan.nextLine();
temp1=scan.nextLine().trim(); //get NAM data
temp.add(temp1);
while(scan.findInLine("SUD.*")==null) scan.nextLine(); //find line "SUD"。
scan.nextLine();
while(scan.findInLine("AMSISDN.*")==null){
temp1=scan.nextLine().trim(); //get SUD data
if(temp1.length()!=0){
temp.addAll(Arrays.asList(temp1.split("\\s+")));
}
}
scan.nextLine();
while(scan.findInLine("VLR\\s+ADDRESS.+")==null) scan.nextLine(); //find line "VLR ADDRESS "
scan.nextLine();
temp1=scan.nextLine().trim(); //get VLR ADDRESS data
temp.add(temp1.split("\\s+")[0]);
result.add(temp.toArray(new String[temp.size()]));
temp.clear();
}
scan.nextLine();
}
scan.close();
return result;
}
static void printList(ArrayList <String[]> li){
for(int i=0;i <li.size();i++){
String[] sts=li.get(i);
System.out.println(sts[0]);
for(int j=1;j <sts.length;j++){
System.out.print(sts[j]+" ");
}
System.out.println("");
}
}
}
/**
 * Mapping class for one information block of the input file.
 *
 * <p>Declared without {@code static} because it sits at top level here, where that modifier
 * is not permitted; add {@code static} back when nesting it inside another class.
 */
class Entry {
    /** Matches a line starting with {@code <hgsdp:msisdn=} and captures the 13-digit number. */
    static final Pattern NumberPattern = Pattern.compile("<hgsdp:msisdn=(\\d{13}).+");
    static final String SUD = "SUD";
    static final String AMSISDN = "AMSISDN";
    static final String END = "END";

    /** Mobile phone number. */
    String msisdn;
    /** Position of SUD. */
    int sud;
    /** Position of AMSISDN. */
    int amsisdn;
    /** Holds the information to be compared, in insertion order. */
    Map<String, String> info = new LinkedHashMap<String, String>();
    /** Reserved for extending the program; may be left unused. */
    List<String> contents = new ArrayList<String>();
}