首页 诗词 字典 板报 句子 名言 友答 励志 学校 网站地图
当前位置: 首页 > 教程频道 > 开发语言 > 编程 >

Java 解析 Word 中的表格

2012-10-30 
Java 解析 Word 中的表格:import java.io.File; import java.io.FileInputStream; import java.io.Fi…

Java 解析 Word 中的表格

import java.io.File;??
import java.io.FileInputStream;??
import java.io.FileNotFoundException;??
?
import org.apache.poi.hwpf.HWPFDocument;??
import org.apache.poi.hwpf.usermodel.Paragraph;??
import org.apache.poi.hwpf.usermodel.Range;??
import org.apache.poi.hwpf.usermodel.Table;??
import org.apache.poi.hwpf.usermodel.TableCell;??
import org.apache.poi.hwpf.usermodel.TableIterator;??
import org.apache.poi.hwpf.usermodel.TableRow;??
?
import java.io.File;?????
import java.io.FileInputStream;?????
import java.io.InputStream;?????
????
import org.apache.poi.POIXMLDocument;?????
import org.apache.poi.POIXMLTextExtractor;?????
import org.apache.poi.hwpf.extractor.WordExtractor;?????
import org.apache.poi.openxml4j.opc.OPCPackage;?????
import org.apache.poi.xwpf.extractor.XWPFWordExtractor;????
?
?
import org.apache.poi.poifs.filesystem.POIFSFileSystem;??
?
public class ExportDocImpl??
{??
??? public void testWord(){??
??????? try{??
??????????? FileInputStream in = new FileInputStream("D:\\2003.doc");//载入文档??
?????????? POIFSFileSystem pfs = new POIFSFileSystem(in);?????
??????????? HWPFDocument hwpf = new HWPFDocument(pfs);?????
??????????? Range range = hwpf.getRange();//得到文档的读取范围??
??????????? TableIterator it = new TableIterator(range);??
?????????? //迭代文档中的表格??
??????????? while (it.hasNext()) {?????
??????????????? Table tb = (Table) it.next();?????
??????????????? //迭代行,默认从0开始??
??????????????? for (int i = 0; i < tb.numRows(); i++) {?????
??????????????????? TableRow tr = tb.getRow(i);?????
??????????????????? //迭代列,默认从0开始??
??????????????????? for (int j = 0; j < tr.numCells(); j++) {?????
??????????????????????? TableCell td = tr.getCell(j);//取得单元格??
??????????????????????? //取得单元格的内容??
??????????????????????? for(int k=0;k<td.numParagraphs();k++){?????
??????????????????????????? Paragraph para =td.getParagraph(k);?????
??????????????????????????? String s = para.text();?????
???????????????????????????
??????????????????????? } //end for??????
??????????????????? }?? //end for??
??????????????? }?? //end for??
??????????? } //end while??
??????? }catch(Exception e){??
??????????? e.printStackTrace();??
??????? }??
??? }//end method??
??????
??? public static void main(String[] args){
??? ?ExportDocImpl ExportDocImpl = new ExportDocImpl();
??? ?ExportDocImpl.testWord1();
??? }??
???
???
???
???
?????????? public void testWord1(){??
?????????? try {?????
??????????? //word 2003: 图片不会被读取?????
??????????? InputStream is = new FileInputStream(new File("D:\\2003.doc"));?????
????????????????? WordExtractor ex = new WordExtractor(is);???
????????????????? String str1 = ex.getText();
????????????????? str1 = str1.replaceAll("", ";");
????????????????? String text2003 = str1;?????
????????????????? System.out.println(text2003);?????
?????????????????
??????????? //word 2007 图片不会被读取, 表格中的数据会被放在字符串的最后?????
??????????? OPCPackage opcPackage = POIXMLDocument.openPackage("D:\\2007.docx");?????
????????????????? POIXMLTextExtractor extractor = new XWPFWordExtractor(opcPackage);??
????????????????? String str = extractor.getText();
????????????????? str = str.replaceAll("?", ";");
????????????????? String text2007 = str;?????
????????????????? System.out.println(text2007);?????
?????????????????
??????? } catch (Exception e) {?????
????????????????? e.printStackTrace();?????
??????? }???
??? }??
}?

?

?

?

?
import java.io.File;??
import java.io.FileInputStream;??
import java.io.InputStream;??
?
import org.apache.poi.POIXMLDocument;??
import org.apache.poi.POIXMLTextExtractor;??
import org.apache.poi.hwpf.extractor.WordExtractor;??
import org.apache.poi.openxml4j.opc.OPCPackage;??
import org.apache.poi.xwpf.extractor.XWPFWordExtractor;??
?
/**?
* POI 读取 word 2003 和 word 2007 中文字内容的测试类<br />?
* @createDate 2009-07-25?
* @author Carl He?
*/?
public class ParseTable {??
??? public static void main(String[] args) {??
??????? try {??
??????????? //word 2003: 图片不会被读取??
????????????? InputStream is = new FileInputStream(new File("d:\\2003.doc"));??
??????????? WordExtractor ex = new WordExtractor(is);??
??????????? String text2003 = ex.getText();??
??????????? System.out.println(text2003);??
?
??????????? //word 2007 图片不会被读取, 表格中的数据会被放在字符串的最后??
??????????? OPCPackage opcPackage = POIXMLDocument.openPackage("d:\\2007.docx");??
??????????? POIXMLTextExtractor extractor = new XWPFWordExtractor(opcPackage);??
??????????? String text2007 = extractor.getText();??
??????????? System.out.println(text2007);??
?????
??????? } catch (Exception e) {??
??????????? e.printStackTrace();??
??????? }??
??? }??
}?

?

?

?

import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;

import org.apache.poi.hwpf.extractor.WordExtractor;

public class ParseWord {

?public static void main(String[] args) {
??File file = new File("d:\\hello.doc");
??try {
???FileInputStream fis = new FileInputStream(file);
???WordExtractor wordExtractor = new WordExtractor(fis);
???System.out.println(wordExtractor.getText());
??} catch (FileNotFoundException e) {
???e.printStackTrace();
??} catch (IOException e) {
???e.printStackTrace();
??}
?}
}

?

热点排行