首页 诗词 字典 板报 句子 名言 友答 励志 学校 网站地图
当前位置: 首页 > 教程频道 > 企业软件 > 行业软件 >

POI3.8读取WORD(简明实例)

2012-10-06 
POI3.8读取WORD(简洁实例):目前最新版POI为3.8,包括 poi-3.8-20120326.jar、poi-examples-3.8-20120326.jar、poi-excelant-3.8-20120326.jar 等。

POI3.8读取WORD(简洁实例)
目前最新版POI为3.8:
poi-3.8-20120326.jar
poi-examples-3.8-20120326.jar
poi-excelant-3.8-20120326.jar
poi-ooxml-3.8-20120326.jar
poi-ooxml-schemas-3.8-20120326.jar
poi-scratchpad-3.8-20120326.jar

import java.io.FileInputStream;import java.io.FileOutputStream;import java.io.IOException;import java.io.InputStream;import javax.servlet.ServletException;import javax.servlet.http.HttpServlet;import javax.servlet.http.HttpServletRequest;import javax.servlet.http.HttpServletResponse;import org.apache.poi.hwpf.HWPFDocument;import org.apache.poi.hwpf.extractor.WordExtractor;import org.apache.poi.hwpf.model.PicturesTable;import org.apache.poi.hwpf.usermodel.CharacterRun;import org.apache.poi.hwpf.usermodel.Picture;import org.apache.poi.hwpf.usermodel.Range;public class WordDemo extends HttpServlet {private static final long serialVersionUID = 1L;public void doGet(HttpServletRequest request, HttpServletResponse response)throws ServletException, IOException {this.doPost(request, response);}public void doPost(HttpServletRequest request, HttpServletResponse response)throws ServletException, IOException {//从硬盘读取一个doc文档InputStream in = new FileInputStream("F:\\test.doc");//类从word文档中提取文本,非特殊情况下,都将使用getParagraphText()与getText()WordExtractor word = new WordExtractor(in);//获取段文本String [] strArray = word.getParagraphText();String str = word.getText();  for(int i=0 ; i<strArray.length ; i++){System.out.println(strArray[i]+"\ti循环:"+i);}System.out.println(str +"\t --");//这个构造函数从InputStream中加载Word文档。HWPFDocument doc = new HWPFDocument((InputStream)new FileInputStream("F:\\test.doc"));//这个类为HWPF对象模型,对文档范围段操作Range range = doc.getRange(); ////看看此文档有多少个段落int num = range.numParagraphs();System.out.println(num+"段");   //得到word数据流byte [] dataStream = doc.getDataStream();System.out.println("数据流长度:"+dataStream.length);//用于在一段范围内获得段落数int numChar = range.numCharacterRuns();System.out.println("CharacterRuns 数:"+numChar); //负责图像提取  和    确定一些文件某块是否包含嵌入的图像。PicturesTable table = new PicturesTable(doc, dataStream, null);for(int j=0 ; j<numChar ; j++){//这个类表示一个文本运行,有着共同的属性。CharacterRun run = range.getCharacterRun(j);//是否存在图片boolean bool = 
table.hasPicture(run);System.out.println("是否存在图片:"+bool);if(bool){//返回图片对象绑定到指定的CharacterRunPicture pic = table.extractPicture(run, true);//图片的内容字节写入到指定的输出流。pic.writeImageContent(new FileOutputStream("F:\"+j+".bmp"));System.out.println("成功提取图片"+j+":");}}request.getRequestDispatcher("ok.jsp").forward(request, response);}}


可正常运行及提取图片

热点排行