从二进制流doc文件中提取纯文本
情况是这样的,有一个doc文件按照二进制流的形式存储在数据库中,现在我要从这个二进制流中提取doc文件中的纯文本(没有格式的文字,一个String)
需要使用到什么样的类库与方法,求帮助,谢谢。
[解决办法]
// Late-bound call of the "SaveAs" method on doc, passing the target file name and the plain-text save format.
object[] saveAsArgs = { saveFileName, Word.WdSaveFormat.wdFormatText };
docType.InvokeMember(
    "SaveAs",
    System.Reflection.BindingFlags.InvokeMethod,
    null,
    doc,
    saveAsArgs);
参见
http://dotnet.aspx.cc/article/13c874e4-7fc7-4fd1-8cf6-de9ef4469a9c/read.aspx
[解决办法]
关于 InvokeMember 用法的解释,参见:
http://www.cnblogs.com/zhixx/archive/2011/04/22/2025350.html
[解决办法]
先把二进制流还原成 Word 文件,再用 Word 打开并读取其中的文本:
// Opens the Word document at `path` through Word automation, copies the whole
// document to the clipboard and shows its plain-text form in txtFileContent.
Word.ApplicationClass wordApp = new ApplicationClass();
object file = path; // `path` is a parameter of the enclosing method: the location of the Word document
object nullobj = System.Reflection.Missing.Value;
Word.Document doc = null;
try
{
    doc = wordApp.Documents.Open(
        ref file, ref nullobj, ref nullobj,
        ref nullobj, ref nullobj, ref nullobj,
        ref nullobj, ref nullobj, ref nullobj,
        ref nullobj, ref nullobj, ref nullobj);

    // Select everything and copy it, then read the text rendering back from the clipboard.
    doc.ActiveWindow.Selection.WholeStory();
    doc.ActiveWindow.Selection.Copy();
    IDataObject data = Clipboard.GetDataObject();

    // The clipboard may hold no text at all (e.g. an empty document); fall back to an
    // empty string instead of dereferencing null.
    object text = data == null ? null : data.GetData(DataFormats.Text);
    txtFileContent.Text = text == null ? string.Empty : text.ToString();
}
finally
{
    // Always close the document and shut Word down, otherwise a hidden WINWORD.EXE
    // process is left behind after every call.
    if (doc != null)
    {
        doc.Close(ref nullobj, ref nullobj, ref nullobj);
    }
    wordApp.Quit(ref nullobj, ref nullobj, ref nullobj);
}
// Turns the byte stream fetched from the database (b) back into a Word file on disk,
// then opens that file with Word and reads its content as plain text.
string path = @"F:\2.docx";

// `using` guarantees the file handle is released even if the write throws.
using (FileStream fsWrite = new FileStream(path, FileMode.Create, FileAccess.Write))
{
    fsWrite.Write(b, 0, b.Length);
}

Microsoft.Office.Interop.Word.ApplicationClass wordApp = new Microsoft.Office.Interop.Word.ApplicationClass();
object file = path;
object nullobj = System.Reflection.Missing.Value;
Microsoft.Office.Interop.Word.Document doc = null;
string outText;
try
{
    doc = wordApp.Documents.Open(ref file, ref nullobj, ref nullobj,
        ref nullobj, ref nullobj, ref nullobj,
        ref nullobj, ref nullobj, ref nullobj,
        ref nullobj, ref nullobj, ref nullobj,
        ref nullobj, ref nullobj, ref nullobj, ref nullobj);

    // Content.Text is the document body as an unformatted string.
    outText = doc.Content.Text;
}
finally
{
    // Close the document and quit Word on every path so that a failure does not
    // leave a background WINWORD.EXE process holding the file open.
    if (doc != null)
    {
        doc.Close(ref nullobj, ref nullobj, ref nullobj);
    }
    wordApp.Quit(ref nullobj, ref nullobj, ref nullobj);
}
Console.WriteLine(outText);