lucene 3.0.0 简单入门
官方例子:http://lucene.apache.org/java/3_0_0/api/demo/index.html
官方网站:http://lucene.apache.org
概念理解:http://www.ibm.com/developerworks/cn/java/j-lo-lucene1/ 这里更多的是概念,一些接口对于3.0.0已经不适用了。
老外的一个网站,英文基本比较浅显易懂:http://www.lucenetutorial.com
需要的jar包:
lucene-core-3.0.0.jar --lucene核心包
lucene-smartcn-3.0.0.jar --中文分词库,你也可以选择其他的分词jar包
实际例子:TxtFileIndexer.java,这里懒得再写个搜索类了,索引的生成和搜索都放在同一个地方了。
/** * */package com.spell;import java.io.File;import java.io.FileReader;import java.io.Reader;import java.util.Date;import org.apache.lucene.analysis.Analyzer;import org.apache.lucene.analysis.cn.smart.SmartChineseAnalyzer;import org.apache.lucene.document.Document;import org.apache.lucene.document.Field;import org.apache.lucene.index.IndexWriter;import org.apache.lucene.index.Term;import org.apache.lucene.search.IndexSearcher;import org.apache.lucene.search.ScoreDoc;import org.apache.lucene.search.TermQuery;import org.apache.lucene.store.Directory;import org.apache.lucene.store.FSDirectory;import org.apache.lucene.store.SimpleFSDirectory;import org.apache.lucene.util.Version;public class TxtFileIndexer { public static void main(String[] args) throws Exception { // 这里先执行索引的建立,当然生产环境下是有自己的索引生成策略的 createIndexes(); // 执行搜索 search("修改"); } public static void createIndexes() throws Exception { // 索引文件的存放文件夹 String index_path = "E:" + File.separator + "lucene" + File.separator + "index"; // 要索引文件的文件夹 String doc_path = "E:" + File.separator + "lucene" + File.separator + "doc"; File INDEX_DIR = new File(index_path); // 保存在硬盘,也可以选择存放在内存中的 Directory dir = new SimpleFSDirectory(INDEX_DIR); File doc_dir = new File(doc_path); // 用智能的中文词库分析器 Analyzer luceneAnalyzer = new SmartChineseAnalyzer( Version.LUCENE_CURRENT); File[] dataFiles = doc_dir.listFiles(); // 索引writer IndexWriter indexWriter = new IndexWriter(dir, luceneAnalyzer, true, IndexWriter.MaxFieldLength.UNLIMITED); long startTime = new Date().getTime(); for (int i = 0; i < dataFiles.length; i++) { if (dataFiles[i].isFile() && dataFiles[i].getName().endsWith(".txt")) { System.out.println("Indexing file " + dataFiles[i].getCanonicalPath()); Document document = new Document(); Reader txtReader = new FileReader(dataFiles[i]); // Field .Text("path", dataFiles[i].getCanonicalPath()) document.add(new Field("path", dataFiles[i].getCanonicalPath(), Field.Store.YES, Field.Index.NOT_ANALYZED)); document.add(new Field("contents", 
txtReader)); // 索引添加Document indexWriter.addDocument(document); } } indexWriter.optimize(); indexWriter.close(); long endTime = new Date().getTime(); System.out.println("It takes " + (endTime - startTime) + " milliseconds to create index for the files in directory " + doc_dir.getPath()); } public static void search(String searchStr) throws Exception { System.out.println("=====搜索的关键字是:" + searchStr); // 索引文件的存放路径 String index_path = "E:" + File.separator + "lucene" + File.separator + "index"; File INDEX_DIR = new File(index_path); // 打开文件夹 FSDirectory directory = FSDirectory.open(INDEX_DIR); // 索引搜苏器 IndexSearcher searcher = new IndexSearcher(directory); if (!INDEX_DIR.exists()) { System.out.println("The Lucene index is not exist"); return; } QueryParser parser = new QueryParser(Version.LUCENE_CURRENT, "contents", new SmartChineseAnalyzer(Version.LUCENE_CURRENT)); Query query = parser.parse(searchStr); TopDocs topDocs = searcher.search(query, 1000);// 一般来说,只取得前面的1000条,我们认为是最有用的 System.out.println("tatol:" + topDocs.totalHits); for (int i = 3; i < 6; i++) {// 这个思路扩展下可以做分页了 // for (ScoreDoc scordoc : topDocs.scoreDocs) { // 根据索引的ID找文档 Document tempDoc = searcher.doc(topDocs.scoreDocs[i].doc); System.out.println(topDocs.scoreDocs[i].doc + ":--" + tempDoc.getField("path").stringValue()); } //关闭文件夹 directory.close(); //关闭搜索器 searcher.close(); }}voidaddDocument(Document doc) Adds a document to this index. voidaddDocument(Document doc, Analyzer analyzer) Adds a document to this index, using the provided analyzer instead of the value of getAnalyzer(). voiddeleteAll() Delete all documents in the index. voiddeleteDocuments(Query... queries) Deletes the document(s) matching any of the provided queries. voiddeleteDocuments(Query query) Deletes the document(s) matching the provided query. voiddeleteDocuments(Term... terms) Deletes the document(s) containing any of the terms. voiddeleteDocuments(Term term) Deletes the document(s) containing term. 
voidupdateDocument(Term term, Document doc) Updates a document by first deleting the document(s) containing term and then adding the new document. voidupdateDocument(Term term, Document doc, Analyzer analyzer) Updates a document by first deleting the document(s) containing term and then adding the new document.