lucene增量索引的简单实现
用lucene来建立搜索程序,检索效率会大大地提高,但这是以建立索引为代价的:建立索引本身就是一个耗内存大、时间长的过程(前提是数据量比较大;数据量少的话何必用lucene来做全文检索,个人拙见),因此索引的建立往往就是瓶颈。如果建立好索引之后,每次更新数据都要重新建立整个索引,无疑是不合理的,为什么不能在原有索引文件的基础上,只把新增的数据加进去呢?增量索引的做法是:在建完索引之后,将数据库中最后一条记录的ID存储起来;下次建立索引时先取出这个ID,据此查出ID更大的新增数据,再把这些新增数据的索引追加到原有的索引文件里面。下面来看一个简单的例子。
数据库表newitem有两个字段:id和title。话不多说,直接上代码,一看便知。
import java.io.BufferedReader;import java.io.File;import java.io.FileReader;import java.io.FileWriter;import java.io.IOException;import java.io.PrintWriter;import java.sql.Connection;import java.sql.DriverManager;import java.sql.ResultSet;import java.sql.Statement;import org.apache.lucene.analysis.Analyzer;import org.apache.lucene.analysis.standard.StandardAnalyzer;import org.apache.lucene.document.Document;import org.apache.lucene.document.Field;import org.apache.lucene.index.IndexWriter;public class Index { public static void main(String[] args) { try { Index index = new Index(); String path = "d:\\index";//索引文件的存放路径 String storeIdPath = "d:\\storeId.txt";//存储ID的路径 String storeId =""; storeId = index.getStoreId(storeIdPath); ResultSet rs = index.getResult(storeId); index.indexBuilding(path, storeIdPath, rs); storeId = index.getStoreId(storeIdPath); System.out.println(storeId);//打印出这次存储起来的ID } catch (Exception e) { e.printStackTrace(); } } public ResultSet getResult(String storeId) throws Exception{ Class.forName("com.mysql.jdbc.Driver").newInstance(); String url = "jdbc:mysql://localhost:3306/ding"; String userName = "root"; String password = "ding"; Connection conn = DriverManager.getConnection(url,userName,password); Statement stmt = conn .createStatement(); ResultSet rs = stmt .executeQuery("select * from newitem where id > '"+storeId+"'order by id"); return rs; } public boolean indexBuilding(String path,String storeIdPath, ResultSet rs) {// 把RS换成LIST原理一样 try { Analyzer luceneAnalyzer = new StandardAnalyzer(); // 取得存储起来的ID,以判定是增量索引还是重新索引 boolean isEmpty = true; try { File file = new File(storeIdPath); if (!file.exists()) { file.createNewFile(); } FileReader fr = new FileReader(storeIdPath); BufferedReader br = new BufferedReader(fr); if(br.readLine()!= null) { isEmpty = false; } br.close(); fr.close(); } catch (IOException e) { e.printStackTrace(); } IndexWriter writer = new IndexWriter(path, luceneAnalyzer, isEmpty);//参数isEmpty是false表示增量索引 String storeId = 
""; boolean indexFlag = false; String id; String title; while (rs.next()) { // for(Iterator it = list.iterator();it.hasNext();){ id = rs.getString("id"); title = rs.getString("title"); writer.addDocument(Document(id, title)); storeId = id;//将拿到的id给storeId,这种拿法不合理,这里为了方便 indexFlag = true; } writer.optimize(); writer.close(); if(indexFlag){ // 将最后一个的ID存到磁盘文件中 this.writeStoreId(storeIdPath, storeId); } return true; } catch (Exception e) { e.printStackTrace(); System.out.println("出错了" + e.getClass() + "\n 错误信息为: " + e.getMessage()); return false; } } public static Document Document(String id, String title) { Document doc = new Document(); doc.add(new Field("ID", id, Field.Store.YES, Field.Index.TOKENIZED)); doc.add(new Field("TITLE", title, Field.Store.YES, Field.Index.TOKENIZED)); return doc; } // 取得存储在磁盘中的ID public static String getStoreId(String path) { String storeId = ""; try { File file = new File(path); if (!file.exists()) { file.createNewFile(); } FileReader fr = new FileReader(path); BufferedReader br = new BufferedReader(fr); storeId = br.readLine(); if (storeId == null || storeId == "") storeId = "0"; br.close(); fr.close(); } catch (Exception e) { e.printStackTrace(); } return storeId; } // 将ID写入到磁盘文件中 public static boolean writeStoreId(String path,String storeId) { boolean b = false; try { File file = new File(path); if (!file.exists()) { file.createNewFile(); } FileWriter fw = new FileWriter(path); PrintWriter out = new PrintWriter(fw); out.write(storeId); out.close(); fw.close(); b=true; } catch (IOException e) { e.printStackTrace(); } return b; }}