Lucene 3 搜索引擎:索引建立、搜索、排序、分页、高亮显示,IKAnalyzer 分词
package com.zjr.service.impl;import java.io.File;import java.io.IOException;import java.io.StringReader;import java.lang.reflect.InvocationTargetException;import java.util.ArrayList;import java.util.List;import org.apache.commons.beanutils.BeanUtils;import org.apache.commons.logging.Log;import org.apache.commons.logging.LogFactory;import org.apache.lucene.analysis.Analyzer;import org.apache.lucene.analysis.TokenStream;import org.apache.lucene.document.Document;import org.apache.lucene.document.Field;import org.apache.lucene.document.Field.Index;import org.apache.lucene.document.Field.Store;import org.apache.lucene.index.CorruptIndexException;import org.apache.lucene.index.IndexReader;import org.apache.lucene.index.IndexWriter;import org.apache.lucene.index.Term;import org.apache.lucene.search.BooleanClause;import org.apache.lucene.search.IndexSearcher;import org.apache.lucene.search.Query;import org.apache.lucene.search.ScoreDoc;import org.apache.lucene.search.Sort;import org.apache.lucene.search.SortField;import org.apache.lucene.search.TopDocs;import org.apache.lucene.search.TopScoreDocCollector;import org.apache.lucene.search.highlight.Highlighter;import org.apache.lucene.search.highlight.InvalidTokenOffsetsException;import org.apache.lucene.search.highlight.QueryScorer;import org.apache.lucene.search.highlight.SimpleHTMLFormatter;import org.apache.lucene.store.Directory;import org.apache.lucene.store.FSDirectory;import org.wltea.analyzer.lucene.IKAnalyzer;import org.wltea.analyzer.lucene.IKQueryParser;import org.wltea.analyzer.lucene.IKSimilarity;import com.zjr.model.User;public class UserIndexService {private final Log logger = LogFactory.getLog(UserIndexService.class);private final String dirPath = "d:/temp/user";Analyzer analyzer = new IKAnalyzer();Directory directory = null;IndexWriter writer = null;IndexSearcher indexSearcher = null;private void confirmDirs() {File indexFile = new File(dirPath);if (!indexFile.exists()) {indexFile.mkdirs();}if 
(!indexFile.exists() || !indexFile.canWrite()) {if (logger.isDebugEnabled())logger.error("索引文件目录创建失败或不可写入!");}}public void init() {confirmDirs();try {File f = new File(dirPath);directory = FSDirectory.open(f);} catch (Exception e) {if (logger.isDebugEnabled()) {logger.error("解除索引文件锁定失败!" + e.getCause());}}}public void createIndex(List<User> userList) {init();try {// 第一个参数是存放索引目录有FSDirectory(存储到磁盘上)和RAMDirectory(存储到内存中),//第二个参数是使用的分词器, 第三个:true,建立全新的索引,false,建立增量索引,第四个是建立的索引的最大长度writer = new IndexWriter(directory, analyzer, true,IndexWriter.MaxFieldLength.LIMITED);writer.setMergeFactor(500);writer.setMaxBufferedDocs(155);writer.setMaxFieldLength(Integer.MAX_VALUE);writeIndex(writer, userList);writer.optimize();writer.close();} catch (IOException e) {// TODO Auto-generated catch blocke.printStackTrace();}}public List<User> search(String keyword) {File indexFile = new File(dirPath);if (!indexFile.exists()) {return null;}Directory dir;try {dir = FSDirectory.open(indexFile);indexSearcher = new IndexSearcher(dir);indexSearcher.setSimilarity(new IKSimilarity());// 单字段查询,单条件查询// Query query = IKQueryParser.parse("userInfo", keyword);// 多字段,单条件查询String[] fields = new String[] { "userInfo", "parameter1" };Query query = IKQueryParser.parseMultiField(fields, keyword);// 多字体,单条件,多BooleanClause.Occur[] flags , 查询条件的组合方式(Or/And)// BooleanClause.Occur[]数组,它表示多个条件之间的关系,// BooleanClause.Occur.MUST表示 and,// BooleanClause.Occur.MUST_NOT表示not,// BooleanClause.Occur.SHOULD表示or.// String[] fields =new String[]{"userInfo","parameter1"};// BooleanClause.Occur[] flags=new// BooleanClause.Occur[]{BooleanClause.Occur.MUST,BooleanClause.Occur.SHOULD};// Query query = IKQueryParser.parseMultiField(fields,// keyword,flags);// //多Field,多条件查询分析// String[] fields =new String[]{"userInfo","parameter1"};// String[] queries = new String[]{keyword,keyword};// Query query = IKQueryParser.parseMultiField(fields,queries);// 多Field,多条件,多Occur 查询// String[] fields =new String[]{"userInfo","parameter1"};// 
String[] queries = new String[]{keyword,keyword};// BooleanClause.Occur[] flags=new// BooleanClause.Occur[]{BooleanClause.Occur.MUST,BooleanClause.Occur.SHOULD};// Query query =// IKQueryParser.parseMultiField(fields,queries,flags);// 搜索相似度最高的20条记录TopDocs topDocs = indexSearcher.search(query, 20);ScoreDoc[] hits = topDocs.scoreDocs;return hitsToQuery(hits, query);} catch (IOException e) {// TODO Auto-generated catch blocke.printStackTrace();}return null;}private List<User> hitsToQuery(ScoreDoc[] hits, Query query) {List<User> list = new ArrayList<User>();try {for (int i = 0; i < hits.length; i++) {User u = new User();Document doc = indexSearcher.doc(hits[i].doc);u.setUserId(Integer.parseInt(doc.get("userId")));u.setUserName(doc.get("userName"));u.setUserAge(Integer.parseInt(doc.get("userAge")));// 高亮设置SimpleHTMLFormatter simpleHtmlFormatter = new SimpleHTMLFormatter("<font color="red">", "</font>");Highlighter highlighter = new Highlighter(simpleHtmlFormatter,new QueryScorer(query));TokenStream tokenStream = analyzer.tokenStream("text",new StringReader(doc.get("userInfo")));String userInfo = highlighter.getBestFragment(tokenStream, doc.get("userInfo"));if (userInfo != null) {u.setUserInfo(userInfo);} else {u.setUserInfo(doc.get("userInfo"));}SimpleHTMLFormatter simpleHtmlFormatter1 = new SimpleHTMLFormatter("<font color="red">", "</font>");Highlighter highlighter1 = new Highlighter(simpleHtmlFormatter1, new QueryScorer(query));TokenStream tokenStream1 = analyzer.tokenStream("text1",new StringReader(doc.get("parameter1")));String p1 = highlighter1.getBestFragment(tokenStream1, doc.get("parameter1"));if (p1 != null) {u.setParameter1(p1);} else {u.setParameter1(doc.get("parameter1"));}u.setParameter2(doc.get("parameter2"));u.setParameter3(doc.get("parameter3"));u.setParameter4(doc.get("parameter4"));list.add(u);}indexSearcher.close();return list;} catch (CorruptIndexException e) {// TODO Auto-generated catch blocke.printStackTrace();} catch (IOException e) {// TODO 
Auto-generated catch blocke.printStackTrace();} catch (InvalidTokenOffsetsException e) {// TODO Auto-generated catch blocke.printStackTrace();}return null;}public void writeIndex(IndexWriter writer, List<User> userList) {try {for (User u : userList) {Document doc = getDoc(u);writer.addDocument(doc);}} catch (IOException e) {// TODO Auto-generated catch blocke.printStackTrace();}}private Document getDoc(User user) {System.out.println("用户ID 为" + user.getUserId() + " 索引被创建");Document doc = new Document();addField2Doc(doc, user, "userId", Store.YES, Index.NOT_ANALYZED);addField2Doc(doc, user, "userName", Store.YES, Index.NOT_ANALYZED);// Index.NOT_ANALYZED// 不分词,但建立索引addField2Doc(doc, user, "userAge", Store.YES, Index.NOT_ANALYZED);// Index.ANALYZED// 分词并且建立索引addField2Doc(doc, user, "userInfo", Store.YES, Index.ANALYZED);addField2Doc(doc, user, "parameter1", Store.YES, Index.ANALYZED);addField2Doc(doc, user, "parameter2", Store.YES, Index.ANALYZED);addField2Doc(doc, user, "parameter3", Store.YES, Index.ANALYZED);addField2Doc(doc, user, "parameter4", Store.YES, Index.ANALYZED);return doc;}private void addField2Doc(Document doc, Object bean, String name, Store s,Index i) {String value;try {value = BeanUtils.getProperty(bean, name);if (value != null) {doc.add(new Field(name, value, s, i,Field.TermVector.WITH_POSITIONS_OFFSETS));}} catch (IllegalAccessException e) {logger.error("get bean property error", e);} catch (InvocationTargetException e) {logger.error("get bean property error", e);} catch (NoSuchMethodException e) {logger.error("get bean property error", e);}}/** * 没有排序,有高亮,有分页 * * @param pageNo * @param pageSize * @param keyword * @return */public PageBean getPageQuery(int pageNo, int pageSize, String keyword) {List result = new ArrayList();File indexFile = new File(dirPath);if (!indexFile.exists()) {return null;}Directory dir;try {dir = FSDirectory.open(indexFile);indexSearcher = new IndexSearcher(dir);indexSearcher.setSimilarity(new IKSimilarity());String[] 
fields = new String[] { "userInfo", "parameter1" };BooleanClause.Occur[] flags = new BooleanClause.Occur[] {BooleanClause.Occur.MUST, BooleanClause.Occur.SHOULD };Query query = IKQueryParser.parseMultiField(fields, keyword, flags);TopScoreDocCollector topCollector = TopScoreDocCollector.create(indexSearcher.maxDoc(), true);indexSearcher.search(query, topCollector);// 查询当页的记录ScoreDoc[] docs = topCollector.topDocs((pageNo - 1) * pageSize,pageSize).scoreDocs;// String[] highlightCol = {"userInfo", "parameter1"};// 高亮设置SimpleHTMLFormatter simpleHtmlFormatter = new SimpleHTMLFormatter("<font color="red">", "</font>");Highlighter highlighter = new Highlighter(simpleHtmlFormatter,new QueryScorer(query));for (ScoreDoc scdoc : docs) {User u = new User();Document doc = indexSearcher.doc(scdoc.doc);// // for (Fieldable fa : doc.getFields()) {// System.out.println(fa.name());// String value = doc.get(fa.name());// for (String col : highlightCol) {// if(fa.name().equals(col)) {// //设置高显内容// TokenStream tokenStream = analyzer.tokenStream("text",new// StringReader(value));// value = highlighter.getBestFragment(tokenStream, value);// }// }// // }u.setUserId(Integer.parseInt(doc.get("userId")));u.setUserName(doc.get("userName"));u.setUserAge(Integer.parseInt(doc.get("userAge")));TokenStream tokenStream = analyzer.tokenStream("text",new StringReader(doc.get("userInfo")));String userInfo = highlighter.getBestFragment(tokenStream, doc.get("userInfo"));if (userInfo != null) {u.setUserInfo(userInfo);} else {u.setUserInfo(doc.get("userInfo"));}TokenStream tokenStream1 = analyzer.tokenStream("text1",new StringReader(doc.get("parameter1")));String p1 = highlighter.getBestFragment(tokenStream1, doc.get("parameter1"));if (p1 != null) {u.setParameter1(p1);} else {u.setParameter1(doc.get("parameter1"));}u.setParameter2(doc.get("parameter2"));u.setParameter3(doc.get("parameter3"));u.setParameter4(doc.get("parameter4"));result.add(u);}PageBean pb = new PageBean();pb.setCurrentPage(pageNo);// 
当前页pb.setPageSize(pageSize);pb.setAllRow(topCollector.getTotalHits());// hit中的记录数目pb.setList(result);return pb;} catch (IOException e) {// TODO Auto-generated catch blocke.printStackTrace();} catch (InvalidTokenOffsetsException e) {// TODO Auto-generated catch blocke.printStackTrace();}return null;}/** * 排序,有高亮,有分页 * * @param pageNo * @param pageSize * @param keyword * @return */public PageBean getPageQuery2(int pageNo, int pageSize, String keyword) {List result = new ArrayList();File indexFile = new File(dirPath);if (!indexFile.exists()) {return null;}Directory dir;try {dir = FSDirectory.open(indexFile);indexSearcher = new IndexSearcher(dir);indexSearcher.setSimilarity(new IKSimilarity());String[] fields = new String[] { "userInfo", "parameter1" };BooleanClause.Occur[] flags = new BooleanClause.Occur[] {BooleanClause.Occur.MUST, BooleanClause.Occur.SHOULD };Query query = IKQueryParser.parseMultiField(fields, keyword, flags);// 多字段排序,设置在前面的会优先排序SortField[] sortFields = new SortField[2];SortField sortField = new SortField("userId", SortField.INT, false);//false升序,true降序SortField FIELD_SEX = new SortField("userAge", SortField.INT, true);sortFields[0] = sortField;sortFields[1] = FIELD_SEX;Sort sort = new Sort(sortFields);TopDocs topDocs = indexSearcher.search(query, null, 50, sort);if (topDocs.totalHits != 0) {// for(ScoreDoc sd : topDocs.scoreDocs) {// // }// 高亮设置SimpleHTMLFormatter simpleHtmlFormatter = new SimpleHTMLFormatter("<font color="red">", "</font>");Highlighter highlighter = new Highlighter(simpleHtmlFormatter,new QueryScorer(query));for (int i = (pageNo - 1) * pageSize; i < pageSize * pageNo; i++) {ScoreDoc scdoc = topDocs.scoreDocs[i];User u = new User();Document doc = indexSearcher.doc(scdoc.doc);u.setUserId(Integer.parseInt(doc.get("userId")));u.setUserName(doc.get("userName"));u.setUserAge(Integer.parseInt(doc.get("userAge")));TokenStream tokenStream = analyzer.tokenStream("text",new StringReader(doc.get("userInfo")));String userInfo = 
highlighter.getBestFragment(tokenStream,doc.get("userInfo"));if (userInfo != null) {u.setUserInfo(userInfo);} else {u.setUserInfo(doc.get("userInfo"));}TokenStream tokenStream1 = analyzer.tokenStream("text1",new StringReader(doc.get("parameter1")));String p1 = highlighter.getBestFragment(tokenStream1, doc.get("parameter1"));if (p1 != null) {u.setParameter1(p1);} else {u.setParameter1(doc.get("parameter1"));}u.setParameter2(doc.get("parameter2"));u.setParameter3(doc.get("parameter3"));u.setParameter4(doc.get("parameter4"));result.add(u);}PageBean pb = new PageBean();pb.setCurrentPage(pageNo);// 当前页pb.setPageSize(pageSize);pb.setAllRow(topDocs.totalHits);// hit中的记录数目pb.setList(result);return pb;}} catch (IOException e) {// TODO Auto-generated catch blocke.printStackTrace();} catch (InvalidTokenOffsetsException e) {// TODO Auto-generated catch blocke.printStackTrace();}return null;}/** * 删除索引 * @param userId */public void deleIndex(String userId){try {File f = new File(dirPath);directory = FSDirectory.open(f);IndexReader reader = IndexReader.open(directory,false); Term term = new Term("userId", userId); reader.deleteDocuments(term); reader.close(); } catch (IOException e) {// TODO Auto-generated catch blocke.printStackTrace();}}}??
1 楼 catastiger 2010-11-05 //高亮设置集成抽取成一个方法