hibernate search 和lucene结合使用实例(转)
?
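以下代码是根据API帮助文档写出的一个简单实例,在应用层面可以实现创建索引、搜索、过滤和高亮的功能。
整体环境为:spring2.5.6,hibernate3.3.1,struts2.0.8,lucene2.4.1(注意:下文service代码中用到的TermRangeQuery、Version.LUCENE_CURRENT、TopScoreDocCollector等API是从Lucene 2.9开始才提供的,实际编译运行这些代码需要使用lucene 2.9.x的jar包)。
第一步,首先是web.xml配置文件。由于使用了ssh2的架构,所以需要在web.xml里配置相应的过滤器、监听器等内容。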
?
<?xml version="1.0" encoding="UTF-8"?>
<web-app version="2.5" xmlns="http://java.sun.com/xml/ns/javaee"
    xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
    xsi:schemaLocation="http://java.sun.com/xml/ns/javaee http://java.sun.com/xml/ns/javaee/web-app_2_5.xsd">

    <!-- Location of the Spring configuration files -->
    <context-param>
        <param-name>contextConfigLocation</param-name>
        <param-value>classpath*:spring/*.xml</param-value>
    </context-param>

    <!--
        Spring character-encoding filter (UTF-8).
        Filters run in the order of their <filter-mapping> elements, so this mapping is
        declared first: the request encoding has to be set before any other filter
        (in particular the Struts2 dispatcher) reads request parameters.
    -->
    <filter>
        <filter-name>encodingFilter</filter-name>
        <filter-class>org.springframework.web.filter.CharacterEncodingFilter</filter-class>
        <init-param>
            <param-name>encoding</param-name>
            <param-value>UTF-8</param-value>
        </init-param>
    </filter>
    <filter-mapping>
        <filter-name>encodingFilter</filter-name>
        <url-pattern>/*</url-pattern>
    </filter-mapping>

    <!-- Hibernate Open Session in View filter -->
    <filter>
        <filter-name>hibernateFilter</filter-name>
        <filter-class>org.springframework.orm.hibernate3.support.OpenSessionInViewFilter</filter-class>
    </filter>
    <filter-mapping>
        <filter-name>hibernateFilter</filter-name>
        <url-pattern>*.action</url-pattern>
        <dispatcher>REQUEST</dispatcher>
        <dispatcher>FORWARD</dispatcher>
    </filter-mapping>
    <filter-mapping>
        <filter-name>hibernateFilter</filter-name>
        <url-pattern>*.jsp</url-pattern>
        <dispatcher>REQUEST</dispatcher>
        <dispatcher>FORWARD</dispatcher>
    </filter-mapping>

    <!-- Bootstraps the Spring application context -->
    <listener>
        <listener-class>org.springframework.web.context.ContextLoaderListener</listener-class>
    </listener>

    <!-- Spring listener that flushes the Introspector cache to prevent memory leaks -->
    <listener>
        <listener-class>org.springframework.web.util.IntrospectorCleanupListener</listener-class>
    </listener>

    <!-- Struts2 filters: the clean-up filter is mapped before the dispatcher -->
    <filter>
        <filter-name>struts-cleanup</filter-name>
        <filter-class>org.apache.struts2.dispatcher.ActionContextCleanUp</filter-class>
    </filter>
    <filter>
        <filter-name>struts2</filter-name>
        <filter-class>org.apache.struts2.dispatcher.FilterDispatcher</filter-class>
    </filter>
    <filter-mapping>
        <filter-name>struts-cleanup</filter-name>
        <url-pattern>/*</url-pattern>
    </filter-mapping>
    <filter-mapping>
        <filter-name>struts2</filter-name>
        <url-pattern>*.jsp</url-pattern>
        <dispatcher>REQUEST</dispatcher>
        <dispatcher>FORWARD</dispatcher>
    </filter-mapping>
    <filter-mapping>
        <filter-name>struts2</filter-name>
        <url-pattern>*.action</url-pattern>
        <dispatcher>REQUEST</dispatcher>
        <dispatcher>FORWARD</dispatcher>
    </filter-mapping>

    <!-- Servlet loaded at server start-up; it is meant to build the index files -->
    <servlet>
        <servlet-name>CreateHibernateIndex</servlet-name>
        <servlet-class>com.test.servlet.CreateHibernateIndex</servlet-class>
        <load-on-startup>20</load-on-startup>
    </servlet>
    <servlet-mapping>
        <servlet-name>CreateHibernateIndex</servlet-name>
        <url-pattern>/servlet/CreateHibernateIndex</url-pattern>
    </servlet-mapping>

    <!-- Session timeout, in minutes -->
    <session-config>
        <session-timeout>20</session-timeout>
    </session-config>

    <!-- Default welcome page -->
    <welcome-file-list>
        <welcome-file>/index.jsp</welcome-file>
    </welcome-file-list>
</web-app>
第二步,配置spring配置文件和hibernate文件。
?
下面是支持hibernate annotation注解的sessionFactory属性配置中的一部分。注意其中两个与索引有关的属性:directory_provider指定索引的存取方式(FSDirectoryProvider是文件索引,另一种RAMDirectoryProvider是内存索引),indexBase指定文件索引的保存路径。
?
<prop key="hibernate.search.default.directory_provider"> org.hibernate.search.store.FSDirectoryProvider </prop>
<prop key="hibernate.search.default.indexBase"> ${hibernate.search.default.indexBase} </prop>
spring的配置文件没有什么特别的,和普通ssh配置没有什么两样。
?
第三步,配置struts配置文件。由于也是普通配置,没有特别之处,就不贴出来了。
第四步,写实体类。由于采用hibernate search的方式搜索,所以直接利用hibernate search的注解去定义索引的配置信息;下面代码中@Indexed、@Analyzer、@DocumentId、@Field、@DateBridge等注解都属于索引的配置。
?
package com.test.model;

import static javax.persistence.GenerationType.IDENTITY;

import java.util.Date;

import javax.persistence.Column;
import javax.persistence.Entity;
import javax.persistence.GeneratedValue;
import javax.persistence.Id;
import javax.persistence.Table;
import javax.persistence.Temporal;
import javax.persistence.TemporalType;
import javax.persistence.Transient;

import org.hibernate.search.annotations.Analyzer;
import org.hibernate.search.annotations.DateBridge;
import org.hibernate.search.annotations.DocumentId;
import org.hibernate.search.annotations.Field;
import org.hibernate.search.annotations.Index;
import org.hibernate.search.annotations.Indexed;
import org.hibernate.search.annotations.Resolution;
import org.hibernate.search.annotations.Store;
import org.wltea.analyzer.lucene.IKAnalyzer;

/**
 * Product entity, mapped to table {@code product} and indexed by Hibernate Search
 * under the index name {@code Product}.
 *
 * <p>All mapping annotations sit on the getters. Three properties are written to the
 * index: the title (field {@code pt}), the description (field {@code pd}) and the
 * product date (field {@code t}). Text is analyzed with {@link IKAnalyzer}.
 */
@Entity
@Table(name = "product", catalog = "hibernate_search_test")
@Indexed(index = "Product")
@Analyzer(impl = IKAnalyzer.class)
public class Product implements java.io.Serializable {

    private static final long serialVersionUID = -7005490272739421758L;

    private Integer id;
    private String proTitle;
    private String proDescn;
    private String proPrice;
    private Integer proType;
    private Date proTime;

    /** Highlighted search snippet; not persisted (its getter is {@code @Transient}). */
    private String findResult;

    /** Default constructor. */
    public Product() {
    }

    /** Primary key; also used as the document id in the index. */
    @Id
    @GeneratedValue(strategy = IDENTITY)
    @Column(name = "id")
    @DocumentId
    public Integer getId() {
        return id;
    }

    public void setId(Integer id) {
        this.id = id;
    }

    /** Title; tokenized and stored in index field {@code pt}. */
    @Column(name = "pro_title")
    @Field(name = "pt", index = Index.TOKENIZED, store = Store.YES)
    public String getProTitle() {
        return proTitle;
    }

    public void setProTitle(String proTitle) {
        this.proTitle = proTitle;
    }

    /** Description; tokenized and stored in index field {@code pd}. */
    @Column(name = "pro_descn")
    @Field(name = "pd", index = Index.TOKENIZED, store = Store.YES)
    public String getProDescn() {
        return proDescn;
    }

    public void setProDescn(String proDescn) {
        this.proDescn = proDescn;
    }

    /** Price, kept as a string; not indexed. */
    @Column(name = "pro_price")
    public String getProPrice() {
        return proPrice;
    }

    public void setProPrice(String proPrice) {
        this.proPrice = proPrice;
    }

    /** Product type; not indexed. */
    @Column(name = "pro_type")
    public Integer getProType() {
        return proType;
    }

    public void setProType(Integer proType) {
        this.proType = proType;
    }

    /**
     * Product date; stored un-tokenized in index field {@code t} through a
     * {@code @DateBridge} with day resolution.
     */
    @Temporal(TemporalType.DATE)
    @Column(name = "pro_time")
    @Field(name = "t", index = Index.UN_TOKENIZED, store = Store.YES)
    @DateBridge(resolution = Resolution.DAY)
    public Date getProTime() {
        return proTime;
    }

    public void setProTime(Date proTime) {
        this.proTime = proTime;
    }

    /** Holds the highlighted content produced by a search. */
    @Transient
    public String getFindResult() {
        return findResult;
    }

    public void setFindResult(String findResult) {
        this.findResult = findResult;
    }
}
?
第六步(原文没有给出第五步,service中引用的ProductDao也没有贴出),写service方法,包括建索引、根据关键字用索引查询、过滤、设置权重、高亮等工作。
?
package com.test.service;

import java.io.File;
import java.io.StringReader;
import java.util.ArrayList;
import java.util.Date;
import java.util.List;

import javax.annotation.Resource;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.Token;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.document.DateTools;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.DateTools.Resolution;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.Term;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.CachingWrapperFilter;
import org.apache.lucene.search.Filter;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.QueryWrapperFilter;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TermRangeQuery;
import org.apache.lucene.search.TopScoreDocCollector;
import org.apache.lucene.search.BooleanClause.Occur;
import org.apache.lucene.search.highlight.Formatter;
import org.apache.lucene.search.highlight.Highlighter;
import org.apache.lucene.search.highlight.QueryScorer;
import org.apache.lucene.search.highlight.SimpleFragmenter;
import org.apache.lucene.search.highlight.SimpleHTMLFormatter;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;
import org.hibernate.CacheMode;
import org.hibernate.FlushMode;
import org.hibernate.ScrollMode;
import org.hibernate.ScrollableResults;
import org.hibernate.search.FullTextQuery;
import org.hibernate.search.FullTextSession;
import org.hibernate.search.Search;
import org.springframework.context.ApplicationContext;
import org.springframework.context.support.ClassPathXmlApplicationContext;
import org.springframework.stereotype.Service;
import org.springframework.transaction.annotation.Transactional;
import org.springside.modules.orm.hibernate.HibernateDao;
import org.springside.modules.service.EntityManager;
import org.wltea.analyzer.lucene.IKAnalyzer;

import com.test.dao.ProductDao;
import com.test.model.Product;

/**
 * Service for {@link Product} that demonstrates two ways of indexing and searching:
 * through Hibernate Search ({@link #createIndexByHibernateSearch()},
 * {@link #QueryByIndex(String, String, String)}) and through the plain Lucene API
 * ({@link #createIndexByLucene()}, {@link #SearchByLucene()}).
 *
 * <p>The two indexes use different field names ({@code pt}/{@code pd}/{@code t} versus
 * {@code pro_title}/{@code pro_descn}/{@code pro_time}), so an index built by one
 * approach is only searched by the matching search method.
 *
 * <p>NOTE(review): {@code logger} and {@code find(String)} are not declared in this
 * class; they are presumably inherited from {@code EntityManager} - confirm.
 */
@Transactional
@Service
public class ProductService extends EntityManager<Product, Integer> {

    /**
     * Directory of the plain-Lucene index. Both {@link #createIndexByLucene()} and
     * {@link #SearchByLucene()} use this single constant; previously they pointed at two
     * different directories, so the search never read the index that had just been built.
     */
    private static final String LUCENE_INDEX_DIR = "E:\\luceneIndexes\\product";

    /** Fetch size and session-clear interval used while indexing through Hibernate Search. */
    private static final int BATCH_SIZE = 1000;

    /** Maximum fragment size passed to the fragmenter of the description highlighter. */
    private static final int DESCN_FRAGMENT_SIZE = 30;

    @Resource(name = "productDao")
    private ProductDao productDao;

    @Override
    protected HibernateDao<Product, Integer> getEntityDao() {
        return productDao;
    }

    /**
     * Searches products through Hibernate Search.
     *
     * <p>The keywords are parsed once against the title field {@code pt} (boost 1.5) and
     * once against the description field {@code pd} (boost 1.0); a product matches when
     * either does. When both dates are given, results are additionally restricted to the
     * inclusive range {@code [startDate, endDate]} on index field {@code t}. Each returned
     * product gets a highlighted snippet in {@code findResult}: from the title if it
     * matched, otherwise from the description, otherwise {@code null}.
     *
     * @param words     keywords in Lucene query syntax; when {@code null} or blank an
     *                  empty list is returned without querying
     * @param startDate lower bound as an index term; the {@code t} field is indexed with
     *                  day resolution, so {@code yyyyMMdd} is expected. The date filter is
     *                  skipped when this or {@code endDate} is {@code null} or blank
     * @param endDate   upper bound, same format as {@code startDate}
     * @return matching products with {@code findResult} populated
     * @throws Exception if the keywords cannot be parsed or highlighting fails
     */
    @SuppressWarnings("unchecked")
    public List<Product> QueryByIndex(String words, String startDate, String endDate) throws Exception {
        // The query parser rejects an empty query string, so answer "nothing found" up front.
        if (isBlank(words)) {
            return new ArrayList<Product>();
        }

        FullTextSession fullTextSession = Search.createFullTextSession(productDao.getSession());
        Analyzer analyzer = new IKAnalyzer();

        // One parsed query per indexed field; the title is weighted higher than the description.
        // (A multi-field parser such as MultiFieldQueryParser would be an alternative.)
        Query q1 = new QueryParser(Version.LUCENE_CURRENT, "pt", analyzer).parse(words);
        q1.setBoost(1.5f);
        Query q2 = new QueryParser(Version.LUCENE_CURRENT, "pd", analyzer).parse(words);
        q2.setBoost(1.0f);

        BooleanQuery bQuery = new BooleanQuery();
        bQuery.add(q1, Occur.SHOULD);
        bQuery.add(q2, Occur.SHOULD);

        FullTextQuery fullTextQuery = fullTextSession.createFullTextQuery(bQuery, Product.class);

        if (!isBlank(startDate) && !isBlank(endDate)) {
            // Only one filter can be set on the query, so the date restriction is expressed
            // as a range query and every such condition is wrapped into a single filter.
            BooleanQuery bQueryForFilter = new BooleanQuery();
            bQueryForFilter.add(new TermRangeQuery("t", startDate, endDate, true, true),
                    BooleanClause.Occur.MUST);
            Filter filter = new CachingWrapperFilter(new QueryWrapperFilter(bQueryForFilter));
            fullTextQuery.setFilter(filter);
        }

        List<Product> result = fullTextQuery.list();

        // Highlighting template: wrap every matched keyword in a pair of HTML tags (plain bold).
        Formatter formatter = new SimpleHTMLFormatter("<b>", "</b>");
        Highlighter titleHighlighter = new Highlighter(formatter, new QueryScorer(q1));
        Highlighter descnHighlighter = new Highlighter(formatter, new QueryScorer(q2));
        descnHighlighter.setTextFragmenter(new SimpleFragmenter(DESCN_FRAGMENT_SIZE));

        // Store the highlighted snippet in the extra, non-persistent field of each entity
        // so that the page can print it directly.
        for (Product product : result) {
            String findResult = bestFragment(titleHighlighter, analyzer, "pt", product.getProTitle());
            if (findResult == null) {
                findResult = bestFragment(descnHighlighter, analyzer, "pd", product.getProDescn());
            }
            product.setFindResult(findResult);
        }
        return result;
    }

    /**
     * Builds the index through Hibernate Search by scrolling over every {@link Product}
     * and passing each one to {@code FullTextSession.index}.
     *
     * <p>The session is switched to manual flush mode and cache mode IGNORE and is cleared
     * every {@link #BATCH_SIZE} entities.
     */
    public void createIndexByHibernateSearch() {
        long startTime = new Date().getTime();
        FullTextSession s = Search.createFullTextSession(productDao.getSession());
        // No explicit transaction here; the class is annotated with @Transactional.
        s.setFlushMode(FlushMode.MANUAL);
        s.setCacheMode(CacheMode.IGNORE);
        ScrollableResults results = s.createQuery("from Product")
                .setFetchSize(BATCH_SIZE)
                .scroll(ScrollMode.FORWARD_ONLY);
        try {
            int index = 0;
            while (results.next()) {
                index++;
                s.index(results.get(0)); // index each element
                if (index % BATCH_SIZE == 0) {
                    // NOTE(review): the original left the following call disabled. The
                    // Hibernate Search batch-indexing example calls it before clear() so
                    // that queued index work is applied per batch; it needs Hibernate
                    // Search 3.1+. Confirm the version in use before enabling it.
                    // s.flushToIndexes(); // apply changes to indexes
                    s.clear();
                }
            }
        } finally {
            // Release the underlying cursor even when indexing fails part-way.
            results.close();
        }
        s.clear();
        long endTime = new Date().getTime();
        logger.warn("建立Product索引 , 这花费了" + (endTime - startTime) + " 毫秒来把文档增加到索引里面去!");
    }

    /**
     * Builds the index with the plain Lucene API in {@link #LUCENE_INDEX_DIR}, replacing
     * any index already there. An index created this way uses its own field names and is
     * searched with {@link #SearchByLucene()}.
     *
     * <p>Failures are logged and not rethrown. The writer is always closed so that no
     * write lock is left behind.
     */
    @SuppressWarnings("deprecation")
    public void createIndexByLucene() {
        IndexWriter fsWriter = null;
        try {
            // Load the data first so that a failing query cannot leave a freshly emptied index.
            List<Product> productList = find("from Product");
            int size = productList.size();

            File fsDir = new File(LUCENE_INDEX_DIR);
            Analyzer analyzer = new IKAnalyzer();
            // File-system index; documents could also be buffered in a RAMDirectory first
            // and merged into the file index afterwards.
            fsWriter = new IndexWriter(FSDirectory.open(fsDir), analyzer, true,
                    IndexWriter.MaxFieldLength.UNLIMITED);
            fsWriter.setMaxBufferedDocs(1000);
            fsWriter.setMergeFactor(1000);

            long startTime = new Date().getTime();
            for (Product product : productList) {
                Document doc = new Document();
                addTextField(doc, "pro_title", product.getProTitle());
                addTextField(doc, "pro_descn", product.getProDescn());
                if (product.getProTime() != null) {
                    doc.add(new Field("pro_time",
                            DateTools.dateToString(product.getProTime(), Resolution.DAY),
                            Field.Store.YES, Field.Index.NOT_ANALYZED));
                }
                fsWriter.addDocument(doc);
            }
            // Optimize (merge) the index files.
            fsWriter.optimize();
            fsWriter.close();
            fsWriter = null; // closed normally; nothing left for the finally block
            long endTime = new Date().getTime();
            System.out.println("一共" + size + ",这花费了" + (endTime - startTime) + " 毫秒来把文档增加到索引里面去!");
        } catch (Exception e) {
            logger.error("Failed to build the Lucene index in " + LUCENE_INDEX_DIR, e);
        } finally {
            if (fsWriter != null) {
                try {
                    fsWriter.close();
                } catch (Exception e) {
                    logger.error("Failed to close the Lucene index writer", e);
                }
            }
        }
    }

    /**
     * Demo search with the plain Lucene API: rebuilds the index via
     * {@link #createIndexByLucene()}, searches title and description for a fixed keyword
     * within a fixed date range, and prints the highlighted title of each hit followed by
     * the hit count to standard output.
     *
     * <p>Failures are logged and not rethrown; searcher and reader are always closed.
     */
    public void SearchByLucene() {
        createIndexByLucene();
        File fsDir = new File(LUCENE_INDEX_DIR);
        Analyzer analyzer = new IKAnalyzer();
        IndexReader reader = null;
        IndexSearcher isearcher = null;
        try {
            reader = IndexReader.open(FSDirectory.open(fsDir), true); // only searching, so read-only=true
            isearcher = new IndexSearcher(reader);

            // Keyword query: title weighted higher than description.
            BooleanQuery booleanQuery = new BooleanQuery();
            Query query = new QueryParser(Version.LUCENE_CURRENT, "pro_title", analyzer).parse("大灯");
            query.setBoost(1.5f);
            booleanQuery.add(query, Occur.SHOULD);
            query = new QueryParser(Version.LUCENE_CURRENT, "pro_descn", analyzer).parse("大灯");
            query.setBoost(1.0f);
            booleanQuery.add(query, Occur.SHOULD);

            // Date restriction, wrapped into a filter.
            BooleanQuery filterBooleanQuery = new BooleanQuery();
            filterBooleanQuery.add(new TermRangeQuery("pro_time", "20090101", "20091101", true, true),
                    BooleanClause.Occur.MUST);
            Filter filter = new CachingWrapperFilter(new QueryWrapperFilter(filterBooleanQuery));

            TopScoreDocCollector collector = TopScoreDocCollector.create(100, true);
            isearcher.search(booleanQuery, filter, collector);
            ScoreDoc[] hits = collector.topDocs(0, 100).scoreDocs;

            // The highlighter does not depend on the hit, so build it once.
            QueryScorer qs = new QueryScorer(new TermQuery(new Term("pro_title", "大灯")));
            Highlighter hl = new Highlighter(new SimpleHTMLFormatter("<b>", "</b>"), qs);
            for (ScoreDoc h : hits) {
                Document d = isearcher.doc(h.doc);
                System.out.println(bestFragment(hl, analyzer, "pro_title", d.get("pro_title")));
            }
            System.out.println("命中:" + hits.length);
        } catch (Exception e) {
            logger.error("Lucene search on " + LUCENE_INDEX_DIR + " failed", e);
        } finally {
            // Closing a searcher built from an IndexReader does not close that reader,
            // so both are closed explicitly.
            if (isearcher != null) {
                try {
                    isearcher.close();
                } catch (Exception e) {
                    logger.error("Failed to close the index searcher", e);
                }
            }
            if (reader != null) {
                try {
                    reader.close();
                } catch (Exception e) {
                    logger.error("Failed to close the index reader", e);
                }
            }
        }
    }

    /**
     * Prints the tokens that {@code analyzer} produces for {@code s}, separated by spaces
     * and followed by a line break; useful for checking how text is segmented.
     *
     * @param analyzer analyzer to test
     * @param s        text to tokenize; it is also passed as the field name, as in the
     *                 original code
     * @throws Exception if tokenizing fails
     */
    @SuppressWarnings("deprecation")
    public static void showAnalyzerResult(Analyzer analyzer, String s) throws Exception {
        StringReader reader = new StringReader(s);
        TokenStream ts = analyzer.tokenStream(s, reader);
        for (Token t = ts.next(); t != null; t = ts.next()) {
            System.out.print(t.termText() + " ");
        }
        System.out.println();
    }

    /** Stand-alone entry point: loads the Spring context and runs {@link #SearchByLucene()}. */
    public static void main(String[] args) {
        ApplicationContext ctx = new ClassPathXmlApplicationContext("spring/applicationContext.xml");
        ProductService service = (ProductService) ctx.getBean("productService");
        service.SearchByLucene();
    }

    /** Returns {@code true} when {@code s} is {@code null}, empty or only whitespace. */
    private static boolean isBlank(String s) {
        return s == null || s.trim().length() == 0;
    }

    /** Highlights {@code text}; returns {@code null} when there is no text or no match. */
    private static String bestFragment(Highlighter highlighter, Analyzer analyzer, String field, String text)
            throws Exception {
        if (text == null) {
            return null;
        }
        return highlighter.getBestFragment(analyzer, field, text);
    }

    /** Adds a stored, analyzed field; {@code null} values are skipped because Lucene rejects them. */
    private static void addTextField(Document doc, String name, String value) {
        if (value != null) {
            doc.add(new Field(name, value, Field.Store.YES, Field.Index.ANALYZED));
        }
    }
}
?
?
这里最值得注意的是使用了Date类型的属性,并把它加入到了查询条件中。这样的属性上要加@DateBridge注解,它的resolution属性指定日期写入索引时的精度,对应的字符串格式如下:
Resolution.YEAR: yyyy
Resolution.MONTH: yyyyMM
Resolution.DAY: yyyyMMdd
Resolution.HOUR: yyyyMMddHH
Resolution.MINUTE: yyyyMMddHHmm
Resolution.SECOND: yyyyMMddHHmmss
Resolution.MILLISECOND: yyyyMMddHHmmssSSS
所以只要把查询用的日期转换为对应格式的字符串(例如本例的Resolution.DAY对应yyyyMMdd),就可以用日期做范围查询了。
?
?