Lucene自定义搜索结果的排序代码分享
最近在做Lucene的应用,趁着一个节点的间歇,总结了Lucene中有关自定义搜索结果排序的相关代码,一来和大家共同探讨,二来也便于备忘。
众所周知,Lucene默认的搜索结果按Score从高到低排序;当Score相等时,则按建立索引时生成的docID由小到大排序。通过自定义搜索结果的排序,则可以完全按照真实业务的需要来决定结果的顺序。
下面以一个查询餐馆距离的例子配合代码进行讲解(该例在很多地方都能找到,但是我参考的时候发现很多地方提供的代码都不能直接运行)。以下是可以直接运行的例子代码:
DistanceComparatorSource.java
package com.xxx.demo;

import java.io.IOException;

import org.apache.lucene.index.IndexReader;
import org.apache.lucene.search.FieldCache;
import org.apache.lucene.search.FieldCache.IntParser;
import org.apache.lucene.search.FieldComparator;
import org.apache.lucene.search.FieldComparatorSource;
import org.apache.lucene.search.SortField;

/**
 * Sorts search hits by their Euclidean distance from a fixed reference point.
 *
 * <p>The sort field is expected to hold a location encoded as {@code "x,y"}
 * (two integers separated by a comma). Hits closer to the reference point
 * compare as smaller.
 */
public class DistanceComparatorSource extends FieldComparatorSource {

    // Shared parser instances. FieldCache is understood to key its entries by
    // parser, so reusing the same instances lets repeated searches hit the
    // cache instead of re-parsing the field every time -- confirm against the
    // Lucene version in use.
    private static final IntParser X_PARSER = new LocationComponentParser(0);
    private static final IntParser Y_PARSER = new LocationComponentParser(1);

    /** X coordinate of the reference point. */
    private final int x;

    /** Y coordinate of the reference point. */
    private final int y;

    /**
     * Creates a comparator source measuring distance from {@code (x, y)}.
     *
     * @param x x coordinate of the reference point
     * @param y y coordinate of the reference point
     */
    public DistanceComparatorSource(int x, int y) {
        this.x = x;
        this.y = y;
    }

    /**
     * Creates the comparator for one search.
     *
     * @param fieldname name of the field holding the {@code "x,y"} location
     * @param numHits   number of slots the comparator must be able to hold
     * @param sortPos   not used by this implementation
     * @param reversed  not used by this implementation
     * @return a new distance comparator
     */
    @Override
    public FieldComparator newComparator(String fieldname, int numHits, int sortPos, boolean reversed)
            throws IOException {
        return new DistanceScoreDocLookupComparator(fieldname, numHits);
    }

    /** Extracts one comma-separated integer component from an {@code "x,y"} term. */
    private static final class LocationComponentParser implements IntParser {

        /** Zero-based index of the component to extract (0 = x, 1 = y). */
        private final int index;

        LocationComponentParser(int index) {
            this.index = index;
        }

        @Override
        public int parseInt(String string) {
            return Integer.parseInt(string.split(",")[index]);
        }
    }

    /** Comparator that stores one distance per slot and orders slots by ascending distance. */
    private class DistanceScoreDocLookupComparator extends FieldComparator {

        /** Per-document x and y coordinates of the current reader. */
        private int[] xDoc;
        private int[] yDoc;

        /** Distance stored for each slot. */
        private final float[] values;

        /** Distance of the slot last passed to {@link #setBottom(int)}. */
        private float bottom;

        final String fieldName;

        public DistanceScoreDocLookupComparator(String fieldName, int numHits) {
            this.values = new float[numHits];
            this.fieldName = fieldName;
        }

        @Override
        public int compare(int slot1, int slot2) {
            return Float.compare(values[slot1], values[slot2]);
        }

        @Override
        public int compareBottom(int doc) throws IOException {
            return Float.compare(bottom, getDistance(doc));
        }

        @Override
        public void copy(int slot, int doc) throws IOException {
            values[slot] = getDistance(doc);
        }

        @Override
        public void setBottom(int slot) {
            bottom = values[slot];
        }

        @Override
        public void setNextReader(IndexReader reader, int docBase) throws IOException {
            // Both coordinate arrays come from the same field; only the parser differs.
            xDoc = FieldCache.DEFAULT.getInts(reader, fieldName, X_PARSER);
            yDoc = FieldCache.DEFAULT.getInts(reader, fieldName, Y_PARSER);
        }

        @Override
        public Float value(int slot) {
            return Float.valueOf(values[slot]);
        }

        /**
         * Computes the Euclidean distance between the reference point and the
         * location of {@code doc} in the current reader.
         */
        private float getDistance(int doc) {
            // Work in double so that neither the subtraction nor the squares can overflow int.
            double deltaX = (double) xDoc[doc] - x;
            double deltaY = (double) yDoc[doc] - y;
            return (float) Math.sqrt(deltaX * deltaX + deltaY * deltaY);
        }

        public int sortType() {
            return SortField.CUSTOM;
        }
    }

    @Override
    public String toString() {
        return "Distance from (" + x + "," + y + ")";
    }
}
package com.xxx.demo;

import java.io.IOException;

import junit.framework.TestCase;

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.FieldDoc;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.Sort;
import org.apache.lucene.search.SortField;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.TopFieldDocs;
import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.util.Version;

/**
 * Exercises {@link DistanceComparatorSource} against a small in-memory index
 * of four restaurants, each stored with an {@code "x,y"} location field.
 */
public class DistanceSortingTest extends TestCase {

    private RAMDirectory directory;
    private IndexSearcher searcher;
    private Query query;

    /** Builds the in-memory index and prepares the {@code type:restaurant} query. */
    @Override
    protected void setUp() throws Exception {
        super.setUp();
        directory = new RAMDirectory();
        IndexWriterConfig config =
                new IndexWriterConfig(Version.LUCENE_33, new StandardAnalyzer(Version.LUCENE_33));
        config.setOpenMode(OpenMode.CREATE);
        IndexWriter writer = new IndexWriter(directory, config);
        try {
            // Trailing comments give the squared distance of each point from (0,0).
            addPoint(writer, "El Charro", "restaurant restaurant restaurant", 1, 2); // 1+4=5
            addPoint(writer, "Cafe Poca Cosa", "restaurant", 5, 9);                  // 25+81=106
            addPoint(writer, "Los Betos", "restaurant", 9, 6);                       // 81+36=117
            addPoint(writer, "Nico's Taco Shop", "restaurant restaurant", 3, 8);     // 9+64=73
        } finally {
            writer.close();
        }
        searcher = new IndexSearcher(directory);
        QueryParser parser =
                new QueryParser(Version.LUCENE_33, "type", new StandardAnalyzer(Version.LUCENE_33));
        query = parser.parse("type:restaurant");
    }

    /** Releases the searcher and the in-memory directory opened by {@link #setUp()}. */
    @Override
    protected void tearDown() throws Exception {
        if (searcher != null) {
            searcher.close();
        }
        if (directory != null) {
            directory.close();
        }
        super.tearDown();
    }

    /**
     * Indexes one restaurant.
     *
     * @param writer index writer to add the document to
     * @param name   restaurant name, stored in the {@code name} field
     * @param type   text stored in the {@code type} field
     * @param x      x coordinate of the location
     * @param y      y coordinate of the location
     */
    private void addPoint(IndexWriter writer, String name, String type, int x, int y)
            throws CorruptIndexException, IOException {
        Document doc = new Document();
        doc.add(new Field("name", name, Field.Store.YES, Field.Index.ANALYZED));
        doc.add(new Field("type", type, Field.Store.YES, Field.Index.ANALYZED));
        // The location is one un-analyzed "x,y" term so the comparator can parse it back.
        doc.add(new Field("location", x + "," + y, Field.Store.YES, Field.Index.NOT_ANALYZED));
        writer.addDocument(doc);
    }

    /** Default ordering (no custom sort): prints the hits and checks the expected order. */
    public void testNormRestaurant() throws IOException {
        TopDocs hits = searcher.search(query, 10);
        System.out.println("--------testNormRestaurant---------- ");
        for (ScoreDoc doc : hits.scoreDocs) {
            System.out.println("docId:" + doc.doc + "score:" + doc.score
                    + ", name:" + searcher.doc(doc.doc).get("name"));
        }
        assertEquals("first", "Cafe Poca Cosa", nameOf(hits, 0));
        assertEquals("second", "Los Betos", nameOf(hits, 1));
        assertEquals("third", "Nico's Taco Shop", nameOf(hits, 2));
        assertEquals("forth", "El Charro", nameOf(hits, 3));
    }

    /** Sorting by distance from (0,0) must list the restaurants from nearest to furthest. */
    public void testNearestRestaurantToHome() throws IOException {
        Sort sort = new Sort(new SortField("location", new DistanceComparatorSource(0, 0)));
        TopDocs hits = searcher.search(query, null, 10, sort);
        System.out.println("--------testNearestRestaurantToHome---------- ");
        for (ScoreDoc doc : hits.scoreDocs) {
            System.out.println("docId:" + doc.doc + "name:" + searcher.doc(doc.doc).get("name"));
        }
        assertEquals("cloest", "El Charro", nameOf(hits, 0));
        assertEquals("second", "Nico's Taco Shop", nameOf(hits, 1));
        assertEquals("third", "Cafe Poca Cosa", nameOf(hits, 2));
        assertEquals("furthest", "Los Betos", nameOf(hits, 3));
    }

    /** Sorting by distance from (10,10), limited to three hits, exposes the distance as the sort value. */
    public void testNearestRestaurantToWork() throws IOException {
        Sort sort = new Sort(new SortField("location", new DistanceComparatorSource(10, 10)));
        TopFieldDocs docs = searcher.search(query, null, 3, sort);
        assertEquals(4, docs.totalHits);
        assertEquals(3, docs.scoreDocs.length);

        FieldDoc fieldDoc = (FieldDoc) docs.scoreDocs[0];
        assertEquals("(10,10) -> (9,6) = sqrt(17)",
                Float.valueOf((float) Math.sqrt(17)), fieldDoc.fields[0]);

        Document document = searcher.doc(fieldDoc.doc);
        assertEquals("Los Betos", document.get("name"));
    }

    /** Returns the stored {@code name} of the hit at the given rank. */
    private String nameOf(TopDocs hits, int rank) throws IOException {
        return searcher.doc(hits.scoreDocs[rank].doc).get("name");
    }
}