lucene自定义排序例子
本人主要参考《Lucene实战》一书,不过中文版中存在一些错误,导致程序没有给出想要的结果,所以最终还是要以API文档为准。
在Lucene 3.x中实现自定义排序,主要是编写两个类:一个继承抽象类FieldComparatorSource,另一个继承抽象类FieldComparator。
1.继承FieldComparatorSource,必须实现抽象方法newComparator。
2.继承FieldComparator,必须实现下面6个抽象方法:
- compare(int, int): Compare a hit at 'slot a' with hit 'slot b'.
- setBottom(int): This method is called by FieldValueHitQueue to notify the FieldComparator of the current weakest ("bottom") slot. Note that this slot may not hold the weakest value according to your comparator, in cases where your comparator is not the primary one (i.e., is only used to break ties from the comparators before it).
- compareBottom(int): Compare a new hit (docID) against the "weakest" (bottom) entry in the queue.
- copy(int, int): Installs a new hit into the priority queue. The FieldValueHitQueue calls this method when a new hit is competitive.
- setNextReader(org.apache.lucene.index.IndexReader, int): Invoked when the search is switching to the next segment. You may need to update internal state of the comparator, for example retrieving new values from the FieldCache.
- value(int): Return the sort value stored in the specified slot. This is only called at the end of the search, in order to populate FieldDoc.fields when returning the top results.

上面的方法描述摘自API文档,详细内容请查阅API文档。
下面是书上的一个简单例子:根据用户所在位置(二维坐标),把匹配到的餐厅按照与用户的距离由近到远排序。每个地点保存三个域:地名(name)、该地点的类型(type),以及以"x,y"形式保存的二维坐标(location)。下面是具体实现代码:
package org.apache.lucene.demo;import java.io.IOException;import org.apache.lucene.document.Field;import org.apache.lucene.index.IndexReader;import org.apache.lucene.search.FieldCache;import org.apache.lucene.search.FieldComparator;import org.apache.lucene.search.FieldComparatorSource;import org.apache.lucene.search.SortField;public class DistanceComparatorSource extends FieldComparatorSource{ private int x; private int y; public DistanceComparatorSource(int x,int y){ this.x = x; this.y = y; }@Overridepublic FieldComparator<?> newComparator(String arg0, int arg1, int arg2,boolean arg3) throws IOException {// TODO Auto-generated method stubreturn new DistanceSourceLookupComparator(arg0, arg1);}private class DistanceSourceLookupComparator extends FieldComparator{private int[] xDoc,yDoc;private float[] values;private float bottom;String fieldName;public DistanceSourceLookupComparator(String fieldName , int numHits){values = new float[numHits];this.fieldName = fieldName;}@Overridepublic int compare(int arg0, int arg1) {// TODO Auto-generated method stubif(values[arg0] > values[arg1]) return 1;if(values[arg0] < values[arg1]) return -1;return 0;}private float getDistance(int doc){int deltax = xDoc[doc] - x ;int deltay = yDoc[doc] - y;return (float)Math.sqrt(deltax*deltax+deltay*deltay);}@Overridepublic int compareBottom(int arg0) throws IOException {// TODO Auto-generated method stubfloat distance = getDistance(arg0);if(bottom < distance) return -1;if(bottom > distance) return 1;return 0;}@Overridepublic void copy(int arg0, int arg1) throws IOException {// TODO Auto-generated method stubvalues[arg0] = getDistance(arg1);}@Overridepublic void setBottom(int arg0) {// TODO Auto-generated method stubbottom = values[arg0];}@Overridepublic void setNextReader(IndexReader arg0, int arg1) //在读下一个段时,书上有误,根据api的理解,如下实现得到正确结果throws IOException {// TODO Auto-generated method stubString[] temp = FieldCache.DEFAULT.getStrings(arg0, "location");xDoc = new int[temp.length];yDoc = new 
int[temp.length];for(int i = 0 ;i<temp.length;i++){String[] str = temp[i].split(",");xDoc[i] = Integer.parseInt(str[0]);yDoc[i] = Integer.parseInt(str[1]);}}@Overridepublic Object value(int arg0) {// TODO Auto-generated method stubreturn new Float(values[arg0]);}public int sortType(){return SortField.CUSTOM;}public String toString(){return "Distance from ("+x+","+y+")";}} }
下面是具体的测试运行排序结果的程序:
package org.apache.lucene.demo;import java.io.IOException;import javax.crypto.SealedObject;import org.apache.lucene.analysis.WhitespaceAnalyzer;import org.apache.lucene.document.Document;import org.apache.lucene.document.Field;import org.apache.lucene.document.FieldSelectorResult;import org.apache.lucene.index.CorruptIndexException;import org.apache.lucene.index.IndexWriter;import org.apache.lucene.index.Term;import org.apache.lucene.search.FieldDoc;import org.apache.lucene.search.IndexSearcher;import org.apache.lucene.search.Query;import org.apache.lucene.search.ScoreDoc;import org.apache.lucene.search.Searcher;import org.apache.lucene.search.Sort;import org.apache.lucene.search.SortField;import org.apache.lucene.search.TermQuery;import org.apache.lucene.search.TopDocs;import org.apache.lucene.search.TopFieldDocs;import org.apache.lucene.store.LockObtainFailedException;import org.apache.lucene.store.RAMDirectory;public class DistanceSortingTest {/** * @param args * @throws IOException * @throws LockObtainFailedException * @throws CorruptIndexException */public static void main(String[] args) throws CorruptIndexException, LockObtainFailedException, IOException {// TODO Auto-generated method stub RAMDirectory directory = new RAMDirectory(); IndexWriter indexWriter = new IndexWriter(directory, new WhitespaceAnalyzer(), IndexWriter.MaxFieldLength.UNLIMITED); addPoint(indexWriter, "El charro", "restaurant", 1, 2); addPoint(indexWriter, "Cafe Poca Cosa", "restaurant", 5, 9); addPoint(indexWriter, "Los Betos", "restaurant", 9, 6); addPoint(indexWriter, "Nico's Toco Shop", "restaurant", 3, 8); indexWriter.close(); Searcher searcher = new IndexSearcher(directory); Query query = new TermQuery(new Term("type","restaurant")); Sort sort = new Sort(new SortField("location",new DistanceComparatorSource(10, 10))); TopFieldDocs topDocs = searcher.search(query, null, 5,sort); ScoreDoc[] docs = topDocs.scoreDocs; //FieldDoc fieldDoc = (FieldDoc)topDocs.scoreDocs[0]; 
//System.out.println(fieldDoc.fields[0]); for(ScoreDoc doc : docs){ FieldDoc fieldDoc2 = (FieldDoc)doc; Document document = searcher.doc(doc.doc); System.out.println(document.get("name")); } System.out.println(Math.sqrt(17));}private static void addPoint(IndexWriter writer,String name,String type,int x,int y) throws CorruptIndexException, IOException{Document document = new Document();document.add(new Field("name",name,Field.Store.YES,Field.Index.NOT_ANALYZED));document.add(new Field("type",type,Field.Store.YES,Field.Index.NOT_ANALYZED));document.add(new Field("location",x+","+y,Field.Store.YES,Field.Index.NOT_ANALYZED));writer.addDocument(document);}}
运行结果:
4.1231055
Los Betos
5.0990195
Cafe Poca Cosa
7.28011
Nico's Toco Shop
12.0415945
El charro