Grouping search results in Lucene (similar to SQL GROUP BY)

The idea: index a dedup key on every document, use the contrib-queries DuplicateFilter so that only one hit per distinct key survives the search, then run a TermQuery on each surviving key to get the size of its group.
import java.io.IOException;

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.Field.Index;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriter.MaxFieldLength;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.DuplicateFilter;
import org.apache.lucene.search.Filter;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.LockObtainFailedException;
import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.util.Version;

public class DuplicateFilterTest {

    public static void main(String[] args) {
        Directory dir = new RAMDirectory();

        // Three documents share duplicate=123456, so they form one group.
        Document doc = new Document();
        doc.add(new Field("id", "binbin", Store.YES, Index.NOT_ANALYZED));
        doc.add(new Field("string", "haha", Store.YES, Index.NOT_ANALYZED));
        doc.add(new Field("time", "20100801", Store.YES, Index.NOT_ANALYZED));
        doc.add(new Field("duplicate", "123456", Store.YES, Index.NOT_ANALYZED));

        Document doc1 = new Document();
        doc1.add(new Field("id", "yaoyao", Store.YES, Index.NOT_ANALYZED));
        doc1.add(new Field("string", "haha", Store.YES, Index.NOT_ANALYZED));
        doc1.add(new Field("time", "20100801", Store.YES, Index.NOT_ANALYZED));
        doc1.add(new Field("duplicate", "123456", Store.YES, Index.NOT_ANALYZED));

        Document doc11 = new Document();
        doc11.add(new Field("id", "liufeng", Store.YES, Index.NOT_ANALYZED));
        doc11.add(new Field("string", "haha", Store.YES, Index.NOT_ANALYZED));
        doc11.add(new Field("time", "20100801", Store.YES, Index.NOT_ANALYZED));
        doc11.add(new Field("duplicate", "123456", Store.YES, Index.NOT_ANALYZED));

        // These two documents have unique dedup keys: groups of one.
        Document doc2 = new Document();
        doc2.add(new Field("id", "zhangjian", Store.YES, Index.NOT_ANALYZED));
        doc2.add(new Field("string", "haha", Store.YES, Index.NOT_ANALYZED));
        doc2.add(new Field("time", "20100801", Store.YES, Index.NOT_ANALYZED));
        doc2.add(new Field("duplicate", "123455", Store.YES, Index.NOT_ANALYZED));

        Document doc3 = new Document();
        doc3.add(new Field("id", "liweicheng", Store.YES, Index.NOT_ANALYZED));
        doc3.add(new Field("string", "haha", Store.YES, Index.NOT_ANALYZED));
        doc3.add(new Field("time", "20100801", Store.YES, Index.NOT_ANALYZED));
        doc3.add(new Field("duplicate", "123451", Store.YES, Index.NOT_ANALYZED));

        try {
            IndexWriter indexWriter = new IndexWriter(dir,
                    new StandardAnalyzer(Version.LUCENE_29), true,
                    MaxFieldLength.LIMITED);
            indexWriter.addDocument(doc);
            indexWriter.addDocument(doc1);
            indexWriter.addDocument(doc11);
            indexWriter.addDocument(doc2);
            indexWriter.addDocument(doc3);
            indexWriter.close();

            // DuplicateFilter keeps only one document per distinct value of
            // the "duplicate" field, so each group shows up once in the hits.
            Query query = new TermQuery(new Term("string", "haha"));
            Filter filter = new DuplicateFilter("duplicate");
            IndexSearcher indexSearcher = new IndexSearcher(dir);
            TopDocs top = indexSearcher.search(query, filter, 200);

            for (ScoreDoc scoreDoc : top.scoreDocs) {
                Document rdoc = indexSearcher.doc(scoreDoc.doc);
                System.out.print("id:" + rdoc.get("id")
                        + "  dedup key:" + rdoc.get("duplicate"));
                // Group size (GROUP BY ... COUNT(*)): re-query on the dedup
                // key without the filter and read totalHits.
                Query queryDuplicate = new TermQuery(
                        new Term("duplicate", rdoc.get("duplicate")));
                System.out.println("  reposts:"
                        + indexSearcher.search(queryDuplicate, 100).totalHits);
            }
            indexSearcher.close();
        } catch (CorruptIndexException e) {
            e.printStackTrace();
        } catch (LockObtainFailedException e) {
            e.printStackTrace();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
}
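Which document survives for each dedup key is governed by the filter's keep mode. As far as I recall the contrib-queries API of this era (worth double-checking against your exact Lucene version), the default keeps the last occurrence of each value, which is why doc11 (liufeng) stands in for the 123456 group in the output below. A sketch of the assumed three-argument constructor:

// Assumed Lucene 2.9/3.x contrib-queries constants; verify against your version.
Filter keepFirst = new DuplicateFilter("duplicate",
        DuplicateFilter.KM_USE_FIRST_OCCURRENCE, // keep the first doc per value
        DuplicateFilter.PM_FULL_VALIDATION);     // exact, but walks all terms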
Running the test prints one hit per dedup key, together with the size of its group:

id:liufeng  dedup key:123456  reposts:3
id:zhangjian  dedup key:123455  reposts:1
id:liweicheng  dedup key:123451  reposts:1
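One possible refinement: re-running a search per surviving hit is the most expensive way to get the group size. Because "duplicate" is indexed NOT_ANALYZED, the same number can be read straight from the term's document frequency. A minimal sketch, usable inside the loop above; note that docFreq also counts deleted documents, so it is only exact on an index without deletions (true for this in-memory example):

// Cheaper per-group count via the term dictionary instead of a search.
Term groupTerm = new Term("duplicate", rdoc.get("duplicate"));
int groupSize = indexSearcher.getIndexReader().docFreq(groupTerm);
System.out.println("  reposts:" + groupSize);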