Grouping search results in Lucene (similar to SQL GROUP BY)

The idea: index a dedup key on every document, use the contrib-queries DuplicateFilter so that only one hit per distinct key survives the search, then run a TermQuery on each surviving key to get the size of its group.
import java.io.IOException;

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.Field.Index;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriter.MaxFieldLength;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.DuplicateFilter;
import org.apache.lucene.search.Filter;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.LockObtainFailedException;
import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.util.Version;

public class DuplicateFilterTest {

    public static void main(String[] args) {
        Directory dir = new RAMDirectory();

        // Three documents share duplicate=123456, so they form one group.
        Document doc = new Document();
        doc.add(new Field("id", "binbin", Store.YES, Index.NOT_ANALYZED));
        doc.add(new Field("string", "haha", Store.YES, Index.NOT_ANALYZED));
        doc.add(new Field("time", "20100801", Store.YES, Index.NOT_ANALYZED));
        doc.add(new Field("duplicate", "123456", Store.YES, Index.NOT_ANALYZED));

        Document doc1 = new Document();
        doc1.add(new Field("id", "yaoyao", Store.YES, Index.NOT_ANALYZED));
        doc1.add(new Field("string", "haha", Store.YES, Index.NOT_ANALYZED));
        doc1.add(new Field("time", "20100801", Store.YES, Index.NOT_ANALYZED));
        doc1.add(new Field("duplicate", "123456", Store.YES, Index.NOT_ANALYZED));

        Document doc11 = new Document();
        doc11.add(new Field("id", "liufeng", Store.YES, Index.NOT_ANALYZED));
        doc11.add(new Field("string", "haha", Store.YES, Index.NOT_ANALYZED));
        doc11.add(new Field("time", "20100801", Store.YES, Index.NOT_ANALYZED));
        doc11.add(new Field("duplicate", "123456", Store.YES, Index.NOT_ANALYZED));

        // These two documents have unique dedup keys: groups of one.
        Document doc2 = new Document();
        doc2.add(new Field("id", "zhangjian", Store.YES, Index.NOT_ANALYZED));
        doc2.add(new Field("string", "haha", Store.YES, Index.NOT_ANALYZED));
        doc2.add(new Field("time", "20100801", Store.YES, Index.NOT_ANALYZED));
        doc2.add(new Field("duplicate", "123455", Store.YES, Index.NOT_ANALYZED));

        Document doc3 = new Document();
        doc3.add(new Field("id", "liweicheng", Store.YES, Index.NOT_ANALYZED));
        doc3.add(new Field("string", "haha", Store.YES, Index.NOT_ANALYZED));
        doc3.add(new Field("time", "20100801", Store.YES, Index.NOT_ANALYZED));
        doc3.add(new Field("duplicate", "123451", Store.YES, Index.NOT_ANALYZED));

        try {
            IndexWriter indexWriter = new IndexWriter(dir,
                    new StandardAnalyzer(Version.LUCENE_29), true,
                    MaxFieldLength.LIMITED);
            indexWriter.addDocument(doc);
            indexWriter.addDocument(doc1);
            indexWriter.addDocument(doc11);
            indexWriter.addDocument(doc2);
            indexWriter.addDocument(doc3);
            indexWriter.close();

            // DuplicateFilter keeps only one document per distinct value of
            // the "duplicate" field, so each group shows up once in the hits.
            Query query = new TermQuery(new Term("string", "haha"));
            Filter filter = new DuplicateFilter("duplicate");
            IndexSearcher indexSearcher = new IndexSearcher(dir);
            TopDocs top = indexSearcher.search(query, filter, 200);

            for (ScoreDoc scoreDoc : top.scoreDocs) {
                Document rdoc = indexSearcher.doc(scoreDoc.doc);
                System.out.print("id:" + rdoc.get("id")
                        + "  dedup key:" + rdoc.get("duplicate"));
                // Group size (GROUP BY ... COUNT(*)): re-query on the dedup
                // key without the filter and read totalHits.
                Query queryDuplicate = new TermQuery(
                        new Term("duplicate", rdoc.get("duplicate")));
                System.out.println("  reposts:"
                        + indexSearcher.search(queryDuplicate, 100).totalHits);
            }
            indexSearcher.close();
        } catch (CorruptIndexException e) {
            e.printStackTrace();
        } catch (LockObtainFailedException e) {
            e.printStackTrace();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
}
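Which document survives for each dedup key is governed by the filter's keep mode. As far as I recall the contrib-queries API of this era (worth double-checking against your exact Lucene version), the default keeps the last occurrence of each value, which is why doc11 (liufeng) stands in for the 123456 group in the output below. A sketch of the assumed three-argument constructor:

// Assumed Lucene 2.9/3.x contrib-queries constants; verify against your version.
Filter keepFirst = new DuplicateFilter("duplicate",
        DuplicateFilter.KM_USE_FIRST_OCCURRENCE, // keep the first doc per value
        DuplicateFilter.PM_FULL_VALIDATION);     // exact, but walks all terms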
Running the test prints one hit per dedup key, together with the size of its group:

id:liufeng  dedup key:123456  reposts:3
id:zhangjian  dedup key:123455  reposts:1
id:liweicheng  dedup key:123451  reposts:1
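One possible refinement: re-running a search per surviving hit is the most expensive way to get the group size. Because "duplicate" is indexed NOT_ANALYZED, the same number can be read straight from the term's document frequency. A minimal sketch, usable inside the loop above; note that docFreq also counts deleted documents, so it is only exact on an index without deletions (true for this in-memory example):

// Cheaper per-group count via the term dictionary instead of a search.
Term groupTerm = new Term("duplicate", rdoc.get("duplicate"));
int groupSize = indexSearcher.getIndexReader().docFreq(groupTerm);
System.out.println("  reposts:" + groupSize);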