lucene多线程并发下的报错问题
架了一个lucene,在上边做了一个项目,单线程运行没有问题,压力测试,开了100个线程并发去调数据,结果运行一会就报错了,"未将对象引用到实例"
public sealed class Hits
{
private Weight weight;
private Searcher searcher;
private Filter filter = null;
private Sort sort = null;
private int length; // the total number of hits
private System.Collections.ArrayList hitDocs = System.Collections.ArrayList.Synchronized(new System.Collections.ArrayList(10)); // cache of hits retrieved
private HitDoc first; // head of LRU cache
private HitDoc last; // tail of LRU cache
private int numDocs = 0; // number cached
private int maxDocs = 200; // max to cache
private int nDeletions; // # deleted docs in the index.
private int lengthAtStart; // this is the number apps usually count on (although deletions can bring it down).
private int nDeletedHits = 0; // # of already collected hits that were meanwhile deleted.
public /*internal*/ bool debugCheckedForDeletions = false; // for test purposes.
internal Hits(Searcher s, Query q, Filter f)
{
weight = q.Weight(s);
searcher = s;
filter = f;
nDeletions = CountDeletions(s);
GetMoreDocs(50); // retrieve 100 initially
lengthAtStart = length;
}
internal Hits(Searcher s, Query q, Filter f, Sort o)
{
weight = q.Weight(s);
searcher = s;
filter = f;
sort = o;
nDeletions = CountDeletions(s);
GetMoreDocs(50); // retrieve 100 initially
lengthAtStart = length;
}
// count # deletions, return -1 if unknown.
private int CountDeletions(Searcher s)
{
int cnt = - 1;
if (s is IndexSearcher)
{
cnt = s.MaxDoc() - ((IndexSearcher) s).GetIndexReader().NumDocs();
}
return cnt;
}
/// <summary> Tries to add new documents to hitDocs.
/// Ensures that the hit numbered <code>min</code> has been retrieved.
/// </summary>
private void GetMoreDocs(int min)
{
if (hitDocs.Count > min)
{
min = hitDocs.Count;
}
int n = min * 2; // double # retrieved
TopDocs topDocs = (sort == null) ? searcher.Search(weight, filter, n) : searcher.Search(weight, filter, n, sort);
//TopDocs topDocs = searcher.Search(weight, filter, n);
length = topDocs.totalHits;
ScoreDoc[] scoreDocs = topDocs.scoreDocs;
float scoreNorm = 1.0f;
if (length > 0 && topDocs.GetMaxScore() > 1.0f)
{
scoreNorm = 1.0f / topDocs.GetMaxScore();
}
int start = hitDocs.Count - nDeletedHits;
// any new deletions?
int nDels2 = CountDeletions(searcher);
debugCheckedForDeletions = false;
if (nDeletions < 0 || nDels2 > nDeletions)
{
// either we cannot count deletions, or some "previously valid hits" might have been deleted, so find exact start point
nDeletedHits = 0;
debugCheckedForDeletions = true;
int i2 = 0;
for (int i1 = 0; i1 < hitDocs.Count && i2 < scoreDocs.Length; i1++)
{
int id1 = ((HitDoc) hitDocs[i1]).id;
int id2 = scoreDocs[i2].doc;
if (id1 == id2)
{
i2++;
}
else
{
nDeletedHits++;
}
}
start = i2;
}
int end = scoreDocs.Length < length ? scoreDocs.Length : length;
length += nDeletedHits;
for (int i = start; i < end; i++)
{
hitDocs.Add(new HitDoc(scoreDocs[i].score * scoreNorm, scoreDocs[i].doc));
}
nDeletions = nDels2;
}
/// <summary>Returns the total number of hits available in this set. </summary>
public int Length()
{
return length;
}
/// <summary>Returns the stored fields of the n<sup>th</sup> document in this set.
/// <p>Documents are cached, so that repeated requests for the same element may
/// return the same Document object.
/// </summary>
/// <throws> CorruptIndexException if the index is corrupt </throws>
/// <throws> IOException if there is a low-level IO error </throws>
public Document Doc(int n)
{
HitDoc hitDoc = HitDoc(n);
// Update LRU cache of documents
Remove(hitDoc); // remove from list, if there
AddToFront(hitDoc); // add to front of list
if (numDocs > maxDocs)
{
// if cache is full
HitDoc oldLast = last;
Remove(last); // flush last
oldLast.doc = null; // let doc get gc'd
}
if (hitDoc.doc == null)
{
hitDoc.doc = searcher.Doc(hitDoc.id); // cache miss: read document
}
return hitDoc.doc;
}
/// <summary>Returns the score for the n<sup>th</sup> document in this set. </summary>
public float Score(int n)
{
return HitDoc(n).score;
}
/// <summary>Returns the id for the n<sup>th</sup> document in this set.
/// Note that ids may change when the index changes, so you cannot
/// rely on the id to be stable.
/// </summary>
public int Id(int n)
{
return HitDoc(n).id;
}
/// <summary> Returns a {@link HitIterator} to navigate the Hits. Each item returned
/// from {@link Iterator#next()} is a {@link Hit}.
/// <p>
/// <b>Caution:</b> Iterate only over the hits needed. Iterating over all
/// hits is generally not desirable and may be the source of
/// performance issues. If you need to iterate over many or all hits, consider
/// using a search method that takes a {@link HitCollector}.
/// </p>
/// </summary>
public System.Collections.IEnumerator Iterator()
{
return new HitIterator(this);
}
private HitDoc HitDoc(int n)
{
if (n >= length)
{
throw new System.IndexOutOfRangeException("Not a valid hit number: " + n);
}
if (n >= hitDocs.Count)
{
GetMoreDocs(n);
}
if (n >= length)
{
throw new System.Exception("Not a valid hit number: " + n);
}
return (HitDoc) hitDocs[n];
}
private void AddToFront(HitDoc hitDoc)
{
// insert at front of cache
if (first == null)
{
last = hitDoc;
}
else
{
first.prev = hitDoc;
}
hitDoc.next = first;
first = hitDoc;
hitDoc.prev = null;
numDocs++;
}
private void Remove(HitDoc hitDoc)
{
// remove from cache
if (hitDoc.doc == null)
{
// it's not in the list
return ; // abort
}
if (hitDoc.next == null)
{
last = hitDoc.prev;
}
else
{
hitDoc.next.prev = hitDoc.prev;
}
if (hitDoc.prev == null)
{
first = hitDoc.next;
}
else
{
hitDoc.prev.next = hitDoc.next;//此处报得错误,可是跟踪的时候查看有时hitDoc为null,有时不是.
}
numDocs--;
}
}
sealed class HitDoc
{
internal float score;
internal int id;
internal Document doc = null;
internal HitDoc next; // in doubly-linked cache
internal HitDoc prev; // in doubly-linked cache
internal HitDoc(float s, int i)
{
score = s;
id = i;
}
}
需要说明的是我的IndexSearch是共享的,一个单件模式类封装的.查了网上说"在程序的查询中共享一个IndexSearch实例,它支持多线程安全。"可貌似不是这样...另外我只是用单个读取器.
按我的理解应该是在链表是多线程共享下才会出现这种问题,同步一下应该没问题了,可是这个链表貌似不是共享的,注释里边写着什么LRU cache of documents,应该没缓存吧?只是每次查询建一个链表维护后返回.哪位大哥帮帮忙啊,看lucene代码看得头疼...没弄明白,英文不好找了个中文的lucene in action,结果内容不全....................
[解决办法]
该回复于2009-11-05 09:20:04被版主删除
[解决办法]
哪位实际项目用过lucene的哥们,分享一下经验啊,IndexSearch怎么弄得,难道一个System.Collections.ArrayList.Synchronized能把线程同步,可现在就是出现多线程报错的现象啊
[解决办法]
调用的代码也贴出来吧。每次调用Hits.Doc()都会引发Hits对象里面的缓存链表发生更新,如果在多个线程调用同一Hits对象的Doc方法,会有线程安全问题,引发异常。
[解决办法]
调用就是直接一个全局对象的INDEXSEARCH对象来操作的
[解决办法]
广州达梦网络科技有限公司是一家致力于为提供各行业垂直搜索和元搜索服务的专业化公司。公司坚持以服务客户为中心,以技术创新为手段,为客户提供各个行业、任意搜索源精确搜索的解决方案,以及中个小企业信息服务的解决方案!
主要的搜索引擎案例有:万帮生活搜索,114soso网,万帮知识经验搜索,佛教新闻、网页、图片、视频、经典、词典、mp3等十个搜索引擎,还有各个行业的搜索引擎,目标是打造100个行业的百度。
能为您快速定制各类搜索引擎,如果您各类搜索引擎需求,请联系我们:020-22174900,QQ:46244150。
[解决办法]
万帮生活搜索?我打开用了17秒,搜索用了20秒,出来只有第一条靠点边?耿耿地。。。