Hadoop 中迭代器的对象重用问题
在使用 reduce 时遇到一个问题,在这上面耗费了好些时间。一直以为是业务逻辑方面的问题,不曾想是技术上的问题:reduce 中迭代器每次返回的是同一个被重用的对象。写此 blog 以纪念在解决这个问题时的怂……囧
先看这个reduce的实例:
public static class sellerInfoReduce extends MapReduceBase implements Reducer<Text, Promotion, Text, Promotion> { private static final Set<Promotion> set = new HashSet<Promotion>(); private static final Text k = new Text();@Overridepublic void reduce(Text key, Iterator<Promotion> values,OutputCollector<Text, Promotion> output, Reporter reporter)throws IOException { set.clear(); Promotion obj = null; Promotion sellerPromotion = null; int count = 0;//记录while循环次数 while(values.hasNext()) { count++; obj = values.next(); if(obj.isNull()) {sellerPromotion = obj;//how asshole! System.out.println("threadId="+Thread.currentThread().getId()+" count="+count+" 1:sellerPromotion===="+sellerPromotion); } else { set.add(obj); if(sellerPromotion != null) { System.out.println("threadId="+Thread.currentThread().getId()+" count="+count+" 2:sellerPromotion===="+sellerPromotion); System.out.println("threadId="+Thread.currentThread().getId()+" count="+count+" 2:obj===="+obj); } } }}}threadId=1 count=1 1:sellerPromotion====object.Promotion@5a4threadId=1 count=2 2:sellerPromotion====object.Promotion@13691399threadId=1 count=2 2:obj====object.Promotion@13691399threadId=1 count=3 2:sellerPromotion====object.Promotion@136912c0threadId=1 count=3 2:obj====object.Promotion@136912c0threadId=1 count=4 2:sellerPromotion====object.Promotion@136912bbthreadId=1 count=4 1:obj====object.Promotion@136912bb