MongoDB 之MapReduce统计关键字频率测试
测试环境:windows xp
Operating System: Windows XP Professional (5.1, Build 2600) Service Pack 3 (2600.xpsp_sp3_gdr.101209-1647)
Language: Chinese (Regional Setting: Chinese)
Processor: Pentium(R) Dual-Core CPU E5500 @ 2.80GHz (2 CPUs)
Memory: 3292MB RAM
测试结果:
共 1079844 条数据，统计出 10957 个关键字，按出现次数降序排序后取前 100 条记录，总耗时：308578 毫秒
测试程序:
import java.io.BufferedReader;import java.io.FileReader;import java.net.UnknownHostException;import java.util.ArrayList;import java.util.HashMap;import java.util.List;import java.util.Map;import java.util.Random;import com.mongodb.BasicDBObject;import com.mongodb.DB;import com.mongodb.DBCollection;import com.mongodb.DBCursor;import com.mongodb.DBObject;import com.mongodb.MapReduceCommand;import com.mongodb.MapReduceOutput;import com.mongodb.Mongo;import com.mongodb.MongoException;/** * *//** * @author songlianjun * */public class Test4MongoDb {/** * @param args */public static void main(String[] args) {// TODO Auto-generated method stub//insertTestKeywordLog();calculateSearchKeyword();}/** * 生成测试数据 */private static void insertTestKeywordLog() {List<String> keyWordList = new ArrayList<String>();try {BufferedReader reader = new BufferedReader(new FileReader("d:\\pinyin.txt"));String line = null;Random ran = new Random(System.currentTimeMillis());Mongo m;int totalRows = 0;long start = 0;long end = 0;m = new Mongo("localhost", 9999);DB db = m.getDB("test");DBCollection collection = db.getCollection("t_log");String month = "02";String year = "2010";start = System.currentTimeMillis();while ((line = reader.readLine()) != null) {int insertCount = ran.nextInt(100);if (insertCount == 0) {insertCount = 1;}totalRows += insertCount;for (int i = 0; i < insertCount; i++) {DBObject record = new BasicDBObject();record.put("id", System.currentTimeMillis());record.put("keyword", line);int day = ran.nextInt(28);if (day == 0) {day = 1;}record.put("dd", year + "-" + month+ (day < 10 ? 
"-0" + day : "-" + day));collection.save(record);}}end = System.currentTimeMillis();System.out.println("insert time =" + (end - start) + " row count="+ totalRows);} catch (Exception e) {// TODO Auto-generated catch blocke.printStackTrace();}}/** * 统计查询关键字频率 */private static void calculateSearchKeyword() {long start = 0;long end = 0;Mongo m;try {m = new Mongo("localhost", 9999);DB db = m.getDB("test");DBCollection collection = db.getCollection("t_log");DBObject newDB = new BasicDBObject();newDB.put("max", 100000);String collectionName = "t_log_result_"+ System.currentTimeMillis();DBCollection resultCollection = db.createCollection(collectionName,newDB);//创建统计数量索引DBObject indexObject = new BasicDBObject();indexObject.put("hitCount", -1);resultCollection.createIndex(indexObject);//DBCollection resultCollection = db.getCollection("t_log_result_"+System.currentTimeMillis());start = System.currentTimeMillis();DBObject dbKey = new BasicDBObject();dbKey.put("dd", true);//查询符合条件的数据DBObject condition = new BasicDBObject();condition.put("dd", new BasicDBObject("$gte", "2010-02-01").append("$lte", "2010-02-28"));//定义mapString map = "function() { key=this.keyword; "+ "emit(key,{'count':1}); " + "}";//定义reduceString reduce = " function r( key, values ) { " + " var count=0;"+ " for ( var i = 0; i < values.length; i++ ){"+ " count += values[i].count;" + " }"+ " return count;} ";///Map<String,Object> scope = new HashMap<String,Object>();MapReduceCommand mr = new MapReduceCommand(collection, map, reduce,null, MapReduceCommand.OutputType.INLINE, condition);int resultCount = 0;MapReduceOutput out = collection.mapReduce(mr);//获取统计结果for (DBObject result : out.results()) {Double value = null;if (result.get("value") != null&& result.get("value") instanceof DBObject) {DBObject dbObj = (DBObject) result.get("value");value = (Double) dbObj.get("count");} else {value = (Double) result.get("value");}String found = (String) result.get("_id");DBObject keywordObject = new 
BasicDBObject();keywordObject.put("hitCount", value);keywordObject.put("keyword", found);//记录到统计结果表中resultCollection.save(keywordObject);resultCount++;}DBObject query = new BasicDBObject();DBObject orderBy = new BasicDBObject();orderBy.put("hitCount", -1);//取top100DBCursor cursor = resultCollection.find().sort(orderBy).limit(100);while (cursor.hasNext()) {System.out.println(cursor.next());}end = System.currentTimeMillis();System.out.println("total time =" + (end - start)+ " total row count=" + resultCount);} catch (UnknownHostException e) {// TODO Auto-generated catch blocke.printStackTrace();} catch (MongoException e) {// TODO Auto-generated catch blocke.printStackTrace();}}}