初探map/reduce原理
下面的代码来自 Hadoop 官网,但官方示例较为繁琐,我对其做了简化。
运行下面的代码必须在 Linux 系统上,并且已成功安装部署 Hadoop。
package com.hadoop.test3;

import java.io.IOException;
import java.util.*;

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.conf.*;
import org.apache.hadoop.io.*;
import org.apache.hadoop.mapred.*;
import org.apache.hadoop.util.*;

/**
 * Classic Hadoop WordCount example, written against the old
 * {@code org.apache.hadoop.mapred} (pre-0.20) API.
 *
 * <p>Reads text files from the input directory, tokenizes each line on
 * whitespace, and writes one {@code word<TAB>count} line per distinct word
 * to the output directory. Must run on a machine with a working Hadoop
 * deployment (the input/output paths are HDFS-or-local paths resolved by
 * the configured FileSystem).
 */
public class WordCount {

    /**
     * Mapper: for every whitespace-delimited token in the input line,
     * emits the pair (token, 1).
     */
    public static class Map extends MapReduceBase
            implements Mapper<LongWritable, Text, Text, IntWritable> {

        // Reused output objects: Hadoop serializes them on collect(),
        // so mutating them between calls is safe and avoids per-token allocation.
        private static final IntWritable one = new IntWritable(1);
        private final Text word = new Text();

        /**
         * @param key      byte offset of the line in the file (unused)
         * @param value    one line of input text
         * @param output   collector receiving (word, 1) pairs
         * @param reporter progress reporter (unused)
         */
        public void map(LongWritable key, Text value,
                        OutputCollector<Text, IntWritable> output,
                        Reporter reporter) throws IOException {
            StringTokenizer tokenizer = new StringTokenizer(value.toString());
            // hasMoreTokens() is the canonical check paired with nextToken()
            // (the original called hasMoreElements(), which works but mixes
            // the Enumeration view into token-based iteration).
            while (tokenizer.hasMoreTokens()) {
                word.set(tokenizer.nextToken());
                output.collect(word, one);
            }
        }
    }

    /**
     * Reducer (also used as the combiner): sums all counts for a word.
     * Valid as a combiner because integer addition is associative and
     * commutative.
     */
    public static class Reduce extends MapReduceBase
            implements Reducer<Text, IntWritable, Text, IntWritable> {

        /**
         * @param key      the word
         * @param values   all partial counts emitted for this word
         * @param output   collector receiving the (word, total) pair
         * @param reporter progress reporter (unused)
         */
        public void reduce(Text key, Iterator<IntWritable> values,
                           OutputCollector<Text, IntWritable> output,
                           Reporter reporter) throws IOException {
            int sum = 0;
            while (values.hasNext()) {
                sum += values.next().get();
            }
            output.collect(key, new IntWritable(sum));
        }
    }

    /**
     * Configures and submits the job.
     *
     * <p>Generalized from the original: input and output directories may be
     * supplied as {@code args[0]} / {@code args[1]}; when omitted, the
     * original hard-coded paths are used, so existing invocations behave
     * identically.
     */
    public static void main(String[] args) throws Exception {
        String inputPath = args.length > 0 ? args[0] : "/home/root/test/input";
        String outputPath = args.length > 1 ? args[1] : "/home/root/test/output";

        JobConf conf = new JobConf(WordCount.class);
        conf.setJobName("wordcount");

        conf.setOutputKeyClass(Text.class);
        conf.setOutputValueClass(IntWritable.class);

        conf.setMapperClass(Map.class);
        conf.setCombinerClass(Reduce.class); // local pre-aggregation cuts shuffle traffic
        conf.setReducerClass(Reduce.class);

        conf.setInputFormat(TextInputFormat.class);
        conf.setOutputFormat(TextOutputFormat.class);

        FileInputFormat.setInputPaths(conf, new Path(inputPath));
        FileOutputFormat.setOutputPath(conf, new Path(outputPath));

        JobClient.runJob(conf); // blocks until the job completes
    }
}

// Sample output (one "word<TAB>count" line per distinct token):
// Bye 1
// Goodbye 2
// Hadoop 3
// Hello 2
// World 2
// hadoop 1
// hello 1