xiayu posted on 2017-12-17 20:45:19

Hadoop MapReduce WordCount, simple-version Java code

  package com.hadoop;
  import java.io.IOException;
  import java.util.StringTokenizer;
  import org.apache.hadoop.conf.Configuration;
  import org.apache.hadoop.fs.Path;
  import org.apache.hadoop.io.IntWritable;
  import org.apache.hadoop.io.LongWritable;
  import org.apache.hadoop.io.Text;
  import org.apache.hadoop.mapreduce.Job;
  import org.apache.hadoop.mapreduce.Mapper;
  import org.apache.hadoop.mapreduce.Reducer;
  import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
  import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
  import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
  import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;

  public class WordCount {

    // Mapper class: WordCountMap
    public static class WordCountMap extends Mapper<LongWritable, Text, Text, IntWritable> {
      private final IntWritable one = new IntWritable(1);
      private Text word = new Text();

      @Override
      public void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
        String line = value.toString();
        StringTokenizer token = new StringTokenizer(line);
        while (token.hasMoreTokens()) {
          word.set(token.nextToken());
          context.write(word, one);
        }
      }
    }

    // Reducer class: WordCountReduce
    public static class WordCountReduce extends Reducer<Text, IntWritable, Text, IntWritable> {
      @Override
      public void reduce(Text key, Iterable<IntWritable> values,
          Context context) throws IOException, InterruptedException {
        int sum = 0;
        for (IntWritable val : values) {
          sum += val.get();
        }
        context.write(key, new IntWritable(sum));
      }
    }

    public static void main(String[] args) throws Exception {
      // Hadoop configuration
      Configuration conf = new Configuration();
      // Create the job and load the configuration into it
      Job job = new Job(conf);
      job.setJarByClass(WordCount.class);
      job.setJobName("wordcount");
      job.setOutputKeyClass(Text.class);
      job.setOutputValueClass(IntWritable.class);
      job.setMapperClass(WordCountMap.class);
      job.setReducerClass(WordCountReduce.class);
      job.setInputFormatClass(TextInputFormat.class);
      job.setOutputFormatClass(TextOutputFormat.class);
      // Input and output paths on HDFS
      FileInputFormat.addInputPath(job, new Path("/input"));
      FileOutputFormat.setOutputPath(job, new Path("/output"));
      job.waitForCompletion(true);
    }
  }
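To try it out, package the class into a jar and submit it with the hadoop command. A minimal run sketch, assuming the jar is named wordcount.jar (a hypothetical name), the HDFS directory /input already contains some text files, and /output does not exist yet (the job fails if the output directory already exists):

  hadoop jar wordcount.jar com.hadoop.WordCount
  hdfs dfs -cat /output/part-r-00000

The second command prints the word counts written by the reducer (the part file name can vary with the number of reducers).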