Hadoop MapReduce Program Development (Part 1)
1. Configuration

package com.mapreduce.test1;
import org.apache.hadoop.conf.Configuration;

public class Test1 {
    public static void main(String[] args) {
        Configuration conf = new Configuration();
        // Resources are loaded in the order they are added;
        // both files must be on the classpath.
        conf.addResource("config-default.xml");
        conf.addResource("config-site.xml");
        System.out.println(conf.get("hadoop.tmp.dir"));
        System.out.println(conf.get("height"));
    }
}
When several configuration files are added, they are loaded in order: a property in a later file overrides the same property in an earlier one, unless the earlier definition is marked final=true, in which case it cannot be overridden.
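As an illustration, suppose the two resource files define the same property (the name height is taken from the code above; the values are made up for this sketch):

<!-- config-default.xml, added first -->
<configuration>
  <property>
    <name>height</name>
    <value>10</value>
    <final>true</final>
  </property>
</configuration>

<!-- config-site.xml, added second -->
<configuration>
  <property>
    <name>height</name>
    <value>20</value>
  </property>
</configuration>

Because height is declared final in config-default.xml, conf.get("height") prints 10; drop the <final> element and it would print 20.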
2. Mapper

The data a Mapper processes is an InputSplit, carved out of the input by the InputFormat; a RecordReader then breaks the split into the <key, value> pairs that are fed to map().
package com.mapreduce.test;

import java.io.IOException;
import java.util.StringTokenizer;

import org.apache.hadoop.io.FloatWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

public class Map extends Mapper<LongWritable, Text, Text, FloatWritable> {
    @Override
    protected void map(LongWritable key, Text value, Context context)
            throws IOException, InterruptedException {
        // The key is the byte offset of the line; the value is the line itself.
        String line = value.toString();
        System.out.println(line); // debug output only
        // Split the line on spaces: the first token becomes the output key,
        // the second token is parsed as a float and becomes the output value.
        StringTokenizer token = new StringTokenizer(line, " ");
        String symb1 = token.nextToken();
        String symb2 = token.nextToken();
        context.write(new Text(symb1), new FloatWritable(Float.valueOf(symb2)));
    }
}
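Given an input line such as "AAPL 95.5" (a made-up sample; any line with a token followed by a parseable float works), this Mapper emits the pair <AAPL, 95.5>. Note that nextToken() throws NoSuchElementException on a line with fewer than two tokens, so the input is assumed to be well formed.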
3. Reduce

After the shuffle phase groups the Mapper output by key, reduce() receives each key together with an Iterable over all of its values. A minimal sketch, assuming the job simply sums the float values for each key:
package com.mapreduce.test;

import java.io.IOException;

import org.apache.hadoop.io.FloatWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

public class Reduce extends Reducer<Text, FloatWritable, Text, FloatWritable> {
    @Override
    protected void reduce(Text key, Iterable<FloatWritable> values, Context context)
            throws IOException, InterruptedException {
        // Sum every float value the Mappers emitted for this key.
        float sum = 0f;
        for (FloatWritable value : values) {
            sum += value.get();
        }
        context.write(key, new FloatWritable(sum));
    }
}
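To wire the two classes into a runnable job, a driver class is still needed. A minimal sketch, assuming the input and output HDFS paths are passed on the command line (the class name Driver and the job name are illustrative):

package com.mapreduce.test;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.FloatWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class Driver {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf, "float sum");
        job.setJarByClass(Driver.class);
        // Plug in the Mapper and Reducer defined above.
        job.setMapperClass(Map.class);
        job.setReducerClass(Reduce.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(FloatWritable.class);
        FileInputFormat.addInputPath(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}

Packaged into a jar, it would be launched with something like: hadoop jar myjob.jar com.mapreduce.test.Driver /input /output (paths are examples).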