Hadoop MapReduce编程 API入门系列之统计学生成绩版本2（十八）

gdrocket 发表于 2017-12-18 12:10:26

package zhouls.bigdata.myMapReduce.Gender;　　

　　
import java.io.IOException;
　　
import org.apache.hadoop.conf.Configuration;
　　
import org.apache.hadoop.conf.Configured;
　　
import org.apache.hadoop.fs.FileSystem;
　　
import org.apache.hadoop.fs.Path;
　　
import org.apache.hadoop.io.Text;
　　
import org.apache.hadoop.mapred.JobConf;
　　
import org.apache.hadoop.mapreduce.Job;
　　
import org.apache.hadoop.mapreduce.Mapper;
　　
import org.apache.hadoop.mapreduce.Partitioner;
　　
import org.apache.hadoop.mapreduce.Reducer;
　　
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
　　
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
　　
import org.apache.hadoop.util.Tool;
　　
import org.apache.hadoop.util.ToolRunner;
　　

/**　　
*
　　
* @function 统计不同年龄段内男、女最高分数
　　
*
　　
*
　　

*/　　

　　
/*
　　
Alice<tab>23<tab>female<tab>45
　　
Bob<tab>34<tab>male<tab>89
　　
Chris<tab>67<tab>male<tab>97
　　
Kristine<tab>38<tab>female<tab>53
　　
Connor<tab>25<tab>male<tab>27
　　
Daniel<tab>78<tab>male<tab>95
　　
James<tab>34<tab>male<tab>79
　　
Alex<tab>52<tab>male<tab>69
　　
Nancy<tab>7<tab>female<tab>98
　　
Adam<tab>9<tab>male<tab>37
　　
Jacob<tab>7<tab>male<tab>23
　　
Mary<tab>6<tab>female<tab>93
　　
Clara<tab>87<tab>female<tab>72
　　
Monica<tab>56<tab>female<tab>92
　　
*/

　　
public>　　
/*
　　
*
　　
* @function Mapper 解析输入数据，然后按需求输出
　　
* @input key=行偏移量 value=学生数据
　　
* @output key=gender value=name+age+score
　　
*
　　
*/

　　
public static>　　
{
　　
public void map(Object key, Text value, Context context) throws IOException, InterruptedException
　　
{//拿Alice<tab>23<tab>female<tab>45
　　
String[] tokens = value.toString().split("<tab>");//使用分隔符<tab>，将数据解析为数组 tokens
　　
//得到Alice 23 female 45
　　
//即tokens tokens tokens tokens
　　
String gender = tokens.toString();//性别
　　
String nameAgeScore = tokens + "\t" + tokens + "\t"+ tokens;
　　
//输出 key=gender value=name+age+score
　　
//输出 key=female value=Alice +23+45
　　
context.write(new Text(gender), new Text(nameAgeScore));//将（female ， Alice+ 23+ 45) 写入到context中
　　
}
　　
}

　　
public static>　　
{
　　
/** Use {@link Object#hashCode()} to partition. */
　　
@Override
　　
public int getPartition(Text key, Text value,int numReduceTasks)
　　
{
　　
return (key.hashCode()) % numReduceTasks;
　　
}
　　

　　
}
　　
/**
　　
*
　　
* @function Partitioner 根据 age 选择 reduce 分区
　　
*
　　
*/

　　
public static>　　
{
　　

　　
@Override
　　
public int getPartition(Text key, Text value, int numReduceTasks)
　　
{
　　
// TODO Auto-generated method stub
　　
String[] nameAgeScore = value.toString().split("\t");
　　
String age = nameAgeScore;//学生年龄
　　
int ageInt = Integer.parseInt(age);//按年龄段分区
　　

　　
// 默认指定分区 0
　　
if (numReduceTasks == 0)
　　
return 0;
　　

　　
//年龄小于等于20，指定分区0
　　
if (ageInt <= 20) {
　　
return 0;
　　
}
　　
// 年龄大于20，小于等于50，指定分区1
　　
if (ageInt > 20 && ageInt <= 50) {
　　

　　
return 1 % numReduceTasks;
　　
}
　　
// 剩余年龄，指定分区2
　　
else
　　
return 2 % numReduceTasks;
　　
}
　　
}
　　

　　
/**
　　
*
　　
* @function 定义Combiner 合并 Mapper 输出结果
　　
*
　　
*/

　　
public static>　　
{
　　
private Text text = new Text();
　　

　　
public void reduce(Text key, Iterable<Text> values, Context context)throws IOException, InterruptedException
　　
{
　　
int maxScore = Integer.MIN_VALUE;
　　
String name = " ";
　　
String age = " ";
　　
int score = 0;
　　
for (Text val : values)
　　
{
　　
String[] valTokens = val.toString().split("\\t");
　　
score = Integer.parseInt(valTokens);
　　
if (score > maxScore)
　　
{
　　
name = valTokens;
　　
age = valTokens;
　　
maxScore = score;
　　
}
　　
}
　　
text.set(name + "\t" + age + "\t" + maxScore);
　　
context.write(key, text);
　　
}
　　
}
　　

　　
/*
　　
*
　　
* @function Reducer 统计出不同年龄段、不同性别的最高分
　　
* input key=gender value=name+age+score
　　
* output key=name value=age+gender+score
　　
*
　　
*/

　　
static>　　
{
　　
@Override
　　
public void reduce(Text key, Iterable<Text> values, Context context)throws IOException, InterruptedException
　　
{
　　
int maxScore = Integer.MIN_VALUE;
　　
String name = " ";
　　
String age = " ";
　　
String gender = " ";
　　
int score = 0;
　　
// 根据key，迭代 values 集合，求出最高分
　　
for (Text val : values)
　　
{
　　
String[] valTokens = val.toString().split("\\t");
　　
score = Integer.parseInt(valTokens);
　　
if (score > maxScore)
　　
{
　　
name = valTokens;
　　
age = valTokens;
　　
gender = key.toString();
　　
maxScore = score;
　　
}
　　
}
　　
context.write(new Text(name), new Text("age- " + age + "\t" + gender + "\tscore-" + maxScore));
　　
}
　　
}
　　

　　
/**
　　
* @function 任务驱动方法
　　
* @param args
　　
* @return
　　
* @throws Exception
　　
*/
　　
@Override
　　
public int run(String[] args) throws Exception
　　
{
　　
// TODO Auto-generated method stub
　　
Configuration conf = new Configuration();//读取配置文件
　　

　　
Path mypath = new Path(args);
　　
FileSystem hdfs = mypath.getFileSystem(conf);
　　
if (hdfs.isDirectory(mypath))
　　
{
　　
hdfs.delete(mypath, true);
　　
}
　　

　　
@SuppressWarnings("deprecation")
　　
Job job = new Job(conf, "gender");//新建一个任务
　　
job.setJarByClass(Gender.class);//主类
　　
job.setMapperClass(PCMapper.class);//Mapper
　　
job.setReducerClass(PCReducer.class);//Reducer
　　

　　
job.setPartitionerClass(MyHashPartitioner.class);
　　
//job.setPartitionerClass(PCPartitioner.class);//设置Partitioner类
　　
job.setNumReduceTasks(3);// reduce个数设置为3
　　

　　
job.setMapOutputKeyClass(Text.class);//map 输出key类型
　　
job.setMapOutputValueClass(Text.class);//map 输出value类型
　　

　　
job.setCombinerClass(PCCombiner.class);//设置Combiner类
　　

　　
job.setOutputKeyClass(Text.class);//输出结果 key类型
　　
job.setOutputValueClass(Text.class);//输出结果 value 类型
　　

　　
FileInputFormat.addInputPath(job, new Path(args));// 输入路径
　　
FileOutputFormat.setOutputPath(job, new Path(args));// 输出路径
　　
job.waitForCompletion(true);//提交任务
　　
return 0;
　　
}
　　
/**
　　
* @function main 方法
　　
* @param args
　　
* @throws Exception
　　
*/
　　
public static void main(String[] args) throws Exception
　　
{
　　
// String[] args0 = {
　　
// "hdfs://HadoopMaster:9000/gender/gender.txt",
　　
// "hdfs://HadoopMaster:9000/out/partition/" };
　　

　　
String[] args0 = {
　　
"./data/gender/gender.txt",
　　
"./out/gender" };
　　

　　

　　
int ec = ToolRunner.run(new Configuration(),new Gender(), args0);
　　
System.exit(ec);
　　
}
　　
}

页: [1]

运维网's Archiver

Hadoop MapReduce编程 API入门系列之统计学生成绩版本2（十八）