使用 Hadoop 内置的序列化类（不使用自定义序列化类）实现流量统计功能

十二12 · 发表于 2016-12-12 10:28:22

package hadoop2;

import java.io.IOException;
import java.net.URI;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class TrafficSysper {

public static void main(String[] args) throws Exception{
Job job = Job.getInstance(new Configuration(), TrafficSysper.class.getSimpleName());
job.setJarByClass(TrafficSysper.class);

//设置读取目录，从参数中获取
FileInputFormat.setInputPaths(job, args[0]);
job.setMapperClass(MyMapper.class);
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(Text.class);

//如果输出目录存在，则删除输出目录
Path path = new Path(args[1]);
FileSystem fs = FileSystem.get(new URI(args[1]), new Configuration());
if(fs.exists(path)){
fs.delete(path, true);
}
//设置输出目录，从参数中获取
FileOutputFormat.setOutputPath(job, path);

job.setReducerClass(MyReducer.class);
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(Text.class);

job.waitForCompletion(true);

}

public static class MyMapper  extends Mapper<LongWritable, Text, Text, Text>{

Text k2 = new Text();
Text v2 = new Text();

@Override
protected void map(LongWritable k1, Text v1,
Mapper<LongWritable, Text, Text, Text>.Context context)
throws IOException, InterruptedException {
String line = v1.toString();
String[] splited = line.split("\t");

k2.set(splited[1]);
v2.set(splited[6] + "\t" + splited[7] + "\t" +splited[8] + "\t" +splited[9]);
context.write(k2, v2);
}
}

public static class MyReducer extends Reducer<Text, Text, Text, Text>{
Text k3 = new Text();
Text v3 = new Text();
@Override
protected void reduce(Text k2, Iterable<Text> v2s,
Reducer<Text, Text, Text, Text>.Context context)
throws IOException, InterruptedException {
Long t1 = 0L;
Long t2 = 0L;
Long t3 = 0L;
Long t4 = 0L;
for (Text v2 : v2s) {
String line = v2.toString();
String[] splited = line.split("\t");
t1 += Long.parseLong(splited[0]);
t2 += Long.parseLong(splited[1]);
t3 += Long.parseLong(splited[2]);
t4 += Long.parseLong(splited[3]);
}
k3.set(k2);
v3.set(t1 + "\t" + t2 + "\t" + t3 + "\t" + t4);
context.write(k3, v3);
}
}

}

账号		自动登录	找回密码
密码			立即注册

Centos6.5×64安装配置openmeetings3.0.3详

大疆运维招人啦，

C++ :try 语句块和异常处理

C++的多态

Red Hat RHCE 8 (EX294) Cert Guide

Java/C++ 区别：看完这一篇，就够用！

别再用过时库了！这 13 个顶级 C++ 库才是

[经验分享] 使用 Hadoop 内置的序列化类（不使用自定义序列化类）实现流量统计功能

扫码加入运维网微信交流群