crespo09 发表于 2018-10-30 11:10:37

Hadoop中的一些自定义


[*]  自定义计数器
  计数器用来监控 Hadoop 中 job 的运行进度和状态。
  如源文件内容为:
  a    b
  c    d    e    f
  g    h    i
  现在需要找出字段数大于3和小于3的记录条数,可以使用计数器来实现,代码如下:
  public void map(LongWritable key, Text value,
  OutputCollector output, Reporter reporter)
  throws IOException {
  String[] split = value.toString().split("\t");
  if(split.length>3){
  org.apache.hadoop.mapred.Counters.Counter counter = reporter.getCounter("MyCounter", "isLong");
  counter.increment(1);
  }else if(split.length
  long>  String name;
  long age;
  /**
   * Restores this object's state from a binary stream.
   *
   * <p>Values are consumed in the order id, name, age, which is the order
   * {@code write} emits them.
   *
   * @param in stream to read the serialized fields from
   * @throws IOException if reading from the stream fails
   */
  @Override
  public void readFields(DataInput in) throws IOException {
    id = in.readLong();
    name = in.readUTF();
    age = in.readLong();
  }
  /**
   * Serializes this object's state to a binary stream.
   *
   * <p>Fields are emitted in the order id, name, age; {@code readFields}
   * consumes them in the same order.
   *
   * @param out stream to write the serialized fields to
   * @throws IOException if writing to the stream fails
   */
  @Override
  public void write(DataOutput out) throws IOException {
    out.writeLong(this.id);
    out.writeUTF(this.name);
    out.writeLong(this.age);
  }
  /**
   * Renders the three fields as {@code id:<id> name:<name> age:<age>}.
   *
   * @return a single-line textual form of this object
   */
  @Override
  public String toString() {
    StringBuilder text = new StringBuilder();
    text.append("id:").append(id);
    text.append(" name:").append(name);
    text.append(" age:").append(age);
    return text.toString();
  }
  public long getId() {

  return>  }
  /**
   * Returns the name field.
   *
   * @return the current value of {@code name}
   */
  public String getName() {
    return this.name;
  }
  /**
   * Returns the age field.
   *
   * @return the current value of {@code age}
   */
  public long getAge() {
    return this.age;
  }
  }
  代码二:基于key的比较
  package cn.com.bonc.hadoop;
  import java.io.DataInput;
  import java.io.DataOutput;
  import java.io.IOException;
  import org.apache.hadoop.io.WritableComparable;

  public>
  long>  String name;
  long age;
  @Override
  public void readFields(DataInput in) throws IOException {
  in.readLong();
  in.readUTF();
  in.readLong();
  }
  @Override
  public void write(DataOutput out) throws IOException {
  out.writeLong(id);
  out.writeUTF(name);
  out.writeLong(age);
  }
  @Override
  public int compareTo(PersonSortByAge o) {
  return (int) (this.id - o.id);
  }
  @Override
  public String toString() {
  return "id:"+id+" name:"+name+" age:"+age;
  }
  }

页: [1]
查看完整版本: Hadoop中的一些自定义