Why didn’t I use Serialization when we first started Hadoop? Because it looked
big and hairy and I thought we needed something lean and mean, where we had
precise control over exactly how objects are written and read, since that is central
to Hadoop. With Serialization you can get some control, but you have to fight for
it.
The logic for not using RMI was similar. Effective, high-performance inter-process
communications are critical to Hadoop. I felt like we’d need to precisely control
how things like connections, timeouts and buffers are handled, and RMI gives you
little control over those.
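The control Cutting describes is what Hadoop's own serialization contract, org.apache.hadoop.io.Writable, provides: the implementer writes and reads every byte explicitly through two methods. The interface is reproduced below for reference; the Attribute class that follows implements it by delegating to the built-in IntWritable and Text wrappers.

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;

// Mirror of the org.apache.hadoop.io.Writable contract, shown here for reference.
public interface Writable {
    // Serialize this object's fields to the binary stream.
    void write(DataOutput out) throws IOException;
    // Read this object's fields from the binary stream, overwriting any existing state.
    void readFields(DataInput in) throws IOException;
}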
Class Attribute:
package siat.miner.etl.instance;
import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.Writable;
public class Attribute implements Writable {

    public static final int ATTRIBUTE_TYPE_STRING = 1;  // string type
    public static final int ATTRIBUTE_TYPE_NOMINAL = 2; // nominal type
    public static final int ATTRIBUTE_TYPE_REAL = 3;    // real type

    private IntWritable type;
    private Text name;

    public Attribute() {
        // Writables need a no-argument constructor so the framework can
        // instantiate them reflectively before calling readFields().
        this.type = new IntWritable(0);
        this.name = new Text("");
    }

    public Attribute(int type, String name) {
        this.type = new IntWritable(type);
        this.name = new Text(name);
    }

    public IntWritable getType() {
        return type;
    }

    public void setType(int type) {
        this.type = new IntWritable(type);
    }

    public Text getName() {
        return name;
    }

    public void setName(String name) {
        this.name = new Text(name);
    }

    @Override
    public void readFields(DataInput in) throws IOException {
        // Read the fields back in exactly the order they were written.
        type.readFields(in);
        name.readFields(in);
    }

    @Override
    public void write(DataOutput out) throws IOException {
        // Serialize by delegating to the wrapped Writable fields.
        type.write(out);
        name.write(out);
    }
}
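Attribute implements only Writable, which is enough for it to travel through Hadoop as a value. If it were ever used as a MapReduce key it would also need a sort order, i.e. it would have to implement WritableComparable. The sketch below shows what that variant might look like, assuming a sort by name first and then by type; the class name AttributeKey and the ordering are illustrative, not part of the original code.

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.WritableComparable;

// Hypothetical key variant of Attribute: only needed if it is used as a MapReduce key.
public class AttributeKey implements WritableComparable<AttributeKey> {

    private IntWritable type = new IntWritable(0);
    private Text name = new Text("");

    @Override
    public void write(DataOutput out) throws IOException {
        type.write(out);
        name.write(out);
    }

    @Override
    public void readFields(DataInput in) throws IOException {
        // Same field order as write().
        type.readFields(in);
        name.readFields(in);
    }

    @Override
    public int compareTo(AttributeKey other) {
        // Sort by name, then by type (an assumed ordering for illustration).
        int cmp = name.compareTo(other.name);
        return cmp != 0 ? cmp : type.compareTo(other.type);
    }

    @Override
    public boolean equals(Object o) {
        if (!(o instanceof AttributeKey)) {
            return false;
        }
        AttributeKey other = (AttributeKey) o;
        return type.equals(other.type) && name.equals(other.name);
    }

    @Override
    public int hashCode() {
        // HashPartitioner uses hashCode() to assign keys to reducers,
        // so it should be stable across JVMs.
        return name.hashCode() * 163 + type.get();
    }
}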
Class TestA:
package siat.miner.etl.test;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.DataInput;
import java.io.DataInputStream;
import java.io.DataOutput;
import java.io.DataOutputStream;
import java.io.IOException;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Writable;
import siat.miner.etl.instance.Attribute;
public class TestA implements Writable {

    private Attribute a;
    private IntWritable b;

    /**
     * Round-trips a TestA instance through an in-memory byte stream.
     *
     * @param args unused
     * @throws IOException if serialization fails
     */
    public static void main(String[] args) throws IOException {
        Attribute a = new Attribute(Attribute.ATTRIBUTE_TYPE_NOMINAL, "name");
        TestA ta = new TestA(a, new IntWritable(1));

        // Serialize ta into an in-memory byte array.
        ByteArrayOutputStream bos = new ByteArrayOutputStream();
        DataOutputStream oos = new DataOutputStream(bos);
        ta.write(oos);

        // Deserialize the bytes back into a fresh TestA instance.
        TestA tb = new TestA();
        tb.readFields(new DataInputStream(new ByteArrayInputStream(bos.toByteArray())));

        // Print the recovered fields to confirm the round trip worked.
        System.out.println(tb.a.getName() + " " + tb.a.getType() + " " + tb.b);
    }

    public TestA(Attribute a, IntWritable b) {
        this.a = a;
        this.b = b;
    }

    public TestA() {
        // No-argument constructor so readFields() can populate an empty instance.
    }

    @Override
    public void readFields(DataInput in) throws IOException {
        // Recreate the nested Writables, then read them in write order.
        a = new Attribute();
        a.readFields(in);
        b = new IntWritable();
        b.readFields(in);
    }

    @Override
    public void write(DataOutput out) throws IOException {
        // Nested Writables serialize themselves; write them in a fixed order.
        a.write(out);
        b.write(out);
    }
}
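The serialize-then-deserialize dance in TestA.main can be factored into a small reusable helper so other tests do not repeat the stream plumbing. This is only a sketch of the same pattern; the class and method names are made up for illustration.

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.DataInputStream;
import java.io.DataOutputStream;
import java.io.IOException;
import org.apache.hadoop.io.Writable;

// Hypothetical helper that factors out the round trip performed in TestA.main.
public class WritableRoundTrip {

    // Serialize any Writable into a byte array.
    public static byte[] serialize(Writable w) throws IOException {
        ByteArrayOutputStream bos = new ByteArrayOutputStream();
        DataOutputStream out = new DataOutputStream(bos);
        w.write(out);
        out.flush();
        return bos.toByteArray();
    }

    // Populate an existing Writable instance from a byte array.
    public static void deserialize(Writable w, byte[] bytes) throws IOException {
        DataInputStream in = new DataInputStream(new ByteArrayInputStream(bytes));
        w.readFields(in);
    }
}

With it, the body of main reduces to byte[] bytes = WritableRoundTrip.serialize(ta); followed by WritableRoundTrip.deserialize(tb, bytes);.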