风起漂泊 发表于 2016-12-5 09:04:11

Hadoop SequenceFile Writer And Reader

package cn.edu.xmu.dm.mpdemo.ioformat;
import java.io.IOException;
import java.net.URI;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IOUtils;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.SequenceFile.CompressionType;
import org.apache.hadoop.io.Text;
/**
* desc: SequenceFileWriter
* <code>SequenceFileWriteDemo</code>
*
* @author chenwq (irwenqiang@gmail.com)
* @version 1.0 2012/05/19
*/
public class SequenceFileWriteDemo {
    // Sample records; the write loop cycles through these five lines.
    private static final String[] DATA = { "One, two, buckle my shoe",
            "Three, four, shut the door", "Five, six, pick up sticks",
            "Seven, eight, lay them straight", "Nine, ten, a big fat hen" };

    /**
     * Writes 100 (IntWritable, Text) records to the SequenceFile named by
     * {@code args[0]} (an HDFS or local URI), printing the writer's byte
     * position before each append.
     *
     * @param args args[0] is the target file URI
     * @throws IOException if the filesystem or writer fails
     */
    public static void main(String[] args) throws IOException {
        String uri = args[0]; // fix: args is a String[]; take the first CLI argument
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(URI.create(uri), conf);
        Path path = new Path(uri);
        IntWritable key = new IntWritable();
        Text value = new Text();
        SequenceFile.Writer writer = null;
        try {
            /**
             * fs: outputstream
             * conf: configuration object
             * key: the key's type
             * value: the value's type
             */
            writer = SequenceFile.createWriter(fs, conf, path, key.getClass(),
                    value.getClass());
            // Block-compressed variant (produces the smaller file shown below):
            // writer = SequenceFile.createWriter(fs, conf, path, key.getClass(),
            //         value.getClass(), CompressionType.BLOCK);
            for (int i = 0; i < 100; i++) {
                key.set(100 - i);                 // keys count down from 100 to 1
                value.set(DATA[i % DATA.length]); // fix: Text.set takes one String, not the array
                System.out.printf("[%s]\t%s\t%s\n", writer.getLength(), key,
                        value);
                writer.append(key, value);
            }
        } finally {
            IOUtils.closeStream(writer); // null-safe close even if createWriter threw
        }
    }
}


package cn.edu.xmu.dm.mpdemo.ioformat;
import java.io.IOException;
import java.net.URI;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IOUtils;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.util.ReflectionUtils;
/**
* desc: SequenceFileReader
* <code>SequenceFileReadDemo</code>
*
* @author chenwq (irwenqiang@gmail.com)
* @version 1.0 2012/05/19
*/
public class SequenceFileReadDemo {
    /**
     * Reads every record from the SequenceFile named by {@code args[0]},
     * printing each record's starting byte position (marked with '*' when a
     * sync point was just crossed), key, and value. Key/value instances are
     * created reflectively from the types stored in the file header.
     *
     * @param args args[0] is the source file URI
     * @throws IOException if the filesystem or reader fails
     */
    public static void main(String[] args) throws IOException {
        String uri = args[0]; // fix: args is a String[]; take the first CLI argument
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(URI.create(uri), conf);
        Path path = new Path(uri);
        SequenceFile.Reader reader = null;
        try {
            reader = new SequenceFile.Reader(fs, path, conf);
            // Instantiate key/value holders from the classes recorded in the file.
            Writable key = (Writable) ReflectionUtils.newInstance(
                    reader.getKeyClass(), conf);
            Writable value = (Writable) ReflectionUtils.newInstance(
                    reader.getValueClass(), conf);
            long position = reader.getPosition();
            while (reader.next(key, value)) {
                String syncSeen = reader.syncSeen() ? "*" : "";
                System.out.printf("[%s%s]\t%s\t%s\n", position, syncSeen, key,
                        value);
                position = reader.getPosition(); // beginning of next record
            }
        } finally {
            IOUtils.closeStream(reader); // null-safe close even if the constructor threw
        }
    }
}

  Size comparison after using Block compression (使用Block压缩后的大小对比):

root@ubuntu:~# hadoop fs -ls mpdemo/
Found 2 items
-rw-r--r--   3 root supergroup       4788 2012-05-19 00:11 /user/root/mpdemo/seqinput
-rw-r--r--   3 root supergroup      484 2012-05-19 00:17 /user/root/mpdemo/seqinputblock

 
页: [1]
查看完整版本: Hadoop SequenceFile Writer And Reader