死siua11 发表于 2016-12-7 07:32:27

hadoop小文件操作之SequenceFile

  存储文件:
  import java.io.BufferedInputStream;
  import java.io.FileInputStream;
  import java.io.IOException;
  import java.io.InputStream;
  import java.net.URI;
  import org.apache.hadoop.conf.Configuration;
  import org.apache.hadoop.fs.FileSystem;
  import org.apache.hadoop.fs.Path;
  import org.apache.hadoop.io.IOUtils;
  import org.apache.hadoop.io.SequenceFile;
  import org.apache.hadoop.io.Text;
  public class SequenceFileWrite {
  public static void main(String[] args) throws IOException {
  String src = "E:\\test\\spring3_MVC.docx";
  InputStream in = new BufferedInputStream(new FileInputStream(src));
  String uri = "hdfs://localhost:9000/home/hdfs/spring.seq";
      Configuration conf = new Configuration();
      FileSystem fs = FileSystem.get(URI.create(uri), conf);
      Path path = new Path(uri);
      Text key = new Text();   
      Text value = new Text();
      SequenceFile.Writer writer = null;    
      try {
        //返回一个SequenceFile.Writer实例 需要数据流和path对象 将数据写入了path对象
        writer = SequenceFile.createWriter(fs, conf, path,key.getClass(), value.getClass());  
        int len = 0;
        byte[] buff = new byte;
        key.set("spring.docx");
        while ((len = in.read(buff))!= -1) {
  value.set(buff,0,len);
  writer.append(key, value);//将每条记录追加到SequenceFile.Writer实例的末尾   
      value.clear();
    }
      } finally {
        IOUtils.closeStream(writer);
        IOUtils.closeStream(in);
      }
    }
  }
  读取文件:
  import java.io.FileOutputStream;
  import java.io.IOException;
  import java.io.OutputStream;
  import java.net.URI;
  import org.apache.hadoop.conf.Configuration;
  import org.apache.hadoop.fs.FileSystem;
  import org.apache.hadoop.fs.Path;
  import org.apache.hadoop.io.IOUtils;
  import org.apache.hadoop.io.SequenceFile;
  import org.apache.hadoop.io.Text;
  import org.apache.hadoop.io.Writable;
  import org.apache.hadoop.util.ReflectionUtils;
  public class SequenceFileReader {
  public static void main(String[] args) throws IOException {  
      String uri = "hdfs://localhost:9000/home/hdfs/spring.seq";  
      Configuration conf = new Configuration();  
      FileSystem fs = FileSystem.get(URI.create(uri), conf);  
      Path path = new Path(uri);    
      SequenceFile.Reader reader = null;  
      String dst = "e:\\test\\spring.docx";    
      OutputStream out = null;
      try {  
        reader = new SequenceFile.Reader(fs, path, conf);
        //返回 SequenceFile.Reader 对象       getKeyClass()获得Sequence中使用的类型  
        Writable key = (Writable)  ReflectionUtils.newInstance(reader.getKeyClass(), conf);
        out =new  FileOutputStream(dst);
        Text  value = new Text();
        while (reader.next(key, value)) { //next()方法迭代读取记录 直到读完返回false  
      System.out.println(key);
      out.write(value.getBytes(),0,value.getLength());//这个长度一定要添加,否则不兼容office2007
      value.clear();  //记着清除一下,不然可能会出现多余的输出     
        }  
        out.flush();
      } finally {  
        IOUtils.closeStream(reader);  
        IOUtils.closeStream(out);
      }  
    } 
  }
页: [1]
查看完整版本: hadoop小文件操作之SequenceFile