cy_88 发表于 2017-12-17 10:09:08

Hadoop文件解压缩

package Compress;  import java.io.File;
  import java.io.FileInputStream;
  import java.io.FileOutputStream;
  import java.io.IOException;
  import org.apache.hadoop.conf.Configuration;
  import org.apache.hadoop.fs.Path;
  import org.apache.hadoop.io.IOUtils;
  import org.apache.hadoop.io.compress.CompressionCodec;
  import org.apache.hadoop.io.compress.CompressionCodecFactory;
  import org.apache.hadoop.io.compress.CompressionInputStream;
  import org.apache.hadoop.mapreduce.Job;
  /**
  * 解压缩
  * @author liguodong
  */

  public>  final static String file = "/liguodong/data.gz";
  public static void main(String[] args) throws IOException {
  Configuration conf = new Configuration();
  Job job = Job.getInstance(conf, "DeCodec");
  //打包执行必须执行的方法
  job.setJarByClass(Decompression.class);
  CompressionCodecFactory codecFactory = new CompressionCodecFactory(conf);
  //返回一个解压缩的实例
  CompressionCodec codec = codecFactory.getCodec(new Path(file));
  //返回被算法解压了的输入流
  CompressionInputStream inputStream = codec.createInputStream
  (new FileInputStream(new File(file)));
  //将输入流文件写出到去除了扩展名的文件
  FileOutputStream outputStream = new FileOutputStream
  (new File(codecFactory.removeSuffix(file, codec.getDefaultExtension())));
  IOUtils.copyBytes(inputStream, outputStream, conf);
  }
  }
页: [1]
查看完整版本: Hadoop文件解压缩