cixiren 发表于 2018-10-28 14:06:44

Hadoop HDFS Java API

package com.uplooking.bigdata.hdfs;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.*;
import org.apache.hadoop.fs.permission.FsPermission;
import org.apache.hadoop.io.IOUtils;
import org.junit.After;
import org.junit.Before;
import org.junit.Test;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.net.URI;
import java.nio.charset.StandardCharsets;
import java.text.DateFormat;
import java.text.SimpleDateFormat;
import java.util.Date;
  /**
  * 列出目录的内容:listStatus
  * 读取文件:open
  * 创建目录:mkdirs
  * 创建文件:create
  * 删除文件或目录:delete
  * 显示文件存储位置:getFileBlockLocations
  */

  public>  private FileSystem fs;
  private DateFormat df = new SimpleDateFormat("yyyy-MM-dd HH:mm");
  /**
  * 初始化资源
  * @throws Exception
  */
  @Before
  public void setUp() throws Exception {
      // Build the target URI first, then obtain the FileSystem handle shared
      // by every test method in this class.
      final URI hdfsUri = new URI("hdfs://uplooking01:9000");
      fs = FileSystem.get(hdfsUri, new Configuration());
  }
  /**
  * 列出目录的内容:listStatus
  * 模仿:
  * $ hdfs dfs -ls /
  * -rw-r--r--   1 uplooking supergroup         28 2018-02-28 12:29 /hello
  * drwxr-xr-x   - uplooking supergroup          0 2018-02-28 12:31 /output
  * drwx------   - uplooking supergroup          0 2018-02-28 12:31 /tmp
  *
  * @throws IOException
  */
  @Test
  public void testList() throws IOException {
      // Prints one line per entry under "/", with tab-separated columns:
      // type+permissions, replication, owner, group, size, mtime, path.
      FileStatus[] fileStatuses = fs.listStatus(new Path("/"));
      for (FileStatus fileStatus : fileStatuses) {
          boolean isFile = fileStatus.isFile();

          // Type flag: "-" for a regular file, "d" for anything else.
          String prefix = isFile ? "-" : "d";

          // Replication factor is only shown for files; other entries show "-".
          String replication = isFile ? String.valueOf(fileStatus.getReplication()) : "-";

          // Permission string: user, group and other actions concatenated.
          FsPermission permission = fileStatus.getPermission();
          String acl = permission.getUserAction().SYMBOL
                  + permission.getGroupAction().SYMBOL
                  + permission.getOtherAction().SYMBOL;

          String owner = fileStatus.getOwner();
          String group = fileStatus.getGroup();
          long len = fileStatus.getLen();
          String mTime = df.format(new Date(fileStatus.getModificationTime()));
          Path path = fileStatus.getPath();

          // Owner and group are separated by a tab, and the size column is
          // included, so the line has the same columns as "hdfs dfs -ls".
          System.out.println(prefix + acl
                  + "\t" + replication
                  + "\t" + owner
                  + "\t" + group
                  + "\t" + len
                  + "\t" + mTime
                  + "\t" + path);
      }
  }
  /**
  * 读取文件:open
  *
  * @throws IOException
  */
  @Test
  public void testOpen() throws IOException {
      // try-with-resources closes the HDFS input stream even when copying fails;
      // previously the stream was never closed on the active code path.
      try (FSDataInputStream fis = fs.open(new Path("hdfs://uplooking01:9000/hello"))) {
          // Approach 1: read into a byte buffer manually.
          // (The buffer size was lost in the source; 1024 is an assumption.)
          /* byte[] bytes = new byte[1024];
          int len = 0;
          while ((len = fis.read(bytes)) != -1) {
              System.out.println(new String(bytes, 0, len));
          } */

          // Approach 2: read line by line through a BufferedReader.
          /* BufferedReader br = new BufferedReader(new InputStreamReader(fis));
          String line = null;
          while ((line = br.readLine()) != null) {
              System.out.println(line);
          } */

          // Approach 3 (active): let Hadoop's IOUtils copy the bytes.
          // The last argument stays false so that System.out is not closed.
          IOUtils.copyBytes(fis, System.out, 1024, false);
      }
  }
  /**
  * 创建目录:mkdirs
  *
  * @throws IOException
  */
  @Test
  public void testMkdir() throws IOException {
      // Create /input/hdfs and report whether mkdirs returned true or false.
      Path target = new Path("/input/hdfs");
      if (fs.mkdirs(target)) {
          System.out.println("创建目录成功");
      } else {
          System.out.println("创建目录失败");
      }
  }
  /**
  * 创建文件:create
  *
  * @throws IOException
  */
  @Test
  public void testCreate() throws IOException {
      // The second argument is the overwrite flag. It is false here, so an
      // existing file is not replaced (per the Hadoop FileSystem.create docs
      // the call fails instead).
      // try-with-resources closes the stream even if a write throws.
      try (FSDataOutputStream fos = fs.create(new Path("/input/hdfs/word.txt"), false)) {
          // Encode explicitly as UTF-8 instead of relying on the platform charset.
          fos.write("hello\n".getBytes(StandardCharsets.UTF_8));
          fos.write("xpleaf\n".getBytes(StandardCharsets.UTF_8));
      }
  }
  /**
  * 删除文件或目录:delete
  *
  * @throws IOException
  */
  @Test
  public void testDelete() throws IOException {
      // The second argument is the recursive flag (relevant when deleting a
      // directory); a single file is removed here, so it is false.
      Path target = new Path("/input/hdfs/word.txt");
      if (fs.delete(target, false)) {
          System.out.println("删除成功");
      } else {
          System.out.println("删除失败");
      }
  }
  /**
  * 显示文件存储位置:getFileBlockLocations
  *
  * @throws IOException
  */
  @Test
  public void testLocations() throws IOException {
      Path path = new Path("/hadoop-2.6.4.tar.gz");
      FileStatus fileStatus = fs.getFileStatus(path);
      // Arguments: file path, start offset, length. Passing 0 and the full
      // file length asks for the locations of every block of the file.
      BlockLocation[] locations = fs.getFileBlockLocations(path, 0, fileStatus.getLen());
      // The array itself is no longer printed: println on an array only shows
      // its type and identity hash, not its contents.
      for (BlockLocation location : locations) {
          System.out.println(location);
      }
      // Sample output recorded by the original author:
      //   0,134217728,uplooking01
      //     (a 128 MB block starting at offset 0, stored on node uplooking01)
      //   134217728,61798247,uplooking01
      //     (the remaining ~59 MB block starting at offset 128 MB, stored on uplooking01)
      // Both blocks are on uplooking01 because that Hadoop environment was
      // pseudo-distributed.
  }
  /**
  * 释放资源
  * @throws IOException
  */
  @After
  public void cleanUp() throws IOException {
      // JUnit runs @After methods even when @Before threw, in which case fs
      // may never have been assigned. The null check avoids a
      // NullPointerException that would be reported on top of the real failure.
      if (fs != null) {
          fs.close();
      }
  }
  }

页: [1]
查看完整版本: Hadoop HDFS Java API