378 发表于 2016-12-12 06:00:26

Hadoop Hive 复合数据结构Array,Struct,Maps

  在Hive 中如何使用符合数据结构  maps,array,structs
  1. Array的使用
  创建数据库表,以array作为数据类型
  create table  person(name string,work_locations array<string>)
  ROW FORMAT DELIMITED
  FIELDS TERMINATED BY '\t'
  COLLECTION ITEMS TERMINATED BY ',';
  数据
  biansutaobeijing,shanghai,tianjin,hangzhou
  linanchangchu,chengdu,wuhan
  入库数据
  LOAD DATA LOCAL INPATH '/home/hadoop/person.txt' OVERWRITE INTO TABLE person;
  查询
  hive> select * from person;
  biansutao       ["beijing","shanghai","tianjin","hangzhou"]
  linan   ["changchu","chengdu","wuhan"]
  Time taken: 0.355 seconds
  hive> select name from person;
  linan
  biansutao
  Time taken: 12.397 seconds
  hive> select work_locations from person;
  changchu
  beijing
  Time taken: 13.214 seconds
  hive> select work_locations from person;   
  ["changchu","chengdu","wuhan"]
  ["beijing","shanghai","tianjin","hangzhou"]
  Time taken: 13.755 seconds
  hive> select work_locations from person;
  NULL
  hangzhou
  Time taken: 12.722 seconds
  hive> select work_locations from person;
  NULL
  NULL
  Time taken: 15.958 seconds
  2. Map 的使用
  创建数据库表
  create table score(name string, score map<string,int>)
  ROW FORMAT DELIMITED
  FIELDS TERMINATED BY '\t'
  COLLECTION ITEMS TERMINATED BY ','
  MAP KEYS TERMINATED BY ':';
  要入库的数据
  biansutao'数学':80,'语文':89,'英语':95
  jobs'语文':60,'数学':80,'英语':99
  入库数据
  LOAD DATA LOCAL INPATH '/home/hadoop/score.txt' OVERWRITE INTO TABLE score;
  查询
  hive> select * from score;
  biansutao       {"数学":80,"语文":89,"英语":95}
  jobs    {"语文":60,"数学":80,"英语":99}
  Time taken: 0.665 seconds
  hive> select name from score;
  jobs
  biansutao
  Time taken: 19.778 seconds
  hive> select t.score from score t;
  {"语文":60,"数学":80,"英语":99}
  {"数学":80,"语文":89,"英语":95}
  Time taken: 19.353 seconds
  hive> select t.score['语文'] from score t;
  60
  89
  Time taken: 13.054 seconds
  hive> select t.score['英语'] from score t;
  99
  95
  Time taken: 13.769 seconds
  3 Struct 的使用
  创建数据表
  CREATE TABLE test(id int,course struct<course:string,score:int>)
  ROW FORMAT DELIMITED
  FIELDS TERMINATED BY '\t'
  COLLECTION ITEMS TERMINATED BY ',';
  数据
  1english,80
  2math,89
  3chinese,95
  入库
  LOAD DATA LOCAL INPATH '/home/hadoop/test.txt' OVERWRITE INTO TABLE test;
  查询
  hive> select * from test;
  OK
  1       {"course":"english","score":80}
  2       {"course":"math","score":89}
  3       {"course":"chinese","score":95}
  Time taken: 0.275 seconds
  hive> select course from test;
  {"course":"english","score":80}
  {"course":"math","score":89}
  {"course":"chinese","score":95}
  Time taken: 44.968 seconds
  select t.course.course from test t; 
  english
  math
  chinese
  Time taken: 15.827 seconds
  hive> select t.course.score from test t;
  80
  89
  95
  Time taken: 13.235 seconds
  4. 数据组合 (不支持组合的复杂数据类型)
  LOAD DATA LOCAL INPATH '/home/hadoop/test.txt' OVERWRITE INTO TABLE test;
  create table test1(id int,a MAP<STRING,ARRAY<STRING>>)
  row format delimited fields terminated by '\t' 
  collection items terminated by ','
  MAP KEYS TERMINATED BY ':';
  1english:80,90,70
  2math:89,78,86
  3chinese:99,100,82
  LOAD DATA LOCAL INPATH '/home/hadoop/test1.txt' OVERWRITE INTO TABLE test1;
页: [1]
查看完整版本: Hadoop Hive 复合数据结构Array,Struct,Maps