yzq 发表于 2017-12-16 22:15:16

【hadoop】python通过hdfs模块读hdfs数据

#!/usr/bin/env python  
# encoding: utf-8

  
"""Avro extension example."""
  

  
from hdfs import Config
  
from hdfs.ext.avro import AvroReader, AvroWriter
  

  

  
# Get the default alias' client: `Config()` is built with no arguments and
# `get_client()` is called without naming an alias.
# NOTE(review): where the configuration is loaded from is not visible here;
# confirm against the HdfsCLI configuration docs.
client = Config().get_client()
  

  
# Sample records used by the Avro write/read example below.
records = [
  dict(name='Ann', age=23),
  dict(name='Bob', age=22),
]
  

  
# Write the sample records to 'names.avro' on HDFS. No schema is passed to
# the writer: the records' schema is very simple, so the writer is left to
# infer it (otherwise it would be passed as an argument). `overwrite=True`
# is passed so that an existing file at that path is overwritten.
with AvroWriter(client, 'names.avro', overwrite=True) as writer:
  for record in records:
    # Must be nested inside the loop; at the loop's own indent level the
    # `for` has no body and the file fails with an IndentationError.
    writer.write(record)
  

  
# Re-open the file that was just written and check the round trip.
with AvroReader(client, 'names.avro') as reader:
  # HDFS content object describing the remote file.
  content = reader.content
  # Schema the writer inferred from the records.
  schema = reader.schema
  # Iterating the reader yields the stored records; they must equal the input.
  assert records == list(reader)
页: [1]
查看完整版本: 【hadoop】python通过hdfs模块读hdfs数据