wsaer posted on 2017-12-17 08:49:51

Hadoop, Part 1: Installation and Testing

  I. Hadoop installation (local mode and pseudo-distributed)
  Hadoop archive download site: http://archive.apache.org/dist/
  Run modes:
  local mode
  YARN (pseudo-distributed) mode
  Hadoop components:
  common: base libraries and commands
  hdfs: distributed file system; data safety via replication (default replica count)
  yarn: a data operating system (analogous in role to the Linux OS)
  mapreduce: distributed computing framework
  input -> map -> shuffle -> reduce -> output
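  For intuition, here is how two of the sample lines would flow through those stages in wordcount (an illustrative trace, not real program output):
  input:   "hadoop yarn" and "hadoop hdfs"
  map:     (hadoop,1) (yarn,1) (hadoop,1) (hdfs,1)
  shuffle: hadoop -> [1,1]; yarn -> [1]; hdfs -> [1]
  reduce:  (hadoop,2) (yarn,1) (hdfs,1)
  output:  hadoop 2, yarn 1, hdfs 1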
  1. Install and configure the JDK
#tar -zxvf jdk-7u67-linux-x64.tar.gz
#mkdir /usr/java
#mv jdk1.7.0_67/ /usr/java/
#vim /etc/profile
  export JAVA_HOME=/usr/java/jdk1.7.0_67
  export PATH=$JAVA_HOME/bin:$JAVA_HOME/jre/bin:$PATH

#source /etc/profile
# java -version
  java version "1.7.0_67"
  Java(TM) SE Runtime Environment (build 1.7.0_67-b01)
  Java HotSpot(TM) 64-Bit Server VM (build 24.65-b04, mixed mode)
  ----- JDK configured successfully ---------------------------
  2. Install the Hadoop software
#tar -zxvf hadoop-2.5.0.tar.gz
#mv /mnt/hadoop-2.5.0 /usr/local/hadoop-2.5.0/
#chown -R hadoop:hadoop /usr/local/hadoop-2.5.0/
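  Before configuring anything, the unpacked distribution can be sanity-checked from the install directory (a quick extra verification, not in the original steps; later commands also assume this working directory):
$ cd /usr/local/hadoop-2.5.0
$ bin/hadoop version
  Hadoop 2.5.0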
  3. Test in local mode (the grep example expects a local input/ directory, e.g. created with mkdir input && cp etc/hadoop/*.xml input/)
$bin/hadoop jar share/hadoop/mapreduce/hadoop-mapreduce-examples-2.5.0.jar grep input output 'dfs+'
$mkdir wcinput
$cd wcinput/
$touch wc.input
$vim wc.input
  hadoop yarn
  hadoop mapreduce
  hadoop hdfs
  yarn nodemanager
  hadoop resourcemanager
$cd ../
$bin/hadoop jar share/hadoop/mapreduce/hadoop-mapreduce-examples-2.5.0.jar wordcount wcinput wcoutput
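  In local mode the job writes to the local filesystem, so the counts can be inspected directly (an extra check; the output matches the wc.input contents above):
$ cat wcoutput/part-r-00000
  hadoop    4
  hdfs    1
  mapreduce    1
  nodemanager    1
  resourcemanager    1
  yarn    2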
  4. Edit the configuration files to set up HDFS
$vim etc/hadoop/hadoop-env.sh
  export JAVA_HOME=/usr/java/jdk1.7.0_67
$mkdir -p data/tmp
$vim etc/hadoop/core-site.xml
  <configuration>
  <property>
  <name>fs.defaultFS</name>
  <value>hdfs://db01:9000</value>
  </property>
  <property>
  <name>hadoop.tmp.dir</name>
  <value>/usr/local/hadoop-2.5.0/data/tmp</value>
  </property>
  </configuration>
$vim etc/hadoop/hdfs-site.xml
  <configuration>
  <property>
  <name>dfs.replication</name>
  <value>1</value>
  </property>
  </configuration>
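  The effective values can be confirmed with hdfs getconf (a convenience check, not part of the original walkthrough):
$ bin/hdfs getconf -confKey fs.defaultFS
  hdfs://db01:9000
$ bin/hdfs getconf -confKey dfs.replication
  1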
  5. Format the HDFS filesystem
$bin/hdfs namenode -format
  6. Start the namenode and datanode daemons
$sbin/hadoop-daemon.sh start namenode
$sbin/hadoop-daemon.sh start datanode
  7. Access HDFS from a browser
  URL: http://db01:50070/
  8. Create a working directory in HDFS and test wordcount on HDFS
$bin/hdfs dfs -mkdir -p /user/hadoop/
$bin/hdfs dfs -ls -R /
$bin/hdfs dfs -mkdir -p /user/hadoop/mapreduce/wordcount/input
$bin/hdfs dfs -put wcinput/wc.input /user/hadoop/mapreduce/wordcount/input/
$bin/hdfs dfs -cat /user/hadoop/mapreduce/wordcount/input/wc.input
$bin/hadoop jar share/hadoop/mapreduce/hadoop-mapreduce-examples-2.5.0.jar wordcount /user/hadoop/mapreduce/wordcount/input/ /user/hadoop/mapreduce/wordcount/output/
$bin/hdfs dfs -cat /user/hadoop/mapreduce/wordcount/output/part-r-00000
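  The result file can also be copied out of HDFS for local inspection (an optional extra step; the local file name wc.result is arbitrary):
$ bin/hdfs dfs -get /user/hadoop/mapreduce/wordcount/output/part-r-00000 ./wc.result
$ cat wc.result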
  9. Configure YARN
$ vim etc/hadoop/yarn-env.sh
  export JAVA_HOME=/usr/java/jdk1.7.0_67
$ vim etc/hadoop/yarn-site.xml
  <configuration>
  <property>
  <name>yarn.nodemanager.aux-services</name>
  <value>mapreduce_shuffle</value>
  </property>
  <property>
  <name>yarn.resourcemanager.hostname</name>
  <value>db01</value>
  </property>
  </configuration>
$ vim etc/hadoop/slaves
  db01
  10. Start YARN
$ sbin/yarn-daemon.sh start resourcemanager
$ sbin/yarn-daemon.sh start nodemanager
$ jps
  14573 NodeManager
  13490 DataNode
  13400 NameNode
  14685 Jps
  14315 ResourceManager
  11. Open the YARN monitoring web UI in a browser
  http://db01:8088
  12. Configure MapReduce
$ vim etc/hadoop/mapred-env.sh
  export JAVA_HOME=/usr/java/jdk1.7.0_67
$ cp etc/hadoop/mapred-site.xml.template etc/hadoop/mapred-site.xml
$ vim etc/hadoop/mapred-site.xml
  <configuration>
  <property>
  <name>mapreduce.framework.name</name>
  <value>yarn</value>
  </property>
  </configuration>
  13. Test wordcount (now running on YARN)
$ bin/hdfs dfs -rm -R /user/hadoop/mapreduce/wordcount/output/

  17/03/01 17:16:03 WARN util.NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable
  17/03/01 17:16:04 INFO fs.TrashPolicyDefault: Namenode trash configuration: Deletion interval = 0 minutes, Emptier interval = 0 minutes.
  Deleted /user/hadoop/mapreduce/wordcount/output
$ bin/hdfs dfs -ls -R /

  17/03/01 17:16:28 WARN util.NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable
  drwxr-xr-x   - hadoop supergroup          0 2017-03-01 16:04 /user
  drwxr-xr-x   - hadoop supergroup          0 2017-03-01 16:07 /user/hadoop
  drwxr-xr-x   - hadoop supergroup          0 2017-03-01 16:07 /user/hadoop/mapreduce
  drwxr-xr-x   - hadoop supergroup          0 2017-03-01 17:16 /user/hadoop/mapreduce/wordcount
  drwxr-xr-x   - hadoop supergroup          0 2017-03-01 16:08 /user/hadoop/mapreduce/wordcount/input
  -rw-r--r--   1 hadoop supergroup         81 2017-03-01 16:08 /user/hadoop/mapreduce/wordcount/input/wc.input
$ bin/hadoop jar share/hadoop/mapreduce/hadoop-mapreduce-examples-2.5.0.jar wordcount /user/hadoop/mapreduce/wordcount/input/ /user/hadoop/mapreduce/wordcount/output/

  17/03/01 17:18:08 WARN util.NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable
  17/03/01 17:18:09 INFO client.RMProxy: Connecting to ResourceManager at db01/192.168.100.231:8032
  17/03/01 17:18:10 INFO input.FileInputFormat: Total input paths to process : 1
  17/03/01 17:18:10 INFO mapreduce.JobSubmitter: number of splits:1
  17/03/01 17:18:10 INFO mapreduce.JobSubmitter: Submitting tokens for job: job_1488358618376_0001
  17/03/01 17:18:11 INFO impl.YarnClientImpl: Submitted application application_1488358618376_0001
  17/03/01 17:18:11 INFO mapreduce.Job: The url to track the job: http://db01:8088/proxy/application_1488358618376_0001/
  17/03/01 17:18:11 INFO mapreduce.Job: Running job: job_1488358618376_0001
  17/03/01 17:18:19 INFO mapreduce.Job: Job job_1488358618376_0001 running in uber mode : false
  17/03/01 17:18:19 INFO mapreduce.Job:  map 0% reduce 0%
  17/03/01 17:18:25 INFO mapreduce.Job:  map 100% reduce 0%
  17/03/01 17:18:31 INFO mapreduce.Job:  map 100% reduce 100%
  17/03/01 17:18:31 INFO mapreduce.Job: Job job_1488358618376_0001 completed successfully
  17/03/01 17:18:31 INFO mapreduce.Job: Counters: 49
  File System Counters
  FILE: Number of bytes read=97
  FILE: Number of bytes written=194147
  FILE: Number of read operations=0
  FILE: Number of large read operations=0
  FILE: Number of write operations=0
  HDFS: Number of bytes read=209
  HDFS: Number of bytes written=67
  HDFS: Number of read operations=6
  HDFS: Number of large read operations=0
  HDFS: Number of write operations=2
  Job Counters
  Launched map tasks=1
  Launched reduce tasks=1
  Data-local map tasks=1
  Total time spent by all maps in occupied slots (ms)=3516
  Total time spent by all reduces in occupied slots (ms)=3823
  Total time spent by all map tasks (ms)=3516
  Total time spent by all reduce tasks (ms)=3823
  Total vcore-seconds taken by all map tasks=3516
  Total vcore-seconds taken by all reduce tasks=3823
  Total megabyte-seconds taken by all map tasks=3600384
  Total megabyte-seconds taken by all reduce tasks=3914752
  Map-Reduce Framework
  Map input records=5
  Map output records=10
  Map output bytes=121
  Map output materialized bytes=97
  Input split bytes=128
  Combine input records=10
  Combine output records=6
  Reduce input groups=6
  Reduce shuffle bytes=97
  Reduce input records=6
  Reduce output records=6
  Spilled Records=12
  Shuffled Maps =1
  Failed Shuffles=0
  Merged Map outputs=1
  GC time elapsed (ms)=47
  CPU time spent (ms)=1690
  Physical memory (bytes) snapshot=411054080
  Virtual memory (bytes) snapshot=1784795136
  Total committed heap usage (bytes)=275251200
  Shuffle Errors
  BAD_ID=0
  CONNECTION=0
  IO_ERROR=0
  WRONG_LENGTH=0
  WRONG_MAP=0
  WRONG_REDUCE=0
  File Input Format Counters
  Bytes Read=81
  File Output Format Counters
  Bytes Written=67
$ bin/hdfs dfs -ls -R /user/hadoop/mapreduce/wordcount/output/

  17/03/01 17:19:42 WARN util.NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable
  -rw-r--r--   1 hadoop supergroup          0 2017-03-01 17:18 /user/hadoop/mapreduce/wordcount/output/_SUCCESS
  -rw-r--r--   1 hadoop supergroup         67 2017-03-01 17:18 /user/hadoop/mapreduce/wordcount/output/part-r-00000
$ bin/hdfs dfs -cat /user/hadoop/mapreduce/wordcount/output/part-r-00000

  17/03/01 17:20:58 WARN util.NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable
  hadoop    4
  hdfs    1
  mapreduce    1
  nodemanager    1
  resourcemanager    1
  yarn    2
  14. Run wordcount via yarn (the output directory must not already exist, or the job fails)
$ bin/yarn jar share/hadoop/mapreduce/hadoop-mapreduce-examples-2.5.0.jar wordcount /user/hadoop/mapreduce/wordcount/input/ /user/hadoop/mapreduce/wordcount/output2/

  17/03/01 17:43:08 WARN util.NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable
  17/03/01 17:43:09 INFO client.RMProxy: Connecting to ResourceManager at db01/192.168.100.231:8032
  17/03/01 17:43:10 INFO input.FileInputFormat: Total input paths to process : 1
  17/03/01 17:43:10 INFO mapreduce.JobSubmitter: number of splits:1
  17/03/01 17:43:10 INFO mapreduce.JobSubmitter: Submitting tokens for job: job_1488358618376_0002
  17/03/01 17:43:11 INFO impl.YarnClientImpl: Submitted application application_1488358618376_0002
  17/03/01 17:43:11 INFO mapreduce.Job: The url to track the job: http://db01:8088/proxy/application_1488358618376_0002/
  17/03/01 17:43:11 INFO mapreduce.Job: Running job: job_1488358618376_0002
  17/03/01 17:43:18 INFO mapreduce.Job: Job job_1488358618376_0002 running in uber mode : false
  17/03/01 17:43:18 INFO mapreduce.Job:  map 0% reduce 0%
  17/03/01 17:43:23 INFO mapreduce.Job:  map 100% reduce 0%
  17/03/01 17:43:29 INFO mapreduce.Job:  map 100% reduce 100%
  17/03/01 17:43:30 INFO mapreduce.Job: Job job_1488358618376_0002 completed successfully
  17/03/01 17:43:30 INFO mapreduce.Job: Counters: 49
  File System Counters
  FILE: Number of bytes read=97
  FILE: Number of bytes written=194149
  FILE: Number of read operations=0
  FILE: Number of large read operations=0
  FILE: Number of write operations=0
  HDFS: Number of bytes read=209
  HDFS: Number of bytes written=67
  HDFS: Number of read operations=6
  HDFS: Number of large read operations=0
  HDFS: Number of write operations=2
  Job Counters
  Launched map tasks=1
  Launched reduce tasks=1
  Data-local map tasks=1
  Total time spent by all maps in occupied slots (ms)=3315
  Total time spent by all reduces in occupied slots (ms)=3460
  Total time spent by all map tasks (ms)=3315
  Total time spent by all reduce tasks (ms)=3460
  Total vcore-seconds taken by all map tasks=3315
  Total vcore-seconds taken by all reduce tasks=3460
  Total megabyte-seconds taken by all map tasks=3394560
  Total megabyte-seconds taken by all reduce tasks=3543040
  Map-Reduce Framework
  Map input records=5
  Map output records=10
  Map output bytes=121
  Map output materialized bytes=97
  Input split bytes=128
  Combine input records=10
  Combine output records=6
  Reduce input groups=6
  Reduce shuffle bytes=97
  Reduce input records=6
  Reduce output records=6
  Spilled Records=12
  Shuffled Maps =1
  Failed Shuffles=0
  Merged Map outputs=1
  GC time elapsed (ms)=38
  CPU time spent (ms)=1690
  Physical memory (bytes) snapshot=400715776
  Virtual memory (bytes) snapshot=1776209920
  Total committed heap usage (bytes)=274202624
  Shuffle Errors
  BAD_ID=0
  CONNECTION=0
  IO_ERROR=0
  WRONG_LENGTH=0
  WRONG_MAP=0
  WRONG_REDUCE=0
  File Input Format Counters
  Bytes Read=81
  File Output Format Counters
  Bytes Written=67
$ bin/hdfs dfs -cat /user/hadoop/mapreduce/wordcount/output2/

  17/03/01 17:44:11 WARN util.NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable
  cat: `/user/hadoop/mapreduce/wordcount/output2': Is a directory
$ bin/hdfs dfs -cat /user/hadoop/mapreduce/wordcount/output2/part*

  17/03/01 17:44:40 WARN util.NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable
  hadoop    4
  hdfs    1
  mapreduce    1
  nodemanager    1
  resourcemanager    1
  yarn    2
$ bin/hdfs dfs -text /user/hadoop/mapreduce/wordcount/output2/part*

  17/03/01 17:47:38 WARN util.NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable
  hadoop    4
  hdfs    1
  mapreduce    1
  nodemanager    1
  resourcemanager    1
  yarn    2
  Note: MapReduce sorts the results by key by default.
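  Since results are sorted by word rather than by count, a shell pipeline is handy to rank them by frequency (local post-processing, not a Hadoop feature):
$ bin/hdfs dfs -cat /user/hadoop/mapreduce/wordcount/output/part-r-00000 | sort -k2 -nr
  hadoop    4
  yarn    2
  hdfs    1
  mapreduce    1
  nodemanager    1
  resourcemanager    1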
  15. Start the MapReduce job history server
$ sbin/mr-jobhistory-daemon.sh start historyserver
  starting historyserver, logging to /usr/local/hadoop-2.5.0/logs/mapred-hadoop-historyserver-db01.out
$ jps
  14573 NodeManager
  13490 DataNode
  13400 NameNode
  14315 ResourceManager
  16366 Jps
  16296 JobHistoryServer
  16. Enable YARN log aggregation
  Aggregation: after a MapReduce job finishes, its logs are uploaded to HDFS.
$ cat etc/hadoop/yarn-site.xml
  <configuration>
  <property>
  <name>yarn.nodemanager.aux-services</name>
  <value>mapreduce_shuffle</value>
  </property>
  <property>
  <name>yarn.resourcemanager.hostname</name>
  <value>db01</value>
  </property>
  <!-- enable log aggregation -->
  <property>
  <name>yarn.log-aggregation-enable</name>
  <value>true</value>
  </property>
  <!-- log retention period in seconds (600000 is roughly 7 days; exactly 7 days would be 604800) -->
  <property>
  <name>yarn.log-aggregation.retain-seconds</name>
  <value>600000</value>
  </property>
  </configuration>
  ---------- Restart the YARN daemons and the historyserver:
$ sbin/yarn-daemon.sh stop resourcemanager
  stopping resourcemanager
$ sbin/yarn-daemon.sh stop nodemanager
  stopping nodemanager
  nodemanager did not stop gracefully after 5 seconds: killing with kill -9
$ jps
  13490 DataNode
  13400 NameNode
  16511 Jps
  16296 JobHistoryServer
$ sbin/mr-jobhistory-daemon.sh stop historyserver
  stopping historyserver
$ jps
  13490 DataNode
  13400 NameNode
  16548 Jps
$ sbin/yarn-daemon.sh start resourcemanager
  starting resourcemanager, logging to /usr/local/hadoop-2.5.0/logs/yarn-hadoop-resourcemanager-db01.out
$ sbin/yarn-daemon.sh start nodemanager
  starting nodemanager, logging to /usr/local/hadoop-2.5.0/logs/yarn-hadoop-nodemanager-db01.out
$ sbin/mr-jobhistory-daemon.sh start historyserver
  starting historyserver, logging to /usr/local/hadoop-2.5.0/logs/mapred-hadoop-historyserver-db01.out
$ jps
  16584 ResourceManager
  13490 DataNode
  13400 NameNode
  16834 NodeManager
  16991 JobHistoryServer
  17028 Jps
$
  17. Re-run the wordcount job to test YARN log aggregation:
$ bin/yarn jar share/hadoop/mapreduce/hadoop-mapreduce-examples-2.5.0.jar wordcount /user/hadoop/mapreduce/wordcount/input/ /user/hadoop/mapreduce/wordcount/output3/
  View the logs in a browser (http://db01:8088/):
  Log Type: stderr
  Log Length: 0
  Log Type: stdout
  Log Length: 0
  Log Type: syslog
  Log Length: 3816
  2017-03-01 18:36:45,873 WARN org.apache.hadoop.conf.Configuration: job.xml:an attempt to override final parameter: mapreduce.job.end-notification.max.retry.interval;Ignoring.
  2017-03-01 18:36:45,911 WARN org.apache.hadoop.conf.Configuration: job.xml:an attempt to override final parameter: mapreduce.job.end-notification.max.attempts;Ignoring.

  2017-03-01 18:36:46,130 WARN org.apache.hadoop.util.NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable
  2017-03-01 18:36:46,239 INFO org.apache.hadoop.metrics2.impl.MetricsConfig: loaded properties from hadoop-metrics2.properties
  2017-03-01 18:36:46,319 INFO org.apache.hadoop.metrics2.impl.MetricsSystemImpl: Scheduled snapshot period at 10 second(s).
  2017-03-01 18:36:46,319 INFO org.apache.hadoop.metrics2.impl.MetricsSystemImpl: MapTask metrics system started
  2017-03-01 18:36:46,335 INFO org.apache.hadoop.mapred.YarnChild: Executing with tokens:

  2017-03-01 18:36:46,335 INFO org.apache.hadoop.mapred.YarnChild: Kind: mapreduce.job, Service: job_1488364479714_0001, ...
  2017-03-01 18:36:46,427 INFO org.apache.hadoop.mapred.YarnChild: Sleeping for 0ms before retrying again. Got null now.
  2017-03-01 18:36:46,732 INFO org.apache.hadoop.mapred.YarnChild: mapreduce.cluster.local.dir for child: /usr/local/hadoop-2.5.0/data/tmp/nm-local-dir/usercache/hadoop/appcache/application_1488364479714_0001
  2017-03-01 18:36:46,863 WARN org.apache.hadoop.conf.Configuration: job.xml:an attempt to override final parameter: mapreduce.job.end-notification.max.retry.interval;Ignoring.
  2017-03-01 18:36:46,878 WARN org.apache.hadoop.conf.Configuration: job.xml:an attempt to override final parameter: mapreduce.job.end-notification.max.attempts;Ignoring.
  2017-03-01 18:36:47,202 INFO org.apache.hadoop.conf.Configuration.deprecation: session.id is deprecated. Instead, use dfs.metrics.session-id
  2017-03-01 18:36:47,668 INFO org.apache.hadoop.mapred.Task:  Using ResourceCalculatorProcessTree : [ ]
  2017-03-01 18:36:47,873 INFO org.apache.hadoop.mapred.MapTask: Processing split: hdfs://db01:9000/user/hadoop/mapreduce/wordcount/input/wc.input:0+81

  2017-03-01 18:36:47,887 INFO org.apache.hadoop.mapred.MapTask: Map output collector ...
  2017-03-01 18:36:47,953 INFO org.apache.hadoop.mapred.MapTask: (EQUATOR) 0 kvi 26214396(104857584)
  2017-03-01 18:36:47,953 INFO org.apache.hadoop.mapred.MapTask: mapreduce.task.io.sort.mb: 100
  2017-03-01 18:36:47,953 INFO org.apache.hadoop.mapred.MapTask: soft limit at 83886080
  2017-03-01 18:36:47,953 INFO org.apache.hadoop.mapred.MapTask: bufstart = 0; bufvoid = 104857600
  2017-03-01 18:36:47,953 INFO org.apache.hadoop.mapred.MapTask: kvstart = 26214396; length = 6553600
  2017-03-01 18:36:47,989 INFO org.apache.hadoop.mapred.MapTask: Starting flush of map output
  2017-03-01 18:36:47,989 INFO org.apache.hadoop.mapred.MapTask: Spilling map output
  2017-03-01 18:36:47,990 INFO org.apache.hadoop.mapred.MapTask: bufstart = 0; bufend = 121; bufvoid = 104857600
  2017-03-01 18:36:47,990 INFO org.apache.hadoop.mapred.MapTask: kvstart = 26214396(104857584); kvend = 26214360(104857440); length = 37/6553600
  2017-03-01 18:36:48,002 INFO org.apache.hadoop.mapred.MapTask: Finished spill 0
  2017-03-01 18:36:48,008 INFO org.apache.hadoop.mapred.Task: Task:attempt_1488364479714_0001_m_000000_0 is done. And is in the process of committing
  2017-03-01 18:36:48,106 INFO org.apache.hadoop.mapred.Task: Task 'attempt_1488364479714_0001_m_000000_0' done.
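  With aggregation enabled, the same container logs can also be fetched from HDFS on the command line; the application ID is the one from this step-17 run (visible in the syslog above):
$ bin/yarn logs -applicationId application_1488364479714_0001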
  18. Hadoop configuration files
  Default configuration files: packaged inside the JARs of the four corresponding modules
  *core-default.xml
  *hdfs-default.xml
  *yarn-default.xml
  *mapred-default.xml
  User-defined (site) configuration files, which override the defaults: $HADOOP_HOME/etc/hadoop/ (see the note on final properties after this list)
  *core-site.xml
  *hdfs-site.xml
  *yarn-site.xml
  *mapred-site.xml
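  A property marked <final>true</final> in a configuration file cannot be overridden downstream; the "attempt to override final parameter" warnings in the step-17 syslog are exactly such attempts being ignored. A minimal sketch of declaring a final property (the value here is only an example):
  <property>
  <name>mapreduce.job.end-notification.max.attempts</name>
  <value>5</value>
  <final>true</final>
  </property>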
  19. Enable the HDFS trash feature
$ cat etc/hadoop/core-site.xml
  <configuration>
  <property>
  <name>fs.defaultFS</name>
  <value>hdfs://db01:9000</value>
  </property>
  <property>
  <name>hadoop.tmp.dir</name>
  <value>/usr/local/hadoop-2.5.0/data/tmp</value>
  </property>
  <!-- enable trash; fs.trash.interval is in minutes, so 7 days = 10080 -->
  <property>
  <name>fs.trash.interval</name>
  <value>10080</value>
  </property>
  </configuration>
  Restart HDFS for the change to take effect.
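  With trash enabled, a plain -rm moves data into the per-user trash instead of deleting it, from where it can be recovered with -mv; -skipTrash bypasses the trash entirely (the paths below are illustrative):
$ bin/hdfs dfs -rm /user/hadoop/mapreduce/wordcount/input/wc.input
$ bin/hdfs dfs -mv /user/hadoop/.Trash/Current/user/hadoop/mapreduce/wordcount/input/wc.input /user/hadoop/mapreduce/wordcount/input/
$ bin/hdfs dfs -rm -skipTrash /user/hadoop/scratch.txt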
  20. Three ways to start/stop Hadoop
  *Start each daemon individually (most common; easy to script in shell)
  hdfs:
  sbin/hadoop-daemon.sh start|stop namenode
  sbin/hadoop-daemon.sh start|stop datanode
  sbin/hadoop-daemon.sh start|stop secondarynamenode
  yarn:
  sbin/yarn-daemon.sh start|stop resourcemanager
  sbin/yarn-daemon.sh start|stop nodemanager
  mapreduce:
  sbin/mr-jobhistory-daemon.sh start|stop historyserver
  *Start each module as a whole: requires passwordless SSH and must be run on the namenode
  hdfs:
  sbin/start-dfs.sh
  sbin/stop-dfs.sh
  yarn:
  sbin/start-yarn.sh
  sbin/stop-yarn.sh
  *Start everything at once: not recommended; the command must be run on the namenode, and it also starts the secondarynamenode daemon on the namenode host
  sbin/start-all.sh
  sbin/stop-all.sh
  Extra: set up passwordless SSH
$ ssh-keygen -t dsa -P '' -f ~/.ssh/id_dsa
$ cat ~/.ssh/id_dsa.pub >> ~/.ssh/authorized_keys
$ scp .ssh/authorized_keys db02:/home/hadoop/.ssh/authorized_keys
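  Passwordless login can then be verified with a one-shot remote command (db02 as in the scp example above):
$ ssh db02 hostname
  db02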
  21. Hadoop roles: what determines where each daemon runs
  namenode: determined by the fs.defaultFS value hdfs://db01:9000
  core-site.xml
  <!-- this property determines the namenode host -->
  <property>
  <name>fs.defaultFS</name>
  <value>hdfs://db01:9000</value>
  </property>
  datanode: determined by the contents of the slaves file
$ cat etc/hadoop/slaves
  db01
  secondarynamenode: determined by the dfs.namenode.secondary.http-address parameter
  hdfs-site.xml
  <property>
  <name>dfs.namenode.secondary.http-address</name>
  <value>db01:50090</value>
  </property>
  resourcemanager:
  yarn-site.xml
  <property>
  <name>yarn.resourcemanager.hostname</name>
  <value>db01</value>
  </property>
  nodemanager:
$ cat etc/hadoop/slaves
  db01
  jobhistoryserver:
  mapred-site.xml
  <property>
  <name>mapreduce.jobhistory.address</name>
  <value>db01:10020</value>
  </property>
  <property>
  <name>mapreduce.jobhistory.webapp.address</name>
  <value>db01:19888</value>
  </property>
  22. Known issue: the NativeCodeLoader warning
$ bin/hdfs dfs -ls

  17/03/01 21:50:33 WARN util.NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable  <-- this warning only disappears after rebuilding the native libraries from source and replacing lib/native
  Found 1 items
  drwxr-xr-x   - hadoop supergroup          0 2017-03-01 16:07 mapreduce
  23. Appendix: final configuration files
$ cat etc/hadoop/core-site.xml
  <?xml version="1.0" encoding="UTF-8"?>
  <?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
  <!--
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
  You may obtain a copy of the License at
  http://www.apache.org/licenses/LICENSE-2.0
  Unless required by applicable law or agreed to in writing, software
  distributed under the License is distributed on an "AS IS" BASIS,
  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  See the License for the specific language governing permissions and
  limitations under the License. See accompanying LICENSE file.
  -->
  <!-- Put site-specific property overrides in this file. -->
  <configuration>
  <property>
  <name>fs.defaultFS</name>
  <value>hdfs://db01:9000</value>
  </property>
  <property>
  <name>hadoop.tmp.dir</name>
  <value>/usr/local/hadoop-2.5.0/data/tmp</value>
  </property>
  <property>
  <name>fs.trash.interval</name>
  <value>7000</value>
  </property>
  </configuration>
$ cat etc/hadoop/hdfs-site.xml
  <?xml version="1.0" encoding="UTF-8"?>
  <?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
  <!--
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
  You may obtain a copy of the License at
  http://www.apache.org/licenses/LICENSE-2.0
  Unless required by applicable law or agreed to in writing, software
  distributed under the License is distributed on an "AS IS" BASIS,
  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  See the License for the specific language governing permissions and
  limitations under the License. See accompanying LICENSE file.
  -->
  <!-- Put site-specific property overrides in this file. -->
  <configuration>
  <property>
  <name>dfs.replication</name>
  <value>1</value>
  </property>
  <property>
  <name>dfs.namenode.secondary.http-address</name>
  <value>db01:50090</value>
  </property>
  </configuration>
$ cat etc/hadoop/yarn-site.xml
  <?xml version="1.0"?>
  <!--
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
  You may obtain a copy of the License at
  http://www.apache.org/licenses/LICENSE-2.0
  Unless required by applicable law or agreed to in writing, software
  distributed under the License is distributed on an "AS IS" BASIS,
  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  See the License for the specific language governing permissions and
  limitations under the License. See accompanying LICENSE file.
  -->
  <configuration>
  <property>
  <name>yarn.nodemanager.aux-services</name>
  <value>mapreduce_shuffle</value>
  </property>
  <property>
  <name>yarn.resourcemanager.hostname</name>
  <value>db01</value>
  </property>
  <property>
  <name>yarn.log-aggregation-enable</name>
  <value>true</value>
  </property>
  <property>
  <name>yarn.log-aggregation.retain-seconds</name>
  <value>600000</value>
  </property>
  </configuration>
$ cat etc/hadoop/mapred-site.xml
  <?xml version="1.0"?>
  <?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
  <!--
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
  You may obtain a copy of the License at
  http://www.apache.org/licenses/LICENSE-2.0
  Unless required by applicable law or agreed to in writing, software
  distributed under the License is distributed on an "AS IS" BASIS,
  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  See the License for the specific language governing permissions and
  limitations under the License. See accompanying LICENSE file.
  -->
  <!-- Put site-specific property overrides in this file. -->
  <configuration>
  <property>
  <name>mapreduce.framework.name</name>
  <value>yarn</value>
  </property>
  <property>
  <name>mapreduce.jobhistory.address</name>
  <value>db01:10020</value>
  </property>
  <property>
  <name>mapreduce.jobhistory.webapp.address</name>
  <value>db01:19888</value>
  </property>
  </configuration>
  Also note that JAVA_HOME must be set in each of the env scripts (hadoop-env.sh, yarn-env.sh, mapred-env.sh).