Posted by 3ewsd on 2017-12-17 06:19:08

Hadoop HA YARN setup

<configuration>
<!-- Site specific YARN configuration properties -->

<!-- Whether to enable log aggregation. When an application finishes, the logs of each of its containers are collected and moved to a file system such as HDFS. -->

<!-- The destination is controlled by "yarn.nodemanager.remote-app-log-dir" and "yarn.nodemanager.remote-app-log-dir-suffix". -->

<!-- Users can then access the logs through the application timeline/history server. -->

<property>
  <name>yarn.log-aggregation-enable</name>
  <value>true</value>
</property>
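<!-- Once aggregation is enabled, the logs of a finished application can be pulled from the
     command line with the standard yarn CLI, for example (the id is a placeholder):
     yarn logs -applicationId <application_id>
-->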
  

  
<!--
<property>
  <name>yarn.log.server.url</name>
  <value>hadoop-master1:19888/jobhistory/logs</value>
</property>
<property>
  <name>yarn.nodemanager.aux-services.mapreduce.shuffle.class</name>
  <value>org.apache.hadoop.mapred.ShuffleHandler</value>
</property>
-->
  
<!-- Enable ResourceManager HA (default: false) -->

<property>
  <name>yarn.resourcemanager.ha.enabled</name>
  <value>true</value>
</property>
  
<!-- Cluster id; it ensures that an RM never becomes active for a different cluster -->

<property>
  <name>yarn.resourcemanager.cluster-id</name>
  <value>mycluster</value>
</property>
  
<!-- Logical ids of the ResourceManagers -->

<property>
  <name>yarn.resourcemanager.ha.rm-ids</name>
  <value>rm1,rm2</value>
</property>
  
<property>
  <name>yarn.resourcemanager.hostname.rm1</name>
  <value>node1</value>
</property>

<property>
  <name>yarn.resourcemanager.hostname.rm2</name>
  <value>node2</value>
</property>

<property>
  <name>yarn.resourcemanager.address.rm1</name>
  <value>node1:8032</value>
</property>

<property>
  <name>yarn.resourcemanager.scheduler.address.rm1</name>
  <value>node1:8030</value>
</property>

<property>
  <name>yarn.resourcemanager.resource-tracker.address.rm1</name>
  <value>node1:8031</value>
</property>

<property>
  <name>yarn.resourcemanager.admin.address.rm1</name>
  <value>node1:8033</value>
</property>

<property>
  <name>yarn.resourcemanager.webapp.address.rm1</name>
  <value>node1:8088</value>
</property>

<property>
  <name>yarn.resourcemanager.address.rm2</name>
  <value>node2:8032</value>
</property>

<property>
  <name>yarn.resourcemanager.scheduler.address.rm2</name>
  <value>node2:8030</value>
</property>

<property>
  <name>yarn.resourcemanager.resource-tracker.address.rm2</name>
  <value>node2:8031</value>
</property>

<property>
  <name>yarn.resourcemanager.admin.address.rm2</name>
  <value>node2:8033</value>
</property>

<property>
  <name>yarn.resourcemanager.webapp.address.rm2</name>
  <value>node2:8088</value>
</property>
  
<!-- Enable automatic recovery of ResourceManager state -->

<property>
  <name>yarn.resourcemanager.recovery.enabled</name>
  <value>true</value>
</property>

<!-- Set rm1 on node1 and rm2 on node2. Note: the finished file is usually copied to all other machines, but this value must be changed on the second ResourceManager node, and the property should not be set on the remaining (non-RM) machines -->
<property>
  <name>yarn.resourcemanager.ha.id</name>
  <value>rm1</value>
  <description>If we want to launch more than one RM in single node, we need this configuration</description>
</property>
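<!-- For reference, the copy of this file on node2 would carry the other id, e.g.:
     <property>
       <name>yarn.resourcemanager.ha.id</name>
       <value>rm2</value>
     </property>
-->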
<!-- Class used for persisting ResourceManager state; here the ZooKeeper-based state store is used -->

<property>
  <name>yarn.resourcemanager.store.class</name>
  <value>org.apache.hadoop.yarn.server.resourcemanager.recovery.ZKRMStateStore</value>
</property>

<property>
  <name>yarn.resourcemanager.zk-address</name>
  <value>node2:2181,node3:2181,node4:2181</value>
  <description>For multiple zk services, separate them with comma</description>
</property>

<!-- Enable automatic ResourceManager failover -->

<property>
  <name>yarn.resourcemanager.ha.automatic-failover.enabled</name>
  <value>true</value>
  <description>Enable automatic failover; By default, it is enabled only when HA is enabled.</description>
</property>

<property>
  <name>yarn.client.failover-proxy-provider</name>
  <value>org.apache.hadoop.yarn.client.ConfiguredRMFailoverProxyProvider</value>
</property>
  
<!-- Number of CPU vcores that can be allocated for containers on each NodeManager -->

<property>
  <name>yarn.nodemanager.resource.cpu-vcores</name>
  <value>4</value>
</property>

<property>
  <name>yarn.nodemanager.resource.memory-mb</name>
  <value>4608</value>
  <description>Memory, in MB, available for containers on each node</description>
</property>

<property>
  <name>yarn.scheduler.minimum-allocation-mb</name>
  <value>1536</value>
  <description>Minimum memory a single container can request, in MB (default 1024)</description>
</property>

<property>
  <name>yarn.scheduler.maximum-allocation-mb</name>
  <value>4608</value>
  <description>Maximum memory a single container can request, in MB (default 8192)</description>
</property>
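<!-- Note: with 4608 MB usable per NodeManager and a 1536 MB minimum allocation, at most three
     minimum-sized containers fit on a node, while a single container may grow to the full 4608 MB. -->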
  
<!-- How long, in seconds, to keep aggregated logs before deleting them -->

<property>
  <name>yarn.log-aggregation.retain-seconds</name>
  <value>2592000</value><!-- 30 days -->
</property>

<!-- How long, in seconds, to keep container logs on the NodeManager. Only applies when log aggregation is disabled -->

<property>
  <name>yarn.nodemanager.log.retain-seconds</name>
  <value>604800</value><!-- 7 days -->
</property>

<!-- Compression type used for the aggregated logs -->

<property>
  <name>yarn.nodemanager.log-aggregation.compression-type</name>
  <value>gz</value>
</property>

<!-- Local directories where the NodeManager stores intermediate data -->

<property>
  <name>yarn.nodemanager.local-dirs</name>
  <value>/data/hadoop/tmp/yarn/local</value>
</property>
  
<!-- <property>
  <name>yarn.resourcemanager.state-store.max-completed-applications</name>
  <value>500</value>
</property> -->

<property>
  <name>yarn.resourcemanager.max-completed-applications</name>
  <value>1000</value>
</property>
  
<!-- Comma-separated list of auxiliary services; names may only contain a-zA-Z0-9_ and must not start with a digit -->

<property>
  <name>yarn.nodemanager.aux-services</name>
  <value>mapreduce_shuffle</value>
</property>

<property>
  <name>yarn.nodemanager.aux-services.spark_shuffle.class</name>
  <value>org.apache.spark.network.yarn.YarnShuffleService</value>
</property>
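<!-- Note: the spark_shuffle handler class above is only loaded if "spark_shuffle" is also added to
     yarn.nodemanager.aux-services (e.g. mapreduce_shuffle,spark_shuffle) and the Spark YARN shuffle
     jar shipped with the Spark distribution is on the NodeManager classpath; adjust the jar location
     for your installation before enabling it. -->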
  
<!-- Retry interval, in milliseconds, for reconnecting to the ResourceManager after losing contact -->

<property>
  <name>yarn.resourcemanager.connect.retry-interval.ms</name>
  <value>2000</value>
</property>
  
</configuration>
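
After copying this file to both ResourceManager nodes (changing yarn.resourcemanager.ha.id to rm2 on node2) and starting YARN, a quick sanity check is to ask each ResourceManager for its HA state. The commands below are a minimal sketch using the standard yarn CLI, assuming the Hadoop binaries are on the PATH and the rm ids match the configuration above:

yarn rmadmin -getServiceState rm1
yarn rmadmin -getServiceState rm2

One RM should report "active" and the other "standby"; stopping the active one should cause the standby to take over on its own, since automatic failover is enabled above.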
  