xiguaqq20 posted on 2018-10-29 08:02:20

Installing hadoop-2.8.0

0x01 Versions
Versions: hadoop-2.8.0, hbase-1.2.5, Hive 2.1.1, Spark 2.1.1, scala-2.12.1
  
0x02 HDFS installation
  
1. Initialization: create the hadoop directory and user and set up the JDK environment with the Ansible tasks below (see the note after the task list).
  
- name: pro
  file: path=/home/hadoop state=directory

- name: add user
  action: user name=hadoop update_password=always shell=/bin/bash

- name: chpasswd
  shell: echo "xx"|passwd --stdin hadoop

- name: chown
  shell: chown -R hadoop.hadoop /home/hadoop

- name: copy profile
  copy: src=/opt/src/hprofile dest=/etc/profile force=yes owner=root group=root mode=0644

- name: copy jdk
  copy: src=/opt/src/jdk.tar.gz dest=/usr/java/

- name: tar
  shell: chdir=/usr/java tar xvzf jdk.tar.gz

- name: rm
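These tasks have to live inside a play before Ansible will run them; a minimal invocation sketch, assuming they are wrapped in a play and saved as hadoop-init.yml with an inventory file named hosts (both file names are placeholders, not from the original):

ansible-playbook -i hosts hadoop-init.yml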
  
2. Set up passwordless SSH for the hadoop user between the namenode and the datanodes, including from each node to itself.

The secondary namenode also needs passwordless SSH to the datanodes.
  

  
The scripts below come from http://slaytanic.blog.51cto.com/2057708/1370007.
  

  
2.1 Generate the SSH key
  
ssh-keygen -t rsa -P '' -f ~/.ssh/id_rsa
  
cp ~/.ssh/id_rsa.pub ~/.ssh/authorized_keys
  
2.2 Script to add the public key to a slave node
  
#!/bin/sh
  
read -p "Enter the remote server IP: " ip
  
ssh-copy-id -o StrictHostKeyChecking=no -i ~/.ssh/id_rsa.pub root@$ip
  
ssh root@$ip 'sed -i "s/^#RSAAuthentication\ yes/RSAAuthentication\ yes/g" /etc/ssh/sshd_config'
  
ssh root@$ip 'sed -i "s/^#PubkeyAuthentication\ yes/PubkeyAuthentication yes/g" /etc/ssh/sshd_config'
  
ssh root@$ip 'sed -i "s/^#PermitRootLogin\ yes/PermitRootLogin\ yes/g" /etc/ssh/sshd_config'
  
ssh root@$ip 'service sshd restart'
  
hostname=`ssh root@${ip} 'hostname'`
  
echo "添加主机名和IP到本地/etc/hosts文件中"
  
echo "$ip    $hostname" >> /etc/hosts
  
echo "远端主机主机名称为$hostname, 请查看 /etc/hosts 确保该主机名和IP添加到主机列表文件中"
  
echo "主机公钥复制完成
  
2.3 Read the host list and copy /etc/hosts to every host
  
#!/bin/sh
  
cat /etc/hosts | while read LINE
  
do
  
    ip=`echo $LINE | awk '{print $1}' | grep -v "::" | grep -v "127.0.0.1"`
  
    echo "Copying /etc/hosts to ${ip}"
  
    scp -o StrictHostKeyChecking=no /etc/hosts root@${ip}:/etc/
  
done
  

  
Or use your own exp.sh <ip> script.
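A quick way to confirm the passwordless SSH really works from the namenode, assuming /etc/hosts already lists every node (a sketch, not part of the original scripts):

#!/bin/sh
# try a non-interactive ssh to every hostname in /etc/hosts as the hadoop user
grep -v "::" /etc/hosts | grep -v "127.0.0.1" | awk '{print $2}' | while read host
do
    [ -n "$host" ] || continue
    ssh -o BatchMode=yes -o StrictHostKeyChecking=no hadoop@$host hostname \
        || echo "passwordless ssh to $host FAILED"
done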
  

  
3. Configuration changes
  

  
Namenode HA configuration
  
vim hdfs-site.xml
  

  
<property>
    <name>dfs.namenode.secondary.http-address</name>
    <value>d17:50090</value>
</property>
  

  

  
Test HA

$ sbin/hadoop-daemon.sh stop namenode

Check the namenode on CentOS7-2 again: it has automatically switched to active.
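If a real HA pair is configured, the active/standby state can also be checked from the command line; the NameNode IDs nn1 and nn2 below are assumptions, use whatever dfs.ha.namenodes.* defines in hdfs-site.xml:

hdfs haadmin -getServiceState nn1
hdfs haadmin -getServiceState nn2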
  

  
vim slaves
  
d17
  
d18
  

  
See my earlier write-up on installing hadoop-2.3.0-cdh5.1.2:
  
http://szgb17.blog.51cto.com/340201/1691814
  

  
4. Initialize and start HDFS

hadoop namenode -format    # initialization, run only once

Start commands, old style:
  
hadoop-daemon.sh start namenode
  
hadoop-daemons.sh start datanode
  
yarn-daemon.sh start resourcemanager
  
yarn-daemons.sh start nodemanager
  
New style:

start-dfs.sh prints a warning about the native library and suggests recompiling; too much trouble, so I skipped it:
  
17/05/15 17:10:15 WARN util.NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable
  
start-yarn.sh
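Besides jps, a quick sanity check that the datanodes and nodemanagers registered (standard commands, shown as a sketch):

hdfs dfsadmin -report    # should list d17 and d18 as live datanodes
yarn node -list          # should list the running nodemanagers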
  

  
5. Confirm the services after startup
  

  
Services started by the old version (found online):
  
$ jps
  
8192 TaskTracker
  
7905 DataNode
  
7806 NameNode
  
8065 JobTracker
  
8002 SecondaryNameNode
  
8234 Jps
  

  
New version:
  
$ jps
  
9088 Jps
  
472 NameNode
  
2235 ResourceManager
  
1308 QuorumPeerMain
  
1901 HMaster
  

  

  
0x03 HBase installation

Startup order: hadoop --> zookeeper --> hbase

Shutdown order: hbase --> zookeeper --> hadoop

1. Install zookeeper first, using ansible.

2. start-hbase.sh fails with:

Could not start ZK with 3 ZK servers in local mode deployment

This error mostly comes from getting the hbase.cluster.distributed property wrong; keep that in mind.
  
vim hbase-site.xml
  

  

  
<property>
    <name>hbase.rootdir</name>
    <value>hdfs://n16:9000/hbase/data</value>
</property>

<property>
    <name>hbase.cluster.distributed</name>
    <value>true</value>
</property>

<property>
    <name>hbase.zookeeper.property.clientPort</name>
    <value>2182</value>
</property>

<property>
    <name>hbase.zookeeper.quorum</name>
    <value>n16,d17,d18</value>
</property>

<property>
    <name>hbase.zookeeper.property.dataDir</name>
    <value>/home/hadoop/zookeeper/data</value>
</property>
  

  

  
3. Run start-hbase.sh
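After start-hbase.sh finishes, the master and regionservers can be checked from the HBase shell (a quick sketch):

echo "status 'simple'" | hbase shell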
  

  
cat /etc/profile
  
export JAVA_HOME=/usr/java/jdk
  
export JRE_HOME=/usr/java/jdk/jre
  
export CLASSPATH=.:$JAVA_HOME/lib/dt.jar:$JAVA_HOME/lib/tools.jar:$JRE_HOME/lib
  
export PATH=$JAVA_HOME/bin:$JRE_HOME/bin:$PATH
  
export HADOOP_HOME=/home/hadoop
  
export PATH=$HADOOP_HOME/bin:$HADOOP_HOME/sbin:$JAVA_HOME/bin:/home/hadoop/hbase/bin:$PATH
  

  

  
0x04 Hive installation

The metastore uses a remote MySQL database.
  

  
JDBC driver jar: mysql-connector-java-5.1.38
  

  
On the MySQL server 172.x.x.3, port 3306:
  
CREATE DATABASE hive;
  
USE hive;
  
CREATE USER 'hive'@'localhost' IDENTIFIED BY 'hive';
  
GRANT ALL ON hive.* TO 'hive'@'localhost' IDENTIFIED BY 'hive';
  
GRANT ALL ON hive.* TO 'hive'@'172.x.1.%' IDENTIFIED BY 'hive';
  
GRANT ALL ON hive.* TO 'hive'@'172.x.2.%' IDENTIFIED BY 'hive';
  
FLUSH PRIVILEGES;
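A quick test from one of the Hadoop nodes that the hive account can actually reach the metastore database (a sketch; 172.x.x.3 is the same placeholder host as above):

mysql -h 172.x.x.3 -P 3306 -u hive -phive -e 'SHOW DATABASES;'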
  

  
hdfs dfs -mkdir -p /user/hive/warehouse
  
hdfs dfs -mkdir -p /user/hive/tmp
  
hdfs dfs -mkdir -p /user/hive/log
  
hdfs dfs -chmod g+w /user/hive/warehouse
  
#hdfs dfs -chmod g+w /user/hive/tmp
  
hdfs dfs -chmod g+w /user/hive/log
  
hdfs dfs -chmod 777 /user/hive/tmp
  

  
Configuration files

Copy from the templates:
  
cp hive-env.sh.template hive-env.sh
  
cp hive-default.xml.template hive-site.xml
  
cp hive-log4j2.properties.template hive-log4j2.properties
  
cp hive-exec-log4j2.properties.template hive-exec-log4j2.properties
  

  
cat hive-env.sh
  
export JAVA_HOME=/usr/java/jdk
  
export HADOOP_HOME=/home/hadoop
  
export HIVE_HOME=/home/hadoop/hive
  
export HIVE_CONF_DIR=/home/hadoop/conf
  

  
vim hive-site.xml
  

  
<property>
    <name>hive.exec.scratchdir</name>
    <value>/user/hive/tmp</value>
    <description>HDFS root scratch dir for Hive jobs which gets created with write all (733) permission. For each connecting user, an HDFS scratch dir: ${hive.exec.scratchdir}/&lt;username&gt; is created, with ${hive.scratch.dir.permission}.</description>
</property>

<property>
    <name>hive.metastore.warehouse.dir</name>
    <value>/user/hive/warehouse</value>
    <description>location of default database for the warehouse</description>
</property>

<property>
    <name>hive.querylog.location</name>
    <value>/user/hive/log</value>
    <description>Location of Hive run time structured log file</description>
</property>

<property>
    <name>javax.jdo.option.ConnectionURL</name>
    <value>jdbc:mysql://172.x.x.3:3306/hive?createDatabaseIfNotExist=true&amp;characterEncoding=UTF-8&amp;useSSL=false</value>
</property>

<property>
    <name>javax.jdo.option.ConnectionDriverName</name>
    <value>com.mysql.jdbc.Driver</value>
</property>

<property>
    <name>javax.jdo.option.ConnectionUserName</name>
    <value>hive</value>
</property>

<property>
    <name>javax.jdo.option.ConnectionPassword</name>
    <value>hive</value>
</property>
  

  

  
The following also need to be changed:
  
<property>
    <name>hive.exec.local.scratchdir</name>
    <value>/tmp/${user.name}</value>
</property>

<property>
    <name>hive.downloaded.resources.dir</name>
    <value>/tmp/${hive.session.id}_resources</value>
</property>
  
In hive-site.xml, replace ${system:java.io.tmpdir} with /tmp and ${system:user.name} with ${user.name}.
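A sketch of that replacement with sed (back up hive-site.xml first; the # delimiter just avoids clashing with the / in /tmp):

cp hive-site.xml hive-site.xml.bak
sed -i 's#\${system:java.io.tmpdir}#/tmp#g' hive-site.xml
sed -i 's#\${system:user.name}#${user.name}#g' hive-site.xml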
  

  

  
Initialize the metastore schema:
  
schematool -dbType mysql -initSchema
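To confirm the schema landed in MySQL, schematool can also report the version it finds (a quick check):

schematool -dbType mysql -info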
  

  
Start:
  
hive --service hiveserver2 &
  
hive --service metastore &
  
hadoop job -kill <jobid>    # kill a MapReduce job if needed
  

  
kill `pgrep -f hive`
  

  
nohup hive --service metastore &
  
nohup hive --service hiveserver2 &
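Once hiveserver2 is up, a connection test with beeline (10000 is the default hiveserver2 port; the user name here is an assumption, adjust as needed):

beeline -u jdbc:hive2://localhost:10000 -n hadoop -e "show databases;"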
  

  
Setting "hive.metastore.schema.verification" to false in conf/hive-site.xml fixes "Caused by: MetaException(message:Version information not found in metastore. )".

What this option means:

hive.metastore.schema.verification: enforces metastore schema consistency. When enabled, it checks that the schema version stored in the metastore matches the version in the Hive jars and disables automatic schema migration, so the user must upgrade Hive and migrate the schema manually. When disabled, it only warns on a version mismatch. The default is false.

Because this option had been set to true, a jar version mismatch could keep the service from starting.

After changing the value from true to false, spark-shell started without problems.
  

  

  
Debug mode: hive -hiveconf hive.root.logger=DEBUG,console
  

  

  
0x05 Spark installation
  

  
cat /etc/profile
  
export JAVA_HOME=/usr/java/jdk
  
export JRE_HOME=/usr/java/jdk/jre
  
export CLASSPATH=.:$JAVA_HOME/lib/dt.jar:$JAVA_HOME/lib/tools.jar:$JRE_HOME/lib
  
export PATH=$JAVA_HOME/bin:$JRE_HOME/bin:$PATH
  
export HADOOP_HOME=/home/hadoop
  
export PATH=$HADOOP_HOME/bin:$HADOOP_HOME/sbin:$JAVA_HOME/bin:/home/hadoop/hbase/bin:/home/hadoop/hive/bin:/home/hadoop/spark/bin:/home/hadoop/scala/bin:/home/hadoop/spark/sbin:$PATH
  

  
http://mirror.bit.edu.cn/apache/spark/spark-2.1.1/
  
http://spark.apache.org/downloads.html
  
Since we installed Hadoop ourselves, choose "Pre-built with user-provided Hadoop" under "Choose a package type", then click "spark-2.1.0-bin-without-hadoop.tgz" next to "Download Spark". The browser saves the file under "/home/hadoop/下载" (Downloads) by default. Note that "Pre-built with user-provided Hadoop" is the "Hadoop free" build, so the downloaded Spark works with any Hadoop version.
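Because this is the Hadoop-free build, Spark has to be pointed at the local Hadoop jars; the usual way is an entry in conf/spark-env.sh, which is the same setting used again in the distributed setup below:

export SPARK_DIST_CLASSPATH=$(hadoop classpath)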
  

  
Local mode
  
http://dblab.xmu.edu.cn/blog/1307-2/
  
export SCALA_HOME=/root/dev/java/scala-2.12.1
  
export SPARK_WORKER_MEMORY=1g
  
export SPARK_MASTER_IP=your_server_ip
  
export MASTER=spark://your_server_ip:7077
  
export SPARK_SSH_OPTS="-p 22000"
  
At this point Spark 2.1.0 is fully installed. To verify the installation, change into $SPARK_HOME and run the Pi example:
  
./bin/run-example org.apache.spark.examples.SparkPi
  
We can also try starting spark-shell:
  
./bin/spark-shell
  

  
Distributed mode
  
cd /home/hadoop/spark/conf
  
vim spark-env.sh
  
export JAVA_HOME=/usr/java/jdk
  
export SCALA_HOME=/home/hadoop/scala
  
export SPARK_MASTER_IP=n16
  
export SPARK_WORKER_MEMORY=256M
  
export HADOOP_CONF_DIR=/home/hadoop/etc/hadoop
  
#export SPARK_DIST_CLASSPATH=$(hadoop classpath)
  

  
"echo \"$(hadoop classpath)\""
  
echo $(hadoop classpath)
  
export SPARK_DIST_CLASSPATH="/home/hadoop/etc/hadoop:/home/hadoop/share/hadoop/common/lib/*:/home/hadoop/share/hadoop/common/*:/home/hadoop/share/hadoop/hdfs:/home/hadoop/share/hadoop/hdfs/lib/*:/home/hadoop/share/hadoop/hdfs/*:/home/hadoop/share/hadoop/yarn/lib/*:/home/hadoop/share/hadoop/yarn/*:/home/hadoop/share/hadoop/mapreduce/lib/*:/home/hadoop/share/hadoop/mapreduce/*:/home/hadoop/contrib/capacity-scheduler/*.jar"
  

  
When echo is run remotely over ssh, special characters (single and double quotes) can get lost;

see http://bbs.chinaunix.net/thread-3739461-1-1.html
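Two ways around the lost-quote problem when pushing the export line to the workers (a sketch; the classpath value is truncated here for brevity):

# simplest: copy the finished spark-env.sh instead of echoing it remotely
scp /home/hadoop/spark/conf/spark-env.sh hadoop@d17:/home/hadoop/spark/conf/

# or keep the remote command single-quoted so the inner double quotes survive
line='export SPARK_DIST_CLASSPATH="/home/hadoop/etc/hadoop:/home/hadoop/share/hadoop/common/lib/*"'
ssh hadoop@d17 "echo '$line' >> /home/hadoop/spark/conf/spark-env.sh"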
  

  
vim slaves
  
d17
  
d18
  

  
cp -r log4j.properties.template log4j.properties
  

  
Start:
  
start-master.sh
  
Error: A JNI error has occurred, please check your installation and try again
  
Exception in thread "main" java.lang.NoClassDefFoundError: org/slf4j/Logger
  at java.lang.Class.getDeclaredMethods0(Native Method)
  at java.lang.Class.privateGetDeclaredMethods(Class.java:2701)
  at java.lang.Class.privateGetMethodRecursive(Class.java:3048)
  at java.lang.Class.getMethod0(Class.java:3018)
  at java.lang.Class.getMethod(Class.java:1784)
  at sun.launcher.LauncherHelper.validateMainClass(LauncherHelper.java:544)
  at sun.launcher.LauncherHelper.checkAndLoadMain(LauncherHelper.java:526)
  
Caused by: java.lang.ClassNotFoundException: org.slf4j.Logger
  at java.net.URLClassLoader.findClass(URLClassLoader.java:381)
  at java.lang.ClassLoader.loadClass(ClassLoader.java:424)
  at sun.misc.Launcher$AppClassLoader.loadClass(Launcher.java:331)
  at java.lang.ClassLoader.loadClass(ClassLoader.java:357)
  ... 7 more
  

  
This indicates the stock "without-hadoop" build: its jars directory does not include the Hadoop-provided jars (which is where slf4j comes from).
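A quick way to confirm the cause is the missing Hadoop-provided jars rather than a broken install (a sketch, assuming SPARK_HOME points at /home/hadoop/spark):

ls $SPARK_HOME/jars | grep -i slf4j || echo "no slf4j jar bundled"
echo $SPARK_DIST_CLASSPATH    # should contain the hadoop share/... directories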
  

  
start-slave.sh
  

  

  
0x06 Startup errors
  
1. hbase: Could not start ZK with 3 ZK servers in local mode deployment
  

  
2. hive: Unable to instantiate org.apache.hadoop.hive.ql.metadata.SessionHiveMetaStoreClient
  

  
source /etc/profile
  
vim hive-site.xml
  
<property>
    <name>hive.vectorized.execution.enabled</name>
    <value>false</value>
</property>
  
hive --service metastore &

hive --service hiveserver2 &

hive -hiveconf hive.root.logger=DEBUG,console
  

  
3. spark

A problem caused by the Hive version being too new.
  

  
0x07 References
  
http://slaytanic.blog.51cto.com/2057708/1397396
  

  
Web UIs

View HDFS: http://ip:50070 (namenode)

View the Hadoop cluster status: http://ip:8088
  
vim /home/hadoop/hadoop-2.2.0/etc/hadoop/yarn-site.xml
  
<property>
    <name>yarn.resourcemanager.webapp.address</name>
    <value>xuegod63.cn:8088</value>
</property>
  

  

  

  
historyserver
  
mr-jobhistory-daemon.sh start historyserver
  
Web access:
  
http://172.x.x.1:19888/   JobHistory
  
http://172.x.x.1:8088/   All Applications
  

  

  
0x08 Startup commands and services
  
start-dfs.sh
  
start-yarn.sh
  
start-hbase.sh
  
hive --service hiveserver2&
  
mr-jobhistory-daemon.sh start historyserver
  
start-master.sh
  
start-slave.sh
  

  
Or simply use start-all.sh.
  

  
stop-master.sh
  
stop-slave.sh
  
mr-jobhistory-daemon.sh stop historyserver
  
kill `pgrep -f hive`
  
stop-hbase.sh
  
stop-yarn.sh
  
stop-dfs.sh
  

  
Check the services after startup

On n16:
  
$ jps
  
14865 ResourceManager
  
17748 Jps
  
13749 RunJar
  
17575 Master
  
472 NameNode
  
15690 HMaster
  
15354 JobHistoryServer
  
13931 RunJar
  
1308 QuorumPeerMain
  

  
On d17:
  
$ jps
  
28468 HRegionServer
  
18420 QuorumPeerMain
  
28151 NodeManager
  
18072 DataNode
  
18184 SecondaryNameNode
  
29944 Worker
  
30108 Jps

