???紵 发表于 2016-12-5 11:07:35

【大数据笔记】-解读hadoop命令

  下面是 hadoop 发行版 bin 目录下 hadoop 命令的源码。hadoop 命令支持很多种参数,我一直记不住,想通过精读这部分代码来记住其中的部分参数。

#!/usr/bin/env bash
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements.  See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# This script runs the hadoop core commands.

# Resolve the absolute directory that contains this script and store it
# in $bin.
#  1. Look the script up the way the shell found it; if `which` cannot
#     resolve it (e.g. the file was run as `bash path/to/hadoop`), fall
#     back to $0 itself instead of silently ending up in the current
#     directory.
#  2. Strip the file name.
#  3. Normalise to an absolute path; `&&` keeps a failed cd from
#     reporting an unrelated working directory.
# Every expansion is quoted so an install path with whitespace survives.
bin=$(which "$0" 2>/dev/null) || bin=$0
bin=$(dirname "${bin}")
bin=$(cd "$bin" && pwd)

# Locate and source hadoop-config.sh, which (per the original author's
# note) holds much of the configuration shared by the hadoop commands.
# HADOOP_LIBEXEC_DIR from the environment wins; when it is unset or empty
# the default is the libexec directory next to this script's bin directory.
DEFAULT_LIBEXEC_DIR="$bin"/../libexec
HADOOP_LIBEXEC_DIR=${HADOOP_LIBEXEC_DIR:-$DEFAULT_LIBEXEC_DIR}
# Quoted so a libexec path containing whitespace is passed to `.` as a
# single word.
. "$HADOOP_LIBEXEC_DIR/hadoop-config.sh"
# Print the command-line synopsis and the list of built-in subcommands
# to stdout. Takes no arguments.
# The text is emitted from a quoted here-document so the column alignment
# is visible in the source and nothing inside it is expanded by the shell.
function print_usage(){
  cat <<'EOF'
Usage: hadoop [--config confdir] COMMAND
       where COMMAND is one of:
  fs                   run a generic filesystem user client
  version              print the version
  jar <jar>            run a jar file
  checknative [-a|-h]  check native hadoop and compression libraries availability
  distcp <srcurl> <desturl> copy file or directories recursively
  archive -archiveName NAME -p <parent path> <src>* <dest> create a hadoop archive
  classpath            prints the class path needed to get the
                       Hadoop jar and the required libraries
  daemonlog            get/set the log level for each daemon
 or
  CLASSNAME            run the class named CLASSNAME

Most commands print help when invoked w/o parameters.
EOF
}
# Invoked without any arguments: there is nothing to run, so show the
# usage text and stop (exit status is whatever print_usage returned).
if (( $# == 0 )); then
  print_usage
  exit
fi
# Dispatch on the first argument ($0 is the script itself, $1 the command).
# Every arm ends in `exec` or `exit`, so nothing runs after this statement.
COMMAND=$1
case "$COMMAND" in
  # usage flags
  --help|-help|-h)
    print_usage
    exit
    ;;

  # hdfs commands
  namenode|secondarynamenode|datanode|dfs|dfsadmin|fsck|balancer|fetchdt|oiv|dfsgroups)
    echo "DEPRECATED: Use of this script to execute hdfs command is deprecated." 1>&2
    echo "Instead use the hdfs command for it." 1>&2
    echo "" 1>&2
    # try to locate hdfs and if present, delegate to it.
    shift
    # "dfsgroups" is forwarded as "groups". The subcommand and "$@" must be
    # separate words, otherwise the subcommand is glued to the first user
    # argument.
    if [ -f "${HADOOP_HDFS_HOME}"/bin/hdfs ]; then
      exec "${HADOOP_HDFS_HOME}"/bin/hdfs "${COMMAND/dfsgroups/groups}" "$@"
    elif [ -f "${HADOOP_PREFIX}"/bin/hdfs ]; then
      exec "${HADOOP_PREFIX}"/bin/hdfs "${COMMAND/dfsgroups/groups}" "$@"
    else
      echo "HADOOP_HDFS_HOME not found!"
      exit 1
    fi
    ;;

  # mapred commands for backwards compatibility
  pipes|job|queue|mrgroups|mradmin|jobtracker|tasktracker|mrhaadmin|mrzkfc|jobtrackerha)
    echo "DEPRECATED: Use of this script to execute mapred command is deprecated." 1>&2
    echo "Instead use the mapred command for it." 1>&2
    echo "" 1>&2
    # try to locate mapred and if present, delegate to it.
    shift
    # "mrgroups" is forwarded as "groups".
    if [ -f "${HADOOP_MAPRED_HOME}"/bin/mapred ]; then
      exec "${HADOOP_MAPRED_HOME}"/bin/mapred "${COMMAND/mrgroups/groups}" "$@"
    elif [ -f "${HADOOP_PREFIX}"/bin/mapred ]; then
      exec "${HADOOP_PREFIX}"/bin/mapred "${COMMAND/mrgroups/groups}" "$@"
    else
      echo "HADOOP_MAPRED_HOME not found!"
      exit 1
    fi
    ;;

  # Print the classpath Hadoop runs with; handy when tracking down
  # classpath problems.
  classpath)
    # NOTE(review): $cygwin is not set in this file; presumably
    # hadoop-config.sh sets it to true/false -- confirm.
    if $cygwin; then
      CLASSPATH=`cygpath -p -w "$CLASSPATH"`
    fi
    # Quoted so the value is printed exactly as stored (no word splitting
    # or pathname expansion of wildcard entries).
    echo "$CLASSPATH"
    exit
    ;;

  # core commands
  *)
    # the core commands: map the command name to the Java class to run
    if [ "$COMMAND" = "fs" ] ; then
      CLASS=org.apache.hadoop.fs.FsShell
    elif [ "$COMMAND" = "version" ] ; then
      CLASS=org.apache.hadoop.util.VersionInfo
    elif [ "$COMMAND" = "jar" ] ; then
      CLASS=org.apache.hadoop.util.RunJar
    elif [ "$COMMAND" = "checknative" ] ; then
      CLASS=org.apache.hadoop.util.NativeLibraryChecker
    elif [ "$COMMAND" = "distcp" ] ; then
      CLASS=org.apache.hadoop.tools.DistCp
      CLASSPATH=${CLASSPATH}:${TOOL_PATH}
    elif [ "$COMMAND" = "daemonlog" ] ; then
      CLASS=org.apache.hadoop.log.LogLevel
    elif [ "$COMMAND" = "archive" ] ; then
      CLASS=org.apache.hadoop.tools.HadoopArchives
      CLASSPATH=${CLASSPATH}:${TOOL_PATH}
    elif [[ "$COMMAND" = -* ]] ; then
      # class and package names cannot begin with a -
      echo "Error: No command named \`$COMMAND' was found. Perhaps you meant \`hadoop ${COMMAND#-}'"
      exit 1
    else
      # Nothing above matched: treat the first argument as a class name, e.g.
      #   hadoop org.apache.hadoop.examples.WordCount /tmp/15 /tmp/46
      CLASS=$COMMAND
    fi

    # Drop the command/class name from "$@". For the example above:
    #   before shift: org.apache.hadoop.examples.WordCount /tmp/15 /tmp/46
    #   after shift:  /tmp/15 /tmp/46
    shift

    # Always respect HADOOP_OPTS and HADOOP_CLIENT_OPTS.
    # Per the original author's note, the defaults for both live in
    # hadoop-config.sh; edit that file to change launch options (for
    # example to enable remote debugging).
    HADOOP_OPTS="$HADOOP_OPTS $HADOOP_CLIENT_OPTS"

    # make sure security appender is turned off
    HADOOP_OPTS="$HADOOP_OPTS -Dhadoop.security.logger=${HADOOP_SECURITY_LOGGER:-INFO,NullAppender}"

    # Cygwin compatibility: convert the classpath to Windows form.
    if $cygwin; then
      CLASSPATH=`cygpath -p -w "$CLASSPATH"`
    fi

    # Export so the exec'd JVM inherits CLASSPATH through the environment.
    export CLASSPATH=$CLASSPATH

    # $JAVA_HEAP_MAX, $HADOOP_OPTS and $CLASS are intentionally unquoted:
    # they hold whitespace-separated JVM options that must be split into
    # separate arguments.
    exec "$JAVA" $JAVA_HEAP_MAX $HADOOP_OPTS $CLASS "$@"
    ;;
esac
页: [1]
查看完整版本: 【大数据笔记】-解读hadoop命令