export CLASSPATH=.:$JAVA_HOME/lib/dt.jar:$JAVA_HOME/lib/tools.jar
export PATH=$JAVA_HOME/bin:$PATH
export HADOOP_MAPRED_HOME=$HADOOP_HOME
export HADOOP_COMMON_HOME=$HADOOP_HOME
export HADOOP_HDFS_HOME=$HADOOP_HOME
export YARN_HOME=$HADOOP_HOME
export HADOOP_COMMON_LIB_NATIVE_DIR=$HADOOP_HOME/lib/native
export PATH=$PATH:$HADOOP_HOME/sbin:$HADOOP_HOME/bin
若出现提示 “INFO metrics.MetricsUtil: Unable to obtain hostName java.net.UnknowHostException”,
或者其他的netconnectionexception都可用此解决
这需要执行如下命令修改 hosts 文件,为你的主机名增加IP映射:
- sudo vim /etc/hosts
主机名在终端窗口标题里可以看到,或执行命令 hostname
查看,如下图所示,在最后面增加一行 “127.0.0.1 dblab”:
保存文件后,重新运行 hadoop 实例
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<!--
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. See accompanying LICENSE file.
-->
<property>
<name>io.file.buffer.size</name>
<value>131072</value>
</property>
</configuration>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<!--
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. See accompanying LICENSE file.
-->
<property>
<name>dfs.replication</name>
<value>1</value>
</property>
<name>dfs.support.append</name>
<value>true</value>
</property>
<name>dfs.namenode.name.dir</name>
<value>file:/home/lz/hadoop-2.7.4/namenode</value>
<name>dfs.blocksize</name>
<value>565456465</value>具体得设置一下提示 小于最小值然后再设置大于最小就行
</property>
<name>dfs.datanode.data.dir</name>
<value>file:/home/lz/hadoop-2.7.4/datanode
</value>
<property>
<name>dfs.webhdfs.enabled</name>
<value>true</value>
</property>
</configuration>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<!--
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. See accompanying LICENSE file.
-->
<property>
<name>mapreduce.framework.name</name>
<value>yarn</value>
</property>
<name>mapreduce.jobhistory.address</name>
<value>localhost:10020</value>
</property>
<property>
<name>mapreduce.jobhistory.webapp.address</name>
<value>localhost:19888</value>
</property>
<property>
<name>mapreduce.tasktracker.map.tasks.maximum</name>
<value>10</value>
</property>
<property>
<name>mapreduce.tasktracker.reduce.tasks.maximum</name>
<value>4</value>
</property>
</configuration>
<!--
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. See accompanying LICENSE file.
-->
<configuration>
<name>yarn.nodemanager.aux-services</name>
<value>mapreduce_shuffle</value>
</property>
<name>yarn.nodemanager.aux-services.mapreduce.shuffle.class</name>
<value>org.apache.hadoop.mapred.ShuffleHandler</value>
</property>
<name>yarn.resourcemanager.admin.address</name>
<value>localhost:8033</value>
</property>
<name>yarn.resourcemanager.address</name>
<value>localhost:8032</value>
</property>
<property>
<name>yarn.resourcemanager.resource-tracker.address</name>
<value>localhost:8031</value>
</property>
<property>
<name>yarn.resourcemanager.scheduler.address</name>
<value>localhost:8030</value>
</property>
export HADOOP_HOME=/home/hadoop/labc/hadoop- 2.7. 1
export HADOOP_OPTS= "-Djava.library.path=$HADOOP_HOME/lib:$HADOOP_COMMON_LIB_NATIVE_DIR"
提示hadoop不能加载本地库。
以前官网会提供32位的安装包,但目前提供的下载包为64位的,在linux 32位系统下安装后会一直提示错误“WARN util.NativeCodeLoader: Unable to load native-hadoop library for your platform… using builtin-java classes where applicable ”,但官网又不提供32位系统下的安装包,所以我们只能自己编译打包源码
上图镇下
echo "Hello World Hello Hadoop" > 1.txt
echo "Hello Hadoop Bye " >2.txt
然后把数据put到HDFS里:
hadoop fs -mkdir /input
hadoop fs -put /root/1.txt /input
hadoop fs -put /root/2.txt /input
再然后进入到jar所在的目录里“
cd hadoop-2.0.0-cdh4.5.0\share\hadoop\mapreduce1
执行命令:
hadoop jar hadoop-mapreduce-examples-2.0.0-cdh4.5.0.jar WordCount /input /output
一步一错,一步一坑
伪分布式运行 MapReduce 作业的方式跟单机模式相同,区别在于伪分布式读取的是HDFS中的文
运行 Hadoop 程序时,为了防止覆盖结果,程序指定的输出目录(如 output)不能存在,否则会提示错误,因此运行前需要先删除输出目录
(伪分布式不启动 YARN 也可以,一般不会影响程序执行)
有的读者可能会疑惑,怎么启动 Hadoop 后,见不到书上所说的 JobTracker 和 TaskTracker,这是因为新版的 Hadoop 使用了新的 MapReduce 框架(MapReduce V2,也称为 YARN,Yet Another Resource Negotiator)。
YARN 是从 MapReduce 中分离出来的,负责资源管理与任务调度。YARN 运行于 MapReduce 之上,提供了高可用性、高扩展性,YARN 的更多介绍在此不展开,有兴趣的可查阅相关资料。
启动 YARN 之后,运行实例的方法还是一样的,仅仅是资源管理方式、任务调度不同。观察日志信息可以发现,不启用 YARN 时,是 “mapred.LocalJobRunner” 在跑任务,启用 YARN 之后,是 “mapred.YARNRunner” 在跑任务。启动 YARN 有个好处是可以通过 Web 界面查看任务的运行情况:http://localhost:8088/cluster,
</configuration>