Running the Hadoop WordCount Example from Eclipse

A word of advice up front: if you are using a 1.x version of Hadoop, install exactly the versions described in the blog post linked under item 1 below (either standalone or pseudo-distributed mode works). The main reason is that a mismatch between the Hadoop version and the Eclipse plugin version causes many problems that are troublesome to resolve.

First, the articles I consulted while setting up Hadoop and getting the example to run:

1. Setting up the Hadoop environment in Eclipse:

http://www.cnblogs.com/xia520pi/archive/2012/05/20/2510723.html

2. Problems encountered while running Hadoop:

The first problem: the job fails complaining about XX.XX.XXX.WordCount$TokenizerMapper. This is mainly the plugin-version mismatch at work, and I never fixed it head-on. I stayed on Hadoop 1.2.1 (which ships with source code), dug through various references, and even built an eclipse-plugin by hand following the articles on building the plugin from the Hadoop source, but the resulting plugin still did not work. A way around the problem is described at http://www.cnblogs.com/spork/archive/2010/04/21/1717592.html (a series of several well-written posts, all worth reading as an overview of how Hadoop executes a job). The eventual fix imitates the process by which Hadoop packages local files into a jar, as shown in the excerpt below.
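The essence of that workaround, excerpted from the complete example at the end of this post (the EJob helper it uses is also listed there in full):

	// Package the classes Eclipse compiled into bin/ into a temporary jar,
	// then point the job at that jar so the cluster can load the
	// mapper/reducer classes, just as the `hadoop jar` launcher would.
	File jarFile = EJob.createTempJar("bin");
	ClassLoader classLoader = EJob.getClassLoader();
	Thread.currentThread().setContextClassLoader(classLoader);
	// ... after the Job has been created:
	((JobConf) job.getConfiguration()).setJar(jarFile.toString());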

With that problem out of the way the job should run, according to the articles online, but I hit yet another issue:

Problem 2:

Running from Eclipse fails with org.apache.hadoop.mapreduce.lib.input.InvalidInputException

When running wordcount, the configured input file could not be found even though it was plainly there; both relative and absolute paths failed. Then it occurred to me that when Hadoop runs from the command line it uploads local files into HDFS, whereas my runs kept reporting that the local file could not be found, so the code should point at the remote file address instead. Inspecting the configuration of a successfully executed Hadoop job showed the file address in this form:

mapred.output.dir = hdfs://172.16.236.11:9000/user/libin/output/wordcount-ec


So I changed the local program accordingly:

FileInputFormat.addInputPath(job, new Path("hdfs://172.16.236.11:9000"
		+ File.separator + otherArgs[0]));
FileOutputFormat.setOutputPath(job, new Path(
		"hdfs://172.16.236.11:9000" + File.separator + otherArgs[1]));


With this change, code launched locally executes against the Hadoop server.


One remaining annoyance: every test run seems to require uploading the local files to the Hadoop server first. To avoid that, before each call to addInputPath you can run the in-code equivalent of hadoop fs -put to push the local files up to HDFS, so they no longer have to be uploaded by hand. For reference, see the link below and the sketch after it:


http://younglibin.iteye.com/admin/blogs/1925109
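A minimal sketch of that idea, assuming the NameNode address used throughout this post; the class name UploadInput and the local source path are hypothetical placeholders, not taken from the original article:

	package com.younglibin.hadoop.test;

	import java.net.URI;

	import org.apache.hadoop.conf.Configuration;
	import org.apache.hadoop.fs.FileSystem;
	import org.apache.hadoop.fs.Path;

	public class UploadInput {
		public static void main(String[] args) throws Exception {
			Configuration conf = new Configuration();
			// Connect to the same HDFS instance the job will read from.
			FileSystem fs = FileSystem.get(
					URI.create("hdfs://172.16.236.11:9000"), conf);
			// In-code equivalent of `hadoop fs -put`: copy the local test
			// file into HDFS, overwriting any existing copy.
			// "/home/libin/data/input.txt" is a hypothetical local path.
			fs.copyFromLocalFile(false, true,
					new Path("/home/libin/data/input.txt"),
					new Path("/user/libin/input/libin"));
			fs.close();
			// ...then run the job with FileInputFormat.addInputPath as usual.
		}
	}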


Here is the complete example program:


package com.younglibin.hadoop.test;

import java.io.File;
import java.io.IOException;
import java.util.StringTokenizer;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.GenericOptionsParser;

import com.younglibin.hadoop.EJob;

public class WordCount {
	public static class TokenizerMapper extends
			Mapper<Object, Text, Text, IntWritable> {
		private final static IntWritable one = new IntWritable(1);
		private Text word = new Text();

		// Emit (token, 1) for every token in the input line.
		public void map(Object key, Text value, Context context)
				throws IOException, InterruptedException {
			StringTokenizer itr = new StringTokenizer(value.toString());
			while (itr.hasMoreTokens()) {
				word.set(itr.nextToken());
				context.write(word, one);
			}
		}
	}

	public static class IntSumReducer extends
			Reducer<Text, IntWritable, Text, IntWritable> {
		private IntWritable result = new IntWritable();

		// Sum the counts collected for each token.
		public void reduce(Text key, Iterable<IntWritable> values,
				Context context) throws IOException, InterruptedException {
			int sum = 0;
			for (IntWritable val : values) {
				sum += val.get();
			}
			result.set(sum);
			context.write(key, result);
		}
	}

	public static void main(String[] args) throws Exception {
		// Workaround for problem 1: jar up the classes Eclipse compiled
		// into bin/ so they can be shipped to the cluster.
		File jarFile = EJob.createTempJar("bin");
		EJob.addClasspath("/home/libin/software/hadoop/hadoop-1.2.1/conf");
		ClassLoader classLoader = EJob.getClassLoader();
		Thread.currentThread().setContextClassLoader(classLoader);

		Configuration conf = new Configuration();
		conf.set("mapred.job.tracker", "172.16.236.11:9001");
		args = new String[] { "/user/libin/input/libin",
				"/user/libin/output/wordcount-ec" };
		String[] otherArgs = new GenericOptionsParser(conf, args)
				.getRemainingArgs();
		if (otherArgs.length != 2) {
			System.err.println("Usage: wordcount <in> <out>");
			System.exit(2);
		}
		Job job = new Job(conf, "word count");
		job.setJarByClass(WordCount.class);
		((JobConf) job.getConfiguration()).setJar(jarFile.toString());
		job.setMapperClass(TokenizerMapper.class);
		job.setCombinerClass(IntSumReducer.class);
		job.setReducerClass(IntSumReducer.class);
		job.setOutputKeyClass(Text.class);
		job.setOutputValueClass(IntWritable.class);
		// Fix for problem 2: address input/output with full HDFS URIs so the
		// job reads and writes on the cluster, not the local file system.
		FileInputFormat.addInputPath(job, new Path("hdfs://172.16.236.11:9000"
				+ File.separator + otherArgs[0]));
		FileOutputFormat.setOutputPath(job, new Path(
				"hdfs://172.16.236.11:9000" + File.separator + otherArgs[1]));
		System.exit(job.waitForCompletion(true) ? 0 : 1);
	}
}
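The two lines that make Eclipse submission work are EJob.createTempJar("bin"), which jars up everything under the Eclipse output directory bin/, and ((JobConf) job.getConfiguration()).setJar(jarFile.toString()), which makes the job ship that jar to the cluster. Together they imitate what the hadoop jar command normally does, which is why the TokenizerMapper class can now be found. The EJob helper class they rely on is listed below.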


/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package com.younglibin.hadoop;

import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.lang.reflect.Array;
import java.lang.reflect.InvocationTargetException;
import java.lang.reflect.Method;
import java.net.URL;
import java.net.URLClassLoader;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Enumeration;
import java.util.jar.JarEntry;
import java.util.jar.JarFile;
import java.util.jar.JarOutputStream;
import java.util.jar.Manifest;

public class EJob {
	private static ArrayList<URL> classPath = new ArrayList<URL>();

	/** Unpack a jar file into a directory. */
	public static void unJar(File jarFile, File toDir) throws IOException {
		JarFile jar = new JarFile(jarFile);
		try {
			Enumeration entries = jar.entries();
			while (entries.hasMoreElements()) {
				JarEntry entry = (JarEntry) entries.nextElement();
				if (!entry.isDirectory()) {
					InputStream in = jar.getInputStream(entry);
					try {
						File file = new File(toDir, entry.getName());
						if (!file.getParentFile().mkdirs()) {
							if (!file.getParentFile().isDirectory()) {
								throw new IOException("Mkdirs failed to create "
										+ file.getParentFile().toString());
							}
						}
						OutputStream out = new FileOutputStream(file);
						try {
							byte[] buffer = new byte[8192];
							int i;
							while ((i = in.read(buffer)) != -1) {
								out.write(buffer, 0, i);
							}
						} finally {
							out.close();
						}
					} finally {
						in.close();
					}
				}
			}
		} finally {
			jar.close();
		}
	}

	/**
	 * Run a Hadoop job jar. If the main class is not in the jar's manifest, then
	 * it must be provided on the command line.
	 */
	public static void runJar(String[] args) throws Throwable {
		String usage = "jarFile [mainClass] args...";
		if (args.length < 1) {
			System.err.println(usage);
			System.exit(-1);
		}
		int firstArg = 0;
		String fileName = args[firstArg++];
		File file = new File(fileName);
		String mainClassName = null;
		JarFile jarFile;
		try {
			jarFile = new JarFile(fileName);
		} catch (IOException io) {
			throw new IOException("Error opening job jar: " + fileName).initCause(io);
		}
		Manifest manifest = jarFile.getManifest();
		if (manifest != null) {
			mainClassName = manifest.getMainAttributes().getValue("Main-Class");
		}
		jarFile.close();
		if (mainClassName == null) {
			if (args.length < 2) {
				System.err.println(usage);
				System.exit(-1);
			}
			mainClassName = args[firstArg++];
		}
		mainClassName = mainClassName.replaceAll("/", ".");
		File tmpDir = new File(System.getProperty("java.io.tmpdir"));
		tmpDir.mkdirs();
		if (!tmpDir.isDirectory()) {
			System.err.println("Mkdirs failed to create " + tmpDir);
			System.exit(-1);
		}
		final File workDir = File.createTempFile("hadoop-unjar", "", tmpDir);
		workDir.delete();
		workDir.mkdirs();
		if (!workDir.isDirectory()) {
			System.err.println("Mkdirs failed to create " + workDir);
			System.exit(-1);
		}
		Runtime.getRuntime().addShutdownHook(new Thread() {
			public void run() {
				try {
					fullyDelete(workDir);
				} catch (IOException e) {
				}
			}
		});
		unJar(file, workDir);
		classPath.add(new File(workDir + "/").toURL());
		classPath.add(file.toURL());
		classPath.add(new File(workDir, "classes/").toURL());
		File[] libs = new File(workDir, "lib").listFiles();
		if (libs != null) {
			for (int i = 0; i < libs.length; i++) {
				classPath.add(libs[i].toURL());
			}
		}
		ClassLoader loader = new URLClassLoader(classPath.toArray(new URL[0]));
		Thread.currentThread().setContextClassLoader(loader);
		Class<?> mainClass = Class.forName(mainClassName, true, loader);
		Method main = mainClass.getMethod("main", new Class[] { Array.newInstance(
				String.class, 0).getClass() });
		String[] newArgs = Arrays.asList(args).subList(firstArg, args.length)
				.toArray(new String[0]);
		try {
			main.invoke(null, new Object[] { newArgs });
		} catch (InvocationTargetException e) {
			throw e.getTargetException();
		}
	}

	/**
	 * Delete a directory and all its contents. If we return false, the directory
	 * may be partially-deleted.
	 */
	public static boolean fullyDelete(File dir) throws IOException {
		File contents[] = dir.listFiles();
		if (contents != null) {
			for (int i = 0; i < contents.length; i++) {
				if (contents[i].isFile()) {
					if (!contents[i].delete()) {
						return false;
					}
				} else {
					// try deleting the directory
					// this might be a symlink
					boolean b = false;
					b = contents[i].delete();
					if (b) {
						// this was indeed a symlink or an empty directory
						continue;
					}
					// if not an empty directory or symlink let
					// fullydelete handle it.
					if (!fullyDelete(contents[i])) {
						return false;
					}
				}
			}
		}
		return dir.delete();
	}

	/**
	 * Add a directory or file to classpath.
	 * 
	 * @param component
	 */
	public static void addClasspath(String component) {
		if ((component != null) && (component.length() > 0)) {
			try {
				File f = new File(component);
				if (f.exists()) {
					URL key = f.getCanonicalFile().toURL();
					if (!classPath.contains(key)) {
						classPath.add(key);
					}
				}
			} catch (IOException e) {
			}
		}
	}

	/**
	 * Add default classpath listed in bin/hadoop bash.
	 * 
	 * @param hadoopHome
	 */
	public static void addDefaultClasspath(String hadoopHome) {
		// Classpath initially contains conf dir.
		addClasspath(hadoopHome + "/conf");
		// For developers, add Hadoop classes to classpath.
		addClasspath(hadoopHome + "/build/classes");
		if (new File(hadoopHome + "/build/webapps").exists()) {
			addClasspath(hadoopHome + "/build");
		}
		addClasspath(hadoopHome + "/build/test/classes");
		addClasspath(hadoopHome + "/build/tools");
		// For releases, add core hadoop jar & webapps to classpath.
		if (new File(hadoopHome + "/webapps").exists()) {
			addClasspath(hadoopHome);
		}
		addJarsInDir(hadoopHome);
		addJarsInDir(hadoopHome + "/build");
		// Add libs to classpath.
		addJarsInDir(hadoopHome + "/lib");
		addJarsInDir(hadoopHome + "/lib/jsp-2.1");
		addJarsInDir(hadoopHome + "/build/ivy/lib/Hadoop/common");
	}

	/**
	 * Add all jars in directory to classpath, sub-directory is excluded.
	 * 
	 * @param dirPath
	 */
	public static void addJarsInDir(String dirPath) {
		File dir = new File(dirPath);
		if (!dir.exists()) {
			return;
		}
		File[] files = dir.listFiles();
		if (files == null) {
			return;
		}
		for (int i = 0; i < files.length; i++) {
			if (files[i].isDirectory()) {
				continue;
			} else {
				addClasspath(files[i].getAbsolutePath());
			}
		}
	}

	/**
	 * Create a temp jar file in "java.io.tmpdir".
	 * 
	 * @param root
	 * @return
	 * @throws IOException
	 */
	public static File createTempJar(String root) throws IOException {
		if (!new File(root).exists()) {
			return null;
		}
		Manifest manifest = new Manifest();
		manifest.getMainAttributes().putValue("Manifest-Version", "1.0");
		final File jarFile = File.createTempFile("EJob-", ".jar", new File(System
				.getProperty("java.io.tmpdir")));
		Runtime.getRuntime().addShutdownHook(new Thread() {
			public void run() {
				jarFile.delete();
			}
		});
		JarOutputStream out = new JarOutputStream(new FileOutputStream(jarFile),
				manifest);
		createTempJarInner(out, new File(root), "");
		out.flush();
		out.close();
		return jarFile;
	}

	private static void createTempJarInner(JarOutputStream out, File f,
			String base) throws IOException {
		if (f.isDirectory()) {
			File[] fl = f.listFiles();
			if (base.length() > 0) {
				base = base + "/";
			}
			for (int i = 0; i < fl.length; i++) {
				createTempJarInner(out, fl[i], base + fl[i].getName());
			}
		} else {
			out.putNextEntry(new JarEntry(base));
			FileInputStream in = new FileInputStream(f);
			byte[] buffer = new byte[1024];
			int n = in.read(buffer);
			while (n != -1) {
				out.write(buffer, 0, n);
				n = in.read(buffer);
			}
			in.close();
		}
	}

	/**
	 * Return a classloader based on user-specified classpath and parent
	 * classloader.
	 * 
	 * @return
	 */
	public static ClassLoader getClassLoader() {
		ClassLoader parent = Thread.currentThread().getContextClassLoader();
		if (parent == null) {
			parent = EJob.class.getClassLoader();
		}
		if (parent == null) {
			parent = ClassLoader.getSystemClassLoader();
		}
		return new URLClassLoader(classPath.toArray(new URL[0]), parent);
	}
}
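A note on usage: WordCount's main adds the local Hadoop conf directory via EJob.addClasspath(...) before building the class loader. Since Hadoop's Configuration resolves core-site.xml, hdfs-site.xml and mapred-site.xml through the thread context class loader, this presumably lets the job pick up the cluster settings; the example also sets mapred.job.tracker explicitly and hard-codes the HDFS address in the input/output paths, so those values do not depend on it.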
