Hadoop在 Mac OS X El Capitan上的环境搭建与开发入门

一些总结:

0. 安装Java和Homebrew

Java安装方式请问度娘。

Homebrew的安装方式:执行如下命令:

# The Ruby-based installer is deprecated; Homebrew's installer is now a Bash script.
/bin/bash -c "$(curl -fsSL https://raw.githubusercontent.com/Homebrew/install/HEAD/install.sh)"

 

 

1. 配置SSH

为确保在远程管理Hadoop以及Hadoop节点在用户共享时候的安全性,Hadoop需要使用SSH协议。

在Mac上执行:

 

ssh localhost

 如果执行失败,则需要修改一下系统设置:

 

系统偏好设置 -> 共享 -> 打开远程登录 -> 右侧选择允许所有用户访问。


 

生成密钥对:

 

ssh-keygen -t dsa -P '' -f ~/.ssh/id_dsa

 执行该命令后,会在当前用户目录的.ssh文件夹下生成id_dsa(私钥)和id_dsa.pub(公钥)文件。然后将公钥追加到该目录下的authorized_keys文件中。命令如下:

cat ~/.ssh/id_dsa.pub >> ~/.ssh/authorized_keys

 

2. 安装Hadoop 

安装方式为执行以下命令:

brew install hadoop

 命令执行完以后,Hadoop会被安装在/usr/local/Cellar/hadoop目录下。

 

2.1 配置Hadoop

在目录/usr/local/Cellar/hadoop/2.7.2/libexec/etc/hadoop下

1) 修改hadoop-env.sh文件

将其中的

# Extra Java runtime options.  Empty by default.
export HADOOP_OPTS="$HADOOP_OPTS -Djava.net.preferIPv4Stack=true"

 修改为:

export HADOOP_OPTS="$HADOOP_OPTS -Djava.net.preferIPv4Stack=true -Djava.security.krb5.realm= -Djava.security.krb5.kdc="

 

2) 修改core-site.xml文件为:

<configuration>
	<!-- Base directory under which Hadoop keeps its working files. -->
	<property>
		<name>hadoop.tmp.dir</name>
		<value>/usr/local/Cellar/hadoop/hdfs/tmp</value>
	</property>

	<!-- URI of the default filesystem (the local NameNode).
	     fs.defaultFS is the Hadoop 2.x name of the deprecated fs.default.name key. -->
	<property>
		<name>fs.defaultFS</name>
		<value>hdfs://localhost:9000</value>
	</property>
</configuration>

 

3)修改mapred-site.xml文件为:(如果没有该文件,就把mapred-site.xml.template文件复制为该文件)

<configuration>
	<!-- JobTracker address. mapreduce.jobtracker.address is the Hadoop 2.x name of the
	     deprecated mapred.job.tracker key.
	     NOTE(review): presumably not consulted when the framework below is "yarn" - confirm. -->
	<property>
		<name>mapreduce.jobtracker.address</name>
		<value>localhost:9010</value>
	</property>
	<!-- Run MapReduce jobs on YARN. -->
	<property>
		<name>mapreduce.framework.name</name>
		<value>yarn</value>
	</property>
</configuration>

 

4)修改hdfs-site.xml文件为:

<configuration>
	<!-- Replication factor of 1: this tutorial runs everything on a single machine (localhost). -->
	<property>
		<name>dfs.replication</name>
		<value>1</value>
	</property>
</configuration>

 

运行后台程序之前,需要格式化hdfs。执行命令如下:

# "hadoop namenode" is deprecated in Hadoop 2.x; the hdfs command is its replacement.
hdfs namenode -format

 

2.2 启动Hadoop

在/usr/local/Cellar/hadoop/2.7.2/sbin目录下,执行如下命令:

# Start Hadoop: HDFS first, then YARN.
# mapred-site.xml sets mapreduce.framework.name=yarn, so the MapReduce example
# in section 3.3 needs the YARN daemons running as well.
./start-dfs.sh
./start-yarn.sh

# Stop Hadoop (reverse order)
./stop-yarn.sh
./stop-dfs.sh

 启动以后,可以通过http://localhost:50070/ 来访问Hadoop 页面。查看Hadoop。

 

3. 使用Maven开发Hadoop示例

3.1 创建Maven工程

使用maven-archetype-quickstart创建项目,jar包依赖如下:

		<dependency>
			<groupId>org.apache.hadoop</groupId>
			<artifactId>hadoop-common</artifactId>
			<version>2.7.2</version>
		</dependency>
		<dependency>
			<groupId>org.apache.hadoop</groupId>
			<artifactId>hadoop-hdfs</artifactId>
			<version>2.7.2</version>
		</dependency>
		<dependency>
			<groupId>org.apache.hadoop</groupId>
			<artifactId>hadoop-client</artifactId>
			<version>2.7.2</version>
		</dependency>

 

3.2 测试hdfs

import java.io.InputStream;
import java.net.URI;
import java.nio.charset.StandardCharsets;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IOUtils;

/**
 * HDFS smoke test: writes a small file, echoes it back to standard output and
 * then lists the directory that contains it.
 */
public class Test {

	/** NameNode address; the same {@code hdfs://localhost:9000} value used in core-site.xml. */
	private static final String HDFS_URI = "hdfs://localhost:9000";

	/**
	 * Runs the write / read / list round trip against the local HDFS instance.
	 *
	 * @param args unused
	 * @throws Exception if the URI is malformed or any HDFS operation fails
	 */
	public static void main(String[] args) throws Exception {
		Configuration conf = new Configuration();
		Path dir = new Path("/user/lxlong");
		Path file = new Path(dir, "test.log");

		// try-with-resources releases the filesystem and every stream even when a step fails.
		try (FileSystem fs = FileSystem.get(new URI(HDFS_URI), conf)) {
			try (FSDataOutputStream os = fs.create(file)) {
				// Explicit charset so the bytes written do not depend on the platform default.
				os.write("Hello lxlong".getBytes(StandardCharsets.UTF_8));
			}

			try (InputStream is = fs.open(file)) {
				// close=false: passing true would also close System.out, and the
				// directory listing printed below would then be silently discarded.
				IOUtils.copyBytes(is, System.out, 1024, false);
			}
			// The file content has no trailing newline; end the line before the listing.
			System.out.println();

			for (FileStatus status : fs.listStatus(dir)) {
				System.out.println(status);
			}
		}
	}

}

 

3.3 测试MapReduce

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.GenericOptionsParser;

public class WordCount {
	
	public static class MyMapper extends Mapper<Object, Text, Text, IntWritable> {
		private final static IntWritable one = new IntWritable(1);
		private Text event = new Text();
		
		public void map(Object key, Text value, Context context) throws IOException, InterruptedException {
			int idx = value.toString().indexOf(" ");
			if(idx > 0) {
				String e = value.toString().substring(0, idx);
				event.set(e);
				context.write(event, one);
			}
		}
		
	}
	
	public static class MyReducer extends Reducer<Text, IntWritable, Text, IntWritable> {
		private IntWritable result = new IntWritable();
		
		public void reduce(Text key, Iterable<IntWritable> values, Context context) throws IOException, InterruptedException {
			int sum = 0;
			for(IntWritable val : values) {
				sum += val.get();
			}
			result.set(sum);
			context.write(key, result);
		}
	}

	public static void main(String[] args) throws Exception {
		Configuration conf = new Configuration();
		String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
		if(otherArgs.length < 2) {
			System.err.println("Usage: WordCount <in> <out>");
			System.exit(2);
		}
		
		Job job = Job.getInstance(conf, "Word Count");
		job.setJarByClass(WordCount.class);
		job.setMapperClass(MyMapper.class);
		job.setCombinerClass(MyReducer.class);
		job.setReducerClass(MyReducer.class);
		job.setOutputKeyClass(Text.class);
		job.setOutputValueClass(IntWritable.class);
		
		FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
		FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));
		
		System.exit(job.waitForCompletion(true) ? 0 : 1);
	}

}

 

1) 在该工程下使用mvn clean package命令生成jar包HadoopTest-0.0.1-SNAPSHOT.jar。

 

2)通过:

cp HadoopTest-0.0.1-SNAPSHOT.jar /usr/local/Cellar/hadoop/2.7.2/bin

命令将jar包拷贝到Hadoop命令目录下。(只是为了方便一些而已)

 

3)复制几个文件到hdfs下:

./hdfs dfs -put /tmp/input /user/lxlong/input

 

4)执行Hadoop任务:

./hadoop jar HadoopTest-0.0.1-SNAPSHOT.jar com.test.HadoopTest.WordCount /user/lxlong/input /user/lxlong/output  

 

5)在hdfs上查看输出:

hdfs dfs -cat /user/lxlong/output/part-r-00000 

 

 

 

版权声明:本文为iteye_15322原创文章,遵循 CC 4.0 BY-SA 版权协议,转载请附上原文出处链接和本声明。
本文链接:https://blog.csdn.net/iteye_15322/article/details/82671217