1.预备知识
1.1 linux须知
- linux内核的特性(宏内核/单内核,非微内核),vmware安装linux
- centos:稳定
- linux常操作目录:/bin,/usr,/etc
- xshell:使用xshell操作centos
- linux记事本:vi/vim
1.2 常用命令
- 帮助:man
- 目录:mkdir,rmdir,mv,ls,rm -rf,cd
- 文件:touch/vi,cat,cp,rm,more,grep
- 搜索:which,whereis,find
- 时间:date ,date -s
- 用户和组管理:useradd…,groupadd…
- 进程:ps -ef,kill -9 进程id,pkill -p id 或者/-f 进程
- 网络:netstat -aux
- 磁盘:df
- 压缩和解压:zip,unzip,tar
- tar -zcvf 压缩
- tar -zxvf 解压
- 软件:yum
- yum list
- yum install
- yum remove
- rpm -ivh,evh:了解
- 上传、下载(lrzsz):rz,sz
- 定时任务:crontab -e
- min,h,d,m,week
- crontab -l
- crontab -r:删除
1.3 shell脚本
-
变量:
x,$x
-
运算符:
$[3+6]
-
判断:
if [];then fi
-
循环:
for(()) 或者 for x in list
do
done
while [ ]
do
done
-
函数:
function fun(){} fun
2.hadoop之windows配置
-
解压hadoop压缩文件
-
指定HADOOP_HOME
-
指定path:/bin,/sbin
-
测试:
hadoop version
3.在linux上搭建hadoop集群
集群成员:
主机 hdfs yarn
master namenode ,secondarynamenode resourcemanager
slave1 datanode nodemanager
slave2 datanode nodemanager
3.1 安装jdk8,hadoop3.2.1
-
上传压缩文件并解压(/usr)
-
设置环境变量(/etc/profile)
export JAVA_HOME=/usr/jdk8
export HADOOP_HOME=/usr/hadoop321
export PATH=$PATH:$JAVA_HOME/bin:$HADOOP_HOME/bin:$HADOOP_HOME/sbin
-
激活配置文件
. /etc/profile
-
测试:
hadoop version
3.2 hdfs配置
-
core-site.xml
fs.defaultFS hdfs://master:9000
-
hdfs-site.xml
dfs.replication 2 dfs.http.address 0.0.0.0:5700 dfs.namenode.name.dir file:///root/hadoop/dfs/namenode dfs.datanode.data.dir file:///root/hadoop/dfs/datanode dfs.webhdfs.enabled true
-
初始化namenode
hdfs namenode -format
-
start-dfs.sh,stop-dfs.sh
#设置用户
HDFS_NAMENODE_USER=root
HDFS_DATANODE_USER=root
HDFS_SECONDARYNAMENODE_USER=root -
hadoop-env.sh
export JAVA_HOME=/usr/jdk8
3.3 集群成员配置
-
域名与ip绑定(/etc/hosts)
192.168.85.129 master
192.168.85.130 slave1
192.168.85.131 slave2 -
配置workers(工作节点)(/usr/hadoop321/etc/hadoop/workers)
slave1
slave2
-
修改副本数量(数据节点数量)(hdfs-site.xml)
dfs.replication 2
3.4 yarn配置
-
yarn-site.xml
yarn.nodemanager.aux-services mapreduce_shuffle yarn.resourcemanager.hostname master yarn.resourcemanager.webapp.address master:8088 yarn.application.classpath /usr/hadoop321/etc/hadoop:/usr/hadoop321/share/hadoop/common/lib/*:/usr/hadoop321/share/hadoop/common/*:/usr/hadoop321/share/hadoop/hdfs:/usr/hadoop321/share/hadoop/hdfs/lib/*:/usr/hadoop321/share/hadoop/hdfs/*:/usr/hadoop321/share/hadoop/mapreduce/lib/*:/usr/hadoop321/share/hadoop/mapreduce/*:/usr/hadoop321/share/hadoop/yarn:/usr/hadoop321/share/hadoop/yarn/lib/*:/usr/hadoop321/share/hadoop/yarn/*
-
mapred-site.xml
mapreduce.framework.name yarn
-
start-yarn.sh,stop-yarn.sh
YARN_RESOURCEMANAGER_USER=root
YARN_NODEMANAGER_USER=root
3.5 centos克隆
-
修改主机名
hostnamectl set-hostname 主机名
-
删除/tmp目录下的文件,使数据节点在浏览器端能看见(注意:防火墙关闭)
systemctl disable firewalld(开机不自启)
3.6 master免密登录slave
-
在root目录创建密钥:
ssh-keygen
-
authorized_keys拷贝到slave上
cat id_rsa.pub >> authorized_keys
scp 拷贝到slave的.ssh文件夹下
scp authorized_keys root@slave1:/root/.ssh
3.7 启动hadoop集群
-
在master上启动
start-all.sh
-
测试
jps
-
查看节点
hdfs dfsadmin -report
4. mapreduce实例
4.1 单词统计(入门)
/**
 * English word count: splits every input line into words and sums the
 * occurrences of each word.
 */
public class WordCounter {
    /**
     * Tokenizing mapper: emits (word, 1) for every word of the input line.
     */
    public static class MyMapper extends Mapper<LongWritable, Text, Text, IntWritable> {
        // Output objects shared by all map() calls; only their content changes.
        public static Text text = new Text();
        public static IntWritable intWritable = new IntWritable(1);

        @Override
        protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
            // Split on any run of whitespace so repeated spaces and tabs do not
            // produce empty "words".
            String[] words = value.toString().split("\\s+");
            for (String word : words) {
                // A line that starts with whitespace (or is empty) still yields one empty token.
                if (word.isEmpty()) {
                    continue;
                }
                text.set(word);
                context.write(text, intWritable);
            }
        }
    }

    /**
     * Summing reducer: adds up all counts received for one word.
     */
    public static class MyReducer extends Reducer<Text, IntWritable, Text, IntWritable> {
        @Override
        protected void reduce(Text key, Iterable<IntWritable> values, Context context) throws IOException, InterruptedException {
            int count = 0;
            for (IntWritable value : values) {
                count += value.get();
            }
            context.write(key, new IntWritable(count));
        }
    }

    /**
     * Configures and runs the job: reads from "data6", writes to "dTemp".
     *
     * @param args unused
     */
    public static void main(String[] args) {
        Configuration conf = new Configuration();
        try {
            // Job
            Job job = Job.getInstance(conf);
            job.setJobName("firstJob");
            job.setJarByClass(WordCounter.class);
            // Mapper and reducer
            job.setMapperClass(MyMapper.class);
            job.setReducerClass(MyReducer.class);
            // Output data types
            job.setOutputKeyClass(Text.class);
            job.setOutputValueClass(IntWritable.class);
            // Input and output directories
            FileInputFormat.setInputPaths(job, "data6");
            FileOutputFormat.setOutputPath(job, new Path("dTemp"));
            // Run, then close
            boolean succeeded = job.waitForCompletion(true);
            job.close();
            if (!succeeded) {
                // waitForCompletion reports failure through its return value, not an exception.
                System.err.println("firstJob did not complete successfully");
            }
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
}
4.2 中文分词统计(ik)
/**
 * Chinese word count: segments each line with IK and writes the words ordered
 * by descending count.
 */
public class CNWordCounter {
    /**
     * Segmenting mapper: emits (word, 1) for every lexeme IK finds in the line.
     */
    public static class MyMapper extends Mapper<LongWritable, Text, Text, IntWritable> {
        public static Text text = new Text();
        public static IntWritable intWritable = new IntWritable(1);

        @Override
        protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
            // Feed the decoded string straight to the segmenter. Converting it to
            // bytes and back with the platform default charset would replace every
            // Chinese character with '?' on a host whose default charset cannot
            // encode Chinese.
            java.io.Reader lineReader = new java.io.StringReader(value.toString());
            // NOTE(review): the second argument presumably selects IK's "smart" mode; confirm.
            IKSegmenter ikSegmenter = new IKSegmenter(lineReader, true);
            Lexeme lexeme = null;
            while ((lexeme = ikSegmenter.next()) != null) {
                String word = lexeme.getLexemeText();
                text.set(word);
                context.write(text, intWritable);
            }
        }
    }

    /**
     * Counting reducer: collects (word, count) records and writes them, sorted,
     * in cleanup().
     */
    public static class MyReducer extends Reducer<Text, IntWritable, Text, IntWritable> {
        public static Text text = new Text();
        // Every counted word is buffered here until cleanup() runs.
        public static List<Record> list = new ArrayList<Record>();

        @Override
        protected void reduce(Text key, Iterable<IntWritable> values, Context context) throws IOException, InterruptedException {
            int count = 0;
            for (IntWritable value : values) {
                count += value.get();
            }
            // Nothing is written here; output is deferred so it can be sorted.
            Record record = new Record(key.toString(), count);
            list.add(record);
        }

        /**
         * Sorts the buffered records by their natural order, reverses that
         * order, and writes them out.
         */
        @Override
        protected void cleanup(Context context) throws IOException, InterruptedException {
            Collections.sort(list);
            Collections.reverse(list);
            for (Record record : list) {
                text.set(record.getWord());
                context.write(text, new IntWritable(record.getCount()));
            }
        }
    }

    /**
     * Configures and runs the job: reads "/test99/data2", writes "/test99/out".
     *
     * @param args unused
     */
    public static void main(String[] args) {
        Configuration conf = new Configuration();
        try {
            Job job = Job.getInstance(conf);
            job.setJobName("secondJob");
            job.setJarByClass(CNWordCounter.class);
            // Mapper and reducer
            job.setMapperClass(MyMapper.class);
            job.setReducerClass(MyReducer.class);
            // Output types
            job.setOutputKeyClass(Text.class);
            job.setOutputValueClass(IntWritable.class);
            // Input and output directories
            FileInputFormat.setInputPaths(job, "/test99/data2");
            FileOutputFormat.setOutputPath(job, new Path("/test99/out"));
            // Run, then close
            job.waitForCompletion(true);
            job.close();
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
}
4.3 数据清洗(去重、去空、去非)
/**
 * Data-cleaning driver (empty / duplicate / illegal records). It runs a
 * map-only style job with RemoveReplyMapper as the configured mapper.
 */
public class DataClear {
    /**
     * Builds the "clearJob" job, reading "data4" and writing "out".
     *
     * @param args unused
     */
    public static void main(String[] args) {
        Configuration configuration = new Configuration();
        try {
            Job clearJob = Job.getInstance(configuration);
            clearJob.setJobName("clearJob");
            clearJob.setJarByClass(DataClear.class);

            // Mapper and the types it emits
            clearJob.setMapperClass(RemoveReplyMapper.class);
            clearJob.setOutputKeyClass(Text.class);
            clearJob.setOutputValueClass(NullWritable.class);

            // Input and output directories
            FileInputFormat.setInputPaths(clearJob, "data4");
            Path outputDir = new Path("out");
            FileOutputFormat.setOutputPath(clearJob, outputDir);

            clearJob.waitForCompletion(true);
            clearJob.close();
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
}
/**
 * Mapper that drops records containing an empty field.
 */
class RemoveNullMapper extends Mapper<LongWritable, Text, Text, NullWritable> {
    @Override
    protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
        // Records with an empty field are skipped; everything else passes through unchanged.
        if (isEmpty(value.toString())) {
            return;
        }
        context.write(value, NullWritable.get());
    }

    /**
     * Reports whether any single-space-separated field of the line is empty.
     *
     * @param v the raw input line
     * @return true when at least one field is null, "" or " "
     */
    private boolean isEmpty(String v) {
        String[] fields = v.split(" ");
        int index = 0;
        while (index < fields.length) {
            String field = fields[index];
            boolean blank = field == null || "".equals(field) || " ".equals(field);
            if (blank) {
                return true;
            }
            index++;
        }
        return false;
    }
}
/**
 * Mapper that drops duplicate records, using a set of already-seen names.
 */
class RemoveReplyMapper extends Mapper<LongWritable, Text, Text, NullWritable> {
    // Names seen so far; static, so it is shared by every instance in the same JVM.
    public static Set<String> names = new HashSet<>();

    @Override
    protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
        // Only the first record for a given name is written.
        if (isRely(value.toString())) {
            return;
        }
        context.write(value, NullWritable.get());
    }

    /**
     * Reports whether the name (first single-space-separated field) was seen
     * before, and records it if not.
     *
     * @param v the raw input line
     * @return true when the name is a repeat
     */
    private boolean isRely(String v) {
        String firstField = v.split(" ")[0];
        // Set.add returns false when the element was already present.
        boolean newlyAdded = names.add(firstField);
        return !newlyAdded;
    }
}
/**
 * Mapper that drops records containing an illegal score.
 */
class RemoveIllegalMapper extends Mapper<LongWritable, Text, Text, NullWritable> {
    @Override
    protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
        String v = value.toString();
        boolean flag = isIllegal(v);
        // Only legal records are written.
        if (!flag) {
            context.write(value, NullWritable.get());
        }
    }

    /**
     * Checks every field after the first one as a score.
     *
     * @param v the raw input line, fields separated by whitespace
     * @return true when a score is greater than 100, less than 0, or not an integer
     */
    private boolean isIllegal(String v) {
        String[] split = v.split("\\s+");
        for (int i = 1; i < split.length; i++) {
            int score;
            try {
                score = Integer.parseInt(split[i]);
            } catch (NumberFormatException e) {
                // A score that is not an integer is dirty data, so the record is
                // filtered out instead of failing the whole task.
                return true;
            }
            if (score > 100 || score < 0) {
                return true;
            }
        }
        return false;
    }
}
4.4 序列化和反序列化(writable)
/**
 * Serializable phone bill: base call fee and data-traffic fee.
 *
 * Only {@code base} and {@code flow} are serialized; {@code num} is not written
 * by {@link #write} and is therefore not restored by {@link #readFields}.
 */
public class PhoneWritable implements Writable {
    private String num;
    private Double base;
    private Double flow;

    /** No-arg constructor; leaves all fields null. */
    public PhoneWritable() {
    }

    @Override
    public String toString() {
        return "PhoneWritable{" +
                "base=" + base +
                ", flow=" + flow +
                '}';
    }

    /**
     * @param base base call fee
     * @param flow data-traffic fee
     */
    public PhoneWritable(Double base, Double flow) {
        this.base = base;
        this.flow = flow;
    }

    public String getNum() {
        return num;
    }

    public void setNum(String num) {
        this.num = num;
    }

    public Double getBase() {
        return base;
    }

    public void setBase(Double base) {
        this.base = base;
    }

    public Double getFlow() {
        return flow;
    }

    public void setFlow(Double flow) {
        this.flow = flow;
    }

    /**
     * Serializes base then flow as two doubles. An unset (null) fee is written
     * as 0.0 so that an instance built with the no-arg constructor does not
     * fail with a NullPointerException on unboxing.
     */
    @Override
    public void write(DataOutput out) throws IOException {
        out.writeDouble(base == null ? 0.0 : base);
        out.writeDouble(flow == null ? 0.0 : flow);
    }

    /** Deserializes in the same order as {@link #write}: base, then flow. */
    @Override
    public void readFields(DataInput in) throws IOException {
        this.base = in.readDouble();
        this.flow = in.readDouble();
    }
}
4.5 数据排序(WritableComparable)
/**
 * Sortable (key, value) record whose natural order is descending by value.
 */
@Data
@NoArgsConstructor
@AllArgsConstructor
public class SortRecord implements WritableComparable<SortRecord> {
    private String key;
    private Integer value;

    @Override
    public String toString() {
        return key + " " + value;
    }

    /**
     * Orders records by value, largest first.
     *
     * @param o the record to compare with
     * @return negative when this record's value is larger than {@code o}'s
     */
    @Override
    public int compareTo(SortRecord o) {
        // Integer.compare instead of subtraction: a difference of two ints can
        // overflow and flip the sign for values far apart.
        return Integer.compare(o.getValue(), this.getValue());
    }

    /** Serializes the key as UTF, then the value as an int. */
    @Override
    public void write(DataOutput out) throws IOException {
        out.writeUTF(key);
        out.writeInt(value);
    }

    /** Deserializes in the same order as {@link #write}. */
    @Override
    public void readFields(DataInput in) throws IOException {
        this.key = in.readUTF();
        this.value = in.readInt();
    }
}
4.6 数据压缩(map,reduce)
// Map output compression: enable it and select BZip2 as the codec
conf.setBoolean("mapreduce.map.output.compress",true);
conf.setClass("mapreduce.map.output.compress.codec", BZip2Codec.class, CompressionCodec.class);
// Reduce (job) output compression: enable it and select BZip2 as the codec
FileOutputFormat.setCompressOutput(job,true);
FileOutputFormat.setOutputCompressorClass(job,BZip2Codec.class);
4.7 连接查询(mapJoin,reduceJoin)
- reduce端连接
/**
 * Reduce-side join mapper: tags every line as an order or a product record and
 * emits it keyed by product id.
 */
class JoinMapper extends Mapper<LongWritable, Text, Text, Record> {
    // Output objects, refilled on every call.
    Record record = new Record();
    Text text = new Text();

    @Override
    protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
        // The source file name decides which kind of record this line is.
        FileSplit currentSplit = (FileSplit) context.getInputSplit();
        String fileName = currentSplit.getPath().getName();
        String[] fields = value.toString().split("\\s+");

        boolean fromOrderFile = fileName.startsWith("order");
        String pid = fromOrderFile ? fields[1] : fields[0];
        if (fromOrderFile) {
            // Order line: orderid, pid, num; the product name is left empty.
            record.setOrderid(fields[0]);
            record.setPid(fields[1]);
            record.setNum(Integer.parseInt(fields[2]));
            record.setPname("");
        } else {
            // Product line: pid, pname; order id is empty and num is 0.
            record.setOrderid("");
            record.setPid(fields[0]);
            record.setPname(fields[1]);
            record.setNum(0);
        }
        text.set(pid);
        context.write(text, record);
    }
}
/**
 * Reduce-side join reducer: for one product id, pairs every order record with
 * the product name and writes "orderid pname num".
 */
class JoinReducer extends Reducer<Text, Record, Text, NullWritable> {
    Text text = new Text();

    @Override
    protected void reduce(Text key, Iterable<Record> values, Context context) throws IOException, InterruptedException {
        List<Record> list = new ArrayList<>();
        Record pd = new Record();
        for (Record record : values) {
            if (StringUtils.isEmpty(record.getPname())) {
                // Order record (empty product name). Store a copy, not the iterated
                // object: the framework reuses the value instance between iterations.
                // The copy is made field by field, so it cannot fail half-way the way
                // a reflective bean copy with a swallowed exception can.
                Record record1 = new Record();
                record1.setOrderid(record.getOrderid());
                record1.setPid(record.getPid());
                record1.setPname(record.getPname());
                record1.setNum(record.getNum());
                list.add(record1);
            } else {
                // Product record: remember its name.
                pd.setPname(record.getPname());
            }
        }
        // If no product record arrived for this key, pd.getPname() is still null
        // and is concatenated as the text "null".
        for (Record re : list) {
            String res = re.getOrderid() + " " + pd.getPname() + " " + re.getNum();
            text.set(res);
            context.write(text, NullWritable.get());
        }
    }
}
/**
 * Driver for the reduce-side join job (JoinMapper + JoinReducer).
 */
public class ReduceJoin {
    /**
     * Builds the "reduceJoin" job, reading "data" and writing "out".
     *
     * @param args unused
     */
    public static void main(String[] args) {
        Configuration configuration = new Configuration();
        try {
            Job joinJob = Job.getInstance(configuration, "reduceJoin");
            joinJob.setJarByClass(ReduceJoin.class);

            // Map side: class and intermediate types
            joinJob.setMapperClass(JoinMapper.class);
            joinJob.setMapOutputKeyClass(Text.class);
            joinJob.setMapOutputValueClass(Record.class);

            // Reduce side: class and final types
            joinJob.setReducerClass(JoinReducer.class);
            joinJob.setOutputKeyClass(Text.class);
            joinJob.setOutputValueClass(NullWritable.class);

            // Input and output directories
            FileInputFormat.setInputPaths(joinJob, "data");
            Path outputDir = new Path("out");
            FileOutputFormat.setOutputPath(joinJob, outputDir);

            joinJob.waitForCompletion(true);
            joinJob.close();
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
}
-
map端连接
-
自定义序列化类
/**
 * Join record carrying order fields (orderid, pid, num) and product fields
 * (pid, pname).
 */
@Data
@NoArgsConstructor
@AllArgsConstructor
public class Record implements Writable {
    private String orderid;
    private String pid;
    private String pname;
    private Integer num;

    /** Serializes orderid, pid and pname as UTF strings, then num as an int. */
    @Override
    public void write(DataOutput out) throws IOException {
        out.writeUTF(orderid);
        out.writeUTF(pid);
        out.writeUTF(pname);
        out.writeInt(num);
    }

    /** Reads the fields back in the order write() produced them. */
    @Override
    public void readFields(DataInput in) throws IOException {
        orderid = in.readUTF();
        pid = in.readUTF();
        pname = in.readUTF();
        num = in.readInt();
    }
}
-
map任务
class MyMapper extends Mapper<LongWritable, Text,Text, NullWritable>{ // 存放商品星系:id,name Map<String,String> map =new HashMap<>(); @Override protected void setup(Context context) throws IOException, InterruptedException { FileInputStream fileInputStream = new FileInputStream("data/pd.txt"); BufferedReader reader=new BufferedReader(new InputStreamReader(fileInputStream)); String str=null; while ((str=reader.readLine())!=null){ String[] split = str.split("\\s+"); map.put(split[0],split[1]); } } @Override protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException { InputSplit inputSplit = context.getInputSplit(); FileSplit fileSplit= (FileSplit) inputSplit; String name = fileSplit.getPath().getName(); if (name.startsWith("order")){ String[] split = value.toString().split("\\s+"); String res =split[0]+" "+map.get(split[1])+" "+split[2]; context.write(new Text(res),NullWritable.get()); } } } public class MapJoin { public static void main(String[] args) { Configuration conf = new Configuration(); try { Job job = Job.getInstance(conf, "mapJoin"); job.setMapperClass(MyMapper.class); job.setOutputKeyClass(Text.class); job.setOutputValueClass(NullWritable.class); FileInputFormat.setInputPaths(job,"data"); FileOutputFormat.setOutputPath(job,new Path("out")); job.waitForCompletion(true); job.close(); } catch (Exception e) { e.printStackTrace(); } } }
-
4.8 自定义分区(partitioner)
-
自定义分区类
/**
 * Custom partitioner: routes keys "01", "02" and "03" to separate partitions.
 */
class MyPartition extends Partitioner<Text, Record> {
    /**
     * @param text          the map output key
     * @param record        the map output value (not used for routing)
     * @param numPartitions the number of reduce tasks
     * @return a partition index in the range 0 .. numPartitions-1
     */
    @Override
    public int getPartition(Text text, Record record, int numPartitions) {
        String key = text.toString();
        int partition;
        // Partition indexes are zero-based: with three reduce tasks the valid
        // values are 0, 1 and 2.
        switch (key) {
            case "02":
                partition = 1;
                break;
            case "03":
                partition = 2;
                break;
            case "01":
            default:
                // Unknown keys share the partition of "01".
                partition = 0;
                break;
        }
        // Stay in range even if the job runs with fewer than three reduce tasks.
        return partition % numPartitions;
    }
}
-
job中指定
// Register the custom partitioner on the job and request three reduce tasks.
job.setPartitionerClass(MyPartition.class); job.setNumReduceTasks(3);
5. hadoop优化
- 选用高性能机器
- map之前预处理:小文件合并成大文件
- map阶段:combine,局部汇总
- reduce阶段:设置reduce buff参数
- 数据倾斜
- 自定义分区
- mapJoin
6.zookeeper的使用
-
下载文件解压(/usr)
-
配置环境变量(/etc/profile)
export ZK_HOME=/usr/zk
export PATH=$PATH:$JAVA_HOME/bin:$HADOOP_HOME/bin:$HADOOP_HOME/sbin:$ZK_HOME/bin
-
配置运行参数(/usr/zk/conf/zoo.cfg)
dataDir=/root/zk/data
dataLogDir=/root/zk/log -
启动zk服务端
zkServer.sh start
-
打开zk客户端
zkCli.sh
-
关闭zk服务端
zkServer.sh stop
7.mapreduce进阶案例
7.1 多mr顺序执行
/**
 * Runs two MapReduce jobs in sequence: Mr1.execMr1() first, then Mr2.execMr2().
 * NOTE(review): presumably each call blocks until its job has finished, so that
 * the second job can consume the first one's output; confirm in Mr1/Mr2.
 */
public static void main(String[] args) {
Mr1.execMr1();
Mr2.execMr2();
}
7.2 mapreduce的链式执行
// Map-phase chain: MyMapper1 -> MyMapper2
ChainMapper.addMapper(job,MyMapper1.class,LongWritable.class,Text.class,Text.class,IntWritable.class,cfg);
ChainMapper.addMapper(job,MyMapper2.class,Text.class,IntWritable.class,Text.class,IntWritable.class,cfg);
// Reducer
ChainReducer.setReducer(job,MyReducer.class,Text.class,IntWritable.class,Text.class,IntWritable.class,cfg);
// MyMapper3 runs after the reducer, so it is registered through ChainReducer;
// ChainMapper.addMapper would append it to the map-phase chain instead.
ChainReducer.addMapper(job,MyMapper3.class,Text.class,IntWritable.class,Text.class,IntWritable.class,cfg);
7.3 气温指数分析
-
自定义天气类
/** * 天气类:年,月,日,温度 */ @Data@NoArgsConstructor@AllArgsConstructor public class TianQi implements WritableComparable<TianQi> { private Integer year; private Integer month; private Integer day; private Integer wd; @Override public String toString() { return year+"\t"+month+"\t"+day+"\t"+wd+"c"; } @Override public int compareTo(TianQi o) { // 按年升序,月升序,温度降序,日升序 int yAsc = Integer.compare(this.getYear(),o.getYear()); if (yAsc==0){ int mAsc = Integer.compare(this.getMonth(), o.getMonth()); if (mAsc==0){ int wdDesc = Integer.compare(o.getWd(), this.getWd()); if (wdDesc==0){ int dAsc = Integer.compare(this.getDay(), o.getDay()); return dAsc; } return wdDesc; } return mAsc; } return yAsc; } @Override public void write(DataOutput out) throws IOException { out.writeInt(year); out.writeInt(month); out.writeInt(day); out.writeInt(wd); } @Override public void readFields(DataInput in) throws IOException { year=in.readInt(); month=in.readInt(); day=in.readInt(); wd=in.readInt(); } } -
自定义分组类
/**
 * Grouping comparator: two TianQi keys are equal when their year and month match.
 */
public class TianQiGroupComparator extends WritableComparator {
    public TianQiGroupComparator() {
        super(TianQi.class, true);
    }

    /**
     * Compares by year, then by month; day and temperature are ignored.
     */
    @Override
    public int compare(WritableComparable a, WritableComparable b) {
        TianQi aa = (TianQi) a;
        TianQi bb = (TianQi) b;
        // Integer.compare rather than subtraction, matching TianQi.compareTo and
        // avoiding any possibility of int overflow.
        int y = Integer.compare(aa.getYear(), bb.getYear());
        if (y == 0) {
            return Integer.compare(aa.getMonth(), bb.getMonth());
        }
        return y;
    }
}
-
编写mr程序
/** * 统计每月温度最高的两天 */ public class TianQiClient { /** * 封装到TianQi类中 */ public static class TianQiMapper extends Mapper<LongWritable, Text,TianQi, NullWritable>{ @Override protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException { String[] split = value.toString().split("\\s+"); String time =split[0]+" "+split[1]; int wd = Integer.parseInt(split[2].substring(0, split[2].length()-1)); SimpleDateFormat simpleDateFormat = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss"); try { Date date = simpleDateFormat.parse(time); Calendar calendar = Calendar.getInstance(); calendar.setTime(date); int year =calendar.get(Calendar.YEAR); int month =calendar.get(Calendar.MONTH)+1; int day =calendar.get(Calendar.DAY_OF_MONTH); TianQi tianQi = new TianQi(year, month, day, wd); context.write(tianQi,NullWritable.get()); } catch (ParseException e) { e.printStackTrace(); } } } public static class TianQiPartitioner extends Partitioner<TianQi, NullWritable>{ @Override public int getPartition(TianQi tianQi, NullWritable nullWritable, int numPartitions) { return (tianQi.getYear()&Integer.MAX_VALUE)%numPartitions; } } /** * 找出温度最高的两天 */ public static class TianQiReducer extends Reducer<TianQi,NullWritable,TianQi,NullWritable>{ @Override protected void reduce(TianQi key, Iterable<NullWritable> values, Context context) throws IOException, InterruptedException { int flag = 0; int day = 0; for (NullWritable nullWritable : values) { // 写出最高温度 if(flag == 0){ context.write(key, NullWritable.get()); flag ++; // 记录天 day = key.getDay(); } // 写出次高温度 if(key.getDay() != day){ context.write(key, NullWritable.get()); break; } } } } public static void main(String[] args) { Configuration cfg = new Configuration(); try { Job job = Job.getInstance(cfg,"tianqi"); job.setMapperClass(TianQiMapper.class); job.setReducerClass(TianQiReducer.class); job.setMapOutputKeyClass(TianQi.class); job.setMapOutputValueClass(NullWritable.class); job.setOutputKeyClass(TianQi.class); 
job.setOutputValueClass(NullWritable.class); FileInputFormat.setInputPaths(job,"data2"); FileOutputFormat.setOutputPath(job,new Path("out")); job.setPartitionerClass(TianQiPartitioner.class); job.setNumReduceTasks(3); job.setGroupingComparatorClass(TianQiGroupComparator.class); job.waitForCompletion(true); job.close(); } catch (Exception e) { e.printStackTrace(); } } }
7.4 好友推荐
/**
 * Friend recommendation: finds pairs of people who are not direct friends but
 * share at least one friend, and counts the shared friends.
 */
public class FriendClient {
    /**
     * Emits every pair found on a "person:friend friend ..." line.
     * Direct friends are tagged 0, indirect friends (two people on the same
     * friend list) are tagged 1.
     */
    public static class FriendMapper extends Mapper<LongWritable, Text, Text, IntWritable> {
        @Override
        protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
            String[] split = value.toString().split(":");
            // A line without a friend list (no ':' or nothing after it) has no pairs.
            if (split.length < 2) {
                return;
            }
            String left = split[0];
            // Trim first: a space after the colon would otherwise produce an empty friend name.
            String friendList = split[1].trim();
            if (friendList.isEmpty()) {
                return;
            }
            String[] rights = friendList.split("\\s+");
            for (int i = 0; i < rights.length; i++) {
                // Direct friends
                context.write(new Text(unit(left, rights[i])), new IntWritable(0));
                for (int j = i + 1; j < rights.length; j++) {
                    // Indirect friends
                    context.write(new Text(unit(rights[i], rights[j])), new IntWritable(1));
                }
            }
        }
    }

    /**
     * Counts the shared friends of each pair; pairs that are already direct
     * friends produce no output.
     */
    public static class FriendReducer extends Reducer<Text, IntWritable, Text, IntWritable> {
        @Override
        protected void reduce(Text key, Iterable<IntWritable> values, Context context) throws IOException, InterruptedException {
            int count = 0;
            for (IntWritable value : values) {
                // A single 0 tag means the pair is a direct friendship: drop it.
                if (value.get() == 0) {
                    return;
                }
                count++;
            }
            context.write(key, new IntWritable(count));
        }
    }

    /**
     * Builds an order-independent pair key, "larger:smaller" by string
     * comparison, so that (a, b) and (b, a) map to the same key.
     */
    private static String unit(String left, String right) {
        return left.compareTo(right) > 0 ? left + ":" + right : right + ":" + left;
    }

    /**
     * Builds the "fried" job, reading "data4" and writing "out". An existing
     * output directory is deleted first.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        Configuration cfg = new Configuration();
        try {
            Job job = Job.getInstance(cfg, "fried");
            // Tells Hadoop which jar to ship, as the other drivers in these notes do.
            job.setJarByClass(FriendClient.class);
            job.setMapperClass(FriendMapper.class);
            job.setReducerClass(FriendReducer.class);
            job.setMapOutputKeyClass(Text.class);
            job.setMapOutputValueClass(IntWritable.class);
            job.setOutputKeyClass(Text.class);
            job.setOutputValueClass(IntWritable.class);
            FileInputFormat.setInputPaths(job, "data4");
            Path out = new Path("out");
            // Remove a previous run's output so the job can be re-run.
            FileSystem fs = out.getFileSystem(cfg);
            if (fs.exists(out)) {
                fs.delete(out, true);
            }
            FileOutputFormat.setOutputPath(job, out);
            job.waitForCompletion(true);
            job.close();
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
}
8 部署高可用Hadoop集群
集群成员:
主机 hdfs yarn
master namenode resourcemanager
slave1 namenode,datanode resourcemanager,nodemanager
slave2 datanode nodemanager
8.1 在master上安装zk
8.2 修改core-site.xml
fs.defaultFS hdfs://cluster hadoop.tmp.dir file:/root/hadoop/tmp ha.zookeeper.quorum master:2181
8.3 修改hdfs-site.xml
<configuration>
  <!-- Logical nameservice ID; must match the authority of fs.defaultFS in core-site.xml (hdfs://cluster) -->
  <property>
    <name>dfs.nameservices</name>
    <value>cluster</value>
  </property>
  <!-- The nameservice "cluster" has two NameNodes, identified as master and slave1 -->
  <property>
    <name>dfs.ha.namenodes.cluster</name>
    <value>master,slave1</value>
  </property>
  <!-- RPC address of NameNode master -->
  <property>
    <name>dfs.namenode.rpc-address.cluster.master</name>
    <value>master:9000</value>
  </property>
  <!-- HTTP address of NameNode master -->
  <property>
    <name>dfs.namenode.http-address.cluster.master</name>
    <value>master:50070</value>
  </property>
  <!-- RPC address of NameNode slave1 -->
  <property>
    <name>dfs.namenode.rpc-address.cluster.slave1</name>
    <value>slave1:9000</value>
  </property>
  <!-- HTTP address of NameNode slave1 -->
  <property>
    <name>dfs.namenode.http-address.cluster.slave1</name>
    <value>slave1:50070</value>
  </property>
  <!-- Where the NameNodes store their shared edit log on the JournalNodes -->
  <property>
    <name>dfs.namenode.shared.edits.dir</name>
    <value>qjournal://master:8485;slave1:8485;slave2:8485/cluster</value>
  </property>
  <!-- Local directory in which each JournalNode keeps its data -->
  <property>
    <name>dfs.journalnode.edits.dir</name>
    <value>/root/hadoop/journalData</value>
  </property>
  <!-- Enable automatic NameNode failover -->
  <property>
    <name>dfs.ha.automatic-failover.enabled</name>
    <value>true</value>
  </property>
  <!-- Client-side failover implementation; the key suffix must be the nameservice ID ("cluster") -->
  <property>
    <name>dfs.client.failover.proxy.provider.cluster</name>
    <value>org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider</value>
  </property>
  <!-- Fencing methods, which keep a failed-over NameNode from staying active; one method per line -->
  <property>
    <name>dfs.ha.fencing.methods</name>
    <value>
      sshfence
      shell(/bin/true)
    </value>
  </property>
  <!-- sshfence needs passwordless SSH; point this at the key of your own user -->
  <property>
    <name>dfs.ha.fencing.ssh.private-key-files</name>
    <value>/root/.ssh/id_rsa</value>
  </property>
  <!-- sshfence connection timeout (value is 30000) -->
  <property>
    <name>dfs.ha.fencing.ssh.connect-timeout</name>
    <value>30000</value>
  </property>
  <property>
    <name>dfs.replication</name>
    <value>2</value>
  </property>
  <property>
    <name>dfs.namenode.name.dir</name>
    <value>/root/hadoop/dfs/namenode</value>
  </property>
  <property>
    <name>dfs.datanode.data.dir</name>
    <value>/root/hadoop/dfs/datanode</value>
  </property>
  <property>
    <name>dfs.webhdfs.enabled</name>
    <value>true</value>
  </property>
  <property>
    <name>dfs.permissions</name>
    <value>false</value>
  </property>
</configuration>
8.4 修改mapred-site.xml
<configuration>
<!-- Run MapReduce on YARN -->
<property>
<name>mapreduce.framework.name</name>
<value>yarn</value>
</property>
<!-- MapReduce JobHistory Server address (default port 10020) -->
<property>
<name>mapreduce.jobhistory.address</name>
<value>master:10020</value>
</property>
<!-- MapReduce JobHistory Server web UI address (default port 19888) -->
<property>
<name>mapreduce.jobhistory.webapp.address</name>
<value>master:19888</value>
</property>
<!-- Classpath entries for MapReduce applications -->
<property>
<name>mapreduce.application.classpath</name> <value>/usr/hadoop321/share/hadoop/mapreduce/*,/usr/hadoop321/share/hadoop/mapreduce/lib/*</value>
</property>
</configuration>
8.5 修改yarn-site.xml
<configuration>
<!-- Enable ResourceManager high availability -->
<property>
<name>yarn.resourcemanager.ha.enabled</name>
<value>true</value>
</property>
<!-- Cluster id of the ResourceManager pair -->
<property>
<name>yarn.resourcemanager.cluster-id</name>
<value>yrc</value>
</property>
<!-- Logical ids of the ResourceManagers -->
<property>
<name>yarn.resourcemanager.ha.rm-ids</name>
<value>rm1,rm2</value>
</property>
<!-- Host of each ResourceManager -->
<property>
<name>yarn.resourcemanager.hostname.rm1</name>
<value>master</value>
</property>
<property>
<name>yarn.resourcemanager.hostname.rm2</name>
<value>slave1</value>
</property>
<!-- Web UI HTTP address exposed by each RM; cluster information can be viewed in a browser at this address -->
<property>
<name>yarn.resourcemanager.webapp.address.rm1</name>
<value>master:8088</value>
</property>
<property>
<name>yarn.resourcemanager.webapp.address.rm2</name>
<value>slave1:8088</value>
</property>
<!-- ZooKeeper address used by the ResourceManagers -->
<property>
<name>yarn.resourcemanager.zk-address</name>
<value>master:2181</value>
</property>
<!-- Auxiliary service run by the NodeManagers -->
<property>
<name>yarn.nodemanager.aux-services</name>
<value>mapreduce_shuffle</value>
</property>
<!-- Classpath for YARN applications -->
<property>
<name>yarn.application.classpath</name> <value>/usr/hadoop321/etc/hadoop:/usr/hadoop321/share/hadoop/common/lib/*:/usr/hadoop321/share/hadoop/common/*:/usr/hadoop321/share/hadoop/hdfs:/usr/hadoop321/share/hadoop/hdfs/lib/*:/usr/hadoop321/share/hadoop/hdfs/*:/usr/hadoop321/share/hadoop/mapreduce/lib/*:/usr/hadoop321/share/hadoop/mapreduce/*:/usr/hadoop321/share/hadoop/yarn:/usr/hadoop321/share/hadoop/yarn/lib/*:/usr/hadoop321/share/hadoop/yarn/*</value>
</property>
</configuration>
8.6 修改hadoop-env.sh
# Users that the HDFS daemons (NameNode, DataNode, ZKFC, JournalNode) and the
# YARN daemons (ResourceManager, NodeManager) run as; all set to root here.
export HDFS_NAMENODE_USER=root
export HDFS_DATANODE_USER=root
export HDFS_ZKFC_USER=root
export HDFS_JOURNALNODE_USER=root
export YARN_RESOURCEMANAGER_USER=root
export YARN_NODEMANAGER_USER=root
8.7 设置多机互相免密登录
-
在各机器上生成密钥
ssh-keygen
-
都把公钥放到authorized_keys文件中
cat id_rsa.pub>>authorized_keys
cat id_rsa.pub.s1>>authorized_keys
cat id_rsa.pub.s2>>authorized_keys
-
发送到每台机器上
scp authorized_keys root@slave1:/root/.ssh
scp authorized_keys root@slave2:/root/.ssh
8.8 启动设置
-
三台机上都启动journalnode
hdfs --daemon start journalnode
-
在master上:
hdfs namenode -format #格式化namenode
zkServer.sh start #启动zk
hdfs zkfc -formatZK #格式化zk
scp -r /root/hadoop root@slave1:/root #同步两个namenode
start-all.sh #启动服务 -
jps查看进程,浏览器访问namenode

3870

被折叠的 条评论
为什么被折叠?



