首页 分享 MapReduce——统计单词出现次数WordCount

MapReduce——统计单词出现次数WordCount

来源:花匠小妙招 时间:2024-12-17 08:38

一、统计每个单词出现的次数（ForWorldCount）

import org.apache.hadoop.conf.Configuration;

import org.apache.hadoop.fs.FileSystem;

import org.apache.hadoop.fs.Path;

import org.apache.hadoop.io.IntWritable;

import org.apache.hadoop.io.LongWritable;

import org.apache.hadoop.io.Text;

import org.apache.hadoop.mapreduce.Job;

import org.apache.hadoop.mapreduce.Mapper;

import org.apache.hadoop.mapreduce.Reducer;

import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;

import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

import java.io.IOException;

import java.net.URI;

import java.net.URISyntaxException;

/**
 * Word-count MapReduce job: reads text lines, splits them into
 * whitespace-separated words and writes one {@code word<TAB>count} record per
 * distinct word.
 *
 * <p>Input and output locations are hard-coded in {@link #main(String[])}.
 */
public class ForWorldCount {

    /** Emits {@code (word, 1)} for every whitespace-separated token of an input line. */
    public static class ForMapper extends Mapper<LongWritable, Text, Text, IntWritable> {

        // Writable instances reused for every record written by this mapper.
        private final Text oKey = new Text();

        private final IntWritable oValue = new IntWritable(1);

        /**
         * Tokenizes one input line and writes {@code (token, 1)} per token.
         *
         * @param key     input key supplied by the framework (unused)
         * @param value   one line of input text
         * @param context sink for the {@code (word, 1)} pairs
         */
        @Override
        protected void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {
            String line = value.toString().trim();
            if (line.isEmpty()) {
                // A blank line would otherwise be counted as the word "".
                return;
            }
            // Split on any whitespace run: consecutive spaces must not yield
            // empty tokens, and a tab left inside a token would corrupt the
            // tab-separated output records.
            for (String word : line.split("\\s+")) {
                oKey.set(word);
                context.write(oKey, oValue);
            }
        }
    }

    /** Sums the partial counts of each word and writes {@code (word, total)}. */
    public static class ForReducer extends Reducer<Text, IntWritable, Text, IntWritable> {

        // Writable instance reused for every record written by this reducer.
        private final IntWritable oValue = new IntWritable();

        /**
         * Adds up all counts received for {@code key}.
         *
         * @param key     the word
         * @param values  the counts emitted for that word
         * @param context sink for the {@code (word, total)} pair
         */
        @Override
        protected void reduce(Text key, Iterable<IntWritable> values, Context context)
                throws IOException, InterruptedException {
            int sum = 0;
            for (IntWritable count : values) {
                sum += count.get();
            }
            oValue.set(sum);
            context.write(key, oValue);
        }
    }

    /**
     * Configures and runs the job. An existing output directory is deleted
     * first; the JVM exits with status 1 if the job fails.
     *
     * @param args ignored
     */
    public static void main(String[] args)
            throws IOException, URISyntaxException, ClassNotFoundException, InterruptedException {
        Job job = Job.getInstance();
        // Lets the framework locate the jar containing the mapper/reducer
        // when the job is not run in local mode.
        job.setJarByClass(ForWorldCount.class);

        job.setMapperClass(ForMapper.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(IntWritable.class);

        job.setReducerClass(ForReducer.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);

        FileInputFormat.addInputPath(job, new Path("E://forTestData//forWordCount"));

        Path path = new Path("E://output");
        // Resolve the file system from the output path and the job's own
        // configuration so the existence check and delete target the same
        // file system the job will write to.
        FileSystem fileSystem = path.getFileSystem(job.getConfiguration());
        if (fileSystem.exists(path)) {
            fileSystem.delete(path, true);
        }
        FileOutputFormat.setOutputPath(job, path);

        // Surface job failure through the process exit status.
        if (!job.waitForCompletion(true)) {
            System.exit(1);
        }
    }
}

二、找出出现次数最多的单词（ForSortWordCount，输入为“单词<TAB>次数”格式的文本，例如单词统计作业的输出）

import org.apache.hadoop.conf.Configuration;

import org.apache.hadoop.fs.FileSystem;

import org.apache.hadoop.fs.Path;

import org.apache.hadoop.io.IntWritable;

import org.apache.hadoop.io.LongWritable;

import org.apache.hadoop.io.Text;

import org.apache.hadoop.mapreduce.Job;

import org.apache.hadoop.mapreduce.Mapper;

import org.apache.hadoop.mapreduce.Reducer;

import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;

import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

import java.io.IOException;

import java.net.URI;

import java.net.URISyntaxException;

import java.util.*;

/**
 * MapReduce job that finds the most frequent word in tab-separated
 * {@code word<TAB>count} records (such as the output of a word-count job).
 *
 * <p>Ordering rule used by both the mapper and the reducer: a higher count
 * wins; on equal counts the longer word wins; otherwise the candidate seen
 * first is kept.
 *
 * <p>Input and output locations are hard-coded in {@link #main(String[])}.
 */
public class ForSortWordCount {

    /**
     * Returns whether {@code (word, times)} should replace the current best
     * candidate {@code (bestWord, bestTimes)}.
     *
     * @param bestWord current best word, or {@code null} when there is none yet
     */
    private static boolean beats(String word, int times, String bestWord, int bestTimes) {
        if (bestWord == null) {
            return true;
        }
        if (times != bestTimes) {
            return times > bestTimes;
        }
        return word.length() > bestWord.length();
    }

    /**
     * Scans the records of one input split and emits only the best candidate
     * of that split, once, from {@link #cleanup}.
     */
    public static class ForMapper extends Mapper<LongWritable, Text, Text, IntWritable> {

        // Best candidate seen so far in this split; bestWord is null until a
        // well-formed record has been read.
        private String bestWord;

        private int bestTimes;

        /**
         * Parses one {@code word<TAB>count} line and updates the best candidate.
         * Lines without a tab-separated second field are skipped.
         *
         * @param key     input key supplied by the framework (unused)
         * @param value   one {@code word<TAB>count} line
         * @param context unused here; output is written in {@link #cleanup}
         * @throws NumberFormatException if the count field is not an integer
         */
        @Override
        protected void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {
            // Fields are separated by a TAB character, not by the letter 't'.
            String[] fields = value.toString().split("\t");
            if (fields.length < 2) {
                // Blank or malformed line: nothing to compare.
                return;
            }
            String word = fields[0];
            int times = Integer.parseInt(fields[1].trim());
            if (beats(word, times, bestWord, bestTimes)) {
                bestWord = word;
                bestTimes = times;
            }
        }

        /** Writes this split's best candidate, if any record was seen. */
        @Override
        protected void cleanup(Context context) throws IOException, InterruptedException {
            if (bestWord == null) {
                // Empty split: there is no candidate to emit.
                return;
            }
            context.write(new Text(bestWord), new IntWritable(bestTimes));
        }
    }

    /**
     * Picks the overall best word among the candidates emitted by the mappers
     * and writes it once from {@link #cleanup}.
     */
    public static class ForReducer extends Reducer<Text, IntWritable, Text, IntWritable> {

        // Best candidate seen so far; bestWord is null until reduce() has run.
        private String bestWord;

        private int bestTimes;

        /**
         * Considers one candidate word. If several counts arrive for the same
         * word, the largest one is used.
         *
         * @param key     candidate word
         * @param values  counts reported for that word
         * @param context unused here; output is written in {@link #cleanup}
         */
        @Override
        protected void reduce(Text key, Iterable<IntWritable> values, Context context)
                throws IOException, InterruptedException {
            int times = Integer.MIN_VALUE;
            for (IntWritable count : values) {
                times = Math.max(times, count.get());
            }
            // Copy the key to a String: the Text instance must not be retained
            // across reduce() calls.
            String word = key.toString();
            if (beats(word, times, bestWord, bestTimes)) {
                bestWord = word;
                bestTimes = times;
            }
        }

        /** Writes the overall winner, if any candidate was received. */
        @Override
        protected void cleanup(Context context) throws IOException, InterruptedException {
            if (bestWord == null) {
                // No input reached the reducer: nothing to write.
                return;
            }
            context.write(new Text(bestWord), new IntWritable(bestTimes));
        }
    }

    /**
     * Configures and runs the job with a single reducer. An existing output
     * directory is deleted first; the JVM exits with status 1 if the job fails.
     *
     * @param args ignored
     */
    public static void main(String[] args)
            throws IOException, URISyntaxException, ClassNotFoundException, InterruptedException {
        Job job = Job.getInstance();
        // Lets the framework locate the jar containing the mapper/reducer
        // when the job is not run in local mode.
        job.setJarByClass(ForSortWordCount.class);

        job.setMapperClass(ForMapper.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(IntWritable.class);

        job.setReducerClass(ForReducer.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);

        Path path = new Path("E://output");
        // Resolve the file system from the output path and the job's own
        // configuration so the existence check and delete target the same
        // file system the job will write to.
        FileSystem fileSystem = path.getFileSystem(job.getConfiguration());
        if (fileSystem.exists(path)) {
            fileSystem.delete(path, true);
        }

        FileInputFormat.addInputPath(job, new Path("E://forTestData//forWordCount//forSortWordCount"));
        FileOutputFormat.setOutputPath(job, path);

        // One reducer is required: the global maximum can only be chosen when
        // every candidate reaches the same reducer instance.
        job.setNumReduceTasks(1);

        // Surface job failure through the process exit status.
        if (!job.waitForCompletion(true)) {
            System.exit(1);
        }
    }
}


相关知识

性能调优: df中的string wordcount
投票次数统计器
图解大数据
最小操作次数
58. 最后一个单词的长度
花祭的单词 花祭的单词是什么
花的单词复数形式
花期单词
花的单词是什么
定义一个函数count,统计字符串中字符出现频数, 输入一个字符串,输出一个表,

网址: MapReduce——统计单词出现次数WordCount https://www.huajiangbk.com/newsview1141729.html

所属分类:花卉
上一篇: 【小程序】wx:for=“”下对
下一篇: MyBatis mapper.x

推荐分享