使用MapReduce显示以a-z各字母开头的所有单词

pkmbmrz7  于 2021-05-29  发布在  Hadoop
关注(0)|答案(1)|浏览(361)

我想找出分别以字母a到z开头的所有单词。reduce函数的输出应该如下: key="alphabet", value="list of words against alphabet + their count"。我使用了下面的代码,但它只显示词频,而不显示单词列表。

import java.io.IOException;
import java.util.*;

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.conf.*;
import org.apache.hadoop.io.*;
import org.apache.hadoop.mapred.*;
import org.apache.hadoop.util.*;

/**
 * MapReduce job (old {@code org.apache.hadoop.mapred} API) that counts, per initial letter,
 * how many whitespace-separated tokens of the input start with that letter.
 *
 * <p>Each output record is the key {@code <LETTER>_Count} (for example {@code A_Count})
 * paired with the number of matching tokens. Only the counts are emitted; the matching
 * words themselves are not part of the output.
 */
public class WordCountFrequency {

    /**
     * Emits {@code (<INITIAL>_Count, 1)} for every token whose first character is an
     * uppercase ASCII letter {@code A}-{@code Z}.
     *
     * <p>Matching is case-sensitive: tokens starting with a lowercase letter, a digit or
     * punctuation produce no output.
     */
    public static class WordCountFrequencyMap extends MapReduceBase implements Mapper<LongWritable, Text, Text, IntWritable> {
        private static final IntWritable ONE = new IntWritable(1);
        // Single Text instance that is overwritten for each emitted key.
        private final Text word = new Text();

        /**
         * Tokenizes one input line on whitespace and emits a count of one per matching token.
         *
         * @param key      input key supplied by the input format; not used
         * @param value    the line of text to tokenize
         * @param output   collector receiving {@code (<INITIAL>_Count, 1)} pairs
         * @param reporter progress reporter; not used
         * @throws IOException if the collector fails to accept a record
         */
        @Override
        public void map(LongWritable key, Text value, OutputCollector<Text, IntWritable> output, Reporter reporter) throws IOException {
            StringTokenizer tokenizer = new StringTokenizer(value.toString());
            while (tokenizer.hasMoreTokens()) {
                // StringTokenizer never returns an empty token, so charAt(0) is always valid.
                char initial = tokenizer.nextToken().charAt(0);
                if (initial >= 'A' && initial <= 'Z') {
                    // Derive the key from the initial instead of one hard-coded branch per letter.
                    word.set(initial + "_Count");
                    output.collect(word, ONE);
                }
            }
        }
    }

    /**
     * Sums the partial counts received for one key. Because it only adds integers and emits
     * the same key/value types it consumes, the job also registers it as the combiner.
     */
    public static class WordCountFrequencyReduce extends MapReduceBase implements Reducer<Text, IntWritable, Text, IntWritable> {

        /**
         * Adds up all values for {@code key} and emits a single {@code (key, total)} record.
         *
         * @param key      the {@code <LETTER>_Count} key being reduced
         * @param values   partial counts for that key
         * @param output   collector receiving the total
         * @param reporter progress reporter; not used
         * @throws IOException if the collector fails to accept the record
         */
        @Override
        public void reduce(Text key, Iterator<IntWritable> values, OutputCollector<Text, IntWritable> output, Reporter reporter) throws IOException {
            int sum = 0;
            while (values.hasNext()) {
                sum += values.next().get();
            }
            output.collect(key, new IntWritable(sum));
        }
    }

    /**
     * Configures and runs the job.
     *
     * @param args {@code args[0]} is the input path, {@code args[1]} is the output path
     * @throws Exception if job submission or execution fails
     */
    public static void main(String[] args) throws Exception {
        // Fail with a readable message instead of an ArrayIndexOutOfBoundsException.
        if (args.length < 2) {
            System.err.println("Usage: WordCountFrequency <input path> <output path>");
            System.exit(2);
        }

        JobConf conf = new JobConf(WordCountFrequency.class);
        conf.setJobName("WordCountFrequency");

        conf.setOutputKeyClass(Text.class);
        conf.setOutputValueClass(IntWritable.class);

        conf.setMapperClass(WordCountFrequencyMap.class);
        conf.setCombinerClass(WordCountFrequencyReduce.class);
        conf.setReducerClass(WordCountFrequencyReduce.class);

        conf.setInputFormat(TextInputFormat.class);
        conf.setOutputFormat(TextOutputFormat.class);

        FileInputFormat.setInputPaths(conf, new Path(args[0]));
        FileOutputFormat.setOutputPath(conf, new Path(args[1]));

        JobClient.runJob(conf);
    }
}

我想这样显示输出: "Alphabet, 'list of words', word counts"

```
A: Apple, Ant, And, Add, Axis, 5[wordcount]
B: Ball, Bat, Boy, Bus, 4
....
Z: Zebra, Zinc, Zeal, 3
```

如何才能像上面那样显示输出?
u3r8eeie

u3r8eeie1#

下面是解决方案的伪代码

// Pseudocode mapper: for every token in the input value, emits the pair
// (first character of the token, the token itself), so that tokens sharing
// an initial end up under the same key.
map(LongWritable key, Text value) {
    for each token in value:
        // Key is the token's first character; the value is the whole token.
        output.collect(token.charAt(0), token)
}

// Pseudocode reducer: joins every word received for one letter into a
// comma-separated string and appends the number of words at the end,
// producing a single (letter, "word1, word2, ..., N") record.
// Not suitable as a combiner: the emitted value already has the count
// appended, so a second pass would treat it as one more word.
reduce (Text letter, Iterable<Text> words) {
    String result = "";
    int count = 0;
    for (Text word : words) {
        // NOTE(review): pseudocode - real Hadoop code presumably needs word.toString() here; confirm.
        result += word.get()+", ";
        count++;
    }
    // Each word is followed by ", ", so the count lands directly after the last separator.
    output.collect(letter, new Text(result+count));
}

相关问题