Hadoop IO 错误:map 输出的键类型不匹配:应为 org.apache.hadoop.io.Text,实际收到 RegexMatcher.CustomKey

f0brbegy  于 2021-06-02  发布在  Hadoop
关注(0)|答案(1)|浏览(402)

我得到以下错误:

java.lang.Exception: java.io.IOException: Type mismatch in key from map: expected org.apache.hadoop.io.Text, received RegexMatcher.CustomKey
    at org.apache.hadoop.mapred.LocalJobRunner$Job.runTasks(LocalJobRunner.java:462)
    at org.apache.hadoop.mapred.LocalJobRunner$Job.run(LocalJobRunner.java:522)
Caused by: java.io.IOException: Type mismatch in key from map: expected org.apache.hadoop.io.Text, received RegexMatcher.CustomKey

首先,我在 MapReduce 程序中定义了一个名为 CustomKey 的自定义数据类型:

/**
 * Composite key holding a user id and a friend id, serialized through Hadoop's
 * {@code Writable} contract.
 *
 * <p>The wire format is {@code userId} followed by {@code friendId}. {@link #write} and
 * {@link #readFields} must emit and consume the fields in exactly that order.
 *
 * <p>NOTE(review): this class only implements {@code Writable}. Hadoop sorts map output
 * keys, so a class used as a map output key is normally expected to implement
 * {@code WritableComparable} (adding a {@code compareTo}) -- confirm against the job setup.
 */
public class CustomKey implements Writable {

    public Text userId;
    public Text friendId;

    /** Creates a key with empty ids; needed so the framework can instantiate and fill it. */
    public CustomKey() {
        this.userId = new Text();
        this.friendId = new Text();
    }

    /**
     * Creates a key for the given pair.
     *
     * @param userId   id of the user
     * @param friendId id of the friend
     */
    public CustomKey(String userId, String friendId) {
        this.userId = new Text(userId);
        this.friendId = new Text(friendId);
    }

    /**
     * Serializes both ids, {@code userId} first.
     *
     * @param out sink to write the two fields to
     * @throws IOException if the underlying stream fails
     */
    @Override
    public void write(DataOutput out) throws IOException {
        userId.write(out);
        // Previously userId was written twice, so friendId was never serialized and
        // readFields() restored the user id into both fields.
        friendId.write(out);
    }

    /**
     * Restores both ids in the same order {@link #write} emitted them.
     *
     * @param in source to read the two fields from
     * @throws IOException if the underlying stream fails
     */
    @Override
    public void readFields(DataInput in) throws IOException {
        userId.readFields(in);
        friendId.readFields(in);
    }

    /** Two keys are equal when both their user id and friend id are equal. */
    @Override
    public boolean equals(Object obj) {
        if (this == obj) {
            return true;
        }
        if (!(obj instanceof CustomKey)) {
            return false;
        }
        CustomKey other = (CustomKey) obj;
        return userId.equals(other.userId) && friendId.equals(other.friendId);
    }

    /**
     * Content-based hash, consistent with {@link #equals}. Hash-based partitioning relies on
     * equal keys producing equal hash codes; the identity hash inherited from {@code Object}
     * does not provide that.
     */
    @Override
    public int hashCode() {
        return 31 * userId.hashCode() + friendId.hashCode();
    }

}

然后我创建了一个 Mapper:SingleClassv2LogMapper

/**
 * Mapper that emits one {@code (CustomKey, message count)} record for each input line whose
 * parsed tag equals {@code RegularExpressionBundle.updateMultipleMessageStatus}. Lines with
 * no tag or a different tag produce no output.
 */
public static class SingleClassv2LogMapper extends Mapper<Object, Text, CustomKey, IntWritable> {

    /**
     * Parses the tag and body out of one log line and, for matching tags, writes the
     * user/friend key together with the message count taken from the body.
     *
     * @param key     input key supplied by the framework; not used
     * @param value   one line of log text
     * @param context sink for the emitted key/value pair
     * @throws IOException          if writing to the context fails
     * @throws InterruptedException if the task is interrupted while writing
     */
    @Override
    protected void map(Object key, Text value, Context context) throws IOException, InterruptedException {
        // Convert the Text once and reuse it for both parser calls.
        String currentLine = value.toString();
        String tag = RingIdLogParser.parseHashTag(currentLine);
        String body = RingIdLogParser.parseBody(currentLine);

        if (tag == null || !tag.equals(RegularExpressionBundle.updateMultipleMessageStatus)) {
            return;
        }

        CustomKey customKey = RingIdLogParser.parseUserFrinedInfo(body);
        int messageNo = RingIdLogParser.getMessageCount(body);
        context.write(customKey, new IntWritable(messageNo));
    }

}

以及 Reducer:

/**
 * Reducer that adds up all counts received for one {@code CustomKey} and writes the total
 * under a text key of the form {@code "<userId> <friendId>"}.
 */
public static class SingleClassv2LogReducer extends Reducer<CustomKey, IntWritable, Text, IntWritable> {

    /**
     * Sums the values for a single key and emits one output record.
     *
     * @param key     user/friend pair the values belong to
     * @param values  per-record counts for that pair
     * @param context sink for the emitted {@code (Text, IntWritable)} pair
     * @throws IOException          if writing to the context fails
     * @throws InterruptedException if the task is interrupted while writing
     */
    @Override
    protected void reduce(CustomKey key, Iterable<IntWritable> values, Context context) throws IOException, InterruptedException {
        // A field named "sum" used to be shadowed by this local and was never read;
        // the field is gone and the local has an unambiguous name.
        int total = 0;
        for (IntWritable value : values) {
            total += value.get();
        }

        String compactUser = key.userId.toString() + " " + key.friendId.toString();
        context.write(new Text(compactUser), new IntWritable(total));
    }

}

我现在该怎么办?有人来帮我吗。
与驱动程序相关的代码如下所示

// Create the job and point it at the jar containing the driver class.
Configuration configuration = new Configuration();
Job job = Job.getInstance(configuration, "Regex Matcher");
job.setJarByClass(SingleClassLogDriverv2.class);

// Processing stages: mapper -> combiner -> reducer.
job.setMapperClass(SingleClassv2LogMapper.class);
job.setCombinerClass(SingleClassv2LogCombiner.class);
job.setReducerClass(SingleClassv2LogReducer.class);

// Intermediate (map output) key/value types.
job.setMapOutputKeyClass(CustomKey.class);
job.setMapOutputValueClass(IntWritable.class);

// Final (job output) key/value types.
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(IntWritable.class);

// First argument is the input path, second the output path.
Path inputPath = new Path(args[0]);
FileInputFormat.addInputPath(job, inputPath);
Path outputPath = new Path(args[1]);
FileOutputFormat.setOutputPath(job, outputPath);

// Block until the job finishes; exit 0 on success, 1 otherwise.
boolean succeeded = job.waitForCompletion(true);
System.exit(succeeded ? 0 : 1);
hjzp0vay

hjzp0vay1#

在使用 Eclipse 为 MapReduce 程序创建 jar 并处理兼容性问题时,我也遇到了类似的问题。我的需求与传统的 WordCount 示例程序不同:要为大约 39 万个数字中的每一个,在数字旁边打印出它的文字(英文单词)形式。这些数字分布在我的 12 个输入文件中,其中也包含重复的数字。

java.io.IOException: Type mismatch in key from map: expected org.apache.hadoop.io.Text, recieved org.apache.hadoop.io.LongWritable

后来我修正了这个问题,我想得到的汇总结果如下所示:

716900482    Seventy One Crore Sixty Nine Lac Four Hundred Eighty Two only.

我用 Maven 构建了一个把数字转换成文字形式的工具,并把它的 jar 显式地添加到了我的项目中。

所以,我们来看看我的程序,它类似于wordcount程序,但用途不同-

package com.whodesire.count;

import java.io.IOException;
import java.util.StringTokenizer;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.GenericOptionsParser;

import com.whodesire.numstats.AmtInWords;

public class CountInWords {

    public static class NumberTokenizerMapper 
                    extends Mapper <Object, Text, LongWritable, Text> {

        private static final Text theOne = new Text("1");
        private LongWritable longWord = new LongWritable();

        public void map(Object key, Text value, Context context) {

            try{
                StringTokenizer itr = new StringTokenizer(value.toString());
                while (itr.hasMoreTokens()) {
                    longWord.set(Long.parseLong(itr.nextToken()));
                    context.write(longWord, theOne);
                }
            }catch(ClassCastException cce){
                System.out.println("ClassCastException raiseddd...");
                System.exit(0);
            }catch(IOException | InterruptedException ioe){
                ioe.printStackTrace();
                System.out.println("IOException | InterruptedException raiseddd...");
                System.exit(0);
            }
        }
    }

    public static class ModeReducerCumInWordsCounter 
            extends Reducer <LongWritable, Text, LongWritable, Text>{
        private Text result = new Text();

        //This is the user defined reducer function which is invoked for each unique key
        public void reduce(LongWritable key, Iterable<Text> values, 
                Context context) throws IOException, InterruptedException {

            /***Putting the key, which is a LongWritable value, 
                        putting in AmtInWords constructor as String***/
            AmtInWords aiw = new AmtInWords(key.toString());
            result.set(aiw.getInWords());

            //Finally the word and counting is sent to Hadoop MR and thus to target
            context.write(key, result);
        }
    }

    public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {

        /****
       ***all random numbers generated inside input files has been
       ***generated using url https://andrew.hedges.name/experiments/random/
       ****/

        //Load the configuration files and add them to the the conf object
        Configuration conf = new Configuration();       

        String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();

        Job job = new Job(conf, "CountInWords");

        //Specify the jar which contains the required classes for the job to run.
        job.setJarByClass(CountInWords.class);

        job.setMapperClass(NumberTokenizerMapper.class);
        job.setCombinerClass(ModeReducerCumInWordsCounter.class);
        job.setReducerClass(ModeReducerCumInWordsCounter.class);

        //Set the output key and the value class for the entire job
        job.setMapOutputKeyClass(LongWritable.class);
        job.setMapOutputValueClass(Text.class);

        //Set the Input (format and location) and similarly for the output also
        FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
        FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));

        //Setting the Results to Single Target File
        job.setNumReduceTasks(1);

        //Submit the job and wait for it to complete
        System.exit(job.waitForCompletion(true) ? 0 : 1);

    }

}

我知道这个答复来得太迟了,但希望它能帮助其他人找到解决办法,谢谢。

相关问题