setup method is not called in Hadoop Mapper

blpfk2vs · asked 2021-06-04 · in Hadoop

I am running a chain of Hadoop mappers/reducers that produces a list of movie IDs, and I use the moviedata file to display the movie names for those IDs. I am using the mapper class below. The setup method is apparently never called, since I never see its print statement, and when I try to use the HashMap that the load method populates I get a NullPointerException. The code is below. Any hints would be much appreciated.

import java.io.BufferedReader;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.IOException;
import java.util.HashMap;

import org.apache.hadoop.filecache.DistributedCache;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.MapReduceBase;
import org.apache.hadoop.mapred.Mapper;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reporter;
import org.apache.hadoop.mapreduce.Mapper.Context;

public class MovieNamesMapper extends MapReduceBase implements Mapper<Object, Text, Text, Text> {

    private static HashMap<String, String> movieNameHashMap = new HashMap<String, String>();
    private BufferedReader bufferedReader;
    private String movieId = "";

    protected void setup(Context context) throws IOException, InterruptedException {
        System.out.println("Setting up system..");
        Path[] cacheFilesLocal = DistributedCache.getLocalCacheFiles(context.getConfiguration());
        for (Path eachPath : cacheFilesLocal) {
            if (eachPath.getName().toString().trim().equals("u.item")) {
                loadMovieNamesHashMap(eachPath, context);
            }
        }
    }

    private void loadMovieNamesHashMap(Path filePath, Context context) throws IOException {
        System.out.println("Loading movie names..");
        String strLineRead = "";
        try {
            bufferedReader = new BufferedReader(new FileReader(filePath.toString()));
            while ((strLineRead = bufferedReader.readLine()) != null) {
                String movieIdArray[] = strLineRead.toString().split("\t|::");
                movieNameHashMap.put(movieIdArray[0].trim(), movieIdArray[1].trim());
            }
        } catch (FileNotFoundException e) {
            e.printStackTrace();
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            if (bufferedReader != null) {
                bufferedReader.close();
            }
        }
    }

    public void map(Object key, Text value, OutputCollector<Text, Text> output,
            Reporter reporter) throws IOException {
        System.out.println(key.toString() + " - " + value.toString());
        if (value.toString().length() > 0) {
            String moviePairArray[] = value.toString().split(":");
            for (String moviePair : moviePairArray) {
                String movieArray[] = moviePair.split(",");
                output.collect(new Text(movieNameHashMap.get(movieArray[0])),
                        new Text(movieNameHashMap.get(movieArray[1])));
            }
        }
    }

    public String getMovieId() {
        return movieId;
    }

    public void setMovieId(String movieId) {
        this.movieId = movieId;
    }
}

Below is my run method.

public int run(String[] args) throws Exception {
    // For finding user and his rated movie list.
    JobConf conf1 = new JobConf(MovieTopDriver.class);
    conf1.setMapperClass(MoviePairsMapper.class);
    conf1.setReducerClass(MoviePairsReducer.class);
    conf1.setJarByClass(MovieTopDriver.class);
    FileInputFormat.addInputPath(conf1, new Path(args[0]));
    FileOutputFormat.setOutputPath(conf1, new Path("temp"));
    conf1.setMapOutputKeyClass(Text.class);
    conf1.setMapOutputValueClass(Text.class);
    conf1.setOutputKeyClass(Text.class);
    conf1.setOutputValueClass(IntWritable.class);

    // For finding movie pairs.
    JobConf conf2 = new JobConf(MovieTopDriver.class);
    conf2.setMapperClass(MoviePairsCoOccurMapper.class);
    conf2.setReducerClass(MoviePairsCoOccurReducer.class);
    conf2.setJarByClass(MovieTopDriver.class);
    FileInputFormat.addInputPath(conf2, new Path("temp"));
    FileOutputFormat.setOutputPath(conf2, new Path("freq_temp"));
    conf2.setInputFormat(KeyValueTextInputFormat.class);
    conf2.setMapOutputKeyClass(Text.class);
    conf2.setMapOutputValueClass(IntWritable.class);
    conf2.setOutputKeyClass(Text.class);
    conf2.setOutputValueClass(IntWritable.class);

    // Find top frequent movies along with their names.
    // Output Freq, moviePair
    // Keep a count and output only 20.
    JobConf conf3 = new JobConf(MovieTopDriver.class);
    conf3.setMapperClass(ValueKeyMapper.class);
    conf3.setReducerClass(ValueKeyReducer.class);
    conf3.setJarByClass(MovieTopDriver.class);
    FileInputFormat.addInputPath(conf3, new Path("freq_temp"));
    FileOutputFormat.setOutputPath(conf3, new Path("freq_temp2"));
    conf3.setInputFormat(KeyValueTextInputFormat.class);
    conf3.setMapOutputKeyClass(IntWritable.class);
    conf3.setMapOutputValueClass(Text.class);
    conf3.setOutputKeyClass(IntWritable.class);
    conf3.setOutputValueClass(Text.class);
    // Use only one reducer as we want to sort.
    conf3.setNumReduceTasks(1);
    // To sort in decreasing order.
    conf3.setOutputKeyComparatorClass(LongWritable.DecreasingComparator.class);

    // Find top movie name
    // Use a mapper side join to output names.
    JobConf conf4 = new JobConf(MovieTopDriver.class);
    conf4.setMapperClass(MovieNamesMapper.class);
    conf4.setJarByClass(MovieTopDriver.class);
    FileInputFormat.addInputPath(conf4, new Path("freq_temp2"));
    FileOutputFormat.setOutputPath(conf4, new Path(args[1]));
    conf4.setInputFormat(KeyValueTextInputFormat.class);
    conf4.setMapOutputKeyClass(Text.class);
    conf4.setMapOutputValueClass(Text.class);

    // Run the jobs
    Job job1 = new Job(conf1);
    Job job2 = new Job(conf2);
    Job job3 = new Job(conf3);
    Job job4 = new Job(conf4);
    JobControl jobControl = new JobControl("jobControl");
    jobControl.addJob(job1);
    jobControl.addJob(job2);
    jobControl.addJob(job3);
    jobControl.addJob(job4);
    job2.addDependingJob(job1);
    job3.addDependingJob(job2);
    job4.addDependingJob(job3);
    handleRun(jobControl);

    FileSystem.get(conf2).deleteOnExit(new Path("temp"));
    FileSystem.get(conf3).deleteOnExit(new Path("freq_temp"));
    FileSystem.get(conf4).deleteOnExit(new Path("freq_temp2"));
    System.out.println("Program complete.");
    return 0;
}

Update: I am using Hadoop 1.2.1, which is all I am allowed to use on the school cluster.
Update: I switched from setup to configure, but it is still not being called.

public void configure(JobConf jobConf) {
    System.out.println("Setting up system..");
    Path[] cacheFilesLocal;
    try {
        cacheFilesLocal = DistributedCache.getLocalCacheFiles(jobConf);
        for (Path eachPath : cacheFilesLocal) {
            if (eachPath.getName().toString().trim().equals("u.item")) {
                loadMovieNamesHashMap(eachPath);
            }
        }
    } catch (IOException e) {
        e.printStackTrace();
    }
}

I also added the following to the run method.

DistributedCache.addFileToClassPath(new Path("moviedata"), conf4);
conf4.set("mapred.job.tracker", "local");
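For reference, two things may be worth checking here. First, in distributed mode a mapper's System.out ends up in the per-task logs (userlogs), not in the driver console, so prints can be easy to miss; with mapred.job.tracker set to local as above, they should show up locally. Second, if the file still does not appear in getLocalCacheFiles, explicitly registering it with addCacheFile is the more common pattern. A minimal sketch, where the HDFS location "moviedata/u.item" is an assumption rather than something stated in the question:

import java.net.URI;
import org.apache.hadoop.filecache.DistributedCache;

// Sketch only: register the lookup file with the distributed cache before
// submitting the job, so DistributedCache.getLocalCacheFiles(conf) can
// return it on each task node.
// The HDFS path "moviedata/u.item" is an assumption, not from the question.
DistributedCache.addCacheFile(new URI("moviedata/u.item"), conf4);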

nhn9ugyo1#

If your IDE supports it, have it override the method from the superclass for you (in Eclipse: Source -> Override/Implement Methods) and see whether the IDE thinks your type (Context) is wrong. If you got it wrong, Eclipse will let you override the method by inserting a stub with the correct signature.
More precisely, you need to decide whether to use the mapred (old) or the mapreduce (new) package. You appear to be using the mapred package (note that Context is imported from the wrong one). If you want to stay with the mapred package, use the configure() method; otherwise, with the mapreduce package, use setup().
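For illustration, this is roughly what the mapper looks like against the new org.apache.hadoop.mapreduce API, where the framework does call setup(Context) once per task before any map() calls. This is a sketch only, reusing the class and field names from the question:

import java.io.IOException;
import java.util.HashMap;

import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

// Sketch only: the question's mapper rewritten against the new API.
// Here setup(Context) is invoked by the framework once per task.
public class MovieNamesMapper extends Mapper<Object, Text, Text, Text> {

    private HashMap<String, String> movieNameHashMap = new HashMap<String, String>();

    @Override
    protected void setup(Context context) throws IOException, InterruptedException {
        System.out.println("Setting up system..");
        // Load the movie-id -> movie-name lookup here, e.g. from the
        // distributed cache, as in the question's loadMovieNamesHashMap.
    }

    @Override
    public void map(Object key, Text value, Context context)
            throws IOException, InterruptedException {
        // The new API emits via context.write(...) instead of OutputCollector.
        for (String moviePair : value.toString().split(":")) {
            String[] movieArray = moviePair.split(",");
            context.write(new Text(movieNameHashMap.get(movieArray[0])),
                    new Text(movieNameHashMap.get(movieArray[1])));
        }
    }
}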


mnowg1ta2#

You have to use the configure method:

public void configure(JobConf job) {
}

setup is not defined in the documentation for the old API.


w1e3prcc3#

--- Alternative solution ---
I have not figured this out either. It seems the model where a setup method is called at the start of a mapper, before any map calls, may only be part of the new API (mapred vs. mapreduce).
I wanted to use the same map method for several mappers that differ in only one variable. Since fields cannot be overridden, I call my own public void setup() at the start of the map method and override it in the sub-mappers. Of course it then runs on every single map call (i.e., for every line of these mappers' input files), which is inefficient, but it is what I have for now; a one-time guard flag, sketched after the code below, would avoid the repeated work.

public static class Mapper1 extends MapReduceBase
        implements Mapper<LongWritable, Text, Text, Text> {

    protected int someVar;

    public void setup() {
        System.out.println("[LOG] setup called");
        someVar = 1;
    }

    public void map(LongWritable key, Text value,
            OutputCollector<Text, Text> output, Reporter reporter)
            throws IOException {
        setup();
        System.out.println("someVar: " + String.valueOf(someVar));
        //...
        output.collect(someKey, someValue);
    }
}

public static class Mapper3 extends Mapper1 {
    //protected int someVar;
    //private int someVar;
    /*
    @Override
    public void setup(Context context)
            throws IOException, InterruptedException {
        System.out.println("[LOG] setup called");
        someVar = 2;
    }

    @Override
    public void configure(JobConf jobConf) {
        System.out.println("[LOG] configure called");
        someVar = 2;
    }
    */
    @Override
    public void setup() {
        System.out.println("[LOG] setup called");
        someVar = 2;
    }
}
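One possible refinement of this workaround (a sketch, not part of the original answer): guard the hand-rolled setup() with a boolean so it runs once per task rather than once per record.

public static class Mapper1 extends MapReduceBase
        implements Mapper<LongWritable, Text, Text, Text> {

    protected int someVar;
    // Guard flag so the hand-rolled setup() runs only once per task.
    private boolean initialized = false;

    public void setup() {
        System.out.println("[LOG] setup called");
        someVar = 1;
    }

    public void map(LongWritable key, Text value,
            OutputCollector<Text, Text> output, Reporter reporter)
            throws IOException {
        if (!initialized) {   // run setup() on the first record only
            setup();
            initialized = true;
        }
        // ... map logic as above ...
    }
}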

oyjwcjzk4#

I have code running on Hadoop 1.2.1 (also tested on 2.2.0) that makes extensive use of setup. In my code it looks like this:

@Override
public void setup(Context context) throws IllegalArgumentException, IOException {
    logger.debug("setup has been called");
}

The differences I see are the use of public instead of protected, and also @Override, which helps you find out whether you are actually overriding the method. Note also that I am using the new API (org.apache.hadoop.mapreduce).
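For completeness, a minimal sketch of what a driver for the final name-joining step might look like against the new API. The class names and input path mirror the question; the map-only configuration and the output argument index are assumptions:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

// Sketch only: submit the name-joining step through the new API, so that a
// mapper extending org.apache.hadoop.mapreduce.Mapper gets its setup() called.
public class MovieNamesDriver {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        Job job = new Job(conf, "movie-names");   // Job.getInstance(conf, ...) on newer releases
        job.setJarByClass(MovieNamesDriver.class);
        job.setMapperClass(MovieNamesMapper.class); // must extend the new-API Mapper
        job.setNumReduceTasks(0);                   // map-only join (an assumption)
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);
        FileInputFormat.addInputPath(job, new Path("freq_temp2"));
        FileOutputFormat.setOutputPath(job, new Path(args[0])); // output dir (assumed)
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}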
