Hadoop matrix multiplication code fails on large matrices - Spill failed

xcitsw88 posted on 2021-06-02 in Hadoop

I am trying to do a large matrix multiplication. The code below works fine on small matrices, but when I run it on large ones it fails with the error shown further down. Note that there is absolutely nothing wrong with my input file (no strange characters, etc.).
Also, the job crashes after running for a while, and on my Ubuntu system I get a notification that the root filesystem has only about 360 MB left. Is the crash simply a disk-space problem?
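For reference, the input format is matrixName,row,col,value, matching the mapper's split(",") below; a few illustrative lines (made-up values):

A,0,0,1.5
A,0,1,2.0
B,0,0,3.25
B,1,0,4.0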

import java.io.IOException;
import java.util.*;

import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.conf.*;
import org.apache.hadoop.io.*;
import org.apache.hadoop.mapreduce.*;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;

public class OneStepMatrixMultiplication {

    public static class Map extends Mapper<LongWritable, Text, Text, Text> {
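        // Input lines look like "A,i,j,value" or "B,j,k,value".
        // Each A(i,j) is replicated once per output column k (p times in total),
        // and each B(j,k) once per output row i (m times), all keyed by "i,k".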
        public void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
            Configuration conf = context.getConfiguration();
            int m = Integer.parseInt(conf.get("m"));
            int p = Integer.parseInt(conf.get("p"));
            String line = value.toString();
            String[] indicesAndValue = line.split(",");
            Text outputKey = new Text();
            Text outputValue = new Text();
            if (indicesAndValue[0].equals("A")) {
                for (int k = 0; k < p; k++) {
                    outputKey.set(indicesAndValue[1] + "," + k);
                    outputValue.set("A," + indicesAndValue[2] + "," + indicesAndValue[3]);
                    context.write(outputKey, outputValue);
                }
            } else {
                for (int i = 0; i < m; i++) {
                    outputKey.set(i + "," + indicesAndValue[2]);
                    outputValue.set("B," + indicesAndValue[1] + "," + indicesAndValue[3]);
                    context.write(outputKey, outputValue);
                }
            }
        }
    }

    public static class Reduce extends Reducer<Text, Text, Text, Text> 
    {
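        // For key "i,k", collect row i of A and column k of B into hash maps
        // indexed by j, then compute their dot product.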
        public void reduce(Text key, Iterable<Text> values, Context context) throws IOException, InterruptedException {
            String[] value;
            HashMap<Integer, Float> hashA = new HashMap<Integer, Float>();
            HashMap<Integer, Float> hashB = new HashMap<Integer, Float>();

            for (Text val : values) 
            {
                value = val.toString().split(",");

                if (value[0].equals("A")) 
                {
                    hashA.put(Integer.parseInt(value[1]), Float.parseFloat(value[2]));
                } else 
                {
                    hashB.put(Integer.parseInt(value[1]), Float.parseFloat(value[2]));
                }
            }

            int n = Integer.parseInt(context.getConfiguration().get("n"));
            float result = 0.0f;
            float a_ij;
            float b_jk;
            for (int j = 0; j < n; j++) 
            {
                a_ij = hashA.containsKey(j) ? hashA.get(j) : 0.0f;
                b_jk = hashB.containsKey(j) ? hashB.get(j) : 0.0f;
                result += a_ij * b_jk;
            }
            if (result != 0.0f) 
            {
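                // TextOutputFormat writes only the value when the key is null,
                // so each output line is "i,k,result".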
                context.write(null, new Text(key.toString() + "," + Float.toString(result)));
            }
        }
    }

    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        // A is an m-by-n matrix; B is an n-by-p matrix.

        conf.set("m", "10000");
        conf.set("n", "3");
        conf.set("p", "10000");

        Job job = Job.getInstance(conf, "MatrixMatrixMultiplicationOneStep");
        job.setJarByClass(OneStepMatrixMultiplication.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);

        job.setMapperClass(Map.class);
        job.setReducerClass(Reduce.class);

        job.setInputFormatClass(TextInputFormat.class);
        job.setOutputFormatClass(TextOutputFormat.class);

        FileInputFormat.addInputPath(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));

        job.waitForCompletion(true);
    }
}

Error

5/05/31 18:45:35 WARN mapred.LocalJobRunner: job_local1019087739_0001
java.lang.Exception: java.io.IOException: Spill failed
    at org.apache.hadoop.mapred.LocalJobRunner$Job.run(LocalJobRunner.java:354)

nvbavucw 1#

Try changing your hadoop.tmp.dir to a location with enough storage.
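The stack trace shows the job running under mapred.LocalJobRunner, and in that mode map-side spill files land in mapreduce.cluster.local.dir, which defaults to ${hadoop.tmp.dir}/mapred/local (itself under /tmp by default). Since your mapper replicates every A element p = 10000 times and every B element m = 10000 times, the intermediate data is several orders of magnitude larger than the input and easily exhausts the ~360 MB free on your root filesystem, which is why the spill fails.

A minimal sketch of one way to redirect the scratch directory from the driver, assuming /data/hadoop-tmp is a hypothetical local path on a volume with plenty of free space:

    Configuration conf = new Configuration();
    // Hypothetical path: point Hadoop's scratch space (and with it the default
    // spill directory ${hadoop.tmp.dir}/mapred/local) at a roomy volume.
    conf.set("hadoop.tmp.dir", "/data/hadoop-tmp");

    Job job = Job.getInstance(conf, "MatrixMatrixMultiplicationOneStep");

The same property can also be set cluster-wide in core-site.xml.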
