使用hadoop执行jar文件

mwyxok5s  于 2021-06-03  发布在  Hadoop
关注(0)|答案(2)|浏览(357)

我想执行一个jar文件,在从命令行执行时可以正常工作:

java -Xmx3g -jar jarname.jar -T class_name_in_jar -R filename1 -I filename2 -known filename3 -o filename4

上面的命令以 filename1、filename2 和 filename3 作为输入,执行 jar 中的 class_name_in_jar 类,并将输出写入 filename4。
这是我的MapReduce程序:

import java.io.IOException;
    import java.util.*;
    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.conf.*;
    import org.apache.hadoop.io.*;
    import org.apache.hadoop.mapred.*;
    import org.apache.hadoop.util.*;

    /**
     * Job driver for the old {@code org.apache.hadoop.mapred} API whose reducer launches an
     * external {@code java -jar ...} command and fails the task when that command fails.
     */
    public class GatkWordCount {

       /**
        * Reducer that ignores its key and values and runs the external command instead.
        * Nothing is ever written to the output collector.
        */
       public static class Reduce extends MapReduceBase implements Reducer<Text, IntWritable, Text, IntWritable> {
         /**
          * Runs the external command and turns a non-zero exit status into an exception.
          *
          * @param key      unused
          * @param values   unused
          * @param output   unused
          * @param reporter unused
          * @throws IOException if the command cannot be started, the wait is interrupted,
          *                     or the command exits with a non-zero status
          */
         public void reduce(Text key, Iterator<IntWritable> values, OutputCollector<Text, IntWritable> output, Reporter reporter) throws IOException {
           // The <...> parts are placeholders for the real jar, class and file names.
           String findTargetsCmd = "java -Xmx3g -jar <jarname>.jar -T <class name in jar> -R <filename1> -I <filename2> -known <filename3> -o <filename4>";

           exceptionOnError(execAndReconnect(findTargetsCmd));
         }
       }

    /**
     * Starts {@code cmd} as a child process, waits for it to finish and returns its exit status.
     *
     * @param cmd command line; it is split on whitespace into the program and its arguments
     * @return the exit status of the child process
     * @throws IOException if the process cannot be started or the wait is interrupted
     */
    public static int execAndReconnect(String cmd) throws IOException  {
       // Explicit argument vector instead of Runtime.exec(String); the split mirrors the
       // whitespace tokenisation Runtime.exec(String) would apply.
       ProcessBuilder builder = new ProcessBuilder(cmd.trim().split("\\s+"));
       // Let the child write straight to this JVM's stdout/stderr. Unread pipes can fill up
       // and block the child forever, and its diagnostics would otherwise be lost.
       builder.inheritIO();
       Process p = builder.start();
       try {
         return p.waitFor();
       } catch (InterruptedException e) {
         // waitFor() is interruptible: stop the child, keep the interrupt flag set for
         // callers, and report the failure through the declared exception type.
         p.destroy();
         Thread.currentThread().interrupt();
         throw new IOException("Interrupted while waiting for command: " + cmd, e);
       }
    }

    /**
     * Throws if {@code errorCode} is not zero.
     *
     * @param errorCode exit status of an external command
     * @throws IOException carrying the status as its message when the status is non-zero
     */
    public static void exceptionOnError(int errorCode) throws IOException{
        if (errorCode != 0) {
            throw new IOException(String.valueOf(errorCode));
        }
    }

       /**
        * Configures and submits the job.
        *
        * @param args {@code args[0]} is the input path, {@code args[1]} the output path
        * @throws Exception if job submission or execution fails
        */
       public static void main(String[] args) throws Exception {
         JobConf conf = new JobConf(GatkWordCount.class);
         conf.setJobName("GatkWordCount");

         conf.setOutputKeyClass(Text.class);
         conf.setOutputValueClass(IntWritable.class);

         // NOTE(review): no mapper class is set, so the default identity mapper forwards the
         // LongWritable/Text pairs read by TextInputFormat unchanged. That conflicts with the
         // Text/IntWritable types declared above and expected by Reduce; a mapper emitting
         // Text/IntWritable (or matching reducer input types) is needed for the job to run.
         conf.setReducerClass(Reduce.class);

         conf.setInputFormat(TextInputFormat.class);
         conf.setOutputFormat(TextOutputFormat.class);

         FileInputFormat.setInputPaths(conf, new Path(args[0]));
         FileOutputFormat.setOutputPath(conf, new Path(args[1]));

         JobClient.runJob(conf);
       }
    }

我已经把所有必需的输入文件放到了 HDFS 中,然后执行了以下命令:

hadoop/bin/hadoop jar gatkword.jar GatkWordCount /user/hduser/gatkinput/gatkinput/group.bam /user/hduser/gatkword2

下面是我执行上述命令后得到的错误消息:

13/12/29 17:58:59 WARN mapred.JobClient: Use GenericOptionsParser for parsing the arguments. Applications should implement Tool for the same.
13/12/29 17:58:59 INFO util.NativeCodeLoader: Loaded the native-hadoop library
13/12/29 17:58:59 WARN snappy.LoadSnappy: Snappy native library not loaded
13/12/29 17:58:59 INFO mapred.FileInputFormat: Total input paths to process : 1
13/12/29 17:58:59 INFO mapred.JobClient: Running job: job_201312261425_0013
13/12/29 17:59:00 INFO mapred.JobClient:  map 0% reduce 0%
13/12/29 17:59:06 INFO mapred.JobClient: Task Id : attempt_201312261425_0013_m_000000_0, Status : FAILED
java.io.IOException: Type mismatch in key from map: expected org.apache.hadoop.io.Text, recieved org.apache.hadoop.io.LongWritable
    at org.apache.hadoop.mapred.MapTask$MapOutputBuffer.collect(MapTask.java:1014)
    at org.apache.hadoop.mapred.MapTask$OldOutputCollector.collect(MapTask.java:592)
    at org.apache.hadoop.mapred.lib.IdentityMapper.map(IdentityMapper.java:38)
    at org.apache.hadoop.mapred.MapRunner.run(MapRunner.java:50)
    at org.apache.hadoop.mapred.MapTask.runOldMapper(MapTask.java:436)
    at org.apache.hadoop.mapred.MapTask.run(MapTask.java:372)
    at org.apache.hadoop.mapred.Child$4.run(Child.java:255)
    at java.security.AccessController.doPrivileged(Native Method)
    at javax.security.auth.Subject.doAs(Subject.java:415)
    at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1136)
    at org.apache.hadoop.mapred.Child.main(Child.java:249)

13/12/29 17:59:06 INFO mapred.JobClient: Task Id : attempt_201312261425_0013_m_000001_0, Status : FAILED
java.io.IOException: Type mismatch in key from map: expected org.apache.hadoop.io.Text, recieved org.apache.hadoop.io.LongWritable
    at org.apache.hadoop.mapred.MapTask$MapOutputBuffer.collect(MapTask.java:1014)
    at org.apache.hadoop.mapred.MapTask$OldOutputCollector.collect(MapTask.java:592)
    at org.apache.hadoop.mapred.lib.IdentityMapper.map(IdentityMapper.java:38)
    at org.apache.hadoop.mapred.MapRunner.run(MapRunner.java:50)
    at org.apache.hadoop.mapred.MapTask.runOldMapper(MapTask.java:436)
    at org.apache.hadoop.mapred.MapTask.run(MapTask.java:372)
    at org.apache.hadoop.mapred.Child$4.run(Child.java:255)
    at java.security.AccessController.doPrivileged(Native Method)
    at javax.security.auth.Subject.doAs(Subject.java:415)
    at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1136)
    at org.apache.hadoop.mapred.Child.main(Child.java:249)

13/12/29 17:59:11 INFO mapred.JobClient: Task Id : attempt_201312261425_0013_m_000000_1, Status : FAILED
java.io.IOException: Type mismatch in key from map: expected org.apache.hadoop.io.Text, recieved org.apache.hadoop.io.LongWritable
    at org.apache.hadoop.mapred.MapTask$MapOutputBuffer.collect(MapTask.java:1014)
    at org.apache.hadoop.mapred.MapTask$OldOutputCollector.collect(MapTask.java:592)
    at org.apache.hadoop.mapred.lib.IdentityMapper.map(IdentityMapper.java:38)
    at org.apache.hadoop.mapred.MapRunner.run(MapRunner.java:50)
    at org.apache.hadoop.mapred.MapTask.runOldMapper(MapTask.java:436)
    at org.apache.hadoop.mapred.MapTask.run(MapTask.java:372)
    at org.apache.hadoop.mapred.Child$4.run(Child.java:255)
    at java.security.AccessController.doPrivileged(Native Method)
    at javax.security.auth.Subject.doAs(Subject.java:415)
    at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1136)
    at org.apache.hadoop.mapred.Child.main(Child.java:249)

13/12/29 17:59:11 INFO mapred.JobClient: Task Id : attempt_201312261425_0013_m_000001_1, Status : FAILED
java.io.IOException: Type mismatch in key from map: expected org.apache.hadoop.io.Text, recieved org.apache.hadoop.io.LongWritable
    at org.apache.hadoop.mapred.MapTask$MapOutputBuffer.collect(MapTask.java:1014)
    at org.apache.hadoop.mapred.MapTask$OldOutputCollector.collect(MapTask.java:592)
    at org.apache.hadoop.mapred.lib.IdentityMapper.map(IdentityMapper.java:38)
    at org.apache.hadoop.mapred.MapRunner.run(MapRunner.java:50)
    at org.apache.hadoop.mapred.MapTask.runOldMapper(MapTask.java:436)
    at org.apache.hadoop.mapred.MapTask.run(MapTask.java:372)
    at org.apache.hadoop.mapred.Child$4.run(Child.java:255)
    at java.security.AccessController.doPrivileged(Native Method)
    at javax.security.auth.Subject.doAs(Subject.java:415)
    at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1136)
    at org.apache.hadoop.mapred.Child.main(Child.java:249)

13/12/29 17:59:17 INFO mapred.JobClient: Task Id : attempt_201312261425_0013_m_000000_2, Status : FAILED
java.io.IOException: Type mismatch in key from map: expected org.apache.hadoop.io.Text, recieved org.apache.hadoop.io.LongWritable
    at org.apache.hadoop.mapred.MapTask$MapOutputBuffer.collect(MapTask.java:1014)
    at org.apache.hadoop.mapred.MapTask$OldOutputCollector.collect(MapTask.java:592)
    at org.apache.hadoop.mapred.lib.IdentityMapper.map(IdentityMapper.java:38)
    at org.apache.hadoop.mapred.MapRunner.run(MapRunner.java:50)
    at org.apache.hadoop.mapred.MapTask.runOldMapper(MapTask.java:436)
    at org.apache.hadoop.mapred.MapTask.run(MapTask.java:372)
    at org.apache.hadoop.mapred.Child$4.run(Child.java:255)
    at java.security.AccessController.doPrivileged(Native Method)
    at javax.security.auth.Subject.doAs(Subject.java:415)
    at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1136)
    at org.apache.hadoop.mapred.Child.main(Child.java:249)

13/12/29 17:59:17 INFO mapred.JobClient: Task Id : attempt_201312261425_0013_m_000001_2, Status : FAILED
java.io.IOException: Type mismatch in key from map: expected org.apache.hadoop.io.Text, recieved org.apache.hadoop.io.LongWritable
    at org.apache.hadoop.mapred.MapTask$MapOutputBuffer.collect(MapTask.java:1014)
    at org.apache.hadoop.mapred.MapTask$OldOutputCollector.collect(MapTask.java:592)
    at org.apache.hadoop.mapred.lib.IdentityMapper.map(IdentityMapper.java:38)
    at org.apache.hadoop.mapred.MapRunner.run(MapRunner.java:50)
    at org.apache.hadoop.mapred.MapTask.runOldMapper(MapTask.java:436)
    at org.apache.hadoop.mapred.MapTask.run(MapTask.java:372)
    at org.apache.hadoop.mapred.Child$4.run(Child.java:255)
    at java.security.AccessController.doPrivileged(Native Method)
    at javax.security.auth.Subject.doAs(Subject.java:415)
    at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1136)
    at org.apache.hadoop.mapred.Child.main(Child.java:249)

13/12/29 17:59:22 INFO mapred.JobClient: Job complete: job_201312261425_0013
13/12/29 17:59:22 INFO mapred.JobClient: Counters: 7
13/12/29 17:59:22 INFO mapred.JobClient:   Job Counters 
13/12/29 17:59:22 INFO mapred.JobClient:     SLOTS_MILLIS_MAPS=42572
13/12/29 17:59:22 INFO mapred.JobClient:     Total time spent by all reduces waiting after reserving slots (ms)=0
13/12/29 17:59:22 INFO mapred.JobClient:     Total time spent by all maps waiting after reserving slots (ms)=0
13/12/29 17:59:22 INFO mapred.JobClient:     Launched map tasks=8
13/12/29 17:59:22 INFO mapred.JobClient:     Data-local map tasks=8
13/12/29 17:59:22 INFO mapred.JobClient:     SLOTS_MILLIS_REDUCES=0
13/12/29 17:59:22 INFO mapred.JobClient:     Failed map tasks=1
13/12/29 17:59:22 INFO mapred.JobClient: Job Failed: # of failed Map Tasks exceeded allowed limit. FailedCount: 1. LastFailedTask: task_201312261425_0013_m_000000
Exception in thread "main" java.io.IOException: Job failed!
    at org.apache.hadoop.mapred.JobClient.runJob(JobClient.java:1327)
    at GatkWordCount.main(GatkWordCount.java:51)
    at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
    at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)
    at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
    at java.lang.reflect.Method.invoke(Method.java:601)
    at org.apache.hadoop.util.RunJar.main(RunJar.java:156)

请告诉我需要对代码做哪些修改,才能让它正确执行。谢谢你的帮助。

9njqaruj

9njqaruj1#

我修改了代码,如下所示:

import java.io.*;
    import java.util.*;

    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.conf.*;
    import org.apache.hadoop.io.*;
    import org.apache.hadoop.mapred.*;
    import org.apache.hadoop.util.*;

    /**
     * Job driver for the old {@code org.apache.hadoop.mapred} API whose reducer launches the
     * GATK RealignerTargetCreator tool as an external process.
     */
    public class GatkWordCount {

       /**
        * Reducer that ignores its input and runs the GATK command line. Nothing is written
        * to the output collector.
        */
       public static class Reduce extends MapReduceBase implements Reducer<LongWritable, Text, LongWritable, Text> {

         // The framework calls reduce() once per distinct key, but the command does not depend
         // on the key or values, so it only needs to be launched once per reducer instance.
         private boolean commandStarted = false;

         /**
          * Launches the GATK command on the first call and does nothing on later calls.
          *
          * @param key      unused
          * @param values   unused
          * @param output   unused
          * @param reporter unused
          * @throws IOException if the command cannot be started, the wait is interrupted,
          *                     or the command exits with a non-zero status
          */
         public void reduce(LongWritable key, Iterator<Text> values, OutputCollector<LongWritable, Text> output, Reporter reporter) throws IOException {
           if (commandStarted) {
             return;
           }
           commandStarted = true;

           // NOTE(review): the /user/hduser/... arguments look like HDFS paths, while the
           // spawned JVM resolves paths on the node's local file system. Confirm the files
           // are reachable there.
           String cmd = "java -jar /home/hduser/apps/hadoop/GenomeAnalysisTK.jar -T RealignerTargetCreator -R /user/hduser/gatkinput/gatkinput/ucsc.hg19.fasta -I /user/hduser/gatkinput/gatkinput/groupbam -known /user/hduser/gatkinput/gatkinput/Mills_and_1000G_gold_standard.indels.hg19.vcf -o /user/hduser/gatkinput/gatkinput/target_intervals.list";
           System.out.println("GATK cmd===>"+cmd);

           // Explicit argument vector instead of Runtime.exec(String); the split mirrors the
           // whitespace tokenisation Runtime.exec(String) would apply.
           ProcessBuilder builder = new ProcessBuilder(cmd.trim().split("\\s+"));
           // Send the tool's stdout/stderr to this task's own stdout/stderr. Unread pipes can
           // fill up and block the child, and its error messages would otherwise never be seen.
           builder.inheritIO();
           Process p = builder.start();

           // NOTE(review): a long-running command may need periodic reporter.progress() calls
           // to avoid the task being timed out. Confirm against the cluster's task timeout.
           int exitCode;
           try {
             exitCode = p.waitFor();
           } catch (InterruptedException e) {
             // Stop the child, keep the interrupt flag set, and surface the failure.
             p.destroy();
             Thread.currentThread().interrupt();
             throw new IOException("Interrupted while waiting for command: " + cmd, e);
           }

           System.out.println("p.exitValue-->"+exitCode);
           if (exitCode != 0) {
             // Fail the task instead of reporting success when the tool itself failed.
             throw new IOException("GATK command exited with status " + exitCode);
           }
         }
       }

       /**
        * Configures and submits the job.
        *
        * @param args {@code args[0]} is the input path, {@code args[1]} the output path
        * @throws Exception if job submission or execution fails
        */
       public static void main(String[] args) throws Exception {
         JobConf conf = new JobConf(GatkWordCount.class);
         conf.setJobName("GatkWordCount");

         // No mapper class is set on purpose: the default identity mapper forwards the
         // LongWritable/Text pairs read by TextInputFormat, matching the types declared below.
         conf.setReducerClass(Reduce.class);

         conf.setOutputKeyClass(LongWritable.class);
         conf.setOutputValueClass(Text.class);

         conf.setInputFormat(TextInputFormat.class);
         conf.setOutputFormat(TextOutputFormat.class);

         FileInputFormat.setInputPaths(conf, new Path(args[0]));
         FileOutputFormat.setOutputPath(conf, new Path(args[1]));

         JobClient.runJob(conf);
       }
    }

[1] 现在执行时不再报错,但 reduce 阶段一直停在 66%。[2] userlogs/./stderr 文件中没有错误,不过 System.out.println 的输出仍在不断地写入 stdout 文件。[3] 代码中的 p.exitValue--> 打印出 1,表示进程异常终止,但日志中看不到任何问题。你能告诉我这段代码有什么问题,或者建议一种执行 jar 文件的替代方法吗?附言:由于我的声望值不够,无法直接回复。谢谢。

b09cbbtk

b09cbbtk2#

在此示例中,您没有指定 Mapper 类,而默认的 IdentityMapper 在这里无法使用。
这是因为您指定的 TextInputFormat 以 LongWritable(该行在文件中的字节偏移量)作为键、以 Text(行内容)作为值。因此,默认的 IdentityMapper 会原样输出 LongWritable 键和 Text 值。由于您已将 OutputKeyClass 指定为 Text(在没有单独设置 Map 输出类型时,它同样用作 Map 输出的键类型),Mapper 输出的键(LongWritable)与 Map 输出收集机制所期望的键类型(Text)之间就出现了类型不匹配。值字段上也存在类似的不匹配,但系统会先在键字段上失败。
要解决这个问题,您必须编写自己的 Mapper 类,该类接受 (LongWritable, Text) 作为输入,并输出 (Text, IntWritable)。
编辑:我刚刚更仔细地看了您的代码。您只是借助 MapReduce 框架在 reducer 中执行一个 Java jar,这显然违背了 Hadoop 的初衷(用 MapReduce 对 HDFS 上的数据进行计算)。与其花更多时间让它在 MapReduce 中跑起来,我建议您先重新审视一下究竟想用这个应用程序做什么。

相关问题