我想执行一个jar文件,在从命令行执行时可以正常工作:
java -Xmx3g -jar jarname.jar -T class_name_in_jar -R filename1 -I filename2 -known filename3 -o filename4
上面的命令通过获取输入filename1、filename2和filename3来执行\u jar中的class\u name\u。它将在filename4中生成输出。
这是我的MapReduce程序:
import java.io.IOException;
import java.util.*;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.conf.*;
import org.apache.hadoop.io.*;
import org.apache.hadoop.mapred.*;
import org.apache.hadoop.util.*;
public class GatkWordCount {
public static class Reduce extends MapReduceBase implements Reducer<Text, IntWritable, Text, IntWritable> {
public void reduce(Text key, Iterator<IntWritable> values, OutputCollector<Text, IntWritable> output, Reporter reporter) throws IOException {
String find_targets_cmd = "java -Xmx3g -jar <jarname>.jar -T <class name in jar> -R <filename1> -I <filename2> -known <filename3> -o <filename4>";
exceptionOnError(execAndReconnect(find_targets_cmd));
}
}
public static int execAndReconnect(String cmd) throws IOException {
Process p = Runtime.getRuntime().exec(cmd);
p.waitFor();
return p.exitValue();
}
public static void exceptionOnError(int errorCode) throws IOException{
if(0 != errorCode)
throw new IOException(String.valueOf(errorCode));
}
public static void main(String[] args) throws Exception {
JobConf conf = new JobConf(GatkWordCount.class);
conf.setJobName("GatkWordCount");
conf.setOutputKeyClass(Text.class);
conf.setOutputValueClass(IntWritable.class);
conf.setReducerClass(Reduce.class);
conf.setInputFormat(TextInputFormat.class);
conf.setOutputFormat(TextOutputFormat.class);
FileInputFormat.setInputPaths(conf, new Path(args[0]));
FileOutputFormat.setOutputPath(conf, new Path(args[1]));
JobClient.runJob(conf);
}
}
在 HDFS
,我已经放置了所有必需的输入文件。我已执行以下命令:
enter code herehadoop/bin/hadoop jar gatkword.jar GatkWordCount /user/hduser/gatkinput/gatkinput/group.bam /user/hduser/gatkword2
下面是我执行上述命令后得到的错误消息:
13/12/29 17:58:59 WARN mapred.JobClient: Use GenericOptionsParser for parsing the arguments. Applications should implement Tool for the same.
13/12/29 17:58:59 INFO util.NativeCodeLoader: Loaded the native-hadoop library
13/12/29 17:58:59 WARN snappy.LoadSnappy: Snappy native library not loaded
13/12/29 17:58:59 INFO mapred.FileInputFormat: Total input paths to process : 1
13/12/29 17:58:59 INFO mapred.JobClient: Running job: job_201312261425_0013
13/12/29 17:59:00 INFO mapred.JobClient: map 0% reduce 0%
13/12/29 17:59:06 INFO mapred.JobClient: Task Id : attempt_201312261425_0013_m_000000_0, Status : FAILED
java.io.IOException: Type mismatch in key from map: expected org.apache.hadoop.io.Text, recieved org.apache.hadoop.io.LongWritable
at org.apache.hadoop.mapred.MapTask$MapOutputBuffer.collect(MapTask.java:1014)
at org.apache.hadoop.mapred.MapTask$OldOutputCollector.collect(MapTask.java:592)
at org.apache.hadoop.mapred.lib.IdentityMapper.map(IdentityMapper.java:38)
at org.apache.hadoop.mapred.MapRunner.run(MapRunner.java:50)
at org.apache.hadoop.mapred.MapTask.runOldMapper(MapTask.java:436)
at org.apache.hadoop.mapred.MapTask.run(MapTask.java:372)
at org.apache.hadoop.mapred.Child$4.run(Child.java:255)
at java.security.AccessController.doPrivileged(Native Method)
at javax.security.auth.Subject.doAs(Subject.java:415)
at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1136)
at org.apache.hadoop.mapred.Child.main(Child.java:249)
13/12/29 17:59:06 INFO mapred.JobClient: Task Id : attempt_201312261425_0013_m_000001_0, Status : FAILED
java.io.IOException: Type mismatch in key from map: expected org.apache.hadoop.io.Text, recieved org.apache.hadoop.io.LongWritable
at org.apache.hadoop.mapred.MapTask$MapOutputBuffer.collect(MapTask.java:1014)
at org.apache.hadoop.mapred.MapTask$OldOutputCollector.collect(MapTask.java:592)
at org.apache.hadoop.mapred.lib.IdentityMapper.map(IdentityMapper.java:38)
at org.apache.hadoop.mapred.MapRunner.run(MapRunner.java:50)
at org.apache.hadoop.mapred.MapTask.runOldMapper(MapTask.java:436)
at org.apache.hadoop.mapred.MapTask.run(MapTask.java:372)
at org.apache.hadoop.mapred.Child$4.run(Child.java:255)
at java.security.AccessController.doPrivileged(Native Method)
at javax.security.auth.Subject.doAs(Subject.java:415)
at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1136)
at org.apache.hadoop.mapred.Child.main(Child.java:249)
13/12/29 17:59:11 INFO mapred.JobClient: Task Id : attempt_201312261425_0013_m_000000_1, Status : FAILED
java.io.IOException: Type mismatch in key from map: expected org.apache.hadoop.io.Text, recieved org.apache.hadoop.io.LongWritable
at org.apache.hadoop.mapred.MapTask$MapOutputBuffer.collect(MapTask.java:1014)
at org.apache.hadoop.mapred.MapTask$OldOutputCollector.collect(MapTask.java:592)
at org.apache.hadoop.mapred.lib.IdentityMapper.map(IdentityMapper.java:38)
at org.apache.hadoop.mapred.MapRunner.run(MapRunner.java:50)
at org.apache.hadoop.mapred.MapTask.runOldMapper(MapTask.java:436)
at org.apache.hadoop.mapred.MapTask.run(MapTask.java:372)
at org.apache.hadoop.mapred.Child$4.run(Child.java:255)
at java.security.AccessController.doPrivileged(Native Method)
at javax.security.auth.Subject.doAs(Subject.java:415)
at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1136)
at org.apache.hadoop.mapred.Child.main(Child.java:249)
13/12/29 17:59:11 INFO mapred.JobClient: Task Id : attempt_201312261425_0013_m_000001_1, Status : FAILED
java.io.IOException: Type mismatch in key from map: expected org.apache.hadoop.io.Text, recieved org.apache.hadoop.io.LongWritable
at org.apache.hadoop.mapred.MapTask$MapOutputBuffer.collect(MapTask.java:1014)
at org.apache.hadoop.mapred.MapTask$OldOutputCollector.collect(MapTask.java:592)
at org.apache.hadoop.mapred.lib.IdentityMapper.map(IdentityMapper.java:38)
at org.apache.hadoop.mapred.MapRunner.run(MapRunner.java:50)
at org.apache.hadoop.mapred.MapTask.runOldMapper(MapTask.java:436)
at org.apache.hadoop.mapred.MapTask.run(MapTask.java:372)
at org.apache.hadoop.mapred.Child$4.run(Child.java:255)
at java.security.AccessController.doPrivileged(Native Method)
at javax.security.auth.Subject.doAs(Subject.java:415)
at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1136)
at org.apache.hadoop.mapred.Child.main(Child.java:249)
13/12/29 17:59:17 INFO mapred.JobClient: Task Id : attempt_201312261425_0013_m_000000_2, Status : FAILED
java.io.IOException: Type mismatch in key from map: expected org.apache.hadoop.io.Text, recieved org.apache.hadoop.io.LongWritable
at org.apache.hadoop.mapred.MapTask$MapOutputBuffer.collect(MapTask.java:1014)
at org.apache.hadoop.mapred.MapTask$OldOutputCollector.collect(MapTask.java:592)
at org.apache.hadoop.mapred.lib.IdentityMapper.map(IdentityMapper.java:38)
at org.apache.hadoop.mapred.MapRunner.run(MapRunner.java:50)
at org.apache.hadoop.mapred.MapTask.runOldMapper(MapTask.java:436)
at org.apache.hadoop.mapred.MapTask.run(MapTask.java:372)
at org.apache.hadoop.mapred.Child$4.run(Child.java:255)
at java.security.AccessController.doPrivileged(Native Method)
at javax.security.auth.Subject.doAs(Subject.java:415)
at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1136)
at org.apache.hadoop.mapred.Child.main(Child.java:249)
13/12/29 17:59:17 INFO mapred.JobClient: Task Id : attempt_201312261425_0013_m_000001_2, Status : FAILED
java.io.IOException: Type mismatch in key from map: expected org.apache.hadoop.io.Text, recieved org.apache.hadoop.io.LongWritable
at org.apache.hadoop.mapred.MapTask$MapOutputBuffer.collect(MapTask.java:1014)
at org.apache.hadoop.mapred.MapTask$OldOutputCollector.collect(MapTask.java:592)
at org.apache.hadoop.mapred.lib.IdentityMapper.map(IdentityMapper.java:38)
at org.apache.hadoop.mapred.MapRunner.run(MapRunner.java:50)
at org.apache.hadoop.mapred.MapTask.runOldMapper(MapTask.java:436)
at org.apache.hadoop.mapred.MapTask.run(MapTask.java:372)
at org.apache.hadoop.mapred.Child$4.run(Child.java:255)
at java.security.AccessController.doPrivileged(Native Method)
at javax.security.auth.Subject.doAs(Subject.java:415)
at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1136)
at org.apache.hadoop.mapred.Child.main(Child.java:249)
13/12/29 17:59:22 INFO mapred.JobClient: Job complete: job_201312261425_0013
13/12/29 17:59:22 INFO mapred.JobClient: Counters: 7
13/12/29 17:59:22 INFO mapred.JobClient: Job Counters
13/12/29 17:59:22 INFO mapred.JobClient: SLOTS_MILLIS_MAPS=42572
13/12/29 17:59:22 INFO mapred.JobClient: Total time spent by all reduces waiting after reserving slots (ms)=0
13/12/29 17:59:22 INFO mapred.JobClient: Total time spent by all maps waiting after reserving slots (ms)=0
13/12/29 17:59:22 INFO mapred.JobClient: Launched map tasks=8
13/12/29 17:59:22 INFO mapred.JobClient: Data-local map tasks=8
13/12/29 17:59:22 INFO mapred.JobClient: SLOTS_MILLIS_REDUCES=0
13/12/29 17:59:22 INFO mapred.JobClient: Failed map tasks=1
13/12/29 17:59:22 INFO mapred.JobClient: Job Failed: # of failed Map Tasks exceeded allowed limit. FailedCount: 1. LastFailedTask: task_201312261425_0013_m_000000
Exception in thread "main" java.io.IOException: Job failed!
at org.apache.hadoop.mapred.JobClient.runJob(JobClient.java:1327)
at GatkWordCount.main(GatkWordCount.java:51)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:601)
at org.apache.hadoop.util.RunJar.main(RunJar.java:156)
请建议什么需要在我的代码中进行更改,以正确执行它。谢谢你的帮助。
2条答案
按热度按时间9njqaruj1#
我修改了代码,如下所示:
[1] 现在,在执行中没有问题,但是减少了停留在66%的作业[2]userlogs/./stderr文件中没有错误。即使system.out.println在stdout文件中打印,但system.out.println仍在stdout文件中连续打印[3] 代码中的p.exitvalue-->打印1,表示作业异常终止,但日志中没有问题。你能让我知道这个代码的问题,或者建议一个执行jar文件的替代方法吗?附言:我没法回答,因为我没什么名气。谢谢。
b09cbbtk2#
在此示例中,您尚未指定Map器类,并且不能使用默认的标识Map器。
这是因为您指定的textinputformat产生longwritable(行号)作为键和text(行内容)作为值。因此,默认的身份Map器将忠实地发出longwritable和文本不变。由于已将outputkeyclass指定为text,Map器的键输出(longwritable)与Map器输出排序规则系统(text)所需的键类型之间存在类不匹配。值字段上也会出现类似的不匹配,但系统首先在键字段上失败。
要解决这个问题,您必须编写自己的Map器类,该类接受longwritable文本,并输出一个text intwritable。
编辑:我刚刚仔细看了你的代码。您只是使用mapreduce框架在reducer中执行javajar,这显然违反了hadoop(使用mapreduce对hdfs数据进行计算)的精神。实际上,我会重新检查您到底想用这个应用程序做些什么,而不是花更多的时间让它在mapreduce中工作。