在 EMR 作业中使用 AWS Java SDK

fhity93d  于 2021-06-02  发布在  Hadoop
关注(0)|答案(1)|浏览(484)

我把一个 Scalding 作业打包成 fat jar,并在 EMR Hadoop 集群上运行。最近,我添加了一个新特性,需要在 mapper 中使用 DynamoDB 连接。但 mapper 一旦执行到 DynamoDB 初始化,就会抛出以下异常:

  1. Error in configuring object
  2. at org.apache.hadoop.util.ReflectionUtils.setJobConf(ReflectionUtils.java:112)
  3. at org.apache.hadoop.util.ReflectionUtils.setConf(ReflectionUtils.java:78)
  4. at org.apache.hadoop.util.ReflectionUtils.newInstance(ReflectionUtils.java:136)
  5. at org.apache.hadoop.mapred.MapTask.runOldMapper(MapTask.java:450)
  6. at org.apache.hadoop.mapred.MapTask.run(MapTask.java:344)
  7. at org.apache.hadoop.mapred.YarnChild$2.run(YarnChild.java:172)
  8. at java.security.AccessController.doPrivileged(Native Method)
  9. at javax.security.auth.Subject.doAs(Subject.java:415)
  10. at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1657)
  11. at org.apache.hadoop.mapred.YarnChild.main(YarnChild.java:166)
  12. Caused by: java.lang.reflect.InvocationTargetException
  13. at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
  14. at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)
  15. at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
  16. at java.lang.reflect.Method.invoke(Method.java:606)
  17. at org.apache.hadoop.util.ReflectionUtils.setJobConf(ReflectionUtils.java:109)
  18. ... 9 more
  19. Caused by: cascading.flow.FlowException: internal error during mapper configuration
  20. at cascading.flow.hadoop.FlowMapper.configure(FlowMapper.java:102)
  21. ... 14 more
  22. Caused by: com.esotericsoftware.kryo.KryoException: Unable to find class: com.amazonaws.http.conn.$Proxy7
  23. Serialization trace:
  24. connManager (com.amazonaws.http.impl.client.SdkHttpClient)
  25. httpClient (com.amazonaws.http.AmazonHttpClient)
  26. client (awscala.dynamodbv2.DynamoDBClient)
  27. client (me.chuwy.enrich.hadoop.DuplicateStorage$DynamoDbStorage)
  28. duplicateStorage (me.chuwy.enrich.hadoop.ShredJob)
  29. $outer (me.chuwy.enrich.hadoop.ShredJob$$anonfun$11)
  30. at com.esotericsoftware.kryo.util.DefaultClassResolver.readName(DefaultClassResolver.java:138)
  31. at com.esotericsoftware.kryo.util.DefaultClassResolver.readClass(DefaultClassResolver.java:115)
  32. at com.esotericsoftware.kryo.Kryo.readClass(Kryo.java:610)
  33. at com.esotericsoftware.kryo.serializers.FieldSerializer$ObjectField.read(FieldSerializer.java:599)
  34. at com.esotericsoftware.kryo.serializers.FieldSerializer.read(FieldSerializer.java:221)
  35. at com.esotericsoftware.kryo.Kryo.readObject(Kryo.java:648)
  36. at com.esotericsoftware.kryo.serializers.FieldSerializer$ObjectField.read(FieldSerializer.java:605)
  37. at com.esotericsoftware.kryo.serializers.FieldSerializer.read(FieldSerializer.java:221)
  38. at com.esotericsoftware.kryo.Kryo.readObject(Kryo.java:648)
  39. at com.esotericsoftware.kryo.serializers.FieldSerializer$ObjectField.read(FieldSerializer.java:605)
  40. at com.esotericsoftware.kryo.serializers.FieldSerializer.read(FieldSerializer.java:221)
  41. at com.esotericsoftware.kryo.Kryo.readObject(Kryo.java:648)
  42. at com.esotericsoftware.kryo.serializers.FieldSerializer$ObjectField.read(FieldSerializer.java:605)
  43. at com.esotericsoftware.kryo.serializers.FieldSerializer.read(FieldSerializer.java:221)
  44. at com.esotericsoftware.kryo.Kryo.readClassAndObject(Kryo.java:729)
  45. at com.twitter.chill.SomeSerializer.read(SomeSerializer.scala:25)
  46. at com.twitter.chill.SomeSerializer.read(SomeSerializer.scala:19)
  47. at com.esotericsoftware.kryo.Kryo.readObject(Kryo.java:648)
  48. at com.esotericsoftware.kryo.serializers.FieldSerializer$ObjectField.read(FieldSerializer.java:605)
  49. at com.esotericsoftware.kryo.serializers.FieldSerializer.read(FieldSerializer.java:221)
  50. at com.esotericsoftware.kryo.Kryo.readObject(Kryo.java:648)
  51. at com.esotericsoftware.kryo.serializers.FieldSerializer$ObjectField.read(FieldSerializer.java:605)
  52. at com.esotericsoftware.kryo.serializers.FieldSerializer.read(FieldSerializer.java:221)
  53. at com.esotericsoftware.kryo.Kryo.readClassAndObject(Kryo.java:729)
  54. at com.twitter.chill.SomeSerializer.read(SomeSerializer.scala:25)
  55. at com.twitter.chill.SomeSerializer.read(SomeSerializer.scala:19)
  56. at com.esotericsoftware.kryo.Kryo.readClassAndObject(Kryo.java:729)
  57. at com.twitter.chill.SerDeState.readClassAndObject(SerDeState.java:61)
  58. at com.twitter.chill.KryoPool.fromBytes(KryoPool.java:94)
  59. at com.twitter.chill.Externalizer.fromBytes(Externalizer.scala:145)
  60. at com.twitter.chill.Externalizer.maybeReadJavaKryo(Externalizer.scala:158)
  61. at com.twitter.chill.Externalizer.readExternal(Externalizer.scala:148)
  62. at java.io.ObjectInputStream.readExternalData(ObjectInputStream.java:1839)
  63. at java.io.ObjectInputStream.readOrdinaryObject(ObjectInputStream.java:1796)
  64. at java.io.ObjectInputStream.readObject0(ObjectInputStream.java:1350)
  65. at java.io.ObjectInputStream.defaultReadFields(ObjectInputStream.java:1997)
  66. at java.io.ObjectInputStream.readSerialData(ObjectInputStream.java:1921)
  67. at java.io.ObjectInputStream.readOrdinaryObject(ObjectInputStream.java:1798)
  68. at java.io.ObjectInputStream.readObject0(ObjectInputStream.java:1350)
  69. at java.io.ObjectInputStream.defaultReadFields(ObjectInputStream.java:1997)
  70. at java.io.ObjectInputStream.readSerialData(ObjectInputStream.java:1921)
  71. at java.io.ObjectInputStream.readOrdinaryObject(ObjectInputStream.java:1798)
  72. at java.io.ObjectInputStream.readObject0(ObjectInputStream.java:1350)
  73. at java.io.ObjectInputStream.defaultReadFields(ObjectInputStream.java:1997)
  74. at java.io.ObjectInputStream.readSerialData(ObjectInputStream.java:1921)
  75. at java.io.ObjectInputStream.readOrdinaryObject(ObjectInputStream.java:1798)
  76. at java.io.ObjectInputStream.readObject0(ObjectInputStream.java:1350)
  77. at java.io.ObjectInputStream.readObject(ObjectInputStream.java:370)
  78. at java.util.HashMap.readObject(HashMap.java:1180)
  79. at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
  80. at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)
  81. at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
  82. at java.lang.reflect.Method.invoke(Method.java:606)
  83. at java.io.ObjectStreamClass.invokeReadObject(ObjectStreamClass.java:1058)
  84. at java.io.ObjectInputStream.readSerialData(ObjectInputStream.java:1897)
  85. at java.io.ObjectInputStream.readOrdinaryObject(ObjectInputStream.java:1798)
  86. at java.io.ObjectInputStream.readObject0(ObjectInputStream.java:1350)
  87. at java.io.ObjectInputStream.defaultReadFields(ObjectInputStream.java:1997)
  88. at java.io.ObjectInputStream.readSerialData(ObjectInputStream.java:1921)
  89. at java.io.ObjectInputStream.readOrdinaryObject(ObjectInputStream.java:1798)
  90. at java.io.ObjectInputStream.readObject0(ObjectInputStream.java:1350)
  91. at java.io.ObjectInputStream.defaultReadFields(ObjectInputStream.java:1997)
  92. at java.io.ObjectInputStream.readSerialData(ObjectInputStream.java:1921)
  93. at java.io.ObjectInputStream.readOrdinaryObject(ObjectInputStream.java:1798)
  94. at java.io.ObjectInputStream.readObject0(ObjectInputStream.java:1350)
  95. at java.io.ObjectInputStream.readObject(ObjectInputStream.java:370)
  96. at cascading.flow.hadoop.util.JavaObjectSerializer.deserialize(JavaObjectSerializer.java:101)
  97. at cascading.flow.hadoop.util.HadoopUtil.deserializeBase64(HadoopUtil.java:312)
  98. at cascading.flow.hadoop.util.HadoopUtil.deserializeBase64(HadoopUtil.java:293)
  99. at cascading.flow.hadoop.FlowMapper.configure(FlowMapper.java:81)
  100. ... 14 more
  101. Caused by: java.lang.ClassNotFoundException: com.amazonaws.http.conn.$Proxy7
  102. at java.net.URLClassLoader$1.run(URLClassLoader.java:366)
  103. at java.net.URLClassLoader$1.run(URLClassLoader.java:355)
  104. at java.security.AccessController.doPrivileged(Native Method)
  105. at java.net.URLClassLoader.findClass(URLClassLoader.java:354)
  106. at java.lang.ClassLoader.loadClass

我原以为这个问题可能是因为 EMR AMI 在 /usr/share/aws/aws-java-sdk/ 目录下自带了一套 jar,它们与我打包进 fat jar 的库发生了冲突。我尝试删除这些 jar,但这样一来其他 EMR 步骤就都失败了。

8yoxcaq7

8yoxcaq71#

问题出在 Kryo 序列化上,而不是我最初怀疑的二进制不兼容。DynamoDB 客户端不是可序列化的实体,因为它包含处理程序、线程池等内容。因此,应该直接在作业内部初始化它,并将其声明为 lazy val。

相关问题