尝试在 AWS EMR(emr-6.1.0)上运行 Spark 3.0.1 作业时,出现以下错误:
Exception in thread "main" java.io.FileNotFoundException: File file:/home/hadoop/.ivy2/jars/io.netty_netty-transport-native-epoll-4.1.59.Final.jar does not exist
at org.apache.hadoop.fs.RawLocalFileSystem.deprecatedGetFileStatus(RawLocalFileSystem.java:671)
at org.apache.hadoop.fs.RawLocalFileSystem.getFileLinkStatusInternal(RawLocalFileSystem.java:992)
at org.apache.hadoop.fs.RawLocalFileSystem.getFileStatus(RawLocalFileSystem.java:661)
at org.apache.hadoop.fs.FilterFileSystem.getFileStatus(FilterFileSystem.java:464)
at org.apache.hadoop.fs.FileUtil.copy(FileUtil.java:386)
at org.apache.hadoop.fs.FileUtil.copy(FileUtil.java:337)
at org.apache.spark.deploy.yarn.Client.copyFileToRemote(Client.scala:397)
at org.apache.spark.deploy.yarn.Client.distribute$1(Client.scala:489)
at org.apache.spark.deploy.yarn.Client.$anonfun$prepareLocalResources$23(Client.scala:615)
at scala.collection.mutable.ResizableArray.foreach(ResizableArray.scala:62)
at scala.collection.mutable.ResizableArray.foreach$(ResizableArray.scala:55)
at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:49)
at org.apache.spark.deploy.yarn.Client.$anonfun$prepareLocalResources$22(Client.scala:614)
at org.apache.spark.deploy.yarn.Client.$anonfun$prepareLocalResources$22$adapted(Client.scala:613)
at scala.collection.immutable.List.foreach(List.scala:392)
at org.apache.spark.deploy.yarn.Client.prepareLocalResources(Client.scala:613)
at org.apache.spark.deploy.yarn.Client.createContainerLaunchContext(Client.scala:875)
at org.apache.spark.deploy.yarn.Client.submitApplication(Client.scala:195)
at org.apache.spark.scheduler.cluster.YarnClientSchedulerBackend.start(YarnClientSchedulerBackend.scala:60)
at org.apache.spark.scheduler.TaskSchedulerImpl.start(TaskSchedulerImpl.scala:201)
at org.apache.spark.SparkContext.<init>(SparkContext.scala:550)
at org.apache.spark.SparkContext$.getOrCreate(SparkContext.scala:2556)
at org.apache.spark.sql.SparkSession$Builder.$anonfun$getOrCreate$1(SparkSession.scala:930)
at scala.Option.getOrElse(Option.scala:189)
at org.apache.spark.sql.SparkSession$Builder.getOrCreate(SparkSession.scala:921)
at com.semanticbits.App$.main(App.scala:23)
at com.semanticbits.App.main(App.scala)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:498)
at org.apache.spark.deploy.JavaMainApplication.start(SparkApplication.scala:52)
at org.apache.spark.deploy.SparkSubmit.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:936)
at org.apache.spark.deploy.SparkSubmit.doRunMain$1(SparkSubmit.scala:180)
at org.apache.spark.deploy.SparkSubmit.submit(SparkSubmit.scala:203)
at org.apache.spark.deploy.SparkSubmit.doSubmit(SparkSubmit.scala:90)
at org.apache.spark.deploy.SparkSubmit$$anon$2.doSubmit(SparkSubmit.scala:1015)
at org.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:1024)
at org.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)
1条答案
按热度按时间dgenwo3n1#
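原来问题出在:我使用了 AWS SDK for Java 的 2.x 版本,从 AWS Systems Manager Parameter Store(参数存储)中读取一些参数。报错中缺失的 netty-transport-native-epoll jar 应该就是由 SDK 2.x 间接引入的依赖。出于某些原因,emr-6.1.0 仅支持 1.x 版本的 SDK。我将依赖项切换回旧版 SDK(1.x)之后,错误就解决了。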