我正在尝试将一个 DataFrame 写入 S3,这个 DataFrame 是通过从 Snowflake 读取数据创建的。作业运行在 EKS 上,写入部分的代码如下:
// Load the source data through the Snowflake Spark connector.
val dataset = {
  val snowflakeReader = sqlContext.read.format("net.snowflake.spark.snowflake")
  snowflakeReader.options(sfOptions()).load()
}

// S3A settings, applied to the Spark context's Hadoop configuration in the order listed.
val hc = sparkContext.hadoopConfiguration
Seq(
  "fs.s3a.aws.credentials.provider" ->
    "com.amazonaws.auth.InstanceProfileCredentialsProvider,com.amazonaws.auth.DefaultAWSCredentialsProviderChain",
  "fs.s3a.impl" -> "org.apache.hadoop.fs.s3a.S3AFileSystem",
  "fs.s3a.multipart.size" -> "104857600", // 100 MiB
  "fs.s3a.block.size" -> "33554432", // 32 MiB
  // NOTE(review): the AWS SDK normally reads this key as a JVM system property;
  // confirm that setting it on the Hadoop configuration has the intended effect.
  "com.amazonaws.services.s3.enableV4" -> "true"
).foreach { case (key, value) => hc.set(key, value) }

// Point S3A at the regional endpoint only when a region has been supplied.
for (region <- regionOpt) hc.set("fs.s3a.endpoint", s"s3.$region.amazonaws.com")

// Collapse to a single partition, then write to tempDir in overwrite mode.
val singlePartition = dataset.repartition(1)
val s3Writer = singlePartition.write
  .format(format)
  .mode(SaveMode.Overwrite)
  .option("header", "true")
  .option("timestampFormat", "yyyy/MM/dd HH:mm:ss ZZ")
  .option("delimiter", delimiter)
  .option("quote", quote)
s3Writer.save(tempDir)
在作业执行过程中,我看到了以下错误,但我无法确定确切的原因:
Lost task 0.1 in stage 1.0 (TID 7, 10.0.10.183, executor 1): org.apache.spark.SparkException: Task failed while writing rows.
at org.apache.spark.sql.execution.datasources.FileFormatWriter$.org$apache$spark$sql$execution$datasources$FileFormatWriter$$executeTask(FileFormatWriter.scala:257)
at org.apache.spark.sql.execution.datasources.FileFormatWriter$$anonfun$write$1.apply(FileFormatWriter.scala:170)
at org.apache.spark.sql.execution.datasources.FileFormatWriter$$anonfun$write$1.apply(FileFormatWriter.scala:169)
at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
at org.apache.spark.scheduler.Task.run(Task.scala:123)
at org.apache.spark.executor.Executor$TaskRunner$$anonfun$10.apply(Executor.scala:408)
at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1360)
at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:414)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
at java.lang.Thread.run(Thread.java:748)
Caused by: com.amazonaws.SdkClientException: Unable to execute HTTP request: Read timed out
at com.amazonaws.http.AmazonHttpClient$RequestExecutor.handleRetryableException(AmazonHttpClient.java:1175)
at com.amazonaws.http.AmazonHttpClient$RequestExecutor.executeHelper(AmazonHttpClient.java:1121)
at com.amazonaws.http.AmazonHttpClient$RequestExecutor.doExecute(AmazonHttpClient.java:770)
at com.amazonaws.http.AmazonHttpClient$RequestExecutor.executeWithTimer(AmazonHttpClient.java:744)
at com.amazonaws.http.AmazonHttpClient$RequestExecutor.execute(AmazonHttpClient.java:726)
at com.amazonaws.http.AmazonHttpClient$RequestExecutor.access$500(AmazonHttpClient.java:686)
at com.amazonaws.http.AmazonHttpClient$RequestExecutionBuilderImpl.execute(AmazonHttpClient.java:668)
at com.amazonaws.http.AmazonHttpClient.execute(AmazonHttpClient.java:532)
at com.amazonaws.http.AmazonHttpClient.execute(AmazonHttpClient.java:512)
at com.amazonaws.services.s3.AmazonS3Client.invoke(AmazonS3Client.java:4920)
at com.amazonaws.services.s3.AmazonS3Client.invoke(AmazonS3Client.java:4866)
at com.amazonaws.services.s3.AmazonS3Client.copyObject(AmazonS3Client.java:1999)
at com.amazonaws.services.s3.transfer.internal.CopyCallable.copyInOneChunk(CopyCallable.java:145)
at com.amazonaws.services.s3.transfer.internal.CopyCallable.call(CopyCallable.java:133)
at com.amazonaws.services.s3.transfer.internal.CopyMonitor.call(CopyMonitor.java:132)
at com.amazonaws.services.s3.transfer.internal.CopyMonitor.call(CopyMonitor.java:43)
at java.util.concurrent.FutureTask.run(FutureTask.java:266)
... 3 more
我不确定是什么导致了这个问题,有什么建议吗?
暂无答案!
目前还没有任何答案,快来回答吧!