I'm running a Python script with PySpark; it connects to a Kubernetes cluster and runs the job on executor pods. The idea of the script is to create a SQLContext that queries a Snowflake database. However, I get the following exception, and it is not descriptive enough to tell me what is actually wrong:
```
20/07/15 12:10:39 ERROR executor.Executor: Exception in task 0.0 in stage 0.0 (TID 0)
java.lang.UnsupportedOperationException: sun.misc.Unsafe or java.nio.DirectByteBuffer.<init>(long, int) not available
at net.snowflake.client.jdbc.internal.io.netty.util.internal.PlatformDependent.directBuffer(PlatformDependent.java:399)
at net.snowflake.client.jdbc.internal.io.netty.buffer.NettyArrowBuf.getDirectBuffer(NettyArrowBuf.java:243)
at net.snowflake.client.jdbc.internal.io.netty.buffer.NettyArrowBuf.nioBuffer(NettyArrowBuf.java:233)
at net.snowflake.client.jdbc.internal.io.netty.buffer.ArrowBuf.nioBuffer(ArrowBuf.java:247)
at net.snowflake.client.jdbc.internal.apache.arrow.vector.ipc.ReadChannel.readFully(ReadChannel.java:81)
at net.snowflake.client.jdbc.internal.apache.arrow.vector.ipc.message.MessageSerializer.readMessageBody(MessageSerializer.java:696)
at net.snowflake.client.jdbc.internal.apache.arrow.vector.ipc.message.MessageChannelReader.readNext(MessageChannelReader.java:68)
at net.snowflake.client.jdbc.internal.apache.arrow.vector.ipc.ArrowStreamReader.loadNextBatch(ArrowStreamReader.java:106)
at net.snowflake.client.jdbc.ArrowResultChunk.readArrowStream(ArrowResultChunk.java:117)
at net.snowflake.client.core.SFArrowResultSet.buildFirstChunk(SFArrowResultSet.java:352)
at net.snowflake.client.core.SFArrowResultSet.<init>(SFArrowResultSet.java:230)
at net.snowflake.client.jdbc.SnowflakeResultSetSerializableV1.getResultSet(SnowflakeResultSetSerializableV1.java:1079)
at net.snowflake.spark.snowflake.io.ResultIterator.liftedTree1$1(SnowflakeResultSetRDD.scala:85)
at net.snowflake.spark.snowflake.io.ResultIterator.<init>(SnowflakeResultSetRDD.scala:78)
at net.snowflake.spark.snowflake.io.SnowflakeResultSetRDD.compute(SnowflakeResultSetRDD.scala:41)
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:373)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:337)
at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52)
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:373)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:337)
at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52)
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:373)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:337)
at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52)
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:373)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:337)
at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
at org.apache.spark.scheduler.Task.run(Task.scala:127)
at org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:464)
at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1377)
at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:467)
at java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(Unknown Source)
at java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(Unknown Source)
at java.base/java.lang.Thread.run(Unknown Source)
```
Has anyone run into something similar? If so, how did you fix it?
2 Answers
p4rjhz4m1#
Note: the changes below were made on my local system, so they may or may not work for you; I'm just posting the steps I used to fix the issue.

I had a similar problem. Spark installed via brew uses openjdk@11 by default, and I changed the Java version from openjdk@11 to Oracle JDK 1.8 (OpenJDK 1.8 works instead of Oracle JDK 1.8 as well). To resolve the issue, also try setting the following confs; my spark-submit command now looks like this:
```
spark-submit \
  --conf 'spark.executor.extraJavaOptions=-Dio.netty.tryReflectionSetAccessible=true' \
  --conf 'spark.driver.extraJavaOptions=-Dio.netty.tryReflectionSetAccessible=true' \
  ...
```
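If you launch the job from a Python script rather than through spark-submit, the executor-side flag can also be set when the session is built; the driver-side flag generally still has to be passed at launch time (spark-submit or spark-defaults.conf), since the driver JVM is already running by the time this code executes. A minimal sketch:

```python
from pyspark.sql import SparkSession

# Executors are launched after the session is created, so this JVM flag
# reaches them; it works around the Netty/Arrow "sun.misc.Unsafe or
# java.nio.DirectByteBuffer.<init>(long, int) not available" error on Java 9+.
spark = (
    SparkSession.builder
    .appName("snowflake-read")
    .config(
        "spark.executor.extraJavaOptions",
        "-Dio.netty.tryReflectionSetAccessible=true",
    )
    .getOrCreate()
)
```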
g9icjywg2#
I ran into the same problem and was able to resolve it. I found out that io.netty.tryReflectionSetAccessible needs to be explicitly set to true on Java >= 9 for the Spark Snowflake connector to be able to read the data returned from Snowflake inside the Kubernetes executor pods. However, since the io.netty packages are shaded inside the Snowflake JDBC driver, the property has to be qualified with the full shaded package name, i.e. net.snowflake.client.jdbc.internal.io.netty.tryReflectionSetAccessible=true. This property needs to be set as a JVM option of the Spark executor pods, which can be done through the executor extra JVM options Spark property. For example:

Property name: spark.executor.extraJavaOptions
Value: -Dnet.snowflake.client.jdbc.internal.io.netty.tryReflectionSetAccessible=true
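Pulled together as a PySpark sketch (assuming the spark-snowflake connector and Snowflake JDBC driver jars are already available to the cluster; the connection options are placeholders for your account), the fix from this answer looks roughly like this:

```python
from pyspark.sql import SparkSession

# The io.netty classes are shaded inside the Snowflake JDBC driver, so the
# property must carry the full shaded package name to reach them.
SHADED_NETTY_FLAG = (
    "-Dnet.snowflake.client.jdbc.internal.io.netty.tryReflectionSetAccessible=true"
)

spark = (
    SparkSession.builder
    .appName("snowflake-query")
    .config("spark.executor.extraJavaOptions", SHADED_NETTY_FLAG)
    .getOrCreate()
)

# Placeholder connection options for the spark-snowflake connector.
sf_options = {
    "sfURL": "<account>.snowflakecomputing.com",
    "sfUser": "<user>",
    "sfPassword": "<password>",
    "sfDatabase": "<database>",
    "sfSchema": "<schema>",
    "sfWarehouse": "<warehouse>",
}

# Read the query result through the connector; without the JVM flag above,
# this is the step that fails with the UnsupportedOperationException.
df = (
    spark.read
    .format("net.snowflake.spark.snowflake")
    .options(**sf_options)
    .option("query", "SELECT 1")
    .load()
)
df.show()
```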