我一直在本地运行的 Zeppelin 0.9 中尝试对 Dataset 做一些实验。但是,在对 Dataset 执行操作时,我遇到了 NPE(NullPointerException)。同样的操作在 DataFrame 上似乎可以正常运行。下面是一个失败的例子:
import spark.implicits._
import org.apache.spark.sql.Row
import org.apache.spark.sql.types._
// Minimal reproduction script: builds a small typed Dataset[Person] from untyped Rows
// and applies a typed (lambda-based) filter to it.

/** Record type for one row; field names and types mirror the columns declared in `simpleSchema`. */
case class Person(firstname: String, middlename: String, lastname: String, id: String, gender: String, salary: Int)
// Five untyped sample rows in schema order (firstname, middlename, lastname, id, gender, salary).
// Some string fields are intentionally empty, and the last row uses -1 as its salary value.
val simpleData = Seq(Row("James","","Smith","36636","M",3000),
Row("Michael","Rose","","40288","M",4000),
Row("Robert","","Williams","42114","M",4000),
Row("Maria","Anne","Jones","39192","F",4000),
Row("Jen","Mary","Brown","","F",-1)
)
// Explicit schema for the rows above; every column is declared nullable (third argument = true).
val simpleSchema = StructType(Array(
StructField("firstname",StringType,true),
StructField("middlename",StringType,true),
StructField("lastname",StringType,true),
StructField("id", StringType, true),
StructField("gender", StringType, true),
StructField("salary", IntegerType, true)
))
// Creates a DataFrame from an RDD of the sample rows plus the schema, then converts it to a
// typed Dataset via `.as[Person]`. Despite the name `df`, the resulting value is a Dataset[Person].
val df = spark.createDataFrame(
spark.sparkContext.parallelize(simpleData),simpleSchema).as[Person]
// Typed filter: the lambda receives Person objects rather than Column expressions; `show()` prints the result.
// NOTE(review): the stack trace reported with this snippet fails in OuterScopes.getOuterScope during
// NewInstance code generation — presumably while instantiating Person for this lambda because the
// case class is declared inside the notebook/REPL session. TODO confirm against the Zeppelin/Spark versions in use.
df.filter( x => x.firstname == "James").show()
这是我得到的错误:
java.lang.NullPointerException
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:498)
at org.apache.spark.sql.catalyst.encoders.OuterScopes$$anonfun$getOuterScope$1.apply(OuterScopes.scala:70)
at org.apache.spark.sql.catalyst.expressions.objects.NewInstance$$anonfun$10.apply(objects.scala:485)
at org.apache.spark.sql.catalyst.expressions.objects.NewInstance$$anonfun$10.apply(objects.scala:485)
at scala.Option.map(Option.scala:146)
at org.apache.spark.sql.catalyst.expressions.objects.NewInstance.doGenCode(objects.scala:485)
at org.apache.spark.sql.catalyst.expressions.Expression$$anonfun$genCode$2.apply(Expression.scala:108)
at org.apache.spark.sql.catalyst.expressions.Expression$$anonfun$genCode$2.apply(Expression.scala:105)
at scala.Option.getOrElse(Option.scala:121)
暂无答案!
目前还没有任何答案,快来回答吧!