scala Spark json文件抛出ArrayIndexOutOfBoundsException

1rhkuytd  于 2024-01-08  发布在  Scala
关注(0)|答案(1)|浏览(234)

我在下面这一行遇到了错误:`val eventDS = sparkSession.sqlContext.read.json("src/main/resources/output/intermediateOutput.json")`

// Obtain the SparkSession instance from the project's InjectorProvider.
val sparkSession: SparkSession = InjectorProvider.get().getInstance(classOf[SparkSession])
    // Bring the session's implicits into scope.
    import sparkSession.implicits._
  try {
    // Read the intermediate JSON file with the default reader settings (no schema, no options).
    // This is the line reported to fail with ArrayIndexOutOfBoundsException.
    val eventDS = sparkSession.sqlContext.read.json("src/main/resources/output/intermediateOutput.json")
    // Register the loaded data as the temporary view "events".
    eventDS.createOrReplaceTempView("events")
  } catch {
    case e: Exception =>
      // Only the exception message is printed to stdout; the exception itself is not rethrown.
      println("Exception occurred while reading intermediate JSON event: " + e.getMessage)
  }

字符串
这是我在读取 JSON 文件时遇到的错误:
java.lang.ArrayIndexOutOfBoundsException: Index 17418 out of bounds for length 206
这是上面位置的json事件:

[
  {
    "modelName": "test",
    "docKey": "a9f1900b-19fe-41cd-8cdb-7eb581d06421#1700638844739",
    "isUpdate": true,
    "isUpsert": false,
    "etlTime": 1700638844739,
    "etlTimeHuman": "2023-11-22T07:40:44.739000",
    "accountId": "12312",
    "timeStamp": 1695686400000,
    "doc": {
      "campaignId": 1233,
      "engagementId": 3244,
      "skillId": 3424,
      "rule": "Show Campaign 1 if online agents is greater than 0",
      "action": "SHOW",
      "actionCount": 1,
      "timeInState": 51703,
      "eventType": "Ereport",
      "reportTime": "00"
    }
  },
  {
    "modelName": "test",
    "docKey": "816fdbc7-7797-4ea1-b873-a309be84865a#1700638844753",
    "isUpdate": true,
    "isUpsert": false,
    "etlTime": 1700638844753,
    "etlTimeHuman": "2023-11-22T07:40:44.753000",
    "accountId": "12312",
    "timeStamp": 1695686400000,
    "doc": {
      "campaignId": 12312,
      "engagementId": 12312,
      "skillId": 4233,
      "rule": "fasdfa",
      "action": "HIDE",
      "actionCount": 1,
      "timeInState": 81704,
      "eventType": "Ereport",
      "reportTime": "00"
    }
  }
]

fcg9iug3

fcg9iug31#

import org.apache.spark.sql.{SparkSession, DataFrame}
import org.apache.spark.sql.types._

// Obtain the SparkSession instance from the project's InjectorProvider.
val sparkSession: SparkSession = InjectorProvider.get().getInstance(classOf[SparkSession])

try {
  // Schema of the nested "doc" object, mirroring the sample events in the question.
  val docSchema = StructType(Seq(
    StructField("campaignId", LongType),
    StructField("engagementId", LongType),
    StructField("skillId", LongType),
    StructField("rule", StringType),
    StructField("action", StringType),
    StructField("actionCount", LongType),
    StructField("timeInState", LongType),
    StructField("eventType", StringType),
    StructField("reportTime", StringType)
  ))

  // Full top-level schema, so the reader does not have to infer it from the file.
  // etlTime and timeStamp use LongType: the sample values (e.g. 1700638844739) exceed Int range.
  val schema = StructType(Seq(
    StructField("modelName", StringType),
    StructField("docKey", StringType),
    StructField("isUpdate", BooleanType),
    StructField("isUpsert", BooleanType),
    StructField("etlTime", LongType),
    StructField("etlTimeHuman", StringType),
    StructField("accountId", StringType),
    StructField("timeStamp", LongType),
    StructField("doc", docSchema)
  ))

  // The input is a pretty-printed JSON array that spans many lines. By default Spark's JSON
  // reader expects one self-contained JSON document per line, so multiLine must be enabled
  // for this file to be parsed as a whole (each array element becomes one row).
  val eventDS: DataFrame = sparkSession.read
    .schema(schema)
    .option("multiLine", "true")
    .json("src/main/resources/output/intermediateOutput.json")

  // Register the loaded data as the temporary view "events".
  eventDS.createOrReplaceTempView("events")
} catch {
  case e: Exception =>
    // Only the exception message is printed to stdout; the exception itself is not rethrown.
    println("Exception occurred while reading intermediate JSON event: " + e.getMessage)
}

字符串

相关问题