java.lang.NoSuchMethodError in spark-submit

cigdeys3 · posted 2021-05-27 in Spark
Answers (1) | Views (621)

After compiling the code with sbt package, I submit it to Spark:

  sudo -u spark spark-submit --master yarn --deploy-mode client --executor-memory 2G --num-executors 6 --class viterbiAlgorithm.viterbiAlgo ./target/scala-2.11/vibertialgo_2.11-1.3.4.jar

I get this error:

  Exception in thread "main" java.lang.NoSuchMethodError: breeze.linalg.DenseVector$.tabulate$mDc$sp(ILscala/Function1;Lscala/reflect/ClassTag;)Lbreeze/linalg/DenseVector;
    at viterbiAlgorithm.User$$anonfun$eval$2.apply(viterbiAlgo.scala:84)
    at viterbiAlgorithm.User$$anonfun$eval$2.apply(viterbiAlgo.scala:80)
    at scala.collection.immutable.Range.foreach(Range.scala:160)
    at viterbiAlgorithm.User.eval(viterbiAlgo.scala:80)
    at viterbiAlgorithm.viterbiAlgo$.main(viterbiAlgo.scala:28)
    at viterbiAlgorithm.viterbiAlgo.main(viterbiAlgo.scala)
    at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
    at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
    at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
    at java.lang.reflect.Method.invoke(Method.java:498)
    at org.apache.spark.deploy.JavaMainApplication.start(SparkApplication.scala:52)
    at org.apache.spark.deploy.SparkSubmit.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:851)
    at org.apache.spark.deploy.SparkSubmit.doRunMain$1(SparkSubmit.scala:167)
    at org.apache.spark.deploy.SparkSubmit.submit(SparkSubmit.scala:195)
    at org.apache.spark.deploy.SparkSubmit.doSubmit(SparkSubmit.scala:86)
    at org.apache.spark.deploy.SparkSubmit$$anon$2.doSubmit(SparkSubmit.scala:926)
    at org.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:935)
    at org.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)

The sbt build file looks like this:

  name := "vibertiAlgo"
  version := "1.3.4"
  scalaVersion := "2.11.2"
  libraryDependencies ++= Seq(
    "org.scalanlp" %% "breeze" % "1.0",
    "org.apache.spark" %% "spark-core" % "2.4.0",
    "org.apache.spark" %% "spark-sql" % "2.4.0")

I can run the code successfully on my local machine with sbt run, so I don't see anything wrong with the code itself. Also, the Scala and Spark versions used at compile time and at runtime are the same.
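One way to verify that is to ask the JVM where breeze is actually loaded from at runtime. This diagnostic snippet is my addition, not part of the original question; run it in spark-shell on the cluster:

  // Prints the location of the jar that the breeze classes are loaded from.
  // If this points into Spark's own jars directory rather than your application
  // jar, the runtime breeze version is not the one declared in build.sbt.
  println(classOf[breeze.linalg.DenseVector[_]].getProtectionDomain.getCodeSource.getLocation)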
The code of viterbiAlgo.scala is:

  package viterbiAlgorithm
  import breeze.linalg._
  // import org.apache.spark.sql.SparkSession

  object viterbiAlgo {
    def main(arg: Array[String]) {
      val A = DenseMatrix((0.5,0.2,0.3),
                          (0.3,0.5,0.2),
                          (0.2,0.3,0.5))
      val B = DenseMatrix((0.5,0.5),
                          (0.4,0.6),
                          (0.7,0.3))
      val pi = DenseVector(0.2,0.4,0.4)
      val o = DenseVector[Int](0,1,0)       // Hive time + cell_id
      val model = new Model(A,B,pi)
      val user = new User("Jack", model, o) // Hive
      user.eval()                           // run algorithm
      user.printResult()
      // spark sql
      // val warehouseLocation = "spark-warehouse"
      // val spark = SparkSession.builder().appName("Spark.sql.warehouse.dir").config("spark.sql.warehouse.dir", warehouseLocation).enableHiveSupport().getOrCreate()
      // import spark.implicits._
      // import spark.sql
      // val usr = "1"
      // val model = new Model(A,B,pi)
      // val get_statement = "SELECT * FROM viterbi.observation"
      // val df = sql(get_statement)
      // val o = DenseVector(df.filter(df("usr")===usr).select(df("obs")).collect().map(_.getInt(0)))
      // val user = new User(usr, model, o)
      // user.eval()
      // user.printResult()
    }
  }

  class Model(val A: DenseMatrix[Double], val B: DenseMatrix[Double], val pi: DenseVector[Double]) {
    def info(): Unit = {
      println("The model is:")
      println("A:")
      println(A)
      println("B:")
      println(B)
      println("Pi:")
      println(pi)
    }
  }

  class User(val usr_name: String, val model: Model, val o: DenseVector[Int]) {
    val N = model.A.rows // number of hidden states
    val M = model.B.cols // number of observation symbols
    val T = o.length     // number of time steps
    val delta = DenseMatrix.zeros[Double](N,T)
    val psi = DenseMatrix.zeros[Int](N,T)
    val best_route = DenseVector.zeros[Int](T)

    def eval(): Unit = {
      // 1. Initialization
      delta(::,0) := model.pi * model.B(::, o(0))
      psi(::,0) := DenseVector.zeros[Int](N)
      // 2. Induction
      val tempDelta = DenseMatrix.zeros[Double](N,N)
      val tempB = DenseMatrix.zeros[Double](N,N)
      for (t <- 1 to T-1) {
        // Delta
        tempDelta := DenseMatrix.tabulate(N, N){case (i, j) => delta(i,t-1)}
        tempB := DenseMatrix.tabulate(N, N){case (i, j) => model.B(j, o(t))}
        delta(::, t) := DenseVector.tabulate(N){i => max((tempDelta *:* model.A *:* tempB).t.t(::,i))}
      }
      // 3. Termination (maximum)
      val P_star = max(delta(::, T-1))
      val i_star_T = argmax(delta(::, T-1))
      best_route(T-1) = i_star_T
      // 4. Backtracking
      for (t <- T-2 to 0 by -1) {
        best_route(t) = psi(best_route(t+1), t+1)
      }
    }

    def printResult(): Unit = {
      println("User: " + usr_name)
      model.info()
      println
      println("Observed: ")
      printRoute(o)
      println("Best_route is: ")
      printRoute(best_route)
      println("delta is")
      println(delta)
      println("psi is: ")
      println(psi)
    }

    def printRoute(v: DenseVector[Int]): Unit = {
      for (i <- v(0 to -2)) {
        print(i + "->")
      }
      println(v(-1))
    }
  }

I also tried the --jars argument and passed the location of the breeze library, but got the same error.
I should mention that I tested the code "locally" on the server, and I tested all of the methods in spark-shell (I can import the breeze library in spark-shell on the server).
The server's Scala version matches the one in the sbt build file. The server's Spark version is 2.4.0-cdh6.2.1, though; if I append "-cdh6.2.1" after "2.4.0" in the build file, sbt fails to build.
I tried both of the possible solutions Viktor suggested, without success. However, when I changed the breeze version in the sbt build file from 1.0 to 0.13.2, everything worked. I still don't know what went wrong.
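For reference, a minimal sketch of that working change in build.sbt; the comment is my inference about why it helps, not something stated in the thread:

  // Pin breeze to 0.13.2 -- the version that Spark 2.4.x's MLlib already ships
  // on the cluster classpath -- so the classes the jar was compiled against
  // match the classes actually loaded at runtime.
  libraryDependencies += "org.scalanlp" %% "breeze" % "0.13.2"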


lo8azlld 1#

If the code runs locally but not on the server, it means you are not providing the library on the classpath of the job you submit.
You have two options (a sketch of both follows this list):
Use the --jars argument and pass the locations of all the required libraries (in your case, it appears to be the breeze library).
Use the sbt-assembly plugin, which produces a fat JAR containing all of the required dependencies, and submit that JAR instead.
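A minimal sketch of both options; the plugin version, the jar paths, and the spark-submit lines are illustrative, not taken from this thread:

  // Option 1 (no build changes): pass the library jars explicitly at submit time.
  // The path is hypothetical -- point it at wherever your breeze jars live:
  //   spark-submit --jars /path/to/breeze_2.11-1.0.jar \
  //     --class viterbiAlgorithm.viterbiAlgo ./target/scala-2.11/vibertialgo_2.11-1.3.4.jar

  // Option 2: in project/plugins.sbt, add the sbt-assembly plugin (version illustrative):
  addSbtPlugin("com.eed3si9n" % "sbt-assembly" % "0.14.10")

  // In build.sbt, mark Spark itself as "provided" so it is not packed into the
  // fat jar (the cluster already supplies it); breeze stays a normal dependency:
  libraryDependencies ++= Seq(
    "org.scalanlp" %% "breeze" % "1.0",
    "org.apache.spark" %% "spark-core" % "2.4.0" % "provided",
    "org.apache.spark" %% "spark-sql"  % "2.4.0" % "provided")

  // Build with `sbt assembly`, then submit the resulting fat jar:
  //   spark-submit --master yarn --deploy-mode client \
  //     --class viterbiAlgorithm.viterbiAlgo \
  //     ./target/scala-2.11/vibertiAlgo-assembly-1.3.4.jar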
