ClassNotFoundException: HadoopMapReduceCommitProtocol

zxlwwiss  于 2021-05-29  发布在  Hadoop
关注(0)|答案(1)|浏览(387)

我试图在本地模式下运行 Spark 示例，但得到以下堆栈跟踪：

  1. Exception in thread "main" java.lang.NoClassDefFoundError: org/apache/spark/internal/io/HadoopMapReduceCommitProtocol
  2. at java.lang.ClassLoader.defineClass1(Native Method)
  3. at java.lang.ClassLoader.defineClass(ClassLoader.java:763)
  4. at java.security.SecureClassLoader.defineClass(SecureClassLoader.java:142)
  5. at java.net.URLClassLoader.defineClass(URLClassLoader.java:467)
  6. at java.net.URLClassLoader.access$100(URLClassLoader.java:73)
  7. at java.net.URLClassLoader$1.run(URLClassLoader.java:368)
  8. at java.net.URLClassLoader$1.run(URLClassLoader.java:362)
  9. at java.security.AccessController.doPrivileged(Native Method)
  10. at java.net.URLClassLoader.findClass(URLClassLoader.java:361)
  11. at java.lang.ClassLoader.loadClass(ClassLoader.java:424)
  12. at sun.misc.Launcher$AppClassLoader.loadClass(Launcher.java:331)
  13. at java.lang.ClassLoader.loadClass(ClassLoader.java:357)
  14. at org.apache.spark.sql.internal.SQLConf$.<init>(SQLConf.scala:383)
  15. at org.apache.spark.sql.internal.SQLConf$.<clinit>(SQLConf.scala)
  16. at org.apache.spark.sql.internal.StaticSQLConf$$anonfun$buildConf$1.apply(SQLConf.scala:930)
  17. at org.apache.spark.sql.internal.StaticSQLConf$$anonfun$buildConf$1.apply(SQLConf.scala:928)
  18. at org.apache.spark.internal.config.TypedConfigBuilder$$anonfun$createWithDefault$1.apply(ConfigBuilder.scala:122)
  19. at org.apache.spark.internal.config.TypedConfigBuilder$$anonfun$createWithDefault$1.apply(ConfigBuilder.scala:122)
  20. at scala.Option.foreach(Option.scala:257)
  21. at org.apache.spark.internal.config.TypedConfigBuilder.createWithDefault(ConfigBuilder.scala:122)
  22. at org.apache.spark.sql.internal.StaticSQLConf$.<init>(SQLConf.scala:937)
  23. at org.apache.spark.sql.internal.StaticSQLConf$.<clinit>(SQLConf.scala)
  24. at org.apache.spark.sql.SparkSession$.org$apache$spark$sql$SparkSession$$sessionStateClassName(SparkSession.scala:962)
  25. at org.apache.spark.sql.SparkSession.sessionState$lzycompute(SparkSession.scala:111)
  26. at org.apache.spark.sql.SparkSession.sessionState(SparkSession.scala:109)
  27. at org.apache.spark.sql.SparkSession$Builder$$anonfun$getOrCreate$5.apply(SparkSession.scala:878)
  28. at org.apache.spark.sql.SparkSession$Builder$$anonfun$getOrCreate$5.apply(SparkSession.scala:878)
  29. at scala.collection.mutable.HashMap$$anonfun$foreach$1.apply(HashMap.scala:99)
  30. at scala.collection.mutable.HashMap$$anonfun$foreach$1.apply(HashMap.scala:99)
  31. at scala.collection.mutable.HashTable$class.foreachEntry(HashTable.scala:230)
  32. at scala.collection.mutable.HashMap.foreachEntry(HashMap.scala:40)
  33. at scala.collection.mutable.HashMap.foreach(HashMap.scala:99)
  34. at org.apache.spark.sql.SparkSession$Builder.getOrCreate(SparkSession.scala:878)
  35. at com.megaport.PipelineExample$.main(PipelineExample.scala:37)
  36. at com.megaport.PipelineExample.main(PipelineExample.scala)
  37. at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
  38. at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
  39. at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
  40. at java.lang.reflect.Method.invoke(Method.java:498)
  41. at com.intellij.rt.execution.application.AppMain.main(AppMain.java:147)
  42. Caused by: java.lang.ClassNotFoundException: org.apache.spark.internal.io.HadoopMapReduceCommitProtocol
  43. at java.net.URLClassLoader.findClass(URLClassLoader.java:381)
  44. at java.lang.ClassLoader.loadClass(ClassLoader.java:424)
  45. at sun.misc.Launcher$AppClassLoader.loadClass(Launcher.java:331)
  46. at java.lang.ClassLoader.loadClass(ClassLoader.java:357)

我可以在 GitHub 仓库中看到这个类，但它既不在 Maven 依赖库中，也不在发行版（我使用的是与 Hadoop 捆绑的发行版）的 spark-core_2.11-2.0.2.jar 中。
我尝试运行的代码取自 Spark 发行版中的示例，它在 getOrCreate 阶段失败……

  1. // scalastyle:off println
  2. package com.megaport
  3. // $example on$
  4. import org.apache.spark.ml.{Pipeline, PipelineModel}
  5. import org.apache.spark.ml.classification.LogisticRegression
  6. import org.apache.spark.ml.feature.{HashingTF, Tokenizer}
  7. import org.apache.spark.ml.linalg.Vector
  8. import org.apache.spark.sql.Row
  9. // $example off$
  10. import org.apache.spark.sql.SparkSession
  11. object PipelineExample {
  12. def main(args: Array[String]): Unit = {
  13. val spark = SparkSession.builder
  14. .appName("My Spark Application") // optional and will be autogenerated if not specified
  15. .master("local[*]") // avoid hardcoding the deployment environment
  16. // .enableHiveSupport() // self-explanatory, isn't it?
  17. .getOrCreate
  18. // $example on$
  19. // Prepare training documents from a list of (id, text, label) tuples.
  20. val training = spark.createDataFrame(Seq(
  21. (0L, "a b c d e spark", 1.0),
  22. (1L, "b d", 0.0),
  23. (2L, "spark f g h", 1.0),
  24. (3L, "hadoop mapreduce", 0.0)
  25. )).toDF("id", "text", "label")
  26. // Configure an ML pipeline, which consists of three stages: tokenizer, hashingTF, and lr.
  27. val tokenizer = new Tokenizer()
  28. .setInputCol("text")
  29. .setOutputCol("words")
  30. val hashingTF = new HashingTF()
  31. .setNumFeatures(1000)
  32. .setInputCol(tokenizer.getOutputCol)
  33. .setOutputCol("features")
  34. val lr = new LogisticRegression()
  35. .setMaxIter(10)
  36. .setRegParam(0.01)
  37. val pipeline = new Pipeline()
  38. .setStages(Array(tokenizer, hashingTF, lr))
  39. // Fit the pipeline to training documents.
  40. val model = pipeline.fit(training)
  41. // Now we can optionally save the fitted pipeline to disk
  42. model.write.overwrite().save("/tmp/spark-logistic-regression-model")
  43. // We can also save this unfit pipeline to disk
  44. pipeline.write.overwrite().save("/tmp/unfit-lr-model")
  45. // And load it back in during production
  46. val sameModel = PipelineModel.load("/tmp/spark-logistic-regression-model")
  47. // Prepare test documents, which are unlabeled (id, text) tuples.
  48. val test = spark.createDataFrame(Seq(
  49. (4L, "spark i j k"),
  50. (5L, "l m n"),
  51. (6L, "mapreduce spark"),
  52. (7L, "apache hadoop")
  53. )).toDF("id", "text")
  54. // Make predictions on test documents.
  55. model.transform(test)
  56. .select("id", "text", "probability", "prediction")
  57. .collect()
  58. .foreach { case Row(id: Long, text: String, prob: Vector, prediction: Double) =>
  59. println(s"($id, $text) --> prob=$prob, prediction=$prediction")
  60. }
  61. // $example off$
  62. spark.stop()
  63. }
  64. }
z9ju0rcb

z9ju0rcb1#

如果它不在您的 Java 类库中，那么您应该下载对应的依赖 jar 并将其添加到类路径中。更多详细信息请参阅：如何在 Eclipse 中导入 jar。

相关问题