用maven在hadoop集群上运行java程序

ybzsozfc  于 2021-06-04  发布在  Hadoop
关注(0)|答案(1)|浏览(383)

我可以通过在hadoop集群上用maven构建java程序来执行它。
我的程序的源代码

  1. package com.ait.summer.clusterermapreduce;
  2. import java.io.IOException;
  3. import org.apache.hadoop.conf.Configuration;
  4. import org.apache.hadoop.fs.FileSystem;
  5. import org.apache.hadoop.fs.Path;
  6. import org.apache.mahout.clustering.canopy.CanopyDriver;
  7. import org.apache.mahout.clustering.kmeans.KMeansDriver;
  8. import org.apache.mahout.common.HadoopUtil;
  9. import org.apache.mahout.common.distance.EuclideanDistanceMeasure;
  10. public class ClustererMapReduce {
  11. public static void main(String[] args) throws IOException, InterruptedException, ClassNotFoundException {
  12. Configuration conf = new Configuration();
  13. FileSystem hdfs = FileSystem.get(conf);
  14. Path ProjectFolder = new Path (args[0]);
  15. Path InputDir = new Path(args[0], "in");
  16. Path OutputDir = new Path(args[0], "out");
  17. Path canopyCentroids = new Path(args[0], "canopyCentroids");
  18. HadoopUtil.delete(conf, OutputDir);
  19. double clusterClassificationThreshold = 0.5;
  20. double t1 = 20;
  21. double t2 = 30;
  22. // how to set up correctly param?
  23. CanopyDriver.run(conf, InputDir, canopyCentroids, new EuclideanDistanceMeasure(),
  24. t1, t2, false, clusterClassificationThreshold, false);
  25. KMeansDriver.run(conf, InputDir, new Path(canopyCentroids, "clusters-0"), OutputDir, new EuclideanDistanceMeasure(), 0.5 , 5, true, 0.5 , false);
  26. }
  27. }

我的pom.xml文件

  1. <?xml version="1.0" encoding="UTF-8"?>
  2. <project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
  3. <modelVersion>4.0.0</modelVersion>
  4. <groupId>com.ait.summer</groupId>
  5. <artifactId>ClustererMapReduce</artifactId>
  6. <version>1.0-SNAPSHOT</version>
  7. <packaging>jar</packaging>
  8. <repositories>
  9. <repository>
  10. <id>apache.snapshots</id>
  11. <name>Apache Snapshot Repository</name>
  12. <url>http://repository.apache.org/snapshots</url>
  13. <releases>
  14. <enabled>false</enabled>
  15. </releases>
  16. </repository>
  17. <repository>
  18. <id>cdh.repo</id>
  19. <url>https://repository.cloudera.com/artifactory/cloudera-repos</url>
  20. <name>Cloudera Repositories</name>
  21. <snapshots>
  22. <enabled>false</enabled>
  23. </snapshots>
  24. </repository>
  25. <repository>
  26. <id>cdh.snapshots.repo</id>
  27. <url>https://repository.cloudera.com/artifactory/libs-snapshot-local</url>
  28. <name>Cloudera Snapshots Repository</name>
  29. <snapshots>
  30. <enabled>true</enabled>
  31. </snapshots>
  32. <releases>
  33. <enabled>false</enabled>
  34. </releases>
  35. </repository>
  36. </repositories>
  37. <dependencies>
  38. <dependency>
  39. <groupId>org.apache.hadoop</groupId>
  40. <artifactId>hadoop-common</artifactId>
  41. <version>2.0.0-cdh4.7.0</version>
  42. <type>jar</type>
  43. </dependency>
  44. <dependency>
  45. <groupId>org.apache.mahout</groupId>
  46. <artifactId>mahout-core</artifactId>
  47. <version>0.7-cdh4.7.0</version>
  48. <type>jar</type>
  49. </dependency>
  50. <dependency>
  51. <groupId>jfree</groupId>
  52. <artifactId>jfreechart</artifactId>
  53. <version>1.0.8a</version>
  54. </dependency>
  55. </dependencies>
  56. <properties>
  57. <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
  58. <maven.compiler.source>1.7</maven.compiler.source>
  59. <maven.compiler.target>1.7</maven.compiler.target>
  60. </properties>
  61. </project>

从命令行编译此代码后(我位于projectdir中)

  1. $ mvn install

我和你一起跑

  1. $ hadoop jar

收到了这个错误日志

  1. Exception in thread "main" java.lang.NoClassDefFoundError: org/apache/mahout/common/distance/DistanceMeasure
  2. at java.lang.Class.forName0(Native Method)
  3. at java.lang.Class.forName(Class.java:270)
  4. at org.apache.hadoop.util.RunJar.main(RunJar.java:201)
  5. Caused by: java.lang.ClassNotFoundException: org.apache.mahout.common.distance.DistanceMeasure
  6. at java.net.URLClassLoader$1.run(URLClassLoader.java:366)
  7. at java.net.URLClassLoader$1.run(URLClassLoader.java:355)
  8. at java.security.AccessController.doPrivileged(Native Method)
  9. at java.net.URLClassLoader.findClass(URLClassLoader.java:354)
  10. at java.lang.ClassLoader.loadClass(ClassLoader.java:425)
  11. at java.lang.ClassLoader.loadClass(ClassLoader.java:358)
  12. ... 3 more

我使用cdh-4.7.0jdk1.7.0.65使用netbeanside8.0
这个问题的原因是什么?

kxkpmulp

kxkpmulp1#

如果您已经用maven创建了jar,那么可以运行以下命令:
在linux中: export CLASSPATH=<path_jars_dependencies and jar_ClustererMapReduce> java -cp $CLASSPATH com.ait.summer.clusterermapreduce.ClustererMapReduce [path\u jars\u dependencies]:您可以使用post导出应用程序的依赖项:如何使用maven创建具有依赖项的可执行jar?
[jar\u clusterermapreduce]:它是用命令生成的jar maven install

相关问题