java.io exception when running k-means clustering

s71maibg · asked 2021-06-04 · Hadoop

I am trying to run a clustering program using Mahout:

  package com;

  import java.io.File;
  import java.io.IOException;
  import java.util.ArrayList;
  import java.util.List;

  import org.apache.hadoop.conf.Configuration;
  import org.apache.hadoop.fs.FileSystem;
  import org.apache.hadoop.fs.Path;
  import org.apache.hadoop.io.IntWritable;
  import org.apache.hadoop.io.LongWritable;
  import org.apache.hadoop.io.SequenceFile;
  import org.apache.hadoop.io.Text;
  import org.apache.mahout.clustering.WeightedVectorWritable;
  import org.apache.mahout.clustering.kmeans.Cluster;
  import org.apache.mahout.clustering.kmeans.KMeansDriver;
  import org.apache.mahout.common.distance.EuclideanDistanceMeasure;
  import org.apache.mahout.math.RandomAccessSparseVector;
  import org.apache.mahout.math.Vector;
  import org.apache.mahout.math.VectorWritable;

  public class ClusteringDemo {

      public static final double[][] points = { { 1, 1 }, { 2, 1 }, { 1, 2 },
              { 2, 2 }, { 3, 3 }, { 8, 8 }, { 9, 8 }, { 8, 9 }, { 9, 9 } };

      public static void writePointsToFile(List<Vector> points, String fileName,
              FileSystem fs, Configuration conf) throws IOException {
          Path path = new Path(fileName);
          SequenceFile.Writer writer = new SequenceFile.Writer(fs, conf, path,
                  LongWritable.class, VectorWritable.class);
          long recNum = 0;
          VectorWritable vec = new VectorWritable();
          for (Vector point : points) {
              vec.set(point);
              writer.append(new LongWritable(recNum++), vec);
          }
          writer.close();
      }

      public static List<Vector> getPoints(double[][] raw) {
          List<Vector> points = new ArrayList<Vector>();
          for (int i = 0; i < raw.length; i++) {
              double[] fr = raw[i];
              Vector vec = new RandomAccessSparseVector(fr.length);
              vec.assign(fr);
              points.add(vec);
          }
          return points;
      }

      public static void main(String args[]) throws Exception {
          int k = 3;
          List<Vector> vectors = getPoints(points);
          File testData = new File("/home/vishal/testdata");
          if (!testData.exists()) {
              testData.mkdir();
          }
          testData = new File("/home/vishal/testdata/points");
          if (!testData.exists()) {
              testData.mkdir();
          }
          Configuration conf = new Configuration();
          FileSystem fs = FileSystem.get(conf);
          writePointsToFile(vectors, "/home/vishal/testdata/points/file1", fs,
                  conf);
          Path path = new Path("/home/vishal/testdata/clusters/part-00000");
          SequenceFile.Writer writer = new SequenceFile.Writer(fs, conf, path,
                  Text.class, Cluster.class);
          for (int i = 0; i < k; i++) {
              Vector vec = vectors.get(i);
              Cluster cluster = new Cluster(vec, i,
                      new EuclideanDistanceMeasure());
              writer.append(new Text(cluster.getIdentifier()), cluster);
          }
          writer.close();
          KMeansDriver.run(conf, new Path("/home/vishal/testdata/points"),
                  new Path("/home/vishal/testdata/clusters"),
                  new Path("/home/vishal/output"), new EuclideanDistanceMeasure(),
                  0.001, 10, true, false);
          SequenceFile.Reader reader = new SequenceFile.Reader(fs, new Path(
                  "/home/vishal/output/" + Cluster.CLUSTERED_POINTS_DIR
                          + "/part-m-00000"), conf);
          IntWritable key = new IntWritable();
          WeightedVectorWritable value = new WeightedVectorWritable();
          while (reader.next(key, value)) {
              System.out.println(value.toString() + " belongs to cluster "
                      + key.toString());
          }
          reader.close();
      }
  }

But when I run it, it starts executing normally and then fails at the end with an error. Below is the stack trace I get when I run it.

  13/05/30 09:49:22 WARN util.NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable
  13/05/30 09:49:22 INFO kmeans.KMeansDriver: Input: /home/vishal/testdata/points Clusters In: /home/vishal/testdata/clusters Out: /home/vishal/output Distance: org.apache.mahout.common.distance.EuclideanDistanceMeasure
  13/05/30 09:49:22 INFO kmeans.KMeansDriver: convergence: 0.0010 max Iterations: 10 num Reduce Tasks: org.apache.mahout.math.VectorWritable Input Vectors: {}
  13/05/30 09:49:22 INFO kmeans.KMeansDriver: K-Means Iteration 1
  13/05/30 09:49:22 INFO common.HadoopUtil: Deleting /home/vishal/output/clusters-1
  13/05/30 09:49:23 WARN mapred.JobClient: Use GenericOptionsParser for parsing the arguments. Applications should implement Tool for the same.
  13/05/30 09:49:23 INFO input.FileInputFormat: Total input paths to process : 1
  13/05/30 09:49:23 INFO mapred.JobClient: Running job: job_local_0001
  13/05/30 09:49:23 INFO util.ProcessTree: setsid exited with exit code 0
  13/05/30 09:49:23 INFO mapred.Task: Using ResourceCalculatorPlugin : org.apache.hadoop.util.LinuxResourceCalculatorPlugin@15fc40c
  13/05/30 09:49:23 INFO mapred.MapTask: io.sort.mb = 100
  13/05/30 09:49:23 INFO mapred.MapTask: data buffer = 79691776/99614720
  13/05/30 09:49:23 INFO mapred.MapTask: record buffer = 262144/327680
  13/05/30 09:49:23 INFO mapred.MapTask: Starting flush of map output
  13/05/30 09:49:23 INFO mapred.MapTask: Finished spill 0
  13/05/30 09:49:23 INFO mapred.Task: Task:attempt_local_0001_m_000000_0 is done. And is in the process of commiting
  13/05/30 09:49:24 INFO mapred.JobClient: map 0% reduce 0%
  13/05/30 09:49:26 INFO mapred.LocalJobRunner:
  13/05/30 09:49:26 INFO mapred.Task: Task 'attempt_local_0001_m_000000_0' done.
  13/05/30 09:49:26 INFO mapred.Task: Using ResourceCalculatorPlugin : org.apache.hadoop.util.LinuxResourceCalculatorPlugin@15ed659
  13/05/30 09:49:26 INFO mapred.LocalJobRunner:
  13/05/30 09:49:26 INFO mapred.Merger: Merging 1 sorted segments
  13/05/30 09:49:26 INFO mapred.Merger: Down to the last merge-pass, with 1 segments left of total size: 185 bytes
  13/05/30 09:49:26 INFO mapred.LocalJobRunner:
  13/05/30 09:49:26 INFO mapred.Task: Task:attempt_local_0001_r_000000_0 is done. And is in the process of commiting
  13/05/30 09:49:26 INFO mapred.LocalJobRunner:
  13/05/30 09:49:26 INFO mapred.Task: Task attempt_local_0001_r_000000_0 is allowed to commit now
  13/05/30 09:49:26 INFO output.FileOutputCommitter: Saved output of task 'attempt_local_0001_r_000000_0' to /home/vishal/output/clusters-1
  13/05/30 09:49:27 INFO mapred.JobClient: map 100% reduce 0%
  13/05/30 09:49:29 INFO mapred.LocalJobRunner: reduce > reduce
  13/05/30 09:49:29 INFO mapred.Task: Task 'attempt_local_0001_r_000000_0' done.
  13/05/30 09:49:30 INFO mapred.JobClient: map 100% reduce 100%
  13/05/30 09:49:30 INFO mapred.JobClient: Job complete: job_local_0001
  13/05/30 09:49:30 INFO mapred.JobClient: Counters: 21
  13/05/30 09:49:30 INFO mapred.JobClient: File Output Format Counters
  13/05/30 09:49:30 INFO mapred.JobClient: Bytes Written=474
  13/05/30 09:49:30 INFO mapred.JobClient: Clustering
  13/05/30 09:49:30 INFO mapred.JobClient: Converged Clusters=1
  13/05/30 09:49:30 INFO mapred.JobClient: FileSystemCounters
  13/05/30 09:49:30 INFO mapred.JobClient: FILE_BYTES_READ=3328461
  13/05/30 09:49:30 INFO mapred.JobClient: FILE_BYTES_WRITTEN=3422872
  13/05/30 09:49:30 INFO mapred.JobClient: File Input Format Counters
  13/05/30 09:49:30 INFO mapred.JobClient: Bytes Read=443
  13/05/30 09:49:30 INFO mapred.JobClient: Map-Reduce Framework
  13/05/30 09:49:30 INFO mapred.JobClient: Map output materialized bytes=189
  13/05/30 09:49:30 INFO mapred.JobClient: Map input records=9
  13/05/30 09:49:30 INFO mapred.JobClient: Reduce shuffle bytes=0
  13/05/30 09:49:30 INFO mapred.JobClient: Spilled Records=6
  13/05/30 09:49:30 INFO mapred.JobClient: Map output bytes=531
  13/05/30 09:49:30 INFO mapred.JobClient: Total committed heap usage (bytes)=325713920
  13/05/30 09:49:30 INFO mapred.JobClient: CPU time spent (ms)=0
  13/05/30 09:49:30 INFO mapred.JobClient: SPLIT_RAW_BYTES=104
  13/05/30 09:49:30 INFO mapred.JobClient: Combine input records=9
  13/05/30 09:49:30 INFO mapred.JobClient: Reduce input records=3
  13/05/30 09:49:30 INFO mapred.JobClient: Reduce input groups=3
  13/05/30 09:49:30 INFO mapred.JobClient: Combine output records=3
  13/05/30 09:49:30 INFO mapred.JobClient: Physical memory (bytes) snapshot=0
  13/05/30 09:49:30 INFO mapred.JobClient: Reduce output records=3
  13/05/30 09:49:30 INFO mapred.JobClient: Virtual memory (bytes) snapshot=0
  13/05/30 09:49:30 INFO mapred.JobClient: Map output records=9
  13/05/30 09:49:30 INFO kmeans.KMeansDriver: K-Means Iteration 2
  13/05/30 09:49:30 INFO common.HadoopUtil: Deleting /home/vishal/output/clusters-2
  13/05/30 09:49:30 WARN mapred.JobClient: Use GenericOptionsParser for parsing the arguments. Applications should implement Tool for the same.
  13/05/30 09:49:30 INFO input.FileInputFormat: Total input paths to process : 1
  13/05/30 09:49:30 INFO mapred.JobClient: Running job: job_local_0002
  13/05/30 09:49:30 INFO mapred.Task: Using ResourceCalculatorPlugin : org.apache.hadoop.util.LinuxResourceCalculatorPlugin@13f136e
  13/05/30 09:49:30 INFO mapred.MapTask: io.sort.mb = 100
  13/05/30 09:49:30 INFO mapred.MapTask: data buffer = 79691776/99614720
  13/05/30 09:49:30 INFO mapred.MapTask: record buffer = 262144/327680
  13/05/30 09:49:30 INFO mapred.MapTask: Starting flush of map output
  13/05/30 09:49:30 INFO mapred.MapTask: Finished spill 0
  13/05/30 09:49:30 INFO mapred.Task: Task:attempt_local_0002_m_000000_0 is done. And is in the process of commiting
  13/05/30 09:49:31 INFO mapred.JobClient: map 0% reduce 0%
  13/05/30 09:49:33 INFO mapred.LocalJobRunner:
  13/05/30 09:49:33 INFO mapred.Task: Task 'attempt_local_0002_m_000000_0' done.
  13/05/30 09:49:33 INFO mapred.Task: Using ResourceCalculatorPlugin : org.apache.hadoop.util.LinuxResourceCalculatorPlugin@d6b059
  13/05/30 09:49:33 INFO mapred.LocalJobRunner:
  13/05/30 09:49:33 INFO mapred.Merger: Merging 1 sorted segments
  13/05/30 09:49:33 INFO mapred.Merger: Down to the last merge-pass, with 1 segments left of total size: 124 bytes
  13/05/30 09:49:33 INFO mapred.LocalJobRunner:
  13/05/30 09:49:33 INFO mapred.Task: Task:attempt_local_0002_r_000000_0 is done. And is in the process of commiting
  13/05/30 09:49:33 INFO mapred.LocalJobRunner:
  13/05/30 09:49:33 INFO mapred.Task: Task attempt_local_0002_r_000000_0 is allowed to commit now
  13/05/30 09:49:33 INFO output.FileOutputCommitter: Saved output of task 'attempt_local_0002_r_000000_0' to /home/vishal/output/clusters-2
  13/05/30 09:49:34 INFO mapred.JobClient: map 100% reduce 0%
  13/05/30 09:49:36 INFO mapred.LocalJobRunner: reduce > reduce
  13/05/30 09:49:36 INFO mapred.Task: Task 'attempt_local_0002_r_000000_0' done.
  13/05/30 09:49:37 INFO mapred.JobClient: map 100% reduce 100%
  13/05/30 09:49:37 INFO mapred.JobClient: Job complete: job_local_0002
  13/05/30 09:49:37 INFO mapred.JobClient: Counters: 20
  13/05/30 09:49:37 INFO mapred.JobClient: File Output Format Counters
  13/05/30 09:49:37 INFO mapred.JobClient: Bytes Written=364
  13/05/30 09:49:37 INFO mapred.JobClient: FileSystemCounters
  13/05/30 09:49:37 INFO mapred.JobClient: FILE_BYTES_READ=6658544
  13/05/30 09:49:37 INFO mapred.JobClient: FILE_BYTES_WRITTEN=6844248
  13/05/30 09:49:37 INFO mapred.JobClient: File Input Format Counters
  13/05/30 09:49:37 INFO mapred.JobClient: Bytes Read=443
  13/05/30 09:49:37 INFO mapred.JobClient: Map-Reduce Framework
  13/05/30 09:49:37 INFO mapred.JobClient: Map output materialized bytes=128
  13/05/30 09:49:37 INFO mapred.JobClient: Map input records=9
  13/05/30 09:49:37 INFO mapred.JobClient: Reduce shuffle bytes=0
  13/05/30 09:49:37 INFO mapred.JobClient: Spilled Records=4
  13/05/30 09:49:37 INFO mapred.JobClient: Map output bytes=531
  13/05/30 09:49:37 INFO mapred.JobClient: Total committed heap usage (bytes)=525074432
  13/05/30 09:49:37 INFO mapred.JobClient: CPU time spent (ms)=0
  13/05/30 09:49:37 INFO mapred.JobClient: SPLIT_RAW_BYTES=104
  13/05/30 09:49:37 INFO mapred.JobClient: Combine input records=9
  13/05/30 09:49:37 INFO mapred.JobClient: Reduce input records=2
  13/05/30 09:49:37 INFO mapred.JobClient: Reduce input groups=2
  13/05/30 09:49:37 INFO mapred.JobClient: Combine output records=2
  13/05/30 09:49:37 INFO mapred.JobClient: Physical memory (bytes) snapshot=0
  13/05/30 09:49:37 INFO mapred.JobClient: Reduce output records=2
  13/05/30 09:49:37 INFO mapred.JobClient: Virtual memory (bytes) snapshot=0
  13/05/30 09:49:37 INFO mapred.JobClient: Map output records=9
  13/05/30 09:49:37 INFO kmeans.KMeansDriver: K-Means Iteration 3
  13/05/30 09:49:37 INFO common.HadoopUtil: Deleting /home/vishal/output/clusters-3
  13/05/30 09:49:37 WARN mapred.JobClient: Use GenericOptionsParser for parsing the arguments. Applications should implement Tool for the same.
  13/05/30 09:49:37 INFO input.FileInputFormat: Total input paths to process : 1
  13/05/30 09:49:37 INFO mapred.JobClient: Running job: job_local_0003
  13/05/30 09:49:37 INFO mapred.Task: Using ResourceCalculatorPlugin : org.apache.hadoop.util.LinuxResourceCalculatorPlugin@988707
  13/05/30 09:49:37 INFO mapred.MapTask: io.sort.mb = 100
  13/05/30 09:49:37 INFO mapred.MapTask: data buffer = 79691776/99614720
  13/05/30 09:49:37 INFO mapred.MapTask: record buffer = 262144/327680
  13/05/30 09:49:37 INFO mapred.MapTask: Starting flush of map output
  13/05/30 09:49:37 INFO mapred.MapTask: Finished spill 0
  13/05/30 09:49:37 INFO mapred.Task: Task:attempt_local_0003_m_000000_0 is done. And is in the process of commiting
  13/05/30 09:49:38 INFO mapred.JobClient: map 0% reduce 0%
  13/05/30 09:49:40 INFO mapred.LocalJobRunner:
  13/05/30 09:49:40 INFO mapred.Task: Task 'attempt_local_0003_m_000000_0' done.
  13/05/30 09:49:40 INFO mapred.Task: Using ResourceCalculatorPlugin : org.apache.hadoop.util.LinuxResourceCalculatorPlugin@6214f5
  13/05/30 09:49:40 INFO mapred.LocalJobRunner:
  13/05/30 09:49:40 INFO mapred.Merger: Merging 1 sorted segments
  13/05/30 09:49:40 INFO mapred.Merger: Down to the last merge-pass, with 1 segments left of total size: 124 bytes
  13/05/30 09:49:40 INFO mapred.LocalJobRunner:
  13/05/30 09:49:40 INFO mapred.Task: Task:attempt_local_0003_r_000000_0 is done. And is in the process of commiting
  13/05/30 09:49:40 INFO mapred.LocalJobRunner:
  13/05/30 09:49:40 INFO mapred.Task: Task attempt_local_0003_r_000000_0 is allowed to commit now
  13/05/30 09:49:40 INFO output.FileOutputCommitter: Saved output of task 'attempt_local_0003_r_000000_0' to /home/vishal/output/clusters-3
  13/05/30 09:49:41 INFO mapred.JobClient: map 100% reduce 0%
  13/05/30 09:49:43 INFO mapred.LocalJobRunner: reduce > reduce
  13/05/30 09:49:43 INFO mapred.Task: Task 'attempt_local_0003_r_000000_0' done.
  13/05/30 09:49:44 INFO mapred.JobClient: map 100% reduce 100%
  13/05/30 09:49:44 INFO mapred.JobClient: Job complete: job_local_0003
  13/05/30 09:49:44 INFO mapred.JobClient: Counters: 21
  13/05/30 09:49:44 INFO mapred.JobClient: File Output Format Counters
  13/05/30 09:49:44 INFO mapred.JobClient: Bytes Written=364
  13/05/30 09:49:44 INFO mapred.JobClient: Clustering
  13/05/30 09:49:44 INFO mapred.JobClient: Converged Clusters=2
  13/05/30 09:49:44 INFO mapred.JobClient: FileSystemCounters
  13/05/30 09:49:44 INFO mapred.JobClient: FILE_BYTES_READ=9988052
  13/05/30 09:49:44 INFO mapred.JobClient: FILE_BYTES_WRITTEN=10265506
  13/05/30 09:49:44 INFO mapred.JobClient: File Input Format Counters
  13/05/30 09:49:44 INFO mapred.JobClient: Bytes Read=443
  13/05/30 09:49:44 INFO mapred.JobClient: Map-Reduce Framework
  13/05/30 09:49:44 INFO mapred.JobClient: Map output materialized bytes=128
  13/05/30 09:49:44 INFO mapred.JobClient: Map input records=9
  13/05/30 09:49:44 INFO mapred.JobClient: Reduce shuffle bytes=0
  13/05/30 09:49:44 INFO mapred.JobClient: Spilled Records=4
  13/05/30 09:49:44 INFO mapred.JobClient: Map output bytes=531
  13/05/30 09:49:44 INFO mapred.JobClient: Total committed heap usage (bytes)=724434944
  13/05/30 09:49:44 INFO mapred.JobClient: CPU time spent (ms)=0
  13/05/30 09:49:44 INFO mapred.JobClient: SPLIT_RAW_BYTES=104
  13/05/30 09:49:44 INFO mapred.JobClient: Combine input records=9
  13/05/30 09:49:44 INFO mapred.JobClient: Reduce input records=2
  13/05/30 09:49:44 INFO mapred.JobClient: Reduce input groups=2
  13/05/30 09:49:44 INFO mapred.JobClient: Combine output records=2
  13/05/30 09:49:44 INFO mapred.JobClient: Physical memory (bytes) snapshot=0
  13/05/30 09:49:44 INFO mapred.JobClient: Reduce output records=2
  13/05/30 09:49:44 INFO mapred.JobClient: Virtual memory (bytes) snapshot=0
  13/05/30 09:49:44 INFO mapred.JobClient: Map output records=9
  Exception in thread "main" java.io.IOException: Target /home/vishal/output/clusters-3-final/clusters-3 is a directory
      at org.apache.hadoop.fs.FileUtil.checkDest(FileUtil.java:359)
      at org.apache.hadoop.fs.FileUtil.checkDest(FileUtil.java:361)
      at org.apache.hadoop.fs.FileUtil.copy(FileUtil.java:211)
      at org.apache.hadoop.fs.FileUtil.copy(FileUtil.java:163)
      at org.apache.hadoop.fs.RawLocalFileSystem.rename(RawLocalFileSystem.java:287)
      at org.apache.hadoop.fs.ChecksumFileSystem.rename(ChecksumFileSystem.java:425)
      at org.apache.mahout.clustering.kmeans.KMeansDriver.buildClustersMR(KMeansDriver.java:322)
      at org.apache.mahout.clustering.kmeans.KMeansDriver.buildClusters(KMeansDriver.java:239)
      at org.apache.mahout.clustering.kmeans.KMeansDriver.run(KMeansDriver.java:154)
      at com.ClusteringDemo.main(ClusteringDemo.java:80)

What could be the cause of this?
Thanks.


0tdrvxhp1#

Here is what KMeansDriver is trying to do:

  Path finalClustersIn = new Path(output, AbstractCluster.CLUSTERS_DIR + (iteration-1) + "-final");
  FileSystem.get(conf).rename(new Path(output, AbstractCluster.CLUSTERS_DIR + (iteration-1)), finalClustersIn);

As you can see, it has converged after 3 iterations and is trying to rename the directory clusters-3, which holds the results of the 3rd iteration, to clusters-3-final to mark that it is done.
Now, the rename method of FileSystem checks, before actually renaming, that it is not trying to rename onto a directory that already exists. And indeed, it looks like you already have a clusters-3-final directory, probably left over from a previous run.
Deleting this directory should fix your problem. You can do that from the command line with:

  hadoop fs -rmr /home/vishal/output/clusters-3-final

Or, since it looks like you are running the job in local mode:

  rm -rf /home/vishal/output/clusters-3-final
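
You could also clear the stale output from code before calling KMeansDriver.run. A minimal sketch using the standard Hadoop FileSystem API (the /home/vishal/output path is the one from the question; wiping the whole output tree is an assumption about what you want to keep):

  Configuration conf = new Configuration();
  FileSystem fs = FileSystem.get(conf);
  Path output = new Path("/home/vishal/output");
  // Recursively remove any leftover output from a previous run so the final
  // rename to clusters-N-final cannot collide with an existing directory.
  if (fs.exists(output)) {
      fs.delete(output, true); // true = recursive delete
  }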

To avoid this kind of problem, I would recommend using a unique output directory for every run of your analysis. For example, you could take the current time and append it to the name of the output Path, e.g. using System.currentTimeMillis().
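
A rough sketch of that idea, reusing the run call from the question (the output directory name is just an illustration):

  // Append a timestamp so every run writes to a fresh output directory.
  Path output = new Path("/home/vishal/output-" + System.currentTimeMillis());
  KMeansDriver.run(conf, new Path("/home/vishal/testdata/points"),
          new Path("/home/vishal/testdata/clusters"), output,
          new EuclideanDistanceMeasure(), 0.001, 10, true, false);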
EDIT: regarding the second issue:

  Exception in thread "main" java.io.IOException: wrong value class: 0.0: null is not class org.apache.mahout.clustering.WeightedPropertyVectorWritable at org.apache.hadoop.io.SequenceFile$Reader.next(SequenceFile.java:1932) at com.ClusteringDemo.main(ClusteringDemo.java:90)

You are actually hitting a clash between Mahout versions: older versions used WeightedVectorWritable, while more recent ones use WeightedPropertyVectorWritable. To fix it, simply change the declaration of your value variable from:

  WeightedVectorWritable value = new WeightedVectorWritable();

to:

  WeightedPropertyVectorWritable value = new WeightedPropertyVectorWritable();
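
With that change, the read loop from the question would look roughly as follows. Note that the import path of WeightedPropertyVectorWritable depends on your Mahout version (in more recent releases it lives under org.apache.mahout.clustering.classify), so adjust it to match the jar you build against:

  // Import path may differ by Mahout version; adjust as needed.
  import org.apache.mahout.clustering.classify.WeightedPropertyVectorWritable;

  // ...

  IntWritable key = new IntWritable();
  WeightedPropertyVectorWritable value = new WeightedPropertyVectorWritable();
  while (reader.next(key, value)) {
      System.out.println(value.toString() + " belongs to cluster " + key.toString());
  }
  reader.close();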
