本文整理了Java中org.apache.spark.mllib.clustering.KMeans类的一些代码示例,展示了KMeans类的具体用法。这些代码示例主要来源于Github/Stackoverflow/Maven等平台,是从一些精选项目中提取出来的代码,具有较强的参考意义,能在一定程度上帮助到你。KMeans类的具体详情如下:
包路径:org.apache.spark.mllib.clustering.KMeans
类名称:KMeans
暂无
代码示例来源:origin: OryxProject/oryx
/**
 * Trains a k-means model on the supplied text records and converts it to PMML.
 *
 * <p>The first hyper-parameter is read as the cluster count and must be greater than 1.
 * The iteration limit, number of runs and initialization strategy come from this
 * instance's fields.
 *
 * @param sparkContext active Spark Context
 * @param trainData training data on which to build a model
 * @param hyperParameters ordered list of hyper parameter values; element 0 is the
 *  number of clusters, as an {@link Integer}
 * @param candidatePath directory where additional model files can be written
 * @return a {@link PMML} representation of a model trained on the given data
 */
@Override
public PMML buildModel(JavaSparkContext sparkContext,
                       JavaRDD<String> trainData,
                       List<?> hyperParameters,
                       Path candidatePath) {
  int k = (Integer) hyperParameters.get(0);
  Preconditions.checkArgument(k > 1);
  log.info("Building KMeans Model with {} clusters", k);

  // Parse each raw input line, then turn the parsed records into feature vectors.
  JavaRDD<Vector> featureVectors = parsedToVectorRDD(trainData.map(MLFunctions.PARSE_FN));

  KMeansModel trainedModel = KMeans.train(
      featureVectors.rdd(),
      k,
      maxIterations,
      numberOfRuns,
      initializationStrategy);

  // The PMML output also carries per-cluster counts computed from the training vectors.
  return kMeansModelToPMML(
      trainedModel,
      fetchClusterCountsFromModel(featureVectors, trainedModel));
}
代码示例来源:origin: OryxProject/oryx
public KMeansUpdate(Config config) {
super(config);
initializationStrategy = config.getString("oryx.kmeans.initialization-strategy");
evaluationStrategy = Enum.valueOf(KMeansEvalStrategy.class, config.getString("oryx.kmeans.evaluation-strategy"));
numberOfRuns = config.getInt("oryx.kmeans.runs");
maxIterations = config.getInt("oryx.kmeans.iterations");
hyperParamValues = new ArrayList<>();
hyperParamValues.add(HyperParams.fromConfig(config, "oryx.kmeans.hyperparams.k"));
inputSchema = new InputSchema(config);
Preconditions.checkArgument(maxIterations > 0);
Preconditions.checkArgument(numberOfRuns > 0);
Preconditions.checkArgument(
initializationStrategy.equals(KMeans.K_MEANS_PARALLEL()) ||
initializationStrategy.equals(KMeans.RANDOM()));
// Should be an unsupervised problem. This impl only supports numeric features.
Preconditions.checkArgument(!inputSchema.hasTarget());
for (int i = 0; i < inputSchema.getNumFeatures(); i++) {
Preconditions.checkArgument(!inputSchema.isCategorical(i));
}
}
代码示例来源:origin: org.apache.spark/spark-mllib_2.11
/**
 * Trains single-cluster models through the {@code KMeans} builder-style setters, with
 * the default and the random initialization mode, and checks the resulting center.
 */
@Test
public void runKMeansUsingConstructor() {
  // Component-wise mean of the three points below.
  Vector expectedCenter = Vectors.dense(1.0, 3.0, 4.0);
  JavaRDD<Vector> data = jsc.parallelize(
      Arrays.asList(
          Vectors.dense(1.0, 2.0, 6.0),
          Vectors.dense(1.0, 3.0, 0.0),
          Vectors.dense(1.0, 4.0, 6.0)),
      2);

  // Default initialization mode.
  KMeans defaultTrainer = new KMeans().setK(1).setMaxIterations(5);
  KMeansModel model = defaultTrainer.run(data.rdd());
  Vector[] centers = model.clusterCenters();
  assertEquals(1, centers.length);
  assertEquals(expectedCenter, centers[0]);

  // Random initialization, one iteration.
  KMeans randomTrainer = new KMeans()
      .setK(1)
      .setMaxIterations(1)
      .setInitializationMode(KMeans.RANDOM());
  model = randomTrainer.run(data.rdd());
  assertEquals(expectedCenter, model.clusterCenters()[0]);
}
代码示例来源:origin: org.apache.spark/spark-mllib_2.11
/**
 * Smoke test: a trained model can predict over a {@code JavaRDD} and the resulting
 * RDD can be evaluated.
 */
@Test
public void testPredictJavaRDD() {
  JavaRDD<Vector> data = jsc.parallelize(
      Arrays.asList(
          Vectors.dense(1.0, 2.0, 6.0),
          Vectors.dense(1.0, 3.0, 0.0),
          Vectors.dense(1.0, 4.0, 6.0)),
      2);

  KMeans trainer = new KMeans().setK(1).setMaxIterations(5);
  KMeansModel model = trainer.run(data.rdd());

  JavaRDD<Integer> predictions = model.predict(data);
  // Should be able to get the first prediction.
  predictions.first();
}
}
代码示例来源:origin: org.apache.spark/spark-mllib
/**
 * Trains single-cluster models through the static {@code KMeans.train} entry point,
 * once per initialization mode, and checks the resulting center.
 */
@Test
public void runKMeansUsingStaticMethods() {
  // Component-wise mean of the three points below.
  Vector expectedCenter = Vectors.dense(1.0, 3.0, 4.0);
  JavaRDD<Vector> data = jsc.parallelize(
      Arrays.asList(
          Vectors.dense(1.0, 2.0, 6.0),
          Vectors.dense(1.0, 3.0, 0.0),
          Vectors.dense(1.0, 4.0, 6.0)),
      2);

  KMeansModel parallelModel =
      KMeans.train(data.rdd(), 1, 1, 1, KMeans.K_MEANS_PARALLEL());
  Vector[] parallelCenters = parallelModel.clusterCenters();
  assertEquals(1, parallelCenters.length);
  assertEquals(expectedCenter, parallelCenters[0]);

  KMeansModel randomModel = KMeans.train(data.rdd(), 1, 1, 1, KMeans.RANDOM());
  assertEquals(expectedCenter, randomModel.clusterCenters()[0]);
}
代码示例来源:origin: locationtech/geowave
// Configure and run k-means over the centroid vectors.
final KMeans kmeans = new KMeans();
// Use the library constant instead of a hand-typed mode string: the literal
// "kmeans||" previously used here is not the value MLlib documents for this mode.
kmeans.setInitializationMode(KMeans.K_MEANS_PARALLEL());
kmeans.setK(numClusters);
kmeans.setMaxIterations(numIterations);
kmeans.setEpsilon(epsilon);
outputModel = kmeans.run(centroidVectors.rdd());
代码示例来源:origin: org.apache.spark/spark-mllib_2.10
/**
 * Builds {@code KMeans} instances via the constructor and setters, fits one cluster
 * to three points, and verifies the center for default and random initialization.
 */
@Test
public void runKMeansUsingConstructor() {
  JavaRDD<Vector> data = jsc.parallelize(
      Arrays.asList(
          Vectors.dense(1.0, 2.0, 6.0),
          Vectors.dense(1.0, 3.0, 0.0),
          Vectors.dense(1.0, 4.0, 6.0)),
      2);
  // Component-wise mean of the input points.
  Vector expectedCenter = Vectors.dense(1.0, 3.0, 4.0);

  KMeansModel model = new KMeans()
      .setK(1)
      .setMaxIterations(5)
      .run(data.rdd());
  Vector[] fittedCenters = model.clusterCenters();
  assertEquals(1, fittedCenters.length);
  assertEquals(expectedCenter, fittedCenters[0]);

  // Same fit, but with random initialization and a single iteration.
  KMeans randomInit = new KMeans().setK(1).setMaxIterations(1);
  randomInit = randomInit.setInitializationMode(KMeans.RANDOM());
  model = randomInit.run(data.rdd());
  assertEquals(expectedCenter, model.clusterCenters()[0]);
}
代码示例来源:origin: org.apache.spark/spark-mllib_2.10
/**
 * Checks that {@code predict} accepts a {@code JavaRDD} and yields an RDD from which
 * the first element can be fetched.
 */
@Test
public void testPredictJavaRDD() {
  List<Vector> samples = Arrays.asList(
      Vectors.dense(1.0, 2.0, 6.0),
      Vectors.dense(1.0, 3.0, 0.0),
      Vectors.dense(1.0, 4.0, 6.0));
  JavaRDD<Vector> data = jsc.parallelize(samples, 2);

  KMeansModel model = new KMeans()
      .setK(1)
      .setMaxIterations(5)
      .run(data.rdd());

  // Should be able to get the first prediction.
  JavaRDD<Integer> predictions = model.predict(data);
  predictions.first();
}
}
代码示例来源:origin: org.apache.spark/spark-mllib_2.10
/**
 * Fits one cluster to three points using the static {@code KMeans.train} overload
 * that takes an initialization mode, and verifies the center for both modes.
 */
@Test
public void runKMeansUsingStaticMethods() {
  List<Vector> samples = Arrays.asList(
      Vectors.dense(1.0, 2.0, 6.0),
      Vectors.dense(1.0, 3.0, 0.0),
      Vectors.dense(1.0, 4.0, 6.0));
  JavaRDD<Vector> data = jsc.parallelize(samples, 2);
  // Component-wise mean of the input points.
  Vector expectedCenter = Vectors.dense(1.0, 3.0, 4.0);

  // k-means|| initialization.
  KMeansModel model = KMeans.train(data.rdd(), 1, 1, 1, KMeans.K_MEANS_PARALLEL());
  Vector[] fittedCenters = model.clusterCenters();
  assertEquals(1, fittedCenters.length);
  assertEquals(expectedCenter, fittedCenters[0]);

  // Random initialization.
  model = KMeans.train(data.rdd(), 1, 1, 1, KMeans.RANDOM());
  Vector randomCenter = model.clusterCenters()[0];
  assertEquals(expectedCenter, randomCenter);
}
代码示例来源:origin: org.apache.spark/spark-mllib
/**
 * Exercises the fluent {@code KMeans} configuration API: one cluster over three
 * points, first with the default initialization and then with random initialization.
 */
@Test
public void runKMeansUsingConstructor() {
  List<Vector> samples = Arrays.asList(
      Vectors.dense(1.0, 2.0, 6.0),
      Vectors.dense(1.0, 3.0, 0.0),
      Vectors.dense(1.0, 4.0, 6.0));
  JavaRDD<Vector> data = jsc.parallelize(samples, 2);
  // Component-wise mean of the samples.
  Vector expectedCenter = Vectors.dense(1.0, 3.0, 4.0);

  KMeansModel defaultModel = new KMeans().setK(1).setMaxIterations(5).run(data.rdd());
  assertEquals(1, defaultModel.clusterCenters().length);
  assertEquals(expectedCenter, defaultModel.clusterCenters()[0]);

  KMeansModel randomModel = new KMeans()
      .setK(1)
      .setMaxIterations(1)
      .setInitializationMode(KMeans.RANDOM())
      .run(data.rdd());
  Vector randomCenter = randomModel.clusterCenters()[0];
  assertEquals(expectedCenter, randomCenter);
}
代码示例来源:origin: org.apache.spark/spark-mllib
/**
 * Verifies that predicting over a {@code JavaRDD} of vectors produces an RDD whose
 * first element can be retrieved.
 */
@Test
public void testPredictJavaRDD() {
  List<Vector> samples = Arrays.asList(
      Vectors.dense(1.0, 2.0, 6.0),
      Vectors.dense(1.0, 3.0, 0.0),
      Vectors.dense(1.0, 4.0, 6.0));
  JavaRDD<Vector> data = jsc.parallelize(samples, 2);

  KMeans trainer = new KMeans();
  trainer = trainer.setK(1);
  trainer = trainer.setMaxIterations(5);
  KMeansModel model = trainer.run(data.rdd());

  JavaRDD<Integer> predictions = model.predict(data);
  // Should be able to get the first prediction.
  predictions.first();
}
}
代码示例来源:origin: org.apache.spark/spark-mllib_2.11
/**
 * Uses the static {@code KMeans.train} factory with each initialization mode and
 * checks that a single cluster over three points lands on the expected center.
 */
@Test
public void runKMeansUsingStaticMethods() {
  JavaRDD<Vector> data = jsc.parallelize(
      Arrays.asList(
          Vectors.dense(1.0, 2.0, 6.0),
          Vectors.dense(1.0, 3.0, 0.0),
          Vectors.dense(1.0, 4.0, 6.0)),
      2);
  // Component-wise mean of the three points above.
  Vector expectedCenter = Vectors.dense(1.0, 3.0, 4.0);

  KMeansModel model = KMeans.train(data.rdd(), 1, 1, 1, KMeans.K_MEANS_PARALLEL());
  Vector[] centers = model.clusterCenters();
  assertEquals(1, centers.length);
  assertEquals(expectedCenter, centers[0]);

  model = KMeans.train(data.rdd(), 1, 1, 1, KMeans.RANDOM());
  centers = model.clusterCenters();
  assertEquals(expectedCenter, centers[0]);
}
代码示例来源:origin: ypriverol/spark-java8
// Train a k-means model on the RDD underlying parsedData, passing numClusters and
// numIterations to the three-argument KMeans.train overload. The class is referenced
// by its fully-qualified name.
KMeansModel clusters = org.apache.spark.mllib.clustering.KMeans.train(parsedData.rdd(), numClusters, numIterations);
代码示例来源:origin: apache/lens
KMeansModel model = KMeans.train(trainableRDD.rdd(), k, maxIterations, runs, initializationMode);
return new KMeansClusteringModel(modelId, model);
} catch (Exception e) {
内容来源于网络,如有侵权,请联系作者删除!