This article collects code examples of the Java method org.apache.spark.rdd.RDD.toJavaRDD, showing how RDD.toJavaRDD is used in practice. The examples are drawn from selected open-source projects on GitHub, Stack Overflow, Maven, and similar platforms, so they make useful references. Details of the RDD.toJavaRDD method:
Package: org.apache.spark.rdd
Class: RDD
Method: toJavaRDD
Description: none provided
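RDD.toJavaRDD() simply wraps a Scala RDD<T> in a JavaRDD<T> so it can be used with Spark's Java-friendly API; no data is copied. A minimal sketch of the round trip (the local master and element values are illustrative):

import java.util.Arrays;

import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.rdd.RDD;

public class ToJavaRddSketch {
    public static void main(String[] args) {
        JavaSparkContext jsc = new JavaSparkContext(
                new SparkConf().setMaster("local[*]").setAppName("toJavaRDD-sketch"));
        // Drop down to the underlying Scala RDD...
        RDD<Integer> scalaRdd = jsc.parallelize(Arrays.asList(1, 2, 3)).rdd();
        // ...and wrap it back into a JavaRDD with toJavaRDD().
        JavaRDD<Integer> javaRdd = scalaRdd.toJavaRDD();
        System.out.println(javaRdd.count()); // 3
        jsc.stop();
    }
}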
Code example source: apache/tinkerpop
@Override
public boolean cp(final String sourceLocation, final String targetLocation) {
final List<String> rdds = Spark.getRDDs().stream().filter(r -> r.name().startsWith(sourceLocation)).map(RDD::name).collect(Collectors.toList());
if (rdds.size() == 0)
return false;
for (final String rdd : rdds) {
Spark.getRDD(rdd).toJavaRDD().filter(a -> true).setName(rdd.equals(sourceLocation) ? targetLocation : rdd.replace(sourceLocation, targetLocation)).cache().count();
// TODO: this should use the original storage level
}
return true;
}
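The identity filter(a -> true) above is the key trick: it derives a new RDD from the persisted one, so the copy can carry its own name and be cached and materialized by count() without renaming the source. A minimal sketch of the same copy idiom (method and parameter names are hypothetical):

import org.apache.spark.api.java.JavaRDD;

public class RddCopySketch {
    // Derive a renamed, cached copy of an existing JavaRDD and materialize it.
    public static void copyUnderNewName(JavaRDD<String> source, String targetName) {
        source.filter(a -> true)    // identity filter: new RDD, same contents
              .setName(targetName)  // the copy carries the target name
              .cache()              // cache it (original storage level is not preserved, per the TODO)
              .count();             // count() forces materialization
    }
}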
Code example source: apache/tinkerpop
@Override
public Iterator<String> head(final String location, final int totalLines) {
return IteratorUtils.map(Spark.getRDD(location).toJavaRDD().take(totalLines).iterator(), Object::toString);
}
Code example source: apache/tinkerpop
@Override
public <K, V> JavaPairRDD<K, V> readMemoryRDD(final Configuration configuration, final String memoryKey, final JavaSparkContext sparkContext) {
if (!configuration.containsKey(Constants.GREMLIN_HADOOP_INPUT_LOCATION))
throw new IllegalArgumentException("There is no provided " + Constants.GREMLIN_HADOOP_INPUT_LOCATION + " to read the persisted RDD from");
return JavaPairRDD.fromJavaRDD((JavaRDD) Spark.getRDD(Constants.getMemoryLocation(configuration.getString(Constants.GREMLIN_HADOOP_INPUT_LOCATION), memoryKey)).toJavaRDD());
}
Code example source: apache/tinkerpop
@Override
public JavaPairRDD<Object, VertexWritable> readGraphRDD(final Configuration configuration, final JavaSparkContext sparkContext) {
if (!configuration.containsKey(Constants.GREMLIN_HADOOP_INPUT_LOCATION))
throw new IllegalArgumentException("There is no provided " + Constants.GREMLIN_HADOOP_INPUT_LOCATION + " to read the persisted RDD from");
Spark.create(sparkContext.sc());
final Optional<String> graphLocation = Constants.getSearchGraphLocation(configuration.getString(Constants.GREMLIN_HADOOP_INPUT_LOCATION), SparkContextStorage.open());
return graphLocation.isPresent() ? JavaPairRDD.fromJavaRDD((JavaRDD) Spark.getRDD(graphLocation.get()).toJavaRDD()) : JavaPairRDD.fromJavaRDD(sparkContext.emptyRDD());
}
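Both readers recover a JavaPairRDD from a persisted RDD: toJavaRDD() yields a plain JavaRDD whose elements are Tuple2 pairs, and JavaPairRDD.fromJavaRDD re-wraps it (hence the unchecked casts above). A minimal sketch of that re-wrapping, assuming the element type is statically known to be Tuple2:

import org.apache.spark.api.java.JavaPairRDD;
import org.apache.spark.rdd.RDD;
import scala.Tuple2;

public class PairRewrapSketch {
    // Convert a Scala RDD of pairs into Spark's pair-oriented Java wrapper.
    public static <K, V> JavaPairRDD<K, V> rewrap(RDD<Tuple2<K, V>> rdd) {
        return JavaPairRDD.fromJavaRDD(rdd.toJavaRDD());
    }
}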
Code example source: Impetus/Kundera
@Override
public boolean persist(List listEntity, EntityMetadata m, SparkClient sparkClient)
{
Seq s = scala.collection.JavaConversions.asScalaBuffer(listEntity).toList();
ClassTag tag = scala.reflect.ClassTag$.MODULE$.apply(m.getEntityClazz());
JavaRDD personRDD = sparkClient.sparkContext.parallelize(s, 1, tag).toJavaRDD();
DataFrame df = sparkClient.sqlContext.createDataFrame(personRDD, m.getEntityClazz());
String outputFilePath = getOutputFilePath(sparkClient.properties);
String ext = (String) sparkClient.properties.get("format");
FileType fileType = FileFormatConstants.extension.get(ext);
switch (fileType)
{
case CSV:
return writeDataInCsvFile(df, outputFilePath);
case JSON:
return writeDataInJsonFile(df, outputFilePath);
default:
throw new UnsupportedOperationException("Files of type " + ext + " are not yet supported.");
}
}
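The Kundera persist methods are not runnable outside Kundera's SparkClient, but the conversion they all share is parallelizing a Scala Seq and calling toJavaRDD(). A self-contained sketch against the Spark 1.x DataFrame API used in these examples (the Person bean and output path are hypothetical):

import java.io.Serializable;
import java.util.Arrays;
import java.util.List;

import org.apache.spark.SparkConf;
import org.apache.spark.SparkContext;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.sql.DataFrame;
import org.apache.spark.sql.SQLContext;

import scala.collection.JavaConversions;
import scala.collection.Seq;
import scala.reflect.ClassTag;
import scala.reflect.ClassTag$;

public class PersistSketch {
    public static class Person implements Serializable {
        private String name;
        public Person() { }
        public Person(String name) { this.name = name; }
        public String getName() { return name; }
        public void setName(String name) { this.name = name; }
    }

    public static void main(String[] args) {
        SparkContext sc = new SparkContext(
                new SparkConf().setMaster("local[*]").setAppName("persist-sketch"));
        SQLContext sqlContext = new SQLContext(sc);

        List<Person> entities = Arrays.asList(new Person("a"), new Person("b"));
        // Bridge the Java list to a Scala Seq, parallelize on the Scala API,
        // then convert the resulting Scala RDD with toJavaRDD().
        Seq<Person> s = JavaConversions.asScalaBuffer(entities).toList();
        ClassTag<Person> tag = ClassTag$.MODULE$.apply(Person.class);
        JavaRDD<Person> personRDD = sc.parallelize(s, 1, tag).toJavaRDD();

        DataFrame df = sqlContext.createDataFrame(personRDD, Person.class);
        df.write().json("/tmp/person-json"); // output path is illustrative
        sc.stop();
    }
}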
Code example source: Impetus/Kundera
JavaRDD javaRDD = sparkClient.sparkContext.parallelize(s, 1, tag).toJavaRDD();
Code example source: Impetus/Kundera
public void registerTable(EntityMetadata m, SparkClient sparkClient)
{
final Class clazz = m.getEntityClazz();
SparkContext sc = sparkClient.sparkContext;
Configuration config = new Configuration();
config.set(
"mongo.input.uri",
buildMongoURIPath(sc.getConf().get("hostname"), sc.getConf().get("portname"), m.getSchema(),
m.getTableName()));
JavaRDD<Tuple2<Object, BSONObject>> mongoJavaRDD = sc.newAPIHadoopRDD(config, MongoInputFormat.class,
Object.class, BSONObject.class).toJavaRDD();
JavaRDD<Object> mongoRDD = mongoJavaRDD.flatMap(new FlatMapFunction<Tuple2<Object, BSONObject>, Object>()
{
@Override
public Iterable<Object> call(Tuple2<Object, BSONObject> arg)
{
BSONObject obj = arg._2();
Object javaObject = generateJavaObjectFromBSON(obj, clazz);
return Arrays.asList(javaObject);
}
});
sparkClient.sqlContext.createDataFrame(mongoRDD, m.getEntityClazz()).registerTempTable(m.getTableName());
}
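The same toJavaRDD() conversion applies to any RDD produced by the new Hadoop input-format API, as above. A minimal sketch using Hadoop's TextInputFormat in place of the Mongo input format (the input path is illustrative):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.spark.SparkConf;
import org.apache.spark.SparkContext;
import org.apache.spark.api.java.JavaRDD;
import scala.Tuple2;

public class HadoopRddSketch {
    public static void main(String[] args) {
        SparkContext sc = new SparkContext(
                new SparkConf().setMaster("local[*]").setAppName("hadoop-rdd-sketch"));
        Configuration conf = new Configuration();
        // newAPIHadoopFile returns a Scala RDD of key/value pairs;
        // toJavaRDD() converts it for the Java API.
        JavaRDD<Tuple2<LongWritable, Text>> lines = sc.newAPIHadoopFile(
                "/tmp/input.txt", TextInputFormat.class,
                LongWritable.class, Text.class, conf).toJavaRDD();
        lines.map(t -> t._2().toString()).foreach(line -> System.out.println(line));
        sc.stop();
    }
}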
Code example source: Impetus/Kundera
@Override
public boolean persist(List listEntity, EntityMetadata m, SparkClient sparkClient)
{
try
{
Seq s = scala.collection.JavaConversions.asScalaBuffer(listEntity).toList();
ClassTag tag = scala.reflect.ClassTag$.MODULE$.apply(m.getEntityClazz());
JavaRDD personRDD = sparkClient.sparkContext.parallelize(s, 1, tag).toJavaRDD();
DataFrame df = sparkClient.sqlContext.createDataFrame(personRDD, m.getEntityClazz());
sparkClient.sqlContext.sql("use " + m.getSchema());
if (logger.isDebugEnabled())
{
logger.debug("Below are the tables registered with the Hive context: ");
sparkClient.sqlContext.sql("show tables").show();
}
df.write().insertInto(m.getTableName());
return true;
}
catch (Exception e)
{
throw new KunderaException("Cannot persist object(s)", e);
}
}
Code example source: Impetus/Kundera
@Override
public boolean persist(List listEntity, EntityMetadata m, SparkClient sparkClient)
{
try
{
Seq s = scala.collection.JavaConversions.asScalaBuffer(listEntity).toList();
ClassTag tag = scala.reflect.ClassTag$.MODULE$.apply(m.getEntityClazz());
JavaRDD personRDD = sparkClient.sparkContext.parallelize(s, 1, tag).toJavaRDD();
CassandraJavaUtil.javaFunctions(personRDD)
.writerBuilder(m.getSchema(), m.getTableName(), CassandraJavaUtil.mapToRow(m.getEntityClazz()))
.saveToCassandra();
return true;
}
catch (Exception e)
{
throw new KunderaException("Cannot persist object(s)", e);
}
}
Code example source: org.apache.pig/pig
@Override
public RDD<Tuple> convert(List<RDD<Tuple>> predecessors,
POStream poStream) throws IOException {
SparkUtil.assertPredecessorSize(predecessors, poStream, 1);
RDD<Tuple> rdd = predecessors.get(0);
StreamFunction streamFunction = new StreamFunction(poStream);
return rdd.toJavaRDD().mapPartitions(streamFunction, true).rdd();
}
Code example source: org.apache.pig/pig
@Override
public RDD<Tuple> convert(List<RDD<Tuple>> predecessors,
POPoissonSampleSpark po) throws IOException {
SparkUtil.assertPredecessorSize(predecessors, po, 1);
RDD<Tuple> rdd = predecessors.get(0);
PoissionSampleFunction poissionSampleFunction = new PoissionSampleFunction(po);
return rdd.toJavaRDD().mapPartitions(poissionSampleFunction, false).rdd();
}
Code example source: org.apache.pig/pig
@Override
public RDD<Tuple> convert(List<RDD<Tuple>> predecessors,
POCollectedGroup physicalOperator) throws IOException {
SparkUtil.assertPredecessorSize(predecessors, physicalOperator, 1);
RDD<Tuple> rdd = predecessors.get(0);
CollectedGroupFunction collectedGroupFunction
= new CollectedGroupFunction(physicalOperator);
return rdd.toJavaRDD().mapPartitions(collectedGroupFunction, true)
.rdd();
}
Code example source: org.apache.pig/pig
@Override
public RDD<Tuple> convert(List<RDD<Tuple>> predecessors,
POMergeCogroup physicalOperator) {
SparkUtil.assertPredecessorSize(predecessors, physicalOperator, 1);
RDD<Tuple> rdd = predecessors.get(0);
MergeCogroupFunction mergeCogroupFunction = new MergeCogroupFunction(physicalOperator);
return rdd.toJavaRDD().mapPartitions(mergeCogroupFunction, true).rdd();
}
Code example source: org.apache.pig/pig
@Override
public RDD<Tuple> convert(List<RDD<Tuple>> predecessors,
POCounter poCounter) throws IOException {
SparkUtil.assertPredecessorSize(predecessors, poCounter, 1);
RDD<Tuple> rdd = predecessors.get(0);
CounterConverterFunction f = new CounterConverterFunction(poCounter);
JavaRDD<Tuple> jRdd = rdd.toJavaRDD().mapPartitionsWithIndex(f, true);
// jRdd = jRdd.cache();
return jRdd.rdd();
}
Code example source: org.apache.pig/pig
@Override
public RDD<Tuple> convert(List<RDD<Tuple>> predecessors,
POMergeJoin poMergeJoin) throws IOException {
SparkUtil.assertPredecessorSize(predecessors, poMergeJoin, 1);
RDD<Tuple> rdd = predecessors.get(0);
MergeJoinFunction mergeJoinFunction = new MergeJoinFunction(poMergeJoin);
return rdd.toJavaRDD().mapPartitions(mergeJoinFunction, true).rdd();
}
Code example source: org.apache.pig/pig
public RDD<Tuple> convert(List<RDD<Tuple>> predecessors,
POFRJoin poFRJoin) throws IOException {
SparkUtil.assertPredecessorSizeGreaterThan(predecessors, poFRJoin, 1);
RDD<Tuple> rdd = predecessors.get(0);
attachReplicatedInputs((POFRJoinSpark) poFRJoin);
FRJoinFunction frJoinFunction = new FRJoinFunction(poFRJoin);
return rdd.toJavaRDD().mapPartitions(frJoinFunction, true).rdd();
}
Code example source: org.apache.pig/pig
@Override
public RDD<Tuple> convert(List<RDD<Tuple>> predecessors, POLimit poLimit)
throws IOException {
SparkUtil.assertPredecessorSize(predecessors, poLimit, 1);
RDD<Tuple> rdd = predecessors.get(0);
LimitFunction limitFunction = new LimitFunction(poLimit);
RDD<Tuple> rdd2 = rdd.coalesce(1, false, null);
return rdd2.toJavaRDD().mapPartitions(limitFunction, false).rdd();
}
Code example source: org.apache.pig/pig
@Override
public RDD<Tuple> convert(List<RDD<Tuple>> predecessors,
POForEach physicalOperator) {
byte[] confBytes = KryoSerializer.serializeJobConf(jobConf);
SparkUtil.assertPredecessorSize(predecessors, physicalOperator, 1);
RDD<Tuple> rdd = predecessors.get(0);
ForEachFunction forEachFunction = new ForEachFunction(physicalOperator, confBytes);
return rdd.toJavaRDD().mapPartitions(forEachFunction, true).rdd();
}
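Every Pig converter above follows the same round trip: toJavaRDD() exposes the Java-friendly mapPartitions overload, whose boolean argument preserves the upstream partitioning, and rdd() unwraps the result back into a Scala RDD. A self-contained sketch of the pattern with String elements instead of Pig Tuples, assuming the Spark 1.x API used throughout these examples (in Spark 2.x, FlatMapFunction.call returns an Iterator rather than an Iterable):

import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;

import org.apache.spark.api.java.function.FlatMapFunction;
import org.apache.spark.rdd.RDD;

public class RoundTripSketch {
    // Convert to JavaRDD, transform per partition, convert back to a Scala RDD.
    public static RDD<String> convert(RDD<String> rdd) {
        FlatMapFunction<Iterator<String>, String> upperCase =
                new FlatMapFunction<Iterator<String>, String>() {
                    @Override
                    public Iterable<String> call(Iterator<String> partition) {
                        List<String> out = new ArrayList<>();
                        while (partition.hasNext()) {
                            out.add(partition.next().toUpperCase());
                        }
                        return out;
                    }
                };
        // The boolean argument preserves partitioning, as the Pig converters above do.
        return rdd.toJavaRDD().mapPartitions(upperCase, true).rdd();
    }
}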
Code example source: com.couchbase.client/spark-connector
@SuppressWarnings({"unchecked"})
public JavaRDD<SubdocLookupResult> couchbaseSubdocLookup(List<String> get, List<String> exists, String bucket) {
return new RDDFunctions<T>(source.rdd()).couchbaseSubdocLookup(
SparkUtil.listToSeq(get),
SparkUtil.listToSeq(exists),
bucket,
scala.Option.<Duration>apply(null),
LCLIdentity.INSTANCE
).toJavaRDD();
}
Code example source: com.couchbase.client/spark-connector
@SuppressWarnings({"unchecked"})
public JavaRDD<SubdocLookupResult> couchbaseSubdocLookup(List<String> get, List<String> exists, String bucket, long timeout) {
return new RDDFunctions<T>(source.rdd()).couchbaseSubdocLookup(
SparkUtil.listToSeq(get),
SparkUtil.listToSeq(exists),
bucket,
scala.Option.<Duration>apply(Duration.create(timeout, TimeUnit.MILLISECONDS)),
LCLIdentity.INSTANCE
).toJavaRDD();
}
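One interop detail in the two Couchbase examples: the timeout parameter is a Scala Option, built from Java with scala.Option.apply, where null yields None (no explicit timeout) and a Duration yields Some. A minimal illustration (the 500 ms value is hypothetical):

import java.util.concurrent.TimeUnit;

import scala.Option;
import scala.concurrent.duration.Duration;

public class OptionInteropSketch {
    public static void main(String[] args) {
        Option<Duration> noTimeout = Option.<Duration>apply(null); // None
        Option<Duration> timeout =
                Option.<Duration>apply(Duration.create(500, TimeUnit.MILLISECONDS)); // Some(500 ms)
        System.out.println(noTimeout.isDefined()); // false
        System.out.println(timeout.isDefined());   // true
    }
}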