org.apache.spark.rdd.RDD.toJavaRDD(): usage and code examples


This article collects Java code examples for the org.apache.spark.rdd.RDD.toJavaRDD method and shows how RDD.toJavaRDD is used in practice. The examples are drawn mainly from GitHub, Stack Overflow, Maven, and similar platforms, extracted from selected projects, so they should serve as useful references. Details of the RDD.toJavaRDD method:
Package: org.apache.spark.rdd
Class: RDD
Method: toJavaRDD

RDD.toJavaRDD introduction

toJavaRDD() wraps this Scala RDD<T> in a JavaRDD<T> so it can be used from Spark's Java API. The wrapper shares the same underlying data and lineage, so calling it triggers no computation and copies no data.
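
Before the project examples, here is a minimal, self-contained sketch of the typical round trip between the Java and Scala APIs. It is not taken from any of the projects below; the application name, master URL, and sample data are assumptions made only for illustration.

import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.rdd.RDD;

import java.util.Arrays;

public class ToJavaRDDExample {
  public static void main(String[] args) {
    // Local setup used only for this sketch.
    SparkConf conf = new SparkConf().setAppName("toJavaRDD-example").setMaster("local[*]");
    JavaSparkContext jsc = new JavaSparkContext(conf);

    // Start from a JavaRDD, unwrap it to the underlying Scala RDD, then wrap it back.
    JavaRDD<String> words = jsc.parallelize(Arrays.asList("a", "b", "c"));
    RDD<String> scalaRdd = words.rdd();              // underlying Scala RDD, same data and lineage
    JavaRDD<String> javaRdd = scalaRdd.toJavaRDD();  // wrapped again for the Java-friendly API

    System.out.println(javaRdd.count());             // prints 3

    jsc.stop();
  }
}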

Code examples

Code example source: apache/tinkerpop

@Override
public boolean cp(final String sourceLocation, final String targetLocation) {
  final List<String> rdds = Spark.getRDDs().stream().filter(r -> r.name().startsWith(sourceLocation)).map(RDD::name).collect(Collectors.toList());
  if (rdds.size() == 0)
    return false;
  for (final String rdd : rdds) {
    Spark.getRDD(rdd).toJavaRDD().filter(a -> true).setName(rdd.equals(sourceLocation) ? targetLocation : rdd.replace(sourceLocation, targetLocation)).cache().count();
    // TODO: this should use the original storage level
  }
  return true;
}

Code example source: apache/tinkerpop

@Override
public Iterator<String> head(final String location, final int totalLines) {
  return IteratorUtils.map(Spark.getRDD(location).toJavaRDD().take(totalLines).iterator(), Object::toString);
}

Code example source: apache/tinkerpop

@Override
  public <K, V> JavaPairRDD<K, V> readMemoryRDD(final Configuration configuration, final String memoryKey, final JavaSparkContext sparkContext) {
    if (!configuration.containsKey(Constants.GREMLIN_HADOOP_INPUT_LOCATION))
      throw new IllegalArgumentException("There is no provided " + Constants.GREMLIN_HADOOP_INPUT_LOCATION + " to read the persisted RDD from");
    return JavaPairRDD.fromJavaRDD((JavaRDD) Spark.getRDD(Constants.getMemoryLocation(configuration.getString(Constants.GREMLIN_HADOOP_INPUT_LOCATION), memoryKey)).toJavaRDD());
  }
}

Code example source: apache/tinkerpop

@Override
public JavaPairRDD<Object, VertexWritable> readGraphRDD(final Configuration configuration, final JavaSparkContext sparkContext) {
  if (!configuration.containsKey(Constants.GREMLIN_HADOOP_INPUT_LOCATION))
    throw new IllegalArgumentException("There is no provided " + Constants.GREMLIN_HADOOP_INPUT_LOCATION + " to read the persisted RDD from");
  Spark.create(sparkContext.sc());
  final Optional<String> graphLocation = Constants.getSearchGraphLocation(configuration.getString(Constants.GREMLIN_HADOOP_INPUT_LOCATION), SparkContextStorage.open());
  return graphLocation.isPresent() ? JavaPairRDD.fromJavaRDD((JavaRDD) Spark.getRDD(graphLocation.get()).toJavaRDD()) : JavaPairRDD.fromJavaRDD(sparkContext.emptyRDD());
}

Code example source: Impetus/Kundera

@Override
public boolean persist(List listEntity, EntityMetadata m, SparkClient sparkClient)
{
  Seq s = scala.collection.JavaConversions.asScalaBuffer(listEntity).toList();
  ClassTag tag = scala.reflect.ClassTag$.MODULE$.apply(m.getEntityClazz());
  JavaRDD personRDD = sparkClient.sparkContext.parallelize(s, 1, tag).toJavaRDD();
  DataFrame df = sparkClient.sqlContext.createDataFrame(personRDD, m.getEntityClazz());
  String outputFilePath = getOutputFilePath(sparkClient.properties);
  String ext = (String) sparkClient.properties.get("format");
  FileType fileType = FileFormatConstants.extension.get(ext);
  switch (fileType)
  {
  case CSV:
    return writeDataInCsvFile(df, outputFilePath);
  case JSON:
    return writeDataInJsonFile(df, outputFilePath);
  default:
    throw new UnsupportedOperationException("Files of type " + ext + " are not yet supported.");
  }
}

Code example source: Impetus/Kundera

JavaRDD javaRDD = sparkClient.sparkContext.parallelize(s, 1, tag).toJavaRDD();

Code example source: Impetus/Kundera

public void registerTable(EntityMetadata m, SparkClient sparkClient)
{
  final Class clazz = m.getEntityClazz();
  SparkContext sc = sparkClient.sparkContext;
  Configuration config = new Configuration();
  config.set(
      "mongo.input.uri",
      buildMongoURIPath(sc.getConf().get("hostname"), sc.getConf().get("portname"), m.getSchema(),
          m.getTableName()));
  JavaRDD<Tuple2<Object, BSONObject>> mongoJavaRDD = sc.newAPIHadoopRDD(config, MongoInputFormat.class,
      Object.class, BSONObject.class).toJavaRDD();
  JavaRDD<Object> mongoRDD = mongoJavaRDD.flatMap(new FlatMapFunction<Tuple2<Object, BSONObject>, Object>()
  {
    @Override
    public Iterable<Object> call(Tuple2<Object, BSONObject> arg)
    {
      BSONObject obj = arg._2();
      Object javaObject = generateJavaObjectFromBSON(obj, clazz);
      return Arrays.asList(javaObject);
    }
  });
  sparkClient.sqlContext.createDataFrame(mongoRDD, m.getEntityClazz()).registerTempTable(m.getTableName());
}

Code example source: Impetus/Kundera

@Override
public boolean persist(List listEntity, EntityMetadata m, SparkClient sparkClient)
{
  try
  {
    Seq s = scala.collection.JavaConversions.asScalaBuffer(listEntity).toList();
    ClassTag tag = scala.reflect.ClassTag$.MODULE$.apply(m.getEntityClazz());
    JavaRDD personRDD = sparkClient.sparkContext.parallelize(s, 1, tag).toJavaRDD();
    DataFrame df = sparkClient.sqlContext.createDataFrame(personRDD, m.getEntityClazz());
    sparkClient.sqlContext.sql("use " + m.getSchema());
    if (logger.isDebugEnabled())
    {
      logger.info("Below are the registered table with hive context: ");
      sparkClient.sqlContext.sql("show tables").show();
    }
    df.write().insertInto(m.getTableName());
    return true;
  }
  catch (Exception e)
  {
    throw new KunderaException("Cannot persist object(s)", e);
  }
}

Code example source: Impetus/Kundera

@Override
public boolean persist(List listEntity, EntityMetadata m, SparkClient sparkClient)
{
  try
  {
    Seq s = scala.collection.JavaConversions.asScalaBuffer(listEntity).toList();
    ClassTag tag = scala.reflect.ClassTag$.MODULE$.apply(m.getEntityClazz());
    JavaRDD personRDD = sparkClient.sparkContext.parallelize(s, 1, tag).toJavaRDD();
    CassandraJavaUtil.javaFunctions(personRDD)
        .writerBuilder(m.getSchema(), m.getTableName(), CassandraJavaUtil.mapToRow(m.getEntityClazz()))
        .saveToCassandra();
    return true;
  }
  catch (Exception e)
  {
    throw new KunderaException("Cannot persist object(s)", e);
  }
}

Code example source: org.apache.pig/pig

@Override
public RDD<Tuple> convert(List<RDD<Tuple>> predecessors,
    POStream poStream) throws IOException {
  SparkUtil.assertPredecessorSize(predecessors, poStream, 1);
  RDD<Tuple> rdd = predecessors.get(0);
  StreamFunction streamFunction = new StreamFunction(poStream);
  return rdd.toJavaRDD().mapPartitions(streamFunction, true).rdd();
}

Code example source: org.apache.pig/pig

@Override
public RDD<Tuple> convert(List<RDD<Tuple>> predecessors,
             POPoissonSampleSpark po) throws IOException {
  SparkUtil.assertPredecessorSize(predecessors, po, 1);
  RDD<Tuple> rdd = predecessors.get(0);
  PoissionSampleFunction poissionSampleFunction = new PoissionSampleFunction(po);
  return rdd.toJavaRDD().mapPartitions(poissionSampleFunction, false).rdd();
}

Code example source: org.apache.pig/pig

@Override
public RDD<Tuple> convert(List<RDD<Tuple>> predecessors,
             POCollectedGroup physicalOperator) throws IOException {
  SparkUtil.assertPredecessorSize(predecessors, physicalOperator, 1);
  RDD<Tuple> rdd = predecessors.get(0);
  CollectedGroupFunction collectedGroupFunction
      = new CollectedGroupFunction(physicalOperator);
  return rdd.toJavaRDD().mapPartitions(collectedGroupFunction, true)
      .rdd();
}

Code example source: org.apache.pig/pig

@Override
public RDD<Tuple> convert(List<RDD<Tuple>> predecessors,
             POMergeCogroup physicalOperator) {
  SparkUtil.assertPredecessorSize(predecessors, physicalOperator, 1);
  RDD<Tuple> rdd = predecessors.get(0);
  MergeCogroupFunction mergeCogroupFunction = new MergeCogroupFunction(physicalOperator);
  return rdd.toJavaRDD().mapPartitions(mergeCogroupFunction, true).rdd();
}

Code example source: org.apache.pig/pig

@Override
  public RDD<Tuple> convert(List<RDD<Tuple>> predecessors, 
      POCounter poCounter) throws IOException {
    SparkUtil.assertPredecessorSize(predecessors, poCounter, 1);
    RDD<Tuple> rdd = predecessors.get(0);
    CounterConverterFunction f = new CounterConverterFunction(poCounter);
    JavaRDD<Tuple> jRdd = rdd.toJavaRDD().mapPartitionsWithIndex(f, true);
//        jRdd = jRdd.cache();
    return jRdd.rdd();
  }

Code example source: org.apache.pig/pig

@Override
public RDD<Tuple> convert(List<RDD<Tuple>> predecessors,
             POMergeJoin poMergeJoin) throws IOException {
  SparkUtil.assertPredecessorSize(predecessors, poMergeJoin, 1);
  RDD<Tuple> rdd = predecessors.get(0);
  MergeJoinFunction mergeJoinFunction = new MergeJoinFunction(poMergeJoin);
  return rdd.toJavaRDD().mapPartitions(mergeJoinFunction, true).rdd();
}

Code example source: org.apache.pig/pig

public RDD<Tuple> convert(List<RDD<Tuple>> predecessors,
             POFRJoin poFRJoin) throws IOException {
  SparkUtil.assertPredecessorSizeGreaterThan(predecessors, poFRJoin, 1);
  RDD<Tuple> rdd = predecessors.get(0);
  attachReplicatedInputs((POFRJoinSpark) poFRJoin);
  FRJoinFunction frJoinFunction = new FRJoinFunction(poFRJoin);
  return rdd.toJavaRDD().mapPartitions(frJoinFunction, true).rdd();
}

Code example source: org.apache.pig/pig

@Override
public RDD<Tuple> convert(List<RDD<Tuple>> predecessors, POLimit poLimit)
    throws IOException {
  SparkUtil.assertPredecessorSize(predecessors, poLimit, 1);
  RDD<Tuple> rdd = predecessors.get(0);
  LimitFunction limitFunction = new LimitFunction(poLimit);
  RDD<Tuple> rdd2 = rdd.coalesce(1, false, null);
  return rdd2.toJavaRDD().mapPartitions(limitFunction, false).rdd();
}

Code example source: org.apache.pig/pig

@Override
public RDD<Tuple> convert(List<RDD<Tuple>> predecessors,
    POForEach physicalOperator) {
  byte[] confBytes = KryoSerializer.serializeJobConf(jobConf);
  SparkUtil.assertPredecessorSize(predecessors, physicalOperator, 1);
  RDD<Tuple> rdd = predecessors.get(0);
  ForEachFunction forEachFunction = new ForEachFunction(physicalOperator, confBytes);
  return rdd.toJavaRDD().mapPartitions(forEachFunction, true).rdd();
}

Code example source: com.couchbase.client/spark-connector

@SuppressWarnings({"unchecked"})
public JavaRDD<SubdocLookupResult> couchbaseSubdocLookup(List<String> get, List<String> exists, String bucket) {
  return new RDDFunctions<T>(source.rdd()).couchbaseSubdocLookup(
    SparkUtil.listToSeq(get),
    SparkUtil.listToSeq(exists),
    bucket,
    scala.Option.<Duration>apply(null),
    LCLIdentity.INSTANCE
  ).toJavaRDD();
}

Code example source: com.couchbase.client/spark-connector

@SuppressWarnings({"unchecked"})
public JavaRDD<SubdocLookupResult> couchbaseSubdocLookup(List<String> get, List<String> exists, String bucket, long timeout) {
  return new RDDFunctions<T>(source.rdd()).couchbaseSubdocLookup(
    SparkUtil.listToSeq(get),
    SparkUtil.listToSeq(exists),
    bucket,
    scala.Option.<Duration>apply(Duration.create(timeout, TimeUnit.MILLISECONDS)),
    LCLIdentity.INSTANCE
  ).toJavaRDD();
}
