org.apache.beam.sdk.io.WriteFilesResult类的使用及代码示例

x33g5p2x  于2022-02-03 转载在 其他  
字(7.8k)|赞(0)|评价(0)|浏览(113)

本文整理了Java中org.apache.beam.sdk.io.WriteFilesResult类的一些代码示例,展示了WriteFilesResult类的具体用法。这些代码示例主要来源于Github/Stackoverflow/Maven等平台,是从一些精选项目中提取出来的代码,具有较强的参考意义,能在一定程度上帮助到你。WriteFilesResult类的具体详情如下:
包路径:org.apache.beam.sdk.io.WriteFilesResult
类名称:WriteFilesResult

WriteFilesResult介绍

[英]The result of a WriteFiles transform.
[中]WriteFiles转换的结果。

代码示例

代码示例来源:origin: org.apache.beam/beam-sdks-java-core

/**
 * Static factory: binds the per-destination output-filename collection and its tag into a
 * {@code WriteFilesResult} attached to the given pipeline.
 */
static <DestinationT> WriteFilesResult<DestinationT> in(
  Pipeline pipeline,
  TupleTag<KV<DestinationT, String>> perDestinationOutputFilenamesTag,
  PCollection<KV<DestinationT, String>> perDestinationOutputFilenames) {
 WriteFilesResult<DestinationT> result =
   new WriteFilesResult<>(
     pipeline, perDestinationOutputFilenamesTag, perDestinationOutputFilenames);
 return result;
}

代码示例来源:origin: org.apache.beam/beam-sdks-java-core

@Test
@Category(NeedsRunner.class)
public void testCustomShardedWrite() throws IOException {
 // Flag to validate that the pipeline options are passed to the Sink.
 WriteOptions options = TestPipeline.testingPipelineOptions().as(WriteOptions.class);
 options.setTestFlag("test_value");
 Pipeline p = TestPipeline.create(options);

 // 1000 identical elements ("3"), timestamped 1..1000.
 List<String> elements = new ArrayList<>();
 List<Long> elementTimestamps = new ArrayList<>();
 for (long ts = 1; ts <= 1000; ts++) {
  elements.add(Integer.toString(3));
  elementTimestamps.add(ts);
 }

 // Shard count is supplied by a custom sharding function rather than a fixed number.
 SimpleSink<Void> sink = makeSimpleSink();
 WriteFiles<String, ?, String> write = WriteFiles.to(sink).withSharding(new LargestInt());
 p.apply(Create.timestamped(elements, elementTimestamps).withCoder(StringUtf8Coder.of()))
   .apply(IDENTITY_MAP)
   .apply(write)
   .getPerDestinationOutputFilenames()
   .apply(new VerifyFilesExist<>());
 p.run();
 checkFileContents(
   getBaseOutputFilename(), elements, Optional.of(3), true /* expectRemovedTempDirectory */);
}

代码示例来源:origin: org.apache.beam/beam-runners-google-cloud-dataflow-java

// Verifies that StreamingShardedWriteFactory replaces a runner-determined-sharding
// WriteFiles with a fixed-shard-count variant, and that its output mapping pairs the
// replacement's per-destination filenames PCollection with the original's.
private void testStreamingWriteOverride(PipelineOptions options, int expectedNumShards) {
 TestPipeline p = TestPipeline.fromOptions(options);
 StreamingShardedWriteFactory<Object, Void, Object> factory =
   new StreamingShardedWriteFactory<>(p.getOptions());
 WriteFiles<Object, Void, Object> original = WriteFiles.to(new TestSink(tmpFolder.toString()));
 // Raw-typed cast: the Void elements stand in for Object in this synthetic application.
 PCollection<Object> objs = (PCollection) p.apply(Create.empty(VoidCoder.of()));
 AppliedPTransform<PCollection<Object>, WriteFilesResult<Void>, WriteFiles<Object, Void, Object>>
   originalApplication =
     AppliedPTransform.of("writefiles", objs.expand(), Collections.emptyMap(), original, p);
 WriteFiles<Object, Void, Object> replacement =
   (WriteFiles<Object, Void, Object>)
     factory.getReplacementTransform(originalApplication).getTransform();
 // The factory must produce a new transform carrying the expected shard count.
 assertThat(replacement, not(equalTo((Object) original)));
 assertThat(replacement.getNumShardsProvider().get(), equalTo(expectedNumShards));
 WriteFilesResult<Void> originalResult = objs.apply(original);
 WriteFilesResult<Void> replacementResult = objs.apply(replacement);
 // Exactly one output (the filenames collection) must be re-mapped to the original's.
 Map<PValue, ReplacementOutput> res =
   factory.mapOutputs(originalResult.expand(), replacementResult);
 assertEquals(1, res.size());
 assertEquals(
   originalResult.getPerDestinationOutputFilenames(),
   res.get(replacementResult.getPerDestinationOutputFilenames()).getOriginal().getValue());
}

代码示例来源:origin: Talend/components

// Write all elements as a single text shard under `path`; only the pipeline
// handle of the WriteFilesResult is used to build the terminal PDone.
WriteFilesResult results =
    pc1.apply(FileIO.<String> write().withNumShards(1).via(TextIO.sink()).to(path));
return PDone.in(results.getPipeline());

代码示例来源:origin: org.apache.beam/beam-runners-core-construction-java

@Test
public void testExtractionDirectFromTransform() throws Exception {
 // Apply the transform directly so the translation layer can inspect its application.
 PCollection<String> input = p.apply(Create.of("hello"));
 WriteFilesResult<Void> result = input.apply(writeFiles);
 AppliedPTransform<PCollection<String>, WriteFilesResult<Void>, WriteFiles<String, Void, String>>
   applied = AppliedPTransform.of("foo", input.expand(), result.expand(), writeFiles, p);

 // Sharding is runner-determined exactly when neither a fixed nor a computed count is set.
 boolean expectRunnerDetermined =
   writeFiles.getNumShardsProvider() == null && writeFiles.getComputeNumShards() == null;
 assertThat(
   WriteFilesTranslation.isRunnerDeterminedSharding(applied), equalTo(expectRunnerDetermined));
 assertThat(
   WriteFilesTranslation.isWindowedWrites(applied), equalTo(writeFiles.getWindowedWrites()));
 assertThat(
   WriteFilesTranslation.<String, Void, String>getSink(applied), equalTo(writeFiles.getSink()));
}

代码示例来源:origin: org.apache.beam/beam-sdks-java-core

// Apply the write and assert (via VerifyFilesExist) that every reported
// per-destination output filename was actually produced.
.apply(transform)
  .apply(write)
  .getPerDestinationOutputFilenames()
  .apply(new VerifyFilesExist<>());
p.run();

代码示例来源:origin: org.talend.components/simplefileio-runtime

// Same pattern as the Talend snippet above: single-shard text write, then the
// WriteFilesResult's pipeline is wrapped into the terminal PDone.
WriteFilesResult results =
    pc1.apply(FileIO.<String> write().withNumShards(1).via(TextIO.sink()).to(path));
return PDone.in(results.getPipeline());

代码示例来源:origin: org.apache.beam/beam-sdks-java-core

// Apply writeFiles, then verify the reported output files exist before running.
res = input.apply(writeFiles);
res.getPerDestinationOutputFilenames().apply(new VerifyFilesExist<>());
p.run();

代码示例来源:origin: gojektech/feast

/** Writes feature rows to per-entity text file sinks, keyed by each row's entity name. */
 @Override
 public PDone expand(PCollection<FeatureRowExtended> input) {
  // Fall back to a stable folder name when no job name is configured.
  String folder = options.jobName != null ? options.jobName : "unknown-jobs";
  FileIO.Write<String, FeatureRowExtended> dynamicWrite =
    FileIO.<String, FeatureRowExtended>writeDynamic()
      .by((row) -> row.getRow().getEntityName())
      .withDestinationCoder(StringUtf8Coder.of())
      .withNaming(
        Contextful.fn(
          (entityName) -> FileIO.Write.defaultNaming(folder + "/" + entityName, suffix)))
      .via(Contextful.fn(toTextFunction), Contextful.fn((entityName) -> TextIO.sink()))
      .to(options.path);
  if (input.isBounded().equals(IsBounded.UNBOUNDED)) {
   // Unbounded input: window into fixed intervals so panes can be committed, and
   // pin an explicit shard count (streaming file writes need one — verify against runner).
   input =
     input.apply(
       Window.<FeatureRowExtended>into(FixedWindows.of(options.getWindowDuration()))
         .triggering(AfterWatermark.pastEndOfWindow())
         .discardingFiredPanes()
         .withAllowedLateness(Duration.ZERO));
   dynamicWrite = dynamicWrite.withNumShards(10);
  }
  WriteFilesResult<String> written = input.apply(dynamicWrite);
  return PDone.in(written.getPipeline());
 }
}

代码示例来源:origin: org.apache.beam/beam-sdks-java-core

@Test
@Category(NeedsRunner.class)
public void testWriteViaSink() throws Exception {
 List<String> data = ImmutableList.of("a", "b", "c", "d", "e", "f");

 // Write via FileIO + TextIO.sink(), collecting the produced filenames.
 PCollection<?> filenames =
   p.apply(Create.of(data))
     .apply(
       FileIO.<String>write()
         .to(tempFolder.getRoot().toString())
         .withSuffix(".txt")
         .via(TextIO.sink())
         .withIgnoreWindowing())
     .getPerDestinationOutputFilenames();

 // Read every produced file back and verify the round trip preserves the elements.
 PAssert.that(filenames.apply(Values.create()).apply(TextIO.readAll()))
   .containsInAnyOrder(data);
 p.run();
}

代码示例来源:origin: org.apache.beam/beam-sdks-java-core

@Test
@Category(NeedsRunner.class)
// Regression test: windowed writes must rename ALL shards even when the trigger's
// continuation only guarantees elementCountAtLeast(1).
public void testWindowedWritesWithOnceTrigger() throws Throwable {
 // Tests for https://issues.apache.org/jira/browse/BEAM-3169
 PCollection<String> data =
   p.apply(Create.of("0", "1", "2"))
     .apply(
       Window.<String>into(FixedWindows.of(Duration.standardSeconds(1)))
         // According to this trigger, all data should be written.
         // However, the continuation of this trigger is elementCountAtLeast(1),
         // so with a buggy implementation that used a GBK before renaming files,
         // only 1 file would be renamed.
         .triggering(AfterPane.elementCountAtLeast(3))
         .withAllowedLateness(Duration.standardMinutes(1))
         .discardingFiredPanes());
 // Two shards force multiple temp files per pane; all must survive the rename step.
 PCollection<String> filenames =
   data.apply(
       TextIO.write()
         .to(new File(tempFolder.getRoot(), "windowed-writes").getAbsolutePath())
         .withNumShards(2)
         .withWindowedWrites()
         .<Void>withOutputFilenames())
     .getPerDestinationOutputFilenames()
     .apply(Values.create());
 // Reading all written files back must recover every element exactly once.
 PAssert.that(filenames.apply(TextIO.readAll())).containsInAnyOrder("0", "1", "2");
 p.run();
}

代码示例来源:origin: org.apache.beam/beam-sdks-java-io-xml

// Configure the filename prefix/suffix, then match the reported output files.
.withPrefix("birds")
    .withSuffix(".xml"))
.getPerDestinationOutputFilenames()
.apply(Values.create())
.apply(FileIO.matchAll())

代码示例来源:origin: GoogleCloudPlatform/DataflowTemplates

// Group the (destination, filename) pairs by their destination key.
fileWriteResults.getPerDestinationOutputFilenames().apply(GroupByKey.create());

相关文章