
x33g5p2x  于2022-01-19 转载在 其他  



[英]PTransforms for filtering from a PCollection the elements satisfying a predicate, or satisfying an inequality with a given value based on the elements' natural ordering.


代码示例来源:origin: jbonofre/beam-samples

public static PCollection<String> filterByCountry(PCollection<String> data, final String country) {
  return data.apply("FilterByCountry", SerializableFunction<String, Boolean>() {
    public Boolean apply(String row) {
      return getCountry(row).equals(country);

代码示例来源:origin: org.apache.beam/beam-sdks-java-core

public void testDisplayData() {
 assertThat(DisplayData.from(Filter.lessThan(123)), hasDisplayItem("predicate", "x < 123"));
 assertThat(DisplayData.from(Filter.lessThanEq(234)), hasDisplayItem("predicate", "x ≤ 234"));
 assertThat(DisplayData.from(Filter.greaterThan(345)), hasDisplayItem("predicate", "x > 345"));
 assertThat(DisplayData.from(Filter.greaterThanEq(456)), hasDisplayItem("predicate", "x ≥ 456"));
 assertThat(DisplayData.from(Filter.equal(567)), hasDisplayItem("predicate", "x == 567"));

代码示例来源:origin: org.apache.beam/beam-sdks-java-core

 * Returns a new {@link Filter} {@link PTransform} that's like this {@link PTransform} but with
 * the specified description for {@link DisplayData}. Does not modify this {@link PTransform}.
Filter<T> described(String description) {
 return new Filter<>(predicate, description);

代码示例来源:origin: org.apache.beam/beam-sdks-java-core

 * Returns a {@code PTransform} that takes an input {@code PCollection<T>} and returns a {@code
 * PCollection<T>} with elements that are equal to a given value. Elements must be {@code
 * Comparable}.
 * <p>Example of use:
 * <pre>{@code
 * PCollection<Integer> listOfNumbers = ...;
 * PCollection<Integer> equalNumbers = listOfNumbers.apply(Filter.equal(1000));
 * }</pre>
 * <p>See also {@link #greaterThan}, {@link #lessThan}, {@link #lessThanEq} and {@link
 * #greaterThanEq}, which return elements satisfying various inequalities with the specified value
 * based on the elements' natural ordering.
 * <p>See also {@link #by}, which returns elements that satisfy the given predicate.
public static <T extends Comparable<T>> Filter<T> equal(final T value) {
 return by((SerializableFunction<T, Boolean>) input -> input.compareTo(value) == 0)
   .described(String.format("x == %s", value));

代码示例来源:origin: org.apache.beam/beam-sdks-java-core

public void testFilterGreaterThan() {
 PCollection<Integer> output =
   p.apply(Create.of(1, 2, 3, 4, 5, 6, 7)).apply(Filter.greaterThan(4));
 PAssert.that(output).containsInAnyOrder(5, 6, 7);;

代码示例来源:origin: org.apache.beam/beam-sdks-java-core

 * Returns a {@code PTransform} that takes an input {@code PCollection<T>} and returns a {@code
 * PCollection<T>} with elements that are less than or equal to a given value, based on the
 * elements' natural ordering. Elements must be {@code Comparable}.
 * <p>Example of use:
 * <pre>{@code
 * PCollection<Integer> listOfNumbers = ...;
 * PCollection<Integer> smallOrEqualNumbers =
 *     listOfNumbers.apply(Filter.lessThanEq(10));
 * }</pre>
 * <p>See also {@link #lessThan}, {@link #greaterThanEq}, {@link #equal} and {@link #greaterThan},
 * which return elements satisfying various inequalities with the specified value based on the
 * elements' natural ordering.
 * <p>See also {@link #by}, which returns elements that satisfy the given predicate.
public static <T extends Comparable<T>> Filter<T> lessThanEq(final T value) {
 return by((SerializableFunction<T, Boolean>) input -> input.compareTo(value) <= 0)
   .described(String.format("x ≤ %s", value));

代码示例来源:origin: org.apache.beam/beam-runners-direct-java

public void setup() {
 createdInts = p.apply("createdInts", Create.of(1, 2, 3));
 filtered = createdInts.apply("filtered", Filter.greaterThan(1));
 filteredTimesTwo =
       new DoFn<Integer, Integer>() {
        public void processElement(ProcessContext c) throws Exception {
         c.output(c.element() * 2);
 keyed = createdInts.apply("keyed", WithKeys.of("MyKey"));
 intsToFlatten = p.apply("intsToFlatten", Create.of(-1, 256, 65535));
 PCollectionList<Integer> preFlatten = PCollectionList.of(createdInts).and(intsToFlatten);
 flattened = preFlatten.apply("flattened", Flatten.pCollections());
 clock = MockClock.fromInstant(new Instant(1000));
 graph = DirectGraphs.getGraph(p);
 manager = WatermarkManager.create(clock, graph, AppliedPTransform::getFullName);
 bundleFactory = ImmutableListBundleFactory.create();


 * Compute a PCollection of reference allele frequencies for SNPs of interest.
 * The SNPs all have only a single alternate allele, and neither the
 * reference nor the alternate allele have a population frequency < minFreq.
 * The results are returned in a PCollection indexed by Position.
 * @param variants a set of variant calls for a reference population
 * @param minFreq the minimum allele frequency for the set
 * @return a PCollection mapping Position to AlleleFreq
static PCollection<KV<Position, AlleleFreq>> getFreq(
  PCollection<Variant> variants, double minFreq) {
 return variants.apply("PassingFilter",
   .apply(ParDo.of(new GetAlleleFreq()))
   .apply( FilterFreq(minFreq)));

代码示例来源:origin: org.apache.beam/beam-sdks-java-core

 * Returns a {@code PTransform} that takes an input {@link PCollection} and returns a {@link
 * PCollection} with elements that are less than a given value, based on the elements' natural
 * ordering. Elements must be {@code Comparable}.
 * <p>Example of use:
 * <pre>{@code
 * PCollection<Integer> listOfNumbers = ...;
 * PCollection<Integer> smallNumbers =
 *     listOfNumbers.apply(Filter.lessThan(10));
 * }</pre>
 * <p>See also {@link #lessThanEq}, {@link #greaterThanEq}, {@link #equal} and {@link
 * #greaterThan}, which return elements satisfying various inequalities with the specified value
 * based on the elements' natural ordering.
 * <p>See also {@link #by}, which returns elements that satisfy the given predicate.
public static <T extends Comparable<T>> Filter<T> lessThan(final T value) {
 return by((SerializableFunction<T, Boolean>) input -> input.compareTo(value) < 0)
   .described(String.format("x < %s", value));

代码示例来源:origin: org.apache.beam/beam-sdks-java-core

 * Returns a {@code PTransform} that takes an input {@code PCollection<T>} and returns a {@code
 * PCollection<T>} with elements that satisfy the given predicate. The predicate must be a {@code
 * SerializableFunction<T, Boolean>}.
 * <p>Example of use:
 * <pre>{@code
 * PCollection<String> wordList = ...;
 * PCollection<String> longWords =
 *     wordList.apply(Filter.by(new MatchIfWordLengthGT(6)));
 * }</pre>
 * <p>See also {@link #lessThan}, {@link #lessThanEq}, {@link #greaterThan}, {@link
 * #greaterThanEq}, which return elements satisfying various inequalities with the specified value
 * based on the elements' natural ordering.
public static <T, PredicateT extends SerializableFunction<T, Boolean>> Filter<T> by(
  PredicateT predicate) {
 return new Filter<>(predicate);

代码示例来源:origin: googlegenomics/dataflow-java

 * Compute a PCollection of reference allele frequencies for SNPs of interest.
 * The SNPs all have only a single alternate allele, and neither the
 * reference nor the alternate allele have a population frequency < minFreq.
 * The results are returned in a PCollection indexed by Position.
 * @param variants a set of variant calls for a reference population
 * @param minFreq the minimum allele frequency for the set
 * @return a PCollection mapping Position to AlleleFreq
static PCollection<KV<Position, AlleleFreq>> getFreq(
  PCollection<Variant> variants, double minFreq) {
 return variants.apply("PassingFilter",
   .apply(ParDo.of(new GetAlleleFreq()))
   .apply( FilterFreq(minFreq)));

代码示例来源:origin: org.apache.beam/beam-sdks-java-core

 * Returns a {@code PTransform} that takes an input {@code PCollection<T>} and returns a {@code
 * PCollection<T>} with elements that are greater than a given value, based on the elements'
 * natural ordering. Elements must be {@code Comparable}.
 * <p>Example of use:
 * <pre>{@code
 * PCollection<Integer> listOfNumbers = ...;
 * PCollection<Integer> largeNumbers =
 *     listOfNumbers.apply(Filter.greaterThan(1000));
 * }</pre>
 * <p>See also {@link #greaterThanEq}, {@link #lessThan}, {@link #equal} and {@link #lessThanEq},
 * which return elements satisfying various inequalities with the specified value based on the
 * elements' natural ordering.
 * <p>See also {@link #by}, which returns elements that satisfy the given predicate.
public static <T extends Comparable<T>> Filter<T> greaterThan(final T value) {
 return by((SerializableFunction<T, Boolean>) input -> input.compareTo(value) > 0)
   .described(String.format("x > %s", value));


.apply(ParDo.of(new SplitReads()))
.apply( SampleReads(samplingFraction, samplingPrefix)));

代码示例来源:origin: org.apache.beam/beam-sdks-java-core

 * Returns a {@code PTransform} that takes an input {@code PCollection<T>} and returns a {@code
 * PCollection<T>} with elements that are greater than or equal to a given value, based on the
 * elements' natural ordering. Elements must be {@code Comparable}.
 * <p>Example of use:
 * <pre>{@code
 * PCollection<Integer> listOfNumbers = ...;
 * PCollection<Integer> largeOrEqualNumbers =
 *     listOfNumbers.apply(Filter.greaterThanEq(1000));
 * }</pre>
 * <p>See also {@link #greaterThan}, {@link #lessThan}, {@link #equal} and {@link #lessThanEq},
 * which return elements satisfying various inequalities with the specified value based on the
 * elements' natural ordering.
 * <p>See also {@link #by}, which returns elements that satisfy the given predicate.
public static <T extends Comparable<T>> Filter<T> greaterThanEq(final T value) {
 return by((SerializableFunction<T, Boolean>) input -> input.compareTo(value) >= 0)
   .described(String.format("x ≥ %s", value));

代码示例来源:origin: googlegenomics/dataflow-java

.apply(ParDo.of(new SplitReads()))
.apply( SampleReads(samplingFraction, samplingPrefix)));

代码示例来源:origin: jbonofre/beam-samples

public PCollection<String> expand(PCollection<String> inputCollection) {
  PCollection<String> compositeKeys =
      inputCollection.apply("ExtractCompositeKey", ParDo.of(new DoFn<String, String>() {
        public void processElement(ProcessContext c) {
      })).apply("FilterValidCompositeKeys", SerializableFunction<String, Boolean>() {
        public Boolean apply(String input) {
          return (!input.equals("NA"));
  PCollection<KV<String, Long>> compositesEventsPairs =
      compositeKeys.apply("GetEventsByCompositeKey", Count.<String>perElement());
  PCollection<String> result = compositesEventsPairs.apply("FormatOutput", MapElements.via(
      new SimpleFunction<KV<String, Long>, String>() {
        public String apply(KV<String, Long> kv) {
          StringBuilder str = new StringBuilder();
          String[] split = kv.getKey().split("_");
          String country = split[0];
          String subject = split[1];
          Long eventsNb = kv.getValue();
          str.append(country).append(" ").append(subject).append(" ").append(eventsNb);
          return str.toString();
  return result;

代码示例来源:origin: org.apache.beam/beam-sdks-java-io-redis

 public PCollection<KV<String, String>> expand(PCollection<KV<String, String>> input) {
  // reparallelize mimics the same behavior as in JdbcIO
  // breaking fusion
  PCollectionView<Iterable<KV<String, String>>> empty =
  PCollection<KV<String, String>> materialized =
          new DoFn<KV<String, String>, KV<String, String>>() {
           public void processElement(ProcessContext context) {
  return materialized.apply(Reshuffle.viaRandomKey());

代码示例来源:origin: org.apache.beam/beam-sdks-java-io-jdbc

 public PCollection<T> expand(PCollection<T> input) {
  // See
  // We use a combined approach to "break fusion" here:
  // (see
  // 1) force the data to be materialized by passing it as a side input to an identity fn,
  // then 2) reshuffle it with a random key. Initial materialization provides some parallelism
  // and ensures that data to be shuffled can be generated in parallel, while reshuffling
  // provides perfect parallelism.
  // In most cases where a "fusion break" is needed, a simple reshuffle would be sufficient.
  // The current approach is necessary only to support the particular case of JdbcIO where
  // a single query may produce many gigabytes of query results.
  PCollectionView<Iterable<T>> empty =
  PCollection<T> materialized =
          new DoFn<T, T>() {
           public void process(ProcessContext c) {
  return materialized.apply(Reshuffle.viaRandomKey());

代码示例来源:origin: GoogleCloudPlatform/DataflowTemplates

 public PCollection<T> expand(PCollection<T> input) {
  // See
  // We use a combined approach to "break fusion" here:
  // (see
  // 1) force the data to be materialized by passing it as a side input to an identity fn,
  // then 2) reshuffle it with a random key. Initial materialization provides some parallelism
  // and ensures that data to be shuffled can be generated in parallel, while reshuffling
  // provides perfect parallelism.
  // In most cases where a "fusion break" is needed, a simple reshuffle would be sufficient.
  // The current approach is necessary only to support the particular case of JdbcIO where
  // a single query may produce many gigabytes of query results.
  PCollectionView<Iterable<T>> empty =
  PCollection<T> materialized =
          new DoFn<T, T>() {
           public void process(ProcessContext c) {
  return materialized.apply(Reshuffle.viaRandomKey());

代码示例来源:origin: org.talend.components/processing-runtime

PCollection<IndexedRecord> output = mainPCollection.apply(ctx.getPTransformName(),;
  ctx.putPCollectionByLinkName(hasFlow ? flowLink : rejectLink, output);
} else {
