Exemplo n.º 1
0
 def run_split_pipeline(self, split_manager, elements, element_counter=None):
   """Run Create -> Reshuffle -> 'Identity' -> counting Map and check output.

   The pipeline is built and run inside
   ``fn_api_runner.split_manager('Identity', split_manager)``, i.e. the
   given ``split_manager`` is registered under the 'Identity' step name
   (presumably so the runner consults it when splitting that step --
   confirm against ``fn_api_runner``).

   Args:
     split_manager: Passed through unchanged to
       ``fn_api_runner.split_manager``.
     elements: Input elements; the pipeline output is asserted to equal them.
     element_counter: Optional object with an ``increment()`` method, called
       once for every element leaving the 'Identity' step. When ``None``
       (the default) no counting is done.
   """
   def count_and_forward(x):
     # The default ``element_counter=None`` used to fail here with an
     # AttributeError on the first element; skip counting in that case.
     if element_counter is None:
       return x
     # ``or x`` forwards the element when ``increment()`` returns a falsy
     # value, exactly as the original lambda did.
     return element_counter.increment() or x

   with fn_api_runner.split_manager('Identity', split_manager):
     with self.create_pipeline() as p:
       res = (p
              | beam.Create(elements)
              | beam.Reshuffle()
              | 'Identity' >> beam.Map(lambda x: x)
              | beam.Map(count_and_forward))
       assert_that(res, equal_to(elements))
Exemplo n.º 2
0
 def run_split_pipeline(self, split_manager, elements, element_counter=None):
   """Run Create -> Reshuffle -> 'Identity' -> counting Map and check output.

   The pipeline is built and run inside
   ``fn_api_runner.split_manager('Identity', split_manager)``, i.e. the
   given ``split_manager`` is registered under the 'Identity' step name
   (presumably so the runner consults it when splitting that step --
   confirm against ``fn_api_runner``).

   Args:
     split_manager: Passed through unchanged to
       ``fn_api_runner.split_manager``.
     elements: Input elements; the pipeline output is asserted to equal them.
     element_counter: Optional object with an ``increment()`` method, called
       once for every element leaving the 'Identity' step. When ``None``
       (the default) no counting is done.
   """
   def count_and_forward(x):
     # The default ``element_counter=None`` used to fail here with an
     # AttributeError on the first element; skip counting in that case.
     if element_counter is None:
       return x
     # ``or x`` forwards the element when ``increment()`` returns a falsy
     # value, exactly as the original lambda did.
     return element_counter.increment() or x

   with fn_api_runner.split_manager('Identity', split_manager):
     with self.create_pipeline() as p:
       res = (p
              | beam.Create(elements)
              | beam.Reshuffle()
              | 'Identity' >> beam.Map(lambda x: x)
              | beam.Map(count_and_forward))
       assert_that(res, equal_to(elements))
Exemplo n.º 3
0
    def run_sdf_split_pipeline(self,
                               split_manager,
                               elements,
                               element_counter,
                               expected_groups=None):
        """Run a splittable-DoFn pipeline under a split manager and verify it.

        For every input ``x`` the SDF emits one batch (a list) holding the
        pairs ``(x, k)`` for the positions ``k`` it managed to claim. The
        flattened batches must equal ``[(x, k) for k in range(x)]`` over all
        inputs.

        Args:
            split_manager: Registered for the 'SDF' step through
                ``fn_api_runner.split_manager``.
            elements: Integer inputs fed to the pipeline.
            element_counter: Object whose ``increment()`` is invoked after
                each successfully claimed position.
            expected_groups: If truthy, the un-flattened batches are also
                asserted to equal this value.
        """

        class EnumerateProvider(beam.transforms.core.RestrictionProvider):
            """Restriction provider covering the offsets ``[0, element)``."""

            def initial_restriction(self, element):
                return (0, element)

            def create_tracker(self, restriction):
                tracker_cls = restriction_trackers.OffsetRestrictionTracker
                return tracker_cls(*restriction)

            def split(self, element, restriction):
                # Keep the restriction whole: no initial splitting, which
                # keeps the test simple.
                return [restriction]

            def restriction_size(self, element, restriction):
                return restriction[1] - restriction[0]

        class EnumerateSdf(beam.DoFn):
            """Emits, as one batch, every ``(element, k)`` it can claim."""

            def process(self, element,
                        restriction_tracker=EnumerateProvider()):
                claimed = []
                bounds = restriction_tracker.current_restriction()
                for position in range(*bounds):
                    if not restriction_tracker.try_claim(position):
                        # Claim refused: stop processing this restriction.
                        break
                    claimed.append((element, position))
                    element_counter.increment()
                # A single batch per call allows tighter assertions on how
                # the work was grouped.
                yield claimed

        expected = []
        for element in elements:
            expected.extend((element, k) for k in range(element))

        with fn_api_runner.split_manager('SDF', split_manager):
            with self.create_pipeline() as p:
                source = p | beam.Create(elements)
                grouped = source | 'SDF' >> beam.ParDo(EnumerateSdf())
                flat = grouped | beam.FlatMap(lambda batch: batch)
                assert_that(flat, equal_to(expected))
                if expected_groups:
                    assert_that(
                        grouped,
                        equal_to(expected_groups),
                        label='CheckGrouped')
Exemplo n.º 4
0
  def run_sdf_split_pipeline(
      self, split_manager, elements, element_counter, expected_groups=None):
    """Run a splittable-DoFn pipeline under a split manager and verify it.

    For every input ``x`` the SDF emits one batch (a list) holding the pairs
    ``(x, k)`` for the positions ``k`` it managed to claim. The flattened
    batches must equal ``[(x, k) for k in range(x)]`` over all inputs.

    Args:
      split_manager: Registered for the 'SDF' step through
        ``fn_api_runner.split_manager``.
      elements: Integer inputs fed to the pipeline.
      element_counter: Object whose ``increment()`` is invoked after each
        successfully claimed position.
      expected_groups: If truthy, the un-flattened batches are also asserted
        to equal this value.
    """

    class EnumerateProvider(beam.transforms.core.RestrictionProvider):
      """Restriction provider covering the offsets ``[0, element)``."""

      def initial_restriction(self, element):
        return (0, element)

      def create_tracker(self, restriction):
        tracker_cls = restriction_trackers.OffsetRestrictionTracker
        return tracker_cls(*restriction)

      def split(self, element, restriction):
        # Keep the restriction whole: no initial splitting, which keeps the
        # test simple.
        return [restriction]

      def restriction_size(self, element, restriction):
        return restriction[1] - restriction[0]

    class EnumerateSdf(beam.DoFn):
      """Emits, as one batch, every ``(element, k)`` it can claim."""

      def process(self, element, restriction_tracker=EnumerateProvider()):
        claimed = []
        bounds = restriction_tracker.current_restriction()
        for position in range(*bounds):
          if not restriction_tracker.try_claim(position):
            # Claim refused: stop processing this restriction.
            break
          claimed.append((element, position))
          element_counter.increment()
        # A single batch per call allows tighter assertions on how the work
        # was grouped.
        yield claimed

    expected = []
    for element in elements:
      expected.extend((element, k) for k in range(element))

    with fn_api_runner.split_manager('SDF', split_manager):
      with self.create_pipeline() as p:
        source = p | beam.Create(elements)
        grouped = source | 'SDF' >> beam.ParDo(EnumerateSdf())
        flat = grouped | beam.FlatMap(lambda batch: batch)
        assert_that(flat, equal_to(expected))
        if expected_groups:
          assert_that(
              grouped, equal_to(expected_groups), label='CheckGrouped')