Example No. 1
 def test_timestamped_with_combiners(self):
   p = Pipeline('DirectPipelineRunner')
   result = (p
             # Create some initial test values.
             | Create('start', [(k, k) for k in range(10)])
             # The purpose of the WindowInto transform is to establish a
             # FixedWindows windowing function for the PCollection.
             # It does not bucket elements into windows since the timestamps
             # from Create are not spaced 5 ms apart and very likely they all
             # fall into the same window.
             | WindowInto('w', FixedWindows(5))
             # Generate timestamped values using the values as timestamps.
             # Now there are values 5 ms apart and since Map propagates the
             # windowing function from input to output the output PCollection
             # will have elements falling into different 5ms windows.
             | Map(lambda (x, t): TimestampedValue(x, t))
             # We add a 'key' to each value representing the index of the
             # window. This is important since there is no guarantee of
             # order for the elements of a PCollection.
             | Map(lambda v: (v / 5, v)))
   # Sum all elements associated with a key and window. Although it
   # is called CombinePerKey it is really CombinePerKeyAndWindow the
   # same way GroupByKey is really GroupByKeyAndWindow.
   sum_per_window = result | CombinePerKey(sum)
   # Compute mean per key and window.
   mean_per_window = result | combiners.Mean.PerKey()
   assert_that(sum_per_window, equal_to([(0, 10), (1, 35)]),
               label='assert:sum')
   assert_that(mean_per_window, equal_to([(0, 2.0), (1, 7.0)]),
               label='assert:mean')
   p.run()
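
The assertions follow from simple arithmetic: FixedWindows(5) puts a timestamp t into the window starting at t - (t % 5), so the key v / 5 (integer division in Python 2) is exactly the window index. A minimal plain-Python sketch of that bookkeeping, independent of the SDK:

  values = range(10)
  windows = {}
  for v in values:
    windows.setdefault(v // 5, []).append(v)  # window index = timestamp // 5
  sums = [(k, sum(vs)) for k, vs in sorted(windows.items())]
  means = [(k, sum(vs) / float(len(vs))) for k, vs in sorted(windows.items())]
  assert sums == [(0, 10), (1, 35)]
  assert means == [(0, 2.0), (1, 7.0)]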
Example No. 2
 def test_reuse_cloned_custom_transform_instance(self):
     pipeline = Pipeline(DirectPipelineRunner())
     pcoll1 = pipeline | Create('pcoll1', [1, 2, 3])
     pcoll2 = pipeline | Create('pcoll2', [4, 5, 6])
     transform = PipelineTest.CustomTransform()
     result1 = pcoll1 | transform
     result2 = pcoll2 | transform.clone('new label')
     assert_that(result1, equal_to([2, 3, 4]), label='r1')
     assert_that(result2, equal_to([5, 6, 7]), label='r2')
     pipeline.run()
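
Applying the same transform instance twice would reuse its label, so the test clones it under a new one. CustomTransform itself is defined elsewhere in PipelineTest and is not shown here; a hypothetical sketch of the clone pattern it relies on:

  import copy

  class LabeledTransform(object):  # stand-in for a PTransform subclass
    def __init__(self, label):
      self.label = label

    def clone(self, new_label):
      # Shallow-copy the instance so the pipeline sees a distinct
      # transform carrying a fresh label.
      other = copy.copy(self)
      other.label = new_label
      return other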
Example No. 3
  def test_create(self):
    pipeline = Pipeline('DirectPipelineRunner')
    pcoll = pipeline | Create('label1', [1, 2, 3])
    assert_that(pcoll, equal_to([1, 2, 3]))

    # Test if initial value is an iterator object.
    pcoll2 = pipeline | Create('label2', iter((4, 5, 6)))
    pcoll3 = pcoll2 | FlatMap('do', lambda x: [x + 10])
    assert_that(pcoll3, equal_to([14, 15, 16]), label='pcoll3')
    pipeline.run()
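
Note that iter((4, 5, 6)) is a one-shot iterator, not a reusable sequence, which is exactly what the second half of the test exercises:

  it = iter((4, 5, 6))
  assert list(it) == [4, 5, 6]
  assert list(it) == []  # exhausted after a single pass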
Example No. 4
    def test_create(self):
        pipeline = Pipeline('DirectPipelineRunner')
        pcoll = pipeline | Create('label1', [1, 2, 3])
        assert_that(pcoll, equal_to([1, 2, 3]))

        # Test if initial value is an iterator object.
        pcoll2 = pipeline | Create('label2', iter((4, 5, 6)))
        pcoll3 = pcoll2 | FlatMap('do', lambda x: [x + 10])
        assert_that(pcoll3, equal_to([14, 15, 16]), label='pcoll3')
        pipeline.run()
Example No. 5
 def test_reuse_cloned_custom_transform_instance(self):
   pipeline = Pipeline(DirectPipelineRunner())
   pcoll1 = pipeline | Create('pcoll1', [1, 2, 3])
   pcoll2 = pipeline | Create('pcoll2', [4, 5, 6])
   transform = PipelineTest.CustomTransform()
   result1 = pcoll1 | transform
   result2 = pcoll2 | transform.clone('new label')
   assert_that(result1, equal_to([2, 3, 4]), label='r1')
   assert_that(result2, equal_to([5, 6, 7]), label='r2')
   pipeline.run()
Example No. 6
 def test_empty_side_outputs(self):
   pipeline = Pipeline('DirectPipelineRunner')
   nums = pipeline | Create('Some Numbers', [1, 3, 5])
   results = nums | FlatMap(
       'ClassifyNumbers',
       lambda x: [x, SideOutputValue('even' if x % 2 == 0 else 'odd', x)]
   ).with_outputs('odd', 'even', main='main')
   assert_that(results.main, equal_to([1, 3, 5]))
   assert_that(results.odd, equal_to([1, 3, 5]), label='assert:odd')
   assert_that(results.even, equal_to([]), label='assert:even')
   pipeline.run()
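
with_outputs splits a single FlatMap into tagged streams: every element goes to the main output, and a copy goes to the 'odd' or 'even' side output. A plain-Python sketch of that routing, with SideOutputValue reduced to a (tag, value) pair:

  def classify(x):
    yield ('main', x)
    yield ('even' if x % 2 == 0 else 'odd', x)

  outputs = {'main': [], 'odd': [], 'even': []}
  for x in [1, 3, 5]:
    for tag, value in classify(x):
      outputs[tag].append(value)
  assert outputs == {'main': [1, 3, 5], 'odd': [1, 3, 5], 'even': []}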
Example No. 7
 def test_empty_side_outputs(self):
     pipeline = Pipeline('DirectPipelineRunner')
     nums = pipeline | Create('Some Numbers', [1, 3, 5])
     results = nums | FlatMap(
         'ClassifyNumbers', lambda x:
         [x, SideOutputValue('even'
                             if x % 2 == 0 else 'odd', x)]).with_outputs()
     assert_that(results[None], equal_to([1, 3, 5]))
     assert_that(results.odd, equal_to([1, 3, 5]), label='assert:odd')
     assert_that(results.even, equal_to([]), label='assert:even')
     pipeline.run()
Example No. 8
  def test_combine_globally_with_default_side_input(self):
    class CombineWithSideInput(PTransform):
      def apply(self, pcoll):
        side = pcoll | CombineGlobally(sum).as_singleton_view()
        main = pcoll.pipeline | Create([None])
        return main | Map(lambda _, s: s, side)

    p = Pipeline('DirectPipelineRunner')
    result1 = p | Create('label1', []) | CombineWithSideInput('L1')
    result2 = p | Create('label2', [1, 2, 3, 4]) | CombineWithSideInput('L2')
    assert_that(result1, equal_to([0]), label='r1')
    assert_that(result2, equal_to([10]), label='r2')
    p.run()
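
The interesting case is result1: the side input is built from an empty PCollection, and CombineGlobally(sum) still produces a singleton because summing nothing yields the combiner's identity:

  assert sum([]) == 0           # empty input -> the default singleton 0
  assert sum([1, 2, 3, 4]) == 10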
Example No. 9
 def test_undeclared_side_outputs(self):
   pipeline = Pipeline('DirectPipelineRunner')
   nums = pipeline | Create('Some Numbers', [1, 2, 3, 4])
   results = nums | FlatMap(
       'ClassifyNumbers',
       lambda x: [x, SideOutputValue('even' if x % 2 == 0 else 'odd', x)]
   ).with_outputs('odd', 'even', main='main')
   # TODO(silviuc): Revisit this test to check for undeclared side outputs.
   # This should work with .with_outputs() without any tags declared and
   # the results[None] should work also.
   assert_that(results.main, equal_to([1, 2, 3, 4]))
   assert_that(results.odd, equal_to([1, 3]), label='assert:odd')
   assert_that(results.even, equal_to([2, 4]), label='assert:even')
   pipeline.run()
Example No. 10
 def test_undeclared_side_outputs(self):
     pipeline = Pipeline('DirectPipelineRunner')
     nums = pipeline | Create('Some Numbers', [1, 2, 3, 4])
     results = nums | FlatMap(
         'ClassifyNumbers', lambda x:
         [x, SideOutputValue('even'
                             if x % 2 == 0 else 'odd', x)]).with_outputs()
     # TODO(silviuc): Revisit this test to check for undeclared side outputs.
     # This should work with .with_outputs() without any tags declared and
     # the results[None] should work also.
     assert_that(results[None], equal_to([1, 2, 3, 4]))
     assert_that(results.odd, equal_to([1, 3]), label='assert:odd')
     assert_that(results.even, equal_to([2, 4]), label='assert:even')
     pipeline.run()
Example No. 11
    def test_combine_globally_with_default_side_input(self):
        class CombineWithSideInput(PTransform):
            def apply(self, pcoll):
                side = pcoll | CombineGlobally(sum).as_singleton_view()
                main = pcoll.pipeline | Create([None])
                return main | Map(lambda _, s: s, side)

        p = Pipeline('DirectPipelineRunner')
        result1 = p | Create('label1', []) | CombineWithSideInput('L1')
        result2 = p | Create('label2',
                             [1, 2, 3, 4]) | CombineWithSideInput('L2')
        assert_that(result1, equal_to([0]), label='r1')
        assert_that(result2, equal_to([10]), label='r2')
        p.run()
Example No. 12
    def test_par_do_with_multiple_outputs_and_using_return(self):
        def some_fn(v):
            if v % 2 == 0:
                return [v, SideOutputValue('even', v)]
            else:
                return [v, SideOutputValue('odd', v)]

        pipeline = Pipeline('DirectPipelineRunner')
        nums = pipeline | Create('Some Numbers', [1, 2, 3, 4])
        results = nums | FlatMap('ClassifyNumbers', some_fn).with_outputs(
            'odd', 'even', main='main')
        assert_that(results.main, equal_to([1, 2, 3, 4]))
        assert_that(results.odd, equal_to([1, 3]), label='assert:odd')
        assert_that(results.even, equal_to([2, 4]), label='assert:even')
        pipeline.run()
Example No. 13
  def test_par_do_with_multiple_outputs_and_using_return(self):
    def some_fn(v):
      if v % 2 == 0:
        return [v, SideOutputValue('even', v)]
      else:
        return [v, SideOutputValue('odd', v)]

    pipeline = Pipeline('DirectPipelineRunner')
    nums = pipeline | Create('Some Numbers', [1, 2, 3, 4])
    results = nums | FlatMap(
        'ClassifyNumbers', some_fn).with_outputs('odd', 'even', main='main')
    assert_that(results.main, equal_to([1, 2, 3, 4]))
    assert_that(results.odd, equal_to([1, 3]), label='assert:odd')
    assert_that(results.even, equal_to([2, 4]), label='assert:even')
    pipeline.run()
Example No. 14
  def test_top_shorthands(self):
    pipeline = Pipeline('DirectPipelineRunner')

    pcoll = pipeline | Create('start', [6, 3, 1, 1, 9, 1, 5, 2, 0, 6])
    result_top = pcoll | df.CombineGlobally('top', combiners.Largest(5))
    result_bot = pcoll | df.CombineGlobally('bot', combiners.Smallest(4))
    assert_that(result_top, equal_to([[9, 6, 6, 5, 3]]), label='assert:top')
    assert_that(result_bot, equal_to([[0, 1, 1, 1]]), label='assert:bot')

    pcoll = pipeline | Create(
        'start-perkey', [('a', x) for x in [6, 3, 1, 1, 9, 1, 5, 2, 0, 6]])
    result_ktop = pcoll | df.CombinePerKey('top-perkey', combiners.Largest(5))
    result_kbot = pcoll | df.CombinePerKey('bot-perkey', combiners.Smallest(4))
    assert_that(result_ktop, equal_to([('a', [9, 6, 6, 5, 3])]), label='k:top')
    assert_that(result_kbot, equal_to([('a', [0, 1, 1, 1])]), label='k:bot')
    pipeline.run()
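
combiners.Largest(n) and combiners.Smallest(n) behave like the standard heap selection helpers, which makes the expected lists easy to verify:

  import heapq

  vals = [6, 3, 1, 1, 9, 1, 5, 2, 0, 6]
  assert heapq.nlargest(5, vals) == [9, 6, 6, 5, 3]
  assert heapq.nsmallest(4, vals) == [0, 1, 1, 1]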
Example No. 15
  def test_run_direct(self):
    file_name = self._create_temp_file('aaaa\nbbbb\ncccc\ndddd')
    pipeline = df.Pipeline('DirectPipelineRunner')
    pcoll = pipeline | df.Read(LineSource(file_name))
    assert_that(pcoll, equal_to(['aaaa', 'bbbb', 'cccc', 'dddd']))

    pipeline.run()
Example No. 16
 def test_map(self):
     pipeline = Pipeline('DirectPipelineRunner')
     lines = pipeline | Create('input', ['a', 'b', 'c'])
     result = (lines
               | Map('upper', str.upper)
               | Map('prefix', lambda x, prefix: prefix + x, 'foo-'))
     assert_that(result, equal_to(['foo-A', 'foo-B', 'foo-C']))
     pipeline.run()
Example No. 17
 def test_map(self):
   pipeline = Pipeline('DirectPipelineRunner')
   lines = pipeline | Create('input', ['a', 'b', 'c'])
   result = (lines
             | Map('upper', str.upper)
             | Map('prefix', lambda x, prefix: prefix + x, 'foo-'))
   assert_that(result, equal_to(['foo-A', 'foo-B', 'foo-C']))
   pipeline.run()
Example No. 18
 def test_word_count_using_get(self):
   pipeline = Pipeline('DirectPipelineRunner')
   lines = pipeline | Create('SomeWords', [DataflowTest.SAMPLE_DATA])
   result = (
       (lines | FlatMap('GetWords', lambda x: re.findall(r'\w+', x)))
       .apply('CountWords', DataflowTest.Count))
   assert_that(result, equal_to(DataflowTest.SAMPLE_RESULT))
   pipeline.run()
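
DataflowTest.SAMPLE_DATA and DataflowTest.Count are defined elsewhere, but the GetWords step is ordinary tokenization; a sketch of the same counting logic on a stand-in sentence (the sample text here is illustrative, not the test's actual data):

  import re
  from collections import Counter

  text = 'the quick brown fox jumps over the lazy dog'
  counts = Counter(re.findall(r'\w+', text))
  assert counts['the'] == 2
  assert counts['fox'] == 1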
Example No. 19
 def test_iterable_side_input(self):
     pipeline = Pipeline('DirectPipelineRunner')
     pcol = pipeline | Create('start', [1, 2])
     side = pipeline | Create('side', [3, 4])  # 2 values in side input.
     result = pcol | FlatMap('compute', lambda x, s: [x * y for y in s],
                             AllOf(side))
     assert_that(result, equal_to([3, 4, 6, 8]))
     pipeline.run()
Example No. 20
 def test_iterable_side_input(self):
   pipeline = Pipeline('DirectPipelineRunner')
   pcol = pipeline | Create('start', [1, 2])
   side = pipeline | Create('side', [3, 4])  # 2 values in side input.
   result = pcol | FlatMap('compute',
                           lambda x, s: [x * y for y in s], AllOf(side))
   assert_that(result, equal_to([3, 4, 6, 8]))
   pipeline.run()
Example No. 21
 def test_default_value_singleton_side_input(self):
     pipeline = Pipeline('DirectPipelineRunner')
     pcol = pipeline | Create('start', [1, 2])
     side = pipeline | Create('side', [])  # 0 values in side input.
     result = (pcol | FlatMap('compute', lambda x, s: [x * s],
                              AsSingleton(side, 10)))
     assert_that(result, equal_to([10, 20]))
     pipeline.run()
Example No. 22
 def test_word_count_using_get(self):
     pipeline = Pipeline('DirectPipelineRunner')
     lines = pipeline | Create('SomeWords', [DataflowTest.SAMPLE_DATA])
     result = ((lines | FlatMap('GetWords',
                                lambda x: re.findall(r'\w+', x))).apply(
                                    'CountWords', DataflowTest.Count))
     assert_that(result, equal_to(DataflowTest.SAMPLE_RESULT))
     pipeline.run()
Example No. 23
 def test_default_value_singleton_side_input(self):
   pipeline = Pipeline('DirectPipelineRunner')
   pcol = pipeline | Create('start', [1, 2])
   side = pipeline | Create('side', [])  # 0 values in side input.
   result = (
       pcol | FlatMap('compute', lambda x, s: [x * s], AsSingleton(side, 10)))
   assert_that(result, equal_to([10, 20]))
   pipeline.run()
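
AsSingleton(side, 10) supplies 10 as the fallback when the side PCollection is empty; rendered as plain Python, the rule the test depends on looks like this:

  def singleton_or_default(values, default):
    values = list(values)
    return values[0] if values else default

  side = []  # zero values in the side input
  assert [x * singleton_or_default(side, 10) for x in [1, 2]] == [10, 20]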
Example No. 24
    def test_par_do_with_multiple_outputs_and_using_yield(self):
        class SomeDoFn(DoFn):
            """A custom DoFn using yield."""
            def process(self, context):
                yield context.element
                if context.element % 2 == 0:
                    yield SideOutputValue('even', context.element)
                else:
                    yield SideOutputValue('odd', context.element)

        pipeline = Pipeline('DirectPipelineRunner')
        nums = pipeline | Create('Some Numbers', [1, 2, 3, 4])
        results = nums | ParDo('ClassifyNumbers', SomeDoFn()).with_outputs(
            'odd', 'even', main='main')
        assert_that(results.main, equal_to([1, 2, 3, 4]))
        assert_that(results.odd, equal_to([1, 3]), label='assert:odd')
        assert_that(results.even, equal_to([2, 4]), label='assert:even')
        pipeline.run()
Example No. 25
 def test_tuple_combine_fn(self):
     p = Pipeline('DirectPipelineRunner')
     result = (p
               | Create([('a', 100, 0.0), ('b', 10, -1), ('c', 1, 100)])
               | df.CombineGlobally(
                   combine.TupleCombineFn(max, combine.MeanCombineFn(),
                                          sum)).without_defaults())
     assert_that(result, equal_to([('c', 111.0 / 3, 99.0)]))
     p.run()
Example No. 26
 def test_tuple_combine_fn_without_defaults(self):
   p = Pipeline('DirectPipelineRunner')
   result = (
       p
       | Create([1, 1, 2, 3])
       | df.CombineGlobally(
           combine.TupleCombineFn(min, combine.MeanCombineFn(), max)
           .with_common_input()).without_defaults())
   assert_that(result, equal_to([(1, 7.0 / 4, 3)]))
   p.run()
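
with_common_input() presumably feeds the same element to every sub-combiner, so (min, mean, max) are all computed over [1, 1, 2, 3]; the expected tuple is then straightforward:

  vals = [1, 1, 2, 3]
  result = (min(vals), sum(vals) / float(len(vals)), max(vals))
  assert result == (1, 7.0 / 4, 3)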
Example No. 27
  def test_par_do_with_multiple_outputs_and_using_yield(self):
    class SomeDoFn(DoFn):
      """A custom DoFn using yield."""

      def process(self, context):
        yield context.element
        if context.element % 2 == 0:
          yield SideOutputValue('even', context.element)
        else:
          yield SideOutputValue('odd', context.element)

    pipeline = Pipeline('DirectPipelineRunner')
    nums = pipeline | Create('Some Numbers', [1, 2, 3, 4])
    results = nums | ParDo(
        'ClassifyNumbers', SomeDoFn()).with_outputs('odd', 'even', main='main')
    assert_that(results.main, equal_to([1, 2, 3, 4]))
    assert_that(results.odd, equal_to([1, 3]), label='assert:odd')
    assert_that(results.even, equal_to([2, 4]), label='assert:even')
    pipeline.run()
Example No. 28
 def test_tuple_combine_fn(self):
   p = Pipeline('DirectPipelineRunner')
   result = (
       p
       | Create([('a', 100, 0.0), ('b', 10, -1), ('c', 1, 100)])
       | df.CombineGlobally(combine.TupleCombineFn(max,
                                                   combine.MeanCombineFn(),
                                                   sum)).without_defaults())
   assert_that(result, equal_to([('c', 111.0 / 3, 99.0)]))
   p.run()
Example No. 29
 def test_tuple_combine_fn_without_defaults(self):
     p = Pipeline('DirectPipelineRunner')
     result = (p
               | Create([1, 1, 2, 3])
               | df.CombineGlobally(
                   combine.TupleCombineFn(
                       min, combine.MeanCombineFn(),
                       max).with_common_input()).without_defaults())
     assert_that(result, equal_to([(1, 7.0 / 4, 3)]))
     p.run()
Example No. 30
  def test_cached_pvalues_are_refcounted(self):
    """Test that cached PValues are refcounted and deleted.

    The intermediary PValues computed by the workflow below contain
    one million elements so if the refcounting does not work the number of
    objects tracked by the garbage collector will increase by a few millions
    by the time we execute the final Map checking the objects tracked.
    Anything that is much larger than what we started with will fail the test.
    """
    def check_memory(value, count_threshold):
      gc.collect()
      objects_count = len(gc.get_objects())
      if objects_count > count_threshold:
        raise RuntimeError(
            'PValues are not refcounted: %s, %s' % (
                objects_count, count_threshold))
      return value

    def create_dupes(o, _):
      yield o
      yield SideOutputValue('side', o)

    pipeline = Pipeline('DirectPipelineRunner')

    gc.collect()
    count_threshold = len(gc.get_objects()) + 10000
    biglist = pipeline | Create('oom:create', ['x'] * 1000000)
    dupes = (
        biglist
        | Map('oom:addone', lambda x: (x, 1))
        | FlatMap('oom:dupes', create_dupes,
                  AsIter(biglist)).with_outputs('side', main='main'))
    result = (
        (dupes.side, dupes.main, dupes.side)
        | Flatten('oom:flatten')
        | CombinePerKey('oom:combine', sum)
        | Map('oom:check', check_memory, count_threshold))

    assert_that(result, equal_to([('x', 3000000)]))
    pipeline.run()
    self.assertEqual(
        pipeline.runner.debug_counters['element_counts'],
        {
            'oom:flatten': 3000000,
            ('oom:combine/GroupByKey/reify_windows', None): 3000000,
            ('oom:dupes/oom:dupes', 'side'): 1000000,
            ('oom:dupes/oom:dupes', None): 1000000,
            'oom:create': 1000000,
            ('oom:addone', None): 1000000,
            'oom:combine/GroupByKey/group_by_key': 1,
            ('oom:check', None): 1,
            'assert_that/singleton': 1,
            ('assert_that/Map(match)', None): 1,
            ('oom:combine/GroupByKey/group_by_window', None): 1,
            ('oom:combine/Combine/ParDo(CombineValuesDoFn)', None): 1})
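
The count_threshold trick works because CPython's garbage collector tracks every live container object, so leaked intermediate PValues show up directly in len(gc.get_objects()). A minimal illustration of the measurement itself:

  import gc

  gc.collect()
  baseline = len(gc.get_objects())
  data = [[i] for i in range(1000)]      # allocate tracked objects
  assert len(gc.get_objects()) > baseline
  del data
  gc.collect()                           # count falls back toward the baseline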
Example No. 31
 def test_par_do_with_side_input_as_arg(self):
     pipeline = Pipeline('DirectPipelineRunner')
     words_list = ['aa', 'bb', 'cc']
     words = pipeline | Create('SomeWords', words_list)
     prefix = pipeline | Create('SomeString', ['xyz'])  # side in
     suffix = 'zyx'
     result = words | FlatMap(
         'DecorateWords', lambda x, pfx, sfx: ['%s-%s-%s' % (pfx, x, sfx)],
         AsSingleton(prefix), suffix)
     assert_that(result, equal_to(['xyz-%s-zyx' % x for x in words_list]))
     pipeline.run()
Example No. 32
 def test_timestamped_value(self):
   p = Pipeline('DirectPipelineRunner')
   result = (p
             | Create('start', [(k, k) for k in range(10)])
             | Map(lambda (x, t): TimestampedValue(x, t))
             | WindowInto('w', FixedWindows(5))
             | Map(lambda v: ('key', v))
             | GroupByKey())
   assert_that(result, equal_to([('key', [0, 1, 2, 3, 4]),
                                 ('key', [5, 6, 7, 8, 9])]))
   p.run()
Example No. 33
  def test_empty_singleton_side_input(self):
    pipeline = Pipeline('DirectPipelineRunner')
    pcol = pipeline | Create('start', [1, 2])
    side = pipeline | Create('side', [])  # Empty side input.

    def my_fn(k, s):
      v = ('empty' if isinstance(s, EmptySideInput) else 'full')
      return [(k, v)]
    result = pcol | FlatMap('compute', my_fn, AsSingleton(side))
    assert_that(result, equal_to([(1, 'empty'), (2, 'empty')]))
    pipeline.run()
Example No. 34
 def test_sliding_windows(self):
   p = Pipeline('DirectPipelineRunner')
   pcoll = self.timestamped_key_values(p, 'key', 1, 2, 3)
   result = (pcoll
             | WindowInto('w', SlidingWindows(period=2, size=4))
             | GroupByKey()
             | reify_windows)
   expected = [('key @ [-2.0, 2.0)', [1]),
               ('key @ [0.0, 4.0)', [1, 2, 3]),
               ('key @ [2.0, 6.0)', [2, 3])]
   assert_that(result, equal_to(expected))
   p.run()
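
SlidingWindows(period=2, size=4) assigns each timestamp to every window of length 4 whose start is a multiple of 2 and contains it, which is why 1 lands in both [-2.0, 2.0) and [0.0, 4.0). The assignment arithmetic as a plain-Python sketch:

  def sliding_windows(t, size, period):
    start = t - (t % period)  # latest window start at or before t
    return [(s, s + size) for s in range(start, start - size, -period)
            if s <= t < s + size]

  assert sliding_windows(1, size=4, period=2) == [(0, 4), (-2, 2)]
  assert sliding_windows(2, size=4, period=2) == [(2, 6), (0, 4)]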
Example No. 35
    def test_empty_singleton_side_input(self):
        pipeline = Pipeline('DirectPipelineRunner')
        pcol = pipeline | Create('start', [1, 2])
        side = pipeline | Create('side', [])  # Empty side input.

        def my_fn(k, s):
            v = ('empty' if isinstance(s, EmptySideInput) else 'full')
            return [(k, v)]

        result = pcol | FlatMap('compute', my_fn, AsSingleton(side))
        assert_that(result, equal_to([(1, 'empty'), (2, 'empty')]))
        pipeline.run()
Example No. 36
 def test_par_do_with_side_input_as_keyword_arg(self):
   pipeline = Pipeline('DirectPipelineRunner')
   words_list = ['aa', 'bb', 'cc']
   words = pipeline | Create('SomeWords', words_list)
   prefix = 'zyx'
   suffix = pipeline | Create('SomeString', ['xyz'])  # side in
   result = words | FlatMap(
       'DecorateWords',
       lambda x, pfx, sfx: ['%s-%s-%s' % (pfx, x, sfx)],
       prefix, sfx=AsSingleton(suffix))
   assert_that(result, equal_to(['zyx-%s-xyz' % x for x in words_list]))
   pipeline.run()
Example No. 37
  def test_builtin_combines(self):
    pipeline = Pipeline('DirectPipelineRunner')

    vals = [6, 3, 1, 1, 9, 1, 5, 2, 0, 6]
    mean = sum(vals) / float(len(vals))
    size = len(vals)

    # First for global combines.
    pcoll = pipeline | Create('start', vals)
    result_mean = pcoll | combine.Mean.Globally('mean')
    result_count = pcoll | combine.Count.Globally('count')
    assert_that(result_mean, equal_to([mean]), label='assert:mean')
    assert_that(result_count, equal_to([size]), label='assert:size')

    # Again for per-key combines.
    pcoll = pipeline | Create('start-perkey', [('a', x) for x in vals])
    result_key_mean = pcoll | combine.Mean.PerKey('mean-perkey')
    result_key_count = pcoll | combine.Count.PerKey('count-perkey')
    assert_that(result_key_mean, equal_to([('a', mean)]), label='key:mean')
    assert_that(result_key_count, equal_to([('a', size)]), label='key:size')
    pipeline.run()
Example No. 38
 def test_sliding_windows(self):
   p = Pipeline('DirectPipelineRunner')
   pcoll = self.timestamped_key_values(p, 'key', 1, 2, 3)
   result = (pcoll
             | WindowInto('w', SlidingWindows(period=2, size=4))
             | GroupByKey()
             | reify_windows)
   expected = [('key @ [-2, 2)', [1]),
               ('key @ [0, 4)', [1, 2, 3]),
               ('key @ [2, 6)', [2, 3])]
   assert_that(result, equal_to(expected))
   p.run()
Example No. 39
 def test_sessions(self):
   p = Pipeline('DirectPipelineRunner')
   pcoll = self.timestamped_key_values(p, 'key', 1, 2, 3, 20, 35, 27)
   result = (pcoll
             | WindowInto('w', Sessions(10))
             | GroupByKey()
             | sort_values
             | reify_windows)
   expected = [('key @ [1, 13)', [1, 2, 3]),
               ('key @ [20, 45)', [20, 27, 35])]
   assert_that(result, equal_to(expected))
   p.run()
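
Sessions(10) opens a window [t, t + 10) for every timestamp and merges windows that overlap, so 1, 2, 3 fuse into [1, 13) and 20, 27, 35 chain into [20, 45). A plain-Python rendering of that merge:

  def sessions(timestamps, gap):
    merged = []
    for t in sorted(timestamps):
      if merged and t < merged[-1][1]:
        # Overlaps the open session: extend its end if needed.
        merged[-1] = (merged[-1][0], max(merged[-1][1], t + gap))
      else:
        merged.append((t, t + gap))
    return merged

  assert sessions([1, 2, 3, 20, 35, 27], gap=10) == [(1, 13), (20, 45)]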
Example No. 40
 def test_sessions(self):
   p = Pipeline('DirectPipelineRunner')
   pcoll = self.timestamped_key_values(p, 'key', 1, 2, 3, 20, 35, 27)
   result = (pcoll
             | WindowInto('w', Sessions(10))
             | GroupByKey()
             | sort_values
             | reify_windows)
   expected = [('key @ [1.0, 13.0)', [1, 2, 3]),
               ('key @ [20.0, 45.0)', [20, 27, 35])]
   assert_that(result, equal_to(expected))
   p.run()
Example No. 41
    def test_builtin_combines(self):
        pipeline = Pipeline('DirectPipelineRunner')

        vals = [6, 3, 1, 1, 9, 1, 5, 2, 0, 6]
        mean = sum(vals) / float(len(vals))
        size = len(vals)

        # First for global combines.
        pcoll = pipeline | Create('start', vals)
        result_mean = pcoll | combine.Mean.Globally('mean')
        result_count = pcoll | combine.Count.Globally('count')
        assert_that(result_mean, equal_to([mean]), label='assert:mean')
        assert_that(result_count, equal_to([size]), label='assert:size')

        # Again for per-key combines.
        pcoll = pipeline | Create('start-perkey', [('a', x) for x in vals])
        result_key_mean = pcoll | combine.Mean.PerKey('mean-perkey')
        result_key_count = pcoll | combine.Count.PerKey('count-perkey')
        assert_that(result_key_mean, equal_to([('a', mean)]), label='key:mean')
        assert_that(result_key_count,
                    equal_to([('a', size)]),
                    label='key:size')
        pipeline.run()
Example No. 42
 def match(actual):
   # There is always exactly one result.
   equal_to([1])([len(actual)])
   # There are always exactly three samples in the result.
   equal_to([3])([len(actual[0])])
   # Sampling is without replacement.
   num_ones = sum(1 for x in actual[0] if x == 1)
   num_twos = sum(1 for x in actual[0] if x == 2)
   equal_to([1, 2])([num_ones, num_twos])
Example No. 43
 def match(actual):
     # There is always exactly one result.
     equal_to([1])([len(actual)])
     # There are always exactly three samples in the result.
     equal_to([3])([len(actual[0])])
     # Sampling is without replacement.
     num_ones = sum(1 for x in actual[0] if x == 1)
     num_twos = sum(1 for x in actual[0] if x == 2)
     equal_to([1, 2])([num_ones, num_twos])
Example No. 44
    def test_top_shorthands(self):
        pipeline = Pipeline('DirectPipelineRunner')

        pcoll = pipeline | Create('start', [6, 3, 1, 1, 9, 1, 5, 2, 0, 6])
        result_top = pcoll | df.CombineGlobally('top', combiners.Largest(5))
        result_bot = pcoll | df.CombineGlobally('bot', combiners.Smallest(4))
        assert_that(result_top,
                    equal_to([[9, 6, 6, 5, 3]]),
                    label='assert:top')
        assert_that(result_bot, equal_to([[0, 1, 1, 1]]), label='assert:bot')

        pcoll = pipeline | Create(
            'start-perkey', [('a', x) for x in [6, 3, 1, 1, 9, 1, 5, 2, 0, 6]])
        result_ktop = pcoll | df.CombinePerKey('top-perkey',
                                               combiners.Largest(5))
        result_kbot = pcoll | df.CombinePerKey('bot-perkey',
                                               combiners.Smallest(4))
        assert_that(result_ktop,
                    equal_to([('a', [9, 6, 6, 5, 3])]),
                    label='k:top')
        assert_that(result_kbot,
                    equal_to([('a', [0, 1, 1, 1])]),
                    label='k:bot')
        pipeline.run()
Example No. 45
    def test_deferred_side_input_iterable(self):
        @typehints.with_input_types(str, typehints.Iterable[str])
        def concat(glue, items):
            return glue.join(sorted(items))

        p = df.Pipeline(options=PipelineOptions([]))
        main_input = p | df.Create(['a', 'bb', 'c'])
        side_input = p | df.Create('side', ['x', 'y', 'z'])
        result = main_input | df.Map(concat, pvalue.AsIter(side_input))
        assert_that(result, equal_to(['xayaz', 'xbbybbz', 'xcycz']))
        p.run()

        bad_side_input = p | df.Create('bad_side', [1, 2, 3])
        with self.assertRaises(typehints.TypeCheckError):
            main_input | df.Map('fail', concat, pvalue.AsIter(bad_side_input))
Example No. 46
  def test_par_do_with_do_fn_object(self):
    class SomeDoFn(DoFn):
      """A custom DoFn for a FlatMap transform."""

      def process(self, context, prefix, suffix):
        return ['%s-%s-%s' % (prefix, context.element, suffix)]

    pipeline = Pipeline('DirectPipelineRunner')
    words_list = ['aa', 'bb', 'cc']
    words = pipeline | Create('SomeWords', words_list)
    prefix = 'zyx'
    suffix = pipeline | Create('SomeString', ['xyz'])  # side in
    result = words | ParDo('DecorateWordsDoFn', SomeDoFn(), prefix,
                           suffix=AsSingleton(suffix))
    assert_that(result, equal_to(['zyx-%s-xyz' % x for x in words_list]))
    pipeline.run()
Example No. 47
    def test_deferred_side_inputs(self):
        @typehints.with_input_types(str, int)
        def repeat(s, times):
            return s * times

        p = df.Pipeline(options=PipelineOptions([]))
        main_input = p | df.Create(['a', 'bb', 'c'])
        side_input = p | df.Create('side', [3])
        result = main_input | df.Map(repeat, pvalue.AsSingleton(side_input))
        assert_that(result, equal_to(['aaa', 'bbbbbb', 'ccc']))
        p.run()

        bad_side_input = p | df.Create('bad_side', ['z'])
        with self.assertRaises(typehints.TypeCheckError):
            main_input | df.Map('again', repeat,
                                pvalue.AsSingleton(bad_side_input))
Example No. 48
 def test_after_count(self):
   p = Pipeline('DirectPipelineRunner')
   result = (p
             | df.Create([1, 2, 3, 4, 5, 10, 11])
             | df.FlatMap(lambda t: [('A', t), ('B', t + 5)])
             | df.Map(lambda (k, t): TimestampedValue((k, t), t))
             | df.WindowInto(FixedWindows(10), trigger=AfterCount(3),
                             accumulation_mode=AccumulationMode.DISCARDING)
             | df.GroupByKey()
             | df.Map(lambda (k, v): ('%s-%s' % (k, len(v)), set(v))))
   assert_that(result, equal_to(
       {
           'A-5': {1, 2, 3, 4, 5},
           # A-10, A-11 never emitted due to AfterCount(3) never firing.
           'B-4': {6, 7, 8, 9},
           'B-3': {10, 15, 16},
        }.iteritems()))
   p.run()
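
Before any triggering happens, each input element t is emitted under two keys with timestamps t and t + 5, then bucketed by FixedWindows(10). A sketch of just that keying and windowing arithmetic (the trigger semantics themselves are left to the SDK):

  elements = [1, 2, 3, 4, 5, 10, 11]
  buckets = {}
  for t in elements:
    for k, ts in [('A', t), ('B', t + 5)]:
      buckets.setdefault((k, ts // 10), set()).add(ts)  # window index ts // 10
  assert buckets[('A', 0)] == {1, 2, 3, 4, 5}
  assert buckets[('B', 0)] == {6, 7, 8, 9}
  assert buckets[('B', 1)] == {10, 15, 16}
  assert buckets[('A', 1)] == {10, 11}  # dropped: AfterCount(3) never fires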
Example No. 49
    def test_par_do_with_do_fn_object(self):
        class SomeDoFn(DoFn):
            """A custom DoFn for a FlatMap transform."""
            def process(self, context, prefix, suffix):
                return ['%s-%s-%s' % (prefix, context.element, suffix)]

        pipeline = Pipeline('DirectPipelineRunner')
        words_list = ['aa', 'bb', 'cc']
        words = pipeline | Create('SomeWords', words_list)
        prefix = 'zyx'
        suffix = pipeline | Create('SomeString', ['xyz'])  # side in
        result = words | ParDo('DecorateWordsDoFn',
                               SomeDoFn(),
                               prefix,
                               suffix=AsSingleton(suffix))
        assert_that(result, equal_to(['zyx-%s-xyz' % x for x in words_list]))
        pipeline.run()
Example No. 50
 def test_after_count(self):
     p = Pipeline('DirectPipelineRunner')
     result = (p
               | df.Create([1, 2, 3, 4, 5, 10, 11])
               | df.FlatMap(lambda t: [('A', t), ('B', t + 5)])
               | df.Map(lambda (k, t): TimestampedValue((k, t), t))
               | df.WindowInto(
                   FixedWindows(10),
                   trigger=AfterCount(3),
                   accumulation_mode=AccumulationMode.DISCARDING)
               | df.GroupByKey()
               | df.Map(lambda (k, v): ('%s-%s' % (k, len(v)), set(v))))
     assert_that(
         result,
         equal_to({
             'A-5': {1, 2, 3, 4, 5},
             # A-10, A-11 never emitted due to AfterCount(3) never firing.
             'B-4': {6, 7, 8, 9},
             'B-3': {10, 15, 16},
          }.iteritems()))
      p.run()
Example No. 51
  def test_window_transform(self):
    class TestWindowFn(WindowFn):
      """Windowing function adding two disjoint windows to each element."""

      def assign(self, assign_context):
        _ = assign_context
        return [IntervalWindow(10, 20), IntervalWindow(20, 30)]

      def merge(self, existing_windows):
        return existing_windows

    pipeline = Pipeline('DirectPipelineRunner')
    numbers = pipeline | Create('KVs', [(1, 10), (2, 20), (3, 30)])
    result = (numbers
              | WindowInto('W', windowfn=TestWindowFn())
              | GroupByKey('G'))
    assert_that(
        result, equal_to([(1, [10]), (1, [10]), (2, [20]),
                          (2, [20]), (3, [30]), (3, [30])]))
    pipeline.run()
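
Because TestWindowFn assigns every element to both [10, 20) and [20, 30), grouping per key and window yields each (key, [value]) pair exactly twice:

  kvs = [(1, 10), (2, 20), (3, 30)]
  grouped = [(k, [v]) for _window in [(10, 20), (20, 30)] for k, v in kvs]
  assert sorted(grouped) == [(1, [10]), (1, [10]), (2, [20]),
                             (2, [20]), (3, [30]), (3, [30])]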
Example No. 52
    def test_window_transform(self):
        class TestWindowFn(WindowFn):
            """Windowing function adding two disjoint windows to each element."""
            def assign(self, assign_context):
                _ = assign_context
                return [IntervalWindow(10, 20), IntervalWindow(20, 30)]

            def merge(self, existing_windows):
                return existing_windows

        pipeline = Pipeline('DirectPipelineRunner')
        numbers = pipeline | Create('KVs', [(1, 10), (2, 20), (3, 30)])
        result = (numbers
                  | WindowInto('W', windowfn=TestWindowFn())
                  | GroupByKey('G'))
        assert_that(
            result,
            equal_to([(1, [10]), (1, [10]), (2, [20]), (2, [20]), (3, [30]),
                      (3, [30])]))
        pipeline.run()
Example No. 53
    def test_top(self):
        pipeline = Pipeline('DirectPipelineRunner')

        # A parameter we'll be sharing with a custom comparator.
        names = {
            0: 'zo',
            1: 'one',
            2: 'twoo',
            3: 'three',
            5: 'fiiive',
            6: 'sssssix',
            9: 'nniiinne'
        }

        # First for global combines.
        pcoll = pipeline | Create('start', [6, 3, 1, 1, 9, 1, 5, 2, 0, 6])
        result_top = pcoll | combine.Top.Largest('top', 5)
        result_bot = pcoll | combine.Top.Smallest('bot', 4)
        result_cmp = pcoll | combine.Top.Of(
            'cmp', 6, lambda a, b, names: len(names[a]) < len(names[b]),
            names)  # Note parameter passed to comparator.
        assert_that(result_top,
                    equal_to([[9, 6, 6, 5, 3]]),
                    label='assert:top')
        assert_that(result_bot, equal_to([[0, 1, 1, 1]]), label='assert:bot')
        assert_that(result_cmp,
                    equal_to([[9, 6, 6, 5, 3, 2]]),
                    label='assert:cmp')

        # Again for per-key combines.
        pcoll = pipeline | Create(
            'start-perkey', [('a', x) for x in [6, 3, 1, 1, 9, 1, 5, 2, 0, 6]])
        result_key_top = pcoll | combine.Top.LargestPerKey('top-perkey', 5)
        result_key_bot = pcoll | combine.Top.SmallestPerKey('bot-perkey', 4)
        result_key_cmp = pcoll | combine.Top.PerKey(
            'cmp-perkey', 6, lambda a, b, names: len(names[a]) < len(names[b]),
            names)  # Note parameter passed to comparator.
        assert_that(result_key_top,
                    equal_to([('a', [9, 6, 6, 5, 3])]),
                    label='key:top')
        assert_that(result_key_bot,
                    equal_to([('a', [0, 1, 1, 1])]),
                    label='key:bot')
        assert_that(result_key_cmp,
                    equal_to([('a', [9, 6, 6, 5, 3, 2])]),
                    label='key:cmp')
        pipeline.run()
Example No. 54
  def test_top(self):
    pipeline = Pipeline('DirectPipelineRunner')

    # A parameter we'll be sharing with a custom comparator.
    names = {0: 'zo',
             1: 'one',
             2: 'twoo',
             3: 'three',
             5: 'fiiive',
             6: 'sssssix',
             9: 'nniiinne'}

    # First for global combines.
    pcoll = pipeline | Create('start', [6, 3, 1, 1, 9, 1, 5, 2, 0, 6])
    result_top = pcoll | combine.Top.Largest('top', 5)
    result_bot = pcoll | combine.Top.Smallest('bot', 4)
    result_cmp = pcoll | combine.Top.Of(
        'cmp',
        6,
        lambda a, b, names: len(names[a]) < len(names[b]),
        names)  # Note parameter passed to comparator.
    assert_that(result_top, equal_to([[9, 6, 6, 5, 3]]), label='assert:top')
    assert_that(result_bot, equal_to([[0, 1, 1, 1]]), label='assert:bot')
    assert_that(result_cmp, equal_to([[9, 6, 6, 5, 3, 2]]), label='assert:cmp')

    # Again for per-key combines.
    pcoll = pipeline | Create(
        'start-perkey', [('a', x) for x in [6, 3, 1, 1, 9, 1, 5, 2, 0, 6]])
    result_key_top = pcoll | combine.Top.LargestPerKey('top-perkey', 5)
    result_key_bot = pcoll | combine.Top.SmallestPerKey('bot-perkey', 4)
    result_key_cmp = pcoll | combine.Top.PerKey(
        'cmp-perkey',
        6,
        lambda a, b, names: len(names[a]) < len(names[b]),
        names)  # Note parameter passed to comparator.
    assert_that(result_key_top, equal_to([('a', [9, 6, 6, 5, 3])]),
                label='key:top')
    assert_that(result_key_bot, equal_to([('a', [0, 1, 1, 1])]),
                label='key:bot')
    assert_that(result_key_cmp, equal_to([('a', [9, 6, 6, 5, 3, 2])]),
                label='key:cmp')
    pipeline.run()
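
The custom comparator ranks numbers by the length of their names, so Top.Of(6, ...) keeps the six numbers with the longest names. Emulating the same "less than" with stdlib sorting confirms the expected output:

  import functools

  names = {0: 'zo', 1: 'one', 2: 'twoo', 3: 'three',
           5: 'fiiive', 6: 'sssssix', 9: 'nniiinne'}

  def lt(a, b):  # same comparator as in the test
    return len(names[a]) < len(names[b])

  def as_cmp(a, b):
    return -1 if lt(a, b) else (1 if lt(b, a) else 0)

  vals = [6, 3, 1, 1, 9, 1, 5, 2, 0, 6]
  top6 = sorted(vals, key=functools.cmp_to_key(as_cmp), reverse=True)[:6]
  assert top6 == [9, 6, 6, 5, 3, 2]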
Example No. 55
 def test_create_singleton_pcollection(self):
   pipeline = Pipeline(DirectPipelineRunner())
   pcoll = pipeline | Create('label', [[1, 2, 3]])
   assert_that(pcoll, equal_to([[1, 2, 3]]))
   pipeline.run()
Example No. 56
 def test_apply_custom_callable(self):
   pipeline = Pipeline('DirectPipelineRunner')
   pcoll = pipeline | Create('pcoll', [1, 2, 3])
   result = pipeline.apply(PipelineTest.custom_callable, pcoll)
   assert_that(result, equal_to([2, 3, 4]))
   pipeline.run()