Пример #1
0
 def expand(self, p):
   """Expand the Fibonacci composite.

   Base case (level <= 2) emits a single 1; otherwise delegates to two
   recursively-expanded external 'fib' transforms and sums their union.
   """
   if self._level > 2:
     left = p | 'A' >> beam.ExternalTransform(
         'fib', str(self._level - 1).encode('ascii'),
         expansion_service.ExpansionServiceServicer())
     right = p | 'B' >> beam.ExternalTransform(
         'fib', str(self._level - 2).encode('ascii'),
         expansion_service.ExpansionServiceServicer())
     merged = (left, right) | beam.Flatten()
     return merged | beam.CombineGlobally(sum).without_defaults()
   return p | beam.Create([1])
Пример #2
0
    def test_external_empty_spec_translation(self):
        """After clearing an expanded external sub-transform's spec, the
        re-serialized pipeline proto must not carry a spec for it."""
        p = beam.Pipeline()
        xform = beam.ExternalTransform(
            'beam:transforms:xlang:test:prefix',
            ImplicitSchemaPayloadBuilder({'data': u'0'}),
            expansion_service.ExpansionServiceServicer())
        _ = (p | beam.Create(['a', 'b']) | xform)
        p.run().wait_until_finish()

        target_label = (
            'ExternalTransform(beam:transforms:xlang:test:prefix)/TestLabel')
        # Wipe the spec of the matching expanded sub-transform.
        for sub in xform._expanded_components.transforms.values():
            if sub.unique_name == target_label:
                sub.spec.Clear()

        ctx = pipeline_context.PipelineContext()
        proto_pipeline = p.to_runner_api(context=ctx)

        found = None
        for sub in proto_pipeline.components.transforms.values():
            if sub.unique_name == target_label:
                found = sub

        self.assertIsNotNone(found)
        # The serialized form must not mention a 'spec {' message.
        self.assertTrue('spec {' not in str(found).strip())
Пример #3
0
    def test_job_python_from_python_it(self):
        """Round-trips a pipeline with an external 'simple' transform through
        the runner API and executes the reconstructed pipeline."""
        @ptransform.PTransform.register_urn('simple', None)
        class SimpleTransform(ptransform.PTransform):
            def expand(self, pcoll):
                return pcoll | beam.Map(lambda x: 'Simple(%s)' % x)

            def to_runner_api_parameter(self, unused_context):
                return 'simple', None

            @staticmethod
            def from_runner_api_parameter(_0, _1, _2):
                return SimpleTransform()

        test_pipeline = TestPipeline(is_integration_test=True)

        result = (
            test_pipeline
            | beam.Create(['a', 'b'])
            | beam.ExternalTransform(
                'simple', None,
                expansion_service.ExpansionServiceServicer()))
        assert_that(result, equal_to(['Simple(a)', 'Simple(b)']))

        proto_pipeline, _ = test_pipeline.to_runner_api(return_context=True)
        rebuilt = Pipeline.from_runner_api(
            proto_pipeline, test_pipeline.runner, test_pipeline._options)
        rebuilt.run().wait_until_finish()
Пример #4
0
def main(argv):
    """Serves an expansion service plus artifact retrieval over gRPC.

    Args:
      argv: command-line arguments. Recognizes -p/--port (required) and
        --fully_qualified_name_glob; all remaining arguments are forwarded
        as pipeline options.

    Blocks forever; SIGTERM/SIGINT trigger a graceful server shutdown.
    """
    parser = argparse.ArgumentParser()
    # required=True: previously a missing --port slipped through as None and
    # only failed later inside add_insecure_port('localhost:None') with a
    # confusing error; argparse now reports it up front.
    parser.add_argument('-p',
                        '--port',
                        type=int,
                        required=True,
                        help='port on which to serve the job api')
    parser.add_argument('--fully_qualified_name_glob', default=None)
    known_args, pipeline_args = parser.parse_known_args(argv)
    pipeline_options = PipelineOptions(
        pipeline_args +
        ["--experiments=beam_fn_api", "--sdk_location=container"])

    with fully_qualified_named_transform.FullyQualifiedNamedTransform.with_filter(
            known_args.fully_qualified_name_glob):

        server = grpc.server(thread_pool_executor.shared_unbounded_instance())
        beam_expansion_api_pb2_grpc.add_ExpansionServiceServicer_to_server(
            expansion_service.ExpansionServiceServicer(pipeline_options),
            server)
        beam_artifact_api_pb2_grpc.add_ArtifactRetrievalServiceServicer_to_server(
            artifact_service.ArtifactRetrievalService(
                artifact_service.BeamFilesystemHandler(None).file_reader),
            server)
        server.add_insecure_port('localhost:{}'.format(known_args.port))
        server.start()
        _LOGGER.info('Listening for expansion requests at %d', known_args.port)

        def cleanup(unused_signum, unused_frame):
            # grace=None stops the server without waiting for in-flight RPCs.
            _LOGGER.info('Shutting down expansion service.')
            server.stop(None)

        signal.signal(signal.SIGTERM, cleanup)
        signal.signal(signal.SIGINT, cleanup)
        # blocking main thread forever.
        signal.pause()
Пример #5
0
    def test_multi(self):
        """An external transform can consume and produce multiple named
        PCollections, including a singleton side input."""
        @ptransform.PTransform.register_urn('multi', None)
        class MutltiTransform(ptransform.PTransform):
            def expand(self, pcolls):
                merged = (pcolls['main1'], pcolls['main2']) | beam.Flatten()
                with_suffix = merged | beam.Map(
                    lambda x, s: x + s,
                    beam.pvalue.AsSingleton(pcolls['side']))
                return {
                    'main': with_suffix,
                    'side': pcolls['side'] | beam.Map(lambda x: x + x),
                }

            def to_runner_api_parameter(self, unused_context):
                return 'multi', None

            @staticmethod
            def from_runner_api_parameter(unused_parameter, unused_context):
                return MutltiTransform()

        with beam.Pipeline() as p:
            first = p | 'Main1' >> beam.Create(['a', 'bb'], reshuffle=False)
            second = p | 'Main2' >> beam.Create(['x', 'yy', 'zzz'],
                                               reshuffle=False)
            side = p | 'Side' >> beam.Create(['s'])
            inputs = dict(main1=first, main2=second, side=side)
            res = inputs | beam.ExternalTransform(
                'multi', None, expansion_service.ExpansionServiceServicer())
            assert_that(res['main'],
                        equal_to(['as', 'bbs', 'xs', 'yys', 'zzzs']))
            assert_that(res['side'], equal_to(['ss']), label='CheckSide')
Пример #6
0
    def __init__(self,
                 transform,
                 pkgs=DEFAULT_PKGS,
                 container=DEFAULT_CONTAINER_NAME,
                 *args,
                 **kwargs):
        """Register *transform* under a module-qualified URN and bind it to
        an in-process expansion service using a DOCKER environment.

        Extra positional/keyword arguments are pickled into the transform
        payload. *pkgs* is currently unused by the active configuration.
        """
        transform_urn = '{}.{}'.format(transform.__module__,
                                       transform.__name__)
        beam.PTransform.register_urn(
            transform_urn,
            bytes,
            constructor=functools.partial(self.constructor, transform))

        # NOTE(review): an alternative config mounted /tmp/rillbeam and set
        # DOCKER_ENTRYPOINT_SETPKG from *pkgs*; only the plain container
        # image is used here.
        pipeline_options = beam.pipeline.PipelineOptions(
            environment_type='DOCKER',
            environment_config=container,
        )

        service = expansion_service.ExpansionServiceServicer(
            options=pipeline_options)
        super(EnvTransform, self).__init__(
            transform_urn, pickle.dumps((args, kwargs)), service)
Пример #7
0
    def test_pipeline_generation(self):
        """Round-tripping through the runner API expands the external
        transform into its sub-transforms."""
        @ptransform.PTransform.register_urn('simple', None)
        class SimpleTransform(ptransform.PTransform):
            def expand(self, pcoll):
                return pcoll | 'TestLabel' >> beam.Map(
                    lambda x: 'Simple(%s)' % x)

            def to_runner_api_parameter(self, unused_context):
                return 'simple', None

            @staticmethod
            def from_runner_api_parameter(unused_parameter, unused_context):
                return SimpleTransform()

        p = beam.Pipeline()
        result = (p
                  | beam.Create(['a', 'b'])
                  | beam.ExternalTransform(
                      'simple', None,
                      expansion_service.ExpansionServiceServicer()))
        assert_that(result, equal_to(['Simple(a)', 'Simple(b)']))

        proto, _ = p.to_runner_api(return_context=True)
        rebuilt = Pipeline.from_runner_api(proto, p.runner, p._options)

        # The original pipeline keeps the external transform un-expanded.
        self.assertEqual([], p.transforms_stack[0].parts[1].parts)

        # The reconstructed pipeline carries the expanded sub-transforms.
        self.assertNotEqual([], rebuilt.transforms_stack[0].parts[1].parts)
        self.assertEqual(
            u'ExternalTransform(simple)/TestLabel',
            rebuilt.transforms_stack[0].parts[1].parts[0].full_label)
Пример #8
0
def main(unused_argv):
    """Serve an expansion service plus artifact retrieval over gRPC.

    Registers a PyPI artifact requirement, then blocks until SIGTERM or
    SIGINT (handled by the module-level ``cleanup``).
    """
    PyPIArtifactRegistry.register_artifact('beautifulsoup4', '>=4.9,<5.0')
    parser = argparse.ArgumentParser()
    parser.add_argument('-p',
                        '--port',
                        type=int,
                        help='port on which to serve the job api')
    parser.add_argument('--fully_qualified_name_glob', default=None)
    args = parser.parse_args()

    global server
    with fully_qualified_named_transform.FullyQualifiedNamedTransform.with_filter(
            args.fully_qualified_name_glob):
        server = grpc.server(thread_pool_executor.shared_unbounded_instance())
        service_options = PipelineOptions([
            "--experiments", "beam_fn_api", "--sdk_location",
            "container"
        ])
        beam_expansion_api_pb2_grpc.add_ExpansionServiceServicer_to_server(
            expansion_service.ExpansionServiceServicer(service_options),
            server)
        beam_artifact_api_pb2_grpc.add_ArtifactRetrievalServiceServicer_to_server(
            artifact_service.ArtifactRetrievalService(
                artifact_service.BeamFilesystemHandler(None).file_reader),
            server)
        server.add_insecure_port('localhost:{}'.format(args.port))
        server.start()
        _LOGGER.info('Listening for expansion requests at %d', args.port)

        signal.signal(signal.SIGTERM, cleanup)
        signal.signal(signal.SIGINT, cleanup)
        # blocking main thread forever.
        signal.pause()
Пример #9
0
    def __init__(self, transform, envvars, *args, **kwargs):
        """Register *transform* under a module-qualified URN and bind it to
        an in-process expansion service using a subprocess SDK harness.

        Args:
          transform: the PTransform class to expose externally.
          envvars: mapping of environment variables to prepend to the
            worker command line.
          *args, **kwargs: pickled into the transform payload.
        """
        urn = '{}.{}'.format(transform.__module__, transform.__name__)
        if urn not in beam.PTransform._known_urns:
            beam.PTransform.register_urn(urn,
                                         bytes,
                                         constructor=functools.partial(
                                             self.constructor, transform))

        import sys
        from apache_beam.portability import python_urns
        env = ' '.join('{}={}'.format(k, v) for k, v in envvars.items())
        # BUG FIX: the original called .format() on a bytes literal, which
        # raises AttributeError on Python 3 (bytes has no .format), and mixed
        # a str env string with an encoded executable. Format as str first,
        # then encode the whole command once.
        options = beam.pipeline.PipelineOptions(
            environment_type=python_urns.SUBPROCESS_SDK,
            environment_config=(
                '{} {} -m apache_beam.runners.worker.sdk_worker_main'.format(
                    env, sys.executable).encode('ascii')))

        payload = pickle.dumps((args, kwargs))
        endpoint = expansion_service.ExpansionServiceServicer(options=options)
        super(EnvTransform, self).__init__(urn, payload, endpoint)
Пример #10
0
 def test_simple(self):
     """The external 'simple' transform wraps each element as Simple(x)."""
     with beam.Pipeline() as pipeline:
         output = (
             pipeline
             | beam.Create(['a', 'b'])
             | beam.ExternalTransform(
                 'simple', None,
                 expansion_service.ExpansionServiceServicer()))
         assert_that(output, equal_to(['Simple(a)', 'Simple(b)']))
Пример #11
0
 def test_payload(self):
     """The 'payload' external transform appends its byte payload."""
     with beam.Pipeline() as pipeline:
         output = (
             pipeline
             | beam.Create(['a', 'bb'], reshuffle=False)
             | beam.ExternalTransform(
                 'payload', b's',
                 expansion_service.ExpansionServiceServicer()))
         assert_that(output, equal_to(['as', 'bbs']))
Пример #12
0
 def test_multi(self):
   """External 'multi' transform joins two main inputs with a side input."""
   with beam.Pipeline() as p:
     first = p | 'Main1' >> beam.Create(['a', 'bb'], reshuffle=False)
     second = p | 'Main2' >> beam.Create(['x', 'yy', 'zzz'], reshuffle=False)
     side = p | 'Side' >> beam.Create(['s'])
     inputs = dict(main1=first, main2=second, side=side)
     outputs = inputs | beam.ExternalTransform(
         'multi', None, expansion_service.ExpansionServiceServicer())
     assert_that(outputs['main'], equal_to(['as', 'bbs', 'xs', 'yys', 'zzzs']))
     assert_that(outputs['side'], equal_to(['ss']), label='CheckSide')
Пример #13
0
    def test_external_transform_finder_leaf(self):
        """A pipeline whose leaf is an external transform must be flagged
        as containing external transforms."""
        p = beam.Pipeline()
        _ = (p
             | beam.Create(['a', 'b'])
             | beam.ExternalTransform(
                 'beam:transforms:xlang:test:nooutput',
                 ImplicitSchemaPayloadBuilder({'data': u'0'}),
                 expansion_service.ExpansionServiceServicer()))
        p.run().wait_until_finish()

        self.assertTrue(p.contains_external_transforms)
Пример #14
0
 def test_as_external_transform_no_kwargs(self):
   """Fully-qualified-name external transform works with positional args."""
   with FullyQualifiedNamedTransform.with_filter('*'):
     with beam.Pipeline() as p:
       payload = ImplicitSchemaPayloadBuilder({
           'constructor': 'apache_beam.transforms'
           '.fully_qualified_named_transform_test._TestTransform',
           'args': beam.Row(arg0='x', arg1='y'),
       })
       result = (
           p
           | beam.Create(['a', 'b', 'c'])
           | beam.ExternalTransform(
               PYTHON_FULLY_QUALIFIED_NAMED_TRANSFORM_URN,
               payload,
               expansion_service.ExpansionServiceServicer()))
       assert_that(result, equal_to(['xay', 'xby', 'xcy']))
Пример #15
0
 def test_no_output_coder(self):
     """An external transform output without a declared coder falls back
     to the Any element type."""
     xform = beam.ExternalTransform(
         'map_to_union_types', None,
         expansion_service.ExpansionServiceServicer())
     with beam.Pipeline() as p:
         result = p | beam.Create([2, 2], reshuffle=False) | xform
         assert_that(result, equal_to([2, 2]))
     ctx = pipeline_context.PipelineContext(
         xform._expanded_components)
     self.assertEqual(len(xform._expanded_transform.outputs), 1)
     for pcol_id in xform._expanded_transform.outputs.values():
         pcol = ctx.pcollections.get_by_id(pcol_id)
         self.assertEqual(pcol.element_type, typehints.Any)
Пример #16
0
def main(unused_argv):
    """Serve an expansion service on localhost and block forever."""
    parser = argparse.ArgumentParser()
    parser.add_argument('-p',
                        '--port',
                        type=int,
                        help='port on which to serve the job api')
    args = parser.parse_args()
    global server
    server = grpc.server(futures.ThreadPoolExecutor(max_workers=2))
    beam_expansion_api_pb2_grpc.add_ExpansionServiceServicer_to_server(
        expansion_service.ExpansionServiceServicer(PipelineOptions()), server)
    address = 'localhost:{}'.format(args.port)
    server.add_insecure_port(address)
    server.start()
    logging.info('Listening for expansion requests at %d', args.port)

    # Block the main thread forever; the server runs on worker threads.
    signal.pause()
Пример #17
0
  def test_pipeline_generation_with_runner_overrides(self):
    """Dataflow runner overrides must keep the PubSub read output wired
    directly into the external transform's input.

    Builds a streaming Dataflow pipeline in dry-run mode, then checks the
    serialized proto for both transforms and that the read's single output
    PCollection is exactly the external transform's input.
    """
    pipeline_properties = [
        '--dataflow_endpoint=ignored',
        '--job_name=test-job',
        '--project=test-project',
        '--staging_location=ignored',
        '--temp_location=/dev/null',
        '--no_auth',
        '--dry_run=True',
        '--sdk_location=container',
        '--runner=DataflowRunner',
        '--streaming'
    ]

    with beam.Pipeline(options=PipelineOptions(pipeline_properties)) as p:
      _ = (
          p
          | beam.io.ReadFromPubSub(
              subscription=
              'projects/dummy-project/subscriptions/dummy-subscription')
          | beam.ExternalTransform(
              'beam:transforms:xlang:test:prefix',
              ImplicitSchemaPayloadBuilder({'data': u'0'}),
              expansion_service.ExpansionServiceServicer()))

    pipeline_proto, _ = p.to_runner_api(return_context=True)

    pubsub_read_transform = None
    external_transform = None
    # Iterate the transform protos directly instead of indexing by key
    # (the original loop variable `id` also shadowed the builtin).
    for transform in pipeline_proto.components.transforms.values():
      if 'beam:transforms:xlang:test:prefix' in transform.unique_name:
        external_transform = transform
      if 'ReadFromPubSub' in transform.unique_name:
        pubsub_read_transform = transform

    if not (pubsub_read_transform and external_transform):
      raise ValueError(
          'Could not find an external transform and the PubSub read transform '
          'in the pipeline')

    self.assertEqual(1, len(list(pubsub_read_transform.outputs.values())))
    self.assertEqual(
        list(pubsub_read_transform.outputs.values()),
        list(external_transform.inputs.values()))
Пример #18
0
def run(port=8197, options=None):
    """Start an expansion service on *port* and block forever.

    If *options* is None, pipeline options are parsed from the command line.
    """
    global server

    if options is None:
        import argparse
        arg_parser = argparse.ArgumentParser()
        _, extra_args = arg_parser.parse_known_args()
        options = PipelineOptions(extra_args)

    server = grpc.server(futures.ThreadPoolExecutor(max_workers=2))
    beam_expansion_api_pb2_grpc.add_ExpansionServiceServicer_to_server(
        expansion_service.ExpansionServiceServicer(options), server)
    address = 'localhost:{}'.format(port)
    server.add_insecure_port(address)
    server.start()
    print('Listening for expansion requests at {!r}'.format(address))

    # Block the main thread forever; the server runs on worker threads.
    signal.pause()
Пример #19
0
def main(unused_argv):
    """Serve an expansion service (beam_fn_api enabled) until signaled."""
    parser = argparse.ArgumentParser()
    parser.add_argument('-p',
                        '--port',
                        type=int,
                        help='port on which to serve the job api')
    args = parser.parse_args()
    global server
    server = grpc.server(UnboundedThreadPoolExecutor())
    servicer = expansion_service.ExpansionServiceServicer(
        PipelineOptions(["--experiments", "beam_fn_api"]))
    beam_expansion_api_pb2_grpc.add_ExpansionServiceServicer_to_server(
        servicer, server)
    server.add_insecure_port('localhost:{}'.format(args.port))
    server.start()
    _LOGGER.info('Listening for expansion requests at %d', args.port)

    # Graceful shutdown via the module-level cleanup handler.
    signal.signal(signal.SIGTERM, cleanup)
    signal.signal(signal.SIGINT, cleanup)
    # blocking main thread forever.
    signal.pause()
Пример #20
0
    def test_simple(self):
        """A registered 'simple' URN expands and runs via ExternalTransform."""
        @ptransform.PTransform.register_urn('simple', None)
        class SimpleTransform(ptransform.PTransform):
            def expand(self, pcoll):
                return pcoll | beam.Map(lambda x: 'Simple(%s)' % x)

            def to_runner_api_parameter(self, unused_context):
                return 'simple', None

            @staticmethod
            def from_runner_api_parameter(unused_parameter, unused_context):
                return SimpleTransform()

        with beam.Pipeline() as p:
            output = (
                p
                | beam.Create(['a', 'b'])
                | beam.ExternalTransform(
                    'simple', None,
                    expansion_service.ExpansionServiceServicer()))
            assert_that(output, equal_to(['Simple(a)', 'Simple(b)']))
Пример #21
0
    def test_pipeline_generation(self):
        """From-proto reconstruction expands the external 'simple' transform."""
        p = beam.Pipeline()
        result = (p
                  | beam.Create(['a', 'b'])
                  | beam.ExternalTransform(
                      'simple', None,
                      expansion_service.ExpansionServiceServicer()))
        assert_that(result, equal_to(['Simple(a)', 'Simple(b)']))

        proto, _ = p.to_runner_api(return_context=True)
        rebuilt = Pipeline.from_runner_api(proto, p.runner, p._options)

        # The original pipeline keeps the external transform un-expanded.
        self.assertEqual([], p.transforms_stack[0].parts[1].parts)

        # The reconstructed pipeline contains the expanded sub-transforms.
        self.assertNotEqual([], rebuilt.transforms_stack[0].parts[1].parts)
        self.assertEqual(
            u'ExternalTransform(simple)/TestLabel',
            rebuilt.transforms_stack[0].parts[1].parts[0].full_label)
Пример #22
0
    def test_payload(self):
        """A bytes payload is delivered to the expanded external transform."""
        @ptransform.PTransform.register_urn('payload', bytes)
        class PayloadTransform(ptransform.PTransform):
            def __init__(self, payload):
                self._payload = payload

            def expand(self, pcoll):
                return pcoll | beam.Map(lambda x, s: x + s, self._payload)

            def to_runner_api_parameter(self, unused_context):
                return b'payload', self._payload.encode('ascii')

            @staticmethod
            def from_runner_api_parameter(payload, unused_context):
                return PayloadTransform(payload.decode('ascii'))

        with beam.Pipeline() as p:
            output = (
                p
                | beam.Create(['a', 'bb'], reshuffle=False)
                | beam.ExternalTransform(
                    'payload', b's',
                    expansion_service.ExpansionServiceServicer()))
            assert_that(output, equal_to(['as', 'bbs']))
Пример #23
0
    def test_pipeline_generation(self):
        """From-proto reconstruction expands the xlang prefix transform."""
        p = beam.Pipeline()
        _ = (p
             | beam.Create(['a', 'b'])
             | beam.ExternalTransform(
                 'beam:transforms:xlang:test:prefix',
                 ImplicitSchemaPayloadBuilder({'data': u'0'}),
                 expansion_service.ExpansionServiceServicer()))

        proto, _ = p.to_runner_api(return_context=True)
        rebuilt = Pipeline.from_runner_api(proto, p.runner, p._options)

        # The original pipeline keeps the external transform un-expanded.
        self.assertEqual([], p.transforms_stack[0].parts[1].parts)

        # The reconstructed pipeline carries the expanded sub-transforms.
        self.assertNotEqual([], rebuilt.transforms_stack[0].parts[1].parts)
        self.assertEqual(
            u'ExternalTransform(beam:transforms:xlang:test:prefix)/TestLabel',
            rebuilt.transforms_stack[0].parts[1].parts[0].full_label)
Пример #24
0
def main(unused_argv):
    """Serve an expansion service backed by a DOCKER SDK harness."""
    parser = argparse.ArgumentParser()
    parser.add_argument('-p',
                        '--port',
                        type=int,
                        help='port on which to serve the job api')
    args = parser.parse_args()
    global server
    server = grpc.server(UnboundedThreadPoolExecutor())

    # DOCKER SDK harness. (A PROCESS-based harness could instead be
    # configured via PipelineOptions.from_dictionary with
    # environment_type='PROCESS' and an environment_config pointing at a
    # locally-built boot executable.)
    harness_options = PipelineOptions([
        "--experiments", "beam_fn_api", "--sdk_location", "container"
    ])
    beam_expansion_api_pb2_grpc.add_ExpansionServiceServicer_to_server(
        expansion_service.ExpansionServiceServicer(harness_options), server)

    server.add_insecure_port('localhost:{}'.format(args.port))
    server.start()
    _LOGGER.info('Listening for expansion requests at %d', args.port)

    signal.signal(signal.SIGTERM, cleanup)
    signal.signal(signal.SIGINT, cleanup)
    # blocking main thread forever.
    signal.pause()