Пример #1
0
        return transform(*args, **kwargs)


def main(options):
    """Recreate the scratch directory and run the multi-environment pipeline.

    The directory ``/tmp/rillbeam/multiexternal`` is wiped and re-created,
    then three ``(root, filename)`` pairs are fed through the ``Join`` and
    ``Touch`` transforms (each wrapped in ``EnvTransform``) and finally
    through ``Log``.  The call waits for the pipeline to finish.

    Args:
        options: value forwarded to ``beam.Pipeline(options=...)``.
    """
    root = '/tmp/rillbeam/multiexternal'

    # Always start from an empty directory: drop leftovers of earlier runs.
    if os.path.exists(root):
        shutil.rmtree(root)
    os.makedirs(root)

    file_names = ('f1.ext', 'f2.ext', 'f3.ext')
    seeds = [(root, file_name) for file_name in file_names]

    pipeline = beam.Pipeline(options=options)
    generated = pipeline | 'Gen' >> beam.Create(seeds)
    joined = generated | 'Join' >> EnvTransform(Join)
    touched = joined | 'Touch' >> EnvTransform(Touch)
    _ = touched | 'Log' >> Log()

    # proto, ctx = pipeline.to_runner_api(return_context=True)
    run_result = pipeline.run()
    run_result.wait_until_finish()


if __name__ == '__main__':
    # Script entry point: obtain the pipeline options from the project
    # helper (its second return value is not needed here) and run main().
    import rillbeam.helpers

    pipeline_args, _ignored = rillbeam.helpers.get_options(__name__)
    main(pipeline_args)
Пример #2
0
"""
Basic graph to showcase that logging happens eagerly.
"""
import apache_beam as beam

from rillbeam.transforms import SleepFn, Log


def main(options, runner=None):
    """Build and run a small create -> sleep -> log pipeline.

    The integers produced by ``range(10)`` are passed through ``SleepFn``
    (invoked with ``duration=1.0``) and then through ``Log``.  The call
    waits for the pipeline to finish.

    Args:
        options: value forwarded to ``beam.Pipeline(options=...)``.
        runner: optional runner forwarded to ``beam.Pipeline(runner=...)``;
            ``None`` by default.
    """
    # NOTE(review): these helpers are only referenced by the commented-out
    # call below; the import is kept so import-time behaviour is unchanged.
    from rillbeam.helpers import (write_pipeline_text,
                                  write_pipeline_svg)

    pipeline = beam.Pipeline(options=options, runner=runner)
    numbers = pipeline | 'Init' >> beam.Create(range(10))
    delayed = numbers | 'Sleep' >> beam.ParDo(SleepFn(), duration=1.0)
    _ = delayed | 'Log' >> Log()

    # write_pipeline_svg(pipeline, __name__)

    run_result = pipeline.run()
    run_result.wait_until_finish()


if __name__ == '__main__':
    # Script entry point: parse the options, then pick the runner.  The
    # Rill runner is imported only when the 'rill' defaults were requested;
    # otherwise runner stays None and is passed on to main() as such.
    from rillbeam.helpers import get_options

    options, args = get_options(__name__)
    if args.defaults == 'rill':
        import rill.runner
        runner = rill.runner.RillRunner()
    else:
        runner = None
    main(options, runner=runner)
Пример #3
0
     | 'GroupByKey' >> beam.GroupByKey()
     | 'Format' >> beam.Map(format_result)
     | 'ToBytes' >> beam.Map(lambda x: bytes(x))
     | 'PubSubOutflow' >> beam.io.WriteToPubSub(OUTPUT_TOPIC))

    print
    cprint('Starting pipeline...', 'yellow', attrs=['bold'])
    result = pipe.run()  # type: PipelineResult
    time.sleep(10)
    while result.state != PipelineState.RUNNING:
        time.sleep(10)

    try:
        pubsub_interface(SUBSCRIPTION_PATH, INPUT_TOPIC)
    finally:
        print
        cprint('Shutting down pipeline...', 'yellow', attrs=['bold'])
        result.cancel()
        print


if __name__ == '__main__':
    # Script entry point.  get_options receives None and 'streaming' as its
    # second and third positional arguments -- presumably "no custom parser"
    # and the streaming defaults; confirm against rillbeam.helpers.
    from rillbeam.helpers import get_options

    pipeline_args, _ = get_options(__name__, None, 'streaming')

    # Raise the root logger to INFO before the pipeline starts.
    root_logger = logging.getLogger()
    root_logger.setLevel(logging.INFO)

    main(pipeline_args)
Пример #4
0
           attrs=['bold'])
    print

    cprint('Generating farm jobs:', 'yellow')
    for k, v in farm_kw:
        print '  {}={}'.format(k, colored(repr(v), 'white', attrs=['bold']))
    print

    publisher = pubsub_v1.PublisherClient()
    for payload in rillbeam.data.farm.gen_farm_messages(**dict(farm_kw)):
        publisher.publish(INPUT_TOPIC,
                          data=bytes(json.dumps(payload)),
                          tags=__name__)

    try:
        result.wait_until_finish()
    except KeyboardInterrupt:
        print
        cprint('Shutting down...', 'yellow')
        result.cancel()


if __name__ == '__main__':
    # Script entry point: set the root logger to INFO first, then parse the
    # options and hand both the pipeline options and the remaining known
    # arguments to main().
    from rillbeam.helpers import get_options

    root_logger = logging.getLogger()
    root_logger.setLevel(logging.INFO)

    parsed = get_options(__name__, None, 'streaming')
    pipeline_args, known_args = parsed

    main(pipeline_args, known_args)
Пример #5
0
         # | 'KafkaWrite' >> WriteToKafka(
         #       producer_config={
         #           'bootstrap.servers': 'localhost:9092',
         #       },
         #       topic=TOPIC,
         #       # key_serializer='org.apache.kafka.common.serialization.ByteArraySerializer',
         #       # value_serializer='org.apache.kafka.common.serialization.ByteArraySerializer',
         #       expansion_service='localhost:8097',
         #   )
         )


if __name__ == '__main__':
    # Script entry point.  A '--send' flag is added to the project parser;
    # when it is given the script calls send(), otherwise it calls main().
    from rillbeam.helpers import get_parser, get_options

    parser = get_parser()
    parser.add_argument('--send', action='store_true')

    root_logger = logging.getLogger()
    root_logger.setLevel(logging.INFO)

    pipeline_args, known_args = get_options(__name__, parser, 'streaming')

    if not known_args.send:
        main(pipeline_args, known_args)
    else:
        send(pipeline_args)
Пример #6
0
"""
Basic graph to test docker volume mounting.
"""
import apache_beam as beam

DIR_NAME = '/tmp/beam_test'


def touch(f):
    """Create the file ``f`` if it is missing and refresh its timestamps.

    The file is opened in append mode, so existing content is left
    untouched.  Returns ``None``.

    Args:
        f: path of the file to touch.
    """
    from os import utime

    with open(f, 'a') as _handle:
        # Passing None sets access and modification times to "now".
        utime(f, None)


def main(options):
    """Run a one-element pipeline that touches ``<DIR_NAME>/f1.ext``.

    The pipeline is used as a context manager, so it is run when the
    ``with`` block exits.

    Args:
        options: value forwarded to ``beam.Pipeline(options=...)``.
    """
    target = '{}/f1.ext'.format(DIR_NAME)
    with beam.Pipeline(options=options) as pipeline:
        seeded = pipeline | 'Init' >> beam.Create([target])
        _ = seeded | 'Touch' >> beam.Map(touch)


if __name__ == '__main__':
    # Script entry point.  The environment config maps DIR_NAME to the same
    # path via '-v' and names the beam/python:latest image under
    # REGISTRY_URL.
    from rillbeam.helpers import get_options, REGISTRY_URL

    docker_config = '-v {}:{} {}/beam/python:latest'.format(
        DIR_NAME, DIR_NAME, REGISTRY_URL)
    pipeline_args, _ = get_options(__name__, environment_config=docker_config)
    main(pipeline_args)