Example #1
def test_spark_producer(app):
    producer_output = []

    @app.consumer()
    def callback_func(a):
        producer_output.append(a)

    @app.pipeline()
    def spark_simple_pipeline(pipeline, a, b):
        return a.subscribe_consumer(callback_func)

    @app.spark_producer(spark_simple_pipeline)
    def spark_producer():
        """
        Simple Spark producer that yields a DataFrame to the `spark_simple_pipeline` pipeline.
        More information here: http://stairspy.com/#producer
        """

        spark = SparkSession \
            .builder \
            .getOrCreate()

        # read the file as an RDD of JSON strings (10 partitions)
        f = spark.sparkContext.textFile("test.row", 10)
        df = spark.read.json(f)

        return df

    spark_producer.flush()
    spark_producer()
    spark_simple_pipeline.compile()
    run_pipelines(app)

    assert len(producer_output) == 20
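Besides the stairs decorators, this example assumes `from pyspark.sql import SparkSession` at module level and a local newline-delimited JSON file `test.row` containing 20 records, which is what the final assertion counts. A minimal, hypothetical sketch of generating such a fixture file:

import json

def make_test_row_file(path="test.row", n_records=20):
    # hypothetical helper: write n_records JSON objects, one per line,
    # so that spark.read.json can parse the resulting RDD of strings
    with open(path, "w") as f:
        for i in range(n_records):
            f.write(json.dumps({"x": i}) + "\n")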
Example #2
def test_connected_pipelines_multiple(app):
    t = GlobalTestData()

    @app.pipeline()
    def p_builder(worker, sentence, use_lower):
        data = concatenate(sentence=sentence, use_lower=use_lower)

        data_with_name = data\
            .subscribe_flow(NameExtractionOneWayFlow())

        return data_with_name

    @app.pipeline()
    def p_builder_general(worker, sentence, use_lower):
        data = concatenate(sentence=sentence, use_lower=use_lower)

        v1 = data.subscribe_pipeline(p_builder)\
                 .subscribe_flow(NameExtractionFlowMultiple(use_lower=use_lower))
        v2 = data.subscribe_pipeline(p_builder)\
                 .subscribe_flow(NameExtractionFlowMultiple(use_lower=use_lower))

        return concatenate(v1=v1, v2=v2).subscribe_func(t.save_one_item)

    p_builder.compile()
    p_builder_general.compile()
    p_builder_general(sentence="Oleg", use_lower=True)
    run_pipelines(app)
    result = t.get_result()

    assert result['v1']['names'][0] == "oleg"
    assert result['v2']['names'][0] == "oleg"
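GlobalTestData is a test helper that is not defined in these snippets. A hedged sketch, reconstructed only from how it is called in the examples (save_one_item, save_multiple_items, get_result); the real implementation may differ, for instance by sharing state across worker processes:

class GlobalTestData:
    """Hypothetical reconstruction of the shared test-result store."""

    def __init__(self):
        self._results = []

    def save_one_item(self, **kwargs):
        # keep exactly one result dict
        self._results = [kwargs]

    def save_multiple_items(self, **kwargs):
        # accumulate result dicts
        self._results.append(kwargs)

    def get_result(self):
        # return the single dict when only one item was saved,
        # otherwise the whole list (a simplification)
        return self._results[0] if len(self._results) == 1 else list(self._results)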
Example #3
def test_multiple_pipelines(app):
    t = GlobalTestData()

    @app.pipeline()
    def callback_func(pipeline, x):
        return x.subscribe_func(t.save_multiple_items)

    @app.pipeline()
    def callback_func2(pipeline, x):
        return x.subscribe_func(t.save_multiple_items)

    @app.producer(callback_func, callback_func2)
    def simple_producer():
        for i in range(100):
            yield dict(x=1)

    callback_func.compile()
    callback_func2.compile()

    simple_producer.flush()
    simple_producer.run()

    run_pipelines(app)

    assert len(t.get_result()) == 200
Example #4
def test_make_add_value_with_flow(app):
    t = GlobalTestData()

    @app.pipeline(config=dict(path='123'))
    def p_builder3(worker, sentence2):
        return sentence2\
            .rename(sentence=sentence2)\
            .add_value(path=worker.config['path'])\
            .subscribe_flow(NameExtractionOneWayFlow(use_lower=True))\
            .subscribe_func(t.save_one_item)

    @app.pipeline()
    def p_builder_general3(worker, sentence2: DataFrame):
        return sentence2.subscribe_pipeline(p_builder3,
                                            config=dict(path='321'))

    p_builder3.compile()
    p_builder_general3.compile()
    p_builder_general3(sentence2="Oleg")
    run_pipelines(app)

    result = t.get_result()
    assert check_keys(result.keys(), ['sentence', 'path', 'names'])
    assert result['names'][0] == "oleg"
    assert result['path'] == '321'
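check_keys is another helper these snippets do not define. A plausible sketch, assuming it only checks that every expected key is present in the result (the real helper may be stricter):

def check_keys(actual_keys, expected_keys):
    # hypothetical: true when every expected key occurs among the actual keys
    return set(expected_keys) <= set(actual_keys)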
Example #5
def test_worker_producer(app):
    t = GlobalTestData()

    @app.pipeline()
    def simple_pipeline(pipeline, x):
        return x.subscribe_func(t.save_multiple_items)

    @app.producer(simple_pipeline)
    def batch_handler(x):
        yield dict(x=x)

    @app.batch_producer(batch_handler)
    def worker_producer():
        for i in range(10):
            # each yielded dict represents one batch of data
            yield dict(x=1)

    simple_pipeline.compile()
    worker_producer()
    try:
        worker_producer()
        run_jobs_processor(app.project,
                           [batch_handler],
                           die_when_empty=True)
    except SystemExit:
        pass

    run_pipelines(app)

    assert len(t.get_result()) == 20  # 2 producer runs x 10 batches, one item per batch
Example #6
def test_connected_pipelines(app):
    @app.consumer_iter()
    def save_globaly(func1, func2, **kwargs):
        return dict(func1=func1, func2=func2)

    @app.pipeline()
    def p_builder(worker, sentence):
        return sentence \
            .subscribe_func(lambda sentence: dict(func1="ok"), name='root1') \
            .subscribe_func_as_producer(
                lambda sentence: [dict(func2="ok"), dict(func2="ok")],
                name='root2') \
            .subscribe_consumer(save_globaly)

    p_builder.compile()
    p_builder(sentence="Oleg")
    run_pipelines(app)
    it = save_globaly.iter(die_when_empty=True)
    result = next(it)

    assert len(result) == 2
Example #7
def test_connected_pipelines(app):
    @app.pipeline()
    def p_builder(worker, sentence, use_lower):
        data = concatenate(sentence=sentence, use_lower=use_lower)

        data_with_name = data \
            .subscribe_flow(NameExtractionOneWayFlow(),
                            as_worker=False)

        return data_with_name

    @app.pipeline()
    def p_builder_general(worker, sentence, use_lower):
        data = concatenate(sentence=sentence, use_lower=use_lower)

        return data.subscribe_pipeline(p_builder)\
                   .subscribe_flow(NameExtractionFlowMultiple(),
                                   as_worker=False)

    p_builder.compile()
    p_builder_general.compile()
    p_builder_general(sentence="Oleg", use_lower=True)
    run_pipelines(app)
Example #8
def test_deep_tree_functions(app):
    t = GlobalTestData()

    @app.consumer()
    def save_globaly(name, result, **kwargs):
        t.save_multiple_items(**{name: result})

    @app.pipeline()
    def p_builder(worker, sentence):
        root_branch = sentence \
            .subscribe_func(lambda sentence: dict(func1="ok"), name='root1') \
            .subscribe_func(lambda sentence: dict(func2="ok"), name='root2')

        return concatenate(branch_1=root_branch.subscribe_pipeline(branch_1),
                           branch_2=root_branch.subscribe_pipeline(branch_2),
                           branch_3=root_branch.subscribe_pipeline(branch_3))

    @app.pipeline()
    def branch_1(work, func1, func2):
        root_branch = concatenate(func1=func1, func2=func2)

        return root_branch \
            .add_value(name='branch_1') \
            .subscribe_func(lambda func1, func2: dict(func1_1="ok"),
                            name='branch_1_1') \
            .subscribe_func(lambda func1, func2: dict(result="branch_1"),
                            name='branch_1_2')\
            .subscribe_consumer(save_globaly)

    @app.pipeline()
    def branch_2(work, func1, func2):
        root_branch = concatenate(func1=func1, func2=func2)
        return root_branch \
            .add_value(name='branch_2') \
            .subscribe_func(lambda func1, func2: dict(func1_1="ok"),
                            name='branch_2_1') \
            .subscribe_func(lambda func1, func2: dict(result="branch_2"),
                            name='branch_2_2')\
            .subscribe_consumer(save_globaly)

    @app.pipeline()
    def branch_3(work, func1, func2):
        root_branch = concatenate(func1=func1, func2=func2)
        return root_branch \
            .add_value(name='branch_3') \
            .subscribe_func(lambda func1, func2: dict(func1_1="ok"),
                            name='branch_3_1') \
            .subscribe_func(lambda func1, func2: dict(result="branch_3"),
                            name='branch_3_2')\
            .subscribe_consumer(save_globaly)

    p_builder.compile()
    branch_1.compile()
    branch_2.compile()
    branch_3.compile()

    p_builder(sentence="Oleg", use_lower=True)
    run_pipelines(app)
    result = t.get_result()

    assert len(result) == 3
    assert list(result[0].keys()) == list(result[0].values())
    assert list(result[1].keys()) == list(result[1].values())
    assert list(result[2].keys()) == list(result[2].values())
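The three assertions hold because each branch saves a single-entry dict whose key and value are both the branch name (save_globaly stores {name: result}). A standalone illustration of the shape t.get_result() is expected to have, derived from the code above rather than from captured output:

expected = [
    {'branch_1': 'branch_1'},
    {'branch_2': 'branch_2'},
    {'branch_3': 'branch_3'},
]
for item in expected:
    # key and value coincide in every saved item
    assert list(item.keys()) == list(item.values())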