Example #1
import provenance as p


# `dbdiskrepo` is a pytest fixture (an artifact repo) supplied by the test suite's conftest.
def test_dependencies_include_wrapped_artifacts(dbdiskrepo):
    @p.provenance()
    def add(a, b):
        return a + b

    sub = add(5, 5).artifact.id  # id of the artifact produced by the wrapped call

    # @p.provenance(returns_composite=True)
    @p.provenance()
    def calc(a, b):
        return {'add': add(a, b), 'mult': a * b}

    result = calc(5, 5)

    deps = {a.id for a in p.dependencies(result.artifact.id)}
    assert sub in deps
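
The commented-out decorator above points at the library's composite-return mode. A minimal sketch of how the same check might look with it, assuming returns_composite=True records each key of the returned dict as its own artifact and that the composite exposes per-key artifact proxies (calc_composite is a hypothetical name, not taken from the test suite):

    @p.provenance(returns_composite=True)
    def calc_composite(a, b):
        return {'add': add(a, b), 'mult': a * b}

    composite = calc_composite(5, 5)
    add_part = composite['add']  # assumed: each key is its own artifact proxy
    assert sub in {a.id for a in p.dependencies(add_part.artifact.id)}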
Example #2
import provenance as p
from provenance.hashing import hash  # provenance's content hash; shadows the builtin `hash`


# `repo` is a pytest fixture providing an artifact repository.
def test_integration_test(repo):
    @p.provenance(version=0, name='initial_data')
    def load_data(filename):
        return [1, 2]

    @p.provenance(repo=repo, remove=('to_remove',))
    def process_data_A(data, process_a_inc, to_remove):
        return [i + process_a_inc for i in data]

    times_called = 0

    @p.provenance(ignore=('to_ignore',))
    def process_data_B(data, process_b_inc, to_ignore):
        nonlocal times_called
        times_called += 1
        return [i + process_b_inc for i in data]

    @p.provenance(tags=['tag_a'])
    def combine_processed_data(inc_a, inc_b):
        return [a + b for a, b in zip(inc_a, inc_b)]

    def run_pipeline(filename, to_ignore, to_remove):
        data = load_data(filename)  # [1, 2]
        inc_a = process_data_A(data, 1, to_remove)  # [2, 3]
        inc_b = process_data_B(data, 5, to_ignore)  # [6, 7]
        res = combine_processed_data(inc_a, inc_b)  # [8, 10]
        return res

    result = run_pipeline('foo-bar.csv', 'something', 'removed')
    artifact = result.artifact
    inc_a_artifact = artifact.inputs['kargs']['inc_a'].artifact  # the library spells this key 'kargs'
    inc_b_artifact = artifact.inputs['kargs']['inc_b'].artifact

    assert result == [8, 10]

    # check initial wrapping
    assert artifact.value_id == hash([8, 10])

    # check for custom_fields and tags in result
    assert artifact.custom_fields == {'tags': ['tag_a']}
    assert artifact.tags == ['tag_a']

    # check that inputs were removed
    assert inc_a_artifact.inputs == {
        'kargs': {
            'data': [1, 2],
            'process_a_inc': 1
        },
        'varargs': (),
    }

    # check metadata
    data_artifact = inc_a_artifact.inputs['kargs']['data'].artifact
    assert data_artifact.name == 'initial_data'
    assert data_artifact.version == 0

    # Check caching: the second run changes only ignored/removed args, so
    # process_data_B's cache key is unchanged and it is not re-executed.
    assert times_called == 1
    new_res = run_pipeline('foo-bar.csv', 'something-different',
                           'removed-again')
    assert new_res == [8, 10]
    assert times_called == 1

    # Check that the dependencies can be returned
    dependencies = p.dependencies(result.artifact.id)
    assert dependencies == [
        data_artifact,
        inc_a_artifact,
        artifact.inputs['kargs']['inc_b'].artifact,
        artifact,
    ]

    # Check that the input_artifact_ids were properly stored
    assert result.artifact.input_artifact_ids == frozenset(
        (inc_a_artifact.id, inc_b_artifact.id))
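
The caching assertions hinge on the difference between ignore and remove: ignore excludes an argument from the cache key (changing to_ignore does not re-run process_data_B, so times_called stays at 1), while remove drops the argument from the recorded inputs (which is why inc_a_artifact.inputs has no to_remove key). A minimal sketch of the ignore behavior in isolation, assuming a default artifact repo is configured; double, verbose, and the counter are hypothetical names:

    calls = [0]

    @p.provenance(ignore=('verbose',))
    def double(x, verbose):
        calls[0] += 1
        return x * 2

    double(3, verbose=True)   # computed: calls[0] == 1
    double(3, verbose=False)  # cache hit: 'verbose' is not part of the cache key
    double(4, verbose=True)   # different x, new cache key: calls[0] == 2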