Exemplo n.º 1
0
def test_extract_variables_from_notebooks(tmp_nbs):
    with open('pipeline.yaml') as f:
        d = yaml.safe_load(f)

    d['meta']['extract_upstream'] = True
    d['meta']['extract_product'] = True

    for t in d['tasks']:
        t.pop('upstream', None)
        t.pop('product', None)

    dag = DAGSpec(d).to_dag()
    dependencies = {name: set(task.upstream) for name, task in dag.items()}
    products = {
        name: task.product.to_json_serializable()
        for name, task in dag.items()
    }

    expected_dependencies = {
        'clean': {'load'},
        'plot': {'clean'},
        'load': set()
    }

    expected_products = {
        'clean': {
            'data': str(Path(tmp_nbs, 'output', 'clean.csv').resolve()),
            'nb': str(Path(tmp_nbs, 'output', 'clean.ipynb').resolve()),
        },
        'load': {
            'data': str(Path(tmp_nbs, 'output', 'data.csv').resolve()),
            'nb': str(Path(tmp_nbs, 'output', 'load.ipynb').resolve()),
        },
        'plot': str(Path(tmp_nbs, 'output', 'plot.ipynb').resolve()),
    }

    assert dependencies == expected_dependencies
    assert products == expected_products
Exemplo n.º 2
0
def test_infer_dependencies_sql(tmp_pipeline_sql, add_current_to_sys_path):
    expected = {'filter': {'load'}, 'transform': {'filter'}, 'load': set()}

    with open('pipeline-postgres.yaml') as f:
        d = yaml.safe_load(f)

    d['meta']['extract_upstream'] = True

    for t in d['tasks']:
        t.pop('upstream', None)

    dag = DAGSpec(d).to_dag()

    deps = {name: set(task.upstream) for name, task in dag.items()}
    assert deps == expected