Example No. 1
def test_error_invalid_dag_level_client_dotted_path(tmp_sample_tasks,
                                                    add_current_to_sys_path,
                                                    no_sys_modules_cache, code,
                                                    expected_error):
    Path('dag_level_client_dotted_path.py').write_text(code)

    spec = DAGSpec({
        'meta': {
            'extract_product': False,
            'extract_upstream': True,
        },
        'tasks': [
            {
                'source': 'sample.sql',
                'product': ['name', 'table']
            },
        ],
        'clients': {
            'SQLScript': 'dag_level_client_dotted_path.get'
        }
    })

    with pytest.raises(TypeError) as excinfo:
        spec.to_dag()

    assert expected_error in str(excinfo.value)
Example No. 2
def test_spec_with_functions(lazy_import, backup_spec_with_functions,
                             add_current_to_sys_path):
    """
    Check we can create a pipeline where the task is a function defined in a
    local file
    """
    spec = DAGSpec('pipeline.yaml', lazy_import=lazy_import)
    spec.to_dag().build()
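
The backup_spec_with_functions fixture is not shown here. As a purely hypothetical sketch (module, function, and file names are assumptions, not the fixture's actual contents), a pipeline.yaml whose task is a function defined in a local file could be set up like this, assuming the current directory is on sys.path (as the add_current_to_sys_path fixture does):

# hypothetical setup sketch, not the actual backup_spec_with_functions fixture
from pathlib import Path

Path('my_functions.py').write_text("""
from pathlib import Path

def get(product):
    Path(str(product)).write_text('hello')
""")

Path('pipeline.yaml').write_text("""
tasks:
  - source: my_functions.get
    product: get.txt
""")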
Example No. 3
def test_import_tasks_from_with_non_empty_env(tmp_nbs):
    some_tasks = [{
        'source': 'extra_task.py',
        'name': 'extra_task',
        'product': 'extra.ipynb',
        'params': {
            'some_param': '{{some_param}}'
        }
    }]
    Path('some_tasks.yaml').write_text(yaml.dump(some_tasks))
    Path('extra_task.py').write_text("""
# + tags=["parameters"]
# -
""")
    spec_d = yaml.safe_load(Path('pipeline.yaml').read_text())
    spec_d['meta']['import_tasks_from'] = 'some_tasks.yaml'

    spec = DAGSpec(spec_d, env={'some_param': 'some_value'})

    dag = spec.to_dag()
    dag.render()
    assert dag['extra_task'].params['some_param'] == 'some_value'
    assert str(Path('extra_task.py').resolve()) in [
        str(t['source']) for t in spec['tasks']
    ]
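
The same mechanism can also be expressed directly in YAML files instead of mutating the loaded dict. The sketch below is illustrative (load.py and the file contents are assumptions, not what the tmp_nbs fixture ships) and relies on env.yaml being picked up automatically when it sits next to pipeline.yaml:

# illustrative standalone setup: import_tasks_from plus an env.yaml placeholder
Path('some_tasks.yaml').write_text("""
- source: extra_task.py
  name: extra_task
  product: extra.ipynb
  params:
    some_param: '{{some_param}}'
""")

Path('extra_task.py').write_text("""
# + tags=["parameters"]
# -
""")

Path('env.yaml').write_text("some_param: some_value\n")

Path('load.py').write_text("""
# + tags=["parameters"]
upstream = None
# -
""")

Path('pipeline.yaml').write_text("""
meta:
  import_tasks_from: some_tasks.yaml
tasks:
  - source: load.py
    product: load.ipynb
""")

spec = DAGSpec('pipeline.yaml')  # {{some_param}} resolves to 'some_value'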
Example No. 4
def test_import_tasks_from_loads_relative_to_pipeline_spec(tmp_nbs):
    some_tasks = [{'source': 'extra_task.py', 'product': 'extra.ipynb'}]
    Path('some_tasks.yaml').write_text(yaml.dump(some_tasks))
    Path('extra_task.py').write_text("""
# + tags=["parameters"]
# -
""")

    spec_d = yaml.safe_load(Path('pipeline.yaml').read_text())
    spec_d['meta']['import_tasks_from'] = 'some_tasks.yaml'

    Path('pipeline.yaml').write_text(yaml.dump(spec_d))

    # move to another dir to make sure we can still load the spec
    Path('subdir').mkdir()
    os.chdir('subdir')

    spec = DAGSpec('../pipeline.yaml')
    dag = spec.to_dag()
    dag.render()

    assert spec['meta']['import_tasks_from'] == str(
        Path('..', 'some_tasks.yaml').resolve())
    assert str(Path('..', 'extra_task.py').resolve()) in [
        str(t['source']) for t in spec['tasks']
    ]
Example No. 5
def test_grid_and_upstream_wildcard_callables(spec_raw, tmp_directory,
                                              add_current_to_sys_path,
                                              no_sys_modules_cache):
    Path('sample_source_callables.py').write_text("""
from pathlib import Path

def unserializer(product):
    return Path(product).read_text()

def upstream(product, param):
    Path(product).touch()

def downstream(product, upstream):
    up = upstream['upstream-*']
    one = up['upstream-0']
    another = up['upstream-1']
    Path(product).touch()
""")

    spec = DAGSpec(spec_raw)

    dag = spec.to_dag().render()
    # to build faster
    dag.executor = Serial(build_in_subprocess=False)

    # make sure unserializing works correctly
    dag.build()

    assert set(dag) == {'upstream-1', 'upstream-0', 'downstream'}
    assert set(dag['downstream'].params['upstream']['upstream-*']) == {
        'upstream-1', 'upstream-0'
    }
Example No. 6
def test_python_callables_with_extract_upstream(tmp_directory):
    spec = DAGSpec({
        'tasks': [
            {
                'source': 'test_pkg.callables.root',
                'product': 'root.csv'
            },
            {
                'source': 'test_pkg.callables.a',
                'product': 'a.csv'
            },
            {
                'source': 'test_pkg.callables.b',
                'product': 'b.csv'
            },
        ],
        'meta': {
            'extract_product': False,
            'extract_upstream': True
        }
    })

    dag = spec.to_dag()

    dag.build()

    assert set(dag) == {'a', 'b', 'root'}
    assert not dag['root'].upstream
    assert set(dag['a'].upstream) == {'root'}
    assert set(dag['b'].upstream) == {'root'}
Example No. 7
def test_import_tasks_from_paths_are_relative_to_the_yaml_spec(
        tmp_nbs, tmp_path):
    tasks_yaml = tmp_path / 'some_tasks.yaml'

    # source is a relative path
    some_tasks = [{'source': 'extra_task.py', 'product': 'extra.ipynb'}]
    tasks_yaml.write_text(yaml.dump(some_tasks))
    # write the source code in the same folder as some_tasks.yaml
    Path(tmp_path, 'extra_task.py').write_text("""
# + tags=["parameters"]
# -
""")

    spec_d = yaml.safe_load(Path('pipeline.yaml').read_text())

    # set an absolute path
    spec_d['meta']['import_tasks_from'] = str(tasks_yaml.resolve())
    Path('pipeline.yaml').write_text(yaml.dump(spec_d))

    spec = DAGSpec('pipeline.yaml')
    dag = spec.to_dag()
    dag.render()

    # paths must be interpreted as relative to some_tasks.yaml, not to the
    # current working directory
    assert str(Path(tmp_path, 'extra_task.py').resolve()) in [
        str(t['source']) for t in spec['tasks']
    ]
Example No. 8
def test_sets_clients(tmp_sample_tasks, add_current_to_sys_path,
                      no_sys_modules_cache, dotted_path_spec):
    Path('test_sets_clients.py').write_text("""
from unittest.mock import Mock

def get(a=None):
    return Mock()
""")

    spec = DAGSpec({
        'meta': {
            'extract_product': False,
            'extract_upstream': True,
        },
        'tasks': [
            {
                'source': 'sample.sql',
                'product': ['name', 'table']
            },
        ],
        'clients': {
            'SQLScript': dotted_path_spec
        }
    })

    dag = spec.to_dag()

    assert isinstance(dag.clients[SQLScript], Mock)
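
dotted_path_spec is a parametrized fixture that is not shown. Given the get(a=None) signature above, plausible parametrizations (illustrative values, not the fixture itself) are a plain dotted path string or a dict whose extra keys are forwarded as keyword arguments:

# illustrative values for dotted_path_spec (not the actual fixture)
dotted_path_spec = 'test_sets_clients.get'
dotted_path_spec = {'dotted_path': 'test_sets_clients.get', 'a': 1}  # calls get(a=1)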
Example No. 9
def test_lazy_load_product_level_client(tmp_directory, tmp_imports,
                                        my_testing_module, client_spec):
    Path('script.sql').write_text("""
CREATE TABLE {{product}} AS SELECT * FROM my_table
""")

    with sqlite3.connect('my.db') as conn:
        pd.DataFrame({'x': range(5)}).to_sql('my_table', conn)

    tasks = [
        {
            'source': 'script.sql',
            'product': [None, 'name', 'table'],
            'client': client_spec,
            'product_client': client_spec,
            'product_class': 'GenericSQLRelation',
        },
    ]

    data = {'tasks': tasks}

    spec = DAGSpec(data, lazy_import=True)

    dag = spec.to_dag()
    dag.executor = Serial(build_in_subprocess=False)

    # since lazy_import=True, creating the dag should not import
    # my_testing_module
    assert 'my_testing_module' not in sys.modules

    dag.build()

    # should be imported now
    assert 'my_testing_module' in sys.modules
Example No. 10
def test_lazy_load_dag_level_client(tmp_directory, tmp_imports,
                                    my_testing_module, client_spec):

    tasks = [
        {
            'source': 'my_testing_module.task',
            'product': 'output.csv'
        },
    ]

    data = {
        'tasks': tasks,
        'clients': {
            'File': client_spec
        },
    }

    spec = DAGSpec(data, lazy_import=True)

    dag = spec.to_dag()
    dag.executor = Serial(build_in_subprocess=False)

    # since lazy_import=True, creating the dag should not import
    # my_testing_module
    assert 'my_testing_module' not in sys.modules

    dag.build()

    # should be imported now
    assert 'my_testing_module' in sys.modules
    assert Path('backup', 'output.csv').exists()
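
my_testing_module and client_spec are fixtures defined elsewhere. A hypothetical version consistent with this example (the exact contents are assumptions, not the real conftest code) would expose the task plus a File client factory that backs products up to a local 'backup' directory:

# hypothetical fixture sketch, not the real conftest code
Path('my_testing_module.py').write_text("""
from pathlib import Path
import pandas as pd
from ploomber.clients import LocalStorageClient

def task(product):
    pd.DataFrame({'x': range(5)}).to_csv(str(product), index=False)

def get_client():
    # copies File products into a local 'backup' directory
    return LocalStorageClient('backup')
""")

client_spec = 'my_testing_module.get_client'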
Example No. 11
def test_lazy_load(tmp_directory, tmp_imports):
    Path('my_module.py').write_text("""
def fn():
    pass
""")

    tasks = [
        {
            'source': 'my_module.fn',
            'product': 'report.ipynb',
            'on_finish': 'not_a_module.not_a_function',
            'on_render': 'not_a_module.not_a_function',
            'on_failure': 'not_a_module.not_a_function',
            'serializer': 'not_a_module.not_a_function',
            'unserializer': 'not_a_module.not_a_function',
            'product_client': 'not_a_module.not_a_function'
        },
    ]

    data = {
        'tasks': tasks,
        'serializer': 'not_a_module.not_a_function',
        'unserializer': 'not_a_module.not_a_function',
    }

    spec = DAGSpec(data, lazy_import=True)

    assert spec.to_dag()
Example No. 12
def test_loads_serializer_and_unserializer(backup_online,
                                           add_current_to_sys_path):

    spec = DAGSpec({
        'tasks': [{
            'source': 'online_tasks.get',
            'product': 'output/get.parquet',
        }, {
            'source': 'online_tasks.square',
            'product': 'output/square.parquet',
        }],
        'meta': {
            'extract_product': False
        },
        'serializer': 'online_io.serialize',
        'unserializer': 'online_io.unserialize',
    })

    dag = spec.to_dag()

    from online_io import serialize, unserialize

    assert dag['get']._serializer is serialize
    assert dag['get']._unserializer is unserialize
    assert dag['square']._serializer is serialize
    assert dag['square']._unserializer is unserialize
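
online_io comes from the backup_online fixture. A minimal serializer/unserializer pair matching the signatures Ploomber expects (an assumption about the fixture's actual code, shown only for illustration) could look like this:

# sketch of a possible online_io.py (illustrative only)
import pandas as pd

def serialize(obj, product):
    # Ploomber calls serializers with the task's return value and its product
    obj.to_parquet(str(product))

def unserialize(product):
    # unserializers receive the upstream product and return the loaded object
    return pd.read_parquet(str(product))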
Example No. 13
def test_error_if_location_returns_none(tmp_directory, add_current_to_sys_path,
                                        no_sys_modules_cache):
    Path('test_error_if_location_is_not_a_callable.py').write_text("""
def make_dag():
    return None
""")

    spec = DAGSpec(
        {'location': 'test_error_if_location_is_not_a_callable.make_dag'})

    with pytest.raises(TypeError) as excinfo:
        spec.to_dag()

    expected = ("Error calling dotted path 'test_error_if_location_is_"
                "not_a_callable.make_dag'. Expected a value but got None")
    assert str(excinfo.value) == expected
Example No. 14
def test_import_tasks_from(tmp_nbs):
    some_tasks = [{'source': 'extra_task.py', 'product': 'extra.ipynb'}]
    Path('some_tasks.yaml').write_text(yaml.dump(some_tasks))
    Path('extra_task.py').write_text("""
# + tags=["parameters"]
# -
""")

    spec_d = yaml.safe_load(Path('pipeline.yaml').read_text())
    spec_d['meta']['import_tasks_from'] = 'some_tasks.yaml'

    spec = DAGSpec(spec_d)

    spec.to_dag().render()
    assert str(Path('extra_task.py').resolve()) in [
        str(t['source']) for t in spec['tasks']
    ]
Example No. 15
def test_error_if_location_is_not_a_callable(tmp_directory,
                                             add_current_to_sys_path,
                                             no_sys_modules_cache):
    Path('test_error_if_location_is_not_a_callable.py').write_text("""
make_dag = 1
""")

    spec = DAGSpec(
        {'location': 'test_error_if_location_is_not_a_callable.make_dag'})

    with pytest.raises(TypeError) as excinfo:
        spec.to_dag()

    expected = ("Error loading dotted path 'test_error_if_"
                "location_is_not_a_callable.make_dag'. Expected a "
                "callable object (i.e., some kind of function). Got 1 "
                "(an object of type: int)")
    assert str(excinfo.value) == expected
Example No. 16
def load_entry_point(entry_point):
    type_ = find_entry_point_type(entry_point)

    if type_ == EntryPoint.Directory:
        spec = DAGSpec.from_directory(entry_point)
        path = Path(entry_point)

    elif type_ == EntryPoint.File:
        spec = DAGSpec(entry_point)
        path = Path(entry_point).parent
    else:
        raise NotImplementedError(
            f'loading entry point type {type_!r} is unsupported')

    return spec, spec.to_dag(), path
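
A usage sketch for the helper above (the entry point paths are placeholders, not real project files):

# load a spec from a pipeline.yaml file (or from a directory of scripts),
# then build the resulting DAG
spec, dag, path = load_entry_point('pipeline.yaml')   # EntryPoint.File
# spec, dag, path = load_entry_point('my_project')    # EntryPoint.Directory
dag.build()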
Example No. 17
def test_grid_and_upstream_wildcard_scripts(spec, tmp_directory):
    Path('upstream.py').write_text("""
# + tags=['parameters']
upstream = None
""")

    Path('downstream.py').write_text("""
# + tags=['parameters']
upstream = ['upstream-*']
""")

    spec = DAGSpec(spec)

    dag = spec.to_dag().render()

    assert set(dag) == {'upstream-1', 'upstream-0', 'downstream'}

    assert set(dag['downstream'].params['upstream']['upstream-*']) == {
        'upstream-1', 'upstream-0'
    }
Example No. 18
def test_python_callables_spec(tmp_directory, add_current_to_sys_path):
    Path('test_python_callables_spec.py').write_text("""
def task1(product):
    pass
""")

    spec = DAGSpec({
        'tasks': [
            {
                'source': 'test_python_callables_spec.task1',
                'product': 'some_file.csv'
            },
        ],
        'meta': {
            'extract_product': False,
            'extract_upstream': False
        }
    })

    dag = spec.to_dag()
    assert isinstance(dag['task1'], PythonCallable)
Example No. 19
def test_source_loader(monkeypatch, tmp_directory, no_sys_modules_cache):
    monkeypatch.syspath_prepend(tmp_directory)

    spec = DAGSpec({
        'meta': {
            'source_loader': {
                'path': 'templates',
                'module': 'test_pkg'
            },
            'extract_product': False,
            'extract_upstream': False,
        },
        'tasks': [{
            'source': 'create-table.sql',
            'product': ['some_table', 'table'],
            'client': 'db.get_client'
        }]
    })

    Path('db.py').write_text("""
from ploomber.clients import SQLAlchemyClient

def get_client():
    return SQLAlchemyClient('sqlite://')
""")

    # check source loader is working correctly with a template that has a macro
    loader = spec['meta']['source_loader']
    template = loader['create-table.sql']

    expected = ('\nDROP TABLE IF EXISTS some_table;\nCREATE TABLE '
                'some_table AS\nSELECT * FROM table')
    assert template.render({'product': 'some_table'}) == expected

    # test the task source is correctly resolved when converted to a dag
    dag = spec.to_dag()
    dag.render()

    assert str(dag['create-table'].source) == expected
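
The create-table.sql template lives inside the installed test_pkg package and is not shown. One hypothetical layout that would render to the expected SQL, assuming a macros.sql in the same templates directory and default Jinja whitespace handling (both assumptions, not the package's actual files), is:

# hypothetical test_pkg/templates/create-table.sql (illustrative only)
CREATE_TABLE_SQL = """\
{% import "macros.sql" as m %}
{{ m.drop_if_exists(product) }}
CREATE TABLE {{product}} AS
SELECT * FROM table"""

# hypothetical test_pkg/templates/macros.sql defining the macro it imports
MACROS_SQL = """\
{% macro drop_if_exists(relation) -%}
DROP TABLE IF EXISTS {{relation}};
{%- endmacro %}"""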
Example No. 20
def test_spec_from_yaml_resolves_paths_from_wildcard(tmp_directory, spec):
    Path('upstream.py').write_text("""
# + tags=['parameters']
upstream = None
""")

    Path('downstream.py').write_text("""
# + tags=['parameters']
upstream = ['upstream-*']
""")

    spec = DAGSpec(spec)

    dag = spec.to_dag().render()

    # on Windows, paths do not resolve if the file doesn't exist
    Path('upstream-0.ipynb').touch()
    Path('upstream-1.ipynb').touch()

    assert str(Path(dag['upstream-0'].product).resolve()) == str(
        Path('upstream-0.ipynb').resolve())
    assert str(Path(dag['upstream-1'].product).resolve()) == str(
        Path('upstream-1.ipynb').resolve())
Example No. 21
def test_import_tasks_from_keeps_value_if_already_absolute(tmp_nbs, tmp_path):
    tasks_yaml = (tmp_path / 'some_tasks.yaml').resolve()
    path_to_script = (tmp_path / 'extra_task.py').resolve()

    some_tasks = [{'source': str(path_to_script), 'product': 'extra.ipynb'}]
    tasks_yaml.write_text(yaml.dump(some_tasks))
    path_to_script.write_text("""
# + tags=["parameters"]
# -
""")

    spec_d = yaml.safe_load(Path('pipeline.yaml').read_text())
    # set an absolute path
    spec_d['meta']['import_tasks_from'] = str(tasks_yaml)
    Path('pipeline.yaml').write_text(yaml.dump(spec_d))

    spec = DAGSpec('pipeline.yaml')
    dag = spec.to_dag()
    dag.render()

    # value should be the same because it was absolute
    assert spec['meta']['import_tasks_from'] == str(tasks_yaml)
    assert str(path_to_script) in [str(t['source']) for t in spec['tasks']]
Example No. 22
def test_to_dag_does_not_mutate_spec(tmp_nbs):
    spec = DAGSpec('pipeline.yaml')
    old_data = deepcopy(spec.data)
    spec.to_dag()
    assert spec.data == old_data