Exemplo n.º 1
0
def test_rendering_dag_also_renders_upstream_outside_dag(tmp_directory):
    """Build a DAG whose tasks depend on tasks declared in another DAG.

    The chain is ta >> tb >> tc >> td: ta and tb are declared in
    ``sub_dag`` while tc and td are declared in ``dag``.
    """
    upstream_dag = DAG('sub_dag')

    task_a = ShellScript('touch {{product}}',
                         product=File('a.txt'),
                         dag=upstream_dag,
                         name='ta')
    task_b = ShellScript('cat {{upstream["ta"]}} > {{product}}',
                         product=File('b.txt'),
                         dag=upstream_dag,
                         name='tb')

    main_dag = DAG('dag')

    task_c = ShellScript('cat {{upstream["tb"]}} > {{product}}',
                         product=File('c.txt'),
                         dag=main_dag,
                         name='tc')
    task_d = ShellScript('cat {{upstream["tc"]}} > {{product}}',
                         product=File('d.txt'),
                         dag=main_dag,
                         name='td')

    task_a >> task_b >> task_c >> task_d

    # FIXME: building only the main DAG (the one named 'dag') does not work:
    # .build is called on tb, tc and td only (not on ta). This is a DAG
    # execution problem: when the task to build is not in the current DAG,
    # its task.build() should build everything up to that task instead of
    # building just that task.

    # building the upstream DAG first and the main DAG afterwards works
    upstream_dag.build()
    main_dag.build()
Exemplo n.º 2
0
def test_non_existent_file():
    """Check the outdated flags of a rendered product that is not on disk."""
    pipeline = DAG()
    target = File('file.txt')
    task = ShellScript('echo hi > {{product}}',
                       product=target,
                       dag=pipeline,
                       name='ta')
    task.render()

    # the task was rendered but never built, so no file is expected
    assert not target.exists()
    # overall the product must be reported as outdated...
    assert target._outdated()
    # ...with the code dependency flagged and the data dependencies not
    assert target._outdated_code_dependency()
    assert not target._outdated_data_dependencies()
Exemplo n.º 3
0
def test_raises_render_error_if_missing_param_in_product():
    """Expect RenderError when the product uses the ``{{name}}`` tag."""
    pipeline = DAG('my dag')

    # the product identifier contains a {{name}} placeholder
    task = ShellScript(
        'echo "a" > {{product}}',
        File('a_{{name}}.txt'),
        pipeline,
        name='my task',
    )

    with pytest.raises(RenderError):
        task.render()
Exemplo n.º 4
0
def test_raises_render_error_if_missing_param_in_code():
    """Expect RenderError when the source uses the ``{{command}}`` tag."""
    pipeline = DAG('my dag')

    # the source contains a {{command}} placeholder and no params are passed
    task = ShellScript(
        '{{command}} "a" > {{product}}',
        File('a.txt'),
        pipeline,
        name='my task',
    )

    with pytest.raises(RenderError):
        task.render()
Exemplo n.º 5
0
def test_raises_render_error_if_extra_param_in_code():
    """Expect RenderError when a param is passed but unused by the source."""
    pipeline = DAG('my dag')

    # 'extra_param' does not appear anywhere in the source template
    unused_params = {'extra_param': 1}
    task = ShellScript(
        'echo "a" > {{product}}',
        File('a.txt'),
        pipeline,
        name='my task',
        params=unused_params,
    )

    with pytest.raises(RenderError):
        task.render()
Exemplo n.º 6
0
def test_can_create_task_with_many_products():
    """Check the outdated flags of a task declared with a list of products."""
    pipeline = DAG()
    products = [File('a1.txt'), File('a2.txt')]
    task = ShellScript('echo {{product}}',
                       product=products,
                       dag=pipeline,
                       name='ta')
    task.render()

    # same checks as for a single, not yet built product
    combined = task.product
    assert not combined.exists()
    assert task.product._outdated()
    assert task.product._outdated_code_dependency()
    assert not task.product._outdated_data_dependencies()
Exemplo n.º 7
0
def test_raises_render_error_if_non_existing_dependency_used():
    """Expect RenderError when the source references an unknown upstream."""
    pipeline = DAG('my dag')

    producer = ShellScript('echo "a" > {{product}}',
                           product=File('a.txt'),
                           dag=pipeline,
                           name='bash')
    # the only upstream task is named 'bash', yet the source asks for
    # 'not_valid'
    consumer = ShellScript('cat {{upstream.not_valid}} > {{product}}',
                           product=File('b.txt'),
                           dag=pipeline,
                           name='bash2')
    producer >> consumer

    with pytest.raises(RenderError):
        consumer.render()
Exemplo n.º 8
0
def test_lineage():
    """Check ``_lineage`` for each task in the chain ta >> tb >> tc."""
    pipeline = DAG('dag')
    template = 'touch {{product}}'

    first = ShellScript(template, File(Path('a.txt')), pipeline, 'ta')
    second = ShellScript(template, File(Path('b.txt')), pipeline, 'tb')
    third = ShellScript(template, File(Path('c.txt')), pipeline, 'tc')

    first >> second >> third

    # the root task has no lineage; the others list every ancestor name
    assert first._lineage is None
    assert second._lineage == {'ta'}
    assert third._lineage == {'ta', 'tb'}
Exemplo n.º 9
0
def test_can_get_upstream_tasks():
    """Check ``upstream`` holds only direct dependencies, keyed by name."""
    pipeline = DAG('dag')

    first = ShellScript('echo "a" > {{product}}',
                        product=File('a.txt'),
                        dag=pipeline,
                        name='ta')
    second = ShellScript('cat {{upstream["ta"]}} > {{product}}',
                         product=File('b.txt'),
                         dag=pipeline,
                         name='tb')
    third = ShellScript('cat {{upstream["tb"]}} > {{product}}',
                        product=File('c.txt'),
                        dag=pipeline,
                        name='tc')

    first >> second >> third

    # iterating over ``upstream`` yields the names of the direct parents
    assert set(first.upstream) == set()
    assert set(second.upstream) == {'ta'}
    assert set(third.upstream) == {'tb'}
Exemplo n.º 10
0
def test_adding_tasks_left():
    """Check that ``(ta + tb) >> tc`` makes both ta and tb upstream of tc."""
    pipeline = DAG()
    template = 'touch {{product}}'

    path_a = Path('a.txt')
    path_b = Path('b.txt')
    path_c = Path('c.txt')

    left_a = ShellScript(template, File(path_a), pipeline, 'ta')
    left_b = ShellScript(template, File(path_b), pipeline, 'tb')
    right = ShellScript(template, File(path_c), pipeline, 'tc')

    # a group of tasks on the left-hand side of >>
    (left_a + left_b) >> right

    assert not left_a.upstream
    assert not left_b.upstream
    assert set(right.upstream.values()) == {left_a, left_b}
Exemplo n.º 11
0
def test_adding_tasks():
    """Check that adding tasks yields a group that keeps operand order."""
    pipeline = DAG()
    template = 'touch {{product}}'

    path_a = Path('a.txt')
    path_b = Path('b.txt')
    path_c = Path('c.txt')

    first = ShellScript(template, File(path_a), pipeline, 'ta')
    second = ShellScript(template, File(path_b), pipeline, 'tb')
    third = ShellScript(template, File(path_c), pipeline, 'tc')

    # two operands, in both orders
    assert list((first + second).tasks) == [first, second]
    assert list((second + first).tasks) == [second, first]

    # three operands, regardless of how the additions are grouped
    in_order = [first, second, third]
    assert list((first + second + third).tasks) == in_order
    assert list(((first + second) + third).tasks) == in_order
    assert list((first + (second + third)).tasks) == in_order
Exemplo n.º 12
0
def dag():
    """Return a DAG with three chained shell tasks: t1 >> t2 >> t3.

    The products of t2 and t3 are templates that embed the product of
    their upstream task.
    """
    pipeline = DAG()

    first = ShellScript('echo a > {{product}} ',
                        product=File('1.txt'),
                        dag=pipeline,
                        name='t1')

    second_code = ('cat {{upstream["t1"]}} > {{product}}'
                   '&& echo b >> {{product}} ')
    second = ShellScript(second_code,
                         product=File('2_{{upstream["t1"]}}'),
                         dag=pipeline,
                         name='t2')

    third_code = ('cat {{upstream["t2"]}} > {{product}} '
                  '&& echo c >> {{product}}')
    third = ShellScript(third_code,
                        product=File('3_{{upstream["t2"]}}'),
                        dag=pipeline,
                        name='t3')

    first >> second >> third

    return pipeline
Exemplo n.º 13
0
def test_adding_tasks_right():
    """Check that ``ta >> (tb + tc)`` makes ta upstream of both tb and tc."""
    pipeline = DAG()
    template = 'touch {{product}}'

    path_a = Path('a.txt')
    path_b = Path('b.txt')
    path_c = Path('c.txt')

    root = ShellScript(template, File(path_a), pipeline, 'ta')
    right_b = ShellScript(template, File(path_b), pipeline, 'tb')
    right_c = ShellScript(template, File(path_c), pipeline, 'tc')

    # a group of tasks on the right-hand side of >>
    root >> (right_b + right_c)

    assert not root.upstream
    assert list(right_b.upstream.values()) == [root]
    assert list(right_c.upstream.values()) == [root]
Exemplo n.º 14
0
def test_overloaded_operators():
    """Check that chaining tasks with ``>>`` registers each dependency."""
    pipeline = DAG()
    template = 'touch {{product}}'

    path_a = Path('a.txt')
    path_b = Path('b.txt')
    path_c = Path('c.txt')

    first = ShellScript(template, File(path_a), pipeline, 'ta')
    second = ShellScript(template, File(path_b), pipeline, 'tb')
    third = ShellScript(template, File(path_c), pipeline, 'tc')

    first >> second >> third

    # the head of the chain has no parents; every other task has the
    # previous one among its upstream values
    assert not first.upstream
    assert second in third.upstream.values()
    assert first in second.upstream.values()
Exemplo n.º 15
0
def test_error_if_missing_product(tmp_directory):
    """Expect an initialization error when the source lacks {{product}}."""
    pipeline = DAG()

    # the error is raised at construction time, no rendering is involved
    with pytest.raises(SourceInitializationError) as excinfo:
        ShellScript('touch file.txt',
                    product=File('file.txt'),
                    dag=pipeline,
                    name='touch')

    expected = 'ShellScript must include {{product}} in its source'
    assert expected in str(excinfo.value)
Exemplo n.º 16
0
def test_can_access_tasks_inside_dag_using_getitem():
    """Check which task names are reachable when iterating over a DAG.

    ta, tb and tc are declared in ``dag``; td and te are declared in
    ``dag2``. The chain is td >> ta >> tb >> tc >> te.
    """
    dag = DAG('dag')
    dag2 = DAG('dag2')

    ta = ShellScript('touch {{product}}', File(Path('a.txt')), dag, 'ta')
    tb = ShellScript('touch {{product}}', File(Path('b.txt')), dag, 'tb')
    tc = ShellScript('touch {{product}}', File(Path('c.txt')), dag, 'tc')

    # td is still discoverable from dag even though it was declared in dag2,
    # since it is a dependency for a task in dag. It gets its own product
    # (d.txt) so it does not collide with the product of tc (c.txt)
    td = ShellScript('touch {{product}}', File(Path('d.txt')), dag2, 'td')
    # te is not discoverable since it is not a dependency for any task in dag
    te = ShellScript('touch {{product}}', File(Path('e.txt')), dag2, 'te')

    td >> ta >> tb >> tc >> te

    assert set(dag) == {'ta', 'tb', 'tc', 'td'}
Exemplo n.º 17
0
def test_can_access_product_without_rendering_if_literal():
    """Check ``str(product)`` works before rendering for a literal product.

    The function carries the ``test_`` prefix so that pytest's default
    discovery rules collect it; without the prefix it was never executed.
    """
    dag = DAG()

    ShellScript('echo a > {{product}}', File('1.txt'), dag, 't1')

    # no rendering!

    # check str works even though we did not run dag.render()
    assert str(dag['t1'].product) == '1.txt'


# Backward-compatible alias for the previous name, which pytest did not
# collect. The alias itself is not collected either (no ``test_`` prefix).
can_access_product_without_rendering_if_literal = (
    test_can_access_product_without_rendering_if_literal)
Exemplo n.º 18
0
def test_passing_upstream_and_product_in_shellscript(tmp_directory):
    """Build a three-task chain and check the content of the last product."""
    pipeline = DAG()

    path_a = Path('a.txt')
    path_b = Path('b.txt')
    path_c = Path('c.txt')

    code_a = 'echo a > {{product}} '
    code_b = ('cat {{upstream["ta"]}} > {{product}}'
              '&& echo b >> {{product}} ')
    code_c = ('cat {{upstream["tb"]}} > {{product}} '
              '&& echo c >> {{product}}')

    first = ShellScript(code_a, File(path_a), pipeline, 'ta')
    second = ShellScript(code_b, File(path_b), pipeline, 'tb')
    third = ShellScript(code_c, File(path_c), pipeline, 'tc')

    first >> second >> third

    pipeline.build()

    # each task copies its upstream product and appends one more line
    assert path_c.read_text() == 'a\nb\nc\n'
Exemplo n.º 19
0
def test_partial_build(tmp_directory):
    """Check ``build_partially('tc')`` reports only ta, tb and tc.

    Dependencies: ta >> tb >> tc and tb >> td >> te.
    """
    pipeline = DAG('dag')

    root = ShellScript('echo "hi" >> {{product}}',
                       product=File(Path('a.txt')),
                       dag=pipeline,
                       name='ta')
    # the remaining tasks all share the same source template
    append_upstream = 'cat {{upstream.first}} >> {{product}}'
    task_b = ShellScript(append_upstream, File(Path('b.txt')), pipeline, 'tb')
    task_c = ShellScript(append_upstream, File(Path('c.txt')), pipeline, 'tc')
    task_d = ShellScript(append_upstream, File(Path('d.txt')), pipeline, 'td')
    task_e = ShellScript(append_upstream, File(Path('e.txt')), pipeline, 'te')

    root >> task_b >> task_c
    task_b >> task_d >> task_e

    report = pipeline.build_partially('tc')

    # td and te must not show up in the report
    reported_names = {entry['name'] for entry in report}
    assert reported_names == {'ta', 'tb', 'tc'}
    # every reported row must have a truthy 'Ran?' value
    assert all(entry['Ran?'] for entry in report)
Exemplo n.º 20
0
def test_can_create_task_with_more_than_one_product(tmp_directory):
    """Render and build a DAG where one task declares two products."""
    pipeline = DAG()

    path_a = Path('a.txt')
    path_b = Path('b.txt')
    path_c = Path('c.txt')

    # products passed as a tuple are accessed by index in the templates
    two_products = (File(path_a), File(path_b))
    producer = ShellScript('touch {{product[0]}} {{product[1]}}',
                           two_products, pipeline, 'ta')

    consumer_code = ('cat {{upstream["ta"][0]}} {{upstream["ta"][1]}} > '
                     '{{product}}')
    consumer = ShellScript(consumer_code, File(path_c), pipeline, 'tc')

    producer >> consumer

    pipeline.render()

    pipeline.build()
Exemplo n.º 21
0
def test_error_if_non_compatible_tasks():
    """Expect TypeError when InMemoryDAG wraps a DAG with a ShellScript."""
    pipeline = DAG()
    ShellScript('touch {{product}}',
                product=File('file.txt'),
                dag=pipeline,
                name='task')

    with pytest.raises(TypeError) as excinfo:
        InMemoryDAG(pipeline)

    # the message must match exactly, including the offending type name
    message = str(excinfo.value)
    assert message == ('All tasks in the DAG must be PythonCallable, '
                       'got unallowed types: ShellScript')
Exemplo n.º 22
0
def test_passing_upstream_and_product_in_shellscript(tmp_directory):
    """Render a three-task chain and check the rendered source of each."""
    pipeline = DAG()

    path_a = Path('a.txt')
    path_b = Path('b.txt')
    path_c = Path('c.txt')

    code_a = 'echo a > {{product}}'
    code_b = ('cat {{upstream["ta"]}} > {{product}}'
              ' && echo b >> {{product}}')
    code_c = ('cat {{upstream["tb"]}} > {{product}}'
              ' && echo c >> {{product}}')

    first = ShellScript(code_a, File(path_a), pipeline, 'ta')
    second = ShellScript(code_b, File(path_b), pipeline, 'tb')
    third = ShellScript(code_c, File(path_c), pipeline, 'tc')

    first >> second >> third

    pipeline.render()

    # placeholders must be replaced with the corresponding file names
    rendered = [str(task.source) for task in (first, second, third)]
    assert rendered[0] == 'echo a > a.txt'
    assert rendered[1] == 'cat a.txt > b.txt && echo b >> b.txt'
    assert rendered[2] == 'cat b.txt > c.txt && echo c >> c.txt'
Exemplo n.º 23
0
def test_outdated_data_simple_dependency(tmp_directory):
    """Check outdated status for the two-task chain A -> B.

    Both products start missing and outdated, are up to date after a
    build, and B becomes outdated once A is force-built again.
    """
    pipeline = DAG()

    path_a = Path('a.txt')
    path_b = Path('b.txt')

    upstream_task = ShellScript('touch {{product}}',
                                product=File(path_a),
                                dag=pipeline,
                                name='ta')
    downstream_task = ShellScript('cat {{upstream["ta"]}} > {{product}}',
                                  product=File(path_b),
                                  dag=pipeline,
                                  name='tb')

    upstream_task >> downstream_task

    upstream_task.render()
    downstream_task.render()

    # before building: nothing exists and everything is outdated
    assert not upstream_task.product.exists()
    assert not downstream_task.product.exists()
    assert upstream_task.product._outdated()
    assert downstream_task.product._outdated()

    pipeline.build()

    pipeline._clear_cached_outdated_status()

    # after building: both products exist...
    assert upstream_task.product.exists()
    assert downstream_task.product.exists()

    # ...and neither of them is outdated
    assert not upstream_task.product._outdated()
    assert not downstream_task.product._outdated()

    # force-build the upstream task to make the downstream product outdated
    upstream_task.build(force=True)

    pipeline._clear_cached_outdated_status()

    assert not upstream_task.product._outdated()
    assert downstream_task.product._outdated()
Exemplo n.º 24
0
def test_can_pickle_dag():
    """Check that a DAG with two chained tasks survives a pickle round trip."""
    pipeline = DAG()

    shell_task = ShellScript('cat "hi" > {{product}}',
                             product=File('/tmp/file.txt'),
                             dag=pipeline,
                             name='bash')

    callable_task = PythonCallable(fn,
                                   File('/tmp/file2.txt'),
                                   pipeline,
                                   name='fn')

    shell_task >> callable_task

    # the test passes as long as neither step raises
    serialized = pickle.dumps(pipeline)
    pickle.loads(serialized)
Exemplo n.º 25
0
def test_init_client_automatically(monkeypatch):
    """Check that ``client=None`` makes the task create its own client."""
    fake_client = Mock()
    # replace the ShellClient constructor with one that returns the mock
    monkeypatch.setattr(tasks, 'ShellClient', lambda: fake_client)

    pipeline = DAG()
    # no client is passed explicitly...
    task = ShellScript(
        'touch {{product}}',
        File('file.txt'),
        pipeline,
        name='touch',
        client=None,
    )

    # ...so the task must initialize one by calling ShellClient()
    assert task.client is fake_client
Exemplo n.º 26
0
def test_many_upstream(tmp_directory):
    """Check outdated status when a task has two parents: {A, B} -> C.

    Force-building either parent must leave C outdated while both parents
    are still reported as up to date.
    """
    pipeline = DAG()

    path_a = Path('a.txt')
    path_b = Path('b.txt')
    path_c = Path('c.txt')

    parent_a = ShellScript('touch {{product}}',
                           product=File(path_a),
                           dag=pipeline,
                           name='ta')
    parent_b = ShellScript('touch {{product}} > {{product}}',
                           product=File(path_b),
                           dag=pipeline,
                           name='tb')
    child_code = 'cat {{upstream["ta"]}} {{upstream["tb"]}} >  {{product}}'
    child = ShellScript(child_code,
                        product=File(path_c),
                        dag=pipeline,
                        name='tc')

    (parent_a + parent_b) >> child

    pipeline.build()

    # after the first build every product exists and is up to date
    assert parent_a.product.exists()
    assert parent_b.product.exists()
    assert child.product.exists()

    assert not parent_a.product._outdated()
    assert not parent_b.product._outdated()
    assert not child.product._outdated()

    # force-building the first parent leaves only the child outdated
    parent_a.build(force=True)
    pipeline._clear_cached_outdated_status()

    assert not parent_a.product._outdated()
    assert not parent_b.product._outdated()
    assert child.product._outdated()

    # rebuild everything, then repeat with the second parent
    pipeline.build()
    parent_b.build(force=True)
    pipeline._clear_cached_outdated_status()

    assert not parent_a.product._outdated()
    assert not parent_b.product._outdated()
    assert child.product._outdated()
Exemplo n.º 27
0
def test_task_level_shell_client(tmp_directory, monkeypatch):
    """Check the command and code a custom ShellClient hands to subprocess.

    ``subprocess.run`` is mocked, so no ruby interpreter is needed; the
    test inspects the arguments that the mock receives.
    """
    path = Path(tmp_directory, 'a_file')
    pipeline = DAG()
    ruby_client = ShellClient(run_template='ruby {{path_to_code}}')
    pipeline.clients[ShellScript] = ruby_client

    ruby_code = """
    require 'fileutils'
    FileUtils.touch "{{product}}"
    """
    ShellScript(ruby_code,
                product=File(path),
                dag=pipeline,
                name='ruby_script')

    # spy on client.execute while still running the real implementation
    execute_spy = Mock(wraps=ruby_client.execute)
    monkeypatch.setattr(ruby_client, 'execute', execute_spy)

    fake_result = Mock()
    fake_result.returncode = 0

    def fake_run(*args, **kwargs):
        # create the product in place of the mocked-out command
        # NOTE(review): relative path, presumably tmp_directory is the
        # current working directory - confirm
        Path('a_file').touch()
        return fake_result

    run_mock = Mock(side_effect=fake_run)
    monkeypatch.setattr(shell.subprocess, 'run', run_mock)
    # prevent tmp file from being removed so we can check contents
    monkeypatch.setattr(shell.Path, 'unlink', Mock())

    pipeline.build()

    execute_spy.assert_called_once()

    # first positional argument of subprocess.run is [command, code path]
    run_args, run_kwargs = run_mock.call_args
    cmd, path_arg = run_args[0]

    expected_code = """
    require 'fileutils'
    FileUtils.touch "{path}"
    """.format(path=path)

    assert cmd == 'ruby'
    assert Path(path_arg).read_text() == expected_code
    assert run_kwargs == dict(stderr=subprocess.PIPE,
                              stdout=subprocess.PIPE,
                              shell=False)
Exemplo n.º 28
0
def test_build_task(tmp_directory, monkeypatch):
    """Check that building the DAG executes the rendered source once."""
    pipeline = DAG()
    task = ShellScript(
        'touch {{product}}',
        File('file.txt'),
        pipeline,
        name='touch',
    )

    def create_product(code):
        # dag.build verifies products exist after execution, so the fake
        # execution has to create the file itself
        Path('file.txt').touch()

    # mock the actual execution to make this test work on windows
    fake_execute = Mock(side_effect=create_product)
    monkeypatch.setattr(task.client, 'execute', fake_execute)

    pipeline.build()

    fake_execute.assert_called_once_with('touch file.txt')
Exemplo n.º 29
0
def test_can_access_sub_dag():
    """Check that a whole DAG can be set as upstream of a task.

    After ``td.set_upstream(sub_dag)`` the sub-DAG must be listed in
    ``td.upstream`` under its own name ('sub_dag').
    """
    sub_dag = DAG('sub_dag')

    ta = ShellScript('echo "a" > {{product}}', File('a.txt'), sub_dag, 'ta')
    tb = ShellScript('cat {{upstream["ta"]}} > {{product}}',
                     File('b.txt'), sub_dag, 'tb')
    # same command as tb: 'cat' (the source previously read 'tcat')
    tc = ShellScript('cat {{upstream["tb"]}} > {{product}}',
                     File('c.txt'), sub_dag, 'tc')

    ta >> tb >> tc

    dag = DAG('dag')

    fd = Path('d.txt')
    td = ShellScript('touch {{product}}', File(fd), dag, 'td')

    # the upstream dependency is the DAG object itself, not one of its tasks
    td.set_upstream(sub_dag)

    assert 'sub_dag' in td.upstream
Exemplo n.º 30
0
def test_custom_client_in_dag(tmp_directory):
    """Build a ruby script through a client registered at the DAG level.

    No mocking is involved, so the test runs the ``ruby`` command given in
    the client's run template.
    """
    path = Path(tmp_directory, 'a_file')

    pipeline = DAG()

    # register the client for every ShellScript task in this DAG
    ruby_client = ShellClient(run_template='ruby {{path_to_code}}')
    pipeline.clients[ShellScript] = ruby_client

    ruby_code = """
    require 'fileutils'
    FileUtils.touch "{{product}}"
    """
    ShellScript(ruby_code,
                product=File(path),
                dag=pipeline,
                name='ruby_script')

    # the product must be created by the build, not before it
    assert not path.exists()

    pipeline.build()

    assert path.exists()