def test_rendering_dag_also_renders_upstream_outside_dag(tmp_directory):
    """Build a DAG whose upstream chain starts in a different DAG.

    ``ta`` and ``tb`` are registered in ``sub_dag``; ``tc`` and ``td`` are
    registered in ``dag``. The four tasks are chained ta >> tb >> tc >> td.
    """
    upstream_dag = DAG('sub_dag')
    ta = ShellScript('touch {{product}}',
                     product=File('a.txt'),
                     dag=upstream_dag,
                     name='ta')
    tb = ShellScript('cat {{upstream["ta"]}} > {{product}}',
                     product=File('b.txt'),
                     dag=upstream_dag,
                     name='tb')

    main_dag = DAG('dag')
    tc = ShellScript('cat {{upstream["tb"]}} > {{product}}',
                     product=File('c.txt'),
                     dag=main_dag,
                     name='tc')
    td = ShellScript('cat {{upstream["tc"]}} > {{product}}',
                     product=File('d.txt'),
                     dag=main_dag,
                     name='td')

    ta >> tb >> tc >> td

    # FIXME: building only the downstream DAG is not enough. Its build()
    # reaches tb, tc and td but never ta. This is a DAG execution problem:
    # when the task to build does not belong to the current DAG, task.build()
    # should build everything up to that task instead of that task alone.
    # Once fixed, the single call below should suffice:
    # dag.build()

    # Workaround that works today: build the upstream DAG first.
    upstream_dag.build()
    main_dag.build()
def test_non_existent_file():
    """Check the outdated flags of a rendered task whose file is absent."""
    pipeline = DAG()
    product = File('file.txt')
    task = ShellScript('echo hi > {{product}}',
                       product=product,
                       dag=pipeline,
                       name='ta')

    task.render()

    assert not product.exists()
    # Outdated overall and by code, but not by data dependencies.
    assert product._outdated()
    assert product._outdated_code_dependency()
    assert not product._outdated_data_dependencies()
def test_raises_render_error_if_missing_param_in_product():
    """Rendering fails when the product uses a placeholder with no value."""
    pipeline = DAG('my dag')

    # The product references {{name}}, but no such parameter is passed.
    task = ShellScript('echo "a" > {{product}}',
                       product=File('a_{{name}}.txt'),
                       dag=pipeline,
                       name='my task')

    with pytest.raises(RenderError):
        task.render()
def test_raises_render_error_if_missing_param_in_code():
    """Rendering fails when the source uses a placeholder with no value."""
    pipeline = DAG('my dag')

    # The source references {{command}}, but no such parameter is passed.
    task = ShellScript('{{command}} "a" > {{product}}',
                       product=File('a.txt'),
                       dag=pipeline,
                       name='my task')

    with pytest.raises(RenderError):
        task.render()
def test_raises_render_error_if_extra_param_in_code():
    """Rendering fails when a parameter is passed but never used."""
    pipeline = DAG('my dag')
    unused_params = dict(extra_param=1)

    task = ShellScript('echo "a" > {{product}}',
                       product=File('a.txt'),
                       dag=pipeline,
                       name='my task',
                       params=unused_params)

    with pytest.raises(RenderError):
        task.render()
def test_can_create_task_with_many_products():
    """A task declared with a list of files exposes a single product."""
    pipeline = DAG()
    first_file = File('a1.txt')
    second_file = File('a2.txt')

    ta = ShellScript('echo {{product}}',
                     product=[first_file, second_file],
                     dag=pipeline,
                     name='ta')
    ta.render()

    # The status checks are made on the task's product as a whole.
    assert not ta.product.exists()
    assert ta.product._outdated()
    assert ta.product._outdated_code_dependency()
    assert not ta.product._outdated_data_dependencies()
def test_raises_render_error_if_non_existing_dependency_used():
    """Rendering fails when the source names an upstream that is not set."""
    pipeline = DAG('my dag')

    producer = ShellScript('echo "a" > {{product}}',
                           product=File('a.txt'),
                           dag=pipeline,
                           name='bash')
    # "not_valid" is not the name of any upstream task of this one.
    consumer = ShellScript('cat {{upstream.not_valid}} > {{product}}',
                           product=File('b.txt'),
                           dag=pipeline,
                           name='bash2')

    producer >> consumer

    with pytest.raises(RenderError):
        consumer.render()
def test_lineage():
    """``_lineage`` holds the names of every ancestor in a linear chain."""
    pipeline = DAG('dag')
    touch = 'touch {{product}}'

    ta = ShellScript(touch, File(Path('a.txt')), pipeline, name='ta')
    tb = ShellScript(touch, File(Path('b.txt')), pipeline, name='tb')
    tc = ShellScript(touch, File(Path('c.txt')), pipeline, name='tc')

    ta >> tb >> tc

    # The root has no lineage at all (None, not an empty set).
    assert ta._lineage is None
    assert tb._lineage == {'ta'}
    assert tc._lineage == {'ta', 'tb'}
def test_can_get_upstream_tasks():
    """``upstream`` only lists direct parents, keyed by task name."""
    pipeline = DAG('dag')

    ta = ShellScript('echo "a" > {{product}}',
                     product=File('a.txt'),
                     dag=pipeline,
                     name='ta')
    tb = ShellScript('cat {{upstream["ta"]}} > {{product}}',
                     product=File('b.txt'),
                     dag=pipeline,
                     name='tb')
    tc = ShellScript('cat {{upstream["tb"]}} > {{product}}',
                     product=File('c.txt'),
                     dag=pipeline,
                     name='tc')

    ta >> tb >> tc

    assert set(ta.upstream) == set()
    assert set(tb.upstream) == {'ta'}
    # ta is an ancestor of tc but not a direct parent.
    assert set(tc.upstream) == {'tb'}
def test_adding_tasks_left():
    """A task group on the left of ``>>`` makes each member an upstream."""
    pipeline = DAG()
    touch = 'touch {{product}}'

    ta = ShellScript(touch, File(Path('a.txt')), pipeline, name='ta')
    tb = ShellScript(touch, File(Path('b.txt')), pipeline, name='tb')
    tc = ShellScript(touch, File(Path('c.txt')), pipeline, name='tc')

    (ta + tb) >> tc

    assert not ta.upstream
    assert not tb.upstream
    assert set(tc.upstream.values()) == {ta, tb}
def test_adding_tasks():
    """``+`` groups tasks in operand order, regardless of parenthesization."""
    pipeline = DAG()
    touch = 'touch {{product}}'

    ta = ShellScript(touch, File(Path('a.txt')), pipeline, name='ta')
    tb = ShellScript(touch, File(Path('b.txt')), pipeline, name='tb')
    tc = ShellScript(touch, File(Path('c.txt')), pipeline, name='tc')

    # Two operands: order follows the expression.
    assert list((ta + tb).tasks) == [ta, tb]
    assert list((tb + ta).tasks) == [tb, ta]

    # Three operands: every grouping flattens to the same sequence.
    assert list((ta + tb + tc).tasks) == [ta, tb, tc]
    assert list(((ta + tb) + tc).tasks) == [ta, tb, tc]
    assert list((ta + (tb + tc)).tasks) == [ta, tb, tc]
def dag():
    """Return a three-task DAG chained t1 >> t2 >> t3.

    The products of t2 and t3 are templated on their upstream's product.
    """
    pipeline = DAG()

    first = ShellScript('echo a > {{product}} ',
                        product=File('1.txt'),
                        dag=pipeline,
                        name='t1')
    second = ShellScript(
        'cat {{upstream["t1"]}} > {{product}}'
        '&& echo b >> {{product}} ',
        product=File('2_{{upstream["t1"]}}'),
        dag=pipeline,
        name='t2')
    third = ShellScript(
        'cat {{upstream["t2"]}} > {{product}} '
        '&& echo c >> {{product}}',
        product=File('3_{{upstream["t2"]}}'),
        dag=pipeline,
        name='t3')

    first >> second >> third

    return pipeline
def test_adding_tasks_right():
    """A task group on the right of ``>>`` gives each member the upstream."""
    pipeline = DAG()
    touch = 'touch {{product}}'

    ta = ShellScript(touch, File(Path('a.txt')), pipeline, name='ta')
    tb = ShellScript(touch, File(Path('b.txt')), pipeline, name='tb')
    tc = ShellScript(touch, File(Path('c.txt')), pipeline, name='tc')

    ta >> (tb + tc)

    assert not ta.upstream
    assert list(tb.upstream.values()) == [ta]
    assert list(tc.upstream.values()) == [ta]
def test_overloaded_operators():
    """Chaining with ``>>`` links each task to the one on its left."""
    pipeline = DAG()
    touch = 'touch {{product}}'

    ta = ShellScript(touch, File(Path('a.txt')), pipeline, name='ta')
    tb = ShellScript(touch, File(Path('b.txt')), pipeline, name='tb')
    tc = ShellScript(touch, File(Path('c.txt')), pipeline, name='tc')

    ta >> tb >> tc

    assert not ta.upstream
    assert tb in tc.upstream.values()
    assert ta in tb.upstream.values()
def test_error_if_missing_product(tmp_directory):
    """Creating a ShellScript whose source lacks {{product}} must fail."""
    pipeline = DAG()
    expected_message = 'ShellScript must include {{product}} in its source'

    with pytest.raises(SourceInitializationError) as excinfo:
        # The source hard-codes the file name instead of using {{product}}.
        ShellScript('touch file.txt',
                    product=File('file.txt'),
                    dag=pipeline,
                    name='touch')

    assert expected_message in str(excinfo.value)
def test_can_access_tasks_inside_dag_using_getitem():
    """Iterating a DAG yields its tasks plus upstream ones declared elsewhere.

    ``ta``, ``tb`` and ``tc`` belong to ``dag``; ``td`` and ``te`` belong to
    ``dag2``. The chain is td >> ta >> tb >> tc >> te, so ``td`` is upstream
    of tasks in ``dag`` while ``te`` is only downstream of them.
    """
    dag = DAG('dag')
    dag2 = DAG('dag2')

    ta = ShellScript('touch {{product}}', File(Path('a.txt')), dag, 'ta')
    tb = ShellScript('touch {{product}}', File(Path('b.txt')), dag, 'tb')
    tc = ShellScript('touch {{product}}', File(Path('c.txt')), dag, 'tc')

    # td is still discoverable from dag even though it was declared in dag2,
    # since it is a dependency for a task in dag. It writes to its own file
    # (d.txt) so that it does not claim the same product as tc.
    td = ShellScript('touch {{product}}', File(Path('d.txt')), dag2, 'td')

    # te is not discoverable since it is not a dependency for any task in dag
    te = ShellScript('touch {{product}}', File(Path('e.txt')), dag2, 'te')

    td >> ta >> tb >> tc >> te

    assert set(dag) == {'ta', 'tb', 'tc', 'td'}
def can_access_product_without_rendering_if_literal():
    """A literal (placeholder-free) product can be read before rendering.

    NOTE(review): this function's name lacks the ``test_`` prefix, so pytest's
    default collection rules will presumably skip it. Confirm whether that is
    intentional.
    """
    pipeline = DAG()

    ShellScript('echo a > {{product}}',
                product=File('1.txt'),
                dag=pipeline,
                name='t1')

    # Deliberately no render() call: str() on the product must still work.
    product = pipeline['t1'].product
    assert str(product) == '1.txt'
def test_passing_upstream_and_product_in_shellscript(tmp_directory):
    """Build a three-step chain and check the content of the last file.

    NOTE(review): another function with this exact name is defined further
    down. If both live in the same module, the later definition replaces this
    one and this test never runs. Confirm and rename one of them.
    """
    pipeline = DAG()
    final_path = Path('c.txt')

    ta = ShellScript('echo a > {{product}} ',
                     product=File(Path('a.txt')),
                     dag=pipeline,
                     name='ta')
    tb = ShellScript(
        'cat {{upstream["ta"]}} > {{product}}'
        '&& echo b >> {{product}} ',
        product=File(Path('b.txt')),
        dag=pipeline,
        name='tb')
    tc = ShellScript(
        'cat {{upstream["tb"]}} > {{product}} '
        '&& echo c >> {{product}}',
        product=File(final_path),
        dag=pipeline,
        name='tc')

    ta >> tb >> tc

    pipeline.build()

    # Each step copies its upstream's output and appends one more line.
    assert final_path.read_text() == 'a\nb\nc\n'
def test_partial_build(tmp_directory):
    """``build_partially('tc')`` reports ta, tb and tc only, all as ran.

    Graph: ta >> tb >> tc and tb >> td >> te.
    """
    pipeline = DAG('dag')
    cat_first = 'cat {{upstream.first}} >> {{product}}'

    ta = ShellScript('echo "hi" >> {{product}}',
                     File(Path('a.txt')), pipeline, name='ta')
    tb = ShellScript(cat_first, File(Path('b.txt')), pipeline, name='tb')
    tc = ShellScript(cat_first, File(Path('c.txt')), pipeline, name='tc')
    td = ShellScript(cat_first, File(Path('d.txt')), pipeline, name='td')
    te = ShellScript(cat_first, File(Path('e.txt')), pipeline, name='te')

    ta >> tb >> tc
    tb >> td >> te

    report = pipeline.build_partially('tc')

    # td and te sit on the other branch and must not appear in the report.
    assert {row['name'] for row in report} == {'ta', 'tb', 'tc'}
    assert all(row['Ran?'] for row in report)
def test_can_create_task_with_more_than_one_product(tmp_directory):
    """Render and build a task with two products and a consumer of both."""
    pipeline = DAG()

    # Both the source and the downstream task index into the product tuple.
    products = (File(Path('a.txt')), File(Path('b.txt')))
    ta = ShellScript('touch {{product[0]}} {{product[1]}}',
                     product=products,
                     dag=pipeline,
                     name='ta')
    tc = ShellScript(
        'cat {{upstream["ta"][0]}} {{upstream["ta"][1]}} > {{product}}',
        product=File(Path('c.txt')),
        dag=pipeline,
        name='tc')

    ta >> tc

    pipeline.render()
    pipeline.build()
def test_error_if_non_compatible_tasks():
    """InMemoryDAG rejects a DAG that contains a ShellScript task."""
    pipeline = DAG()
    ShellScript('touch {{product}}',
                product=File('file.txt'),
                dag=pipeline,
                name='task')

    with pytest.raises(TypeError) as excinfo:
        InMemoryDAG(pipeline)

    message = str(excinfo.value)
    assert message == ('All tasks in the DAG must be PythonCallable, '
                       'got unallowed types: ShellScript')
def test_passing_upstream_and_product_in_shellscript(tmp_directory):
    """Render a three-step chain and check each task's rendered source.

    NOTE(review): an earlier function in this source has the same name. If
    both live in the same module, this definition shadows the earlier one.
    Confirm and rename one of them.
    """
    pipeline = DAG()

    ta = ShellScript('echo a > {{product}}',
                     product=File(Path('a.txt')),
                     dag=pipeline,
                     name='ta')
    tb = ShellScript(
        'cat {{upstream["ta"]}} > {{product}}'
        ' && echo b >> {{product}}',
        product=File(Path('b.txt')),
        dag=pipeline,
        name='tb')
    tc = ShellScript(
        'cat {{upstream["tb"]}} > {{product}}'
        ' && echo c >> {{product}}',
        product=File(Path('c.txt')),
        dag=pipeline,
        name='tc')

    ta >> tb >> tc

    pipeline.render()

    # Placeholders must be replaced by the concrete file names.
    assert str(ta.source) == 'echo a > a.txt'
    assert str(tb.source) == 'cat a.txt > b.txt && echo b >> b.txt'
    assert str(tc.source) == 'cat b.txt > c.txt && echo c >> c.txt'
def test_outdated_data_simple_dependency(tmp_directory):
    """Re-running the upstream task marks the downstream one as outdated.

    Graph: A -> B
    """
    pipeline = DAG()

    ta = ShellScript('touch {{product}}',
                     product=File(Path('a.txt')),
                     dag=pipeline,
                     name='ta')
    tb = ShellScript('cat {{upstream["ta"]}} > {{product}}',
                     product=File(Path('b.txt')),
                     dag=pipeline,
                     name='tb')

    ta >> tb

    ta.render()
    tb.render()

    # Before building: nothing exists, so everything is outdated.
    assert not ta.product.exists()
    assert not tb.product.exists()
    assert ta.product._outdated()
    assert tb.product._outdated()

    pipeline.build()
    pipeline._clear_cached_outdated_status()

    # After building: both files exist...
    assert ta.product.exists()
    assert tb.product.exists()

    # ...and both are up to date.
    assert not ta.product._outdated()
    assert not tb.product._outdated()

    # Force A to run again, which should leave B outdated.
    ta.build(force=True)
    pipeline._clear_cached_outdated_status()

    assert not ta.product._outdated()
    assert tb.product._outdated()
def test_can_pickle_dag():
    """A DAG mixing ShellScript and PythonCallable survives a pickle trip."""
    pipeline = DAG()

    shell_task = ShellScript('cat "hi" > {{product}}',
                             product=File('/tmp/file.txt'),
                             dag=pipeline,
                             name='bash')
    python_task = PythonCallable(fn, File('/tmp/file2.txt'), pipeline,
                                 name='fn')

    shell_task >> python_task

    # Only checks that serializing and deserializing does not raise.
    serialized = pickle.dumps(pipeline)
    pickle.loads(serialized)
def test_init_client_automatically(monkeypatch):
    """With ``client=None`` the task gets its client from ``ShellClient()``."""
    fake_client = Mock()
    # Replace the constructor so the instance it returns is recognizable.
    monkeypatch.setattr(tasks, 'ShellClient', lambda: fake_client)

    pipeline = DAG()

    # No client is supplied explicitly...
    task = ShellScript('touch {{product}}',
                       product=File('file.txt'),
                       dag=pipeline,
                       name='touch',
                       client=None)

    # ...so the task must have obtained one by calling ShellClient().
    assert task.client is fake_client
def test_many_upstream(tmp_directory):
    """Re-running either upstream task marks the shared downstream outdated.

    Graph: {A, B} -> C
    """
    pipeline = DAG()

    ta = ShellScript('touch {{product}}',
                     product=File(Path('a.txt')),
                     dag=pipeline,
                     name='ta')
    tb = ShellScript('touch {{product}} > {{product}}',
                     product=File(Path('b.txt')),
                     dag=pipeline,
                     name='tb')
    tc = ShellScript(
        'cat {{upstream["ta"]}} {{upstream["tb"]}} > {{product}}',
        product=File(Path('c.txt')),
        dag=pipeline,
        name='tc')

    (ta + tb) >> tc

    pipeline.build()

    # After a full build every product exists and is up to date.
    for task in (ta, tb, tc):
        assert task.product.exists()

    for task in (ta, tb, tc):
        assert not task.product._outdated()

    # Force A to run again: only C becomes outdated.
    ta.build(force=True)
    pipeline._clear_cached_outdated_status()

    assert not ta.product._outdated()
    assert not tb.product._outdated()
    assert tc.product._outdated()

    # Bring everything up to date, then repeat the check forcing B.
    pipeline.build()
    tb.build(force=True)
    pipeline._clear_cached_outdated_status()

    assert not ta.product._outdated()
    assert not tb.product._outdated()
    assert tc.product._outdated()
def test_task_level_shell_client(tmp_directory, monkeypatch):
    """Check the command and script that reach ``subprocess.run``.

    A ShellClient with a ``ruby`` run template is registered in
    ``dag.clients``; ``subprocess.run`` is mocked, so no Ruby is needed.
    """
    product_path = Path(tmp_directory, 'a_file')
    pipeline = DAG()
    ruby_client = ShellClient(run_template='ruby {{path_to_code}}')
    pipeline.clients[ShellScript] = ruby_client

    ShellScript(""" require 'fileutils' FileUtils.touch "{{product}}" """,
                product=File(product_path),
                dag=pipeline,
                name='ruby_script')

    # Spy on the client: calls are recorded and still forwarded.
    execute_spy = Mock(wraps=ruby_client.execute)
    monkeypatch.setattr(ruby_client, 'execute', execute_spy)

    fake_result = Mock()
    fake_result.returncode = 0

    def side_effect(*args, **kwargs):
        # Stand in for the script: create the file it would have touched.
        Path('a_file').touch()
        return fake_result

    run_mock = Mock(side_effect=side_effect)
    monkeypatch.setattr(shell.subprocess, 'run', run_mock)
    # prevent tmp file from being removed so we can check contents
    monkeypatch.setattr(shell.Path, 'unlink', Mock())

    pipeline.build()

    execute_spy.assert_called_once()

    positional, keyword = run_mock.call_args[0], run_mock.call_args[1]
    # The first positional argument is the (command, script path) pair.
    cmd, path_arg = positional[0]

    expected_code = (""" require 'fileutils' FileUtils.touch "{path}" """
                     .format(path=product_path))

    assert cmd == 'ruby'
    assert Path(path_arg).read_text() == expected_code
    assert keyword == {
        'stderr': subprocess.PIPE,
        'stdout': subprocess.PIPE,
        'shell': False
    }
def test_build_task(tmp_directory, monkeypatch):
    """Building the DAG hands the rendered source to the task's client."""
    pipeline = DAG()
    task = ShellScript('touch {{product}}',
                       product=File('file.txt'),
                       dag=pipeline,
                       name='touch')

    def side_effect(code):
        # dag.build verifies that products exist after execution, so the
        # fake execution has to create the file itself.
        Path('file.txt').touch()

    # Mock the actual execution so this test also works on Windows.
    execute_mock = Mock(side_effect=side_effect)
    monkeypatch.setattr(task.client, 'execute', execute_mock)

    pipeline.build()

    execute_mock.assert_called_once_with('touch file.txt')
def test_can_access_sub_dag():
    """A whole DAG set as upstream is listed under the DAG's own name.

    ``sub_dag`` holds the chain ta >> tb >> tc. ``td`` lives in a separate
    DAG and receives ``sub_dag`` itself through ``set_upstream``.
    """
    sub_dag = DAG('sub_dag')

    ta = ShellScript('echo "a" > {{product}}', File('a.txt'), sub_dag, 'ta')
    tb = ShellScript('cat {{upstream["ta"]}} > {{product}}',
                     File('b.txt'), sub_dag, 'tb')
    # The command is "cat", as in tb (it previously read "tcat").
    tc = ShellScript('cat {{upstream["tb"]}} > {{product}}',
                     File('c.txt'), sub_dag, 'tc')

    ta >> tb >> tc

    dag = DAG('dag')
    td = ShellScript('touch {{product}}', File(Path('d.txt')), dag, 'td')

    td.set_upstream(sub_dag)

    assert 'sub_dag' in td.upstream
def test_custom_client_in_dag(tmp_directory):
    """A client registered in ``dag.clients`` is used to build the task.

    Nothing is mocked here, so the build goes through the ``ruby`` run
    template for real.

    NOTE(review): as written here the script literal is a single line with no
    separator between its two Ruby statements. Confirm the literal's original
    line breaks are intact.
    """
    product_path = Path(tmp_directory, 'a_file')
    pipeline = DAG()

    ruby_client = ShellClient(run_template='ruby {{path_to_code}}')
    pipeline.clients[ShellScript] = ruby_client

    ShellScript(""" require 'fileutils' FileUtils.touch "{{product}}" """,
                product=File(product_path),
                dag=pipeline,
                name='ruby_script')

    assert not product_path.exists()

    pipeline.build()

    assert product_path.exists()