def test_should_raise_if_target_does_not_exist(base_steps):
    """
        Resolving the dependencies of a target file which is not on disk
        must raise a MlVToolException caused by an IOError
    """
    missing_target = 'does_not_exit.dvc'

    with pytest.raises(MlVToolException) as raised:
        get_dvc_dependencies(target_file_path=missing_target, dvc_files=base_steps)

    # The low-level failure must be chained as the cause of the raised error
    root_cause = raised.value.__cause__
    assert isinstance(root_cause, IOError)
def test_should_raise_dvc_file_step_not_found(base_steps):
    """
        A step listed in the DVC files but missing on disk must raise
        a MlVToolException caused by an IOError
    """
    # Register a step file which is never written
    base_steps.append('./does_not_exist_step.dvc')
    existing_target = base_steps[0]

    with pytest.raises(MlVToolException) as raised:
        get_dvc_dependencies(target_file_path=existing_target, dvc_files=base_steps)

    root_cause = raised.value.__cause__
    assert isinstance(root_cause, IOError)
def test_should_raise_if_target_format_error(work_dir, base_steps):
    """
        A target file with an invalid content must raise a MlVToolException
        caused by a YAMLError
    """
    broken_target = join(work_dir, 'format_error.dvc')
    invalid_content = b'k:v:\n\t\t-'
    with open(broken_target, 'wb') as target_fd:
        target_fd.write(invalid_content)

    with pytest.raises(MlVToolException) as raised:
        get_dvc_dependencies(target_file_path=broken_target, dvc_files=base_steps)

    root_cause = raised.value.__cause__
    assert isinstance(root_cause, YAMLError)
def test_should_get_dependencies_steps(base_steps):
    """
        Every step the target step depends on must be returned, in a valid
        topological order

                 +-----------+        +---------------+
                 | step1.dvc |        | isolated1.dvc |
                 +-----------+        +---------------+
                       *
                 +-----------+        +---------------+
                 | step2.dvc |        | isolated2.dvc |
                 +-----------+        +---------------+
                 **        **
        +-----------+    +-----------+
        | step3.dvc |    | step4.dvc |
        +-----------+    +-----------+
                 **        **
                 +-----------+
                 | step5.dvc |
                 +-----------+
    """
    target_step = base_steps[-1]
    resolved_names = [
        dependency.name
        for dependency in get_dvc_dependencies(target_file_path=target_step,
                                               dvc_files=base_steps)
    ]

    # Steps 3 and 4 can be swapped, so the topological sort has two valid
    # solutions: 1 -> 2 -> 3 -> 4 -> 5 or 1 -> 2 -> 4 -> 3 -> 5
    valid_orders = ((0, 1, 2, 3, 4), (0, 1, 3, 2, 4))
    accepted_names = tuple(
        [basename(base_steps[position]) for position in order]
        for order in valid_orders
    )

    assert resolved_names in accepted_names
def export_pipeline(dvc_meta_file: str, output: str, work_dir: str):
    """
        Write an executable script which runs a whole pipeline.

        The commands of the steps returned by get_dvc_dependencies for the
        given DVC meta file are collected, in the order they are returned,
        and rendered through the pipeline export template.

        :param dvc_meta_file: DVC meta file of the step to export the pipeline from
        :param output: path of the script to generate
        :param work_dir: work directory value handed to the template
    """
    logging.info(f'Export pipeline from step {dvc_meta_file} to {output}')
    logging.debug(f'Work directory {work_dir}')

    candidate_dvc_files = get_dvc_files(dvc_meta_file)
    step_cmds = []
    for step_meta in get_dvc_dependencies(dvc_meta_file, candidate_dvc_files):
        step_cmds.append(step_meta.cmd)

    template_data = dict(work_dir=work_dir, cmds=step_cmds)
    logging.debug(f'Template data: {template_data}')

    export_template = join(CURRENT_DIR, '..', 'template', PIPELINE_EXPORT_TEMPLATE_NAME)
    write_template(output, export_template, info=template_data)

    # Emitted one level above WARNING
    # NOTE(review): presumably so the message is still shown when the log
    # level is set to WARNING - confirm against the logging configuration
    success_level = logging.WARNING + 1
    logging.log(success_level, f'Pipeline successfully exported in {abspath(output)}')
def test_should_remove_not_targeted_steps(work_dir, base_steps):
    """
        Only the steps the target step depends on must be returned; steps
        outside of its dependency chain are left out

                       +-----------+
                       | step1.dvc |
                       +-----------+**
                         *            **********
                 +-----------+            +---------------+
                 | step2.dvc |            | step2_bis.dvc |
                 +-----------+            +---------------+
                 **        **
        +-----------+    +-----------+
        | step3.dvc |    | step4.dvc |
        +-----------+    +-----------+
                 **      **       **
              +-----------+    +-----------+
              | step5.dvc |    | step6.dvc |
              +-----------+    +-----------+
    """
    step2_bis = join(work_dir, 'step2_bis.dvc')
    target_step = join(work_dir, 'step6.dvc')

    # Extend the fixture steps in place with a side branch and the new target
    base_steps.extend([step2_bis, target_step])
    write_dvc_file(step2_bis, 'cmd2_bis', deps=['./s1_out'], outs=['./s2_bis_out'])
    write_dvc_file(target_step, 'cmd6', deps=['./s4_out'], outs=['./s6_out'])

    # Topological solution is 1 -> 2 -> 4 -> 6
    expected_names = [basename(base_steps[position]) for position in (0, 1, 3)]
    expected_names.append(basename(target_step))

    resolved_names = [
        dependency.name
        for dependency in get_dvc_dependencies(target_file_path=target_step,
                                               dvc_files=base_steps)
    ]

    assert resolved_names == expected_names