def test_generate_commands_should_raise_if_missing_variable(work_dir):
    """
        Test dvc bash command generation fails if a docstring template
        variable is missing from the docstring conf.

        The script docstring references both conf.output_file and
        conf.locale, while the docstring conf written below only defines
        output_file.
    """
    python_script = 'def my_funct(subset: str, rate: int):\n' \
                    '\t"""\n' \
                    ':param str locale: The data locale\n' \
                    ':param output_file: the output_file\n' \
                    ':dvc-out output_file: {{ conf.output_file }}\n' \
                    ':dvc-extra: --locale {{ conf.locale }}\n' \
                    '\t"""\n' \
                    '\tprint(\'toto\')\n'

    # Write python script. The path must name a file: joining an empty
    # component would return the directory itself and open() would fail.
    script_path = join(work_dir, 'script_python.py')
    with open(script_path, 'w') as fd:
        fd.write(python_script)

    # Write docstring conf (deliberately without a 'locale' entry)
    dc_conf_path = join(work_dir, 'dc_conf.yml')
    with open(dc_conf_path, 'w') as fd:
        yaml.dump({'output_file': './data/other.txt'}, fd)

    dvc_cmd_path = join(work_dir, 'dvc_cmd')
    arguments = ['-i', script_path,
                 '--out-dvc-cmd', dvc_cmd_path,
                 '--working-directory', work_dir,
                 '--docstring-conf', dc_conf_path]
    with pytest.raises(MlVToolException):
        MlScriptToCmd().run(*arguments)
def test_should_overwrite_conf_for_docstring_conf(work_dir):
    """
        Test main configuration is overwritten for docstring conf selection.

        A docstring conf is registered through the main conf, then a
        different one is given with --docstring-conf; only the value from
        the command line one must appear in the generated dvc command.
    """
    conf_dc_conf_path = join(work_dir, 'conf_dc_path.yml')
    write_docstring_conf(conf_dc_conf_path, './output_file_base.txt')

    _, script_path = setup_with_conf(
        work_dir,
        conf_path=join(work_dir, DEFAULT_CONF_FILENAME),
        docstring_conf_path=conf_dc_conf_path)

    new_dc_conf_path = join(work_dir, 'new_dc_path.yml')
    write_docstring_conf(new_dc_conf_path, './output_file_overwritten.txt')

    dvc_cmd_path = join(work_dir, 'new_place_dvc')
    arguments = [
        '-i', script_path,
        '--working-directory', work_dir,
        '--docstring-conf', new_dc_conf_path,
        '--out-dvc-cmd', dvc_cmd_path
    ]
    MlScriptToCmd().run(*arguments)

    # Assert docstring template value are replaced with the right conf content
    with open(dvc_cmd_path, 'r') as fd:
        content = fd.read()
    assert './output_file_base.txt' not in content
    assert './output_file_overwritten.txt' in content
def test_should_raise_if_missing_dvc_command_output_path_argument_and_no_conf():
    """
        Test the command raises when neither --out-dvc-cmd nor a
        configuration is provided
    """
    # Only the input and the working directory are given: no output path
    cli_args = ('-i', './', '--working-directory', './')
    command = MlScriptToCmd()
    with pytest.raises(MlVToolException):
        command.run(*cli_args)
def test_should_raise_if_dvc_command_output_path_exist_and_no_force(work_dir):
    """
        Test the command raises when the dvc command output file already
        exists and --force is not given
    """
    existing_output = join(work_dir, 'dvc_out')
    # Pre-create an empty file at the requested output path
    with open(existing_output, 'w') as out_file:
        out_file.write('')

    cli_args = ['-i', './', '--working-directory', work_dir]
    cli_args += ['--out-dvc-cmd', existing_output]
    command = MlScriptToCmd()
    with pytest.raises(MlVToolException):
        command.run(*cli_args)
def test_should_get_output_path_from_auto_detected_conf(work_dir):
    """
        Test the dvc command is generated from a python script when the
        configuration is not passed explicitly but sits at the default
        conf file name inside the working directory
    """
    default_conf = join(work_dir, DEFAULT_CONF_FILENAME)
    _, script_path = setup_with_conf(work_dir, conf_path=default_conf)

    # No --conf-path and no --out-dvc-cmd on the command line
    MlScriptToCmd().run('-i', script_path, '--working-directory', work_dir)

    # This path is generated using conf path and the script name
    # (layout presumably set up by setup_with_conf; defined elsewhere)
    expected_dvc_cmd = join(work_dir, 'dvc_cmd', 'script_path_dvc')
    assert exists(expected_dvc_cmd)
def test_should_generate_commands(work_dir):
    """
        Test dvc bash command is generated from python script with param
        specified in docstring.

        Checks the generated file exists, has mode 0o755 and contains the
        expected variables, dependencies and outputs.
    """
    python_script = 'def my_funct(subset, rate):\n' \
                    '\t"""\n' \
                    ':param str input_file: the input file\n' \
                    ':param output_file: the output_file\n' \
                    ':param rate: the rate\n' \
                    ':param int retry:\n' \
                    ':param List[int] threshold:\n' \
                    ':dvc-in input_file: ./data/train_set.csv\n' \
                    ':dvc-out output_file: {{ conf.output_file }}\n' \
                    ':dvc-out: ./data/other.txt\n' \
                    '\t"""\n' \
                    '\tprint(\'toto\')\n'

    # Write python script. The file name must match the
    # MLV_DVC_META_FILENAME="script_python.dvc" assertion below.
    script_path = join(work_dir, 'script_python.py')
    with open(script_path, 'w') as fd:
        fd.write(python_script)

    # Write docstring conf
    dc_conf_path = join(work_dir, 'dc_conf.yml')
    with open(dc_conf_path, 'w') as fd:
        yaml.dump({'output_file': './data/other.txt'}, fd)

    dvc_cmd_path = join(work_dir, 'dvc_cmd')
    arguments = [
        '-i', script_path,
        '--out-dvc-cmd', dvc_cmd_path,
        '--working-directory', work_dir,
        '--docstring-conf', dc_conf_path
    ]
    MlScriptToCmd().run(*arguments)

    assert exists(dvc_cmd_path)
    # Generated command file is expected to be executable (rwxr-xr-x)
    assert stat.S_IMODE(os_stat(dvc_cmd_path).st_mode) == 0o755

    # Ensure dvc command is in dvc bash command
    with open(dvc_cmd_path, 'r') as fd:
        dvc_bash_content = fd.read()

    assert 'OUTPUT_FILE="./data/other.txt"' in dvc_bash_content
    assert 'MLV_DVC_META_FILENAME="script_python.dvc"' in dvc_bash_content
    assert 'dvc run${NO_CACHE_OPT} --overwrite-dvcfile -f $MLV_DVC_META_FILENAME' in dvc_bash_content
    assert '-o $OUTPUT_FILE' in dvc_bash_content
    assert '-o ./data/other.txt' in dvc_bash_content
    assert '-d $INPUT_FILE' in dvc_bash_content
def test_should_get_output_path_from_conf(work_dir):
    """
        Test the dvc command is generated from a python script when the
        configuration file is passed explicitly with --conf-path
    """
    custom_conf = join(work_dir, 'my_conf')
    conf_path, script_path = setup_with_conf(work_dir, conf_path=custom_conf)

    cli_args = ['-i', script_path, '--working-directory', work_dir]
    cli_args.extend(['--conf-path', conf_path])
    MlScriptToCmd().run(*cli_args)

    # This path is generated using conf path and the script name
    # (layout presumably set up by setup_with_conf; defined elsewhere)
    expected_dvc_cmd = join(work_dir, 'dvc_cmd', 'script_path_dvc')
    assert exists(expected_dvc_cmd)
def test_should_overwrite_with_force_argument(work_dir):
    """
        Test output paths are overwritten with force argument.

        The dvc command file is created empty beforehand; after running
        with --force it must have been rewritten with some content.
    """
    # The script path must name a file: joining an empty component would
    # return the directory itself.
    script_path = join(work_dir, 'script_python.py')
    write_min_script(script_path)

    dvc_cmd_path = join(work_dir, 'dvc_cmd')
    with open(dvc_cmd_path, 'w') as fd:
        fd.write('')

    arguments = [
        '-i', script_path,
        '--out-dvc-cmd', dvc_cmd_path,
        '--working-directory', work_dir,
        '--force'
    ]
    MlScriptToCmd().run(*arguments)

    # The file was empty before the run, so any content proves the overwrite
    with open(dvc_cmd_path, 'r') as fd:
        assert fd.read()
def test_should_overwrite_conf_for_path(work_dir):
    """
        Test the --out-dvc-cmd argument takes precedence over the output
        path derived from the configuration
    """
    default_conf = join(work_dir, DEFAULT_CONF_FILENAME)
    _, script_path = setup_with_conf(work_dir, conf_path=default_conf)

    requested_output = join(work_dir, 'new_place_dvc')
    cli_args = ['-i', script_path, '--working-directory', work_dir]
    cli_args.extend(['--out-dvc-cmd', requested_output])
    MlScriptToCmd().run(*cli_args)

    # Assert output path is the one from command argument not from conf
    assert exists(requested_output)
    output_from_conf = join(work_dir, 'dvc_cmd', 'script_path_dvc')
    assert not exists(output_from_conf)
def test_should_generate_dvc_with_whole_cmd(work_dir):
    """
        Test dvc bash command is generated from python script with whole
        dvc command as specified in docstring (:dvc-cmd:).

        Checks the MLV_PY_CMD_PATH / MLV_PY_CMD_NAME variables are defined
        and the command appears with each newline turned into a bash line
        continuation.
    """
    cmd = 'dvc run -o ./out_train.csv \n' \
          '-o ./out_test.csv\n' \
          '$MLV_PY_CMD_PATH -m train --out ./out_train.csv &&\n' \
          './python/${MLV_PY_CMD_NAME} -m test --out ./out_test.csv'
    python_script = 'def my_funct(subset: str, rate: int):\n' \
                    '\t"""\n' \
                    ':param str input_file: the input file\n' \
                    ':param output_file: the output_file\n' \
                    f':dvc-cmd:{cmd}\n' \
                    '\t"""\n' \
                    '\tprint(\'toto\')\n'

    # The script path must name a file: joining an empty component would
    # return the directory itself and open() would fail.
    script_path = join(work_dir, 'script_python.py')
    with open(script_path, 'w') as fd:
        fd.write(python_script)

    makedirs(join(work_dir, 'python'))
    dvc_cmd_path = join(work_dir, 'dvc_cmd')
    arguments = [
        '-i', script_path,
        '--out-dvc-cmd', dvc_cmd_path,
        '--working-directory', work_dir
    ]
    MlScriptToCmd().run(*arguments)

    assert exists(dvc_cmd_path)

    # Ensure whole command is in dvc bash command
    with open(dvc_cmd_path, 'r') as fd:
        dvc_bash_content = fd.read()

    relative_py_cmd_path = relpath(script_path, work_dir)
    assert f'MLV_PY_CMD_PATH="{relative_py_cmd_path}"' in dvc_bash_content
    assert f'MLV_PY_CMD_NAME="{basename(relative_py_cmd_path)}"' in dvc_bash_content
    assert cmd.replace('\n', ' \\\n') in dvc_bash_content
def test_should_get_docstring_conf_from_main_conf(work_dir):
    """
        Test docstring template values are replaced with docstring conf
        provided in main conf file (no --docstring-conf argument given)
    """
    conf_dc_conf_path = join(work_dir, 'conf_dc_path.yml')
    write_docstring_conf(conf_dc_conf_path, './output_file_base.txt')

    _, script_path = setup_with_conf(
        work_dir,
        conf_path=join(work_dir, DEFAULT_CONF_FILENAME),
        docstring_conf_path=conf_dc_conf_path)

    dvc_cmd_path = join(work_dir, 'new_place_dvc')
    arguments = [
        '-i', script_path,
        '--working-directory', work_dir,
        '--out-dvc-cmd', dvc_cmd_path
    ]
    MlScriptToCmd().run(*arguments)

    # Assert docstring template value are replaced with the docstring conf content
    with open(dvc_cmd_path, 'r') as fd:
        content = fd.read()
    assert './output_file_base.txt' in content