def test_download_regression(dataset, expected):
    """Check for regression for a particular dataset downloaded only.

    Runs ``retriever download`` for ``dataset`` into ``raw_data/<dataset>``
    (relative to the current working directory) and asserts that the md5
    of that directory equals ``expected``.

    Raises:
        subprocess.CalledProcessError: if the ``retriever`` command exits
            with a non-zero status.
    """
    # Function-scope import keeps this block self-contained.
    import subprocess

    download_dir = "raw_data/{0}".format(dataset)
    # An argument list (no shell) keeps the dataset name intact even if it
    # contains spaces or shell metacharacters, and check_call fails loudly
    # instead of hashing whatever an earlier run left in the directory.
    subprocess.check_call(
        ["retriever", "download", dataset, "-p", download_dir]
    )
    # NOTE(review): the "U" flag was removed from open() in Python 3.11;
    # confirm how getmd5 uses ``mode`` before running on newer interpreters.
    current_md5 = getmd5(data=download_dir, data_type='dir', mode="rU")
    assert current_md5 == expected
def get_csv_md5(dataset, engines, tmpdir):
    """Return the md5 of a scratch directory after exporting ``dataset``.

    A new directory obtained from ``tmpdir.mkdtemp()`` is made the current
    working directory; the dataset's script is then downloaded with
    ``engines``, its engine is cleaned up and dumped with ``to_csv()``,
    and the scratch directory is hashed with ``getmd5``.
    """
    scratch = tmpdir.mkdtemp()
    scratch.chdir()

    script = get_script_module(dataset).SCRIPT
    script.download(engines)
    script.engine.final_cleanup()
    script.engine.to_csv()

    return getmd5(data=str(scratch), data_type='dir')
def get_csv_md5(dataset, engines):
    """Return the md5 of ``output_dumps`` after exporting ``dataset``.

    Calls ``dump_dir()``, downloads the dataset's script with ``engines``,
    cleans up the engine and dumps it with ``to_csv()``, then moves one
    directory up and hashes the ``output_dumps`` directory.
    """
    # Presumably dump_dir() leaves the process inside ``output_dumps``;
    # verify against its definition.
    dump_dir()

    script = get_script_module(dataset).SCRIPT
    script.download(engines)
    script.engine.final_cleanup()
    script.engine.to_csv()

    # Step back up so the relative path 'output_dumps' resolves.
    os.chdir("..")
    return getmd5(data='output_dumps', data_type='dir')
def get_csv_md5(dataset, engine, tmpdir, install_function, config):
    """Install ``dataset``, export it to CSV and return the directory md5.

    Args:
        dataset: Dataset name; underscores are replaced with hyphens
            before it is handed to ``install_function``.
        engine: Passed to the script's ``checkengine`` to obtain the
            engine object whose ``to_csv()`` is called.
        tmpdir: Object providing ``mkdtemp()`` (presumably pytest's
            ``tmpdir`` fixture; confirm against callers).
        install_function: Callable invoked as
            ``install_function(name, **config)``.
        config: Keyword arguments forwarded to ``install_function``.

    Returns:
        The value of ``getmd5`` for the working directory once the copied
        ``scripts`` directory has been removed.

    Raises:
        OSError: if copying or removing the ``scripts`` directory fails
            (the previous shell commands failed silently).
    """
    # Function-scope import keeps this block self-contained.
    import shutil

    workdir = tmpdir.mkdtemp()
    # shutil replaces ``cp -r``/``rm -r``: it is portable, handles paths
    # containing spaces, and raises on failure instead of being ignored.
    shutil.copytree(
        os.path.join(retriever_root_dir, 'scripts'),
        os.path.join(str(workdir), 'scripts'),
    )
    workdir.chdir()

    script_module = get_script_module(dataset)
    install_function(dataset.replace("_", "-"), **config)
    engine_obj = script_module.SCRIPT.checkengine(engine)
    engine_obj.to_csv()

    # need to remove scripts before checking md5 on dir
    shutil.rmtree("scripts")
    current_md5 = getmd5(data=str(workdir), data_type='dir')
    return current_md5
def test_getmd5_lines():
    """Check the md5 that getmd5 computes for a list of lines."""
    exp_hash = '0bec5bf6f93c547bc9c6774acaf85e1a'
    rows = ['a,b,c\n', '1,2,3\n', '4,5,6\n']
    digest = getmd5(data=rows, data_type='lines')
    assert digest == exp_hash
def test_download_regression(dataset, expected):
    """Check for regression for a particular dataset downloaded only.

    Changes into ``retriever_root_dir``, runs ``retriever download`` for
    ``dataset`` into ``raw_data/<dataset>`` and asserts that the md5 of
    that directory equals ``expected``.

    Raises:
        subprocess.CalledProcessError: if the ``retriever`` command exits
            with a non-zero status.
    """
    # Function-scope import keeps this block self-contained.
    import subprocess

    os.chdir(retriever_root_dir)
    download_dir = "raw_data/{0}".format(dataset)
    # An argument list (no shell) keeps the dataset name intact even if it
    # contains spaces or shell metacharacters, and check_call fails loudly
    # instead of hashing whatever an earlier run left in the directory.
    subprocess.check_call(
        ["retriever", "download", dataset, "-p", download_dir]
    )
    current_md5 = getmd5(data=download_dir, data_type='dir')
    assert current_md5 == expected
def test_getmd5_path():
    """Check the md5 that getmd5 computes for a file created on the fly."""
    source_path = create_file(['a,b,c', '1,2,3', '4,5,6'])
    digest = getmd5(data=source_path, data_type='file')
    assert digest == '0bec5bf6f93c547bc9c6774acaf85e1a'
def test_getmd5_line_end():
    """Check the md5 for lines that already end with a newline."""
    digest = getmd5(
        data=['a,b,c\n', '1,2,3\n', '4,5,6\n'],
        data_type='lines',
    )
    assert digest == '0bec5bf6f93c547bc9c6774acaf85e1a'
def test_getmd5_lines():
    """Check the md5 for lines given without trailing newlines."""
    digest = getmd5(data=['a,b,c', '1,2,3', '4,5,6'], data_type='lines')
    assert digest == 'ca471abda3ebd4ae8ce1b0814b8f470c'
def test_getmd5_path():
    """Check the md5 that getmd5 computes for a path to a data file."""
    exp_hash = '0bec5bf6f93c547bc9c6774acaf85e1a'
    source_path = create_file('a,b,c\n1,2,3\n4,5,6\n')
    digest = getmd5(data=source_path, data_type='file')
    assert digest == exp_hash
def test_getmd5():
    """Check the md5 that getmd5 computes for a list passed positionally."""
    exp_hash = '0bec5bf6f93c547bc9c6774acaf85e1a'
    rows = ['a,b,c\n', '1,2,3\n', '4,5,6\n']
    digest = getmd5(rows)
    assert digest == exp_hash
def test_download_regression(dataset, expected, tmpdir):
    """Download ``dataset`` and compare the md5 of its raw data directory.

    Works from ``retriever_root_dir``; the files are placed under
    ``raw_data/<dataset>`` and that directory's md5 must equal
    ``expected``. ``tmpdir`` is accepted but not used here.
    """
    os.chdir(retriever_root_dir)
    target_dir = "raw_data/{0}".format(dataset)
    download(dataset, target_dir)
    assert getmd5(data=target_dir, data_type='dir') == expected