def import_data(data_retreive_method, paths):
    """Import record sets from the local file system or a remote source.

    Args:
        data_retreive_method: Source selector. ``"Local file system"`` reads
            each path with ``md.collect_md``; any other value fetches each
            path through ``gh.get_request`` and cleans the response with
            ``ju.clean_report``.
        paths: Iterable of locations to load, one record set per entry.

    Returns:
        A ``(tidy_df, raw_df)`` tuple when at least one record set was
        loaded: ``raw_df`` is the row-wise concatenation of all record sets
        with missing values replaced by ``""`` and ``tidy_df`` is
        ``df_preprocess(raw_df)``. ``None`` when nothing was loaded.

    Loading errors are not re-raised: they are reported through
    ``st.sidebar`` and followed by a call to ``readme()``.
    """
    json_lst = []
    if data_retreive_method == "Local file system":
        # Explicit loop (not a comprehension) so that record sets collected
        # before a failure are kept and still processed below.
        try:
            for path in paths:
                json_lst.append(md.collect_md(path))
        except FileNotFoundError as err:
            # NOTE(review): the remote branch reports with st.sidebar.error
            # while this one uses st.sidebar.text - confirm this is intended.
            st.sidebar.text(err)
            readme()
    else:
        passbuild = st.sidebar.checkbox(
            "Only retreive build success records", value=True
        )
        try:
            configs = gh.auth_config()
            for path in paths:
                response = gh.get_request(path, passbuild, **configs)
                json_lst.append(ju.clean_report(response))
        # Exception already covers EnvironmentError (OSError); any failure
        # here is surfaced in the sidebar instead of crashing the app.
        except Exception as err:
            st.sidebar.error(err)
            readme()

    # Nothing was retrieved: there is no frame to build.
    if not json_lst:
        return None

    # One concat over all frames instead of growing a frame inside a loop:
    # avoids re-copying the accumulated rows on every iteration and does not
    # depend on concatenating an empty DataFrame.
    raw_df = pd.concat([pd.DataFrame(item) for item in json_lst]).fillna("")
    tidy_df = df_preprocess(raw_df)
    return tidy_df, raw_df
def path_import(paths):
    """Collect the parsed contents of every entry in ``paths``.

    Each entry is passed to ``md.collect_md`` and the results are returned as
    a list in input order. If ``FileNotFoundError`` is raised along the way,
    the error is reported through ``st.sidebar.error`` and ``None`` is
    returned instead of a list.
    """
    try:
        return [md.collect_md(location) for location in paths]
    except FileNotFoundError as missing:
        st.sidebar.error(missing)
        return None
def test_collect_md_with_two_inputs(tmp_path):
    """Check ``md.collect_md`` on a directory holding two markdown files.

    Two files with identical content are written into a temporary
    directory. The expected result maps each lower-cased header to a list
    with one entry per file, where every section body is followed by a
    single trailing space and the title section is empty.
    """
    directory = tmp_path / "sub"
    directory.mkdir()
    p_1 = directory / "hello.md"
    p_2 = directory / "world.md"
    # Adjacent literals are joined at compile time, so the text is one line
    # whose value does not depend on how the source lines are indented.
    txt = (
        "Some solutions that can be developed to avoid harm or fix the harm "
        "are conducting more research and not offering it to a selective "
        "group of people. More research needs to be done especially in terms "
        "of embryos. In addition, if germline editing is only offered to a "
        "select group of people, the wealthy, it will be problematic for the "
        "class system."
    )
    # Both files carry the same document: a title and two headed sections.
    content = f"# Reflection by\n\n## header1\n{txt}\n## header2\n{txt}"
    p_1.write_text(content)
    p_2.write_text(content)
    expected = {
        "reflection by": ["", ""],
        "header1": [txt + " ", txt + " "],
        "header2": [txt + " ", txt + " "],
    }
    output = md.collect_md(directory, is_clean=False)
    assert expected == output