def test_load_json(data_dir, data_object, capsys):
    assert (
        load_json(Path(data_dir, "2020-01-01T01-00-00Z_data.json"))["elements"][0]
        == data_object["elements"][0]
    )
    load_json(Path(data_dir, ".gitkeep"))
    capture = capsys.readouterr()
    assert capture.out == (
        "Unable to decode JSON. Expecting value: line 1 column 1 (char 0)\n"
    )
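# A minimal sketch of the load_json helper this test assumes, inferred from
# the asserted output above; the project's real implementation may differ.
import json
from pathlib import Path
from typing import Optional


def load_json(file_path: Path) -> Optional[dict]:
    """Load a .json file, printing a message if it cannot be decoded."""
    try:
        with open(file_path) as file:
            return json.load(file)
    except json.JSONDecodeError as error:
        # An empty file such as .gitkeep raises
        # "Expecting value: line 1 column 1 (char 0)".
        print(f"Unable to decode JSON. {error}")
        return None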
def test_migrate_db(cosmos_client, small_data_dir, data_object, capsys):
    cosmos_client.insert_documents(small_data_dir, latest=False)
    cosmos_client.insert_documents(small_data_dir, latest=True)
    capture = capsys.readouterr()
    assert "Local data and Cosmos DB in sync" in capture.out

    # Remove the second element from Cosmos DB so local data is ahead
    add_gw_and_download_time(data_object["elements"], data_object["download_time"], 1)
    add_unique_id(data_object["elements"])
    cosmos_client.delete_items([data_object["elements"][1]])
    cosmos_client.insert_documents(small_data_dir)
    capture = capsys.readouterr()
    assert "Migrating from index 0:1" in capture.out

    # Drop the last element locally so Cosmos DB is ahead of local data
    files = list_data_dir(small_data_dir)
    loaded_json = load_json(files[0])
    loaded_json["elements"] = loaded_json["elements"][:-1]
    dump_json(files[0], loaded_json)
    cosmos_client.insert_documents(small_data_dir)
    capture = capsys.readouterr()
    assert "Cosmos DB ahead of local data." in capture.out

    # Re-inserting already-synced documents should fail
    cosmos_client.insert_documents(small_data_dir, latest=False)
    capture = capsys.readouterr()
    assert "Could not insert" in capture.out
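# A hedged sketch of the two transform helpers the test calls. Their
# signatures match the call sites above, but the exact field names they set
# ("gw", "download_time", "unique_id") are assumptions, not confirmed by the
# source.
def add_gw_and_download_time(elements: list, download_time: str, game_week: int):
    """Stamp each element dict with its game week and download time."""
    for element in elements:
        element["gw"] = game_week
        element["download_time"] = download_time


def add_unique_id(elements: list):
    """Give each element a unique id so individual documents can be
    addressed in Cosmos DB (assumed scheme: player id + game week)."""
    for element in elements:
        element["unique_id"] = f"{element['id']}-{element['gw']}"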
def __get_data(data_dir_path: str) -> list:
    """Load a directory of .json files into a list of dicts.

    Args:
        data_dir_path (str): Path to directory holding the .json dumps.

    Returns:
        list[dict]: List of dicts holding the loaded .json data.
    """
    data_files = io.list_data_dir(data_dir_path)
    dict_list = []
    for data_file in data_files:
        dict_list.append(io.load_json(data_file))
    return dict_list
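# Hypothetical usage (not from the module): the loaded dumps can be
# flattened into a single DataFrame by chaining each dump's elements.
#
#     dumps = __get_data("data")
#     elements = [e for dump in dumps for e in dump["elements"]]
#     df = pd.DataFrame(elements)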
def to_csv(data_path="data"):
    """Transform the raw JSON dumps into a single DataFrame.

    Args:
        data_path (str, optional): Path to dir holding JSON dumps.
            Defaults to "data".

    Returns:
        pd.DataFrame: The transformed element data.
    """
    elements = []
    for file_path in tqdm(list_data_dir(data_path)):
        try:
            data = load_json(file_path)
            add_gw_and_download_time(
                data["elements"], data["download_time"], get_game_week(data["events"])
            )
            add_unique_id(data["elements"])
            elements.extend(data["elements"])
            # Add transformations here
        except TypeError:
            print(f"Something is wrong in {file_path}")
    return pd.DataFrame(elements)
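# Assumed calling convention: the caller saves the returned DataFrame, using
# the CSV path the old docstring mentioned ("data_transformed.csv").
#
#     df = to_csv("data")
#     df.to_csv("data_transformed.csv", index=False)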
def test_dump_json(data_dir):
    test_json = {"test1": "test"}
    dump_json(Path(data_dir, "test_json.json"), test_json)
    assert load_json(Path(data_dir, "test_json.json")) == test_json
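# A minimal sketch of the dump_json counterpart this round-trip test assumes
# (uses json and Path as imported above); the real helper may pass different
# json.dump options, e.g. an indent.
def dump_json(file_path: Path, data: dict):
    """Serialise a dict to a .json file."""
    with open(file_path, "w") as file:
        json.dump(data, file)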