Example #1
def test_load_json(data_dir, data_object, capsys):
    assert (load_json(Path(data_dir, "2020-01-01T01-00-00Z_data.json"))
            ["elements"][0] == data_object["elements"][0])

    load_json(Path(data_dir, ".gitkeep"))
    capture = capsys.readouterr()
    assert capture.out == "Unable to decode JSON. Expecting value: line 1 column 1 (char 0)\n"
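For context, a minimal sketch of what the load_json helper exercised above might look like, assuming it wraps json.load and prints the JSONDecodeError message instead of raising; the actual helper in martgra/fpl2021 may differ.

import json


def load_json(file_path):
    """Load a JSON file, printing a message if decoding fails (hypothetical sketch)."""
    try:
        with open(file_path, encoding="utf-8") as file_handle:
            return json.load(file_handle)
    except json.JSONDecodeError as error:
        # Matches the output asserted in the test above.
        print(f"Unable to decode JSON. {error}")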
Example #2
def test_migrate_db(cosmos_client, small_data_dir, data_object, capsys):
    cosmos_client.insert_documents(small_data_dir, latest=False)
    cosmos_client.insert_documents(small_data_dir, latest=True)
    capture = capsys.readouterr()
    assert "Local data and Cosmos DB in sync" in capture.out

    # Removing the second element (index 1) from Cosmos so local data is ahead
    add_gw_and_download_time(data_object["elements"],
                             data_object["download_time"], 1)
    add_unique_id(data_object["elements"])
    cosmos_client.delete_items([data_object["elements"][1]])
    cosmos_client.insert_documents(small_data_dir)
    capture = capsys.readouterr()
    assert "Migrating from index 0:1" in capture.out

    files = list_data_dir(small_data_dir)
    loaded_json = load_json(files[0])
    loaded_json["elements"] = loaded_json["elements"][:-1]
    dump_json(files[0], loaded_json)
    cosmos_client.insert_documents(small_data_dir)
    capture = capsys.readouterr()
    assert "Cosmos DB ahead of local data." in capture.out

    cosmos_client.insert_documents(small_data_dir, latest=False)
    capture = capsys.readouterr()
    assert "Could not insert" in capture.out
Example #3
File: cosmos.py Project: martgra/fpl2021
    def __get_data(data_dir_path: str) -> list:
        """Load list of .json files to list of dicts.

        Args:
            data_dir_path (str): Path to directory holding

        Returns:
            list[dict]: list of dicts holding loaded .json
        """
        data_files = io.list_data_dir(data_dir_path)
        return [io.load_json(data_file) for data_file in data_files]
Example #4
def to_csv(data_path="data"):
    """Transform data and save as CSV.

    Args:
        data_path (str, optional): Path to dir holding JSON dumps. Defaults to "data".
        save_path (str, optional): Path to save transformed CSV. Defaults to "data_transformed.csv".
    """
    elements = []

    for data_file in tqdm(list_data_dir(data_path)):
        try:
            data = load_json(data_file)
            add_gw_and_download_time(
                data["elements"], data["download_time"], get_game_week(data["events"])
            )
            add_unique_id(data["elements"])
            elements.extend(data["elements"])

        # Add transformations here
        except TypeError:
            print(f"Something is wrong in {data_file}")

    return pd.DataFrame(elements)
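Since to_csv returns a DataFrame rather than writing a file, a caller would persist it separately. A hedged usage sketch follows; the output path is an illustrative assumption, not taken from the project.

df = to_csv(data_path="data")
# Write the transformed elements to disk with pandas; the path is hypothetical.
df.to_csv("data_transformed.csv", index=False)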
Example #5
def test_dump_json(data_dir):
    test_json = {"test1": "test"}
    dump_json(Path(data_dir, "test_json.json"), test_json)
    assert load_json(Path(data_dir, "test_json.json")) == test_json
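The round-trip assertion above suggests dump_json is a thin counterpart to load_json. A minimal sketch, assuming it simply serializes a dict with json.dump; the real implementation may differ.

import json


def dump_json(file_path, data):
    """Serialize data to a JSON file (hypothetical sketch)."""
    with open(file_path, "w", encoding="utf-8") as file_handle:
        json.dump(data, file_handle)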