def test_daily_players(): handler = TennisDataHandler(data_dir, "rg17", include_qualifiers=True) daily_player_df = handler.show_daily_players() assert daily_player_df.shape[0] == 18 assert daily_player_df.shape[1] == 8 final_players = handler.get_daily_players("2017-06-11") assert len(final_players) == 14
def test_load_uo17_without_qualifiers(): handler = TennisDataHandler(data_dir, "uo17", include_qualifiers=False) summary = handler.summary() assert len(summary["dates"]) == 14 assert summary["start_time"] == 1503892800 assert summary["number_of_nodes"] == 99191 assert summary["number_of_edges"] == 417637
def test_load_rg17_without_qualifiers(): handler = TennisDataHandler(data_dir, "rg17", include_qualifiers=False) summary = handler.summary() assert len(summary["dates"]) == 15 assert summary["start_time"] == 1495922400 assert summary["number_of_nodes"] == 74984 assert summary["number_of_edges"] == 311562
def test_json_export_max_id(): handler = TennisDataHandler(data_dir, "rg17", include_qualifiers=True) json_fp = "rg17_temporal.json" handler.to_json(json_fp, task="classification", edge_type="temporal", max_snapshot_idx=3) data = load_json(json_fp) assert len(data) == 5
def test_multi_labels(): handler = TennisDataHandler(data_dir, "rg17", include_qualifiers=True) res = handler.get_daily_relevance_labels(binary=False) vals = res["2017-06-11"].values() unique_labels = set(vals) assert len(unique_labels) == 3 assert 0.0 in unique_labels assert 1.0 in unique_labels assert 2.0 in unique_labels
def test_load_uo17_with_qualifiers(): handler = TennisDataHandler(data_dir, "uo17", include_qualifiers=True) summary = handler.summary() assert summary["data_id"] == "uo17" assert len(summary["dates"]) == 20 assert len(summary["dates_with_no_game"]) == 2 assert '2017-08-26' in summary[ "dates_with_no_game"] and '2017-08-27' in summary["dates_with_no_game"] assert summary["start_time"] == 1503374400 assert summary["end_time"] == 1505102400 assert summary["number_of_nodes"] == 106106 assert summary["number_of_edges"] == 475085
def test_load_rg17_with_qualifiers(): handler = TennisDataHandler(data_dir, "rg17", include_qualifiers=True) summary = handler.summary() assert summary["data_id"] == "rg17" assert len(summary) == 8 assert len(summary["dates"]) == 19 assert len(summary["dates_with_no_game"]) == 1 assert '2017-05-27' in summary["dates_with_no_game"] assert summary["start_time"] == 1495576800 assert summary["end_time"] == 1497218400 assert summary["number_of_nodes"] == 78095 assert summary["number_of_edges"] == 336234
def test_content_check(): output_dir = os.path.join(fdir, "content_check") if os.path.exists(output_dir): shutil.rmtree(output_dir) handler = TennisDataHandler(data_dir, "rg17", include_qualifiers=False) handler.export_relevance_labels(output_dir, binary=True, only_pos_label=True) handler.export_edges(output_dir) files = os.listdir(output_dir) assert "summary.json" in files assert "edges.csv" assert len(files) == 17
def test_label_export(): dir1 = os.path.join(fdir, "rg17_with_qTrue") dir2 = dir1 + "_relevant" handler = TennisDataHandler(data_dir, "rg17", include_qualifiers=True) handler.export_relevance_labels(dir1, binary=True, only_pos_label=False) handler.export_relevance_labels(dir2, binary=True, only_pos_label=True) assert len(os.listdir(dir1)) == 20 assert len(os.listdir(dir2)) == 20 fp1 = os.path.join(dir1, "labels_18.csv") fp2 = os.path.join(dir2, "labels_18.csv") df1 = pd.read_csv(fp1, header=None) df2 = pd.read_csv(fp2, header=None) assert len(df1) == 78094 assert len(df2) == 18
def test_graph(): handler = TennisDataHandler(data_dir, "rg17", include_qualifiers=True) fig = handler.visualize(kind="graph") assert fig != None
def test_dimensions(): handler = TennisDataHandler(data_dir, "rg17", include_qualifiers=True) num_days = len(handler.dates) assert handler.weighted_edges.shape[1] == 4 assert len(handler.weighted_edges_grouped) == num_days assert len(handler.edges_grouped) == num_days