Exemplo n.º 1
0
def join_within_league_seasons(sources):
    """Merge the standardized CSVs of two sources and save the results.

    Args:
        sources: Indexable with at least two entries. ``sources[0]`` names
            the directory under ``STDZED_DIR`` holding the left-hand CSVs
            and ``sources[1]`` the one holding the right-hand CSVs.

    Both file lists are passed through ``get_matching_filepaths``, loaded,
    combined with ``do_merge`` and written to paths built under
    ``JOINED_DIR``. The value returned by ``write_dfs_to_filepaths`` is
    printed; nothing is returned.
    """
    # Index 0 is the left side, index 1 the right side.
    candidate_fps = [
        get_filepaths(STDZED_DIR / sources[side], ext='csv')
        for side in (0, 1)
    ]
    matched_left, matched_right = get_matching_filepaths(
        candidate_fps[0], candidate_fps[1], sources[0], sources[1])

    joined = do_merge(read_csvs_to_dfs(matched_left),
                      read_csvs_to_dfs(matched_right))

    targets = make_filepaths_from_dfs(JOINED_DIR, joined, '')
    print(write_dfs_to_filepaths(joined, targets))
Exemplo n.º 2
0
def stackseasons():
    """Concatenate each league's joined season files into one CSV per league.

    Files are discovered under ``JOINED_DIR``. For every (nation, league)
    pair taken from ``get_actual_scope_dict``, the files whose path string
    contains both names are read, concatenated row-wise, sorted by
    ``season`` then ``date`` and re-indexed from zero. Each result is saved
    as ``STACKED_DIR / nation / league / '<first>__<last>.csv'``, where
    ``<first>`` and ``<last>`` are the first and last items returned by
    ``get_df_seasons``. The value returned by ``write_dfs_to_filepaths`` is
    printed; nothing is returned.
    """
    all_fps = get_filepaths(JOINED_DIR)
    scope = get_actual_scope_dict(all_fps)

    combined_frames = []
    destinations = []
    for nation, league in zip(scope['nations'], scope['leagues']):
        # Select files by substring match on the path text.
        matching_fps = []
        for fp in all_fps:
            fp_text = str(fp)
            if nation in fp_text and league in fp_text:
                matching_fps.append(fp)

        season_frames = read_csvs_to_dfs(matching_fps)
        combined = (
            pd.concat(season_frames, axis=0, sort=True)
            .sort_values(by=['season', 'date'])
            .reset_index(drop=True)
        )
        combined_frames.append(combined)

        seasons = get_df_seasons(combined)
        first_season, last_season = seasons[0], seasons[-1]
        fn = first_season + '__' + last_season + '.csv'
        destinations.append(STACKED_DIR / nation / league / fn)

    print(write_dfs_to_filepaths(combined_frames, destinations))
Exemplo n.º 3
0
def standardize_all_dfs(source_dir):
    """Standardize the cleaned CSVs of one source and save the non-empty ones.

    Args:
        source_dir: Directory name under ``CLEANED_DIR`` to read CSVs from;
            also forwarded to ``make_filepaths_from_dfs`` when building the
            output paths under ``STDZED_DIR``.

    Prints the value returned by ``write_dfs_to_filepaths``; nothing is
    returned.
    """
    cleaned_fps = get_filepaths(CLEANED_DIR / source_dir, ext='csv')
    candidates = standardize_dfs(read_csvs_to_dfs(cleaned_fps), REF_DIR)

    # Standardization dictionaries are not available for everything yet,
    # so frames that come back with no rows are dropped before saving.
    usable = [frame for frame in candidates if len(frame) > 0]

    out_fps = make_filepaths_from_dfs(STDZED_DIR, usable, source_dir)
    print(write_dfs_to_filepaths(usable, out_fps))
def clean_whoscored_shotmap_images():
    """Copy the scoped WhoScored shot-map PNGs to their cleaned locations.

    Source files are the ``.png`` files under
    ``SCOPED_DIR / 'whoscored-com-shotmaps'``; destinations are computed by
    ``make_equiv_image_dest_fps`` from ``SCOPED_DIR`` and ``CLEANED_DIR``.
    Prints the value returned by ``copy_files``; nothing is returned.
    """
    shotmap_dir = 'whoscored-com-shotmaps'
    src_fps = get_filepaths(SCOPED_DIR / shotmap_dir, ext='png')

    # NOTE: an image clean-up step (clean_up_images followed by
    # make_filepaths_from_images / write_images_to_filepaths) used to be
    # sketched here but is disabled; the files are copied unmodified.
    dst_fps = make_equiv_image_dest_fps(SCOPED_DIR, CLEANED_DIR, src_fps)
    print(copy_files(src_fps, dst_fps))
def join_images_within_seasons():
    """Copy heatmap and shotmap images and refresh the joined DataFrames.

    For each image type ('heatmap', then 'shotmap'):

    1. Build DataFrames via ``insert_rel_path_into_dfs``.
    2. Copy the PNGs found under ``STDZED_DIR / 'whoscored-com-<type>s'``
       to the paths produced by ``make_merged_image_filepaths`` and print
       the value returned by ``copy_files``.
    3. Pass the DataFrames through ``update_for_image_file_exists``, write
       them to paths built under ``JOINED_DIR`` and print the value
       returned by ``write_dfs_to_filepaths``.
    """
    for image_type in ('heatmap', 'shotmap'):
        frames = insert_rel_path_into_dfs(image_type)

        top_dir = f'whoscored-com-{image_type}s'
        src_image_fps = get_filepaths(STDZED_DIR / top_dir, ext='png')
        dst_image_fps = make_merged_image_filepaths(
            src_image_fps, top_dir, image_type)
        print(copy_files(src_image_fps, dst_image_fps))

        # Update the frames only after the images have been copied.
        refreshed = update_for_image_file_exists(frames, image_type)
        out_fps = make_filepaths_from_dfs(JOINED_DIR, refreshed, '')
        print(write_dfs_to_filepaths(refreshed, out_fps))
def standardize_image_team_names(source_dir):
    """Copy cleaned PNGs to paths that use standardized team names.

    Args:
        source_dir: Directory name under ``CLEANED_DIR`` to read PNGs from;
            also forwarded to ``standardize_filepaths`` together with
            ``STDZED_DIR`` to build the destination paths.

    Prints the value returned by ``copy_files``; nothing is returned.
    """
    cleaned_src_fps = get_filepaths(CLEANED_DIR / source_dir, ext='png')
    stdzed_fns = standardize_filenames(cleaned_src_fps, REF_DIR)
    stdzed_dest_fps = standardize_filepaths(cleaned_src_fps, stdzed_fns,
                                            STDZED_DIR, source_dir)
    # Not all the standardized dictionaries are available, so a destination
    # of ``False`` marks an image that could not be standardized. Filter the
    # (source, destination) pairs in a single pass: filtering the source
    # list first and then zipping it against the unfiltered destination list
    # misaligns the pairs and silently drops valid destinations.
    copy_pairs = [
        (src_fp, dest_fp)
        for src_fp, dest_fp in zip(cleaned_src_fps, stdzed_dest_fps)
        if dest_fp is not False
    ]
    src_fps_to_copy = [src_fp for src_fp, _ in copy_pairs]
    dest_fps_to_copy = [dest_fp for _, dest_fp in copy_pairs]
    n_copied = copy_files(src_fps_to_copy, dest_fps_to_copy)
    print(n_copied)
def insert_rel_path_into_dfs(image_type):
    """Load the CSVs under ``JOINED_DIR`` and run ``insert_filepaths`` on them.

    Args:
        image_type: Forwarded unchanged to ``insert_filepaths``.

    Returns:
        Whatever ``insert_filepaths`` returns for the loaded DataFrames.
    """
    joined_dfs = read_csvs_to_dfs(get_filepaths(JOINED_DIR))
    return insert_filepaths(joined_dfs, image_type)