示例#1
0
def _roster(team, season, checked=False):
    """
    Pulls retrosheet roster files

    Downloads the roster file for ``team`` in ``season`` and returns it as a
    DataFrame whose columns are set from ``roster_columns``.

    Unless ``checked`` is true, the ``rosters`` directory of the
    ``chadwickbureau/retrosheet`` GitHub repository is listed first and a
    ``ValueError`` is raised when ``{team}{season}.ROS`` is not in that
    listing. If the GitHub rate limit is exceeded during the check, a
    ``UserWarning`` is issued and the download is attempted anyway.
    """
    if not checked:
        # The token is only needed for the existence check.
        g = Github(os.getenv('GH_TOKEN', ''))
        try:
            repo = g.get_repo('chadwickbureau/retrosheet')
            # Locate the top-level 'rosters' entry; default to None so a
            # missing directory is reported as "not available" below instead
            # of failing on an unbound local.
            roster_dir = next(
                (entry for entry in repo.get_git_tree('master').tree
                 if entry.path == 'rosters'),
                None)
            if roster_dir is None:
                rosters = set()
            else:
                rosters = {
                    entry.path
                    for entry in repo.get_git_tree(roster_dir.sha).tree
                }

            file_name = f'{team}{season}.ROS'
            if file_name not in rosters:
                raise ValueError(
                    f'Roster not available for {team} in {season}')
        except RateLimitExceededException:
            # Best effort: the check is skipped, the download still runs.
            warnings.warn(
                'Github rate limit exceeded. Cannot check if the file you want exists.',
                UserWarning)

    s = get_text_file(roster_url.format(team, season))
    # The file is read without a header row; column names are assigned after.
    data = pd.read_csv(StringIO(s), header=None, sep=',', quotechar='"')
    data.columns = roster_columns
    return data
示例#2
0
def lcs_logs():
    """
    Fetch the Retrosheet LCS game logs as a DataFrame.

    The text is downloaded from ``gamelog_url`` formatted with ``'LC'``,
    parsed as header-less CSV, and labelled with ``gamelog_columns``.
    """
    raw_text = get_text_file(gamelog_url.format('LC'))
    buffer = StringIO(raw_text)
    frame = pd.read_csv(buffer, sep=',', quotechar='"', header=None)
    # No header row in the file, so names are attached after parsing.
    frame.columns = gamelog_columns
    return frame
示例#3
0
def division_series_logs():
    """
    Fetch the Retrosheet Division Series game logs as a DataFrame.

    The text is downloaded from ``gamelog_url`` formatted with ``'DV'``,
    parsed as header-less CSV, and labelled with ``gamelog_columns``.
    """
    raw_text = get_text_file(gamelog_url.format('DV'))
    buffer = StringIO(raw_text)
    frame = pd.read_csv(buffer, sep=',', quotechar='"', header=None)
    # No header row in the file, so names are attached after parsing.
    frame.columns = gamelog_columns
    return frame
示例#4
0
def all_star_game_logs():
    """
    Fetch the Retrosheet All Star Game logs as a DataFrame.

    The text is downloaded from ``gamelog_url`` formatted with ``'AS'``,
    parsed as header-less CSV, and labelled with ``gamelog_columns``.
    """
    raw_text = get_text_file(gamelog_url.format('AS'))
    buffer = StringIO(raw_text)
    frame = pd.read_csv(buffer, sep=',', quotechar='"', header=None)
    # No header row in the file, so names are attached after parsing.
    frame.columns = gamelog_columns
    return frame
示例#5
0
def park_codes():
    """
    Fetch the Retrosheet park IDs as a DataFrame.

    The text is downloaded from ``parkid_url`` and parsed as CSV. The first
    row is consumed as the header by ``read_csv``; the resulting column
    names are then replaced with ``parkcode_columns``.
    """
    raw_text = get_text_file(parkid_url)
    buffer = StringIO(raw_text)
    frame = pd.read_csv(buffer, quotechar='"', sep=',')
    frame.columns = parkcode_columns
    return frame
示例#6
0
def season_game_logs(season):
    """
    Fetch the Retrosheet game logs for one season as a DataFrame.

    The ``gamelog`` directory of the ``chadwickbureau/retrosheet`` GitHub
    repository is listed first; a ``ValueError`` is raised when
    ``GL{season}.TXT`` is not among its files. Otherwise the log is
    downloaded, parsed as header-less CSV, and labelled with
    ``gamelog_columns``.
    """
    token = os.getenv('GH_TOKEN', '')

    # Validate the requested season against the files present in the repo.
    repository = Github(token).get_repo('chadwickbureau/retrosheet')
    available = []
    for item in repository.get_contents('gamelog'):
        # Keep only the base name (everything after the last '/').
        available.append(item.path.rsplit('/', 1)[-1])

    wanted = f'GL{season}.TXT'
    if wanted not in available:
        raise ValueError(f'Season game logs not available for {season}')

    raw_text = get_text_file(gamelog_url.format(season))
    frame = pd.read_csv(
        StringIO(raw_text), sep=',', quotechar='"', header=None)
    frame.columns = gamelog_columns
    return frame
示例#7
0
def schedules(season):
    """
    Fetch the Retrosheet schedule for one season as a DataFrame.

    The ``schedule`` directory of the ``chadwickbureau/retrosheet`` GitHub
    repository is listed first; a ``ValueError`` is raised when
    ``{season}SKED.TXT`` is not among its files. Otherwise the schedule is
    downloaded, parsed as header-less CSV, and labelled with
    ``schedule_columns``.
    """
    token = os.getenv('GH_TOKEN', '')

    # Validate the requested season against the files present in the repo.
    repository = Github(token).get_repo('chadwickbureau/retrosheet')
    available = []
    for item in repository.get_contents('schedule'):
        # Keep only the base name (everything after the last '/').
        available.append(item.path.rsplit('/', 1)[-1])

    wanted = f'{season}SKED.TXT'
    if wanted not in available:
        raise ValueError(f'Schedule not available for {season}')

    raw_text = get_text_file(schedule_url.format(season))
    frame = pd.read_csv(
        StringIO(raw_text), sep=',', quotechar='"', header=None)
    frame.columns = schedule_columns
    return frame
示例#8
0
def events(season, type='regular', export_dir='.'):
    """
    Pulls retrosheet event files for an entire season. The `type` argument
    specifies whether to pull regular season, postseason or asg files. Valid
    arguments are 'regular', 'post', and 'asg'.

    Right now, pybaseball does not parse the retrosheet files but downloads and
    saves them.

    The files to download are discovered by listing the ``event/<type>``
    directory of the ``chadwickbureau/retrosheet`` GitHub repository and
    keeping every entry whose name contains ``str(season)``. Each one is
    written into ``export_dir`` (created if missing) under its own name.

    Raises ``ValueError`` when the ``event`` directory, the ``type``
    sub-directory, or any file for ``season`` cannot be found. If the GitHub
    rate limit is exceeded the listing is unavailable, so a ``UserWarning``
    is issued and the function returns without downloading anything.
    """
    # makedirs also creates missing parent directories and tolerates the
    # directory having been created in the meantime.
    os.makedirs(export_dir, exist_ok=True)

    try:
        g = Github(os.getenv('GH_TOKEN', ''))
        repo = g.get_repo('chadwickbureau/retrosheet')

        # Walk master -> 'event' -> <type>; each lookup defaults to None so a
        # missing directory becomes a clear ValueError rather than an
        # unbound-local failure further down.
        event_dir = next(
            (t for t in repo.get_git_tree('master').tree
             if t.path == 'event'),
            None)
        if event_dir is None:
            raise ValueError(f'Event files not available for {season}')

        type_dir = next(
            (t for t in repo.get_git_tree(event_dir.sha).tree
             if t.path == type),
            None)
        if type_dir is None:
            raise ValueError(f'Event files not available for type {type!r}')

        # Substring match on the season, as in the original selection rule.
        event_files = [
            t.path for t in repo.get_git_tree(type_dir.sha).tree
            if str(season) in t.path
        ]
        if not event_files:
            raise ValueError(f'Event files not available for {season}')
    except RateLimitExceededException:
        # Without the listing the file names are unknown, so there is
        # nothing that can be downloaded.
        warnings.warn(
            'Github rate limit exceeded. Cannot list the event files, '
            'so nothing was downloaded.',
            UserWarning)
        return

    for filename in event_files:
        print(f'Downloading {filename}')
        s = get_text_file(event_url.format(type, filename))
        with open(os.path.join(export_dir, filename), 'w') as f:
            f.write(s)