def get_scoreboard_file(year, month, day):
    """Return the scoreboard XML file name for the given date.

    The name has the form
    ``game_mlb_year_<year>_month_<month>_day_<day>_scoreboard.xml`` where
    ``<year>`` is ``str(year)`` and month/day are rendered by
    ``convert_number_to_string``.

    ``is_valid_year`` and ``is_valid_day_in_month`` are called first and
    their return values are ignored.
    NOTE(review): presumably they raise on invalid input -- confirm.
    """
    is_valid_year(year)
    is_valid_day_in_month(month, day)

    pieces = [
        'game_mlb_year_', str(year),
        '_month_', convert_number_to_string(month),
        '_day_', convert_number_to_string(day),
        '_scoreboard.xml',
    ]
    return ''.join(pieces)
# Example no. 2
# 0
def get_base_url(year, month, day):
    """Return the gd2.mlb.com base URL for the given date.

    The result is
    ``http://gd2.mlb.com/components/game/mlb/year_<year>/month_<month>/day_<day>/``
    (with a trailing slash), where month and day are rendered by
    ``convert_number_to_string``.

    ``is_valid_year`` and ``is_valid_day_in_month`` are called first and
    their return values are ignored.
    NOTE(review): presumably they raise on invalid input -- confirm.
    """
    is_valid_year(year)
    is_valid_day_in_month(month, day)

    segments = (
        'year_' + str(year),
        'month_' + convert_number_to_string(month),
        'day_' + convert_number_to_string(day),
    )
    return 'http://gd2.mlb.com/components/game/mlb/' + '/'.join(segments) + '/'
# Example no. 3
# 0
def download_game_data_for_today(url, year, month, day):
    """Download the per-game XML files for every game on one day's scoreboard.

    The day's scoreboard XML is read from
    ``<cwd>/data/<year>/<month>/<day>/`` (file name from
    ``get_scoreboard_file``). For each ``<game>`` element, the files
    linescore.xml, players.xml, game_events.xml and boxscore.xml are fetched
    from ``url + 'gid_<id>/'`` into the matching ``gid_<id>/`` directory
    under the day directory via ``retrieve_data_file``.

    Args:
        url: Base URL for the day; the ``gid_<id>/`` folder is appended to it.
        year, month, day: Date used to locate the local scoreboard file.

    NOTE(review): the per-game directory is not created here; presumably it
    already exists or ``retrieve_data_file`` handles it -- confirm.
    """
    scoreboard_file_name = get_scoreboard_file(year, month, day)
    day_directory = (os.getcwd() + '/data/' + str(year) + '/' +
                     convert_month_int_to_string(month) + '/' +
                     convert_number_to_string(day) + '/')

    print(day_directory)

    # BeautifulSoup reads the whole file object while constructing the tree,
    # so the handle can be closed before the tree is walked.
    with open(day_directory + scoreboard_file_name) as scoreboard:
        tree = BeautifulSoup(scoreboard, 'lxml')

    xml_names = ('linescore.xml', 'players.xml', 'game_events.xml',
                 'boxscore.xml')

    for game in tree.find_all('game'):
        game_folder = 'gid_' + str(game['id']) + '/'
        game_dir_path = day_directory + game_folder
        game_url = url + game_folder

        print(game_dir_path)
        print(game_url + 'boxscore.xml')

        for xml_name in xml_names:
            retrieve_data_file(game_url + xml_name, game_dir_path + xml_name)
# Example no. 4
# 0
def download_scoreboard(url, year, month, day):
    """Download ``scoreboard.xml`` for one day into the local data tree.

    The file is fetched from ``url + 'scoreboard.xml'`` and stored under
    ``<cwd>/data/<year>/<month>/<day>/``. The local file name is the URL with
    its first 30 characters removed and every ``/`` replaced by ``_``.

    Args:
        url: Base URL for the day; ``scoreboard.xml`` is appended to it.
        year, month, day: Date used to build the destination directory.
    """
    scoreboard_url = url + 'scoreboard.xml'
    opener = urllib.URLopener()

    # NOTE(review): 30 == len('http://gd2.mlb.com/components/'); presumably
    # the URL is expected to start with that prefix -- confirm with callers.
    flattened_name = scoreboard_url[30:].replace('/', '_')
    print(flattened_name)

    month_as_string = convert_month_int_to_string(month)
    path_parts = [
        str(os.getcwd()),
        'data',
        str(year),
        month_as_string,
        convert_number_to_string(day),
        '',  # empty last element yields the trailing '/'
    ]
    target_directory = str('/'.join(path_parts))

    print(scoreboard_url)
    print(target_directory)
    opener.retrieve(scoreboard_url, target_directory + str(flattened_name))
def process_player_data_for_today(year, month, day, players_by_id):
    """Collect player data for every game on one day's scoreboard.

    The day's scoreboard XML is read from
    ``<cwd>/data/<year>/<month>/<day>/`` and ``get_players_data`` is called
    once per ``<game>`` element with that game's ``gid_<id>/`` directory,
    ``players_by_id`` and the day directory.

    Any exception raised while reading the scoreboard or processing a game
    is printed and swallowed (best-effort); processing of the remaining
    games for the day stops at the first failure.

    Args:
        year, month, day: Date used to locate the local scoreboard file.
        players_by_id: Passed through unchanged to ``get_players_data``.
    """
    scoreboard_file_name = get_scoreboard_file(year, month, day)
    day_directory = (os.getcwd() + '/data/' + str(year) + '/' +
                     convert_month_int_to_string(month) + '/' +
                     convert_number_to_string(day) + '/')

    print(day_directory)

    try:
        # BeautifulSoup reads the whole file object while constructing the
        # tree, so the handle can be closed before the tree is walked.
        with open(day_directory + scoreboard_file_name) as scoreboard:
            tree = BeautifulSoup(scoreboard, 'lxml')

        for game in tree.find_all('game'):
            game_dir_path = day_directory + 'gid_' + str(game['id']) + '/'
            get_players_data(game_dir_path, players_by_id, day_directory)
    except Exception as exception:
        # Deliberately best-effort: report the problem and carry on.
        print(exception)
def process_player_data_for_today(year, month, day, all_stats_array):
    """Collect starter stats for every game on one day's scoreboard.

    The day's scoreboard XML is read from
    ``<cwd>/data/<year>/<month>/<day>/`` and ``get_starter_stats`` is called
    once per ``<game>`` element with that game's ``gid_<id>/`` directory and
    ``all_stats_array``.

    Args:
        year, month, day: Date used to locate the local scoreboard file.
        all_stats_array: Passed through unchanged to ``get_starter_stats``.
    """
    scoreboard_file_name = get_scoreboard_file(year, month, day)
    day_directory = (os.getcwd() + '/data/' + str(year) + '/' +
                     convert_month_int_to_string(month) + '/' +
                     convert_number_to_string(day) + '/')

    print(day_directory)

    # BeautifulSoup reads the whole file object while constructing the tree,
    # so the handle can be closed before the tree is walked.
    with open(day_directory + scoreboard_file_name) as scoreboard:
        tree = BeautifulSoup(scoreboard, 'lxml')

    for game in tree.find_all('game'):
        game_dir_path = day_directory + 'gid_' + str(game['id']) + '/'
        get_starter_stats(game_dir_path, all_stats_array)
def create_month_of_data_directories(month, start_day, end_day, all_star_day):
    """Create one directory per day inside the current month's directory.

    Changes into the month directory (named by
    ``convert_month_int_to_string(month)``, relative to the current working
    directory), creates a sub-directory for each day in
    ``start_day..end_day`` inclusive if it does not exist yet, and then
    restores the original working directory -- also when an error occurs.

    Args:
        month: Month number; for month 7 the all-star break is skipped.
        start_day: First day to create; a falsy value selects the 'start'
            entry returned by ``get_default_start_and_end_of_month``.
        end_day: Last day to create (inclusive); a falsy value selects the
            'end' entry from the same helper.
        all_star_day: Only used when ``month == 7``; days from
            ``all_star_day - 1`` through ``all_star_day + 2`` are skipped.

    Raises:
        OSError: If the month directory does not exist or a day directory
            cannot be created.
    """
    default_bounds = get_default_start_and_end_of_month(month)
    if not start_day:
        start_day = default_bounds['start']
    if not end_day:
        end_day = default_bounds['end']

    month_as_string = convert_month_int_to_string(month)
    original_cwd = os.getcwd()
    os.chdir(month_as_string)
    try:
        month_directory = os.getcwd()
        print(month_directory)

        for day in range(start_day, end_day + 1):
            # skip the all star break
            if month == 7 and all_star_day - 1 <= day < all_star_day + 3:
                continue
            day_dir_path = month_directory + '/' + convert_number_to_string(day)
            if not os.path.exists(day_dir_path):
                os.makedirs(day_dir_path)
    finally:
        # Always put the process back where it started, even on failure.
        os.chdir(original_cwd)
def process_player_data_for_today(year, month, day, all_events_array):
    """Collect outcome data for every game on one day's scoreboard.

    The day's scoreboard XML is read from
    ``<cwd>/data/<year>/<month>/<day>/``. For each ``<game>`` element the
    linescore and boxscore helpers are run on the game's ``gid_<id>/``
    directory and their results are fed to ``get_outcomes_data``. A truthy
    outcome is stored in ``all_events_array`` under the game directory path.

    Args:
        year, month, day: Date used to locate the local scoreboard file.
        all_events_array: Updated in place; indexed by game directory path.
    """
    scoreboard_file_name = get_scoreboard_file(year, month, day)
    day_directory = (os.getcwd() + '/data/' + str(year) + '/' +
                     convert_month_int_to_string(month) + '/' +
                     convert_number_to_string(day) + '/')

    print(day_directory)

    # BeautifulSoup reads the whole file object while constructing the tree,
    # so the handle can be closed before the tree is walked.
    with open(day_directory + scoreboard_file_name) as scoreboard:
        tree = BeautifulSoup(scoreboard, 'lxml')

    for game in tree.find_all('game'):
        game_dir_path = day_directory + 'gid_' + str(game['id']) + '/'
        team_name_abbreviations = get_linescore_data(game_dir_path)
        expected_player_stats = get_boxscore_data(game_dir_path)
        outcome = get_outcomes_data(game_dir_path, expected_player_stats,
                                    team_name_abbreviations, day_directory)

        # Games without a usable outcome are left out of the result.
        if outcome:
            all_events_array[game_dir_path] = outcome
def create_month_of_game_directories(year, month, start_day, end_day, all_star_day):
    """Run ``create_folders_from_xml_scoreboard`` inside each day directory.

    Changes into the month directory (named by
    ``convert_month_int_to_string(month)``, relative to the current working
    directory) and then, for each day in ``start_day..end_day`` inclusive,
    into that day's directory before calling
    ``create_folders_from_xml_scoreboard(year, month, day)``. The working
    directory is restored after each day and at the end -- also when an
    error occurs.

    Args:
        year: Passed through to ``create_folders_from_xml_scoreboard``.
        month: Month number; for month 7 the all-star break is skipped.
        start_day: First day to process; a falsy value selects the 'start'
            entry returned by ``get_default_start_and_end_of_month``.
        end_day: Last day to process (inclusive); a falsy value selects the
            'end' entry from the same helper.
        all_star_day: Only used when ``month == 7``; days from
            ``all_star_day - 1`` through ``all_star_day + 2`` are skipped.

    Raises:
        OSError: If the month directory or a day directory does not exist.
    """
    default_bounds = get_default_start_and_end_of_month(month)
    if not start_day:
        start_day = default_bounds['start']
    if not end_day:
        end_day = default_bounds['end']

    month_as_string = convert_month_int_to_string(month)
    original_cwd = os.getcwd()
    os.chdir(month_as_string)
    try:
        month_directory = os.getcwd()

        for day in range(start_day, end_day + 1):
            if month == 7 and all_star_day - 1 <= day < all_star_day + 3:
                continue  # skip the all star break

            os.chdir(convert_number_to_string(day))
            try:
                create_folders_from_xml_scoreboard(year, month, day)
            finally:
                # Return to the month directory even if the helper fails.
                os.chdir(month_directory)
    finally:
        # Always put the process back where it started, even on failure.
        os.chdir(original_cwd)