def create_year_of_data_directories(year):
    """Create the ``data/<year>/<month>/<day>`` directory tree for a season.

    The first and last game dates (and the all-star day) are looked up with
    ``get_first_and_last_game_of_year``. A month directory is created for
    every month from the start month through the end month, and
    ``create_month_of_data_directories`` fills each one with day directories.

    The function temporarily changes the process working directory into
    ``data/<year>`` because ``create_month_of_data_directories`` resolves the
    month directory relative to the current directory. The original working
    directory is always restored, even when an error occurs part-way through.

    Args:
        year: Season year; it is passed to ``is_valid_year`` first.
            NOTE(review): the return value of ``is_valid_year`` is ignored,
            so it presumably raises on a bad year -- confirm.

    Raises:
        OSError: If the ``data`` directory does not exist in the current
            working directory, or a directory cannot be created.
    """
    is_valid_year(year)
    game_date_info = get_first_and_last_game_of_year(year)

    start_day = game_date_info['start_day']
    start_month = game_date_info['start_month']
    end_day = game_date_info['end_day']
    end_month = game_date_info['end_month']
    all_star_day = game_date_info['all_star_day']

    original_cwd = os.getcwd()
    try:
        os.chdir('data')
        year_dir_path = os.path.join(os.getcwd(), str(year))
        print(year_dir_path)

        if not os.path.exists(year_dir_path):
            os.makedirs(year_dir_path)

        os.chdir(str(year))
        year_cwd = os.getcwd()

        print(os.path.join(year_cwd,
                           str(convert_month_int_to_string(start_month))))

        # The end month is always processed, exactly once, after every month
        # that precedes it.
        months = list(range(start_month, end_month)) + [end_month]
        for month in months:
            month_dir_path = os.path.join(
                year_cwd, convert_month_int_to_string(month))
            if not os.path.exists(month_dir_path):
                os.makedirs(month_dir_path)

            # A 0 tells create_month_of_data_directories to use its default.
            # Each argument is decided independently so that a month which is
            # both the first and the last month, or July at either end of the
            # season, still receives every value that applies to it.
            create_month_of_data_directories(
                month,
                start_day if month == start_month else 0,
                end_day if month == end_month else 0,
                all_star_day if month == 7 else 0)
    finally:
        # Never leave the process stranded inside the data tree.
        os.chdir(original_cwd)
# Example no. 2
# 0
def download_game_data_for_today(url, year, month, day):
    """Download the per-game XML files for every game on a day's scoreboard.

    The scoreboard file is read from ``<cwd>/data/<year>/<month>/<day>/``.
    For each ``<game>`` element in it, ``linescore.xml``, ``players.xml``,
    ``game_events.xml`` and ``boxscore.xml`` are fetched from
    ``url + 'gid_<id>/'`` via ``retrieve_data_file`` and stored in the
    ``gid_<id>/`` directory below the day directory.

    Args:
        url: Base URL for the day. Game paths are appended to it directly,
            so it presumably ends with '/' -- verify against the caller.
        year: Year used to locate the day directory.
        month: Month number, converted with ``convert_month_int_to_string``.
        day: Day number, converted with ``convert_number_to_string``.

    Raises:
        IOError: If the scoreboard file cannot be opened.
    """
    base_file_name = get_scoreboard_file(year, month, day)
    base_scoreboard_directory = str(
        str(os.getcwd()) + '/data/' + str(year) + '/' +
        convert_month_int_to_string(month) + '/' +
        convert_number_to_string(day) + '/')

    print(base_scoreboard_directory)

    # Parse inside a ``with`` block so the file handle is closed promptly;
    # BeautifulSoup reads the whole file when the tree is constructed.
    with open(base_scoreboard_directory + base_file_name) as scoreboard_file:
        tree = BeautifulSoup(scoreboard_file, 'lxml')

    data_file_names = ('linescore.xml', 'players.xml', 'game_events.xml',
                       'boxscore.xml')

    for game in tree.find_all('game'):
        game_folder = 'gid_' + str(game['id']) + '/'
        game_dir_path = base_scoreboard_directory + game_folder

        print(game_dir_path)
        print(url + game_folder + 'boxscore.xml')

        for data_file_name in data_file_names:
            retrieve_data_file(url + game_folder + data_file_name,
                               game_dir_path + data_file_name)
# Example no. 3
# 0
def download_scoreboard(url, year, month, day):
    """Fetch ``scoreboard.xml`` for one day into that day's data directory.

    The file is downloaded from ``url + 'scoreboard.xml'`` and saved under
    ``<cwd>/data/<year>/<month>/<day>/``. The local file name is the URL with
    its first 30 characters removed and every '/' replaced by '_'.

    Args:
        url: Base URL for the day; ``'scoreboard.xml'`` is appended directly.
        year: Year component of the target directory.
        month: Month number, converted with ``convert_month_int_to_string``.
        day: Day number, converted with ``convert_number_to_string``.
    """
    source_url = url + 'scoreboard.xml'
    opener = urllib.URLopener()

    # NOTE(review): the 30-character offset presumably strips a fixed
    # scheme/host prefix from the URL -- confirm against the caller's URLs.
    local_name = source_url[30:].replace('/', '_')
    print(local_name)

    month_name = convert_month_int_to_string(month)
    year_dir = str(os.getcwd()) + '/data/' + str(year)
    month_dir = year_dir + '/' + month_name
    target_dir = str(month_dir + '/' + convert_number_to_string(day) + '/')

    print(source_url)
    print(target_dir)
    opener.retrieve(source_url, target_dir + str(local_name))
def process_player_data_for_today(year, month, day, players_by_id):
    """Run ``get_players_data`` for every game on a day's scoreboard.

    The scoreboard file is read from ``<cwd>/data/<year>/<month>/<day>/``;
    for each ``<game>`` element, ``get_players_data`` is called with that
    game's ``gid_<id>/`` directory, ``players_by_id`` and the day directory.

    Processing is best-effort: any exception (a missing scoreboard file, a
    failure inside ``get_players_data``, ...) is printed and swallowed, which
    also stops processing of the remaining games for the day.

    Args:
        year: Year used to locate the day directory.
        month: Month number, converted with ``convert_month_int_to_string``.
        day: Day number, converted with ``convert_number_to_string``.
        players_by_id: Passed through unchanged to ``get_players_data``.
    """
    base_file_name = get_scoreboard_file(year, month, day)
    base_scoreboard_directory = str(str(os.getcwd()) + '/data/' + str(year) + '/' +
                                    convert_month_int_to_string(month) + '/' +
                                    convert_number_to_string(day) + '/')

    print(base_scoreboard_directory)

    try:
        # ``with`` guarantees the scoreboard file handle is closed;
        # BeautifulSoup reads the whole file when the tree is constructed.
        with open(base_scoreboard_directory + base_file_name) as scoreboard_file:
            tree = BeautifulSoup(scoreboard_file, 'lxml')
        for game in tree.find_all('game'):
            game_dir_path = base_scoreboard_directory + 'gid_' + str(game['id']) + '/'
            get_players_data(game_dir_path, players_by_id, base_scoreboard_directory)
    except Exception as exception:
        # Deliberately broad: a bad day must not abort the caller's run.
        print(exception)
def process_player_data_for_today(year, month, day, all_stats_array):
    """Run ``get_starter_stats`` for every game on a day's scoreboard.

    The scoreboard file is read from ``<cwd>/data/<year>/<month>/<day>/``;
    for each ``<game>`` element, ``get_starter_stats`` is called with that
    game's ``gid_<id>/`` directory and ``all_stats_array``.

    Args:
        year: Year used to locate the day directory.
        month: Month number, converted with ``convert_month_int_to_string``.
        day: Day number, converted with ``convert_number_to_string``.
        all_stats_array: Passed through unchanged to ``get_starter_stats``.

    Raises:
        IOError: If the scoreboard file cannot be opened.
    """
    base_file_name = get_scoreboard_file(year, month, day)
    base_scoreboard_directory = str(
        str(os.getcwd()) + '/data/' + str(year) + '/' +
        convert_month_int_to_string(month) + '/' +
        convert_number_to_string(day) + '/')

    print(base_scoreboard_directory)

    # ``with`` guarantees the scoreboard file handle is closed; BeautifulSoup
    # reads the whole file when the tree is constructed.
    with open(base_scoreboard_directory + base_file_name) as scoreboard_file:
        tree = BeautifulSoup(scoreboard_file, 'lxml')

    for game in tree.find_all('game'):
        game_dir_path = base_scoreboard_directory + 'gid_' + str(
            game['id']) + '/'
        get_starter_stats(game_dir_path, all_stats_array)
def create_month_of_data_directories(month, start_day, end_day, all_star_day):
    """Create one directory per day inside the current month directory.

    The month directory (named by ``convert_month_int_to_string(month)``)
    must already exist in the current working directory. The function
    changes into it while working and restores the previous working
    directory afterwards, even if an error occurs.

    Args:
        month: Month number.
        start_day: First day to create; a falsy value (0) selects the default
            from ``get_default_start_and_end_of_month``.
        end_day: Last day to create (inclusive); a falsy value (0) selects
            the default from ``get_default_start_and_end_of_month``.
        all_star_day: Day of the all-star game. In July, the days from the
            day before through two days after it are skipped. A falsy value
            (0) means there is no break to skip.

    Raises:
        OSError: If the month directory does not exist or a day directory
            cannot be created.
    """
    start_and_end = get_default_start_and_end_of_month(month)
    if not start_day:
        start_day = start_and_end['start']
    if not end_day:
        end_day = start_and_end['end']

    month_as_string = convert_month_int_to_string(month)

    # Without the ``all_star_day`` guard, a value of 0 in July would make the
    # break cover days -1..2 and silently drop July 1st and 2nd.
    skip_all_star_break = bool(all_star_day) and month == 7

    previous_cwd = os.getcwd()
    try:
        os.chdir(month_as_string)
        month_dir_path = os.getcwd()
        print(month_dir_path)

        for day in range(start_day, end_day + 1):
            if (skip_all_star_break and
                    all_star_day - 1 <= day <= all_star_day + 2):
                continue
            day_dir_path = os.path.join(month_dir_path,
                                        convert_number_to_string(day))
            if not os.path.exists(day_dir_path):
                os.makedirs(day_dir_path)
    finally:
        os.chdir(previous_cwd)
def process_player_data_for_today(year, month, day, all_events_array):
    """Collect outcome data for every game on a day's scoreboard.

    The scoreboard file is read from ``<cwd>/data/<year>/<month>/<day>/``.
    For each ``<game>`` element the game's ``gid_<id>/`` directory is passed
    to ``get_linescore_data`` and ``get_boxscore_data``; their results are
    handed to ``get_outcomes_data``. A truthy outcome is stored in
    ``all_events_array`` under the game directory path.

    Args:
        year: Year used to locate the day directory.
        month: Month number, converted with ``convert_month_int_to_string``.
        day: Day number, converted with ``convert_number_to_string``.
        all_events_array: Mapping that is updated in place, keyed by game
            directory path.

    Raises:
        IOError: If the scoreboard file cannot be opened.
    """
    base_file_name = get_scoreboard_file(year, month, day)
    base_scoreboard_directory = str(
        str(os.getcwd()) + '/data/' + str(year) + '/' +
        convert_month_int_to_string(month) + '/' +
        convert_number_to_string(day) + '/')

    print(base_scoreboard_directory)

    # ``with`` guarantees the scoreboard file handle is closed; BeautifulSoup
    # reads the whole file when the tree is constructed.
    with open(base_scoreboard_directory + base_file_name) as scoreboard_file:
        tree = BeautifulSoup(scoreboard_file, 'lxml')

    for game in tree.find_all('game'):
        game_dir_path = base_scoreboard_directory + 'gid_' + str(
            game['id']) + '/'
        team_name_abbreviations = get_linescore_data(game_dir_path)
        expected_player_stats = get_boxscore_data(game_dir_path)
        outcome = get_outcomes_data(game_dir_path, expected_player_stats,
                                    team_name_abbreviations,
                                    base_scoreboard_directory)

        # Games without a usable outcome are left out of the mapping.
        if outcome:
            all_events_array[game_dir_path] = outcome
def create_month_of_game_directories(year, month, start_day, end_day, all_star_day):
    """Create game folders inside each existing day directory of a month.

    The month directory (named by ``convert_month_int_to_string(month)``)
    must exist in the current working directory, and each day directory must
    exist inside it. For every day in range the function changes into the
    day directory and calls ``create_folders_from_xml_scoreboard``. The
    working directory in effect on entry is restored before returning, even
    if an error occurs.

    Args:
        year: Year, forwarded to ``create_folders_from_xml_scoreboard``.
        month: Month number.
        start_day: First day to process; a falsy value (0) selects the
            default from ``get_default_start_and_end_of_month``.
        end_day: Last day to process (inclusive); a falsy value (0) selects
            the default from ``get_default_start_and_end_of_month``.
        all_star_day: Day of the all-star game. In July, the days from the
            day before through two days after it are skipped. A falsy value
            (0) means there is no break to skip.

    Raises:
        OSError: If the month directory or one of the day directories does
            not exist.
    """
    start_and_end = get_default_start_and_end_of_month(month)
    if not start_day:
        start_day = start_and_end['start']
    if not end_day:
        end_day = start_and_end['end']

    month_as_string = convert_month_int_to_string(month)

    # Without the ``all_star_day`` guard, a value of 0 in July would make the
    # break cover days -1..2 and silently drop July 1st and 2nd.
    skip_all_star_break = bool(all_star_day) and month == 7

    previous_cwd = os.getcwd()
    try:
        os.chdir(month_as_string)
        month_cwd = os.getcwd()

        for day in range(start_day, end_day + 1):
            if (skip_all_star_break and
                    all_star_day - 1 <= day <= all_star_day + 2):
                continue

            os.chdir(convert_number_to_string(day))
            try:
                create_folders_from_xml_scoreboard(year, month, day)
            finally:
                # Return to the month directory so the next relative chdir
                # resolves correctly.
                os.chdir(month_cwd)
    finally:
        os.chdir(previous_cwd)