def return_html(player_season_statistics_url):
    """Download a player season statistics page and parse it.

    :param player_season_statistics_url: URL to request.
    :return: whatever ``html.fromstring`` produces for the response body
        (presumably an lxml element -- confirm against the file's imports).
    """
    setup_logging()
    logger = logging.getLogger()
    logger.info("making call to {0}".format(player_season_statistics_url))

    response = urllib2.urlopen(player_season_statistics_url)
    try:
        content = response.read()
    finally:
        # Close explicitly so the connection is released even when read()
        # raises; the original never closed the response.
        response.close()

    player_season_statistics_html = html.fromstring(content)
    logger.info("received html for {0}".format(player_season_statistics_url))
    return player_season_statistics_html
def return_html(box_score_url):
    """Download a box score page and parse it.

    :param box_score_url: URL to request.
    :return: whatever ``html.fromstring`` produces for the response body
        (presumably an lxml element -- confirm against the file's imports).
    """
    setup_logging()
    logger = logging.getLogger()
    logger.info("making call to {0}".format(box_score_url))

    response = urllib2.urlopen(box_score_url)
    try:
        content = response.read()
    finally:
        # Close explicitly so the connection is released even when read()
        # raises; the original never closed the response.
        response.close()

    box_score_html = html.fromstring(content)
    logger.info("received html for {0}".format(box_score_url))
    return box_score_html
def generate_url(season_start_year):
    """Build the basketball-reference season totals URL for a season.

    The page is addressed by the year after ``season_start_year``, so the
    URL is formatted with ``season_start_year + 1``.

    :param season_start_year: year in which the season starts (must support
        ``+ 1``).
    :return: the season totals URL as a string.
    """
    setup_logging()
    logger = logging.getLogger()
    player_season_statistics_url = (
        'http://www.basketball-reference.com/leagues/'
        'NBA_{year}_totals.html?lid=header_seasons'
    ).format(year=season_start_year + 1)
    # The original logged this under the label "box score url", which was a
    # copy-paste leftover; label it as what it actually is.
    logger.info("player season statistics url: {0}".format(
        player_season_statistics_url))
    return player_season_statistics_url
def return_html(player_season_statistics_url):
    """Fetch the given player season statistics URL and parse the response.

    :param player_season_statistics_url: URL to request.
    :return: the value returned by ``html.fromstring`` for the downloaded
        body (presumably an lxml element -- confirm against the file's
        imports).
    """
    setup_logging()
    logger = logging.getLogger()
    logger.info("making call to {0}".format(player_season_statistics_url))

    response = urllib2.urlopen(player_season_statistics_url)
    try:
        content = response.read()
    finally:
        # The original left the response open; always release it, including
        # when read() fails part-way.
        response.close()

    player_season_statistics_html = html.fromstring(content)
    logger.info("received html for {0}".format(player_season_statistics_url))
    return player_season_statistics_html
def return_json_encoded_player_season_team_statistics(
        player_season_statistics_html, player_first_name, player_last_name,
        season_start_year, team_abbreviation):
    """Return JSON-encoded season statistics for one player on one team.

    Rows come from
    ``ParsedPlayerSeasonStatisticsReturner.return_raw_player_season_statistics``.
    A row is kept when its first name, last name and team column (index 4)
    all match the arguments, compared case-insensitively.

    :param player_season_statistics_html: parsed page handed to the row
        parser.
    :param player_first_name: first name to match.
    :param player_last_name: last name to match.
    :param season_start_year: only used in log messages.
    :param team_abbreviation: team abbreviation to match against column 4.
    :return: list of JSON strings, one per matching row.
    """
    # TODO: currently hard-coded should probably change in the future
    setup_logging()
    logger = logging.getLogger()
    logger.info(
        "starting to parse season statistics for {0} - {1} - {2} - {3}".format(
            player_first_name, player_last_name, season_start_year,
            team_abbreviation))

    # Row positions passed to PlayerSeasonStatistics after the first and last
    # name, in constructor-argument order.
    statistic_columns = (3, 4, 2, 5, 6, 7, 8, 9, 11, 12, 14, 15, 18, 19, 21,
                         22, 24, 25, 26, 27, 28, 29)

    wanted_first_name = player_first_name.lower()
    wanted_last_name = player_last_name.lower()
    wanted_team = team_abbreviation.lower()

    raw_player_season_statistics_list = (
        ParsedPlayerSeasonStatisticsReturner.return_raw_player_season_statistics(
            player_season_statistics_html))

    json_encoded_player_season_team_statistics = []
    for raw_row in raw_player_season_statistics_list:
        name_parts = raw_row[1].split(" ")
        first_name = name_parts[0]
        # A single-token name has no second token; fall back to "" instead of
        # raising IndexError as the original did.
        # NOTE: only the second token is used, so multi-word surnames are
        # still reduced to their first word (unchanged behaviour).
        last_name = name_parts[1] if len(name_parts) > 1 else ""

        if (wanted_first_name != first_name.lower()
                or wanted_last_name != last_name.lower()
                or wanted_team != raw_row[4].lower()):
            continue

        player_season_statistics = PlayerSeasonStatistics(
            first_name, last_name,
            *[raw_row[column] for column in statistic_columns])
        json_encoded_player_season_team_statistics.append(
            json.dumps(player_season_statistics,
                       cls=PlayerSeasonStatisticsJsonEncoder))

    logger.info("finished parsing season_statistics for {0}".format(
        season_start_year))
    return json_encoded_player_season_team_statistics
def generate_url(date):
    """Build the basketball-reference daily leaders URL for a date.

    :param date: object exposing ``day``, ``month`` and ``year`` attributes.
    :return: the daily leaders URL as a string.
    """
    setup_logging()
    root_logger = logging.getLogger()

    url_template = 'http://www.basketball-reference.com/friv/dailyleaders.cgi?month={month}&day={day}&year={year}'
    url = url_template.format(day=date.day, month=date.month, year=date.year)

    root_logger.info("box score url: {0}".format(url))
    return url
def generate_url(year):
    """Build the basketball-reference schedule URL for a season.

    For seasons that span multiple years use greatest year value

    :param year: season year as an ``int``.
    :return: the schedule URL as a string.
    :raises AssertionError: if ``year`` is not an ``int`` (this includes
        ``None``).
    """
    # The docstring used to sit below these statements, where it was a no-op
    # string expression and never became the function's __doc__.
    # isinstance already rejects None, so a separate None check is redundant.
    assert isinstance(year, int)
    setup_logging()
    logger = logging.getLogger("main")
    schedule_url = "http://www.basketball-reference.com/leagues/NBA_{0}_games.html".format(year)
    logger.info("schedule url: {0}".format(schedule_url))
    return schedule_url
def return_json_encoded_all_player_season_statistics(
        player_season_statistics_html, season_start_year):
    """Return JSON-encoded season statistics for every player row.

    Rows come from
    ``ParsedPlayerSeasonStatisticsReturner.return_raw_player_season_statistics``.
    Rows whose team column (index 4) equals ``'TOT'`` are skipped.

    :param player_season_statistics_html: parsed page handed to the row
        parser.
    :param season_start_year: only used in log messages.
    :return: list of JSON strings, one per kept row.
    """
    # TODO: currently hard-coded should probably change in the future
    setup_logging()
    logger = logging.getLogger()
    logger.info("starting to parse season statistics for {0}".format(
        season_start_year))

    # Row positions passed to PlayerSeasonStatistics after the first and last
    # name, in constructor-argument order.
    statistic_columns = (3, 4, 2, 5, 6, 7, 8, 9, 11, 12, 14, 15, 18, 19, 21,
                         22, 24, 25, 26, 27, 28, 29)

    raw_player_season_statistics_list = (
        ParsedPlayerSeasonStatisticsReturner.return_raw_player_season_statistics(
            player_season_statistics_html))

    all_json_encoded_player_season_statistics = []
    for raw_row in raw_player_season_statistics_list:
        # in case of total combined statistics
        if raw_row[4] == 'TOT':
            continue

        name_parts = raw_row[1].split(" ")
        first_name = name_parts[0]
        # A single-token name has no second token; fall back to "" instead of
        # raising IndexError as the original did.
        # NOTE: only the second token is used, so multi-word surnames are
        # still reduced to their first word (unchanged behaviour).
        last_name = name_parts[1] if len(name_parts) > 1 else ""

        player_season_statistics = PlayerSeasonStatistics(
            first_name, last_name,
            *[raw_row[column] for column in statistic_columns])
        all_json_encoded_player_season_statistics.append(
            json.dumps(player_season_statistics,
                       cls=PlayerSeasonStatisticsJsonEncoder))

    logger.info("finished parsing season_statistics for {0}".format(
        season_start_year))
    return all_json_encoded_player_season_statistics
def generate_url(year):
    """Return the basketball-reference schedule URL for the given season.

    For seasons that span multiple years use greatest year value

    :param year: season year as an ``int``.
    :return: the schedule URL as a string.
    :raises AssertionError: if ``year`` is not an ``int`` (this includes
        ``None``).
    """
    # Previously the docstring was placed after the first statements, so it
    # was just a discarded string expression and __doc__ stayed None.
    # isinstance already rejects None, so a separate None check is redundant.
    assert isinstance(year, int)
    setup_logging()
    logger = logging.getLogger("main")
    schedule_url = "http://www.basketball-reference.com/leagues/NBA_{0}_games.html".format(
        year)
    logger.info("schedule url: {0}".format(schedule_url))
    return schedule_url
def return_json_encoded_all_player_season_statistics(player_season_statistics_html, season_start_year):
    """Encode every non-'TOT' player season statistics row as JSON.

    Rows are obtained from
    ``ParsedPlayerSeasonStatisticsReturner.return_raw_player_season_statistics``;
    any row whose team column (index 4) is ``'TOT'`` is left out.

    :param player_season_statistics_html: parsed page handed to the row
        parser.
    :param season_start_year: only used in log messages.
    :return: list of JSON strings, one per kept row.
    """
    # TODO: currently hard-coded should probably change in the future
    setup_logging()
    logger = logging.getLogger()
    logger.info("starting to parse season statistics for {0}".format(season_start_year))

    # Row positions passed to PlayerSeasonStatistics after the first and last
    # name, in constructor-argument order.
    statistic_columns = (3, 4, 2, 5, 6, 7, 8, 9, 11, 12, 14, 15, 18, 19, 21,
                         22, 24, 25, 26, 27, 28, 29)

    raw_player_season_statistics_list = ParsedPlayerSeasonStatisticsReturner.return_raw_player_season_statistics(player_season_statistics_html)

    all_json_encoded_player_season_statistics = []
    for raw_row in raw_player_season_statistics_list:
        # in case of total combined statistics
        if raw_row[4] == 'TOT':
            continue

        name_parts = raw_row[1].split(" ")
        first_name = name_parts[0]
        # Names with a single token used to raise IndexError here; use an
        # empty last name for them instead.
        # NOTE: only the second token is used, so multi-word surnames are
        # still reduced to their first word (unchanged behaviour).
        last_name = name_parts[1] if len(name_parts) > 1 else ""

        player_season_statistics = PlayerSeasonStatistics(
            first_name, last_name,
            *[raw_row[column] for column in statistic_columns])
        all_json_encoded_player_season_statistics.append(json.dumps(player_season_statistics, cls=PlayerSeasonStatisticsJsonEncoder))

    logger.info("finished parsing season_statistics for {0}".format(season_start_year))
    return all_json_encoded_player_season_statistics
def return_json_encoded_player_season_team_statistics(player_season_statistics_html, player_first_name, player_last_name, season_start_year, team_abbreviation):
    """Encode as JSON the season statistics rows for one player and team.

    Rows are obtained from
    ``ParsedPlayerSeasonStatisticsReturner.return_raw_player_season_statistics``.
    A row matches when its first name, last name and team column (index 4)
    equal the arguments, ignoring case.

    :param player_season_statistics_html: parsed page handed to the row
        parser.
    :param player_first_name: first name to match.
    :param player_last_name: last name to match.
    :param season_start_year: only used in log messages.
    :param team_abbreviation: team abbreviation to match against column 4.
    :return: list of JSON strings, one per matching row.
    """
    # TODO: currently hard-coded should probably change in the future
    setup_logging()
    logger = logging.getLogger()
    logger.info("starting to parse season statistics for {0} - {1} - {2} - {3}".format(player_first_name, player_last_name, season_start_year, team_abbreviation))

    # Row positions passed to PlayerSeasonStatistics after the first and last
    # name, in constructor-argument order.
    statistic_columns = (3, 4, 2, 5, 6, 7, 8, 9, 11, 12, 14, 15, 18, 19, 21,
                         22, 24, 25, 26, 27, 28, 29)

    wanted_first_name = player_first_name.lower()
    wanted_last_name = player_last_name.lower()
    wanted_team = team_abbreviation.lower()

    raw_player_season_statistics_list = ParsedPlayerSeasonStatisticsReturner.return_raw_player_season_statistics(player_season_statistics_html)

    json_encoded_player_season_team_statistics = []
    for raw_row in raw_player_season_statistics_list:
        name_parts = raw_row[1].split(" ")
        first_name = name_parts[0]
        # Names with a single token used to raise IndexError here; use an
        # empty last name for them instead.
        # NOTE: only the second token is used, so multi-word surnames are
        # still reduced to their first word (unchanged behaviour).
        last_name = name_parts[1] if len(name_parts) > 1 else ""

        is_match = (wanted_first_name == first_name.lower()
                    and wanted_last_name == last_name.lower()
                    and wanted_team == raw_row[4].lower())
        if is_match:
            player_season_statistics = PlayerSeasonStatistics(
                first_name, last_name,
                *[raw_row[column] for column in statistic_columns])
            json_encoded_player_season_team_statistics.append(json.dumps(player_season_statistics, cls=PlayerSeasonStatisticsJsonEncoder))

    logger.info("finished parsing season_statistics for {0}".format(season_start_year))
    return json_encoded_player_season_team_statistics
def return_json_encoded_box_scores(box_scores_html, date):
    """Return the JSON-encoded box scores parsed from a box score page.

    Rows come from
    ``ParsedBoxScoresReturner.return_raw_box_score_list_of_lists``. For each
    row the playing time in column 6 (``"MM:SS"``, or ``''``) is converted to
    seconds and column 3 decides home/away (``"@"`` means away).

    :param box_scores_html: parsed page handed to the row parser.
    :param date: object supporting ``strftime`` and ``str()``; its string
        form is passed to ``BoxScore``.
    :return: list of JSON strings, one per row.
    :raises ValueError: if a non-empty column 6 is not ``minutes:seconds``
        with integer parts.
    """
    # TODO: currently hard-coded should probably change in the future
    setup_logging()
    # NOTE(review): the original created the "main" logger but discarded it
    # and logged through the root logger; root logging is kept here.
    logger = logging.getLogger()
    formatted_date = date.strftime("%Y_%m_%d")
    logger.info("parsing box scores for {0}".format(formatted_date))

    # Row positions passed to BoxScore after the seconds played, in
    # constructor-argument order.
    statistic_columns = (7, 8, 10, 11, 13, 14, 16, 17, 18, 19, 20, 21, 22,
                         23, 24)

    box_score_list_of_lists = (
        ParsedBoxScoresReturner.return_raw_box_score_list_of_lists(
            box_scores_html))

    json_encoded_box_scores = []
    for box_score_list in box_score_list_of_lists:
        name_parts = box_score_list[1].split(" ")
        first_name = name_parts[0]
        # A single-token name has no second token; fall back to "" instead of
        # raising IndexError as the original did.
        last_name = name_parts[1] if len(name_parts) > 1 else ""

        time_played = box_score_list[6]
        if time_played == '':
            seconds_played = 0
        else:
            # time.strptime's %M only accepts 00-59, so the original raised
            # ValueError for 60 or more minutes; timedelta has no such cap.
            minutes, seconds = time_played.split(":")
            seconds_played = datetime.timedelta(
                minutes=int(minutes), seconds=int(seconds)).total_seconds()

        is_home = box_score_list[3] != "@"

        box_score = BoxScore(
            first_name, last_name, str(date), box_score_list[2],
            box_score_list[4], is_home, seconds_played,
            *[box_score_list[column] for column in statistic_columns])
        json_encoded_box_scores.append(
            json.dumps(box_score, cls=BoxScoreJsonEncoder))

    logger.info("finished parsing box scores for {0}".format(formatted_date))
    return json_encoded_box_scores