def _parse_on_ice_table(team_table):
    """
    Return a ``[position, name, number]`` list for every player listed in
    one on-ice table.

    Each player is a ``<font>`` element: its ``title`` attribute has the form
    ``"<position> - <name>"`` and its text is the player's number.
    """
    players = []
    for player in team_table.xpath('.//font'):
        title = player.xpath('./@title')[0]
        number = player.xpath('./text()')[0]
        # Split on the first separator only, so a name that itself contains
        # ' - ' cannot break the two-value unpacking.
        position, name = title.split(' - ', 1)
        players.append([position, name, number])
    return players


def playbyplay_extractor(year, game_num):
    """
    Extract play-by-play information (in the form of events) from the 'PL'
    report of a game.

    The report tree is obtained from
    ``Operations.germinate_report_seed(year, game_num, 'PL', '02')``; every
    ``<tr class="evenColor">`` row of it becomes one ``Objects.Event``.

    Arguments:
        year, game_num: identify the report; passed through unchanged to
            ``Operations.germinate_report_seed``.

    Returns:
        A list of ``Objects.Event`` objects in document order.  The on-ice
        lists of an event stay empty unless its row contains exactly two
        nested tables (away first, home second).
    """
    tree = Operations.germinate_report_seed(year, game_num, 'PL', '02')

    events = []  # unspecified events, in document order
    for item in tree.xpath('//table/tr[@class="evenColor"]'):
        # Direct text nodes of the row's cells; index 4 is not used here.
        event_raw = item.xpath('./td/text()')

        num = unicode(event_raw[0])
        per_num = unicode(event_raw[1])
        strength = unicode(event_raw[2])
        time = unicode(event_raw[3])
        event_type = unicode(event_raw[5])
        description = unicode(event_raw[6])

        # Goals have an additional row in the description cell for assists.
        # The length check keeps a goal row without any further text node
        # from raising IndexError.
        if (event_type == 'GOAL' and len(event_raw) > 7
                and event_raw[7].find('Assist') != -1):
            description = unicode(" ".join(event_raw[6:8]))

        players_on_ice = item.xpath('./td/table')
        away_on_ice = []
        home_on_ice = []
        if len(players_on_ice) == 2:
            away_on_ice = _parse_on_ice_table(players_on_ice[0])
            home_on_ice = _parse_on_ice_table(players_on_ice[1])

        events.append(Objects.Event(
            num, per_num, strength, time, event_type, description,
            away_on_ice, home_on_ice))

    return events
def _extract_zone(description):
    """
    Return the token that precedes 'Zone,' (or, failing that, 'Zone') in
    *description*, or None when neither marker is present.
    """
    tokens = description.split()
    # 'Zone,' is looked up first; certain events carry a bare 'Zone' at the
    # end of the description instead.
    for marker in ('Zone,', 'Zone'):
        if marker in tokens:
            marker_index = tokens.index(marker)
            # A marker in first position has nothing in front of it; without
            # this guard index -1 would wrap around to the last token.
            if marker_index == 0:
                return None
            return tokens[marker_index - 1]
    return None


def raw_harvest(year, game_num, away_acronym, home_acronym,
                away_roster, home_roster):
    """
    Extract play-by-play information (in the form of raw, unspecified
    events) from the 'PL' report of a game.

    The report tree is obtained from
    ``Operations.germinate_report_seed(year, game_num, 'PL', '02')``; every
    ``<tr class="evenColor">`` row of it becomes one ``Event``.

    Arguments:
        year, game_num: identify the report; passed through unchanged to
            ``Operations.germinate_report_seed``.
        away_acronym, home_acronym: stored on every event as given.
        away_roster, home_roster: handed to
            ``Operations.chop_on_ice_branch`` together with the matching
            on-ice table of each row.

    Returns:
        A list of unspecified ``Event`` objects in document order.

    Raises:
        AssertionError: if the zone found in a description is not one of
            'Neu.', 'Off.', 'Def.' (a missing zone is allowed).
    """
    tree = Operations.germinate_report_seed(year, game_num, 'PL', '02')

    events = []  # unspecified events, in document order
    for item in tree.xpath('//table/tr[@class="evenColor"]'):
        # Direct text nodes of the row's cells; index 4 is not used here.
        event_raw = item.xpath('./td/text()')

        num = int(event_raw[0])
        per_num = int(event_raw[1])
        strength = unicode(event_raw[2])
        time = unicode(event_raw[3])
        event_type = unicode(event_raw[5])
        description = unicode(event_raw[6])

        # Zone is not always indicated in the event description.  It is read
        # before any assist text is appended to the description below.
        zone = _extract_zone(description)
        # Raised explicitly (rather than via ``assert``) so the check is not
        # stripped when Python runs with -O; type and message are unchanged.
        if zone not in ('Neu.', 'Off.', 'Def.', None):
            raise AssertionError("ERROR: Event zone(%s) invalid" % (zone))

        # Goals have an additional row in the description cell for assists.
        # The length check keeps a goal row without any further text node
        # from raising IndexError.
        if (event_type == 'GOAL' and len(event_raw) > 7
                and event_raw[7].find('Assist') != -1):
            description = unicode(" ".join(event_raw[6:8]))

        players_on_ice = item.xpath('./td/table')
        away_on_ice = []
        home_on_ice = []
        # TODO: perhaps make this more robust?  Rows that do not hold exactly
        # two nested tables keep empty on-ice lists.
        if len(players_on_ice) == 2:
            away_on_ice = Operations.chop_on_ice_branch(
                players_on_ice[0], away_roster)
            home_on_ice = Operations.chop_on_ice_branch(
                players_on_ice[1], home_roster)

        events.append(Event(
            num, per_num, strength, time, event_type, zone, description,
            away_acronym, home_acronym, away_on_ice, home_on_ice))

    return events
def harvest(year, game_num):
    """
    Assemble the personnel of a game from its 'RO' report.

    The game header and the parsed report tree are both requested for the
    'RO' report.  The tree's four-level nested tables are handed to the
    chop_* helpers, which yield the away/home rosters, the two coaches and
    the officials.  Each coach is tagged with its team acronym from the
    header before everything is bundled into a GamePersonnel, which is
    returned.
    """
    header = GameHeader.harvest(year, game_num, "RO", "02")
    report = Operations.germinate_report_seed(year, game_num, "RO", "02")
    nested_tables = report.xpath("//table//table//table//table")

    # Away roster is built first, then home.
    away_roster, home_roster = [
        chop_ind_roster_branch(nested_tables, side, header, year)
        for side in ("away", "home")
    ]

    away_coach, home_coach = chop_coach_branch(nested_tables)
    away_coach.team, home_coach.team = header.away_team, header.home_team

    officials = chop_officials_branch(nested_tables)
    referees, linesmen = officials

    return GamePersonnel(
        away_roster,
        home_roster,
        away_coach,
        home_coach,
        referees,
        linesmen,
    )
def harvest(year, game_num):
    """
    Build the away and home player lists from the 'ES' report of a game.

    The game header is read via ``GameHeader.harvest(..., 'RO', '02')`` and
    the report tree via ``Operations.germinate_report_seed(..., "ES", '02')``.
    The rows of the fourth matching table are walked in order: players are
    attributed to the away team until a row whose first cell text equals the
    home team's upper-case name is met, and to the home team afterwards.
    Only rows whose class is exactly 'evenColor' or 'oddColor' are treated
    as player rows.

    Returns:
        ``(away_roster, home_roster)``, two lists of ``ES_Player`` objects.

    Raises:
        ValueError: if a player row holds fewer than 25 text cells, or if
            the home team header row is never found.
    """
    game_info = GameHeader.harvest(year, game_num, 'RO', '02')
    home_full_name = Operations.team_acronym_to_uppercase(
        game_info.home_team)

    tree = Operations.germinate_report_seed(year, game_num, "ES", '02')
    tables = tree.xpath(
        '//table[@class="tablewidth" and @align="center"]'
        '/tr/td/table[@width="100%"]')
    rows = tables[3].xpath('./tr')

    # number, position, name and the 22 values that follow them
    expected_fields = 25

    away_roster = None  # set once the home team header row is reached
    roster = []
    team_acronym = game_info.away_team

    for item in rows:
        cells = item.xpath('./td/text()')

        # Rows without any direct cell text cannot be the home header.
        if cells and cells[0] == home_full_name:
            away_roster = roster
            roster = []
            team_acronym = game_info.home_team

        elif item.get('class') in ('evenColor', 'oddColor'):
            # A short row would otherwise reuse values left over from the
            # previous player, so it is rejected outright.
            if len(cells) < expected_fields:
                raise ValueError(
                    "ES player row has %d fields, expected at least %d"
                    % (len(cells), expected_fields))

            # A cell holding only a non-breaking space is recorded as '0'.
            # Cells beyond the expected ones are ignored.
            fields = ['0' if cell == u'\xa0' else cell
                      for cell in cells[:expected_fields]]

            number, position, full_name = fields[:3]
            # The name cell reads "<last name>, <first name>".
            name_raw = full_name.split(', ')
            first_name = name_raw[1]
            last_name = name_raw[0]

            # Remaining cells, in the order ES_Player expects them: goals,
            # assists, points, plus_minus, num_penalties, pim,
            # total_minutes, num_shifts, avg_shift_length,
            # powerplay_minutes, shorthanded_minutes, evenstrength_minutes,
            # shots, attempts_blocked, missed_shots, hits, give_aways,
            # take_aways, blocked_shots, faceoff_wins, faceoff_losses,
            # faceoff_percentage.
            stats = fields[3:]

            playerid = Operations.get_playerid(
                first_name, last_name, team_acronym, year, position)

            player_args = [team_acronym, number, position,
                           first_name, last_name]
            player_args.extend(stats)
            player_args.append(playerid)
            roster.append(ES_Player(*player_args))

    if away_roster is None:
        raise ValueError(
            "Home team header %r not found in ES report" % (home_full_name,))

    home_roster = roster
    return away_roster, home_roster