Exemplo n.º 1
0
def playbyplay_extractor(year, game_num):
    """
    Extract play-by-play information from a html file on the
    local machine (in the form of events).

    The report tree comes from
    ``Operations.germinate_report_seed(year, game_num, 'PL', '02')``;
    every ``<tr class="evenColor">`` row that is a direct child of a
    table becomes one ``Objects.Event``.

    Parameters:
        year, game_num: passed through unchanged to
            ``Operations.germinate_report_seed`` to select the report.

    Returns:
        list of ``Objects.Event`` objects, in document order.

    Raises:
        IndexError: if a row has fewer than seven text cells, or an
            on-ice ``<font>`` element lacks text or a ``title``.
        ValueError: if a player's ``title`` attribute does not contain
            the ``' - '`` separator.
    """
    tree = Operations.germinate_report_seed(year, game_num, 'PL', '02')

    events = []  # one Objects.Event per matched row

    for row in tree.xpath('//table/tr[@class="evenColor"]'):
        cells = row.xpath('./td/text()')

        num = unicode(cells[0])
        per_num = unicode(cells[1])
        strength = unicode(cells[2])
        clock = unicode(cells[3])
        # cells[4] is not used by this extractor.
        event_type = unicode(cells[5])
        description = unicode(cells[6])

        # Goals have an additional row in the description cell for
        # assists. The length check keeps a goal row without that extra
        # text node from raising IndexError.
        if event_type == 'GOAL' and len(cells) > 7 and 'Assist' in cells[7]:
            description = unicode(" ".join(cells[6:8]))

        on_ice_tables = row.xpath('./td/table')

        away_on_ice = []
        home_on_ice = []

        # On-ice players are only read when the row holds exactly two
        # nested tables: the first is treated as away, the second as home.
        if len(on_ice_tables) == 2:
            away_on_ice = _pbp_players_on_ice(on_ice_tables[0])
            home_on_ice = _pbp_players_on_ice(on_ice_tables[1])

        events.append(Objects.Event(
            num, per_num, strength, clock, event_type, description,
            away_on_ice, home_on_ice))

    return events


def _pbp_players_on_ice(team_table):
    """Return ``[position, name, number]`` for each ``<font>`` under *team_table*."""
    players = []
    for player in team_table.xpath('.//font'):
        title = player.xpath('./@title')
        number = player.xpath('./text()')[0]

        # The title reads "<position> - <name>"; split on the first
        # separator only so a name containing ' - ' stays in one piece.
        position, name = title[0].split(' - ', 1)

        players.append([position, name, number])
    return players
Exemplo n.º 2
0
def raw_harvest(year, game_num, away_acronym, home_acronym,
        away_roster, home_roster):
    """
    Extract play-by-play information from a html file on the
    local machine (in the form of raw, unspecified events).

    The report tree comes from
    ``Operations.germinate_report_seed(year, game_num, 'PL', '02')``;
    every ``<tr class="evenColor">`` row that is a direct child of a
    table becomes one ``Event``.

    Parameters:
        year, game_num: passed through unchanged to
            ``Operations.germinate_report_seed`` to select the report.
        away_acronym, home_acronym: stored on every ``Event`` as given.
        away_roster, home_roster: handed to
            ``Operations.chop_on_ice_branch`` together with the matching
            on-ice table of each row.

    Returns:
        list of unspecified ``Event`` objects, in document order.

    Raises:
        AssertionError: if the word preceding ``Zone,`` / ``Zone`` in a
            description is not ``'Neu.'``, ``'Off.'`` or ``'Def.'``.
        ValueError: if the event number or period cell is not an integer.
        IndexError: if a row has fewer than seven text cells.
    """
    valid_zones = ('Neu.', 'Off.', 'Def.', None)

    tree = Operations.germinate_report_seed(year, game_num, 'PL', '02')
    events = []  # one unspecified Event per matched row

    for row in tree.xpath('//table/tr[@class="evenColor"]'):
        cells = row.xpath('./td/text()')

        num = int(cells[0])
        per_num = int(cells[1])
        strength = unicode(cells[2])
        clock = unicode(cells[3])
        # cells[4] is not used by this extractor.
        event_type = unicode(cells[5])
        description = unicode(cells[6])

        # The zone is read from the first description line only, before
        # any assist line is merged in below.
        zone = _zone_from_description(description)
        if zone not in valid_zones:
            # Raised explicitly instead of via ``assert`` so the check is
            # not stripped under ``python -O``; the exception type and
            # message are the same as before.
            raise AssertionError("ERROR: Event zone(%s) invalid" % (zone,))

        # Goals have an additional row in the description cell for
        # assists. The length check keeps a goal row without that extra
        # text node from raising IndexError.
        if event_type == 'GOAL' and len(cells) > 7 and 'Assist' in cells[7]:
            description = unicode(" ".join(cells[6:8]))

        on_ice_tables = row.xpath('./td/table')

        away_on_ice = []
        home_on_ice = []

        # On-ice players are only read when the row holds exactly two
        # nested tables: the first is treated as away, the second as home.
        if len(on_ice_tables) == 2:
            away_on_ice = Operations.chop_on_ice_branch(
                on_ice_tables[0], away_roster)
            home_on_ice = Operations.chop_on_ice_branch(
                on_ice_tables[1], home_roster)

        events.append(Event(
            num, per_num, strength, clock, event_type, zone,
            description, away_acronym, home_acronym,
            away_on_ice, home_on_ice))

    return events


def _zone_from_description(description):
    """Return the word preceding 'Zone,' (or else 'Zone') in *description*, or None."""
    words = description.split()
    # 'Zone,' is looked up first; certain events carry a bare 'Zone' at
    # the end of the description instead.
    for marker in ('Zone,', 'Zone'):
        if marker in words:
            return words[words.index(marker) - 1]
    return None
Exemplo n.º 3
0
def harvest(year, game_num):
    """
    Assemble the personnel of one game from its roster ("RO") report.

    Reads the game header and the report tree for (year, game_num), then
    carves the away and home rosters, both coaches and the officials out
    of the report's four-level nested tables. Each coach is tagged with
    the team taken from the game header.

    Returns a GamePersonnel built from
    (away roster, home roster, away coach, home coach, referees, linesmen).

    NOTE(review): the previous docstring said this also creates database
    entries; no such write is visible in this function, so it would have
    to happen inside the helpers called here.
    """
    header = GameHeader.harvest(year, game_num, "RO", "02")
    report = Operations.germinate_report_seed(year, game_num, "RO", "02")
    nested_tables = report.xpath("//table//table//table//table")

    # Away is carved before home, matching the argument order below.
    rosters = [
        chop_ind_roster_branch(nested_tables, side, header, year)
        for side in ("away", "home")
    ]

    away_coach, home_coach = chop_coach_branch(nested_tables)
    away_coach.team = header.away_team
    home_coach.team = header.home_team

    referees, linesmen = chop_officials_branch(nested_tables)

    return GamePersonnel(
        rosters[0], rosters[1], away_coach, home_coach, referees, linesmen
    )
def harvest(year, game_num):
    """
    Extract per-player statistics from the "ES" report of one game.

    Team names come from the game header (harvested from the "RO"
    report); the player rows come from the fourth table matched in the
    "ES" report tree. Rows are attributed to the away team until a row
    whose first text cell equals the home team's upper-case name is
    met; rows after that belong to the home team. Only rows whose class
    is exactly ``evenColor`` or ``oddColor`` are read as player rows.

    Parameters:
        year, game_num: passed through to ``GameHeader.harvest``,
            ``Operations.germinate_report_seed`` and (year only)
            ``Operations.get_playerid``.

    Returns:
        ``(away_roster, home_roster)``: two lists of ``ES_Player``
        objects. A list is empty when no player row was found for that
        side.

    Raises:
        ValueError: if a player row has fewer than 25 text cells.
        IndexError: if fewer than four tables match, or a player's name
            cell does not contain ``', '``.
    """
    # A player row carries 25 text cells: number, position, "LAST, FIRST"
    # and then 22 statistic columns (goals, assists, points, plus_minus,
    # num_penalties, pim, total_minutes, num_shifts, avg_shift_length,
    # powerplay_minutes, shorthanded_minutes, evenstrength_minutes, shots,
    # attempts_blocked, missed_shots, hits, give_aways, take_aways,
    # blocked_shots, faceoff_wins, faceoff_losses, faceoff_percentage),
    # which ES_Player receives positionally in that same order.
    row_width = 25

    game_info = GameHeader.harvest(year, game_num, 'RO', '02')
    # NOTE(review): the away name is not used below; the call is kept in
    # case the lookup validates the acronym - confirm and drop if pure.
    away_full_name = Operations.team_acronym_to_uppercase(
        game_info.away_team)
    home_full_name = Operations.team_acronym_to_uppercase(
        game_info.home_team)

    tree = Operations.germinate_report_seed(year, game_num, "ES", '02')

    tables = tree.xpath('//table[@class="tablewidth" and @align="center"]/tr/td/table[@width="100%"]')
    rows = tables[3].xpath('./tr')

    # Both rosters exist from the start, so empty input or a missing home
    # header row yields empty lists instead of an unbound local.
    away_roster = []
    home_roster = []
    roster = away_roster
    team_acronym = game_info.away_team

    for row in rows:
        cells = row.xpath('./td/text()')

        if cells and cells[0] == home_full_name:
            # Header row of the home team: later rows belong to it.
            roster = home_roster
            team_acronym = game_info.home_team
            continue

        if row.get('class') not in ('evenColor', 'oddColor'):
            continue

        if len(cells) < row_width:
            # Fail loudly rather than build a player from missing columns.
            raise ValueError(
                "ES report row has %d text cells, expected at least %d"
                % (len(cells), row_width))

        # Blank cells hold a lone non-breaking space; they are read as '0'.
        fields = ['0' if cell == u'\xa0' else cell
                  for cell in cells[:row_width]]

        number = fields[0]
        position = fields[1]
        name_parts = fields[2].split(', ')
        first_name = name_parts[1]
        last_name = name_parts[0]

        playerid = Operations.get_playerid(first_name, last_name,
            team_acronym, year, position)

        player_args = [team_acronym, number, position, first_name, last_name]
        player_args.extend(fields[3:row_width])
        player_args.append(playerid)
        roster.append(ES_Player(*player_args))

    return away_roster, home_roster