Exemplo n.º 1
0
def _zone_from_description (description):
	"""
	Return the word that precedes the 'Zone,' token in an event
	description. If 'Zone,' is absent, the word preceding 'Zone' is
	returned instead. Returns None when neither token is present.
	"""
	words = description.split()
	# 'Zone,' is tried first; certain events have the zone at the end of
	# the description, where 'Zone' appears without the trailing comma
	for marker in ('Zone,', 'Zone'):
		if marker in words:
			return words[words.index(marker) - 1]
	return None


def raw_harvest (year, game_num, away_acronym, home_acronym,
		away_roster, home_roster):
	"""
	Extract play-by-play information from a html file on the
	local machine (in the form of raw, unspecified events).

	year, game_num: forwarded to Operations.germinate_report_seed
		(together with 'PL' and '02') to obtain the parsed report tree
	away_acronym, home_acronym: passed through unchanged to each Event
	away_roster, home_roster: handed to Operations.chop_on_ice_branch to
		resolve the away/home players on ice for each event

	Returns list of unspecified event objects (one Event per
	'evenColor' table row, in document order).
	Raises AssertionError when a parsed zone is not one of
	'Neu.', 'Off.', 'Def.' or None.
	"""
	valid_zones = ('Neu.', 'Off.', 'Def.', None)

	tree = Operations.germinate_report_seed(year,game_num,'PL','02')
	events = [] # empty list for holding unspecified events

	for item in tree.xpath('//table/tr[@class="evenColor"]'):

		# Direct text nodes of the row's cells, addressed by position below
		event_raw = item.xpath('./td/text()')

		num = int(event_raw[0])
		per_num = int(event_raw[1])
		strength = unicode(event_raw[2])
		time = unicode(event_raw[3])
		event_type = unicode(event_raw[5])
		description = unicode(event_raw[6])

		# Zone not always indicated in event description
		# NOTE: flagged by the original author as a bit redundant, since
		# the same parsing is also done before pruning events
		zone = _zone_from_description (description)
		# Raised explicitly (rather than via an assert statement) so the
		# check is not stripped when Python runs with -O
		if zone not in valid_zones:
			raise AssertionError(
				"ERROR: Event zone(%s) invalid"%(zone))

		# Goals have an additional row in the description cell for assists.
		# The length check guards against rows that lack that extra cell
		if (event_type == 'GOAL' and len (event_raw) > 7
				and 'Assist' in event_raw[7]):
			description = unicode(" ".join(event_raw[6:8]))

		players_on_ice = item.xpath('./td/table')

		home_on_ice = []
		away_on_ice = []

		# On-ice players are only resolved when exactly two nested tables
		# are found (first is treated as away, second as home)
		if len (players_on_ice) == 2: # Perhaps make this more robust?
			away_on_ice = Operations.chop_on_ice_branch (
				players_on_ice[0], away_roster)
			home_on_ice = Operations.chop_on_ice_branch (
				players_on_ice[1], home_roster)

		event = Event(num, per_num, strength, time, event_type, zone,
			description, away_acronym, home_acronym,
			away_on_ice, home_on_ice)
		events.append (event)
	return events