Example #1
    def parse(self, response):
        # Get this game code from file
        with open(os.getcwd() + "/tmpfiles/" +
                  ''.join(e
                          for e in response.url if e.isalnum()) + ".txt") as f:
            data = f.read()
            m = re.search(r"Code: (?P<code>\d+)", data)
            code = str(m.group('code')).zfill(16)

        # Scrape box score
        away = int(code) // 10**12
        home = (int(code) // 10**8) % 1000
        date = int(code) % 10**8
        away_TGS = Team_Game_Statistics(code, away)
        home_TGS = Team_Game_Statistics(code, home)

        # Scrape first downs
        first_div = response.xpath('//tr[@data-stat-attr="firstDowns"]')
        away_TGS.First_Down_Total = re.sub(
            r'[\\\t|\\\n]', '',
            first_div.xpath('.//td/text()').extract()[1])
        home_TGS.First_Down_Total = re.sub(
            r'[\\\t|\\\n]', '',
            first_div.xpath('.//td/text()').extract()[2])

        # Scrape turnovers
        fumble_div = response.xpath('//tr[@data-stat-attr="fumblesLost"]')
        away_TGS.Fum_Lost = re.sub(
            r'[\\\t|\\\n]', '',
            fumble_div.xpath('.//td/text()').extract()[1])
        home_TGS.Fum_Lost = re.sub(
            r'[\\\t|\\\n]', '',
            fumble_div.xpath('.//td/text()').extract()[2])
        away_TGS.Fum_Ret = home_TGS.Fum_Lost
        home_TGS.Fum_Ret = away_TGS.Fum_Lost

        # Scrape first down efficiency on 3rd down
        eff_div = response.xpath('//tr[@data-stat-attr="thirdDownEff"]')
        fatt_away = re.sub(r'[\\\t|\\\n]', '',
                           eff_div.xpath('.//td/text()').extract()[1])
        fatt_home = re.sub(r'[\\\t|\\\n]', '',
                           eff_div.xpath('.//td/text()').extract()[2])
        # away
        m = re.search(r'(?P<f>\d+)\-(?P<a>\d+)', fatt_away)
        away_TGS.Third_Down_Att = m.group('a')
        away_TGS.Third_Down_Conv = m.group('f')
        # home
        m = re.search(r'(?P<f>\d+)\-(?P<a>\d+)', fatt_home)
        home_TGS.Third_Down_Att = m.group('a')
        home_TGS.Third_Down_Conv = m.group('f')

        # Scrape first down efficiency on 4th down
        eff_div = response.xpath('//tr[@data-stat-attr="fourthDownEff"]')
        fatt_away = re.sub(r'[\\\t|\\\n]', '',
                           eff_div.xpath('.//td/text()').extract()[1])
        fatt_home = re.sub(r'[\\\t|\\\n]', '',
                           eff_div.xpath('.//td/text()').extract()[2])
        # away
        m = re.search(r'(?P<f>\d+)\-(?P<a>\d+)', fatt_away)
        away_TGS.Fourth_Down_Att = m.group('a')
        away_TGS.Fourth_Down_Conv = m.group('f')
        # home
        m = re.search(r'(?P<f>\d+)\-(?P<a>\d+)', fatt_home)
        home_TGS.Fourth_Down_Att = m.group('a')
        home_TGS.Fourth_Down_Conv = m.group('f')

        # Scrape time of possession
        top_div = response.xpath('//tr[@data-stat-attr="possessionTime"]')
        try:
            top_away = re.sub(r'[\\\t|\\\n]', '',
                              top_div.xpath('.//td/text()').extract()[1])
        except IndexError:
            top_away = "30:00"
        try:
            top_home = re.sub(r'[\\\t|\\\n]', '',
                              top_div.xpath('.//td/text()').extract()[2])
        except IndexError:
            top_home = "30:00"

        # away
        m_away = re.search(r'(?P<h>\d+)\:(?P<m>\d+)', top_away)
        # home
        m_home = re.search(r'(?P<h>\d+)\:(?P<m>\d+)', top_home)
        try:
            away_TGS.Time_Of_Possession = str(60 * int(m_away.group('h')) +
                                              int(m_away.group('m')))
            home_TGS.Time_Of_Possession = str(60 * int(m_home.group('h')) +
                                              int(m_home.group('m')))
        except AttributeError:
            away_TGS.Time_Of_Possession = 1800
            home_TGS.Time_Of_Possession = 1800
        if int(away_TGS.Time_Of_Possession) == 1800 and int(
                home_TGS.Time_Of_Possession) != 1800:
            away_TGS.Time_Of_Possession = str(3600 -
                                              int(home_TGS.Time_Of_Possession))
        elif int(home_TGS.Time_Of_Possession) == 1800 and int(
                away_TGS.Time_Of_Possession) != 1800:
            home_TGS.Time_Of_Possession = str(3600 -
                                              int(away_TGS.Time_Of_Possession))

        # Scrape penalties
        pen_div = response.xpath('//tr[@data-stat-attr="totalPenaltiesYards"]')
        pen_away = re.sub(r'[\\\t|\\\n]', '',
                          pen_div.xpath('.//td/text()').extract()[1])
        pen_home = re.sub(r'[\\\t|\\\n]', '',
                          pen_div.xpath('.//td/text()').extract()[2])
        # away
        m = re.search(r'(?P<tot>\d+)\-(?P<yds>\d+)', pen_away)
        away_TGS.Penalty = m.group('tot')
        away_TGS.Penalty_Yard = m.group('yds')
        # home
        m = re.search(r'(?P<tot>\d+)\-(?P<yds>\d+)', pen_home)
        home_TGS.Penalty = m.group('tot')
        home_TGS.Penalty_Yard = m.group('yds')

        # Write stats to file
        if os.path.isfile(str(year) + " Stats/matchup-stats.csv"):
            f = open(str(year) + " Stats/matchup-stats.csv", "a")
            data_writer = csv.writer(f, lineterminator='\n')
            new_rows = []
            new_rows.append(away_TGS.Compile())
            new_rows.append(home_TGS.Compile())
            data_writer.writerows(new_rows)
            f.close()
        else:
            new_rows = []
            new_rows.append(away_TGS.Header())
            new_rows.append(away_TGS.Compile())
            new_rows.append(home_TGS.Compile())
            Write_CSV(new_rows, str(year) + " Stats/matchup-stats.csv")
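
The away team, the home team, and the date are all packed into the 16-digit game code the spider reads back from its temp file. A minimal standalone sketch of that decoding, mirroring the arithmetic above (the sample code value below is made up, not real data):

# Sketch: decode a 16-digit game code into (away, home, date),
# mirroring the arithmetic used above: leading 4 digits for the away
# team, the next 4 for the home team (of which the code above keeps
# the last 3), and a trailing 8-digit date field.
def decode_game_code(code):
    code = int(code)
    away = code // 10**12           # leading 4 digits: away team id
    home = (code // 10**8) % 1000   # home-team field, as used above
    date = code % 10**8             # trailing 8 digits: date field
    return away, home, date

# Hypothetical 16-digit code (team ids and date digits are invented)
print(decode_game_code("0194033320140906"))  # -> (194, 333, 20140906)
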
Example #2
	def parse(self, response):
		# Get this game code from file
		with open(os.getcwd() + "/tmpfiles/" + ''.join(e for e in response.url if e.isalnum()) + ".txt") as f:
			data = f.read()
			m = re.search(r"Code: (?P<code>\d+)", data)
			code = str(m.group('code')).zfill(16)
		# Scrape box score and save raw file
		table = response.xpath('//table[contains(@class, "mod-data")]')
		rows = []
		visitor = int(code) // 1000000000000
		home = (int(code) // 100000000) % 1000
		date = int(code) % 100000000
		for row in table.xpath('.//tr'):
			new_rows1 = [x.xpath('.//text()').extract() for x in row.xpath('.//td')]
			if len(new_rows1) > 0:
				rows.append(new_rows1)
			new_rows2 = [x.xpath('.//text()').extract() for x in row.xpath('.//th')]
			if len(new_rows2) > 0:
				if len(new_rows2) == 3:
					new_rows2 = [new_rows2[0], "", new_rows2[1], new_rows2[2]]
				rows.append(new_rows2)
			# Flatten each cell's list of text nodes into a single string,
			# dropping non-breaking spaces
			for i in range(0, len(rows[-1])):
				rows[-1][i] = ''.join(rows[-1][i]).replace(u'\xa0', '')
		Write_CSV(rows, "box/" + str(visitor).zfill(4) + str(home).zfill(4) + str(date) + ".csv")
		# Convert to team-game-statistics format
		visitor_TGS = Team_Game_Statistics(code, visitor)
		home_TGS = Team_Game_Statistics(code, home)
		team_names = Read_CSV("2014 Stats/team.csv")
		team_names = team_names[1:]
		team_abbvs = Read_CSV("2014 Stats/abbrevations.csv")
		# Get score
		for i in range(0, len(rows)):
			first_qtr = re.search(r"FIRST QUARTER", rows[i][0])
			if first_qtr:
				while len(rows[i+1]) >= 5:
					i += 1
				visitor_TGS.Points = rows[i][4]
				home_TGS.Points = rows[i][5]
			second_qtr = re.search(r"SECOND QUARTER", rows[i][0])
			if second_qtr:
				while len(rows[i+1]) >= 5:
					i += 1
				visitor_TGS.Points = rows[i][4]
				home_TGS.Points = rows[i][5]
			third_qtr = re.search(r"THIRD QUARTER", rows[i][0])
			if third_qtr:
				while len(rows[i+1]) >= 5:
					i += 1
				visitor_TGS.Points = rows[i][4]
				home_TGS.Points = rows[i][5]
			fourth_qtr = re.search(r"FOURTH QUARTER", rows[i][0])
			if fourth_qtr:
				while len(rows[i+1]) >= 5:
					i += 1
				visitor_TGS.Points = rows[i][4]
				home_TGS.Points = rows[i][5]
		# Box score stats
		for i in range(0, len(rows)):
			# Total 1st downs
			first_downs = re.search(r"1st Downs", rows[i][0])
			if first_downs:
				visitor_TGS.First_Down_Total = rows[i][1]
				home_TGS.First_Down_Total = rows[i][2]
			# 3rd down conversions
			third_downs = re.search(r"3rd down efficiency", rows[i][0])
			if third_downs:
				eff = re.match(r"(?P<conv>\d+)\-(?P<att>\d+)", rows[i][1])
				visitor_TGS.Third_Down_Att = eff.group("att")
				visitor_TGS.Third_Down_Conv = eff.group("conv")
				eff = re.match(r"(?P<conv>\d+)\-(?P<att>\d+)", rows[i][2])
				home_TGS.Third_Down_Att = eff.group("att")
				home_TGS.Third_Down_Conv = eff.group("conv")
			# 4th down conversions
			fourth_downs = re.search(r"4th down efficiency", rows[i][0])
			if fourth_downs:
				eff = re.match(r"(?P<conv>\d+)\-(?P<att>\d+)", rows[i][1])
				visitor_TGS.Fourth_Down_Att = eff.group("att")
				visitor_TGS.Fourth_Down_Conv = eff.group("conv")
				eff = re.match(r"(?P<conv>\d+)\-(?P<att>\d+)", rows[i][2])
				home_TGS.Fourth_Down_Att = eff.group("att")
				home_TGS.Fourth_Down_Conv = eff.group("conv")
			# Penalties
			penalties = re.search(r"Penalties", rows[i][0])
			if penalties:
				num_yrds = re.search(r"(?P<num>\d+)\-(?P<yrds>\d+)", rows[i][1])
				visitor_TGS.Penalty = num_yrds.group("num")
				visitor_TGS.Penalty_Yard = num_yrds.group("yrds")
				num_yrds = re.search(r"(?P<num>\d+)\-(?P<yrds>\d+)", rows[i][2])
				home_TGS.Penalty = num_yrds.group("num")
				home_TGS.Penalty_Yard = num_yrds.group("yrds")
			# Possession
			possession = re.search(r"Possession", rows[i][0])
			if possession:
				visitor_TGS.Time_Of_Possession = rows[i][1]
				home_TGS.Time_Of_Possession = rows[i][2]
			# Fumbles Lost
			fum_lost = re.search(r"Fumbles lost", rows[i][0])
			if fum_lost:
				visitor_TGS.Fum_Lost = rows[i][1]
				home_TGS.Fum_Lost = rows[i][2]
				visitor_TGS.Fum_Ret = home_TGS.Fum_Lost
				home_TGS.Fum_Ret = visitor_TGS.Fum_Lost
		# Find stats
		visitor_TGS = Parse_Box(rows, visitor_TGS, team_abbvs)
		# START DEBUG --
		#if int(visitor_TGS.Rush_Att) + int(visitor_TGS.Pass_Att) == 0:
			#pdb.set_trace()
			#visitor_TGS = Parse_Box(rows, visitor_TGS, team_abbvs)
		# END DEBUG --
		home_TGS = Parse_Box(rows, home_TGS, team_abbvs)
		# START DEBUG --
		#if int(home_TGS.Rush_Att) + int(home_TGS.Pass_Att) == 0:
			#pdb.set_trace()
			#home_TGS = Parse_Box(rows, visitor_TGS, team_abbvs)
		# END DEBUG --

		if os.path.isfile("2014 Stats/team-game-statistics.csv"):
			f = open("2014 Stats/team-game-statistics.csv","a")
			data_writer = csv.writer(f, lineterminator = '\n')
			new_rows = []
			new_rows.append(visitor_TGS.Compile())
			new_rows.append(home_TGS.Compile())
			data_writer.writerows(new_rows)
			f.close()
		else:
			new_rows = []
			new_rows.append(visitor_TGS.Header())
			new_rows.append(visitor_TGS.Compile())
			new_rows.append(home_TGS.Compile())
			Write_CSV(new_rows, "2014 Stats/team-game-statistics.csv")
Example #3
	def parse(self, response):
		# Get this game code from file
		with open(os.getcwd() + "/tmpfiles/" + ''.join(e for e in response.url if e.isalnum()) + ".txt") as f:
			data = f.read()
			m = re.search(r"Code: (?P<code>\d+)", data)
			code = str(m.group('code')).zfill(16)

		# Scrape box score
		away = int(code) // 10**12
		home = (int(code) // 10**8) % 1000
		date = int(code) % 10**8
		away_TGS = Team_Game_Statistics(code, away)
		home_TGS = Team_Game_Statistics(code, home)

		# Scrape first downs
		first_div = response.xpath('//tr[@data-stat-attr="firstDowns"]')
		away_TGS.First_Down_Total = re.sub(r'[\\\t|\\\n]','',first_div.xpath('.//td/text()').extract()[1])
		home_TGS.First_Down_Total = re.sub(r'[\\\t|\\\n]','',first_div.xpath('.//td/text()').extract()[2])

		# Scrape turnovers
		fumble_div = response.xpath('//tr[@data-stat-attr="fumblesLost"]')
		away_TGS.Fum_Lost = re.sub(r'[\\\t|\\\n]','',fumble_div.xpath('.//td/text()').extract()[1])
		home_TGS.Fum_Lost = re.sub(r'[\\\t|\\\n]','',fumble_div.xpath('.//td/text()').extract()[2])
		away_TGS.Fum_Ret = home_TGS.Fum_Lost
		home_TGS.Fum_Ret = away_TGS.Fum_Lost

		# Scrape first down efficiency on 3rd down
		eff_div = response.xpath('//tr[@data-stat-attr="thirdDownEff"]')
		fatt_away = re.sub(r'[\\\t|\\\n]','',eff_div.xpath('.//td/text()').extract()[1])
		fatt_home = re.sub(r'[\\\t|\\\n]','',eff_div.xpath('.//td/text()').extract()[2])
		# away
		m = re.search(r'(?P<f>\d+)\-(?P<a>\d+)', fatt_away)
		away_TGS.Third_Down_Att = m.group('a')
		away_TGS.Third_Down_Conv = m.group('f')
		# home
		m = re.search(r'(?P<f>\d+)\-(?P<a>\d+)', fatt_home)
		home_TGS.Third_Down_Att = m.group('a')
		home_TGS.Third_Down_Conv = m.group('f')

		# Scrape first down efficiency on 4th down
		eff_div = response.xpath('//tr[@data-stat-attr="fourthDownEff"]')
		fatt_away = re.sub(r'[\\\t|\\\n]','',eff_div.xpath('.//td/text()').extract()[1])
		fatt_home = re.sub(r'[\\\t|\\\n]','',eff_div.xpath('.//td/text()').extract()[2])
		# away
		m = re.search(r'(?P<f>\d+)\-(?P<a>\d+)', fatt_away)
		away_TGS.Fourth_Down_Att = m.group('a')
		away_TGS.Fourth_Down_Conv = m.group('f')
		# home
		m = re.search(r'(?P<f>\d+)\-(?P<a>\d+)', fatt_home)
		home_TGS.Fourth_Down_Att = m.group('a')
		home_TGS.Fourth_Down_Conv = m.group('f')

		# Scrape time of possession
		top_div = response.xpath('//tr[@data-stat-attr="possessionTime"]')
		try:
			top_away = re.sub(r'[\\\t|\\\n]','',top_div.xpath('.//td/text()').extract()[1])
		except IndexError:
			top_away = "30:00"
		try:
			top_home = re.sub(r'[\\\t|\\\n]','',top_div.xpath('.//td/text()').extract()[2])
		except IndexError:
			top_home = "30:00"

		# away
		m_away = re.search(r'(?P<h>\d+)\:(?P<m>\d+)', top_away)
		# home
		m_home = re.search(r'(?P<h>\d+)\:(?P<m>\d+)', top_home)
		try:
			away_TGS.Time_Of_Possession = str(60*int(m_away.group('h')) + int(m_away.group('m')))
			home_TGS.Time_Of_Possession = str(60*int(m_home.group('h')) + int(m_home.group('m')))
		except AttributeError:
			away_TGS.Time_Of_Possession = 1800
			home_TGS.Time_Of_Possession = 1800
		if int(away_TGS.Time_Of_Possession) == 1800 and int(home_TGS.Time_Of_Possession) != 1800:
			away_TGS.Time_Of_Possession = str(3600 - int(home_TGS.Time_Of_Possession))
		elif int(home_TGS.Time_Of_Possession) == 1800 and int(away_TGS.Time_Of_Possession) != 1800:
			home_TGS.Time_Of_Possession = str(3600 - int(away_TGS.Time_Of_Possession))

		# Scrape penalties
		pen_div = response.xpath('//tr[@data-stat-attr="totalPenaltiesYards"]')
		pen_away = re.sub(r'[\\\t|\\\n]','',pen_div.xpath('.//td/text()').extract()[1])
		pen_home = re.sub(r'[\\\t|\\\n]','',pen_div.xpath('.//td/text()').extract()[2])
		# away
		m = re.search(r'(?P<tot>\d+)\-(?P<yds>\d+)', pen_away)
		away_TGS.Penalty = m.group('tot')
		away_TGS.Penalty_Yard = m.group('yds')
		# home
		m = re.search(r'(?P<tot>\d+)\-(?P<yds>\d+)', pen_home)
		home_TGS.Penalty = m.group('tot')
		home_TGS.Penalty_Yard = m.group('yds')


		# Write stats to file
		if os.path.isfile(str(year) + " Stats/matchup-stats.csv"):
			f = open(str(year) + " Stats/matchup-stats.csv","a")
			data_writer = csv.writer(f, lineterminator = '\n')
			new_rows = []
			new_rows.append(away_TGS.Compile())
			new_rows.append(home_TGS.Compile())
			data_writer.writerows(new_rows)
			f.close()
		else:
			new_rows = []
			new_rows.append(away_TGS.Header())
			new_rows.append(away_TGS.Compile())
			new_rows.append(home_TGS.Compile())
			Write_CSV(new_rows, str(year) + " Stats/matchup-stats.csv")
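
Two small regular expressions carry most of the parsing above: one splits "conversions-attempts" pairs (third and fourth down efficiency, and the penalties "count-yards" pair), the other turns an "MM:SS" possession time into seconds. A standalone sketch with made-up sample values:

import re

# Sketch of the two parsing patterns used repeatedly above; the sample
# strings are invented, not scraped data.
def parse_pair(text):
    # e.g. "5-13" -> (5, 13): conversions and attempts (or count and yards)
    m = re.search(r'(?P<first>\d+)-(?P<second>\d+)', text)
    return int(m.group('first')), int(m.group('second'))

def possession_seconds(text):
    # e.g. "32:45" -> 1965 total seconds of possession
    m = re.search(r'(?P<min>\d+):(?P<sec>\d+)', text)
    return 60 * int(m.group('min')) + int(m.group('sec'))

print(parse_pair("5-13"))           # (5, 13)
print(possession_seconds("32:45"))  # 1965
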