示例#1
0
    def parse(self, response):
        """Scrape team-level boxscore stats and save them as JSON.

        Team names and the stat table are read from the ``_bsTeamStats``
        area of the page and written to ``boxscore.json`` in the game's
        folder. If any step raises, the error details are written to
        ``bad_boxscore.json`` instead. In either case a stale file of the
        other kind is removed.

        Args:
            response: Page response exposing ``xpath()`` and ``url``
                (presumably a Scrapy response -- confirm against caller).
        """
        # Resolved outside the try block so the error handler can use them
        folder, gameid = sf.find_game_folder(response)

        try:
            teamstats = {'awayTeam': {}, 'homeTeam': {}}
            stats_area = response.xpath(
                '//div[contains(@class,"_bsTeamStats")]')

            # Get team names; the first display div is treated as the away
            # team and the second as the home team
            tm_divs = stats_area.xpath(
                './/div[contains(@class,"wisbb_bstsTeamDisplay")]')
            away_div = tm_divs[0]
            teamstats['awayTeam']['nameFull'] = away_div.xpath(
                './/span[contains(@class,"_bsFull")]/text()').extract()[0]
            teamstats['awayTeam']['nameShort'] = away_div.xpath(
                './/span[contains(@class,"_bsShort")]/text()').extract()[0]
            home_div = tm_divs[1]
            teamstats['homeTeam']['nameFull'] = home_div.xpath(
                './/span[contains(@class,"_bsFull")]/text()').extract()[0]
            teamstats['homeTeam']['nameShort'] = home_div.xpath(
                './/span[contains(@class,"_bsShort")]/text()').extract()[0]

            # Get boxscore stats
            boxtable = stats_area.xpath('.//tbody')
            stat_data = boxtable.xpath(
                './/td[contains(@class,"_bstsStat")]/text()').extract()
            stat_type = boxtable.xpath(
                './/td[contains(@class,"_bstsTitle")]/text()').extract()
            # away stats: even-indexed entries of both lists
            for sdata, stype in zip(stat_data[::2], stat_type[::2]):
                sf.add_boxscore_data(sdata, stype, teamstats['awayTeam'])
            # home stats: odd-indexed entries of both lists
            for sdata, stype in zip(stat_data[1::2], stat_type[1::2]):
                sf.add_boxscore_data(sdata, stype, teamstats['homeTeam'])

            # Save
            sf.dump_json(teamstats, 'boxscore.json', fdir=folder)
            # if bad_boxscore file still here when it shouldn't be, delete it
            fbadbox = os.path.join(folder, 'bad_boxscore.json')
            if os.path.isfile(fbadbox):
                os.remove(fbadbox)

        # Log where problem occurred to debug scraper later.
        # "except ... as ..." is valid on Python 2.6+ and Python 3; the old
        # comma form is a SyntaxError on Python 3.
        except Exception as error:
            err = {
                "ERROR": str(error),
                "LINE": str(sys.exc_info()[-1].tb_lineno),
                "GAME": str(gameid),
                "URL": response.url,
            }
            sf.dump_json(err, 'bad_boxscore.json', fdir=folder)
            # if boxscore file still here when it shouldn't be, delete it
            fgoodbox = os.path.join(folder, 'boxscore.json')
            if os.path.isfile(fgoodbox):
                os.remove(fgoodbox)
	def parse(self, response):
		"""Scrape team-level boxscore stats and save them as JSON.

		Team names and the stat table are read from the ``_bsTeamStats``
		area of the page and written to ``boxscore.json`` in the game's
		folder. If any step raises, the error details are written to
		``bad_boxscore.json`` instead. In either case a stale file of the
		other kind is removed.

		Args:
			response: Page response exposing ``xpath()`` and ``url``
				(presumably a Scrapy response -- confirm against caller).
		"""
		# Resolved outside the try block so the error handler can use them
		folder, gameid = sf.find_game_folder(response)

		try:
			teamstats = {'awayTeam': {}, 'homeTeam': {}}
			stats_area = response.xpath('//div[contains(@class,"_bsTeamStats")]')

			# Get team names; the first display div is treated as the away
			# team and the second as the home team
			tm_divs = stats_area.xpath('.//div[contains(@class,"wisbb_bstsTeamDisplay")]')
			away_div = tm_divs[0]
			teamstats['awayTeam']['nameFull'] = away_div.xpath('.//span[contains(@class,"_bsFull")]/text()').extract()[0]
			teamstats['awayTeam']['nameShort'] = away_div.xpath('.//span[contains(@class,"_bsShort")]/text()').extract()[0]
			home_div = tm_divs[1]
			teamstats['homeTeam']['nameFull'] = home_div.xpath('.//span[contains(@class,"_bsFull")]/text()').extract()[0]
			teamstats['homeTeam']['nameShort'] = home_div.xpath('.//span[contains(@class,"_bsShort")]/text()').extract()[0]

			# Get boxscore stats
			boxtable = stats_area.xpath('.//tbody')
			stat_data = boxtable.xpath('.//td[contains(@class,"_bstsStat")]/text()').extract()
			stat_type = boxtable.xpath('.//td[contains(@class,"_bstsTitle")]/text()').extract()
			# away stats: even-indexed entries of both lists
			for sdata, stype in zip(stat_data[::2], stat_type[::2]):
				sf.add_boxscore_data(sdata, stype, teamstats['awayTeam'])
			# home stats: odd-indexed entries of both lists
			for sdata, stype in zip(stat_data[1::2], stat_type[1::2]):
				sf.add_boxscore_data(sdata, stype, teamstats['homeTeam'])

			# Save
			sf.dump_json(teamstats, 'boxscore.json', fdir=folder)
			# if bad_boxscore file still here when it shouldn't be, delete it
			fbadbox = os.path.join(folder, 'bad_boxscore.json')
			if os.path.isfile(fbadbox):
				os.remove(fbadbox)

		# Log where problem occurred to debug scraper later.
		# "except ... as ..." is valid on Python 2.6+ and Python 3; the old
		# comma form is a SyntaxError on Python 3.
		except Exception as error:
			err = {
				"ERROR": str(error),
				"LINE": str(sys.exc_info()[-1].tb_lineno),
				"GAME": str(gameid),
				"URL": response.url,
			}
			sf.dump_json(err, 'bad_boxscore.json', fdir=folder)
			# if boxscore file still here when it shouldn't be, delete it
			fgoodbox = os.path.join(folder, 'boxscore.json')
			if os.path.isfile(fgoodbox):
				os.remove(fgoodbox)
示例#3
0
    def parse(self, response):
        """Scrape per-player stats and save them as JSON.

        Every stat table found under the ``wisbb_bsMainContent`` area is
        read into ``playerstats[team][category][player][column]`` and
        written to ``playerstats.json`` in the game's folder. Rows named
        ``'Total'`` are additionally merged into ``boxscore.json`` when
        that file can be loaded. If any step raises, or no player stats
        are found, the error details are written to
        ``bad_playerstats.json`` instead and any existing
        ``playerstats.json`` is removed.

        Args:
            response: Page response exposing ``xpath()`` and ``url``
                (presumably a Scrapy response -- confirm against caller).
        """
        # Resolved outside the try block so the error handler can use them
        folder, gameid = sf.find_game_folder(response)

        try:
            # Assume away team is in first column
            main_content = response.xpath(
                '//div[' + sf.contains_str('wisbb_bsMainContent') + ']')
            box_areas = main_content.xpath(
                './/div[' + sf.contains_str('wisbb_bsArea') + ']')
            playerstats = {'awayTeam': {}, 'homeTeam': {}}
            teams = ['awayTeam', 'homeTeam']
            # Find all stat types
            for area in box_areas:
                # Go to stat table per team
                team_tables = area.xpath(
                    './/div[' + sf.contains_str('wisbb_bsTable') + ']')
                for i, table in enumerate(team_tables):
                    column = table.xpath(
                        './/table[' + sf.contains_str('wisbb_bsStandard') +
                        ']')
                    header = column.xpath('.//thead/tr/th/text()').extract()
                    player_rows = column.xpath('.//tbody/tr')
                    # header[0] keys the stat category; header[1:] label
                    # the individual stat columns
                    category = {}
                    playerstats[teams[i]][header[0]] = category
                    # Find all players with stat
                    for player in player_rows:
                        # Name is read from an <a> first, falling back to
                        # a <span> in the same name cell
                        try:
                            name = player.xpath(
                                './/td[' +
                                sf.contains_str('wisbb_bsNameCell') +
                                ']/a/text()').extract()[0]
                        except IndexError:
                            name = player.xpath(
                                './/td[' +
                                sf.contains_str('wisbb_bsNameCell') +
                                ']/span/text()').extract()[0]
                        stats = player.xpath(
                            './/td[contains(@class,"wisbb_priority")]/text()'
                        ).extract()
                        player_stats = {}
                        category[name] = player_stats
                        for j, stat in enumerate(stats):
                            try:
                                stat = float(stat)
                            except ValueError:
                                # Won't work when stat is null ("-");
                                # the raw text is stored in that case
                                pass
                            player_stats[header[j + 1]] = stat
            # Put players stats totals into boxscore file as well
            try:
                teamstats = sf.load_json('boxscore.json', fdir=folder)
            except IOError:
                teamstats = {}
            # items() works on both Python 2 and 3 (iteritems() is 2-only)
            for team, stats in playerstats.items():
                for stat_type, players in stats.items():
                    try:
                        for stat, data in players['Total'].items():
                            teamstats[team][stat_type + ' ' + stat] = data
                    except KeyError:
                        # Best effort: skip categories without a 'Total'
                        # row, or teams absent from the loaded boxscore
                        pass

            if teamstats:
                sf.dump_json(teamstats, 'boxscore.json', fdir=folder)
            if not (playerstats['homeTeam'] or playerstats['awayTeam']):
                # Raised explicitly rather than via ``assert`` so the check
                # is not stripped when Python runs with -O
                raise AssertionError("No player stats found")
            sf.dump_json(playerstats, 'playerstats.json', fdir=folder)
            # if bad_playerstats file still here when it shouldn't be, delete it
            fbadplyr = os.path.join(folder, 'bad_playerstats.json')
            if os.path.isfile(fbadplyr):
                os.remove(fbadplyr)

        # Log where problem occurred to debug scraper later.
        # "except ... as ..." is valid on Python 2.6+ and Python 3; the old
        # comma form is a SyntaxError on Python 3.
        except Exception as error:
            err = {
                "ERROR": str(error),
                "LINE": str(sys.exc_info()[-1].tb_lineno),
                "GAME": str(gameid),
                "URL": response.url,
            }
            sf.dump_json(err, 'bad_playerstats.json', fdir=folder)
            # if playerstats file still here when it shouldn't be, delete it
            fgoodplyr = os.path.join(folder, 'playerstats.json')
            if os.path.isfile(fgoodplyr):
                os.remove(fgoodplyr)
	def parse(self, response):
		"""Scrape per-player stats and save them as JSON.

		Every stat table found under the ``wisbb_bsMainContent`` area is
		read into ``playerstats[team][category][player][column]`` and
		written to ``playerstats.json`` in the game's folder. Rows named
		``'Total'`` are additionally merged into ``boxscore.json`` when
		that file can be loaded. If any step raises, or no player stats
		are found, the error details are written to
		``bad_playerstats.json`` instead and any existing
		``playerstats.json`` is removed.

		Args:
			response: Page response exposing ``xpath()`` and ``url``
				(presumably a Scrapy response -- confirm against caller).
		"""
		# Resolved outside the try block so the error handler can use them
		folder, gameid = sf.find_game_folder(response)

		try:
			# Assume away team is in first column
			main_content = response.xpath('//div['+ sf.contains_str('wisbb_bsMainContent') +']')
			box_areas = main_content.xpath('.//div['+ sf.contains_str('wisbb_bsArea') +']')
			playerstats = {'awayTeam': {}, 'homeTeam': {}}
			teams = ['awayTeam', 'homeTeam']
			# Find all stat types
			for area in box_areas:
				# Go to stat table per team
				team_tables = area.xpath('.//div['+ sf.contains_str('wisbb_bsTable') +']')
				for i, table in enumerate(team_tables):
					column = table.xpath('.//table['+ sf.contains_str('wisbb_bsStandard') +']')
					header = column.xpath('.//thead/tr/th/text()').extract()
					player_rows = column.xpath('.//tbody/tr')
					# header[0] keys the stat category; header[1:] label
					# the individual stat columns
					category = {}
					playerstats[teams[i]][header[0]] = category
					# Find all players with stat
					for player in player_rows:
						# Name is read from an <a> first, falling back to
						# a <span> in the same name cell
						try:
							name = player.xpath('.//td['+ sf.contains_str('wisbb_bsNameCell') +']/a/text()').extract()[0]
						except IndexError:
							name = player.xpath('.//td['+ sf.contains_str('wisbb_bsNameCell') +']/span/text()').extract()[0]
						stats = player.xpath('.//td[contains(@class,"wisbb_priority")]/text()').extract()
						player_stats = {}
						category[name] = player_stats
						for j, stat in enumerate(stats):
							try:
								stat = float(stat)
							except ValueError:
								# Won't work when stat is null ("-");
								# the raw text is stored in that case
								pass
							player_stats[header[j+1]] = stat
			# Put players stats totals into boxscore file as well
			try:
				teamstats = sf.load_json('boxscore.json', fdir=folder)
			except IOError:
				teamstats = {}
			# items() works on both Python 2 and 3 (iteritems() is 2-only)
			for team, stats in playerstats.items():
				for stat_type, players in stats.items():
					try:
						for stat, data in players['Total'].items():
							teamstats[team][stat_type +' '+ stat] = data
					except KeyError:
						# Best effort: skip categories without a 'Total'
						# row, or teams absent from the loaded boxscore
						pass

			if teamstats:
				sf.dump_json(teamstats, 'boxscore.json', fdir=folder)
			if not (playerstats['homeTeam'] or playerstats['awayTeam']):
				# Raised explicitly rather than via ``assert`` so the check
				# is not stripped when Python runs with -O
				raise AssertionError("No player stats found")
			sf.dump_json(playerstats, 'playerstats.json', fdir=folder)
			# if bad_playerstats file still here when it shouldn't be, delete it
			fbadplyr = os.path.join(folder, 'bad_playerstats.json')
			if os.path.isfile(fbadplyr):
				os.remove(fbadplyr)

		# Log where problem occurred to debug scraper later.
		# "except ... as ..." is valid on Python 2.6+ and Python 3; the old
		# comma form is a SyntaxError on Python 3.
		except Exception as error:
			err = {
				"ERROR": str(error),
				"LINE": str(sys.exc_info()[-1].tb_lineno),
				"GAME": str(gameid),
				"URL": response.url,
			}
			sf.dump_json(err, 'bad_playerstats.json', fdir=folder)
			# if playerstats file still here when it shouldn't be, delete it
			fgoodplyr = os.path.join(folder, 'playerstats.json')
			if os.path.isfile(fgoodplyr):
				os.remove(fgoodplyr)