def parse(self, response):
    """Scrape the team boxscore from a game page and save it as JSON.

    Team names and the per-team stat values are read from the
    ``_bsTeamStats`` area of ``response`` and written to ``boxscore.json``
    in the game's folder.  If anything fails, ``bad_boxscore.json`` is
    written instead, recording the error message, the line number where it
    occurred, the game id and the page URL.  The file that does not match
    the outcome of this run is removed so a stale copy from an earlier run
    does not linger.

    Args:
        response: Page response exposing ``.xpath()`` and ``.url``
            (presumably a Scrapy response -- confirm against the spider).

    Returns:
        None.  Results are only written to disk.
    """
    # Find folder location and set up data struct
    folder, gameid = sf.find_game_folder(response)
    team_keys = ('awayTeam', 'homeTeam')

    try:
        teamstats = {'awayTeam': {}, 'homeTeam': {}}
        stats_area = response.xpath(
            '//div[contains(@class,"_bsTeamStats")]')

        # Get team names; the first display div is treated as the away
        # team and the second as the home team.  Indexing (rather than
        # zip) keeps the IndexError when a div is missing, so the page is
        # reported as bad instead of being saved half-filled.
        tm_divs = stats_area.xpath(
            './/div[contains(@class,"wisbb_bstsTeamDisplay")]')
        for idx, team in enumerate(team_keys):
            team_div = tm_divs[idx]
            teamstats[team]['nameFull'] = team_div.xpath(
                './/span[contains(@class,"_bsFull")]/text()').extract()[0]
            teamstats[team]['nameShort'] = team_div.xpath(
                './/span[contains(@class,"_bsShort")]/text()').extract()[0]

        # Get boxscore stats
        boxtable = stats_area.xpath('.//tbody')
        stat_data = boxtable.xpath(
            './/td[contains(@class,"_bstsStat")]/text()').extract()
        stat_type = boxtable.xpath(
            './/td[contains(@class,"_bstsTitle")]/text()').extract()

        # Even positions are taken as away stats, odd positions as home
        # stats.  NOTE(review): this assumes both cell lists alternate
        # away/home in step with each other -- confirm against the markup.
        for offset, team in enumerate(team_keys):
            for sdata, stype in zip(stat_data[offset::2],
                                    stat_type[offset::2]):
                sf.add_boxscore_data(sdata, stype, teamstats[team])

        # Save
        sf.dump_json(teamstats, 'boxscore.json', fdir=folder)

        # if bad_boxscore file still here when it shouldn't be, delete it
        fbadbox = os.path.join(folder, 'bad_boxscore.json')
        if os.path.isfile(fbadbox):
            os.remove(fbadbox)

    # Log where problem occurred to debug scraper later
    except Exception as error:
        err = {
            "ERROR": str(error),
            "LINE": str(sys.exc_info()[-1].tb_lineno),
            "GAME": str(gameid),
            "URL": response.url,
        }
        sf.dump_json(err, 'bad_boxscore.json', fdir=folder)

        # if boxscore file still here when it shouldn't be, delete it
        fgoodbox = os.path.join(folder, 'boxscore.json')
        if os.path.isfile(fgoodbox):
            os.remove(fgoodbox)
def parse(self, response):
    """Scrape the team boxscore from a game page and save it as JSON.

    Team names and the per-team stat values are read from the
    ``_bsTeamStats`` area of ``response`` and written to ``boxscore.json``
    in the game's folder.  If anything fails, ``bad_boxscore.json`` is
    written instead, recording the error message, the line number where it
    occurred, the game id and the page URL.  The file that does not match
    the outcome of this run is removed so a stale copy from an earlier run
    does not linger.

    Args:
        response: Page response exposing ``.xpath()`` and ``.url``
            (presumably a Scrapy response -- confirm against the spider).

    Returns:
        None.  Results are only written to disk.
    """
    # Find folder location and set up data struct
    folder, gameid = sf.find_game_folder(response)
    team_keys = ('awayTeam', 'homeTeam')

    try:
        teamstats = {'awayTeam': {}, 'homeTeam': {}}
        stats_area = response.xpath(
            '//div[contains(@class,"_bsTeamStats")]')

        # Get team names; the first display div is treated as the away
        # team and the second as the home team.  Indexing (rather than
        # zip) keeps the IndexError when a div is missing, so the page is
        # reported as bad instead of being saved half-filled.
        tm_divs = stats_area.xpath(
            './/div[contains(@class,"wisbb_bstsTeamDisplay")]')
        for idx, team in enumerate(team_keys):
            team_div = tm_divs[idx]
            teamstats[team]['nameFull'] = team_div.xpath(
                './/span[contains(@class,"_bsFull")]/text()').extract()[0]
            teamstats[team]['nameShort'] = team_div.xpath(
                './/span[contains(@class,"_bsShort")]/text()').extract()[0]

        # Get boxscore stats
        boxtable = stats_area.xpath('.//tbody')
        stat_data = boxtable.xpath(
            './/td[contains(@class,"_bstsStat")]/text()').extract()
        stat_type = boxtable.xpath(
            './/td[contains(@class,"_bstsTitle")]/text()').extract()

        # Even positions are taken as away stats, odd positions as home
        # stats.  NOTE(review): this assumes both cell lists alternate
        # away/home in step with each other -- confirm against the markup.
        for offset, team in enumerate(team_keys):
            for sdata, stype in zip(stat_data[offset::2],
                                    stat_type[offset::2]):
                sf.add_boxscore_data(sdata, stype, teamstats[team])

        # Save
        sf.dump_json(teamstats, 'boxscore.json', fdir=folder)

        # if bad_boxscore file still here when it shouldn't be, delete it
        fbadbox = os.path.join(folder, 'bad_boxscore.json')
        if os.path.isfile(fbadbox):
            os.remove(fbadbox)

    # Log where problem occurred to debug scraper later
    except Exception as error:
        err = {
            "ERROR": str(error),
            "LINE": str(sys.exc_info()[-1].tb_lineno),
            "GAME": str(gameid),
            "URL": response.url,
        }
        sf.dump_json(err, 'bad_boxscore.json', fdir=folder)

        # if boxscore file still here when it shouldn't be, delete it
        fgoodbox = os.path.join(folder, 'boxscore.json')
        if os.path.isfile(fgoodbox):
            os.remove(fgoodbox)
def parse(self, response):
    """Scrape per-player stats from a game page and save them as JSON.

    Every stat table found under the ``wisbb_bsMainContent`` area is read
    into a nested dict ``{team: {stat_type: {player: {column: value}}}}``
    and written to ``playerstats.json`` in the game's folder.  Rows named
    ``Total`` are additionally merged into the existing ``boxscore.json``
    (when that file loads and already has an entry for the team) under
    keys of the form ``"<stat_type> <column>"``.

    If anything fails -- including finding no player stats at all --
    ``bad_playerstats.json`` is written instead, recording the error
    message, the line number where it occurred, the game id and the page
    URL.  The file that does not match the outcome of this run is removed
    so a stale copy from an earlier run does not linger.

    Args:
        response: Page response exposing ``.xpath()`` and ``.url``
            (presumably a Scrapy response -- confirm against the spider).

    Returns:
        None.  Results are only written to disk.
    """
    # Find folder location and set up data struct
    folder, gameid = sf.find_game_folder(response)

    try:
        # Assume away team is in first column
        main_content = response.xpath(
            '//div[' + sf.contains_str('wisbb_bsMainContent') + ']')
        box_areas = main_content.xpath(
            './/div[' + sf.contains_str('wisbb_bsArea') + ']')

        playerstats = {'awayTeam': {}, 'homeTeam': {}}
        teams = ['awayTeam', 'homeTeam']

        # Find all stat types
        for area in box_areas:
            # Go to stat table per team
            team_tables = area.xpath(
                './/div[' + sf.contains_str('wisbb_bsTable') + ']')
            for i, table in enumerate(team_tables):
                column = table.xpath(
                    './/table[' + sf.contains_str('wisbb_bsStandard') + ']')
                header = column.xpath('.//thead/tr/th/text()').extract()
                player_cols = column.xpath('.//tbody/tr')

                # header[0] names the stat type; header[1:] name the
                # stat columns of each player row.
                stat_group = {}
                playerstats[teams[i]][header[0]] = stat_group

                # Find all players with stat
                for player in player_cols:
                    # The name is usually inside a link; fall back to a
                    # plain span when there is no link in the name cell.
                    try:
                        name = player.xpath(
                            './/td[' + sf.contains_str('wisbb_bsNameCell')
                            + ']/a/text()').extract()[0]
                    except IndexError:
                        name = player.xpath(
                            './/td[' + sf.contains_str('wisbb_bsNameCell')
                            + ']/span/text()').extract()[0]
                    values = player.xpath(
                        './/td[contains(@class,"wisbb_priority")]/text()'
                    ).extract()

                    player_entry = {}
                    stat_group[name] = player_entry
                    for j, stat in enumerate(values):
                        try:
                            stat = float(stat)
                        except ValueError:
                            # Won't work when stat is null ("-");
                            # keep the raw text in that case.
                            pass
                        player_entry[header[j + 1]] = stat

        # Put players stats totals into boxscore file as well
        try:
            teamstats = sf.load_json('boxscore.json', fdir=folder)
        except IOError:
            teamstats = {}
        for team, stat_groups in playerstats.items():
            for statType, players in stat_groups.items():
                try:
                    for stat, data in players['Total'].items():
                        teamstats[team][statType + ' ' + stat] = data
                except KeyError:
                    # Best effort: no 'Total' row for this stat type, or
                    # no entry for this team in the loaded boxscore.
                    pass
        if teamstats:
            sf.dump_json(teamstats, 'boxscore.json', fdir=folder)

        # Raise explicitly instead of using `assert`, which is stripped
        # under `python -O` and would let an empty page pass as a success.
        if not (playerstats['homeTeam'] or playerstats['awayTeam']):
            raise AssertionError("No player stats found")
        sf.dump_json(playerstats, 'playerstats.json', fdir=folder)

        # if bad_playerstats file still here when it shouldn't be, delete it
        fbadplyr = os.path.join(folder, 'bad_playerstats.json')
        if os.path.isfile(fbadplyr):
            os.remove(fbadplyr)

    # Log where problem occurred to debug scraper later
    except Exception as error:
        err = {
            "ERROR": str(error),
            "LINE": str(sys.exc_info()[-1].tb_lineno),
            "GAME": str(gameid),
            "URL": response.url,
        }
        sf.dump_json(err, 'bad_playerstats.json', fdir=folder)

        # if playerstats file still here when it shouldn't be, delete it
        fgoodplyr = os.path.join(folder, 'playerstats.json')
        if os.path.isfile(fgoodplyr):
            os.remove(fgoodplyr)
def parse(self, response):
    """Scrape per-player stats from a game page and save them as JSON.

    Every stat table found under the ``wisbb_bsMainContent`` area is read
    into a nested dict ``{team: {stat_type: {player: {column: value}}}}``
    and written to ``playerstats.json`` in the game's folder.  Rows named
    ``Total`` are additionally merged into the existing ``boxscore.json``
    (when that file loads and already has an entry for the team) under
    keys of the form ``"<stat_type> <column>"``.

    If anything fails -- including finding no player stats at all --
    ``bad_playerstats.json`` is written instead, recording the error
    message, the line number where it occurred, the game id and the page
    URL.  The file that does not match the outcome of this run is removed
    so a stale copy from an earlier run does not linger.

    Args:
        response: Page response exposing ``.xpath()`` and ``.url``
            (presumably a Scrapy response -- confirm against the spider).

    Returns:
        None.  Results are only written to disk.
    """
    # Find folder location and set up data struct
    folder, gameid = sf.find_game_folder(response)

    try:
        # Assume away team is in first column
        main_content = response.xpath(
            '//div[' + sf.contains_str('wisbb_bsMainContent') + ']')
        box_areas = main_content.xpath(
            './/div[' + sf.contains_str('wisbb_bsArea') + ']')

        playerstats = {'awayTeam': {}, 'homeTeam': {}}
        teams = ['awayTeam', 'homeTeam']

        # Find all stat types
        for area in box_areas:
            # Go to stat table per team
            team_tables = area.xpath(
                './/div[' + sf.contains_str('wisbb_bsTable') + ']')
            for i, table in enumerate(team_tables):
                column = table.xpath(
                    './/table[' + sf.contains_str('wisbb_bsStandard') + ']')
                header = column.xpath('.//thead/tr/th/text()').extract()
                player_cols = column.xpath('.//tbody/tr')

                # header[0] names the stat type; header[1:] name the
                # stat columns of each player row.
                stat_group = {}
                playerstats[teams[i]][header[0]] = stat_group

                # Find all players with stat
                for player in player_cols:
                    # The name is usually inside a link; fall back to a
                    # plain span when there is no link in the name cell.
                    try:
                        name = player.xpath(
                            './/td[' + sf.contains_str('wisbb_bsNameCell')
                            + ']/a/text()').extract()[0]
                    except IndexError:
                        name = player.xpath(
                            './/td[' + sf.contains_str('wisbb_bsNameCell')
                            + ']/span/text()').extract()[0]
                    values = player.xpath(
                        './/td[contains(@class,"wisbb_priority")]/text()'
                    ).extract()

                    player_entry = {}
                    stat_group[name] = player_entry
                    for j, stat in enumerate(values):
                        try:
                            stat = float(stat)
                        except ValueError:
                            # Won't work when stat is null ("-");
                            # keep the raw text in that case.
                            pass
                        player_entry[header[j + 1]] = stat

        # Put players stats totals into boxscore file as well
        try:
            teamstats = sf.load_json('boxscore.json', fdir=folder)
        except IOError:
            teamstats = {}
        for team, stat_groups in playerstats.items():
            for statType, players in stat_groups.items():
                try:
                    for stat, data in players['Total'].items():
                        teamstats[team][statType + ' ' + stat] = data
                except KeyError:
                    # Best effort: no 'Total' row for this stat type, or
                    # no entry for this team in the loaded boxscore.
                    pass
        if teamstats:
            sf.dump_json(teamstats, 'boxscore.json', fdir=folder)

        # Raise explicitly instead of using `assert`, which is stripped
        # under `python -O` and would let an empty page pass as a success.
        if not (playerstats['homeTeam'] or playerstats['awayTeam']):
            raise AssertionError("No player stats found")
        sf.dump_json(playerstats, 'playerstats.json', fdir=folder)

        # if bad_playerstats file still here when it shouldn't be, delete it
        fbadplyr = os.path.join(folder, 'bad_playerstats.json')
        if os.path.isfile(fbadplyr):
            os.remove(fbadplyr)

    # Log where problem occurred to debug scraper later
    except Exception as error:
        err = {
            "ERROR": str(error),
            "LINE": str(sys.exc_info()[-1].tb_lineno),
            "GAME": str(gameid),
            "URL": response.url,
        }
        sf.dump_json(err, 'bad_playerstats.json', fdir=folder)

        # if playerstats file still here when it shouldn't be, delete it
        fgoodplyr = os.path.join(folder, 'playerstats.json')
        if os.path.isfile(fgoodplyr):
            os.remove(fgoodplyr)