def prior_year_sd(t):
    '''Return the score dict data for the most recent earlier playing of t.

    Looks up the tournament with the same pga_tournament_num in the previous
    season and, when that lookup fails, in the season two years back. If
    neither lookup succeeds an empty dict is returned.

    The ScoreDict for the tournament that was found is fetched or created.
    When it was just created, or holds no player entries with a pga_num, the
    data is re-scraped from the ESPN leaderboard and saved before returning.

    t: a Tournament object (its season.season and pga_tournament_num are read).
    Returns: the ScoreDict data dict, or {} when no earlier tournament exists.
    '''
    # (seasons back, message printed when that lookup fails)
    lookups = (
        (1, 'no prior tournament, getting 2 years ago'),
        (2, 'no prior 2 years ago, returning nothing'),
    )
    for years_back, miss_msg in lookups:
        try:
            prior_season = Season.objects.get(season=int(t.season.season) - years_back)
            prior_t = Tournament.objects.get(
                pga_tournament_num=t.pga_tournament_num, season=prior_season)
            break
        except Exception as e:
            # Deliberately broad: any lookup problem means "try the next option".
            print(miss_msg, e)
    else:
        # Neither season produced a tournament.
        return {}

    print('proir T: ', prior_t, prior_t.season)
    sd, created = ScoreDict.objects.get_or_create(tournament=prior_t)

    # Guard against a ScoreDict whose data is None/empty so the scan below
    # cannot raise on .items().
    stored = sd.data or {}
    pga_nums = [
        v.get('pga_num') for (k, v) in stored.items()
        if k != 'info' and v.get('pga_num')
    ]
    print('prior SD # of pga nums: ', len(pga_nums))

    # Empty data always yields an empty pga_nums list, so this single test
    # covers "new", "empty" and "no pga nums".
    if created or not pga_nums:
        print('updating prior SD', prior_t)
        espn_t_num = scrape_espn.ScrapeESPN().get_t_num(prior_season)
        url = "https://www.espn.com/golf/leaderboard?tournamentId=" + espn_t_num
        sd.data = scrape_espn.ScrapeESPN(prior_t, url, True, True).get_data()
        sd.save()

    return sd.data
def __init__(self, tournament=None, field=None):
    '''Store the tournament and its field, loading defaults when omitted.

    tournament: Tournament to work with; when None the Tournament flagged
        current=True is loaded from the database.
    field: pre-built field dict; when None it is scraped from ESPN. For
        pga_tournament_num '999' the men's and women's leaderboards are
        scraped separately and merged (women's entries win on key clashes).
    '''
    if tournament is None:
        self.tournament = Tournament.objects.get(current=True)
    else:
        self.tournament = tournament

    if field is not None:
        # Caller supplied the field (an empty dict is honoured as-is).
        self.field = field
    elif self.tournament.pga_tournament_num == '999':
        mens_field = scrape_espn.ScrapeESPN(
            tournament=self.tournament,
            url='https://www.espn.com/golf/leaderboard?tournamentId=401285309',
            setup=True).get_data()
        womens_field = scrape_espn.ScrapeESPN(
            tournament=self.tournament,
            url="https://www.espn.com/golf/leaderboard/_/tour/womens-olympics-golf",
            setup=True).get_data()
        self.field = {**mens_field, **womens_field}
    else:
        self.field = scrape_espn.ScrapeESPN().get_data()
def prior_year_sd(t, current=None):
    '''Return score dict data for an earlier playing of t, or for t itself.

    t: a Tournament object.
    current: when truthy the prior-year lookup is skipped and t (with its own
        season) is used instead.

    Without current, the tournament with the same pga_tournament_num is looked
    up in the previous season and, failing that, two seasons back; if neither
    exists an empty dict is returned.

    The ScoreDict for the chosen tournament is fetched or created. When it is
    new, empty, or has no 'info' entry, the data is re-scraped from the ESPN
    leaderboard and saved.

    Returns: the ScoreDict data dict, or {} when no earlier tournament exists.
    '''
    if current:
        prior_season = t.season
        prior_t = t
    else:
        # (seasons back, message printed when that lookup fails)
        lookups = (
            (1, 'no prior tournament, getting 2 years ago'),
            (2, 'no prior 2 years ago, returning nothing'),
        )
        for years_back, miss_msg in lookups:
            try:
                prior_season = Season.objects.get(season=int(t.season.season) - years_back)
                prior_t = Tournament.objects.get(
                    pga_tournament_num=t.pga_tournament_num, season=prior_season)
                break
            except Exception as e:
                # Deliberately broad: any lookup problem means "try the next option".
                print (miss_msg, e)
        else:
            # Neither season produced a tournament.
            return {}

    print ('proir T: ', prior_t, prior_t.season)
    sd, created = ScoreDict.objects.get_or_create(tournament=prior_t)

    if created:
        print ('created score dict')
    else:
        # Tolerate stored data that is None/empty instead of raising on .items().
        stored = sd.data or {}
        pga_nums = [v.get('pga_num') for (k, v) in stored.items()
                    if k != 'info' and v.get('pga_num')]
        print ('prior SD # of pga nums: ', len(pga_nums))

    # added info check to update if not from espn
    if created or not sd.data or not sd.data.get('info'):
        print ('updating prior SD', prior_t)
        espn_t_num = scrape_espn.ScrapeESPN(prior_t).get_t_num(prior_season)
        print ('espn T num', espn_t_num)
        url = "https://www.espn.com/golf/leaderboard?tournamentId=" + espn_t_num
        score_dict = scrape_espn.ScrapeESPN(prior_t, url, True, True).get_data()
        print ('saving prior SD, SD data len: ', prior_t, len(score_dict))
        sd.data = score_dict
        sd.save()

    return sd.data
def get_espn_players():
    '''Return whatever ScrapeESPN(None, None, True, True).get_data() yields.'''
    scraper = scrape_espn.ScrapeESPN(None, None, True, True)
    return scraper.get_data()
def _owgr_fields(ranks):
    '''Build the three OWGR keys from a utils.fix_name result (reads ranks[1][0..2]).'''
    return {'curr_owgr': ranks[1][0],
            'soy_owgr': ranks[1][2],
            'sow_owgr': ranks[1][1]}


def _add_match_play_field(t, owgr_rankings, field_dict):
    '''Fill field_dict from the PGA Tour match play group-stage bracket page.'''
    print ('match play')
    url = ('https://www.pgatour.com/competition/' + str(t.season.season)
           + '/wgc-dell-technologies-match-play/group-stage.html')
    mp_dict = scrape_scores_picks.ScrapeScores(t, url).mp_brackets()
    for player, data in mp_dict.items():
        ranks = utils.fix_name(player, owgr_rankings)
        field_dict[player] = {'pga_num': data.get('pga_num'), **_owgr_fields(ranks)}
    print ('mp field dict: ', field_dict)


def _add_olympic_field(t, owgr_rankings, field_dict):
    '''Fill field_dict from the men's and women's Olympic ESPN leaderboards.'''
    # update this to use the class from olympics_sd.py
    mens_field = scrape_espn.ScrapeESPN(
        tournament=t,
        url='https://www.espn.com/golf/leaderboard?tournamentId=401285309',
        setup=True).get_data()
    womens_field = scrape_espn.ScrapeESPN(
        tournament=t,
        url="https://www.espn.com/golf/leaderboard/_/tour/womens-olympics-golf",
        setup=True).get_data()

    for man, data in mens_field.items():
        if man == 'info':
            continue
        ranks = utils.fix_name(man, owgr_rankings)
        field_dict[man] = {'espn_num': data.get('pga_num'),
                           'sex': 'dude',
                           **_owgr_fields(ranks),
                           'flag': data.get('flag')}

    womens_ranks = get_womans_rankings()
    for woman, stats in womens_field.items():
        if woman == 'info':
            continue
        rank = utils.fix_name(woman, womens_ranks)
        # Women's rank is offset by 1000 before being stored as curr_owgr.
        field_dict[woman] = {'espn_num': stats.get('pga_num'),
                             'sex': 'chick',
                             'curr_owgr': int(rank[1].get('rank')) + 1000,
                             'flag': stats.get('flag')}


def _add_pga_field(t, owgr_rankings, field_dict):
    '''Fill field_dict from the PGA Tour field JSON feed; raises on any failure.'''
    headers = {'User-Agent': 'Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.149 Mobile Safari/537.36'}
    json_url = ('https://statdata-api-prod.pgatour.com/api/clientfile/Field?T_CODE=r&T_NUM='
                + str(t.pga_tournament_num) + '&YEAR=' + str(t.season.season) + '&format=json')
    print (json_url)
    req = Request(json_url, headers=headers)
    # Context manager closes the HTTP response even if decoding fails.
    with urlopen(req) as resp:
        data = json.loads(resp.read())
    print ('data', len(data))

    for player in data["Tournament"]["Players"]:
        if player["isAlternate"] == "Yes":
            # exclude alternates from the field
            continue
        # Feed names are "Last, First"; flip to "First Last".
        name = ' '.join(reversed(player["PlayerName"].rsplit(', ', 1)))
        team = player.get('TeamID') or player.get('cupTeam') or None
        ranks = utils.fix_name(name, owgr_rankings)
        field_dict[name] = {'pga_num': player['TournamentPlayerId'],
                            'team': team,
                            **_owgr_fields(ranks)}


def _add_espn_field(t, owgr_rankings, field_dict):
    '''Fill field_dict from the ESPN API, creating Golfer records that are missing.'''
    # to use this need to update to key everything from espn_num
    data = espn_api.ESPNData(t=t, force_refresh=True, setup=True).field()
    for golfer in data:
        athlete = golfer.get('athlete')
        name = athlete.get('displayName')
        espn_id = athlete.get('id')
        ranks = utils.fix_name(name, owgr_rankings)
        # need this for now, fix rest of code to use ESPN
        try:
            g_obj = Golfer.objects.get(espn_number=espn_id)
            print ('build field found golfer', g_obj)
        except Exception:
            print ('build field cant find: ', name, ' trying setup')
            pga_num = find_pga_num(name)
            # Only trust the PGA number when the lookup is unambiguous.
            unique_pga_num = pga_num[0] if pga_num and len(pga_num) == 1 else None
            g_obj = get_golfer(player=name, pga_num=unique_pga_num, espn_num=espn_id)
        field_dict[name] = {'pga_num': g_obj.golfer_pga_num,
                            'team': None,
                            **_owgr_fields(ranks)}


def get_field(t, owgr_rankings):
    '''takes a tournament object, goes to web to get field and returns a dict

    t: Tournament object; pga_tournament_num selects the source:
        '470' -> match play bracket page, '999' -> Olympic ESPN leaderboards,
        anything else -> PGA Tour field feed, falling back to the ESPN API
        when the PGA feed raises.
    owgr_rankings: rankings passed to utils.fix_name to resolve each golfer.

    Returns: dict keyed by golfer name. Entries written from the PGA feed
    before it failed are kept when the ESPN fallback runs.
    '''
    field_dict = {}
    if t.pga_tournament_num == '470':
        _add_match_play_field(t, owgr_rankings, field_dict)
    elif t.pga_tournament_num == '999':  # Olympics
        _add_olympic_field(t, owgr_rankings, field_dict)
    else:
        try:
            _add_pga_field(t, owgr_rankings, field_dict)
        except Exception as e:
            # Best-effort: any PGA feed problem switches to ESPN.
            print ('pga scrape failed: ', e)
            _add_espn_field(t, owgr_rankings, field_dict)

    print (field_dict)
    return field_dict
def _next_thursday(start):
    '''Return start if it is a Thursday (weekday 3), else the next Thursday after it.'''
    return start + datetime.timedelta((3 - start.weekday()) % 7)


def _apply_setup_defaults(tourny, season, tournament_number, start_date,
                          field_json_url, score_json_url, cut_num):
    '''Set the attributes every newly set up tournament shares.'''
    tourny.season = season
    tourny.start_date = start_date
    tourny.field_json_url = field_json_url
    tourny.score_json_url = score_json_url
    tourny.pga_tournament_num = tournament_number
    tourny.current = True
    tourny.complete = False
    tourny.score_update_time = datetime.datetime.now()
    tourny.cut_score = "no cut info"
    tourny.saved_cut_num = cut_num
    tourny.saved_round = 1


def setup_t(tournament_number, espn_t_num=None):
    '''takes a t number as a string, returns a tournament object

    Creates and saves a new Tournament in the current Season, starting on the
    coming Thursday (today when today is a Thursday).

    tournament_number: PGA tournament number; '999' sets up "Olympic Golf"
        with fixed values, anything else is looked up on the PGA Tour field
        feed, with the ESPN leaderboard as fallback for the name.
    espn_t_num: optional ESPN tournament id; when falsy for a non-Olympic
        tournament it is obtained from ScrapeESPN(tourny).get_t_num().
    '''
    season = Season.objects.get(current=True)
    print ('getting field')
    tourny = Tournament()

    if tournament_number == '999':  # olympics
        tourny.name = "Olympic Golf"
        start_date = _next_thursday(datetime.date.today())
        _apply_setup_defaults(tourny, season, tournament_number, start_date,
                              field_json_url='', score_json_url='', cut_num=60)
        tourny.has_cut = False
        tourny.espn_t_num = '401285309'
        # Ryder cup: tourny.espn_t_num = '401219595'
        tourny.save()
        return tourny

    # NOTE(review): str(season) is used as the year in the URLs and the T_ID
    # check, so this assumes Season renders as its year - confirm.
    json_url = ('https://statdata-api-prod.pgatour.com/api/clientfile/Field?T_CODE=r&T_NUM='
                + str(tournament_number) + '&YEAR=' + str(season) + '&format=json')
    print (json_url)
    try:
        req = Request(json_url, headers={'User-Agent': 'Mozilla/5.0'})
        # Context manager closes the HTTP response even if decoding fails.
        with urlopen(req) as resp:
            data = json.loads(resp.read())
        t_id = data["Tournament"]["T_ID"]
        print (t_id[1:5], str(season))
        if t_id[1:5] != str(season):
            print ('check field, looks bad!')
            raise LookupError('Tournament season mismatch: ' + str(t_id))
        tourny.name = data["Tournament"]["TournamentName"]
    except Exception as e:
        # Best-effort: any PGA problem (including the mismatch above) falls
        # back to the ESPN leaderboard for the tournament name.
        print ('PGA lookup issue, going to espn', e)
        url = 'https://www.espn.com/golf/leaderboard?tournamentId=' + str(espn_t_num)
        espn = scrape_espn.ScrapeESPN(tournament=tourny, setup=True, url=url)
        t_name = espn.get_t_name()
        print ('espn T Name: ', t_name)
        tourny.name = t_name

    today = datetime.date.today()
    print (today)
    _apply_setup_defaults(
        tourny, season, tournament_number, _next_thursday(today),
        field_json_url=json_url,
        score_json_url='https://statdata.pgatour.com/r/' + str(tournament_number) + '/' + str(season) + '/leaderboard-v2mini.json',
        cut_num=65)
    tourny.saved_cut_round = 2

    if espn_t_num:
        tourny.espn_t_num = espn_t_num
    else:
        tourny.espn_t_num = scrape_espn.ScrapeESPN(tourny).get_t_num()
    tourny.save()
    return tourny
from bs4 import BeautifulSoup
from urllib.request import Request, urlopen
from selenium import webdriver
import urllib
import json
from golf_app import views, manual_score, populateField, withdraw, scrape_scores_picks, utils, scrape_cbs_golf, scrape_masters, scrape_espn
from unidecode import unidecode
from django.core import serializers
from golf_app.utils import formatRank, format_name, fix_name

# One-off maintenance script: rebuild the stored score dict for a past
# tournament from its ESPN leaderboard and record its ESPN id.
# NOTE(review): Tournament, Field and ScoreDict are not imported above;
# presumably they are already in scope where this runs - confirm.

#safeway
# Single source for the ESPN id so the URL and the saved value cannot drift;
# kept as a string, matching how espn_t_num is concatenated into URLs.
safeway_espn_t_num = '401219793'
t = Tournament.objects.get(pga_tournament_num='464', season__season='2021')
print(Field.objects.filter(tournament=t).count())
web = scrape_espn.ScrapeESPN(
    t, 'https://www.espn.com/golf/leaderboard?tournamentId=' + safeway_espn_t_num).get_data()
#print (web)
sd, created = ScoreDict.objects.get_or_create(tournament=t)
#sd.tournament = t
sd.data = web
sd.save()
t.espn_t_num = safeway_espn_t_num
t.save()

#US Open
t = Tournament.objects.get(pga_tournament_num='026', season__season='2021')
print(Field.objects.filter(tournament=t).count())
def get_womens_field(self):
    '''Scrape self.womens_url for self.t in setup mode and return the data.'''
    # Setup mode is required here; without it only the men's field comes back.
    scraper = scrape_espn.ScrapeESPN(tournament=self.t, url=self.womens_url, setup=True)
    return scraper.get_data()
def get_mens_field(self):
    '''Scrape self.mens_url for self.t in setup mode and return the data.'''
    scraper = scrape_espn.ScrapeESPN(tournament=self.t, url=self.mens_url, setup=True)
    return scraper.get_data()