def scrape_elo():
    """
    Returns list of winner and loser elo rankings, prior to each match played.

    For every row of the module-level ``atp_bet_data`` frame, the Elo table
    for the ranking date picked by ``closest_past_date`` (defined elsewhere)
    is fetched and handed to ``get_elo`` together with that row.

    Returns:
        tuple[list, list]: ``(elo_winners, elo_losers)`` with one entry per
        match, in the row order of ``atp_bet_data``.  An empty list of lookup
        dates yields two empty lists.
    """
    # Get series of elo ranking dates.  The per-season frames are combined
    # with pd.concat because DataFrame.append was removed in pandas 2.0.
    season_frames = [
        pd.read_json(
            "https://www.ultimatetennisstatistics.com/seasonRankingDates"
            f"?rankType=ELO_RANK&season={year}"
        )
        for year in [2018, 2019, 2020]
    ]
    elo_dates = pd.concat(season_frames)
    elo_dates.columns = ['Date']
    elo_dates = elo_dates.sort_values('Date').reset_index(drop=True)
    elo_dates = pd.to_datetime(elo_dates['Date']).dt.date

    # Get series of tournament dates
    tourney_dates = atp_bet_data['Date'].dt.date

    # Get dates corresponding to elo ranking before tournament, formatted the
    # way the rankings endpoint expects them (dd-mm-YYYY).
    date_indices = tourney_dates.apply(lambda x: closest_past_date(elo_dates, x))
    dates_before = [
        ranking_date.strftime('%d-%m-%Y')
        for ranking_date in elo_dates[date_indices]
    ]

    # Get elo rankings before tournament
    elo_winners = []
    elo_losers = []
    elo = None
    for i, date in enumerate(dates_before):
        # Only hit the site when the ranking date differs from the previous
        # match's date; otherwise reuse the table that is already loaded.
        if i == 0 or date != dates_before[i - 1]:
            elo = get_json(
                "https://www.ultimatetennisstatistics.com/rankingsTableTable"
                "?current=1&rowCount=-1&searchPhrase=&rankType=ELO_RANK"
                f"&season=&date={date}&_=1578626200145"
            )
        elo_winner, elo_loser = get_elo(elo, atp_bet_data.iloc[i])
        elo_winners.append(elo_winner)
        elo_losers.append(elo_loser)

    return elo_winners, elo_losers
def get_player_ids():
    """
    Returns dataframe of player names and ids.

    Every ``Winner`` entry of the module-level ``atp_bet_data`` frame is
    matched against the current Elo table: the second-to-last word of the
    winner name must equal the last word of the Elo ``name``, and the first
    character of the winner's last word must occur in the Elo ``name``.
    Hyphens are treated as spaces and commas are ignored when splitting the
    winner name.

    Returns:
        pandas.DataFrame: columns ``Name`` (winner name with trailing spaces
        removed) and ``PlayerId``, de-duplicated.  ``PlayerId`` is ``None``
        when no Elo row matches or the name has fewer than two words.

    Raises:
        KeyError: if the Elo table lacks the ``name`` or ``playerId`` column;
            such schema problems are surfaced instead of silently producing
            ``None`` for every player.
    """
    elo = get_json(
        "https://www.ultimatetennisstatistics.com/rankingsTableTable"
        "?current=1&rowCount=-1&searchPhrase=&rankType=ELO_RANK"
        "&season=&date=&_=1578626200145"
    )
    # Loop-invariant: last word of every Elo name, computed once.
    elo_surnames = elo['name'].str.split().str[-1]

    # The same winner appears on many rows; resolve each distinct name once.
    id_by_name = {}
    player_ids = []
    for winner in atp_bet_data['Winner']:
        player_name = winner.rstrip(' ')
        if player_name not in id_by_name:
            name_parts = player_name.replace('-', ' ').replace(',', '').split()
            player_id = None
            if len(name_parts) >= 2:
                player_surname = name_parts[-2]
                player_first_name = name_parts[-1][0]
                # The initial is plain text, not a pattern, hence regex=False;
                # na=False keeps the mask boolean if an Elo name is missing.
                initial_in_name = elo['name'].str.contains(
                    player_first_name, regex=False, na=False
                )
                matches = elo.loc[
                    (elo_surnames == player_surname) & initial_in_name,
                    'playerId',
                ]
                if not matches.empty:
                    player_id = matches.iloc[0]
            id_by_name[player_name] = player_id
        player_ids.append([player_name, id_by_name[player_name]])

    player_id = pd.DataFrame(
        player_ids, columns=['Name', 'PlayerId']
    ).drop_duplicates()
    return player_id
players['won'], players['lost'], players['matches'] = 0, 0, 0 for index, row in players.iterrows():a try: # players.at[index,'won'] += match_hist['winner_name'].value_counts()[row['name']] # players.at[index,'lost'] += match_hist['loser_name'].value_counts()[row['name']] players.at[index,'won'] += match_hist[match_hist['tourney_date']<='20200106']['winner_name'].value_counts()[row['name']] players.at[index,'lost'] += match_hist[match_hist['tourney_date']<='20200106']['loser_name'].value_counts()[row['name']] except: pass players['matches'] = players['won'] + players['lost'] # players.to_csv('atpdata/elo_ratings_current.csv') players.to_csv('atpdata/current_elo_ratings.csv') get_num_matches(players) """
# NOTE(review): the line above is the tail of a triple-quoted region that
# opens before this view; it is carried over verbatim and not documented here.

# Rankings table requested with rankType=ELO_RANK and empty season/date
# filters.  get_json is defined elsewhere; the result is used as a DataFrame.
elo = get_json(
    "https://www.ultimatetennisstatistics.com/rankingsTableTable?current=1&rowCount=-1&searchPhrase=&rankType=ELO_RANK&season=&date=&_=1578626200145"
)
# Every row whose name contains 'Wawrinka' is renamed to 'Stanislas Wawrinka'
# (presumably to match the spelling used by another data source -- confirm).
elo.loc[elo['name'].str.contains('Wawrinka'), 'name'] = 'Stanislas Wawrinka'

# UTR table (get_utr is defined elsewhere), ordered by threeMonthRating from
# highest to lowest, with displayName/threeMonthRating renamed to name/points.
utr = get_utr("https://agw-prod.myutr.com/v2/player/top?gender=M&tags=Pro")
utr = utr.sort_values('threeMonthRating', ascending=False)
utr = utr.rename(columns={'displayName': 'name', 'threeMonthRating': 'points'})

# 2019 betting data is read from a workbook already present on disk.
atp_bet_data_2019 = pd.read_excel('atpdata/bet_data_2019.xlsx')

# 2020 betting data: download the zip, unpack it into the current working
# directory (extractall() is given no path), move the workbook next to the
# 2019 one, then read it.
# NOTE(review): the HTTP status is never checked, so a failed download only
# shows up when ZipFile tries to parse the response body.
# NOTE(review): os.rename raises on Windows when the destination already
# exists (e.g. on a second run); os.replace may be what is wanted -- confirm.
r = requests.get("http://www.tennis-data.co.uk/2020/2020.zip")
z = zipfile.ZipFile(io.BytesIO(r.content))
z.extractall()
os.rename('2020.xlsx', 'atpdata/bet_data_2020.xlsx')
atp_bet_data_2020 = pd.read_excel('atpdata/bet_data_2020.xlsx')
# "Now" is the reference point for every date window built in this section.
curr_date = datetime.today()
#curr_date = datetime.strptime('2019-02-10', '%Y-%m-%d')

# Lower edge of the match window; the 7-day look-back variant is disabled.
# NOTE(review): with time_filter equal to curr_date, the selection below
# (Date > time_filter and Date < curr_date) cannot match any row -- confirm
# whether the look-back was meant to be active.
#time_filter = (curr_date-timedelta(days=7))
time_filter = curr_date

# 90-day window ending at time_filter, formatted dd-mm-YYYY; it is fed into
# the matchesWonPct URL at the end of this section.
from_date = (time_filter - timedelta(days=90)).strftime('%d-%m-%Y')
to_date = time_filter.strftime('%d-%m-%Y')
#to_date = datetime.today().strftime('%d-%m-%Y')

# Matches dated strictly between time_filter and curr_date, re-indexed 0..n-1.
ausopen_2019_matches = atp_bet_data.loc[
    (atp_bet_data['Date'] > time_filter) & (atp_bet_data['Date'] < curr_date)
]
#ausopen_2019_matches = ausopen_2019_matches[ausopen_2019_matches['Location']=='Cordoba']
#ausopen_2019_matches = atp_bet_data[(atp_bet_data['Tournament']=='Cordoba Open')]
ausopen_2019_matches.index = range(len(ausopen_2019_matches))

# Elo rankings table (rankType=ELO_RANK, no season/date filter).  Two players
# are renamed by substring match, in this order (presumably to line up with
# the spelling used by another data source -- confirm).
elo = get_json(
    "https://www.ultimatetennisstatistics.com/rankingsTableTable?current=1&rowCount=-1&searchPhrase=&rankType=ELO_RANK&season=&date=&_=1578626200145"
)
elo.loc[elo['name'].str.contains('Wawrinka'), 'name'] = 'Stanislas Wawrinka'
elo.loc[elo['name'].str.contains('Ramos'), 'name'] = 'Albert Ramos Vinolas'

# UTR table: highest threeMonthRating first, columns renamed to name/points,
# and hyphens in names turned into spaces.
utr = (
    get_utr("https://agw-prod.myutr.com/v2/player/top?gender=M&tags=Pro")
    .sort_values('threeMonthRating', ascending=False)
    .rename(columns={'displayName': 'name', 'threeMonthRating': 'points'})
)
utr['name'] = utr['name'].str.replace('-', ' ')

# Match-win percentage leaders restricted to the from_date..to_date window.
win_perc = get_json(
    "https://www.ultimatetennisstatistics.com/statsLeadersTable?current=1&rowCount=-1&sort%5Bvalue%5D=desc&searchPhrase=&category=matchesWonPct&season=&fromDate={0}&toDate={1}&level=&bestOf=&surface=&indoor=&speed=&round=&result=&tournamentId=&opponent=&countryId=&minEntries=&_=1579235798414"
    .format(from_date, to_date))
# players.at[index,'won'] += match_hist['winner_name'].value_counts()[row['name']] # players.at[index,'lost'] += match_hist['loser_name'].value_counts()[row['name']] players.at[index,'won'] += match_hist[match_hist['tourney_date']<='20200106']['winner_name'].value_counts()[row['name']] players.at[index,'lost'] += match_hist[match_hist['tourney_date']<='20200106']['loser_name'].value_counts()[row['name']] except: pass players['matches'] = players['won'] + players['lost'] # players.to_csv('atpdata/elo_ratings_current.csv') players.to_csv('atpdata/current2_elo_ratings.csv') get_num_matches(players) """
# NOTE(review): the line above is the tail of a triple-quoted region that
# opens before this view; it is carried over verbatim and not documented here.

# Elo ratings are read from a local CSV here instead of being fetched.
elo = pd.read_csv('atpdata/test3_elo_ratings.csv')
# Every row whose name contains 'Wawrinka' is renamed to 'Stanislas Wawrinka'
# (presumably to match the spelling used by another data source -- confirm).
elo.loc[elo['name'].str.contains('Wawrinka'), 'name'] = 'Stanislas Wawrinka'

# Stats-leader tables; get_json is defined elsewhere.  The requests differ in
# `category`, and the first two also pass season=-1 (aces additionally caps
# rowCount at 100, the others pass rowCount=-1).
aces = get_json(
    "https://www.ultimatetennisstatistics.com/statsLeadersTable?current=1&rowCount=100&sort%5Bvalue%5D=desc&searchPhrase=&category=acePct&season=-1&fromDate=&toDate=&level=&bestOf=&surface=&indoor=&speed=&round=&result=&tournamentId=&opponent=&countryId=&minEntries=&_=1578621981846"
)
first_serve = get_json(
    "https://www.ultimatetennisstatistics.com/statsLeadersTable?current=1&rowCount=-1&sort%5Bvalue%5D=desc&searchPhrase=&category=firstServePct&season=-1&fromDate=&toDate=&level=&bestOf=&surface=&indoor=&speed=&round=&result=&tournamentId=&opponent=&countryId=&minEntries=&_=1578641519820"
)
upsets_scored = get_json(
    "https://www.ultimatetennisstatistics.com/statsLeadersTable?current=1&rowCount=-1&sort%5Bvalue%5D=desc&searchPhrase=&category=upsetsScoredPct&season=&fromDate=&toDate=&level=&bestOf=&surface=&indoor=&speed=&round=&result=&tournamentId=&opponent=&countryId=&minEntries=&_=1578626448297"
)
upsets_against = get_json(
    "https://www.ultimatetennisstatistics.com/statsLeadersTable?current=1&rowCount=-1&sort%5Bvalue%5D=desc&searchPhrase=&category=upsetsAgainstPct&season=&fromDate=&toDate=&level=&bestOf=&surface=&indoor=&speed=&round=&result=&tournamentId=&opponent=&countryId=&minEntries=&_=1578626448300"
)
# NOTE(review): as laid out here the ELO_RANK call below is a bare expression
# whose result is discarded; it reads like the right-hand side of the disabled
# `test_elo` assignment that was split across lines -- confirm and either
# comment it out or assign it.
# test_elo =
get_json("https://www.ultimatetennisstatistics.com/rankingsTableTable?current=1&rowCount=-1&searchPhrase=&rankType=ELO_RANK&season=&date=&_=1578626200145")
# Rankings table requested with rankType=RANK rather than ELO_RANK.
rankings = get_json(
    "https://www.ultimatetennisstatistics.com/rankingsTableTable?current=1&rowCount=-1&searchPhrase=&rankType=RANK&season=&date=&_=1578782369780"
)
win_perc = get_json(