def proj_vs_actual(start_date, end_date):
    """Pair the saved lineups with the fantasy points players actually scored.

    For every day from ``start_date`` through ``end_date`` (inclusive) the
    saved ``'stat'`` and ``'line'`` CSV files are loaded, player names are
    normalised with ``format_name`` and every lineup row receives the
    player's real ``FP`` in a new ``actual`` column.  Rows whose player's
    mapped team (``PTeam``) differs from ``Team`` are tagged ``'Away'`` and
    get their ``Team`` and ``Opp`` values swapped.

    Args:
        start_date: First day to process.
        end_date: Last day to process (inclusive).

    Returns:
        A DataFrame with the rows of every processed day and a datetime
        ``date`` column, or ``None`` when no day in the range had a readable
        stats file.
    """
    frames = []
    span = (end_date - start_date).days
    for offset in range(span + 1):
        cur_date = start_date + dt.timedelta(days=offset)
        try:
            stats = pd.read_csv(format_fpath('stat', cur_date))
        except (OSError, pd.errors.EmptyDataError):
            # No usable box score was saved for this day; skip it.
            continue

        stats['Starters'] = stats['Starters'].apply(format_name)
        lineups = pd.read_csv(format_fpath('line', cur_date))
        lineups['Name'] = lineups['Name'].apply(format_name)

        actual = stats.set_index('Starters')['FP'].rename('actual')
        combo = lineups.join(actual, on='Name').sort_values('Name').set_index('Name')
        combo['date'] = cur_date

        # Look up each known player's team; unknown players keep 'UNK'.
        mapper = player_team_map()
        listed = combo.index.isin(mapper.index)
        combo['PTeam'] = 'UNK'
        combo.loc[listed, 'PTeam'] = (
            combo.index[listed].to_series().apply(lambda name: mapper.loc[name]).values
        )
        combo['Loc'] = (combo['PTeam'] == combo['Team']).map(
            {True: 'Home', False: 'Away'}
        )

        # Move the name back into a trailing column and use a positional index.
        combo = combo.assign(Name=combo.index.values).reset_index(drop=True)

        # Swap Team and Opp on the rows tagged 'Away'.
        away = combo['Loc'] == 'Away'
        combo.loc[away, ['Team', 'Opp']] = combo.loc[away, ['Opp', 'Team']].to_numpy()

        frames.append(combo)

    if not frames:
        return None

    # One concat instead of repeated DataFrame.append (removed in pandas 2.0).
    compare = pd.concat(frames)
    compare['date'] = pd.to_datetime(compare['date'])
    return compare
def boxStats(date):
    """Scrape the day's box scores and save per-player fantasy points.

    For each ``(home, away)`` pair returned by ``get_games(date)`` the
    basketball-reference box-score page is parsed with ``pandas.read_html``.
    The first table is labelled as the away side and the table at the middle
    position as the home side.  Rows whose ``MP`` value starts with ``'Did'``
    or ``'Not'`` are removed, and fantasy points are computed as
    ``PTS + 1.2*TRB + 1.5*AST + 3*BLK + 3*STL - TOV``.  The result is written
    to the ``'stat'`` file for ``date``.

    Any error is reported on stdout (message plus traceback) rather than
    raised, so a failed day does not abort the caller.

    Args:
        date: Day to scrape; must provide ``strftime``.
    """
    try:
        games = get_games(date)
        url_base = 'https://www.basketball-reference.com/boxscores/{}0{}.html'
        frames = []
        for home, away in games:
            tables = pd.read_html(url_base.format(date.strftime('%Y%m%d'), home))
            for idx in [0, len(tables) // 2]:
                box = tables[idx]
                # Drop the top level of the two-level header.
                box.columns = box.columns.droplevel(0)
                box = (
                    box.set_index('Starters')
                    .drop('Reserves')
                    .drop('Team Totals')
                    .fillna(0)
                )
                if idx == 0:
                    box['Loc'] = 'Away'
                    box['Team'] = away
                    box['Opp'] = home
                else:
                    box['Loc'] = 'Home'
                    box['Team'] = home
                    box['Opp'] = away
                frames.append(box)

        if not frames:
            print('No games for {}'.format(date))
            return

        # One concat instead of repeated DataFrame.append (removed in pandas 2.0).
        stats = pd.concat(frames)

        # Remove players without minutes ("Did Not ..." / "Not With ..." rows).
        sat_out = stats.MP.str[:3].isin(['Did', 'Not'])
        stats = stats.drop(stats[sat_out].index)

        stats['FP'] = (
            stats.PTS.astype('int')
            + stats.TRB.astype('int') * 1.2
            + stats.AST.astype('int') * 1.5
            + stats.BLK.astype('int') * 3
            + stats.STL.astype('int') * 3
            - stats.TOV.astype('int')
        )
        stats.to_csv(format_fpath('stat', date))
    except Exception:
        # Best effort: report and carry on, but let KeyboardInterrupt and
        # SystemExit propagate.
        print('No games for {}'.format(date))
        traceback.print_exc(file=sys.stdout)
def main():
    """Download the numberFire NBA projections table and save it as CSV.

    The fourth ``<table>`` on the page is read row by row.  The ``Player``
    cell is expanded by ``parse_player`` into ``Name``, ``Pos``, ``Team``,
    ``Opp`` and ``Game`` columns, one ``is<Pos>`` 0/1 column is added per
    position found, and ``Cost`` is converted from text such as ``$7,500`` to
    an integer.  The frame is written to ``format_fpath('proj')``.

    Raises:
        requests.RequestException: If the page cannot be fetched (network
            error, timeout or HTTP error status).
    """
    url = "https://www.numberfire.com/nba/daily-fantasy/daily-basketball-projections"
    # A timeout prevents an unresponsive server from hanging the script.
    response = requests.get(url, timeout=30)
    response.raise_for_status()
    soup = BeautifulSoup(response.text, features="lxml")
    tables = soup.find_all("table")

    # Rows without <td> cells (e.g. header rows) become empty lists, which
    # turn into all-None rows and are removed by dropna() below.
    rows = [
        [cell.get_text().strip() for cell in row.find_all("td")]
        for row in tables[3].find_all("tr")
    ]
    cols = "Player,FP,Cost,Value,Min,Pts,Reb,Ast,Stl,Blk,TO".split(",")
    df = pd.DataFrame(rows, columns=cols).dropna()

    player_data = df.Player.apply(parse_player)
    parsed = pd.DataFrame(
        player_data.to_list(),
        index=player_data.index,
        columns=["Name", "Pos", "Team", "Opp", "Game"],
    )
    df = df.join(parsed).drop("Player", axis=1)

    for pos in df.Pos.unique():
        df['is{}'.format(pos)] = (df.Pos == pos).astype('int').to_numpy()

    # Drop the leading currency symbol and thousands separators.
    df['Cost'] = df.Cost.apply(lambda cost: int(cost[1:].replace(',', '')))
    df.to_csv(format_fpath('proj'), index=False)
def generate(date=None, lineups=25, to_file=True):
    """Build a series of salary-capped lineups by integer programming.

    Each round maximises the summed ``FP`` projection subject to exactly
    2 PG, 2 SG, 2 SF, 2 PF and 1 C and a total ``Cost`` of at most 60000.
    After every round the projections of the chosen players are multiplied
    by 0.95 so that later rounds drift towards different players.

    Args:
        date: Day whose projections are used; defaults to today, resolved at
            call time.
        lineups: Number of rounds (lineups) to generate.
        to_file: When true, write the result to the ``'line'`` file for
            ``date`` and return ``None``; otherwise return it.

    Returns:
        ``None`` when ``to_file`` is true.  Otherwise a DataFrame holding the
        picked rows of every round with a ``round`` column, or ``None`` when
        ``lineups`` is not positive.

    Raises:
        RuntimeError: If the solver produces no solution for a round.
    """
    if date is None:
        # Resolved per call; a default of dt.date.today() in the signature
        # would be frozen at import time.
        date = dt.date.today()

    df = get_proj(date).copy()
    pos_mat = np.transpose(df[['isPG', 'isSG', 'isSF', 'isPF', 'isC']].to_numpy())
    cur_proj = df['FP'].copy()
    slots = np.array([2, 2, 2, 2, 1])
    sal_max = 60000
    sal = df['Cost'].to_numpy()

    x = cp.Variable(len(df), boolean=True)
    # The constraints do not change between rounds; only the objective does.
    constraints = [pos_mat @ x == slots, x >= 0, x <= 1, x.T @ sal <= sal_max]

    rounds = []
    for round_no in range(1, lineups + 1):
        objective = cp.Maximize(x.T @ cur_proj.to_numpy())
        prob = cp.Problem(objective, constraints)
        prob.solve(solver='GLPK_MI')
        if x.value is None:
            raise RuntimeError(
                'No lineup found in round {} (solver status: {})'.format(
                    round_no, prob.status
                )
            )

        # Solver output is floating point; compare with a tolerance.
        chosen = np.isclose(x.value, 1)
        picks = df.iloc[chosen].copy()
        picks['round'] = round_no
        rounds.append(picks)

        # Discount the players just used before the next round.
        cur_proj.loc[picks.index] = cur_proj.loc[picks.index].values * .95

    # One concat instead of repeated DataFrame.append (removed in pandas 2.0).
    selections = pd.concat(rounds) if rounds else None

    if to_file:
        if selections is not None:
            selections.to_csv(format_fpath('line', date), index=False)
        return None
    return selections
def statRange(start_date, end_date):
    """Load and stack the saved ``'stat'`` files for a range of days.

    Args:
        start_date: First day of the range (anything ``pandas.date_range``
            accepts).
        end_date: Last day of the range (inclusive).

    Returns:
        A DataFrame with the rows of every day that had a readable stats
        file, each tagged with a ``Date`` column holding that day's
        timestamp, or ``None`` when no file could be read.
    """
    frames = []
    for day in pd.date_range(start_date, end_date):
        try:
            daily = pd.read_csv(format_fpath('stat', day.date()))
        except (OSError, pd.errors.EmptyDataError):
            # No usable stats file for this day; skip it.
            continue
        daily['Date'] = day
        frames.append(daily)

    # One concat instead of repeated DataFrame.append (removed in pandas 2.0).
    return pd.concat(frames) if frames else None
def build_feature_set(date=None):
    """Attach team-rating features to the lineups saved for ``date``.

    Adds these columns to the lineups frame:

    * ``ortg`` / ``drtg`` - mean offensive / defensive rating over the teams
      appearing in the day's projections,
    * ``Games`` - number of distinct projected teams divided by two,
    * ``l_drtg`` - defensive rating selected by each row's ``Opp``,
    * ``l_ortg`` - offensive rating selected by each row's ``Team``,
    * ``dscore`` - value from the previous day's ``'score'`` file selected by
      each row's ``Opp``.

    Ratings come from ``calc_ratings`` applied to ``game_data`` for the 15
    days ending the day before ``date``.

    Args:
        date: Day to build features for; defaults to today, resolved at call
            time.

    Returns:
        The lineups DataFrame with the feature columns added.
    """
    if date is None:
        # Resolved per call; a default of dt.date.today() in the signature
        # would be frozen at import time.
        date = dt.date.today()
    prev_day = date - dt.timedelta(days=1)

    proj = pd.read_csv(format_fpath('proj', date))
    # Return value is unused; presumably rewrites team codes in place.
    team_translation(proj)
    teams = proj.Team.drop_duplicates()

    hist = game_data(prev_day, 15)
    offense, defense = calc_ratings(hist)
    def_dict = defense.to_dict()
    off_dict = offense.to_dict()
    slate_ortg = teams.apply(lambda team: off_dict[team]).rename('ortg')
    slate_drtg = teams.apply(lambda team: def_dict[team]).rename('drtg')
    off_def = slate_ortg.to_frame().join(slate_drtg).mean()

    lineups = pd.read_csv(format_fpath('line', date))
    team_translation(lineups)
    lineups['ortg'] = off_def.ortg
    lineups['drtg'] = off_def.drtg
    lineups['Games'] = len(teams) / 2

    # NOTE(review): the encoder is built from `defense` only but is also used
    # to select from `offense.values`; this assumes both share the same team
    # order - confirm against calc_ratings / oneHotTeams.
    enc = oneHotTeams(defense)
    defense_mat = enc.transform(np.reshape(lineups.Opp.to_numpy(), (-1, 1)))
    offense_mat = enc.transform(np.reshape(lineups.Team.to_numpy(), (-1, 1)))
    lineups['l_drtg'] = np.reshape(
        np.matmul(defense_mat, np.reshape(defense.values, (-1, 1))), (-1)
    )
    lineups['l_ortg'] = np.reshape(
        np.matmul(offense_mat, np.reshape(offense.values, (-1, 1))), (-1)
    )

    dscore = pd.read_csv(format_fpath('score', prev_day)).set_index('Defense')
    lineups['dscore'] = np.reshape(
        np.matmul(defense_mat, np.reshape(dscore.values, (-1, 1))), (-1)
    )
    return lineups
def fp_score(cur_date, lookback):
    """Estimate per-team fantasy-point ratings and save the defensive ones.

    Team fantasy-point totals per ``Date`` / ``Team`` (from ``statRange``)
    are joined onto the games returned by ``game_data``.  Starting from the
    plain mean ``FP`` per ``Offense`` and per ``Defense``, 20 refinement
    passes are run: for every game a new offensive value ``2*FP - drtg`` and
    a new defensive value ``2*FP - ortg`` are averaged with the current
    values, then averaged again per team.  The final defensive series is
    written to the ``'score'`` file for ``cur_date``; nothing is returned.

    Args:
        cur_date: Last day of the window.
        lookback: Window length in days.

    Raises:
        AttributeError: If ``statRange`` returns ``None`` because no stats
            file exists in the window.
    """
    stats = statRange(cur_date - dt.timedelta(days=lookback), cur_date)
    df = game_data(cur_date, lookback)

    # Select FP before aggregating: aggregating every column first is wasted
    # work and fails on non-numeric columns in pandas >= 2.0.
    team_fp = stats.groupby(['Date', 'Team'])['FP'].sum()
    df = (
        df.join(df.index.to_frame())
        .set_index(['Date', 'Offense'])
        .join(team_fp, on=['Date', 'Offense'])
        .dropna()
    )

    offense = df.groupby('Offense')['FP'].mean().rename('ortg')
    defense = df.groupby('Defense')['FP'].mean().rename('drtg')
    for _ in range(20):
        mapper = df.join(offense, on='Offense').join(defense, on='Defense')
        mapper['new-ortg'] = mapper.FP * 2 - mapper['drtg']
        mapper['new-drtg'] = mapper.FP * 2 - mapper['ortg']
        # Blend the new estimates with the current ones.
        mapper['drtg'] = (mapper['new-drtg'] + mapper['drtg']) / 2
        mapper['ortg'] = (mapper['new-ortg'] + mapper['ortg']) / 2
        offense = mapper.groupby('Offense')['ortg'].mean()
        defense = mapper.groupby('Defense')['drtg'].mean()

    defense.to_csv(format_fpath('score', cur_date))
def get_proj(date):
    """Load the saved projections for ``date`` and tag each row's team side.

    Player names are normalised with ``format_name`` and looked up in
    ``player_team_map()``; the result goes into ``PTeam`` (``'UNK'`` for
    players not in the map).  Rows where ``PTeam`` equals ``Team`` are
    tagged ``'Home'`` in ``Loc``; every other row is tagged ``'Away'`` and
    has its ``Team`` and ``Opp`` values swapped.

    Args:
        date: Day whose ``'proj'`` file is loaded.

    Returns:
        The projections DataFrame with ``PTeam`` and ``Loc`` added, indexed
        0..n-1.
    """
    df = pd.read_csv(format_fpath('proj', date))
    df['Name'] = df['Name'].apply(format_name)

    mapper = player_team_map()
    listed = df['Name'].isin(mapper.index)
    df['PTeam'] = 'UNK'
    df.loc[listed, 'PTeam'] = (
        df.loc[listed, 'Name'].apply(lambda name: mapper.loc[name]).values
    )
    df['Loc'] = (df['PTeam'] == df['Team']).map({True: 'Home', False: 'Away'})

    # The previous code did `df['Name'] = df.index.values` here, which
    # replaced every player name with its integer row number; the names are
    # now left intact.  read_csv already yields a 0..n-1 index.

    # Swap Team and Opp on the rows tagged 'Away'.
    away = df['Loc'] == 'Away'
    df.loc[away, ['Team', 'Opp']] = df.loc[away, ['Opp', 'Team']].to_numpy()
    return df