def fanduel_lineup_points(playerlist,Date): player_list=playerlist.split(', ') not_starting_dict={} player_dict={} #player_map=Ugen.excel_mapping('Player Map',8,5) rw=1 missing_players=[] for player in player_list: #build dict of stats for lineup on given date #Cell("Output",rw,1).value=player sql="SELECT * FROM hist_player_data WHERE Sport = 'MLB' AND Player = "+ "'" +player+"'" + " AND Date = "+ "'" +Date+"'" player_data=dbo.read_from_db(sql,["Player","GameID","Player_Type"],True) #Cell('Output',rw,2).value=player_data if len(player_data)==0: print 'no db_data found for %s'%player missing_players.append(player) for player_key,stat_dict in player_data.iteritems(): if player_key.split("_")[1].split('-')[1]!='2': #Ian: right now for double headers only count points for first game player_dict[player_key]=stat_dict rw=rw+1 FD_points=0 for player,stats in player_dict.iteritems(): if stats['Player_Type']== 'batter': player_points= (int(stats['Stat1'])*1 + int(stats['Stat2'])*2 + int(stats['Stat3'])*3 + int(stats['Stat4'])*4 + int(stats['Stat6'])*1 + int(stats['Stat10'])*1 + int(stats['Stat11'])*1 + int(stats['Stat8'])*1 + int(stats['Stat13']) * 1 - ((int(stats['Stat7'])-int(stats['Stat5']))*.25)) elif stats['Player_Type']== 'pitcher': player_points = (int(stats['Stat1'])*4 - int(stats['Stat7'])*1 + int(stats['Stat9'])*1 + float(stats['Stat4'])*1) else: print 'unknown positions for %s' %player print player,player_points FD_points=FD_points+player_points return FD_points,missing_players
def hist_get_contest_ids(date): contest_list=[] sql = "SELECT * FROM hist_fanduel_data Where Sport='MLB' And Date="+"'" +date+"'" FD_db_data= dbo.read_from_db(sql,['Player','Position','contestID'],True) for e in FD_db_data: if e.split("_")[2] not in contest_list: contest_list.append(e.split("_")[2]) return contest_list
def get_db_gamedata(self,player,start_date="2010-01-01",end_date="2100-01-01",GameID=False): #Ian: query by date or GameID if GameID: query={'sport':self.sport,'player':player,'gameID':GameID} else: query={'sport':self.sport,'player':player,'date':{"$gte":dt.datetime.strptime(start_date,'%Y-%m-%d'),"$lte":dt.datetime.strptime(end_date,'%Y-%m-%d')}} player_data=dbo.read_from_db('hist_player_data',query,{'_id':0}) if player_data: player_data.sort(key=lambda e: e['date'], reverse=False) player_data_dict={key:[doc[key] for doc in player_data] for key in player_data[0].keys()} event_data=[dbo.read_from_db('hist_event_data',{'sport':self.sport,'gameID':gameID},{'_id':0,'date':0,'sport':0,'gameID':0})[0] for gameID in player_data_dict['gameID']] event_data_dict={key:[doc[key] for doc in event_data] for key in event_data[0].keys()} player_data_dict.update(event_data_dict) df=pandas.DataFrame(player_data_dict) #Ian: add some check for data?? else: return pandas.DataFrame() return df
def gameids(self): #Cole: Returns namedtuple of [lastgameid,list of all gameids in db] gameid_data = collections.namedtuple("gameid_data", ["lastgameid", "all_gameids"]) sql = "SELECT hist_player_data.GameID FROM hist_player_data WHERE Sport = '"+ self.sport +"'" query_data = dbo.read_from_db(sql,['GameID'],True) if query_data: return gameid_data(query_data.keys()[0],query_data.keys()) else: return gameid_data(None,[])
def hist_build_player_universe(self,date,contestID): #Ian: Decided to build separate from original, thought it would get too big..consider refactoring both.. 'yyyy-mm-dd' sql = "SELECT * FROM hist_fanduel_data Where Sport='MLB' And Date="+"'" +date+"' And contestID=" + "'" +contestID+"'" FD_db_data= dbo.read_from_db(sql,['Player','Position','contestID'],True) teams,starting_lineups = ds.mlb_starting_lineups(date) weather_forecast={} team_dict={team:data['start_time'] for team,data in teams.iteritems() if team==data['home_teamid']} #odds_dict=TeamOdds.vegas_odds_sportsbook(date) odds_dict={} #Ian: if you add this back in add weather var back into FD_points_model function call #weather_forecast={team:weather.weather_hist(team,date,start_time) for team,start_time in team_dict.iteritems()} # print ('getting weather') # for team,start_time in team_dict.iteritems(): # weather_forecast[team]=weather.weather_hist(team,date,start_time) # time.sleep(6.1) #print ('weather retrieved') omitted_teams = [] for FD_playerid,data in FD_db_data.iteritems(): #Ian: could this be turned into generator?? if data['Position'] == 'P': player_type = 'pitcher' else: player_type = 'batter' data['player_key'] = data['Player']+ '_' + player_type starting_players = [player for player in starting_lineups.keys() if starting_lineups[player]['teamid'] not in omitted_teams and 'PPD' not in starting_lineups[player]['start_time']] #Cole: is the PPD working? #FD_starting_player_data = {FD_playerid:data for FD_playerid,data in FD_db_data.iteritems() if data['player_key'] in starting_players and (int(starting_lineups[data['player_key']]['batting_order'])<=5 or int(starting_lineups[data['player_key']]['batting_order'])==10)} FD_starting_player_data = {FD_playerid:data for FD_playerid,data in FD_db_data.iteritems() if data['player_key'] in starting_players} player_universe = {} for FD_playerid,data in FD_starting_player_data.iteritems(): db_data = self.get_db_gamedata(data['Player'],"20140301",Ugen.previous_day(date).replace("-","")) # db_data = self.get_db_gamedata('Roenis Elias',"20130301",Ugen.previous_day(date).replace("-","")) player_key=data['player_key'] # player_key='Roenis Elias_pitcher' if player_key in db_data.keys(): player_universe[player_key] = {} player_universe[player_key]['FD_playerid'] = FD_playerid projected_FD_points = self.FD_points_model(player_key,db_data[player_key],starting_lineups,False,False,odds_dict,date) time.sleep(0.5) # os.system('pause') player_universe[player_key]['projected_FD_points'] = projected_FD_points.projected_points player_universe[player_key]['confidence'] = projected_FD_points.confidence player_universe[player_key]['Player_Type'] = player_key.split("_")[1] # if player_universe[player_key]['Player_Type']=='pitcher': # os.system('pause') player_universe[player_key]['name'] = player_key player_universe[player_key]['Salary']=float(data['FD_Salary']) player_universe[player_key]['GamesPlayed']=float(data['FD_GP']) player_universe[player_key]['PPG']=float(data['FD_FPPG']) player_universe[player_key]['FD_Position']=data['Position'] player_universe[player_key]['FD_name']=data['Player'] position_map = {key:1 if key == player_universe[player_key]['FD_Position'] else 0 for key in self.positions.keys()} tmp_dict = position_map.copy() player_universe[player_key].update(tmp_dict) else: print (player_key + ' not in db_player_data') return player_universe
def hist_get_contest_ids(date): contest_list = [] sql = "SELECT * FROM hist_fanduel_data Where Sport='MLB' And Date=" + "'" + date + "'" FD_db_data = dbo.read_from_db(sql, ['Player', 'Position', 'contestID'], True) for e in FD_db_data: if e.split("_")[2] not in contest_list: contest_list.append(e.split("_")[2]) return contest_list
def weather_checker(self,stadium,forecast): sql = "SELECT * FROM event_data WHERE stadium = '" + stadium +"'" event_data = dbo.read_from_db(sql,['event_id'],True) forecast_data =[{'forecast':data['forecast'].split("-")[1],'PoP':float(data['forecast'].split("-")[-1].replace("%",""))} for data in event_data.values()] vec = DictVectorizer() transformed_forecast_data = vec.fit_transform(forecast_data).toarray() print (transformed_forecast_data) PPD_data = numpy.array([0.0 if data['PPD'] == 'False' else 1.0 for data in event_data.values()]) clf = DecisionTreeClassifier(min_samples_split=1).fit(transformed_forecast_data, PPD_data) PPD_pred =clf.predict(vec.transform(forecast)) return PPD_pred
def hist_lineup_optimizer_points(lineup_optimizer,start_date,end_date): sql = "SELECT * FROM hist_lineup_optimizers WHERE Date BETWEEN " + "'" + start_date +"' AND " "'" + end_date + "'" db_data= dbo.read_from_db(sql,["Date"],True) hist_points={} not_starting_list=[] for date,lineup in db_data.iteritems(): mlb_lineup=lineup[lineup_optimizer] if mlb_lineup: print "now calculating points for %s"%date lineup_points,missing_players_list=fanduel_lineup_points(mlb_lineup,date) print 'total poitns was %d'%lineup_points hist_points[date]={} hist_points[date]['lineup']=lineup_points hist_points[date]['missing_players']=missing_players_list return hist_points
def hist_lineup_optimizer_points(lineup_optimizer, start_date, end_date): sql = "SELECT * FROM hist_lineup_optimizers WHERE Date BETWEEN " + "'" + start_date + "' AND " "'" + end_date + "'" db_data = dbo.read_from_db(sql, ["Date"], True) hist_points = {} not_starting_list = [] for date, lineup in db_data.iteritems(): mlb_lineup = lineup[lineup_optimizer] if mlb_lineup: print "now calculating points for %s" % date lineup_points, missing_players_list = fanduel_lineup_points( mlb_lineup, date) print 'total poitns was %d' % lineup_points hist_points[date] = {} hist_points[date]['lineup'] = lineup_points hist_points[date]['missing_players'] = missing_players_list return hist_points
def build_hist_win_tuples(): hist_perf_tuples = [] sql = ('SELECT fd_table_contests.avg_top_wins, hist_performance.Winnings' ' FROM autotrader.fd_table_contests' ' INNER JOIN autotrader.hist_performance' ' ON autotrader.fd_table_contests.entry_id = hist_performance.Entry_Id') result_set = dbo.read_from_db(sql) for key,rw in result_set.iteritems(): if rw[1] > 0: #should change this to mapping hist_perf_tuples.append((rw[0],1)) else: hist_perf_tuples.append((rw[0],0)) return hist_perf_tuples #data_scrapping.update_gamedata(Cell("Parameters",'clLastGameDataID').value) #print output_final_roster() #print run_enter_best_contests(100,25)#paramter passing getting out of hand, need to figure out how refactor. Classes? #dbo.load_csv_into_db('C:/Users/Cole/Desktop/FanDuel/Fanduel/team map.csv','team_map') #print Ugen.output_dict(build_pWins_vs_topwins_dict(5))
def fanduel_lineup_points(playerlist, Date): player_list = playerlist.split(', ') not_starting_dict = {} player_dict = {} #player_map=Ugen.excel_mapping('Player Map',8,5) rw = 1 missing_players = [] for player in player_list: #build dict of stats for lineup on given date #Cell("Output",rw,1).value=player sql = "SELECT * FROM hist_player_data WHERE Sport = 'MLB' AND Player = " + "'" + player + "'" + " AND Date = " + "'" + Date + "'" player_data = dbo.read_from_db(sql, ["Player", "GameID", "Player_Type"], True) #Cell('Output',rw,2).value=player_data if len(player_data) == 0: print 'no db_data found for %s' % player missing_players.append(player) for player_key, stat_dict in player_data.iteritems(): if player_key.split("_")[1].split( '-' )[1] != '2': #Ian: right now for double headers only count points for first game player_dict[player_key] = stat_dict rw = rw + 1 FD_points = 0 for player, stats in player_dict.iteritems(): if stats['Player_Type'] == 'batter': player_points = ( int(stats['Stat1']) * 1 + int(stats['Stat2']) * 2 + int(stats['Stat3']) * 3 + int(stats['Stat4']) * 4 + int(stats['Stat6']) * 1 + int(stats['Stat10']) * 1 + int(stats['Stat11']) * 1 + int(stats['Stat8']) * 1 + int(stats['Stat13']) * 1 - ((int(stats['Stat7']) - int(stats['Stat5'])) * .25)) elif stats['Player_Type'] == 'pitcher': player_points = (int(stats['Stat1']) * 4 - int(stats['Stat7']) * 1 + int(stats['Stat9']) * 1 + float(stats['Stat4']) * 1) else: print 'unknown positions for %s' % player print player, player_points FD_points = FD_points + player_points return FD_points, missing_players
def build_hist_win_tuples(): hist_perf_tuples = [] sql = ( 'SELECT fd_table_contests.avg_top_wins, hist_performance.Winnings' ' FROM autotrader.fd_table_contests' ' INNER JOIN autotrader.hist_performance' ' ON autotrader.fd_table_contests.entry_id = hist_performance.Entry_Id' ) result_set = dbo.read_from_db(sql) for key, rw in result_set.iteritems(): if rw[1] > 0: #should change this to mapping hist_perf_tuples.append((rw[0], 1)) else: hist_perf_tuples.append((rw[0], 0)) return hist_perf_tuples #data_scrapping.update_gamedata(Cell("Parameters",'clLastGameDataID').value) #print output_final_roster() #print run_enter_best_contests(100,25)#paramter passing getting out of hand, need to figure out how refactor. Classes? #dbo.load_csv_into_db('C:/Users/Cole/Desktop/FanDuel/Fanduel/team map.csv','team_map') #print Ugen.output_dict(build_pWins_vs_topwins_dict(5))
def build_player_universe(self,FDSession,contest_url): if self.backtest_date: query={'sport':self.sport,'date':dt.datetime.strptime(self.backtest_date,'%Y-%m-%d'),'contest_ID':self.backtest_contestID} FD_player_data=dbo.read_from_db('hist_fanduel_data',query)[0]['contest_dict'] starting_players =self.hist_starting_lineups([player for player in FD_player_data.keys()]) FD_starting_player_data = {player:player_data for player,player_data in FD_player_data.iteritems() if player in starting_players} date=self.backtest_date else: FD_player_data = FDSession.fanduel_api_data(contest_url)['players'] starting_players=self.starting_players() #Ian: build function FD_starting_player_data = {{player['first_name']+' '+player['last_name']:{key:player[key] for key in self.FD_player_data_keys}for player in FD_player_data if player not in starting_players}} date='2020-01-01' player_universe={} for FD_playerid,data in FD_starting_player_data.iteritems():#FD_starting_player_data.iteritems(): print ('building player_universe for: %s on %s' % (FD_playerid,self.backtest_date)) db_df = self.get_db_gamedata(FD_playerid,'2012-10-01',end_date=Ugen.previous_day(date)) #2012/2013/2014/2015 seasons player_key = FD_playerid if db_df.empty: #Ian: need player maps! continue if player_key == db_df['player'][0]: projected_FD_points = self.FD_points_model(db_df,False) if projected_FD_points.confidence<0.1: continue #don't add player to player_universe player_universe[player_key] = {} player_universe[player_key]['projected_FD_points'] = projected_FD_points.projected_points#random.randrange(0,200) player_universe[player_key]['confidence'] = projected_FD_points.confidence #5 player_universe[player_key]['R2']= projected_FD_points.R2 player_universe[player_key]['name'] = player_key for key,player_data in data.iteritems(): player_universe[player_key][key] = player_data position_map = {key:1 if key == player_universe[player_key]['position'] else 0 for key in self.positions.keys()} tmp_dict = position_map.copy() player_universe[player_key].update(tmp_dict) else: print (player_key + ' not in db_player_data') return player_universe
def get_db_gamedata_old(self,player,player_type,start_date="20100101",end_date="21000101",GameID=None): #Updated to get by GameID or by Dates if GameID: sql = ("SELECT hist_player_data.*, event_data.* FROM hist_player_data " "INNER JOIN event_data ON hist_player_data.GameID=event_data.event_id " "WHERE hist_player_data.Sport = '"+ self.sport +"' AND " " Player = '"+ player.replace("'","''") +"' AND Player_Type ='"+ player_type +"'" +"' AND GameID ='"+ GameID +"'" " ORDER BY Date ASC") else: sql = ("SELECT hist_player_data.*, event_data.* FROM hist_player_data " "INNER JOIN event_data ON hist_player_data.GameID=event_data.event_id " "WHERE hist_player_data.Sport = '"+ self.sport +"' AND " "Player = '"+ player.replace("'","''") +"' AND Player_Type ='"+ player_type +"' AND " "Date BETWEEN '" + start_date +"' AND " "'" + end_date + "' ORDER BY Date ASC") df = dbo.read_from_db(sql) df.rename(columns=self.inv_db_data_model[df['Player_Type'][0]],inplace=True) df['name'] = df['display_name'] + "_" + df['Player_Type'] #Cole: adding the field 'name' is required for openopts results output for col in list(df): try: df[col] = df[col].astype(float) except: pass return df
def get_contests(sport, date): query = {'sport': sport, 'date': dt.datetime.strptime(date, '%Y-%m-%d')} resultset = dbo.read_from_db('hist_fanduel_data', query) return [contest['contest_ID'] for contest in resultset]
def get_contests(sport,date): query={'sport':sport,'date':dt.datetime.strptime(date,'%Y-%m-%d')} resultset=dbo.read_from_db('hist_fanduel_data',query) return [contest['contest_ID'] for contest in resultset]
def player_mapping(key_col, map_col): sql = "SELECT " + key_col + "," + map_col + " FROM player_map" player_map = dbo.read_from_db(sql) return player_map
def get_db_event_data(self): sql = "SELECT * FROM event_data" return dbo.read_from_db(sql,["event_id"],True)
def get_stadium_data(self,prime_key = 'stadium'): sql = "SELECT * FROM stadium_data" return dbo.read_from_db(sql,[prime_key],True)
def team_mapping(): sql = "SELECT * FROM team_map" team_map = dbo.read_from_db(sql) return team_map
def player_mapping(key_col,map_col): sql = "SELECT " + key_col + "," + map_col + " FROM player_map" player_map = dbo.read_from_db(sql) return player_map