def get_projection(structureid, timeid, source): scoring = dbMgr.query("""select distinct scoring from contestStructure where structureid = '{}'""".format( structureid)) return dbMgr.query("""select playerid, value as proj from projections p where timeid = {} and statistic like 'FPTS_{}' and source like '{}'""".format( timeid, scoring['scoring'][0], source)).set_index('playerid')['proj']
def contest_insert(link, timeid=None, structureid=None): str_qry = 'select contestid from contests where link = ?' qry_df = dbMgr.query(str_qry, (link,)) if len(qry_df['contestid']) == 0: assert None not in (timeid, structureid), "Incorrect inputs into contest insert function" to_insert = pd.Series({'link': link, 'timeid': timeid, 'structureid': structureid}) dbMgr.series_insert(to_insert, 'contests', True) qry_df = dbMgr.query(str_qry, (link,)) return qry_df['contestid'][0]
def structure_search(platform, scoring, description): str_qry = """select structureid from contestStructure where platform = ? and scoring = ? and description = ?""" qry_df = dbMgr.query(str_qry, (platform, scoring, description)) if len(qry_df['structureid']) == 0: to_insert = pd.Series({'platform': platform, 'scoring': scoring, 'description': description, 'structureid': None}) dbMgr.series_insert(to_insert, 'contestStructure', True) qry_df = dbMgr.query(str_qry, (platform, scoring, description)) structure_constraints(qry_df['structureid'][0], platform, description) return qry_df['structureid'][0]
def game_insert(link, timeid=None, hometeamid=None, awayteamid=None, date=None): str_qry = 'select gameid from games where link = ?' qry_df = dbMgr.query(str_qry, (link,)) if len(qry_df['gameid']) == 0: assert None not in (timeid, hometeamid, awayteamid, date), "Incorrect inputs into game insert function" to_insert = pd.Series({'link': link, 'timeid': timeid, 'hometeamid': hometeamid, 'awayteamid': awayteamid, 'date': date}) dbMgr.series_insert(to_insert, 'games', True) qry_df = dbMgr.query(str_qry, (link,)) return qry_df['gameid'][0]
def time_search(season=None, week=None, date=None): if None not in (season, week): str_qry = 'select timeid from time where season = ? and week = ?' qry_df = dbMgr.query(str_qry, (season, week)) if len(qry_df['timeid']) == 0: to_insert = pd.Series({'season': season, 'week': week, 'timeid': None}) dbMgr.series_insert(to_insert, 'time', True) qry_df = dbMgr.query(str_qry, (season, week)) elif date is not None: str_qry = 'select distinct timeid from games where date = ?' qry_df = dbMgr.query(str_qry, (date,)) assert len(qry_df['timeid']) == 1, "Times are not loaded properly" return qry_df['timeid'][0]
def structure_constraints(structureid, platform, description): str_qry = """select structureid from structureConstraints where structureid = ?""" qry_df = dbMgr.query(str_qry, (structureid,)) constraint_df = None if len(qry_df['structureid']) == 0: if platform == 'fanduel': if description == 'all week without kicker': list_constraints = [("bound", "<=", None, 1), ("bound", ">=", None, 0), ("dot", "==", 'DST', 1), ("dot", "==", 'QB', 1), ("dot", ">=", 'RB', 2), ("dot", "<=", 'RB', 3), ("dot", ">=", 'WR', 3), ("dot", "<=", 'WR', 4), ("dot", ">=", 'TE', 1), ("dot", "<=", 'TE', 2), ("dot", "==", 'Flex', 7), ("dot", "<=", 'salary', 60000)] elif description == 'all week with kicker': list_constraints = [("bound", "<=", None, 1), ("bound", ">=", None, 0), ("dot", "==", 'DST', 1), ("dot", "==", 'QB', 1), ("dot", "==", 'RB', 2), ("dot", "==", 'WR', 3), ("dot", "==", 'TE', 1), ("dot", "<=", 'salary', 60000), ("dot", "==", 'K', 1)] elif platform == 'draftkings': list_constraints = [("bound", "<=", None, 1), ("bound", ">=", None, 0), ("dot", "==", 'DST', 1), ("dot", "==", 'QB', 1), ("dot", ">=", 'RB', 2), ("dot", "<=", 'RB', 3), ("dot", ">=", 'WR', 3), ("dot", "<=", 'WR', 4), ("dot", ">=", 'TE', 1), ("dot", "<=", 'TE', 2), ("dot", "==", 'Flex', 7), ("dot", "<=", 'salary', 50000)] elif platform == 'yahoo': list_constraints = [("bound", "<=", None, 1), ("bound", ">=", None, 0), ("dot", "==", 'DST', 1), ("dot", "==", 'QB', 1), ("dot", ">=", 'RB', 2), ("dot", "<=", 'RB', 3), ("dot", ">=", 'WR', 3), ("dot", "<=", 'WR', 4), ("dot", ">=", 'TE', 1), ("dot", "<=", 'TE', 2), ("dot", "==", 'Flex', 7), ("dot", "<=", 'salary', 200)] constraint_df = pd.DataFrame(list_constraints) if constraint_df is not None: constraint_df.columns = ['type', 'operator', 'vec', 'bound'] constraint_df['structureid'] = structureid dbMgr.df_insert(constraint_df, 'structureConstraints', True) return None
def get_player_cov(structureid, timeid): position_cov = get_position_cov(structureid) projections = proj.get_expected_points(structureid, timeid) player_mappings = dbMgr.query("""select cp.playerid, position, teamid, oppid from contestPlayers cp join contestStructure s on cp.structureid = s.structureid join games g on g.timeid = cp.timeid join playsForTeam pfr on pfr.gameid = g.gameid and pfr.playerid = cp.playerid where s.structureid = {s} and cp.timeid = {t}""". format(s=structureid, t=timeid)).set_index('playerid') pm = player_mappings.join(projections, how='inner') pm['indRank'] = pm.groupby(['teamid', 'position']).rank(ascending=False, method='dense')['proj'] pm['adjPosition'] = pm['position'] + pm['indRank'].astype(int).astype(str) filtered_data = pm[pm['adjPosition'].isin(list(position_cov.columns))] asset_max = {} for r1, row1 in filtered_data.iterrows(): for r2, row2 in filtered_data.iterrows(): if row1['teamid'] == row2['teamid']: asset_max[(r1, r2)] = position_cov.loc['Own'][ row1['adjPosition']][row2['adjPosition']] elif row1['teamid'] == row2['oppid']: asset_max[(r1, r2)] = position_cov.loc['Opposing'][ row1['adjPosition']][row2['adjPosition']] else: asset_max[(r1, r2)] = 0 return pd.Series(asset_max).unstack(level=1)
def load_projections(historical=False): for proj_source, proj_class in projections_sources.items(): if historical: all_data = pd.DataFrame() for year in years: for week in weeks: exist_df = dbMgr.query("""select playerid from projections proj join time t on proj.timeid = t.timeid where season = {} and week = {} and source = '{}'""".format(year, week, proj_source)) if len(exist_df) == 0: platform_df = proj_class.get_projections(year, week) if platform_df is not None: platform_df['timeid'] = exp.time_search(year, week) platform_df['source'] = proj_source all_data = all_data.append(platform_df, ignore_index=True, sort=False) else: 1 + 1 if len(all_data) > 0: all_data['playerid'] = all_data.apply(lambda x: exp.player_search(x.PLAYER, dst_mode=(x.position == 'DST'), filters={'position': x.position, 'year': x.year, 'team': x.TEAM}, auto_insert=True, source=proj_source, sourceid=x.SOURCE, allow_missing=True), axis=1) all_data = all_data[all_data['playerid'].notnull()] stats = list(set(sum(proj_class.positions.values(), [])).union({'FPTS_HALF', 'FPTS_STD', 'FPTS_FULL'})) stacked_df = all_data.groupby(['timeid', 'playerid', 'source'])[stats].mean().stack().dropna().reset_index() stacked_df.columns = ['timeid', 'playerid', 'source', 'statistic', 'value'] dbMgr.df_insert(stacked_df, 'projections', True) return None
def load_ownership(historical=False): if historical: dates = dbMgr.query("""select distinct date, season from games g join time t on g.timeid = t.timeid order by date""") all_dates = dates[dates['date'] > '2016-09-10'].set_index('date', verify_integrity=True)['season'] import pickle with open('ownership.pickle', 'rb') as handle: test_data = pickle.load(handle) day_df = {} for key in test_data.keys(): if (test_data[key][0] is not None) and (pd.to_datetime(key).day_name() == 'Sunday'): possible_keys = [x for x in test_data[key][0].keys() if ('Classic' in x or 'SalaryCap' in x) and '1:00 pm' in x] if len(possible_keys) > 0: temp_max = 0 temp_contest = None for pk in possible_keys: if test_data[key][0][pk].shape[0] > temp_max: temp_max = test_data[key][0][pk].shape[0] temp_contest = pk if temp_contest is not None: if len(test_data[key][1][temp_contest]) > 100: day_df[key] = temp_contest for dd, val in day_df.items(): contest_df = test_data[dd][0][val] ownership_df = test_data[dd][1][val] ownership_df.columns = [x.replace('$', '').replace(',', '') for x in ownership_df.columns] exclude_contests = contest_df['Name'].value_counts()[contest_df['Name'].value_counts() > 1].index filtered_df = ownership_df[ownership_df['Avg'] > 0.0001].drop(exclude_contests, axis=1, errors='ignore') valid_contests = [x for x in filtered_df.columns if x not in ['Player', 'Pos', 'Avg', 'Fpts']] valid_df = contest_df[ contest_df['Name'].isin(valid_contests) & -contest_df['Name'].isin(exclude_contests)].drop_duplicates() timeid = exp.time_search(date=dd) structureid = exp.structure_search('draftkings', 'FULL', 'all week without kicker') valid_df['contestid'] = valid_df.apply(lambda x: exp.contest_insert(x.Link, timeid, structureid), axis=1) link_map = valid_df.set_index('Name')['contestid'] pivoted_df = valid_df.set_index('contestid').drop(['Name', 'Link', 'Winner'], axis=1).unstack().reset_index() pivoted_df.columns = ['stat', 'contestid', 'value'] dbMgr.df_insert(pivoted_df[['contestid', 'stat', 'value']], 'contestStats', True) filtered_df = filtered_df[['Player', 'Pos', 'Avg', 'Fpts']+list(link_map.keys())] filtered_df.columns = [link_map[x] if x in valid_contests else x for x in filtered_df.columns] filtered_df['playerid'] = filtered_df.apply( lambda x: exp.player_search(x.Player, dst_mode=(x.Pos == 'DST'), filters={'position': x.Pos, 'year': all_dates[dd], 'points': x.Fpts, 'platform': 'dk', 'date': dd}, auto_insert=True, allow_missing=True), axis=1) player_df = filtered_df.set_index('playerid').drop(['Player', 'Pos', 'Avg', 'Fpts'], axis=1) stacked_df = player_df.replace(0.0, np.nan).unstack().dropna().reset_index().drop_duplicates() stacked_df.columns = ['contestid', 'playerid', 'value'] dbMgr.df_insert(stacked_df[['contestid', 'playerid', 'value']], 'contestOwnership', True) else: 1 + 1 return None
def get_platform_constraints(structureid, data_uni=None): constraint_df = dbMgr.query( """select type, operator, vec, bound from structureConstraints con where structureid = {}""".format(structureid)) if data_uni is not None: constraint_df['vec'] = constraint_df['vec'].apply( lambda x: data_uni[x] if x is not None else x) return list(constraint_df.itertuples(index=False, name=None))
def get_platform_data(structureid, timeid): p_data = dbMgr.query( """select playerid, position, salary, points from contestPlayers cp where timeid = {} and structureid = {}""".format( timeid, structureid)).set_index('playerid') p_data = p_data.join(pd.get_dummies(p_data['position'])) p_data['Flex'] = p_data['RB'] + p_data['TE'] + p_data['WR'] return p_data
def get_position_cov(structureid): contest_data = dbMgr.query(""" select cp.playerid, t.timeid, season, week, position, points as actualPoints, teamid, oppid, value as projectedPoints from contestPlayers cp join contestStructure s on cp.structureid = s.structureid join games g on g.timeid = cp.timeid join playsForTeam pfr on pfr.gameid = g.gameid and pfr.playerid = cp.playerid join projections proj on proj.timeid = cp.timeid and proj.playerid = cp.playerid join time t on t.timeid = proj.timeid where s.structureid = {} and source = 'fantasypros' and statistic = 'FPTS_' || s.scoring""".format( structureid)) universe_data = dbMgr.query(""" select timeid, playerid, avg(value) as meanOwnership from contestOwnership co join contests c on c.contestid = co.contestid group by structureid, timeid, playerid""") filtered_data = universe_data.merge(contest_data, on=['timeid', 'playerid']) filtered_data['indRank'] = filtered_data.groupby( ['timeid', 'teamid', 'position']).rank(ascending=False, method='dense')['projectedPoints'] filtered_data['posCategory'] = filtered_data['position'] + filtered_data[ 'indRank'].astype(int).astype(str) count_positions = filtered_data.groupby( 'posCategory').count()['actualPoints'] eligible_positions = count_positions[ count_positions / filtered_data['actualPoints'].count() > .05].index eligible_data = filtered_data[filtered_data['posCategory'].isin( eligible_positions)] merged_data = eligible_data.merge(eligible_data, left_on=['timeid', 'teamid'], right_on=['timeid', 'teamid']) merged_data['side'] = 'Own' merged_data_opp = eligible_data.merge(eligible_data, left_on=['timeid', 'teamid'], right_on=['timeid', 'oppid']) merged_data_opp['side'] = 'Opposing' concat_data = merged_data.append(merged_data_opp, ignore_index=True, sort=False) return concat_data.groupby(['side', 'posCategory_x', 'posCategory_y'])[[ 'actualPoints_x', 'actualPoints_y' ]].cov().iloc[0::2, -1].unstack(level=[3, 1])['actualPoints_x']
def team_search(name_search, auto_insert=True): name_search = name_search.lower() if name_search == 'fa': # fantasy pros return -999 alias_qry = "select teamid from teamAliases where teamalias = ?" alias_df = dbMgr.query(alias_qry, (name_search,)) if len(alias_df['teamid']) == 1: return alias_df['teamid'][0] team_name, link = pfr.find_team_page(name_search) str_qry = "select teamid from teams where link like ?" qry_df = dbMgr.query(str_qry, (link,)) if len(qry_df['teamid']) == 0: team_insert = pd.Series({'name': team_name, 'link': link, 'teamid': None}) dbMgr.series_insert(team_insert, 'teams', auto_insert) qry_df = dbMgr.query(str_qry, (link,)) alias_insert = pd.Series({'teamalias': name_search, 'teamid': int(qry_df['teamid'][0])}) dbMgr.series_insert(alias_insert, 'teamAliases', True) return qry_df['teamid'][0]
def player_search(name_search, auto_insert=True, dst_mode=False, filters=None, source=None, sourceid=None, allow_missing=False): insert_source = False if source is not None and sourceid is not None: alias_qry = """select playerid from playerAliases where source like ? and sourceid like ?""" alias_df = dbMgr.query(alias_qry, (source, sourceid)) if len(alias_df['playerid']) == 1: return alias_df['playerid'][0] insert_source = True if dst_mode: if filters is not None: if 'team' in filters.keys(): name_search = filters['team'] teamid = team_search(name_search) str_qry = "select name, link from teams where teamid like ?" qry_df = dbMgr.query(str_qry, (teamid,)) player_name = qry_df['name'][0] link = qry_df['link'][0] else: player_name, link = pfr.find_player_page(name_search, filters, allow_missing) if link is None: return None str_qry = "select playerid from players where link like ?" qry_df = dbMgr.query(str_qry, (link,)) if len(qry_df['playerid']) == 0: to_insert = pd.Series({'name': player_name, 'link': link, 'playerid': None}) dbMgr.series_insert(to_insert, 'players', auto_insert) qry_df = dbMgr.query(str_qry, (link,)) if insert_source: to_insert = pd.Series({'source': source, 'sourceid': sourceid, 'playerid': qry_df['playerid'][0]}) dbMgr.series_insert(to_insert, 'playerAliases', True) return qry_df['playerid'][0]
def load_game_logs(historical=False): if historical: players_df = dbMgr.query("""select playerid, link, name from players where playerid not in ( select distinct playerid from playsForTeam)""") for ix, row in players_df.iterrows(): team = False if row['link'].startswith('https://www.pro-football-reference.com/teams/'): team = True gamelog_df, position = pfr.get_game_log(row['link']) if len(gamelog_df) > 0: gamelog_df['playerid'] = row['playerid'] gamelog_df['timeid'] = gamelog_df.apply(lambda x: exp.time_search(x.year_id, x.week_num), axis=1) gamelog_df['teamid'] = gamelog_df['team'].apply(lambda x: exp.team_search(x)) gamelog_df['oppid'] = gamelog_df['opp'].apply(lambda x: exp.team_search(x)) gamelog_df['hometeamid'] = gamelog_df.apply(lambda x: exp.team_search(x.opp) if x.game_location == '@' else exp.team_search(x.team), axis=1) gamelog_df['awayteamid'] = gamelog_df.apply(lambda x: exp.team_search(x.team) if x.game_location == '@' else exp.team_search(x.opp), axis=1) if team: gamelog_df['gameid'] = gamelog_df.apply(lambda x: exp.game_insert(x.boxscore_word_link, x.timeid, x.hometeamid, x.awayteamid, x.game_date), axis=1) else: gamelog_df['gameid'] = gamelog_df.apply(lambda x: exp.game_insert(x.game_result_link, x.timeid, x.hometeamid, x.awayteamid, x.game_date), axis=1) dbMgr.df_insert(gamelog_df[['gameid', 'playerid', 'teamid', 'oppid']], 'playsForTeam', True) if not team: stats = ['age', 'fumbles', 'fumbles_forced', 'fumbles_lost', 'fumbles_rec_td', 'fumbles_rec_yds', 'pass_att', 'pass_cmp', 'pass_int', 'pass_rating', 'pass_sacked', 'pass_sacked_yds', 'pass_td', 'pass_yds', 'rec', 'rec_td', 'rec_yds', 'rush_att', 'rush_td', 'rush_yds', 'targets'] indexer = ['gameid', 'playerid'] stacked_df = gamelog_df.reindex(columns=(stats + indexer)).astype(float).groupby( ['gameid', 'playerid']).mean().stack().dropna().reset_index() stacked_df.columns = ['gameid', 'playerid', 'stat', 'value'] dbMgr.df_insert(stacked_df, 'gameLog', True) else: 1 + 1 return None