def _config(self, *dates):
    """Prepare the DB connection and the query settings used by this object.

    Args:
        *dates: Indexable items; element ``[0]`` is stored as
            ``self.start_date`` and element ``[1]`` as ``self.end_date``.
            When several items are passed, each one overwrites the previous,
            so the values of the last item are the ones that remain.

    Side effects:
        Sets ``self.conn``, ``self.sql_fields`` and ``self.team_dict``
        (plus ``self.start_date`` / ``self.end_date`` when ``dates`` is
        non-empty).
    """
    # The connection is stored on the instance and intentionally left open.
    self.conn = establish_db_connection('sqlalchemy').connect()

    for date_pair in dates:
        self.start_date = date_pair[0]
        self.end_date = date_pair[1]

    identifier_fields = ['TEAM_ID', 'GAME_ID', 'PLAYER_ID', 'MIN']
    own_factor_fields = ['EFG_PCT', 'FTA_RATE', 'TM_TOV_PCT', 'OREB_PCT']
    opponent_factor_fields = [
        'OPP_EFG_PCT',
        'OPP_FTA_RATE',
        'OPP_TOV_PCT',
        'OPP_OREB_PCT',
    ]
    self.sql_fields = (
        identifier_fields + own_factor_fields + opponent_factor_fields
    )

    self.team_dict = teams.get('nba_teams')
def upload_flatten_data(data, db_name):
    """Append ``data`` to the database table ``db_name``.

    Upload failures are reported on stdout and swallowed (best-effort
    upload), so callers never see an exception from the write itself.

    Args:
        data: Object exposing ``to_sql`` (a pandas DataFrame in the callers
            visible in this code base).
        db_name: Name of the destination table.

    Returns:
        None.
    """
    conn = establish_db_connection('sqlalchemy').connect()
    print("uploading flattened data to %s" % db_name)
    try:
        data.to_sql(db_name, con=conn, index=None, if_exists='append')
        print("succesfully uploaded flattened data!")
    except Exception as e:
        print("could not upload data to db because: %s" % e)
    finally:
        # Always hand the connection back; it was previously left open.
        conn.close()
    return None
def main():
    """Generate predictions for every date in the range and store them.

    For each date returned by ``range_all_dates(sdate, edate)`` the
    predictions produced by ``mdb.main`` are appended to the
    ``historical_picks_table`` table. A failed write is reported on stdout
    and the loop moves on to the next date.

    Returns:
        None.
    """
    date_range = range_all_dates(sdate, edate)
    sql_table = 'historical_picks_table'

    # One engine for the whole run; a connection is opened per date and is
    # always closed again so connections do not pile up over long ranges.
    engine = establish_db_connection('sqlalchemy')

    for d in date_range:
        d_games = mdb.main(d)
        conn = engine.connect()
        try:
            print("writing predictions for %s to %s" % (d, sql_table))
            d_games.to_sql(name=sql_table, con=conn, if_exists='append',
                           index=False)
            print("predictions successfully written to db at %s"
                  % datetime.datetime.now())
        except Exception as e:
            print("db write failed because: %s" % e)
        finally:
            conn.close()
    return None
def get_picks_and_scores(game_date):
    """Load the final scores and the historical picks for one game date.

    Args:
        game_date: Value interpolated into the ``game_date = '...'`` filter
            of both queries.

    Returns:
        Tuple ``(game_scores, game_picks)`` with the rows read from
        ``final_scores`` and ``historical_picks`` respectively. If reading a
        table fails, the error is printed and that element is ``None``
        (previously the function crashed with an unbound-variable error at
        the ``return``).
    """
    score_table = 'final_scores'
    picks_table = 'historical_picks'
    frames = {}

    conn = establish_db_connection('sqlalchemy').connect()
    try:
        for sql_table in (score_table, picks_table):
            # NOTE(review): the query is built by string interpolation, so
            # game_date must come from a trusted source.
            game_date_query = "SELECT * FROM %s WHERE game_date = '%s'" % (
                sql_table, game_date)
            try:
                frames[sql_table] = pd.read_sql(game_date_query, con=conn)
            except Exception as e:
                print("could not get data from %s because: %s"
                      % (sql_table, e))
    finally:
        conn.close()

    return frames.get(score_table), frames.get(picks_table)
def list_games(team_id, date, start_date=None):
    """Return the ids of a team's games inside a date window.

    The window is ``start_date <= game_date < date``; games are matched when
    the team appears as either the away or the home side.

    Args:
        team_id: Team identifier. It is compared as a string against the
            string form of the ``away_id`` / ``home_id`` columns, so both
            ``'123'`` and ``123`` are accepted.
        date: Exclusive upper bound, formatted ``YYYY-MM-DD``.
        start_date: Inclusive lower bound, formatted ``YYYY-MM-DD``.
            Defaults to ``'2018-10-16'``.

    Returns:
        List of ``game_id`` values from ``final_scores``.

    Raises:
        ValueError: If either date string does not match ``%Y-%m-%d``.
    """
    # Parse both bounds first so malformed input fails before any DB work.
    clean_date = datetime.strptime(date, '%Y-%m-%d').date()
    if start_date is None:
        start_date = '2018-10-16'
    start_date = datetime.strptime(start_date, '%Y-%m-%d').date()

    conn = establish_db_connection('sqlalchemy').connect()
    try:
        data = pd.read_sql("SELECT * FROM final_scores", con=conn)
    finally:
        conn.close()

    in_window = (data['game_date'] >= start_date) & (data['game_date'] < clean_date)
    window = data[in_window]

    # Compare on string copies of the id columns instead of writing the
    # converted columns back into a filtered slice (chained assignment).
    team_key = str(team_id)
    plays_away = window['away_id'].astype(str) == team_key
    plays_home = window['home_id'].astype(str) == team_key

    return window.loc[plays_away | plays_home, 'game_id'].tolist()
def flatten_four_factors_thru(season_id):
    """Flatten per-side four-factor rows into one row per game and upload.

    Rows are read from ``four_factors_thru`` for game ids matching
    ``season_id + "0%%"``. For every game the ``AWAY`` row is merged with
    the ``HOME`` row on ``GAME_ID``, columns are renamed to
    ``away_<col>`` / ``home_<col>`` (lower-cased, ``SIDE`` dropped, a single
    ``game_id`` column), and the combined table is passed to
    ``upload_flatten_data`` for the ``flatten_four_factors`` table.

    Args:
        season_id: String prefix of the game ids to process.

    Returns:
        None.

    Raises:
        ValueError: If a game does not have exactly one ``AWAY`` row, or if
            the source table has no ``SIDE`` column.
    """
    conn = establish_db_connection('sqlalchemy').connect()
    try:
        four_factors_sql = (
            'SELECT * FROM four_factors_thru WHERE GAME_ID LIKE "%s"'
            % (season_id + "0%%")
        )
        four_factors_df = pd.read_sql(four_factors_sql, con=conn)
    finally:
        # Only the read needs this connection; the upload opens its own.
        conn.close()

    print(four_factors_df.head())
    games_list = list(set(four_factors_df['GAME_ID'].tolist()))

    # Target column names: away_* followed by home_*, with one shared game id.
    flatten_cols = list(four_factors_df)
    flatten_cols.remove('SIDE')
    flatten_cols = [x.lower() for x in flatten_cols]
    away_flatten_cols = ["away_%s" % x for x in flatten_cols]
    home_flatten_cols = ["home_%s" % x for x in flatten_cols]
    flatten_cols = away_flatten_cols + home_flatten_cols
    flatten_cols.remove('home_game_id')

    # Collect the per-game frames and concatenate once instead of calling
    # DataFrame.append in the loop (quadratic, and removed in pandas 2.0).
    # The empty seed frame keeps the column layout when there are no games.
    flattened = [pd.DataFrame(columns=flatten_cols)]
    for g in games_list:
        print("flattening %s" % g)
        ff_slice = four_factors_df[four_factors_df['GAME_ID'] == g]
        slice_away = ff_slice[ff_slice['SIDE'] == 'AWAY']
        slice_home = ff_slice[ff_slice['SIDE'] == 'HOME']

        # The original code called .item() on the away GAME_ID, which fails
        # unless there is exactly one away row; keep that check explicit.
        if len(slice_away) != 1:
            raise ValueError(
                "expected exactly one AWAY row for game %s, found %d"
                % (g, len(slice_away)))

        merged = slice_away.merge(slice_home, how='left', on=['GAME_ID'])
        merged = merged.drop(columns=['SIDE_x', 'SIDE_y'])
        merged.columns = flatten_cols
        flattened.append(merged)

    ff_flatten_df = pd.concat(flattened)
    ff_flatten_df = ff_flatten_df.rename(columns={'away_game_id': 'game_id'})
    upload_flatten_data(ff_flatten_df, 'flatten_four_factors')
    return None
def main(start_date=start_date, end_date=end_date):
    """Grade the picks for every date in the range and store the results.

    For each date from ``range_all_dates(start_date, end_date)`` the scores
    and picks are loaded, passed to ``determine_results`` and, when a result
    is returned, appended to ``results_table``. A failed upload is reported
    on stdout and processing continues with the next date.

    Args:
        start_date: First date of the range (defaults to the module-level
            ``start_date``).
        end_date: End of the range (defaults to the module-level
            ``end_date``).

    Returns:
        The result computed for the last processed date, or ``None`` when
        that date had no result or the range was empty (an empty range
        previously raised an unbound-variable error).
    """
    conn = establish_db_connection('sqlalchemy').connect()
    daily_result = None
    try:
        date_list = range_all_dates(start_date, end_date)
        for d in date_list:
            print("getting picks and scores for %s" % d)
            game_scores, game_picks = get_picks_and_scores(d)
            print("determing results...")
            daily_result = determine_results(game_scores, game_picks)
            if daily_result is not None:
                print("uploading to db...")
                try:
                    daily_result.to_sql("results_table", if_exists='append',
                                        con=conn)
                except Exception as e:
                    # Duplicates are the expected cause, but report the real
                    # error so other failures are not mislabelled.
                    print("could not upload to results_table "
                          "(possibly duplicate records): %s" % e)
            else:
                print("no games today")
            print("")
    finally:
        conn.close()
    return daily_result
def flatten_scores(games_to_fill=None):
    """Flatten two-row game scores into one row per game and upload them.

    For each game id, the rows in ``final_scores`` are read and processed in
    consecutive pairs: the first row of a pair is labelled ``away`` and the
    second ``home`` (this relies on the row order returned by the query).
    Each pair becomes one row with away/home team columns plus
    ``home_pt_diff`` (home points minus away points), which is handed to
    ``upload_flatten_data`` for the ``flatten_final_scores`` table.

    Args:
        games_to_fill: Optional iterable of game ids to process. Defaults to
            the fixed back-fill list that was previously hard-coded.

    Returns:
        None.

    Raises:
        ValueError: If a game has an odd number of rows, so the last pair
            cannot be labelled away/home.
    """
    if games_to_fill is None:
        games_to_fill = ['0021200628', '0021200627', '0021200626',
                         '0021200619', '0021200618', '0021200617',
                         '0021200616', '0021200614']

    source_cols = ['GAME_DATE_EST', 'GAME_ID', 'TEAM_ID',
                   'TEAM_ABBREVIATION', 'PTS']
    game_cols = ['game_date', 'game_id', 'away_team_id',
                 'away_team_abbreviation', 'away_pts', 'home_team_id',
                 'home_team_abbreviation', 'home_pts', 'home_pt_diff']

    for g in games_to_fill:
        conn = establish_db_connection("sqlalchemy").connect()
        try:
            final_scores_sql = (
                'SELECT * FROM final_scores WHERE GAME_ID = "%s"' % (g))
            final_scores_table = pd.read_sql(final_scores_sql, con=conn)
        finally:
            # Release the read connection before the upload opens its own.
            conn.close()

        # Gather the flattened rows and concatenate once rather than using
        # DataFrame.append per pair (removed in pandas 2.0). The empty seed
        # keeps the column layout when the game has no rows.
        flattened = [pd.DataFrame(columns=game_cols)]
        for i in range(0, len(final_scores_table), 2):
            curr_game = final_scores_table[i:i + 2][source_cols]
            curr_game.insert(5, 'SIDE', ['away', 'home'])
            away = curr_game[curr_game['SIDE'] == 'away']
            home = curr_game[curr_game['SIDE'] == 'home']
            clean_game = away.merge(home, how='left',
                                    on=['GAME_ID', 'GAME_DATE_EST'])
            clean_game = clean_game.drop(columns=['SIDE_y', 'SIDE_x'])
            clean_game['home_pt_diff'] = (
                clean_game['PTS_y'] - clean_game['PTS_x'])
            clean_game.columns = game_cols
            flattened.append(clean_game)
        all_games_table = pd.concat(flattened)

        # Upload per game: game_id is the key, so a duplicate upload is
        # blocked for that game only and the remaining games still go in.
        upload_flatten_data(all_games_table, 'flatten_final_scores')
    return None
def _upload_to_db(table, db_tbl):
    """Append ``table`` to the database table ``db_tbl``.

    Args:
        table: Object exposing ``to_sql`` (e.g. a pandas DataFrame).
        db_tbl: Name of the destination table.

    Returns:
        None.

    Raises:
        Any exception raised by ``to_sql`` propagates to the caller; the
        connection is closed either way.
    """
    conn = establish_db_connection('sqlalchemy').connect()
    try:
        table.to_sql(db_tbl, con=conn, if_exists='append', index=False)
    finally:
        # The connection was previously left open after every upload.
        conn.close()
    return None
import pandas as pd
import numpy as np
from datetime import datetime, date, timedelta

from db_connection_manager import establish_db_connection

# Script: copy final_scores into new_final_scores with two extra columns,
# b2b_away / b2b_home, set to "Yes" when that team also appears (as home or
# away) in a game dated exactly one day earlier, otherwise "No".

conn = establish_db_connection('sqlalchemy').connect()
try:
    scores = pd.read_sql('SELECT * FROM final_scores', con=conn)
finally:
    conn.close()

# Per-date frames are collected and concatenated once; DataFrame.append in a
# loop is quadratic and was removed in pandas 2.0. The empty seed frame keeps
# the original columns when final_scores has no rows.
daily_frames = [pd.DataFrame(columns=list(scores))]
for d in scores['game_date'].unique():
    d_sub = scores[scores['game_date'] == d].copy()
    prev_d_sub = scores[scores['game_date'] == d - timedelta(1)]
    # Every team that played the previous day, regardless of side.
    prev_teams = list(prev_d_sub['away_team']) + list(prev_d_sub['home_team'])
    d_sub['b2b_away'] = np.where(d_sub['away_team'].isin(prev_teams), "Yes", "No")
    d_sub['b2b_home'] = np.where(d_sub['home_team'].isin(prev_teams), "Yes", "No")
    daily_frames.append(d_sub)
new_scores = pd.concat(daily_frames)

reconn = establish_db_connection('sqlalchemy').connect()
try:
    new_scores.to_sql('new_final_scores', con=reconn, if_exists='append', index=False)
finally:
    reconn.close()
def upload_to_db(data, tbl, append=False):
    """Write ``data`` to the database table ``tbl``.

    Args:
        data: Object exposing ``to_sql`` (e.g. a pandas DataFrame).
        tbl: Name of the destination table.
        append: When truthy, rows are appended to the existing table;
            otherwise (the default) the table is replaced. Truthiness is
            used rather than ``== True`` so a truthy non-bool value can no
            longer fall through to the destructive ``replace`` mode.

    Raises:
        Any exception raised by ``to_sql`` propagates to the caller; the
        connection is closed either way.
    """
    write_mode = 'append' if append else 'replace'
    conn = establish_db_connection('sqlalchemy').connect()
    try:
        data.to_sql(tbl, con=conn, if_exists=write_mode, index=False)
    finally:
        # The connection was previously left open after every upload.
        conn.close()