def load(league):
    '''
    Create any missing tables for MODELS, then merge one row per model into
    the database, each built from the same mapping.

    @param league - mapping of column names to values; it is unpacked as
        keyword arguments into every model class in MODELS
    '''
    model_tables = [entry.__table__ for entry in MODELS]
    # checkfirst=True leaves tables that already exist untouched
    BASE.metadata.create_all(tables=model_tables, checkfirst=True)

    db = get_session()
    for model_cls in MODELS:
        record = model_cls(**league)
        db.merge(record)
    # one commit covers every merged row
    db.commit()
def starter_load(results):
    '''
    Create any missing tables for STARTERMODELS, then merge the supplied
    rows into the database.

    @param results - dictionary keyed by each model's __tablename__; every
        value is an iterable of dicts, each unpacked as keyword arguments
        into the matching model
    '''
    BASE.metadata.create_all(
        tables=[x.__table__ for x in STARTERMODELS], checkfirst=True)
    session = get_session()
    for model in STARTERMODELS:
        data = results[model.__tablename__]
        # Here is where we convert directly the dictionary output of our
        # marshmallow schema into sqlalchemy
        for i, row in enumerate(data):
            # progress marker every 1000 rows, starting with row 0
            if i % 1000 == 0:
                print('loading...', i)
            session.merge(model(**row))
    session.commit()
def load_data(results):
    '''
    Merge the transformed rows into the database through SQLAlchemy.

    Each row dict produced by the transform step is unpacked straight into
    the matching SQLAlchemy model, merged into the session, and everything
    is committed at the end.

    @param dict results: results of transforming raw data, keyed by each
        model's __tablename__
    '''
    logger.info("loading rows into database")
    # session basics: https://docs.sqlalchemy.org/en/13/orm/session.html
    db = get_session()
    for model_cls in MODELS:
        table_rows = results[model_cls.__tablename__]
        # lazily turn each schema-output dict into a model instance
        records = (model_cls(**fields) for fields in table_rows)
        for record in records:
            db.merge(record)
    db.commit()
def load(results):
    '''
    Create any missing tables for MODELS, then merge the supplied rows into
    the SQL database.

    @param results - dictionary keyed by each model's __tablename__; every
        value is an iterable of dicts (the original docstring describes them
        as teams), each unpacked as keyword arguments into the matching model
    '''
    BASE.metadata.create_all(
        tables=[x.__table__ for x in MODELS], checkfirst=True)
    session = get_session()
    for model in MODELS:
        data = results[model.__tablename__]
        # Here is where we convert directly the dictionary output of our
        # marshmallow schema into sqlalchemy
        for i, row in enumerate(data):
            # progress marker every 1000 rows, starting with row 0
            if i % 1000 == 0:
                print('loading...', i)
            session.merge(model(**row))
    session.commit()
def load_data(results):
    '''
    Build objects for every model in MODELS via merge() and bulk-save them.

    @param results - dictionary keyed by each model's __tablename__; every
        value is an iterable of row dicts
    '''
    print('loading...')
    db = get_session()
    for model_cls in MODELS:
        print(model_cls)
        table_rows = results[model_cls.__tablename__]
        # merge() is a helper defined outside this function; it receives the
        # session, the model class, the row and the row's running index, and
        # its return value is what gets bulk-saved.
        # NOTE: an executor-based variant of this step existed only as
        # commented-out code; rows are processed sequentially.
        pending = [
            merge(db, model_cls, fields, position)
            for position, fields in enumerate(table_rows)
        ]
        # NOTE(review): assumes the bulk save runs once per model, since the
        # object list is rebuilt for each model - confirm.
        db.bulk_save_objects(pending)
    db.commit()
    print('loaded')
def load_data(results):
    '''
    Load playoff data into the SQL database

    @param results - a dictionary of lists of dictionaries containing the
        PlateAppearance, Game, Run, BaseRunningEvent data for the playoffs,
        keyed by each model's __tablename__
    '''
    print('loading...')
    # Get the Playoff session
    session = get_session(True)
    for model in MODELS:
        print(model)
        # Here is where we convert directly the dictionary output of our
        # marshmallow schema into sqlalchemy
        for row in results[model.__tablename__]:
            session.merge(model(**row))
    session.commit()
    print('loaded')
def etl_league_adjusted_stats(year):
    '''
    Derive the league-adjusted stats for every Player row of one season and
    commit them.

    Reads the 'league' and 'team' tables into DataFrames, bulk-updates
    Player.FIPR9 to Player.iFIP plus the season's MLB ciFIP, then computes
    pFIPR9, RAAP9, dRPW, WPGAA, WPGAR and WAR for each player.

    @param year - season to process; converted with int() before filtering
    @raises ValueError - if a player's team league is neither 'NL' nor 'AL',
        or (raised by pandas .item()) when a lookup does not yield exactly
        one value
    '''
    season = int(year)
    session = get_session()
    league_data_df = pd.read_sql_table('league', con=ENGINE)
    team_data_df = pd.read_sql_table('team', con=ENGINE)

    season_leagues = league_data_df[league_data_df['year'] == season]
    mlb_data_df = season_leagues[season_leagues['league'] == 'MLB']
    # Per-league rows for the season; .item() is applied lazily inside the
    # loop so a league with no players never has to be present.
    league_frames = {
        name: season_leagues[season_leagues['league'] == name]
        for name in ('NL', 'AL')
    }
    team_data_df = team_data_df[team_data_df['year'] == season]

    players = session.query(Player).filter(Player.year == season)
    # Bulk UPDATE: FIPR9 = iFIP + MLB-wide constant for the season
    players.update(
        {Player.FIPR9: Player.iFIP + mlb_data_df.get('ciFIP').item()})

    for player in players:
        team = team_data_df[team_data_df.team == player.team]
        player.pFIPR9 = player.FIPR9 / team['PPFp'].item()

        league_name = team['league'].item()
        if league_name not in league_frames:
            # Without this guard the league value was either unbound (first
            # player) or silently reused from the previous player.
            raise ValueError(
                'unsupported league {!r} for team {!r}'.format(
                    league_name, player.team))
        league_FIPR9 = league_frames[league_name]['FIPR9'].item()
        player.RAAP9 = league_FIPR9 - player.pFIPR9

        # NOTE(review): GP == 0 raises ZeroDivisionError here - confirm such
        # rows cannot reach this step.
        innings_per_game = player.IP / player.GP
        start_share = player.GS / player.GP

        player.dRPW = (((((18 - innings_per_game) * league_FIPR9)
                         + (innings_per_game * player.pFIPR9)) / 18)
                       + 2) * 1.5
        player.WPGAA = player.RAAP9 / player.dRPW
        # blend of 0.03 (weighted by non-start share) and 0.12 (weighted by
        # start share); presumably replacement levels - TODO confirm
        repl = 0.03 * (1 - start_share) + 0.12 * start_share
        player.WPGAR = player.WPGAA + repl
        player.WAR = player.WPGAR * (player.IP / 9)
    session.commit()
def load(results):
    '''
    load the player data into the SQL database, keeping only rows with a
    positive AB or IP value

    @param results - dictionary of lists of dictionaries containing all the
        individual player rows of data, keyed by each model's __tablename__
    '''
    model_tables = [entry.__table__ for entry in MODELS]
    BASE.metadata.create_all(tables=model_tables, checkfirst=True)

    db = get_session()
    for model_cls in MODELS:
        table_rows = results[model_cls.__tablename__]
        # Here the dictionary output of the marshmallow schema is converted
        # directly into sqlalchemy objects
        kept = []
        for position, fields in enumerate(table_rows):
            # progress marker every 1000 rows; skipped rows are counted too
            if position % 1000 == 0:
                print('loading...', position)
            if not (fields['AB'] > 0 or fields['IP'] > 0):
                continue
            kept.append(model_cls(**fields))
        # NOTE(review): assumes the bulk save runs once per model, since the
        # object list is rebuilt for each model - confirm.
        db.bulk_save_objects(kept)
    db.commit()