def main():

    logger = get_root_logger()
    get_header(logger, 'LOADING PROJECTIONS')

    client = APIClient()

    # grab dataframe shape from a trial run
    data = client.get_data('weekly-projections', 'json', 'QB')
    test_df = json_normalize(data['Projections'])

    # get DF structure from columns in test_df
    cols = test_df.columns
    df = DataFrame(columns=cols)

    # grab current week
    current_week = test_df.week.values[0]

    # loop through all weeks up to current week
    for wk in [str(x) for x in range(int(current_week))]:
        logger.info('Processing projections for week {0}'.format(int(wk) + 1))

        # loop through all positions
        for pos in ['QB', 'RB', 'WR', 'TE', 'K', 'DEF']:
            tmp_data = client.get_data('weekly-projections', 'json', pos, wk)
            tmp_df = json_normalize(tmp_data['Projections'])
            df = df.append(tmp_df)

    # import this df directly to PG DB
    conn = DBClient()
    conn.load(df, 'projections', schema='raw', if_exists='replace')
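# Note: DataFrame.append was deprecated in pandas 1.4 and removed in 2.0, and
# json_normalize now lives at the top level of pandas. A minimal sketch of the
# same week/position accumulation using pandas.concat, assuming the APIClient
# interface used above (collect_projections is an illustrative name, not from
# the source):
import pandas as pd


def collect_projections(client, current_week):
    frames = []
    for wk in range(int(current_week)):
        for pos in ['QB', 'RB', 'WR', 'TE', 'K', 'DEF']:
            data = client.get_data('weekly-projections', 'json', pos, str(wk))
            frames.append(pd.json_normalize(data['Projections']))
    # concatenate once at the end instead of appending inside the loop
    return pd.concat(frames, ignore_index=True)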
def main():

    logger = get_root_logger()
    get_header(logger, "Importing Bye weeks")

    client = APIClient()
    data = client.get_data("byes")

    df = None
    for key in data.keys():
        # build DF the first time through the loop
        if df is None:
            df = json_normalize(data[key])
        # append every other time
        else:
            df = df.append(json_normalize(data[key]))

    # import this df directly to PG DB
    conn = DBClient()
    conn.load(df, "byes", schema="raw", if_exists="replace")
def main():

    path = '{0}/data/salaries/*'.format(getenv('BASEDIR'))
    files_list = glob(path)

    for i, f in enumerate(files_list):
        df = read_csv(f, sep=';')
        df.rename(columns={
            'h/a': 'homeoraway',
            'DK points': 'dk_proj_points',
            'DK salary': 'dk_salary'
        }, inplace=True)

        # first iteration, create a new df
        if i == 0:
            all_df = df
        # subsequent iterations, append to it
        else:
            all_df = all_df.append(df)

    conn = DBClient()
    conn.load(all_df, 'salaries', if_exists='replace', schema='raw')
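# Sketch: the loop above leaves all_df undefined if no salary files match the
# glob, and repeated DataFrame.append copies the accumulated frame on every
# iteration. A hedged alternative that reads every file, applies the same
# renames, and concatenates once (read_salaries is an illustrative name; the
# column map mirrors the one above):
import pandas as pd

RENAMES = {
    'h/a': 'homeoraway',
    'DK points': 'dk_proj_points',
    'DK salary': 'dk_salary',
}


def read_salaries(files_list):
    if not files_list:
        raise FileNotFoundError('no salary files found under $BASEDIR/data/salaries')
    frames = [pd.read_csv(f, sep=';').rename(columns=RENAMES) for f in files_list]
    return pd.concat(frames, ignore_index=True)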
#!/home/jjardel/fb/pkgs/envs/etl/bin/python

from api_client import APIClient
from db_client import DBClient

from pandas.io.json import json_normalize

client = APIClient()
data = client.get_data('nfl-teams')

df = json_normalize(data['NFLTeams'])

# import this df directly to PG DB
conn = DBClient()
conn.load(df, 'teams', schema='raw', if_exists='replace')
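# Sketch: unlike the other loaders, this script runs its load at import time.
# A possible restructuring that matches the main()-style of the other scripts
# and keeps the module importable without side effects (same APIClient/DBClient
# interface as above, assumed):
def main():
    client = APIClient()
    data = client.get_data('nfl-teams')
    df = json_normalize(data['NFLTeams'])

    conn = DBClient()
    conn.load(df, 'teams', schema='raw', if_exists='replace')


if __name__ == '__main__':
    main()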
def load(self, df, table):

    # import to DB
    conn = DBClient()
    conn.load(df, table, schema='raw', if_exists='replace')

    self.logger.info('Successfully loaded {0} rows into {1}'.format(len(df), table))
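# Sketch: DBClient.load is not shown in this section; a loader with this
# signature typically wraps DataFrame.to_sql. A minimal, assumed implementation
# using SQLAlchemy with a Postgres connection string taken from the environment
# (DBClientSketch and the PG_CONN_STRING variable are illustrative, not from
# the source):
from os import getenv

from sqlalchemy import create_engine


class DBClientSketch:

    def __init__(self):
        # e.g. postgresql://user:password@host:5432/dbname
        self.engine = create_engine(getenv('PG_CONN_STRING'))

    def load(self, df, table, schema='raw', if_exists='replace'):
        # pandas handles table creation and the bulk insert via to_sql
        df.to_sql(table, self.engine, schema=schema, if_exists=if_exists, index=False)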