def test_update_scores_missing(self, mock_scores, mock_games_missing):
    """Games absent from the games data get placeholder '-' scores."""
    initial, expected = mock_scores
    # The third prediction has no matching game row, so update_scores
    # should fall back to '-' for both sides.
    expected['GamePredictions'][2]['score'] = {'home': '-', 'away': '-'}
    result = update_scores(initial, mock_games_missing)
    assert result == expected, "Updated missing scores do not match expected!"
def update_previous_record(bucket_name, pipeline_name, job_id, last_pred_date, season_start):
    """Refresh the previous day's prediction record with final scores.

    Reads the games CSV and the last-prediction JSON produced by a pipeline
    run from S3, fills in final game scores, computes the rolling model
    performance if the record does not already carry one, and writes the
    updated record back to DynamoDB.

    Args:
        bucket_name: S3 bucket holding the pipeline artifacts.
        pipeline_name: Pipeline prefix within the bucket.
        job_id: Job-run prefix within the pipeline.
        last_pred_date: ISO date string ("YYYY-MM-DD") of the record to update.
        season_start: ISO date string of the season's first game date.
    """
    # Get the games CSV from s3
    endpoint_url = os.getenv("AWS_ENDPOINT_URL")
    # NOTE(review): os.getenv returns a string (or None); a value like "false"
    # is truthy when passed as use_ssl — confirm the expected env values.
    use_ssl = os.getenv("AWS_USE_SSL")
    s3 = s3fs.S3FileSystem(client_kwargs={
        "endpoint_url": endpoint_url,
        "use_ssl": use_ssl,
    })
    with s3.open(f"{bucket_name}/{pipeline_name}/{job_id}/games.csv", "rb") as f:
        games = pd.read_csv(f)
    # Get the last record JSON from S3
    with s3.open(f"{bucket_name}/{pipeline_name}/{job_id}/lastpred.json", "rb") as f:
        last_pred = json.load(f)
    # Update the scores for the previous record
    updated_last_pred = update_scores(last_pred, games)
    # Update the model performance if it does not exist
    if "ModelPerformance" not in last_pred:
        # Get model performance for the last prediction
        last_pred_dt = dt.date.fromisoformat(last_pred_date)
        # ISO "YYYY-MM-DD" strings compare correctly lexicographically, so a
        # plain string comparison against season_start is safe here.
        if last_pred_date < season_start:
            # Before the season starts, back up far enough to fill the
            # performance window (perf_ws is a module-level window size).
            performance_start_date = (
                last_pred_dt - dt.timedelta(days=perf_ws + 1)).strftime("%Y-%m-%d")
            season_db_records = query_dynamodb(performance_start_date)
        else:
            season_db_records = query_dynamodb(season_start)
        # Replace the stored copy of the last record with the score-updated one
        # so performance is computed against final scores.
        season_db_records[-1] = updated_last_pred
        model_perf = prediction_performance(season_db_records, games, ws=perf_ws)
        number_cols = ["cum_acc", "rolling_acc", "cum_ll", "rolling_ll"]
        # Format metrics as fixed 5-decimal strings for storage/display.
        # NOTE(review): DataFrame.applymap is deprecated in pandas >= 2.1
        # (use DataFrame.map) — confirm the pinned pandas version.
        model_perf[number_cols] = model_perf[number_cols].applymap(
            "{:,.5f}".format)
        perf_start_date = (last_pred_dt
                           - dt.timedelta(days=perf_ws - 1)).strftime("%Y-%m-%d")
        # Keep only the trailing perf_ws-day window ending on last_pred_date.
        perf_idx = (model_perf["date"] >= perf_start_date) & (model_perf["date"] <= last_pred_date)
        model_perf_json = model_perf[perf_idx].to_dict(orient="records")
        updated_last_pred["ModelPerformance"] = model_perf_json
    # NOTE(review): placement of the log + put outside the `if` is inferred
    # from the parallel flow in main() — the record is written back whether or
    # not performance was recomputed; confirm against the original layout.
    logger.info("Updated scores and performance for item with "
                f"League=nhl and date={last_pred_date}")
    put_dynamodb_item(updated_last_pred)
    return
def test_update_scores(self, mock_scores, mock_games):
    """Blank scores are filled in from the games data frame."""
    initial, expected = mock_scores
    result = update_scores(initial, mock_games)
    assert result == expected, "Updated scores do not match expected!"
def main():
    """Daily NHL prediction job.

    Fetches a year of game data, updates the most recent stored prediction
    with final scores and rolling model performance, backfills predictions
    for any game dates since that record, and syncs the list of prediction
    dates to the web S3 bucket.
    """
    today_dt = dt.date.today()
    today = today_dt.strftime('%Y-%m-%d')
    # Pull a trailing year of games so the current season is fully covered.
    start_date = (today_dt - dt.timedelta(days=365)).strftime('%Y-%m-%d')
    games = fetch_nhl_data_by_dates(start_date, today)
    games = games[games['game_type'] != 'A']  # No All Star games
    # Get the first date of the season
    # NOTE(review): this raises IndexError if no game row exists for today's
    # date — presumably the job only runs on game days; confirm.
    current_season = games.loc[games['game_date'] == today]['season'].values[0]
    season_start = games[games['season'] == current_season]['game_date'].min()
    teams = get_unique_teams(games)
    teams_to_int, int_to_teams = get_teams_int_maps(teams)
    n_teams = len(teams)
    # Drop games with non-nhl teams (usually preseason exhibition games)
    valid_rows = (games['home_team'].isin(teams)
                  & games['away_team'].isin(teams))
    games = games[valid_rows]
    # Get last_pred
    last_pred = most_recent_dynamodb_item('nhl', today)
    last_pred_date = last_pred['PredictionDate']
    last_pred_dt = dt.date.fromisoformat(last_pred_date)
    logger.info(f'Most recent prediction is from {last_pred_date}')
    # Update scores in the last prediction
    updated_last_pred = update_scores(last_pred, games)
    # Get model performance for the last prediction
    # ISO date strings compare correctly as plain strings.
    if last_pred_date < season_start:
        # Before the season starts, back up far enough to fill the
        # perf_ws-day performance window (perf_ws is a module global).
        performance_start_date = (
            last_pred_dt - dt.timedelta(days=perf_ws + 1)).strftime('%Y-%m-%d')
        season_db_records = query_dynamodb(performance_start_date)
    else:
        season_db_records = query_dynamodb(season_start)
    # Swap in the score-updated record so performance uses final scores.
    season_db_records[-1] = updated_last_pred
    model_perf = prediction_performance(season_db_records, games, ws=perf_ws)
    number_cols = ['cum_acc', 'rolling_acc', 'cum_ll', 'rolling_ll']
    # Fixed 5-decimal string formatting for storage/display.
    # NOTE(review): applymap is deprecated in pandas >= 2.1 (use .map) —
    # confirm the pinned pandas version.
    model_perf[number_cols] = model_perf[number_cols].applymap(
        '{:,.5f}'.format)
    perf_start_date = (last_pred_dt
                       - dt.timedelta(days=perf_ws - 1)).strftime('%Y-%m-%d')
    # Keep only the trailing perf_ws-day window ending on last_pred_date.
    perf_idx = (model_perf['date'] >= perf_start_date) & (model_perf['date'] <= last_pred_date)
    model_perf_json = model_perf[perf_idx].to_dict(orient='records')
    updated_last_pred['ModelPerformance'] = model_perf_json
    logger.info(
        f'Updated scores and performance for item with League=nhl and date={last_pred_date}'
    )
    # Put updated DynamoDB item back into database
    put_dynamodb_item(updated_last_pred)
    # Backfill missing predictions
    game_dates = games['game_date'].drop_duplicates()
    new_pred_dates = [gd for gd in game_dates if gd > last_pred_date]
    if len(new_pred_dates) == 0:
        logger.info(f'No new games to predict on.')
        return
    # Get last_pred posteriors to use as priors
    priors = last_pred['ModelVariables']
    priors = model_vars_to_numeric(priors, teams_to_int)
    for gd in new_pred_dates:
        logger.info(f'Generating new NHL model predictions for {gd}')
        # Get the most recent game date played
        prev_gd = max([gd2 for gd2 in game_dates if gd2 < gd])
        # Observe results from the previous game date (skipping postponed
        # games) and advance the model one step.
        obs_idx = (games['game_date'] == prev_gd) & (games['game_state'] != 'Postponed')
        obs_data = games[obs_idx].reset_index(drop=True)
        obs_data = model_ready_data(obs_data, teams_to_int)
        posteriors = model_update(obs_data, priors, n_teams, fattening_factor,
                                  f_thresh, delta_sigma)
        # Today's posteriors become the next iteration's priors.
        priors = posteriors.copy()
        pred_idx = (games['game_date'] == gd) & (games['game_state'] != 'Postponed')
        games_to_predict = games[pred_idx].reset_index(drop=True)
        game_preds = game_predictions(games_to_predict, posteriors, teams_to_int)
        record = create_dynamodb_item(gd, posteriors, int_to_teams,
                                      teams_to_int, metadata,
                                      game_preds=game_preds)
        logger.info(f'Generated predictions for League=nhl and date={gd}')
        put_dynamodb_item(record)
    # Add new pred_dates to the S3 Bucket
    bucket_name = os.getenv('WEB_S3_BUCKET')
    region = os.getenv('AWS_REGION')
    endpoint_url = os.getenv('AWS_ENDPOINT_URL')
    # NOTE(review): os.getenv returns a string; a value like "false" is
    # truthy when passed as use_ssl — confirm the expected env values.
    use_ssl = os.getenv('AWS_USE_SSL')
    s3 = boto3.client('s3', region_name=region, endpoint_url=endpoint_url,
                      use_ssl=use_ssl)
    # Download the existing date list, append the new dates, re-upload.
    with open('pred_dates.json', 'wb') as f:
        s3.download_fileobj(bucket_name, 'pred_dates.json', f)
    with open('pred_dates.json', 'r') as f:
        pred_dates = json.load(f)
    pred_dates = pred_dates + new_pred_dates
    with open('pred_dates.json', 'w') as f:
        f.write(json.dumps(pred_dates))
    with open('pred_dates.json', 'rb') as f:
        s3.upload_fileobj(f, bucket_name, 'pred_dates.json')
    return