def set_model(num_teams, home_team, away_team, observed_home_goals, observed_away_goals):
    """Build the hierarchical Poisson model for home and away scores.

    Each score is Poisson distributed with a log-linear rate: a shared
    intercept, a home effect (home side only), the scoring side's attack
    effect and the opposing side's defence effect. The per-team effects are
    centred by subtracting their mean.

    Parameters
    ----------
    num_teams
        Length of the per-team attack and defence vectors.
    home_team, away_team
        Indices used to pick each game's home/away entry out of the per-team
        vectors (presumably integer arrays with one entry per game -- confirm
        against the caller).
    observed_home_goals, observed_away_goals
        Observed data handed to the two Poisson likelihoods.

    Returns
    -------
    The ``pm.Model`` containing the priors and both likelihoods. No sampling
    is performed here.
    """
    with pm.Model() as model:
        # Global parameters.
        home_effect = pm.Flat('home')
        attack_scale = pm.HalfStudentT('sd_att', nu=3, sd=2.5)
        defence_scale = pm.HalfStudentT('sd_def', nu=3, sd=2.5)
        baseline = pm.Flat('intercept')

        # Uncentred per-team effects sharing the scales above.
        raw_attack = pm.Normal("atts_star", mu=0, sd=attack_scale, shape=num_teams)
        raw_defence = pm.Normal("defs_star", mu=0, sd=defence_scale, shape=num_teams)

        # Centre the effects by removing their mean.
        attack = pm.Deterministic('atts', raw_attack - tt.mean(raw_attack))
        defence = pm.Deterministic('defs', raw_defence - tt.mean(raw_defence))

        # Linear predictors on the log scale; only the home side gets the
        # home effect.
        log_home_rate = baseline + home_effect + attack[home_team] + defence[away_team]
        log_away_rate = baseline + attack[away_team] + defence[home_team]

        # Likelihood of the observed data.
        pm.Poisson('home_points', mu=tt.exp(log_home_rate),
                   observed=observed_home_goals)
        pm.Poisson('away_points', mu=tt.exp(log_away_rate),
                   observed=observed_away_goals)

    return model
def run(): teams = df.home_team.unique() teams = pd.DataFrame(teams, columns=['team']) teams['i'] = teams.index df = pd.merge(df, teams, left_on='home_team', right_on='team', how='left') df = df.rename(columns = {'i': 'i_home'}).drop('team', 1) df = pd.merge(df, teams, left_on='away_team', right_on='team', how='left') df = df.rename(columns = {'i': 'i_away'}).drop('team', 1) observed_home_goals = df.home_score.values observed_away_goals = df.away_score.values home_team = df.i_home.values away_team = df.i_away.values num_teams = len(df.i_home.drop_duplicates()) num_games = len(home_team) g = df.groupby('i_away') att_starting_points = np.log(g.away_score.mean()) g = df.groupby('i_home') def_starting_points = -np.log(g.away_score.mean()) with pm.Model() as model: # global model parameters home = pm.Flat('home') sd_att = pm.HalfStudentT('sd_att', nu=3, sigma=2.5) sd_def = pm.HalfStudentT('sd_def', nu=3, sigma=2.5) intercept = pm.Flat('intercept') # team-specific model parameters atts_star = pm.Normal("atts_star", mu=0, sigma=sd_att, shape=num_teams) defs_star = pm.Normal("defs_star", mu=0, sigma=sd_def, shape=num_teams) atts = pm.Deterministic('atts', atts_star - tt.mean(atts_star)) defs = pm.Deterministic('defs', defs_star - tt.mean(defs_star)) home_theta = tt.exp(intercept + home + atts[home_team] + defs[away_team]) away_theta = tt.exp(intercept + atts[away_team] + defs[home_team]) # likelihood of observed data home_points = pm.Poisson('home_points', mu=home_theta, observed=observed_home_goals) away_points = pm.Poisson('away_points', mu=away_theta, observed=observed_away_goals) trace = pm.sample(1000, tune=1000, cores=3) pm.traceplot(trace, var_names=['intercept', 'home', 'sd_att', 'sd_def']); bfmi = pm.bfmi(trace) max_gr = max(np.max(gr_stats) for gr_stats in pm.gelman_rubin(trace).values()) (pm.energyplot(trace, legend=False, figsize=(6, 4))
def _build_gains_time(self, Bx_gains):
    """Create the time-dependent gains term and register it as ``gains_time``.

    Registers the dimension names of the variables created here in
    ``self.dims``, builds a per-algorithm scale (a shared scale times a
    HalfNormal vector of length ``self.n_algos``), multiplies it into a
    ``GPExponential`` variable defined on the ``time_raw_gains`` knots, and
    maps the result through ``Bx_gains`` with ``sparse_dot``.

    The shared scale ``gains_time_sd_sd`` has two parametrisations:

    * if ``self.params`` contains ``'log_gains_time_sd_sd_trace_mu'``, that
      entry and ``'log_gains_time_sd_sd_trace_sd'`` are popped and used as
      the mean / sd of a Normal prior on the log of the scale;
    * otherwise the scale gets a HalfStudentT prior and its log is recorded
      as a deterministic.

    Parameters
    ----------
    Bx_gains
        Left operand passed to ``sparse_dot`` (presumably a sparse basis
        matrix over time -- confirm against the caller).

    Returns
    -------
    The tensor that is also registered as the ``'gains_time'`` deterministic.
    """
    self.dims.update({
        'gains_time_sd_raw': ('algo', ),
        'gains_time_sd': ('algo', ),
        'gains_time_raw': ('algo', 'time_raw_gains'),
        'gains_time': ('algo', 'time'),
    })

    n_algos = self.n_algos
    n_knots = len(self.coords['time_raw_gains'])

    alpha = pm.HalfNormal('gains_time_alpha', sd=0.1)

    if 'log_gains_time_sd_sd_trace_mu' not in self.params:
        # No externally supplied summary: put the prior on the scale itself
        # and expose its log as a deterministic.
        shared_sd = pm.HalfStudentT('gains_time_sd_sd', nu=3, sd=0.1)
        pm.Deterministic('log_gains_time_sd_sd', tt.log(shared_sd))
    else:
        # Both entries are consumed (removed) from self.params.
        trace_mu = self.params.pop('log_gains_time_sd_sd_trace_mu')
        trace_sd = self.params.pop('log_gains_time_sd_sd_trace_sd')
        log_shared_sd = pm.Normal('log_gains_time_sd_sd', mu=trace_mu, sd=trace_sd)
        shared_sd = pm.Deterministic('gains_time_sd_sd', tt.exp(log_shared_sd))

    # Per-algorithm scale = shared scale * per-algorithm multiplier.
    sd_multiplier = pm.HalfNormal('gains_time_sd_raw', shape=n_algos)
    per_algo_sd = pm.Deterministic('gains_time_sd', shared_sd * sd_multiplier)

    raw = GPExponential(
        'gains_time_raw',
        mu=0,
        alpha=alpha,
        sigma=1,
        shape=(n_algos, n_knots),
    )

    # Scale each algorithm's row, then apply Bx_gains. The transposes put the
    # knot axis first for sparse_dot and restore the algorithm axis afterwards.
    scaled = per_algo_sd[:, None] * raw
    gains_time = sparse_dot(Bx_gains, scaled.T).T
    pm.Deterministic('gains_time', gains_time)
    return gains_time
def main(args):
    """Fit the attack/defence Poisson model and print actual vs. predicted tables.

    Steps, in order:

    1. Load ``teams`` and the match frame ``df`` via ``load_data()`` and keep
       the rows whose ``split`` column equals ``"train"``.
    2. Build the model (global intercept, home advantage, per-team attack
       and defence effects, Poisson likelihood on ``score1`` / ``score2``)
       and sample it with ``pm.sample``.
    3. Plot forest and trace diagnostics for the global parameters and the
       per-team attack/defence summaries (posterior mean +/- one sd).
    4. Swap the ``"predict"`` rows into the model's data containers, draw
       posterior predictive scores and print ``score_table`` for the full
       data and for train + predicted rows.

    Parameters
    ----------
    args
        Object providing ``num_samples``, ``num_warmup``, ``num_chains``,
        ``num_cores`` and ``rng_seed``, forwarded to ``pm.sample``.
    """
    # Imported locally so this function does not depend on the module's
    # import block; only needed for the concat near the end.
    import pandas as pd

    print("Loading data...")
    teams, df = load_data()
    nt = len(teams)
    train = df[df["split"] == "train"]

    print("Starting inference...")
    with pm.Model() as model:
        # priors
        alpha = pm.Normal("alpha", mu=0, sigma=1)
        sd_att = pm.HalfStudentT("sd_att", nu=3, sigma=2.5)
        sd_def = pm.HalfStudentT("sd_def", nu=3, sigma=2.5)
        home = pm.Normal("home", mu=0, sigma=1)  # home advantage

        # team-specific model parameters
        attack = pm.Normal("attack", mu=0, sigma=sd_att, shape=nt)
        defend = pm.Normal("defend", mu=0, sigma=sd_def, shape=nt)

        # data containers, so the team ids can be replaced for prediction
        home_id = pm.Data("home_data", train["Home_id"])
        away_id = pm.Data("away_data", train["Away_id"])

        # likelihood
        theta1 = tt.exp(alpha + home + attack[home_id] - defend[away_id])
        theta2 = tt.exp(alpha + attack[away_id] - defend[home_id])
        pm.Poisson("s1", mu=theta1, observed=train["score1"])
        pm.Poisson("s2", mu=theta2, observed=train["score2"])

    with model:
        fit = pm.sample(
            draws=args.num_samples,
            tune=args.num_warmup,
            chains=args.num_chains,
            cores=args.num_cores,
            random_seed=args.rng_seed,
        )

    print("Analyse posterior...")
    az.plot_forest(
        fit,
        var_names=("alpha", "home", "sd_att", "sd_def"),
        backend="bokeh",
    )
    az.plot_trace(
        fit,
        var_names=("alpha", "home", "sd_att", "sd_def"),
        backend="bokeh",
    )

    # Attack and defence: posterior mean and sd per team, plus a +/- 1 sd band.
    quality = teams.copy()
    quality = quality.assign(
        attack=fit["attack"].mean(axis=0),
        attacksd=fit["attack"].std(axis=0),
        defend=fit["defend"].mean(axis=0),
        defendsd=fit["defend"].std(axis=0),
    )
    quality = quality.assign(
        attack_low=quality["attack"] - quality["attacksd"],
        attack_high=quality["attack"] + quality["attacksd"],
        defend_low=quality["defend"] - quality["defendsd"],
        defend_high=quality["defend"] + quality["defendsd"],
    )
    plot_quality(quality)

    # Predicted goals and table
    predict = df[df["split"] == "predict"]
    with model:
        pm.set_data({"home_data": predict["Home_id"]})
        pm.set_data({"away_data": predict["Away_id"]})
        predicted_score = pm.sample_posterior_predictive(
            fit, var_names=["s1", "s2"], random_seed=1)

    predicted_full = predict.copy()
    predicted_full = predicted_full.assign(
        score1=predicted_score["s1"].mean(axis=0).round(),
        score1error=predicted_score["s1"].std(axis=0),
        score2=predicted_score["s2"].mean(axis=0).round(),
        score2error=predicted_score["s2"].std(axis=0),
    )
    # DataFrame.append was deprecated in pandas 1.4 and removed in 2.0;
    # pd.concat gives the same row-wise stacking (original index kept) on
    # every pandas version.
    predicted_full = pd.concat(
        [train, predicted_full.drop(columns=["score1error", "score2error"])])

    print(score_table(df))
    print(score_table(predicted_full))
g=n) Y_tensor = tt.matrix("Y") (s, P, ll), _ = K.filter(Y_tensor) kf = theano.function(inputs=[Y_tensor, sv_tnsr], outputs=[s, P, ll], mode=theano.Mode(optimizer="unsafe")) s, P, ll = kf(Y, 2 * np.ones(m)) import pymc3 as pm with pm.Model() as model: # Phi, Q, L, c, H, Sv, d, s0, P0, n, m, g phi = pm.Normal("phi", shape=(1, 1)) q = pm.HalfStudentT("q", nu=1.0, sd=2.0, shape=(1, 1)) K = KalmanFilter("kf", phi, q, np.array([[1.]]), np.array([0.]), np.array([[1.]]), np.array([[0.0]]), np.array([0.]), np.array([0.]), np.array([[10.]]), 1, 1, 1, observed=y)
return df if __name__ == '__main__': df = get_tidy_data() obs_h_score = df.home_score.values obs_a_score = df.away_score.values home_team = df.i_home.values away_team = df.i_away.values num_teams = max(home_team) + 1 with pm.Model() as model: # home court advantage! home = pm.Flat('home') sd_atk = pm.HalfStudentT('sd_atk', nu=3, sd=2.5) sd_def = pm.HalfStudentT('sd_def', nu=3, sd=2.5) # intercept intercept = pm.Flat('intercept') # team-specific parameters # shape parameter for vector of values atks_star = pm.Normal('atks_star', mu=0, sd=sd_atk, shape=num_teams) defs_star = pm.Normal('defs_star', mu=0, sd=sd_def, shape=num_teams) # transformation atks = pm.Deterministic('atks', atks_star - tt.mean(atks_star)) defs = pm.Deterministic('defs', defs_star - tt.mean(defs_star)) # theta as a function of parameters
# Keyword arguments forwarded to cov19.model.Cov19Model below.
params_model = {
    "new_cases_obs": new_cases_obs[:],
    "data_begin": data_begin,
    "fcast_len": num_days_forecast,
    "diff_data_sim": diff_data_sim,
    "N_population": args.population,
}

# Median of the prior for the delay in case reporting; we assume 10 days.
pr_delay = 10

# Create the model compartments.
with cov19.model.Cov19Model(**params_model) as this_model:
    # One shared HalfStudentT variable is written into the "pr_sigma_lambda"
    # entry of every change point except the first.
    sigma_lambda = pm.HalfStudentT(name="sigma_lambda_cps", nu=4, sigma=0.5)
    for i, cp in enumerate(change_points[1:]):
        cp["pr_sigma_lambda"] = sigma_lambda

    # Time-dependent infection rate lambda built from the change points
    # (presumably on the log scale, judging by the variable name -- confirm
    # against cov19.model.lambda_t_with_sigmoids).
    lambda_t_log = cov19.model.lambda_t_with_sigmoids(
        pr_median_lambda_0=0.4,
        pr_sigma_lambda_0=0.5,
        # The change point priors we constructed earlier.
        change_points_list=change_points,
        # Name for the variable in the trace (see later).
        name_lambda_t="lambda_t",
    )

    # Prior distribution for the recovery rate: log-normal with log-scale
    # location log(1/8) and log-scale sigma 0.2.
    mu = pm.Lognormal(name="mu", mu=np.log(1 / 8), sigma=0.2)
home_team = df.i_home.values away_team = df.i_away.values num_teams = len(df.i_home.drop_duplicates()) num_games = len(home_team) g = df.groupby('i_away') att_starting_points = np.log(g.away_score.mean()) g = df.groupby('i_home') def_starting_points = -np.log(g.away_score.mean()) with pm.Model() as model: # global model parameters home = pm.Flat('home') sd_att = pm.HalfStudentT('sd_att', nu=3, sigma=2.5) sd_def = pm.HalfStudentT('sd_def', nu=3, sigma=2.5) intercept = pm.Flat('intercept') # team-specific model parameters atts_star = pm.Normal("atts_star", mu=0, sigma=sd_att, shape=num_teams) defs_star = pm.Normal("defs_star", mu=0, sigma=sd_def, shape=num_teams) atts = pm.Deterministic('atts', atts_star - tt.mean(atts_star)) defs = pm.Deterministic('defs', defs_star - tt.mean(defs_star)) home_theta = tt.exp(intercept + home + atts[home_team] + defs[away_team]) away_theta = tt.exp(intercept + atts[away_team] + defs[home_team]) # likelihood of observed data home_points = pm.Poisson('home_points', mu=home_theta,
# Per-game observations and team indices pulled out of the match frame.
observed_home_goals = df["home_score"].values
observed_away_goals = df["away_score"].values
home_team = df["i_home"].values
away_team = df["i_away"].values

# Number of distinct home-team indices / number of games.
num_teams = len(df["i_home"].drop_duplicates())
num_games = len(home_team)

# define model
with pm.Model() as model:
    # Global model parameters: flat priors on the home effect and the
    # intercept, half-Student-t priors on the two group-level scales.
    home = pm.Flat("home")
    sd_att = pm.HalfStudentT("sd_att", nu=3, sigma=2.5)
    sd_def = pm.HalfStudentT("sd_def", nu=3, sigma=2.5)
    intercept = pm.Flat("intercept")

    # Team-specific parameters, uncentred ...
    atts_star = pm.Normal("atts_star", mu=0, sigma=sd_att, shape=num_teams)
    defs_star = pm.Normal("defs_star", mu=0, sigma=sd_def, shape=num_teams)

    # ... and centred by subtracting their mean.
    atts = pm.Deterministic("atts", atts_star - tt.mean(atts_star))
    defs = pm.Deterministic("defs", defs_star - tt.mean(defs_star))

    # Poisson rates: exp of the linear predictor; only the home side gets
    # the home effect.
    home_theta = tt.exp(
        intercept + home + atts[home_team] + defs[away_team]
    )
    away_theta = tt.exp(
        intercept + atts[away_team] + defs[home_team]
    )

    # Likelihood of the observed data.
    home_goals = pm.Poisson(
        "home_goals", mu=home_theta, observed=observed_home_goals
    )
    away_goals = pm.Poisson(
        "away_goals", mu=away_theta, observed=observed_away_goals
    )
# Initial values: log of the mean away tries / penalties, grouped by away
# team for the attack side and (negated) grouped by home team for the
# defence side.
by_away = df.groupby('i_away')
by_home = df.groupby('i_home')

att_tries_init = np.log(by_away.away_tries.mean())
def_tries_init = -np.log(by_home.away_tries.mean())
att_pens_init = np.log(by_away.away_pens.mean())
def_pens_init = -np.log(by_home.away_pens.mean())

# `g` stays bound to the home-side grouping in case later code reads it.
g = by_home

with pm.Model() as model:
    # Global model parameters.
    # Flat priors on the home-advantage terms, one per score type.
    home_tries = pm.Flat('home_tries')
    home_pens = pm.Flat('home_pens')

    # Half-Student-t priors on the group-level scales.
    sd_att_tries = pm.HalfStudentT('sd_att_tries', nu=3, sd=2.5)
    sd_def_tries = pm.HalfStudentT('sd_def_tries', nu=3, sd=2.5)
    sd_att_pens = pm.HalfStudentT('sd_att_pens', nu=3, sd=2.5)
    sd_def_pens = pm.HalfStudentT('sd_def_pens', nu=3, sd=2.5)
    # sd_att_drops = pm.HalfStudentT('sd_att_drops', nu=3, sd=2.5)
    # sd_def_drops = pm.HalfStudentT('sd_def_drops', nu=3, sd=2.5)

    # Flat priors on the intercepts.
    intercept_tries = pm.Flat('intercept_tries')
    intercept_pens = pm.Flat('intercept_pens')
    # intercept_drops = pm.Flat('intercept_drops')

    # Team-specific model parameters: one zero-mean Normal vector of length
    # num_teams per (attack/defence, tries/pens) combination.
    atts_tries_star = pm.Normal(
        "atts_tries_star", mu=0, sd=sd_att_tries, shape=num_teams)
    defs_tries_star = pm.Normal(
        "defs_tries_star", mu=0, sd=sd_def_tries, shape=num_teams)
    atts_pens_star = pm.Normal(
        "atts_pens_star", mu=0, sd=sd_att_pens, shape=num_teams)
    defs_pens_star = pm.Normal(
        "defs_pens_star", mu=0, sd=sd_def_pens, shape=num_teams)