def main(trainkey, predkey, outputkey, inference_method='', ncores='', nchains=1, niters='1500', redishost='10.42.72.93'):
    panthera = redishost
    conn = redis.StrictRedis(host=panthera, password='******')
    #predkey = 'p-50x50-guerrero-4'
    #trainkey = 't-luca-guerrero-4'
    #outputkey = 'test-model'
    PDF = preparePredictors(loadDataFrameFromRedis(predkey, conn))
    TDF = loadDataFrameFromRedis(trainkey, conn)
    formula = 'LUCA ~ Longitude + Latitude + Q("Dist.to.road_m") + Population_m + name'
    TM, PM = splitByFormula(formula, TDF, PDF['clean'])
    logger.info("Start modelling inference")
    model = ModelSamplingEffort(TM, PM)
    trace = SampleModel(model, inference_method=inference_method,
                        ncores=ncores, nchains=nchains, niters=niters)
    logger.info("Saving trace")
    try:
        pm.save_trace(trace,
                      directory='/storage/users/escamill/presence-only-model/output/rawtrace',
                      overwrite=True)
    except Exception:
        logger.error("not possible to save trace")
    tracedf = pm.trace_to_dataframe(trace)
    tracedf.to_csv('/storage/users/escamill/presence-only-model/output/trace%s.csv' % outputkey,
                   encoding='utf8')
    try:
        pred_sample = SamplePredictions(model, TM, PM, trace)
    except Exception:
        logger.error("something went wrong")
        raise
    pred_sample.to_csv('/storage/users/escamill/presence-only-model/output/pred_cond-%s.csv' % outputkey,
                       encoding='utf8')
    # pred_sample is a dictionary
    with open('/storage/users/escamill/presence-only-model/output/pred%s.pickle' % outputkey, 'wb') as fh:
        pickle.dump(pred_sample, fh)
    #conn.set(outputkey+'-df', pickle.dumps(tracedf))
    #conn.set(outputkey+'-trace', pickle.dumps(pred_sample))
    logger.info("Finished!")
def run(sir_model, N_SAMPLES, cluster_save_path):
    print('sample start')
    with sir_model:
        trace = pm.sample(N_SAMPLES, model=sir_model, step=pm.Metropolis(), progressbar=True)
        pm.save_trace(trace, cluster_save_path + 'sir_model.trace', overwrite=True)
    print('sample end')

    # -------- prepare data for visualization ---------------
    varnames = get_all_free_RVs_names(sir_model)
    #for varname in varnames:
    #    visualize_trace(trace[varname][:, None], varname, N_SAMPLES)
    lambda_t = np.median(trace['lambda_t'][:, :], axis=0)
    μ = np.median(trace['mu'][:, None], axis=0)

    # -------- visualize histogram ---------------
    num_cols = 5
    num_rows = int(np.ceil(len(varnames) / num_cols))
    x_size = num_cols * 2.5
    y_size = num_rows * 2.5
    fig, axes = plt.subplots(num_rows, num_cols, figsize=(x_size, y_size), squeeze=False)
    i_ax = 0
    for i_row, axes_row in enumerate(axes):
        for i_col, ax in enumerate(axes_row):
            if i_ax >= len(varnames):
                ax.set_visible(False)
                continue
            plot_hist(sir_model, trace, ax, varnames[i_ax], colors=('tab:blue', 'tab:green'))
            if i_col == 0:
                ax.set_ylabel('Density')
            if i_col == 0 and i_row == 0:
                ax.legend()
            i_ax += 1
    fig.subplots_adjust(wspace=0.25, hspace=0.4)
    plt.savefig(cluster_save_path + 'plot_hist.png')
    plt.clf()

    np.save(cluster_save_path + 'varnames.npy', varnames)
    np.save(cluster_save_path + 'SIR_params.npy', [lambda_t, μ])
def save_traces(trace):
    fname = {}
    folder_name = 'traces'
    if not os.path.exists(folder_name):
        os.makedirs(folder_name)
    for key, val in trace.items():
        fname[key] = pm.save_trace(val, directory=folder_name + '/' + key, overwrite=True)
    return fname  # maps each key to the directory its trace was saved in
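# A counterpart loader is sketched below (hypothetical, not from the original
# code): pm.load_trace needs the model that generated each trace, so the
# caller has to supply a {key: pm.Model} mapping.
def load_traces(models, folder_name='traces'):
    traces = {}
    for key, model in models.items():
        # save_traces above wrote each trace to traces/<key>
        traces[key] = pm.load_trace(folder_name + '/' + key, model=model)
    return traces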
def run_group_inference(self, ndraws=300, nburn=100, cores=5):
    curr_model = self.group_model()
    with curr_model:
        step = pm.Metropolis()  # S=np.ones(1)*0.01
        trace = pm.sample(ndraws, tune=nburn, discard_tuned_samples=True, step=step, cores=cores)
        # plot the traces
        # plt.figure()
        # _ = pm.traceplot(trace)  # , lines=('h', 1./alpha_true))
        # plt.show()
        # plt.figure()
        # _ = pm.plot_posterior(trace, var_names=['h'], ref_val=(1./alpha_true))
        # plt.show()
        # save the traces
        fname = pm.save_trace(trace)
    return fname
def run_on_all_gals():
    sid_list = pd.read_csv('lib/subject-id-list.csv').values.T[0]
    for subject_id in sid_list:
        traces_dir = os.path.join('uniform-traces', str(subject_id))
        if os.path.isdir(traces_dir):
            continue
        try:
            arms = get_arms(subject_id)
        except IndexError:
            continue
        print('Working on', subject_id)
        trace = get_logsp_trace_from_arms(arms)
        try:
            os.mkdir(traces_dir)
        except FileExistsError:
            pass
        pm.save_trace(trace, directory=traces_dir, overwrite=True)
def test_save_new_model(self, tmpdir_factory):
    directory = str(tmpdir_factory.mktemp("data"))
    save_dir = pm.save_trace(self.trace, directory, overwrite=True)
    assert save_dir == directory

    with pm.Model() as model:
        w = pm.Normal("w", 0, 1)
        new_trace = pm.sample(return_inferencedata=False)

    with pytest.raises(OSError):
        _ = pm.save_trace(new_trace, directory)

    _ = pm.save_trace(new_trace, directory, overwrite=True)
    with model:
        new_trace_copy = pm.load_trace(directory)

    assert (new_trace["w"] == new_trace_copy["w"]).all()
def create_and_run_model(
        cases_filename, covariates_filename, dist_params,
        num_baseline_intensities, num_draws=100, num_burn=100,
        fixed_r_c=None, fixed_r_h=None, discharges_filename=None,
        output_prefix=Path(''), step='slice'):
    """
    Perform a fit of a given set of cases, covariates, and optionally
    discharges, with specified distribution parameters, using the likelihood
    form defined in likelihood.py.
    Output the MCMC history to disk.
    Output a plot of the history to disk.
    Output a summary of the fitted parameters to the screen and to disk.
    """
    # output_prefix is expected to be a pathlib.Path; its .parent and .name
    # are used to build the output file names below.
    likelihood_obj = CareHomeLikelihood(
        cases_filename, covariates_filename, dist_params, discharges_filename,
        fixed_r_c=fixed_r_c, fixed_r_h=fixed_r_h
    )
    model = get_model(
        likelihood_obj,
        fixed_r_c=fixed_r_c,
        fixed_r_h=fixed_r_h,
        num_baseline_intensities=num_baseline_intensities
    )
    trace = mcmc_fit(model, num_draws=num_draws, num_burn=num_burn, step=step)
    plot_trace(
        trace,
        output_prefix.parent / (output_prefix.name + 'traceplot.pdf')
    )
    pm.save_trace(
        trace,
        output_prefix.parent / (output_prefix.name + 'trace.dat')
    )
    summary = pm.summary(trace, round_to="none")
    print_result(
        summary,
        likelihood_obj,
        filename=output_prefix.parent / (output_prefix.name + 'summary.txt')
    )
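# A hypothetical invocation of create_and_run_model (the file names and the
# dist_params value are placeholders, not from the original code); the prefix
# is a Path so the .parent/.name handling above works:
from pathlib import Path

dist_params = {}  # placeholder; the real structure is whatever CareHomeLikelihood expects
create_and_run_model(
    'cases.csv', 'covariates.csv', dist_params,
    num_baseline_intensities=3,
    num_draws=500, num_burn=200,
    output_prefix=Path('results/care_home_'),
)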
def test_save_new_model(self, tmpdir_factory):
    directory = str(tmpdir_factory.mktemp('data'))
    save_dir = pm.save_trace(self.trace, directory, overwrite=True)
    assert save_dir == directory

    with pm.Model() as model:
        w = pm.Normal('w', 0, 1)
        new_trace = pm.sample()

    with pytest.raises(OSError):
        _ = pm.save_trace(new_trace, directory)

    _ = pm.save_trace(new_trace, directory, overwrite=True)
    with model:
        new_trace_copy = pm.load_trace(directory)

    assert (new_trace['w'] == new_trace_copy['w']).all()
def add_receiver_theory_points(
        ds, output_file=None,
        theory_ds='POLARBEAR/pb2a_cryostat_japan_measurements.txt'):
    if type(ds) is str:
        ds = mf.Dataset(from_file=ds)
    elif type(ds) is mf.Dataset:
        ds = ds.copy()
    else:
        raise ValueError('Type of dataset argument not recognized.')

    if type(theory_ds) is str:
        theoryds = mf.Dataset(from_file=theory_ds)
    elif type(theory_ds) is mf.Dataset:
        theoryds = theory_ds.copy()
    else:
        raise ValueError('Type of theory dataset argument not recognized.')

    dsuse = ds.subset_from_labels(MEASURED_RECEIVER_TARGET_NAMES)
    with mf.AlignDatasets(ds1=dsuse, ds2=theoryds, fitmap={'s': False}) as tamodel:
        tatrace = pm.sample(2000, tune=5500, init='advi+adapt_diag',
                            nuts_kwargs={'target_accept': .90,
                                         'max_treedepth': 25},
                            error_scale1=1., error_scale2=1.)
        pm.save_trace(tatrace)

    pm.traceplot(tatrace)
    plt.show()

    fptheory = theoryds.subset_from_marker('FOCALPLANE')
    pos, err = tamodel.use_transform_trace(fptheory.to_tensors(), tatrace)
    newtheoryarray = mf.DatasetArrays(pos=np.mean(pos, axis=0),
                                      err=np.std(pos, axis=0),
                                      serr=np.std(pos, axis=0))
    newfptheory = fptheory.remake_from_arrays(newtheoryarray)
    for p in newfptheory.values():
        ds.add_point(p)
    print(ds)
def test_save_and_load(self, tmpdir_factory):
    directory = str(tmpdir_factory.mktemp('data'))
    save_dir = pm.save_trace(self.trace, directory, overwrite=True)
    assert save_dir == directory

    trace2 = pm.load_trace(directory, model=TestSaveLoad.model())
    for var in ('x', 'z'):
        assert (self.trace[var] == trace2[var]).all()
def sampleandsave(f):
    """Sample from the model in context, caching the trace in directory `f`."""
    if not exists(f):
        # sample and save
        trace = pm.sample(8000, tune=2000, chains=1)
        pm.save_trace(trace, f)
        pm.traceplot(trace, compact=True)
        rcParams["font.size"] = 14
        plt.savefig(f"{f}/traceplot.png")
        ppc = pm.sample_posterior_predictive(trace)["$Y$"]
        np.savez_compressed(f"{f}/ppc.npz", ppc)
    else:
        trace = pm.load_trace(f)
    return trace
def save(self, output, trace=None):
    if trace is not None:
        trace_fname = pm.save_trace(trace)
    else:
        trace_fname = None
    with open(output, "wb") as buff:
        pickle.dump(
            {
                'galaxies': self.galaxies,
                'trace': trace_fname,
                'n_chains': trace.nchains if trace is not None else None,
            },
            buff)
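# A counterpart loader for the save method above (a sketch; `load_galaxies`
# is a hypothetical name, and pm.load_trace needs the model that generated
# the trace passed in as `model`):
import pickle
import pymc3 as pm

def load_galaxies(path, model=None):
    with open(path, "rb") as buff:
        data = pickle.load(buff)
    # data['trace'] holds the directory name returned by pm.save_trace, or None
    trace = pm.load_trace(data['trace'], model=model) if data['trace'] else None
    return data['galaxies'], trace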
def test_pm():
    # This takes 5 min to run
    # Hiding this import in here
    import pymc3 as pm
    parm_dict = mcmc.grab_parmdict()
    outroot = os.path.join(resource_filename('frb', 'tests'), 'files', 'mcmc')

    with mcmc.pm_four_parameter_model(parm_dict, beta=3.):
        # Sample
        #trace = pm.sample(40000, tune=2000)  # This defaults to 4 chains
        trace = pm.sample(1000, tune=500)  # This defaults to 4 chains
        # Save the traces -- needs to be done before the plot
        pm.save_trace(trace, directory=outroot, overwrite=True)
        print("All done with the 4 parameter, beta=3 run")
        # Save a plot
        plt.clf()
        _ = pm.plot_trace(trace)
        #plt.savefig(os.path.join(outroot, 'traceplot.png'))
        # Parameters
        jdict = utils.jsonify(parm_dict)
        utils.savejson(os.path.join(outroot, 'parms.json'), jdict, easy_to_read=True)
def write_traces(self, destination, user=None):
    """
    Create a directory for each user. In each user directory create folders
    for each model. Each chain goes inside a directory, and each directory
    contains a metadata JSON file and a numpy compressed file.

    :param destination: Directory to save to.
    :type destination: Union[str, Path]
    :param user: Only write traces of this user.
    :type user: int
    """
    if isinstance(destination, str):
        destination = Path(destination)

    if user:
        if user not in self.traces.keys():
            warnings.warn(f"User {user} not found!")
            return
        elif not self.traces[user]:
            warnings.warn(f"No traces for user {user}!")
            return

    # ToDo: parallelize.
    print("Writing traces to files... ")
    for u, models in self.traces.items():
        # If a user was given, only process that one.
        if user and u != user:
            continue
        # User folder.
        user_folder = destination / f'user_{u}'
        for model_name, trace in models.items():
            # Create model folder.
            model_folder = user_folder / model_name
            model_folder.mkdir(parents=True, exist_ok=True)
            pm.save_trace(trace, directory=str(model_folder), overwrite=True)
    print("Done.\n")
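# A reading counterpart matching the layout written above (a sketch;
# `read_traces` and its `models` mapping from model name to pm.Model are
# hypothetical, since pm.load_trace needs the generating model in hand):
def read_traces(self, source, models, user):
    source = Path(source) if isinstance(source, str) else source
    traces = {}
    for model_name, model in models.items():
        model_folder = source / f'user_{user}' / model_name
        traces[model_name] = pm.load_trace(str(model_folder), model=model)
    return traces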
def test_save_and_load(self, tmpdir_factory):
    directory = str(tmpdir_factory.mktemp("data"))
    save_dir = pm.save_trace(self.trace, directory, overwrite=True)
    assert save_dir == directory

    trace2 = pm.load_trace(directory, model=TestSaveLoad.model())
    for var in ("x", "z"):
        assert (self.trace[var] == trace2[var]).all()

    assert self.trace.stat_names == trace2.stat_names
    for stat in self.trace.stat_names:
        assert all(self.trace[stat] == trace2[stat]), (
            "Restored value of statistic %s does not match stored value" % stat
        )
def test_sample_ppc(self, tmpdir_factory):
    directory = str(tmpdir_factory.mktemp('data'))
    save_dir = pm.save_trace(self.trace, directory, overwrite=True)
    assert save_dir == directory

    seed = 10
    np.random.seed(seed)
    with TestSaveLoad.model():
        ppc = pm.sample_ppc(self.trace)

    seed = 10
    np.random.seed(seed)
    with TestSaveLoad.model():
        trace2 = pm.load_trace(directory)
        ppc2 = pm.sample_ppc(trace2)

    for key, value in ppc.items():
        assert (value == ppc2[key]).all()
def test_sample_posterior_predictive(self, tmpdir_factory):
    directory = str(tmpdir_factory.mktemp("data"))
    save_dir = pm.save_trace(self.trace, directory, overwrite=True)
    assert save_dir == directory

    rng = np.random.RandomState(10)
    with TestSaveLoad.model(rng_seeder=rng):
        ppc = pm.sample_posterior_predictive(self.trace)

    rng = np.random.RandomState(10)
    with TestSaveLoad.model(rng_seeder=rng):
        trace2 = pm.load_trace(directory)
        ppc2 = pm.sample_posterior_predictive(trace2)

    for key, value in ppc.items():
        assert (value == ppc2[key]).all()
model = BaseModel(tspan, county_info,
                  ["../data/ia_effect_samples/{}_{}.pkl".format(disease, i) for i in range(100)],
                  include_eastwest=use_eastwest,
                  include_demographics=use_age)

print("Sampling parameters on the training set.")
trace = model.sample_parameters(target_train,
                                samples=num_samples,
                                tune=100,
                                target_accept=0.95,
                                max_treedepth=15,
                                chains=num_chains,
                                cores=num_cores)

with open(filename_model, "wb") as f:
    pkl.dump(model.model, f)

with model.model:
    pm.save_trace(trace, filename_params, overwrite=True)

print("Sampling predictions on the testing set.")
pred = model.sample_predictions(target_test.index, target_test.columns, trace)

with open(filename_pred, 'wb') as f:
    pkl.dump(pred, f)

# for file in [filename_params, filename_pred]:
#     set_file_permissions(file, uid=46836, gid=10033)
annInput = theano.shared(XsTrain)
annTarget = theano.shared(YsTrain)
errAnnInput = theano.shared(errXsTrain)
errAnnTarget = theano.shared(errYsTrain)
neural_network = construct_nn(annInput, errAnnInput, annTarget, errAnnTarget)

print("Starting the training of the BNN...")
if not os.path.exists(cache_file_bnn):
    with neural_network:
        # fit model
        trace = pm.sample(draws=nsamples,
                          init='advi+adapt_diag',
                          n_init=ninit,
                          tune=ninit // 2,
                          chains=nchains,
                          cores=ncores,
                          nuts_kwargs={'target_accept': 0.90},
                          discard_tuned_samples=True,
                          compute_convergence_checks=True,
                          progressbar=False)
    pm.save_trace(trace, directory=cache_file_bnn)
else:
    trace = pm.load_trace(cache_file_bnn, model=neural_network)
print("Done...")
def get_SIR(x, y, y0, country, forecast_len=0, load_post=False):
    '''
    If 'forecast_len' is nonzero, attempts to load a trace corresponding to
    the country of interest from the directory 'traces' and retrieves
    predicted numbers of infected and susceptible patients 'forecast_len'
    days into the future after the 1st case is detected in the country.
    '''
    # If in 'prediction mode', modify x, y to reflect forecast length
    if forecast_len != 0:
        ext = np.arange(1, forecast_len + 1).astype(float)
        ext += x[-1]
        x = np.append(x, ext)
        y = np.empty((x.shape[0], y.shape[1]))

    # SIR Model
    # p[0]: beta, p[1]: lambda
    def SIR(y, t, p):
        ds = -p[0] * y[0] * y[1]               # Susceptible differential
        di = p[0] * y[0] * y[1] - p[1] * y[1]  # Infected differential
        return [ds, di]

    # Initialize ODE
    sir_ode = DifferentialEquation(func=SIR, times=x, n_states=2, n_theta=2, t0=0)

    load_dir = osp.join('traces', country.lower())
    with pm.Model() as model:
        sigma = pm.HalfNormal('sigma', 3, shape=2)
        # R0 is bounded below by 1 because we see an epidemic has occurred
        R0 = pm.Normal('R0', 2, 3)
        lmbda = pm.Normal('lambda', 0.1, 0.1)
        beta = pm.Deterministic('beta', lmbda * R0)
        print('Setting up model for ' + country)
        sir_curves = sir_ode(y0=y0, theta=[beta, lmbda])
        y_obs = pm.Normal('y_obs', mu=sir_curves, sigma=sigma, observed=y)

        if forecast_len == 0:
            trace = pm.sample(2000, tune=1000, cores=2, chains=2, progressbar=True)
            # Save trace
            pm.save_trace(trace, load_dir, overwrite=True)
            # Get the posterior
            post = pm.sample_posterior_predictive(trace, progressbar=True)
            out_post = post
        else:
            # Load trace
            print('Loading trace')
            trace = pm.load_trace(load_dir)
            print('Computing posterior')
            # Get posterior
            if not load_post:
                post = pm.sample_posterior_predictive(trace[500:], progressbar=True)
                out_post = post
                with open(country + '_post.pkl', 'wb') as buff:
                    pickle.dump({'post': post}, buff)
            else:
                with open(country + '_post.pkl', 'rb') as buff:
                    data = pickle.load(buff)
                out_post = data['post']
    print('Done')
    return trace, out_post, x
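# Hypothetical usage of get_SIR above (x, y, y0 stand in for data prepared
# elsewhere): the first call samples and saves the trace under
# traces/<country>, the second reloads it to forecast 30 days ahead.
trace, post, x_fit = get_SIR(x, y, y0, 'Germany')
trace, post, x_ext = get_SIR(x, y, y0, 'Germany', forecast_len=30)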
# Switchpoint
tau = pm.DiscreteUniform("tau", lower=0, upper=len(data) - 1)

# Prior when t <= tau
mu_1 = pm.Normal("mu_1", mu=280, sd=20)
sd_1 = pm.HalfNormal("sd_1", sigma=40)

# Prior when t > tau
mu_2 = pm.Normal("mu_2", mu=280, sd=20)
sd_2 = pm.HalfNormal("sd_2", sigma=40)

# Observations
idx = np.arange(len(data))
mu_t = pm.math.switch(tau > idx, mu_1, mu_2)
sd_t = pm.math.switch(tau > idx, sd_1, sd_2)
observations = pm.Normal("observations", mu=mu_t, sd=sd_t, observed=data)

# Perform inference
with model:
    step = pm.NUTS()
    trace = pm.sample(50000, tune=5000, step=step)

# Save summary
summary = pm.summary(trace)
with open('sp.summary', 'w') as f:
    summary.to_string(f)

# Plot and save posterior traces
pm.save_trace(trace, 'switchpoint.trace', overwrite=True)
az.plot_trace(trace)
plt.savefig('/home/gerardo/Desktop/Projects/PGA-Analysis/reports/figures/driving-distance-pymc3-posteriors.png')
plt.show()
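# To reuse the saved samples later, the trace can be reloaded inside the same
# model context (a sketch; `model` is the switchpoint model sampled above):
with model:
    restored = pm.load_trace('switchpoint.trace')
print(restored['tau'].mean())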
def main():
    # load the data
    df = pd.read_csv("../../assets/data/HS.csv", index_col=0)

    # define items to keep
    item_names = [
        "visual", "cubes", "paper", "flags", "general", "paragrap",
        "sentence", "wordc", "wordm", "addition", "code", "counting",
        "straight", "wordr", "numberr", "figurer", "object", "numberf",
        "figurew",
    ]

    # define the factor structure
    factors = np.array([
        [1, 0, 0, 0, 0],
        [1, 0, 0, 0, 0],
        [1, 0, 0, 0, 0],
        [1, 0, 0, 0, 0],
        [0, 1, 0, 0, 0],
        [0, 1, 0, 0, 0],
        [0, 1, 0, 0, 0],
        [0, 1, 0, 0, 0],
        [0, 1, 0, 0, 0],
        [0, 0, 1, 0, 0],
        [0, 0, 1, 0, 0],
        [0, 0, 1, 0, 0],
        [0, 0, 1, 0, 0],
        [0, 0, 0, 1, 0],
        [0, 0, 0, 1, 0],
        [0, 0, 0, 1, 0],
        [0, 0, 0, 1, 0],
        [0, 0, 0, 1, 0],
        [0, 0, 0, 1, 0],
    ])
    paths = np.array([
        [0, 0, 0, 0, 1],
        [0, 0, 0, 0, 1],
        [0, 0, 0, 0, 1],
        [0, 0, 0, 0, 1],
        [0, 0, 0, 0, 0],
    ])

    # iterate over the two schools
    for school, sdf in df.groupby("school"):
        # define the path to save results
        f = f"../data/BSEM examples/{school}"

        # select the 19 commonly used variables
        items = sdf[item_names]

        # for numerical convenience, standardize the data
        items = (items - items.mean()) / items.std()

        with pm.Model():
            # construct the model
            bsem(items, factors, paths)

            if not exists(f):
                # sample and save
                trace = pm.sample(chains=2)  # 19000, tune=1000
                pm.save_trace(trace, f)
            else:
                trace = pm.load_trace(f)

            pm.traceplot(trace, compact=True)
            rcParams["font.size"] = 14
            plt.savefig(f"{f}/traceplot.png")

            # create a nice summary table
            loadings = pd.DataFrame(
                trace[r"$\Lambda$"].mean(axis=0).round(3),
                index=[v.title() for v in item_names],
                columns=["Spatial", "Verbal", "Speed", "Memory", "g"],
            )
            loadings.to_csv(f"{f}/loadings.csv")
            print(tabulate(loadings, tablefmt="pipe", headers="keys"))

            # correlations = pd.DataFrame(
            #     trace[r"$\Psi$"].mean(axis=0).round(3),
            #     index=["Spatial", "Verbal", "Speed", "Memory", "g"],
            #     columns=["Spatial", "Verbal", "Speed", "Memory", "g"],
            # )
            # correlations.to_csv(f"{f}/factor_correlations.csv")

            _paths = pd.DataFrame(
                trace[r"$\Gamma$"].mean(axis=0).round(3),
                index=["Spatial", "Verbal", "Speed", "Memory", "g"],
                columns=["Spatial", "Verbal", "Speed", "Memory", "g"],
            )
            _paths.to_csv(f"{f}/factor_paths.csv")
            print(tabulate(_paths, tablefmt="pipe", headers="keys"))
with pm.Model():
    params = []
    starts = {}
    starts_arr = []
    for i in range(one_d_size):
        # param = one_rho[i] + np.random.randn()
        # params.append(pm.Uniform('d_{}'.format(i), lower=-1, upper=3))
        params.append(pm.Bound(pm.Normal, lower=-1.0)('d_{}'.format(i), mu=0.0, sigma=4.0))
        start = 0
        if one_Tb[i] == 0:
            start = one_rho[i] + np.random.randn()
        starts['d_{}'.format(i)] = start
        starts_arr.append(start)

    np.save("/Users/sabrinaberger/Library/Mobile Documents/com~apple~CloudDocs/CosmicDawn/T2D2 Model/STAT_DATA/CORR_DATA/inital_{}_{}.npy".format(z, one_d_size), starts_arr)
    prm = tt.as_tensor_variable(params)

    # use a DensityDist (use a lambda function to "call" the Op)
    pm.DensityDist('likelihood', lambda v: logl(v), observed={'v': prm})

    trace = pm.sample(ndraws, tune=nburn, cores=4, start=starts)
    pm.save_trace(trace, directory="/Users/sabrinaberger/Library/Mobile Documents/com~apple~CloudDocs/CosmicDawn/T2D2 Model/STAT_DATA/TRACES/z_{}_{}.trace".format(z, one_d_size), overwrite=True)

    # samples_pymc3 = np.vstack((trace['d_0'], trace['d_1'], trace['d_2'], trace['d_3'],
    #                            trace['d_4'], trace['d_5'], trace['d_6'], trace['d_7'])).T
    # fig = corner.corner(samples_pymc3, labels=["d_0", "d_1", "d_2", "d_3", "d_4", "d_5", "d_6", "d_7"])
    # plt.show()
'''
m1 = pm.Uniform('m1', lower=1, upper=7)
m2 = pm.Uniform('m2', lower=1, upper=6)
m3 = pm.Uniform('m3', lower=1, upper=6)
p1 = pm.Normal('p1', mu=0.2, sd=10)
p2 = pm.Uniform('p2', lower=1, upper=6)
p3 = pm.Uniform('p3', lower=1, upper=6)
trace = pm.sample(100)
'''
#pm.traceplot(trace)

alpha = pm.Normal('alpha', mu=0, sd=10)
beta = pm.Normal('beta', mu=0, sd=10)
sigma = pm.Uniform('sigma', lower=0, upper=4)
pInfect = pm.Uniform('pInfect', 0.01, 0.5, testval=0.05)
params = [2, 4, 3, 0.2, 0.6, 0.3, 0.025, 0.88, 8, 0.56]
yhat = pm.Deterministic('yhat', alpha + beta * step(24 * 7, 6, params, pdf, gdf, pInfect))
#func = lambda a, b, ps, d1, d2, pI: a + b * step(24*7, 6, ps, d1, d2, pI)
#yhat = func(a=alpha, b=beta, ps=params, d1=pdf, d2=gdf, pI=pInfect)
likelihood = pm.Normal('y', mu=yhat, sd=sigma, observed=y)
start = find_MAP()
#step = NUTS()
step = Metropolis()
#trace = pm.sample(100, njobs=4, start=start, progressbar=True, verbose=False)  # tune=1000
trace = pm.sample(100, njobs=4)  # tune=1000
pm.traceplot(trace)
#print(pm.summary(trace))
pm.save_trace(trace, r'f:/_MU/model.trace')
def run_simultaneous_hierarchial(recalculate=False, sample_size=None,
                                 max_ngals=None, outfolder='hierarchical-model'):
    enc = OrdinalEncoder(dtype=np.int32)
    sid_list = pd.read_csv('lib/subject-id-list.csv').values.T[0]
    if os.path.isfile('Xall.npy') and not recalculate:
        X_all = np.load('Xall.npy')
    else:
        X_all = make_X_all(sid_list)
        np.save('Xall.npy', X_all)

    # remove all points with weight of zero (or less..?)
    all_gal_idx, all_arm_idx = enc.fit_transform(X_all[:, [3, 4]]).T
    if max_ngals is not None and max_ngals <= all_gal_idx.max():
        gals = np.random.choice(np.arange(all_gal_idx.max() + 1), max_ngals, replace=False)
    else:
        gals = np.arange(all_gal_idx.max() + 1)
    X_masked = X_all[(X_all.T[2] > 0) & np.isin(all_gal_idx, gals)]
    sample = (np.random.choice(len(X_masked), size=sample_size, replace=False)
              if sample_size else np.arange(len(X_masked)))
    X = X_masked[sample]
    t, R, point_weights = X.T[:3]

    # encode categorical variables into an index
    enc = OrdinalEncoder(dtype=np.int32)
    gal_idx, arm_idx = enc.fit_transform(X[:, [3, 4]]).T
    n_gals = len(np.unique(gal_idx))
    n_unique_arms = len(np.unique(arm_idx))
    print('{} galaxies, {} spiral arms, {} points'.format(n_gals, n_unique_arms, len(X)))

    with pm.Model() as hierarchical_model:
        print('Defining model')
        # Hyperpriors (informative for now)
        mu_psi = pm.Uniform('mu_psi', lower=0, upper=80, testval=15)
        # sigma_psi = pm.Gamma('sigma_psi', alpha=2, beta=10)
        sigma_psi = pm.HalfCauchy('sigma_psi', beta=1)
        psi_offset = pm.Normal('psi_offset', mu=0, sd=1, shape=n_gals)
        psi = pm.Deterministic('psi', mu_psi + sigma_psi * psi_offset)
        psi_radians = psi * np.pi / 180
        a = pm.Uniform('a', lower=0, upper=200, testval=1, shape=n_unique_arms)

        # define our equation for mu_r
        r_est = a[arm_idx] / 100 * tt.exp(tt.tan(psi_radians[gal_idx]) * t)

        # define our expected error on r; here we assume this sigma is the
        # same for all galaxies (not necessarily true)
        base_sigma = pm.HalfCauchy('sigma', beta=1, testval=0.02)
        sigma_y = theano.shared(
            np.asarray(np.sqrt(point_weights), dtype=theano.config.floatX),
            name='sigma_y')
        sigmas = base_sigma / sigma_y

        # define our likelihood function
        likelihood = pm.Normal('R_like', mu=r_est, sd=sigmas, observed=R)

    with hierarchical_model:
        trace = pm.sample(2000, tune=1000, cores=2, target_accept=0.95)

    if outfolder is not None:
        traces_dir = os.path.join('uniform-traces', outfolder)
        try:
            os.mkdir(traces_dir)
        except FileExistsError:
            shutil.rmtree(traces_dir)
        pm.save_trace(trace, directory=traces_dir, overwrite=True)

    pm.traceplot(trace, varnames=['mu_psi', 'sigma_psi', 'sigma'])
    plt.show()
def main(input_dir, output_dir, dataset, model_type, n_samples, n_tune,
         target_accept, n_cores, seed, init, profile):
    '''Fit log-parabola model to DATASET.

    Parameters
    ----------
    input_dir : [type]
        input directory containing subdirs for each instrument with dl3 data
    output_dir : [type]
        where to save the results: traces and two plots
    dataset : string
        telescope name
    model_type : string
        whether to use the profile likelihood ('wstat' or 'profile') or not ('full')
    n_samples : int
        number of samples to draw
    n_tune : int
        number of tuning steps
    target_accept : float
        target accept fraction for the pymc sampler
    n_cores : int
        number of cpu cores to use
    seed : int
        random seed
    init : string
        pymc init string
    profile : bool
        whether to output debugging/profiling information to the console

    Raises
    ------
    NotImplementedError
        This does not yet work on the joint dataset, but that's good enough for me.
    '''
    np.random.seed(seed)

    if dataset == 'joint':
        # TODO: need to calculate mu_b for each observation independently.
        raise NotImplementedError('This is not implemented for the joint dataset yet.')
        # observations, lo, hi = load_joint_spectrum_observation(input_dir)
    else:
        p = os.path.join(input_dir, dataset)
        observations, lo, hi = load_spectrum_observations(p)

    prepare_output(output_dir)

    # TODO: this has to happen for every observation independently
    exposure_ratio = observations[0].alpha[0]
    # print(exposure_ratio)

    on_data, off_data = get_observed_counts(observations)
    integrator = init_integrators(observations)

    print('On Data')
    display_data(on_data)
    print('Off Data')
    display_data(off_data)

    print('--' * 30)
    print(f'Fitting data for {dataset} in {len(observations)} observations. ')
    print(f'Using {len(on_data)} bins with {on_data.sum()} counts in on region and {off_data.sum()} counts in off region.')
    print(f'Fit range is: {(lo, hi) * u.TeV}.')

    model = pm.Model(theano_config={'compute_test_value': 'ignore'})
    with model:
        # amplitude = pm.TruncatedNormal('amplitude', mu=4, sd=1, lower=0.01, testval=4)
        # alpha = pm.TruncatedNormal('alpha', mu=2.5, sd=1, lower=0.00, testval=2.5)
        # beta = pm.TruncatedNormal('beta', mu=0.5, sd=0.5, lower=0.00000, testval=0.5)
        amplitude = pm.HalfFlat('amplitude', testval=4)
        alpha = pm.HalfFlat('alpha', testval=2.5)
        beta = pm.HalfFlat('beta', testval=0.5)

        mu_s = forward_fold_log_parabola_symbolic(integrator, amplitude, alpha, beta, observations)
        # mu_s = forward_fold_log_parabola_analytic(amplitude, alpha, beta, observations)

        if model_type == 'wstat':
            print('Building profiled likelihood model')
            mu_b = pm.Deterministic('mu_b', calc_mu_b(mu_s, on_data, off_data, exposure_ratio))
        else:
            print('Building full likelihood model')
            mu_b = pm.HalfFlat('mu_b', shape=len(off_data))

        pm.Poisson('background', mu=mu_b, observed=off_data, shape=len(off_data))
        pm.Poisson('signal', mu=mu_s + exposure_ratio * mu_b, observed=on_data, shape=len(on_data))

    print('--' * 30)
    print('Model debug information:')
    for RV in model.basic_RVs:
        print(RV.name, RV.logp(model.test_point))

    if profile:
        model.profile(model.logpt).summary()

    print(model.check_test_point())

    print('--' * 30)
    print('Plotting landscape:')
    fig, _ = plot_landscape(model, off_data)
    fig.savefig(os.path.join(output_dir, 'landscape.pdf'))

    print('--' * 30)
    print('Printing graphs:')
    theano.printing.pydotprint(mu_s, outfile=os.path.join(output_dir, 'graph_mu_s.pdf'),
                               format='pdf', var_with_name_simple=True)
    theano.printing.pydotprint(mu_s + exposure_ratio * mu_b,
                               outfile=os.path.join(output_dir, 'graph_n_on.pdf'),
                               format='pdf', var_with_name_simple=True)

    print('--' * 30)
    print('Sampling likelihood:')
    with model:
        trace = pm.sample(n_samples, cores=n_cores, tune=n_tune, init=init,
                          seed=[seed] * n_cores)

    print('--' * 30)
    print(f'Fit results for {dataset}')
    print(trace['amplitude'].mean(), trace['alpha'].mean(), trace['beta'].mean())
    print(np.median(trace['amplitude']), np.median(trace['alpha']), np.median(trace['beta']))

    print('--' * 30)
    # print('Plotting traces')
    # plt.figure()
    # varnames = ['amplitude', 'alpha', 'beta'] if model_type != 'full' else ['amplitude', 'alpha', 'beta', 'mu_b']
    # pm.traceplot(trace, varnames=varnames)
    # plt.savefig(os.path.join(output_dir, 'traces.pdf'))

    p = os.path.join(output_dir, 'num_samples.txt')
    with open(p, "w") as text_file:
        text_file.write(f'\\num{{{n_samples}}}')

    p = os.path.join(output_dir, 'num_chains.txt')
    with open(p, "w") as text_file:
        text_file.write(f'\\num{{{n_cores}}}')

    p = os.path.join(output_dir, 'num_tune.txt')
    with open(p, "w") as text_file:
        text_file.write(f'\\num{{{n_tune}}}')

    plt.figure()
    pm.energyplot(trace)
    plt.savefig(os.path.join(output_dir, 'energy.pdf'))

    # plt.figure()
    # pm.autocorrplot(trace, burn=n_tune)
    # plt.savefig(os.path.join(output_dir, 'autocorr.pdf'))

    plt.figure()
    pm.forestplot(trace, varnames=['amplitude', 'alpha', 'beta'])
    plt.savefig(os.path.join(output_dir, 'forest.pdf'))

    trace_output = os.path.join(output_dir, 'traces')
    print(f'Saving traces to {trace_output}')
    with model:
        pm.save_trace(trace, trace_output, overwrite=True)
    with model as model:
        return pm.find_MAP(model=model)


def sample(model):
    with model as model:
        trace = pm.sample(2000, tune=3800, init='advi+adapt_diag',
                          nuts_kwargs={'target_accept': .98,
                                       'max_treedepth': 25})
        #trace = pm.sample(2000, tune=5500, init='jitter+adapt_diag',
        #                  nuts_kwargs={'target_accept': .90, 'max_treedepth': 25})
    return trace


if __name__ == '__main__':
    #model = load_multi_model_moons()
    model = load_model_align_primary()
    #model = load_model_primary()
    #model = load_multi_model_primary()
    print(model.vars, model.test_point)
    trace = sample(model)
    pm.save_trace(trace)
    pm.traceplot(trace)
    plt.show()
if not os.path.exists(cache_file_hier):
    with neural_network:
        # fit model
        trace_hier = pm.sample(draws=nsamples_hier,
                               init='advi+adapt_diag',
                               n_init=ninit,
                               tune=ninit // 2,
                               chains=nchains_hier,
                               cores=ncores_hier,
                               nuts_kwargs={'target_accept': 0.90},
                               discard_tuned_samples=True,
                               compute_convergence_checks=True,
                               progressbar=False)
    pm.save_trace(trace_hier, directory=cache_file_hier)
else:
    trace_hier = pm.load_trace(cache_file_hier, model=neural_network)
print("Done...")

if not os.path.exists(cache_file_samples):
    samples_tmp = defaultdict(list)
    samples = {}
    for layer_name in layer_names:
        for mu, sd in zip(
                trace_hier.get_values(layer_name, burn=nsamples_hier // 2, combine=True),
                    observed=tiltslx)
tsly_obs = pm.Normal('tsly_obs', mu=tsly_mod, sigma=tilt_std, observed=tiltsly)
tstx_obs = pm.Normal('tstx_obs', mu=tstx_mod, sigma=tilt_std, observed=tiltstx)
tsty_obs = pm.Normal('tsty_obs', mu=tsty_mod, sigma=tilt_std, observed=tiltsty)
x_obs = pm.Normal('x_obs', mu=x_mod, sigma=gps_std, observed=gps)
stack_obs = pm.Normal('stack_obs', mu=stack_mod, sigma=tilt_std * 1e+6, observed=stack)

trace = pm.sample(Niter, init='advi', tune=100, target_accept=0.85)
map_estimate = pm.find_MAP(model=model)

results = {}
results['MAP'] = map_estimate
results['iterations'] = Niter
pickle.dump(results, open(path_results + 'res' + str(Niter) + '_UF.pickle', 'wb'))
pm.save_trace(trace, path_results + 'trace' + str(Niter) + '_UF', overwrite=True)
#pm.traceplot(trace)
                    help='Subject id of galaxy to perform trace on')
parser.add_argument('--outfolder', '-o', metavar='/path/to/directory',
                    default=False, help='Output directory')
parser.add_argument('--plot', '-p', action='store_true', default=False,
                    help='Should plot trace and show to screen')
args = parser.parse_args()

arms = get_arms(args.subject_id)
pa, sigma_pa = arms[0].get_parent().get_pitch_angle(arms)
gal_pa_est = pa * arms[0].chirality
trace = get_logsp_trace_from_arms(arms)

if args.outfolder:
    traces_dir = os.path.join(str(args.outfolder), str(args.subject_id))
    try:
        os.mkdir(traces_dir)
    except FileExistsError:
        pass
    pm.save_trace(trace, directory=traces_dir, overwrite=True)

if args.plot:
    pm.traceplot(trace, lines={'psi': gal_pa_est})
    plt.show()