def fit_full(model_name, J, j_ind, X, y, phi_true, m0, Q0, seed):
    """Fit full model and save the results.

    The Stan model ``model_name`` is sampled on the whole data set and the
    sample mean and (unbiased) variance of ``phi`` are written, together with
    ``phi_true``, to ``results/res_f_<model_name>.npz``.

    Parameters
    ----------
    model_name : str
        Name handed to ``load_stan`` and used in the result file name.
    J : int
        Number of groups, passed to Stan as ``J``.
    j_ind : ndarray
        Zero-based group index of each observation; shifted by one before
        being passed to Stan.
    X : ndarray
        Two-dimensional input array; its shape provides ``N`` and ``D``.
    y : ndarray
        Target values, passed to Stan as ``y``.
    phi_true : ndarray
        True parameter values, stored alongside the results.
    m0 : ndarray
        Prior mean, passed to Stan as ``mu_phi``.
    Q0 : ndarray
        Prior precision matrix, passed (transposed) to Stan as ``Omega_phi``.
    seed
        Seed forwarded unchanged to ``model.sampling``.
    """
    # Call form of print works on both Python 2 and Python 3.
    print("Full model {} ...".format(model_name))

    data = dict(
        N=X.shape[0],
        D=X.shape[1],
        J=J,
        X=X,
        y=y,
        j_ind=j_ind + 1,  # shift the group indices to start from one
        mu_phi=m0,
        Omega_phi=Q0.T  # Q0 transposed in order to get C-contiguous
    )
    model = load_stan(model_name)

    # Sample and extract parameters
    with suppress_stdout():
        fit = model.sampling(data=data, seed=seed, chains=4, iter=1000,
                             warmup=500, thin=2)
    samp = fit.extract(pars='phi')['phi']
    m_phi_full = samp.mean(axis=0)
    var_phi_full = samp.var(axis=0, ddof=1)

    print("Full model sampled.")

    if not os.path.exists('results'):
        os.makedirs('results')
    np.savez(
        'results/res_f_{}.npz'.format(model_name),
        phi_true=phi_true,
        m_phi_full=m_phi_full,
        var_phi_full=var_phi_full,
    )
def main(model_name, conf, ret_master=False):
    """Fit requested model with given configurations.

    Data is simulated with the model simulator ``models.<model_name>``, after
    which the distributed EP approximation and/or the full Stan model is
    fitted, depending on ``conf.method`` ('both', 'distributed' or 'full').
    Results are written as ``.npz`` files into ``RES_PATH`` when
    ``conf.save_true`` / ``conf.save_res`` are set.

    Parameters
    ----------
    model_name : str
        Name of the simulator module in the ``models`` package and of the
        Stan model looked up under ``MOD_PATH``.
    conf : configurations
        Run configuration. The attributes read here are ``J``, ``D``, ``K``,
        ``npg``, ``cor_input``, ``seed_data``, ``seed_mcmc``, ``save_true``,
        ``save_res``, ``id``, ``method``, ``prec_estim``, ``damp``,
        ``mc_opt``, ``mc_full_opt``, ``iter`` and ``mix``.
    ret_master : bool, optional
        If true, exit prematurely and return the dep.Master object, which is
        useful for debugging. The distributed branch is entered regardless
        of ``conf.method`` in that case.

    Returns
    -------
    The ``Master`` instance if ``ret_master`` is true, otherwise ``None``.

    Raises
    ------
    ValueError
        If ``conf`` is not a ``configurations`` instance, or if ``K`` is
        smaller than 2 or greater than the number of samples.
    RuntimeError
        If ``dep_master.run`` reports a non-zero info code. Partial results
        are saved first when ``conf.save_res`` is set.
    """
    # Ensure that the configurations class is used
    if not isinstance(conf, configurations):
        raise ValueError("Invalid arg. `conf`, use class fit.configurations")

    print("Configurations:")
    print(' ' + str(conf).replace('\n', '\n '))

    def _res_path(prefix):
        """Return the path of result file `prefix`, creating RES_PATH."""
        if not os.path.exists(RES_PATH):
            os.makedirs(RES_PATH)
        if conf.id:
            fname = '{}_{}_{}.npz'.format(prefix, model_name, conf.id)
        else:
            fname = '{}_{}.npz'.format(prefix, model_name)
        return os.path.join(RES_PATH, fname)

    # Localise few options
    J = conf.J
    D = conf.D
    K = conf.K

    # Import the model simulator module (import at runtime)
    model_module = getattr(__import__('models.'+model_name), model_name)
    model = model_module.model(J, D, conf.npg)

    # Simulate_data
    if conf.cor_input:
        data = model.simulate_data(Sigma_x='rand', seed=conf.seed_data)
    else:
        data = model.simulate_data(seed=conf.seed_data)

    # Calculate the uncertainty
    uncertainty_global, uncertainty_group = data.calc_uncertainty()

    # Get the prior
    S0, m0, Q0, r0 = model.get_prior()
    prior = {'Q': Q0, 'r': r0}

    # Set init_site to N(0,A**2/K I), where A = 10 * max(diag(S0))
    init_site = 10 * np.max(np.diag(S0))

    # Get parameter information
    pnames, pshapes, phiers = model.get_param_definitions()

    # Save true values
    if conf.save_true:
        np.savez(
            _res_path('true_vals'),
            J=J,
            D=D,
            npg=conf.npg,
            seed=conf.seed_data,
            pnames=pnames,
            uncertainty_global=uncertainty_global,
            uncertainty_group=uncertainty_group,
            X_param=data.X_param,
            **data.true_values
        )
        print("True values saved into results")

    # Explicit sentinel instead of inspecting locals(): the plain Stan model
    # is only loaded by the "many groups per site" branch below.
    stan_model = None

    # ------------------------------------------------------
    #     Fit distributed model
    # ------------------------------------------------------
    if conf.method in ('both', 'distributed') or ret_master:

        print("Distributed model {} ...".format(model_name))

        # Options for the ep-algorithm see documentation of dep.method.Master
        dep_options = dict(
            prior=prior,
            seed=conf.seed_mcmc,
            prec_estim=conf.prec_estim,
            df0=conf.damp,
            init_site=init_site,
            **conf.mc_opt
        )
        # Temp fix for the RandomState seed problem with pystan in 32bit Python
        dep_options['tmp_fix_32bit'] = TMP_FIX_32BIT

        if K < 2:
            raise ValueError("K should be at least 2.")

        elif K < J:
            # ------ Many groups per site: combine groups ------
            Nk, Nj_k, j_ind_k = distribute_groups(J, K, data.Nj)
            # Create the Master instance
            stan_model = load_stan(os.path.join(MOD_PATH, model_name))
            dep_master = Master(
                stan_model,
                data.X,
                data.y,
                A_k={'J': Nj_k},
                A_n={'j_ind': j_ind_k+1},
                site_sizes=Nk,
                **dep_options
            )
            # Construct the map: which site contribute to which parameter
            pmaps = _create_pmaps(phiers, J, K, Nj_k)

        elif K == J:
            # ------ One group per site ------
            # Create the Master instance
            dep_master = Master(
                load_stan(os.path.join(MOD_PATH, model_name+'_sg')),
                data.X,
                data.y,
                site_sizes=data.Nj,
                **dep_options
            )
            # Construct the map: which site contribute to which parameter
            pmaps = _create_pmaps(phiers, J, K, None)

        elif K <= data.N:
            # ------ Multiple sites per group: split groups ------
            Nk, Nk_j, _ = distribute_groups(J, K, data.Nj)
            # Create the Master instance
            dep_master = Master(
                load_stan(os.path.join(MOD_PATH, model_name+'_sg')),
                data.X,
                data.y,
                site_sizes=Nk,
                **dep_options
            )
            # Construct the map: which site contribute to which parameter
            pmaps = _create_pmaps(phiers, J, K, Nk_j)

        else:
            raise ValueError("K cant be greater than number of samples")

        if ret_master:
            print("Returning dep.Master")
            return dep_master

        # Run the algorithm for `EP_ITER` iterations
        print("Run distributed EP algorithm for {} iterations."
              .format(conf.iter))
        m_phi_i, cov_phi_i, info = dep_master.run(
            conf.iter, save_last_fits=conf.mix)
        if info:
            # Save results until failure
            if conf.save_res:
                np.savez(
                    _res_path('res_d'),
                    conf=conf.__dict__,
                    m_phi_i=m_phi_i,
                    cov_phi_i=cov_phi_i,
                    last_iter=dep_master.iter
                )
                print("Uncomplete distributed model results saved.")
            raise RuntimeError('Dep algorithm failed with error code: {}'
                               .format(info))

        if conf.mix:
            print("Form the final approximation "
                  "by mixing the last samples from all the sites.")
            cov_phi, m_phi = dep_master.mix_phi()

            # Get mean and var of inferred variables
            pms, pvars = dep_master.mix_pred(pnames, pmaps, pshapes)
            # Construct a dict of from these results
            presults = {}
            for i, pname in enumerate(pnames):
                presults['m_'+pname] = pms[i]
                presults['var_'+pname] = pvars[i]

        # Save results
        if conf.save_res:
            res_file = _res_path('res_d')
            if conf.mix:
                np.savez(
                    res_file,
                    conf=conf.__dict__,
                    m_phi_i=m_phi_i,
                    cov_phi_i=cov_phi_i,
                    m_phi=m_phi,
                    cov_phi=cov_phi,
                    **presults
                )
            else:
                np.savez(
                    res_file,
                    conf=conf.__dict__,
                    m_phi_i=m_phi_i,
                    cov_phi_i=cov_phi_i,
                )
            print("Distributed model results saved.")

        # Release master object
        del dep_master

    # ------------------------------------------------------
    #     Fit full model
    # ------------------------------------------------------
    if conf.method in ('both', 'full'):

        print("Full model {} ...".format(model_name))

        seed = np.random.RandomState(seed=conf.seed_mcmc)
        # Temp fix for the RandomState seed problem with pystan in 32bit Python
        seed = seed.randint(2**31-1) if TMP_FIX_32BIT else seed

        # Separate name so the simulated data object is not shadowed.
        stan_data = dict(
            N=data.X.shape[0],
            D=data.X.shape[1],
            J=J,
            X=data.X,
            y=data.y,
            j_ind=data.j_ind+1,
            mu_phi=m0,
            Omega_phi=Q0.T  # Q0 transposed in order to get C-contiguous
        )
        # Load model if not loaded already
        if stan_model is None:
            stan_model = load_stan(os.path.join(MOD_PATH, model_name))

        # Sample and extract parameters
        with suppress_stdout():
            time_full = timer()
            fit = stan_model.sampling(
                data=stan_data,
                seed=seed,
                **conf.mc_full_opt
            )
            time_full = (timer() - time_full)
        samp = fit.extract(pars='phi')['phi']
        nsamp = samp.shape[0]
        m_phi_full = samp.mean(axis=0)
        # Centre into a new array so the extracted samples are not mutated.
        centered = samp - m_phi_full
        cov_phi_full = centered.T.dot(centered)
        cov_phi_full /= nsamp - 1

        # Mean stepsize
        steps = [np.mean(p['stepsize__'])
                 for p in fit.get_sampler_params()]
        print(' sampling time {}'.format(time_full))
        print(' mean stepsize: {:.4}'.format(np.mean(steps)))
        # Max Rhat (from all but last row in the last column)
        print(' max Rhat: {:.4}'.format(
            np.max(fit.summary()['summary'][:-1, -1])
        ))

        # Get mean and var of inferred variables
        presults = {}
        for pname in pnames:
            psamp = fit.extract(pname)[pname]
            presults['m_'+pname+'_full'] = np.mean(psamp, axis=0)
            presults['var_'+pname+'_full'] = np.var(psamp, axis=0, ddof=1)

        # Save results
        if conf.save_res:
            np.savez(
                _res_path('res_f'),
                conf=conf.__dict__,
                m_phi_full=m_phi_full,
                cov_phi_full=cov_phi_full,
                **presults
            )
            print("Full model results saved.")
def main(filename='res.npz'):
    """Simulate hierarchical data, run distributed EP and save the results.

    Group intercepts are drawn as ``MU + TAU * randn``. Inputs ``X`` and
    targets ``y`` are drawn from normal distributions and redrawn until they
    are non-negative. The data is then divided into ``K`` sites, the
    distributed EP algorithm is run for ``EP_ITER`` iterations and the
    outcome is written to ``filename`` with ``np.savez``.

    All settings (``SEED_DATA``, ``SEED_MCMC``, ``J``, ``K``, ``NPG``,
    ``MU``, ``TAU``, ``BETA``, ``SIGMA``, ``X_MU``, ``X_STD``, ``M0``,
    ``V0``, ``PREC_ESTIM``, ``CHAINS``, ``ITER``, ``WARMUP``, ``THIN``,
    ``EP_ITER``, ``TMP_FIX_32BIT``) are read from module-level names.

    Parameters
    ----------
    filename : str, optional
        Target of ``np.savez``.

    Raises
    ------
    ValueError
        If ``K`` is smaller than 2 or greater than the number of samples.
    RuntimeError
        If ``dep_master.run`` reports a non-zero info code.
    """
    # ------------------------------------------------------
    #     Simulate data
    # ------------------------------------------------------

    # Set seed
    rnd_data = np.random.RandomState(seed=SEED_DATA)

    # Parameters
    # Number of observations for each group
    if hasattr(NPG, '__getitem__') and len(NPG) == 2:
        Nj = rnd_data.randint(NPG[0], NPG[1]+1, size=J)
    else:
        Nj = NPG*np.ones(J, dtype=np.int64)
    # Total number of observations
    N = np.sum(Nj)
    # Observation index limits for J groups
    j_lim = np.concatenate(([0], np.cumsum(Nj)))
    # Group indices for each sample: group j repeated Nj[j] times
    j_ind = np.repeat(np.arange(J, dtype=np.int64), Nj)

    # Assign parameters
    alpha_j = MU + rnd_data.randn(J)*TAU
    phi_true = np.log([MU, TAU, BETA, SIGMA])
    dphi = 4  # Number of shared parameters

    # Simulate data
    # Truncated normal rejection sampling
    X = X_MU + rnd_data.randn(N)*X_STD
    xneg = X < 0
    while np.any(xneg):
        X[xneg] = X_MU + rnd_data.randn(np.count_nonzero(xneg))*X_STD
        xneg = X < 0
    f = alpha_j[j_ind] + X*BETA
    y = f + rnd_data.randn(N)*SIGMA
    yneg = y < 0
    while np.any(yneg):
        y[yneg] = f[yneg] + rnd_data.randn(np.count_nonzero(yneg))*SIGMA
        yneg = y < 0

    # ------------------------------------------------------
    #     Prior
    # ------------------------------------------------------

    # Natural parameters of the prior
    # (transposed in order to get F-contiguous)
    Q0 = np.diag(np.ones(dphi)/V0).T
    r0 = M0/V0
    prior = {'Q': Q0, 'r': r0}

    # ------------------------------------------------------
    #     Distributed EP
    # ------------------------------------------------------

    print("Distributed model...")

    # Options for the ep-algorithm see documentation of dep.method.Master
    options = {
        'seed': SEED_MCMC,
        'init_prev': True,
        'prec_estim': PREC_ESTIM,
        'chains': CHAINS,
        'iter': ITER,
        'warmup': WARMUP,
        'thin': THIN
    }
    # Temp fix for the RandomState seed problem with pystan in 32bit Python
    options['tmp_fix_32bit'] = TMP_FIX_32BIT

    model = load_stan('model')
    if K < 2:
        raise ValueError("K should be at least 2.")

    elif K < J:
        # ---- Many groups per site ----
        # Combine smallest pairs of consecutive groups until K has been reached
        Nk = Nj.tolist()
        # Njd[i] is the combined size of the neighbouring sites i and i+1
        Njd = (Nj[:-1]+Nj[1:]).tolist()
        Nj_k = [1]*J
        for _ in range(J-K):
            ind = Njd.index(min(Njd))
            # Update the neighbouring pair sums before merging ind and ind+1
            if ind+1 < len(Njd):
                Njd[ind+1] += Nk[ind]
            if ind > 0:
                Njd[ind-1] += Nk[ind+1]
            Nk[ind] = Njd[ind]
            Nk.pop(ind+1)
            Njd.pop(ind)
            Nj_k[ind] += Nj_k[ind+1]
            Nj_k.pop(ind+1)
        Nk = np.array(Nk)      # Number of samples per site
        Nj_k = np.array(Nj_k)  # Number of groups per site
        j_ind_k = np.empty(N, dtype=np.int32)  # Within site group index
        k_lim = np.concatenate(([0], np.cumsum(Nj_k)))
        for k in range(K):
            for ji in range(Nj_k[k]):
                ki = ji + k_lim[k]
                j_ind_k[j_lim[ki]:j_lim[ki+1]] = ji
        # Create the Master instance
        dep_master = Master(
            model,
            X,
            y,
            A_k={'J': Nj_k},
            A_n={'j_ind': j_ind_k+1},
            site_sizes=Nk,
            prior=prior,
            **options
        )

    elif K == J:
        # ---- One group per site ----
        # Create the Master instance
        dep_master = Master(
            model,
            X,
            y,
            A_k={'J': np.ones(K, dtype=np.int64)},
            A_n={'j_ind': np.ones(N, dtype=np.int64)},
            site_sizes=Nj,
            prior=prior,
            **options
        )

    elif K <= N:
        # ---- Multiple sites per group ----
        # Split biggest groups until enough sites are formed
        ppg = np.ones(J, dtype=np.int64)  # Parts per group
        # Current size of one part of each group (np.float alias is gone
        # from recent NumPy releases, the builtin float is equivalent)
        Nj2 = Nj.astype(float)
        for _ in range(K-J):
            cur_max = Nj2.argmax()
            ppg[cur_max] += 1
            # Explicit float division: integer division would floor the
            # part size and could make argmax pick the wrong group.
            Nj2[cur_max] = float(Nj[cur_max])/ppg[cur_max]
        Nj2 = Nj//ppg
        rem = Nj % ppg
        # Form the number of samples for each site
        Nk = np.empty(K, dtype=np.int64)
        k = 0
        for j in range(J):
            for kj in range(ppg[j]):
                # The first rem[j] parts get one extra sample
                if kj < rem[j]:
                    Nk[k] = Nj2[j] + 1
                else:
                    Nk[k] = Nj2[j]
                k += 1
        # Create the Master instance
        dep_master = Master(
            model,
            X,
            y,
            A_k={'J': np.ones(K, dtype=np.int64)},
            A_n={'j_ind': np.ones(N, dtype=np.int64)},
            site_sizes=Nk,
            prior=prior,
            **options
        )

    else:
        raise ValueError("K cant be greater than number of samples")

    # Run the algorithm for `EP_ITER` iterations
    print("Run distributed EP algorithm for {} iterations.".format(EP_ITER))
    m_phi, cov_phi, info = dep_master.run(EP_ITER)
    # Check the status before post-processing the returned arrays
    if info:
        raise RuntimeError('Dep algorithm failed with error code: {}'
                           .format(info))
    var_phi = np.diagonal(cov_phi, axis1=1, axis2=2)
    print("Form the final approximation "
          "by mixing the samples from all the sites.")
    S_mix, m_mix = dep_master.mix_phi()
    var_mix = np.diag(S_mix)

    print("Distributed model sampled:")
    print(" exp(phi) = {}".format(
        np.array2string(np.exp(m_mix), precision=1)))
    print("True values:")
    print(" exp(phi) = {}".format([MU, TAU, BETA, SIGMA]))

    # ------------------------------------------------------
    #     Save results
    # ------------------------------------------------------

    np.savez(
        filename,
        seed_data=SEED_DATA,
        seed_mcmc=SEED_MCMC,
        J=J,
        K=K,
        Nj=Nj,
        N=N,
        dphi=dphi,
        niter=EP_ITER,
        m0=M0,
        V0=V0,
        phi_true=phi_true,
        m_phi=m_phi,
        var_phi=var_phi,
        m_mix=m_mix,
        var_mix=var_mix
    )
def main(filename='res_full.npz'):
    """Simulate hierarchical data, fit the full Stan model and save results.

    Group intercepts are drawn as ``MU + TAU * randn``. Inputs ``X`` and
    targets ``y`` are drawn from normal distributions and redrawn until they
    are non-negative. The Stan model ``'model'`` is then sampled on the
    whole data set and the sample mean and variance of ``phi`` are written
    to ``filename`` with ``np.savez``.

    All settings (``SEED_DATA``, ``SEED_MCMC``, ``J``, ``K``, ``NPG``,
    ``MU``, ``TAU``, ``BETA``, ``SIGMA``, ``X_MU``, ``X_STD``, ``M0``,
    ``V0``, ``CHAINS``, ``ITER``, ``WARMUP``, ``THIN``, ``TMP_FIX_32BIT``)
    are read from module-level names.

    Parameters
    ----------
    filename : str, optional
        Target of ``np.savez``.
    """
    # ------------------------------------------------------
    #     Simulate data
    # ------------------------------------------------------

    # Set seed
    rnd_data = np.random.RandomState(seed=SEED_DATA)

    # Parameters
    # Number of observations for each group
    if hasattr(NPG, '__getitem__') and len(NPG) == 2:
        Nj = rnd_data.randint(NPG[0], NPG[1]+1, size=J)
    else:
        Nj = NPG*np.ones(J, dtype=np.int64)
    # Total number of observations
    N = np.sum(Nj)
    # Group indices for each sample: group j repeated Nj[j] times
    j_ind = np.repeat(np.arange(J, dtype=np.int64), Nj)

    # Assign parameters
    alpha_j = MU + rnd_data.randn(J)*TAU
    phi_true = np.log([MU, TAU, BETA, SIGMA])
    dphi = 4  # Number of shared parameters

    # Simulate data
    # Truncated normal rejection sampling
    X = X_MU + rnd_data.randn(N)*X_STD
    xneg = X < 0
    while np.any(xneg):
        X[xneg] = X_MU + rnd_data.randn(np.count_nonzero(xneg))*X_STD
        xneg = X < 0
    f = alpha_j[j_ind] + X*BETA
    y = f + rnd_data.randn(N)*SIGMA
    yneg = y < 0
    while np.any(yneg):
        y[yneg] = f[yneg] + rnd_data.randn(np.count_nonzero(yneg))*SIGMA
        yneg = y < 0

    # ------------------------------------------------------
    #     Prior
    # ------------------------------------------------------

    # Prior mean
    m0 = M0
    # Prior precision (transposed in order to get F-contiguous)
    Q0 = np.diag(np.ones(dphi)/V0).T

    # ------------------------------------------------------
    #     Full model
    # ------------------------------------------------------

    print("Full model...")

    # Set seed
    rnd_mcmc = np.random.RandomState(seed=SEED_MCMC)

    data = dict(
        N=N,
        J=J,
        X=X,
        y=y,
        j_ind=j_ind+1,  # shift the group indices to start from one
        mu_phi=m0,
        Omega_phi=Q0.T  # Q0 transposed in order to get C-contiguous
    )
    # Sample and extract parameters
    model = load_stan('model')
    fit = model.sampling(
        data=data,
        # Temp fix for the RandomState seed problem with pystan in 32bit
        # Python: pass a plain integer instead of the RandomState
        seed=(rnd_mcmc.randint(2**31-1) if TMP_FIX_32BIT else rnd_mcmc),
        chains=CHAINS,
        iter=ITER,
        warmup=WARMUP,
        thin=THIN
    )
    samp = fit.extract(pars='phi')['phi']
    m_phi_full = samp.mean(axis=0)
    var_phi_full = samp.var(axis=0, ddof=1)

    print("Full model sampled:")
    print(" exp(phi) = {}"
          .format(np.array2string(np.exp(m_phi_full), precision=1)))
    print("True values:")
    print(" exp(phi) = {}".format([MU, TAU, BETA, SIGMA]))

    # ------------------------------------------------------
    #     Save results
    # ------------------------------------------------------

    np.savez(
        filename,
        seed_data=SEED_DATA,
        seed_mcmc=SEED_MCMC,
        J=J,
        K=K,
        Nj=Nj,
        N=N,
        dphi=dphi,
        m0=M0,
        V0=V0,
        phi_true=phi_true,
        m_phi_full=m_phi_full,
        var_phi_full=var_phi_full
    )
def fit_distributed(model_name, niter, J, K, Nj, X, y, phi_true, options):
    """Fit distributed model and save the results.

    The ``J`` groups are divided into ``K`` sites: consecutive groups are
    combined when ``K < J``, each group forms one site when ``K == J`` and
    the biggest groups are split when ``J < K <= N``. The distributed EP
    algorithm is then run for ``niter`` iterations and the results are
    written to ``results/res_d_<model_name>.npz``.

    Parameters
    ----------
    model_name : str
        Name handed to ``load_stan``; the suffix ``'_sg'`` is appended when
        every site holds data from a single group.
    niter : int
        Number of iterations passed to ``dep_master.run``.
    J : int
        Number of groups.
    K : int
        Number of sites.
    Nj : ndarray
        Number of observations in each group.
    X, y
        Data passed unchanged to ``Master``.
    phi_true : ndarray
        True parameter values, stored alongside the results.
    options : dict
        Additional keyword arguments forwarded to ``Master``.

    Raises
    ------
    ValueError
        If ``K`` is smaller than 2 or greater than the number of samples.
    """
    print("Distributed model {} ...".format(model_name))

    N = Nj.sum()

    if K < 2:
        raise ValueError("K should be at least 2.")

    elif K < J:
        # ---- Many groups per site ----
        # Combine smallest pairs of consecutive groups until K has been reached
        j_lim = np.concatenate(([0], np.cumsum(Nj)))
        Nk = Nj.tolist()
        # Njd[i] is the combined size of the neighbouring sites i and i+1
        Njd = (Nj[:-1] + Nj[1:]).tolist()
        Nj_k = [1] * J
        for _ in range(J - K):
            ind = Njd.index(min(Njd))
            # Update the neighbouring pair sums before merging ind and ind+1
            if ind + 1 < len(Njd):
                Njd[ind + 1] += Nk[ind]
            if ind > 0:
                Njd[ind - 1] += Nk[ind + 1]
            Nk[ind] = Njd[ind]
            Nk.pop(ind + 1)
            Njd.pop(ind)
            Nj_k[ind] += Nj_k[ind + 1]
            Nj_k.pop(ind + 1)
        Nk = np.array(Nk)      # Number of samples per site
        Nj_k = np.array(Nj_k)  # Number of groups per site
        j_ind_k = np.empty(N, dtype=np.int32)  # Within site group index
        k_lim = np.concatenate(([0], np.cumsum(Nj_k)))
        for k in range(K):
            for ji in range(Nj_k[k]):
                ki = ji + k_lim[k]
                j_ind_k[j_lim[ki]:j_lim[ki + 1]] = ji
        # Create the Master instance
        model = load_stan(model_name)
        dep_master = Master(model, X, y, A_k={'J': Nj_k},
                            A_n={'j_ind': j_ind_k + 1}, site_sizes=Nk,
                            **options)

    elif K == J:
        # ---- One group per site ----
        # Create the Master instance
        model_single_group = load_stan(model_name + '_sg')
        dep_master = Master(model_single_group, X, y, site_sizes=Nj,
                            **options)

    elif K <= N:
        # ---- Multiple sites per group ----
        # Split biggest groups until enough sites are formed
        ppg = np.ones(J, dtype=np.int64)  # Parts per group
        # Current size of one part of each group (np.float alias is gone
        # from recent NumPy releases, the builtin float is equivalent)
        Nj2 = Nj.astype(float)
        for _ in range(K - J):
            cur_max = Nj2.argmax()
            ppg[cur_max] += 1
            # Explicit float division: integer division would floor the
            # part size and could make argmax pick the wrong group.
            Nj2[cur_max] = float(Nj[cur_max]) / ppg[cur_max]
        Nj2 = Nj // ppg
        rem = Nj % ppg
        # Form the number of samples for each site
        Nk = np.empty(K, dtype=np.int64)
        k = 0
        for j in range(J):
            for kj in range(ppg[j]):
                # The first rem[j] parts get one extra sample
                if kj < rem[j]:
                    Nk[k] = Nj2[j] + 1
                else:
                    Nk[k] = Nj2[j]
                k += 1
        # Create the Master instance
        model_single_group = load_stan(model_name + '_sg')
        dep_master = Master(model_single_group, X, y, site_sizes=Nk,
                            **options)

    else:
        raise ValueError("K cant be greater than number of samples")

    # Run the algorithm for `niter` iterations
    print("Run distributed EP algorithm for {} iterations.".format(niter))
    m_phi, var_phi = dep_master.run(niter)
    print("Form the final approximation "
          "by mixing the samples from all the sites.")
    S_mix, m_mix = dep_master.mix_samples()
    var_mix = np.diag(S_mix)

    print("Distributed model sampled.")

    if not os.path.exists('results'):
        os.makedirs('results')
    np.savez(
        'results/res_d_{}.npz'.format(model_name),
        phi_true=phi_true,
        m_phi=m_phi,
        var_phi=var_phi,
        m_mix=m_mix,
        var_mix=var_mix,
    )
def main(model_name, conf, ret_master=False):
    """Fit requested model with given configurations.

    Data is simulated with the model simulator ``models.<model_name>``, after
    which the distributed EP approximation and/or the full Stan model is
    fitted, depending on ``conf.method`` ('both', 'distributed' or 'full').
    Results are written as ``.npz`` files into ``RES_PATH`` when
    ``conf.save_true`` / ``conf.save_res`` are set.

    Parameters
    ----------
    model_name : str
        Name of the simulator module in the ``models`` package and of the
        Stan model looked up under ``MOD_PATH``.
    conf : configurations
        Run configuration. The attributes read here are ``J``, ``D``, ``K``,
        ``npg``, ``cor_input``, ``seed_data``, ``seed_mcmc``, ``save_true``,
        ``save_res``, ``id``, ``method``, ``prec_estim``, ``mc_opt``,
        ``mc_full_opt`` and ``iter``.
    ret_master : bool, optional
        If true, exit prematurely and return the dep.Master object, which is
        useful for debugging. The distributed branch is entered regardless
        of ``conf.method`` in that case.

    Returns
    -------
    The ``Master`` instance if ``ret_master`` is true, otherwise ``None``.

    Raises
    ------
    ValueError
        If ``conf`` is not a ``configurations`` instance, or if ``K`` is
        smaller than 2 or greater than the number of samples.
    RuntimeError
        If ``dep_master.run`` reports a non-zero info code. Partial results
        are saved first when ``conf.save_res`` is set.
    """
    # Ensure that the configurations class is used
    if not isinstance(conf, configurations):
        raise ValueError("Invalid arg. `conf`, use class fit.configurations")

    # Call form of print works on both Python 2 and Python 3.
    print("Configurations:")
    print(' ' + str(conf).replace('\n', '\n '))

    def _res_path(prefix):
        """Return the path of result file `prefix`, creating RES_PATH."""
        if not os.path.exists(RES_PATH):
            os.makedirs(RES_PATH)
        if conf.id:
            fname = '{}_{}_{}.npz'.format(prefix, model_name, conf.id)
        else:
            fname = '{}_{}.npz'.format(prefix, model_name)
        return os.path.join(RES_PATH, fname)

    # Localise few options
    J = conf.J
    D = conf.D
    K = conf.K

    # Import the model simulator module (import at runtime)
    model_module = getattr(__import__('models.'+model_name), model_name)
    model = model_module.model(J, D, conf.npg)

    # Simulate_data
    if conf.cor_input:
        data = model.simulate_data(Sigma_x='rand', seed=conf.seed_data)
    else:
        data = model.simulate_data(seed=conf.seed_data)

    # Calculate the uncertainty
    uncertainty_global, uncertainty_group = data.calc_uncertainty()

    # Get the prior
    S0, m0, Q0, r0 = model.get_prior()
    prior = {'Q': Q0, 'r': r0}

    # Get parameter information
    pnames, pshapes, phiers = model.get_param_definitions()

    # Save true values
    if conf.save_true:
        np.savez(
            _res_path('true_vals'),
            J=J,
            D=D,
            npg=conf.npg,
            seed=conf.seed_data,
            pnames=pnames,
            uncertainty_global=uncertainty_global,
            uncertainty_group=uncertainty_group,
            X_param=data.X_param,
            **data.true_values
        )
        print("True values saved into results")

    # Explicit sentinel instead of inspecting locals(): the plain Stan model
    # is only loaded by the "many groups per site" branch below.
    stan_model = None

    # ------------------------------------------------------
    #     Fit distributed model
    # ------------------------------------------------------
    if conf.method in ('both', 'distributed') or ret_master:

        print("Distributed model {} ...".format(model_name))

        # Options for the ep-algorithm see documentation of dep.method.Master
        dep_options = dict(
            prior=prior,
            seed=conf.seed_mcmc,
            prec_estim=conf.prec_estim,
            **conf.mc_opt
        )
        # Temp fix for the RandomState seed problem with pystan in 32bit Python
        dep_options['tmp_fix_32bit'] = TMP_FIX_32BIT

        if K < 2:
            raise ValueError("K should be at least 2.")

        elif K < J:
            # ------ Many groups per site: combine groups ------
            Nk, Nj_k, j_ind_k = distribute_groups(J, K, data.Nj)
            # Create the Master instance
            stan_model = load_stan(os.path.join(MOD_PATH, model_name))
            dep_master = Master(
                stan_model,
                data.X,
                data.y,
                A_k={'J': Nj_k},
                A_n={'j_ind': j_ind_k+1},
                site_sizes=Nk,
                **dep_options
            )
            # Construct the map: which site contribute to which parameter
            pmaps = _create_pmaps(phiers, J, K, Nj_k)

        elif K == J:
            # ------ One group per site ------
            # Create the Master instance
            dep_master = Master(
                load_stan(os.path.join(MOD_PATH, model_name+'_sg')),
                data.X,
                data.y,
                site_sizes=data.Nj,
                **dep_options
            )
            # Construct the map: which site contribute to which parameter
            pmaps = _create_pmaps(phiers, J, K, None)

        elif K <= data.N:
            # ------ Multiple sites per group: split groups ------
            Nk, Nk_j, _ = distribute_groups(J, K, data.Nj)
            # Create the Master instance
            dep_master = Master(
                load_stan(os.path.join(MOD_PATH, model_name+'_sg')),
                data.X,
                data.y,
                site_sizes=Nk,
                **dep_options
            )
            # Construct the map: which site contribute to which parameter
            pmaps = _create_pmaps(phiers, J, K, Nk_j)

        else:
            raise ValueError("K cant be greater than number of samples")

        if ret_master:
            print("Returning dep.Master")
            return dep_master

        # Run the algorithm for `EP_ITER` iterations
        print("Run distributed EP algorithm for {} iterations."
              .format(conf.iter))
        m_phi_i, cov_phi_i, info = dep_master.run(conf.iter)
        if info:
            # Save results until failure
            if conf.save_res:
                np.savez(
                    _res_path('res_d'),
                    conf=conf.__dict__,
                    m_phi_i=m_phi_i,
                    cov_phi_i=cov_phi_i,
                    last_iter=dep_master.iter
                )
                print("Uncomplete distributed model results saved.")
            raise RuntimeError('Dep algorithm failed with error code: {}'
                               .format(info))
        print("Form the final approximation "
              "by mixing the samples from all the sites.")
        cov_phi, m_phi = dep_master.mix_phi()

        # Get mean and var of inferred variables
        pms, pvars = dep_master.mix_pred(pnames, pmaps, pshapes)
        # Construct a dict of from these results
        presults = {}
        for i, pname in enumerate(pnames):
            presults['m_'+pname] = pms[i]
            presults['var_'+pname] = pvars[i]

        # Save results
        if conf.save_res:
            np.savez(
                _res_path('res_d'),
                conf=conf.__dict__,
                m_phi_i=m_phi_i,
                cov_phi_i=cov_phi_i,
                m_phi=m_phi,
                cov_phi=cov_phi,
                **presults
            )
            print("Distributed model results saved.")

        # Release master object
        del dep_master

    # ------------------------------------------------------
    #     Fit full model
    # ------------------------------------------------------
    if conf.method in ('both', 'full'):

        print("Full model {} ...".format(model_name))

        seed = np.random.RandomState(seed=conf.seed_mcmc)
        # Temp fix for the RandomState seed problem with pystan in 32bit Python
        seed = seed.randint(2**31-1) if TMP_FIX_32BIT else seed

        # Separate name so the simulated data object is not shadowed.
        stan_data = dict(
            N=data.X.shape[0],
            D=data.X.shape[1],
            J=J,
            X=data.X,
            y=data.y,
            j_ind=data.j_ind+1,
            mu_phi=m0,
            Omega_phi=Q0.T  # Q0 transposed in order to get C-contiguous
        )
        # Load model if not loaded already
        if stan_model is None:
            stan_model = load_stan(os.path.join(MOD_PATH, model_name))

        # Sample and extract parameters
        with suppress_stdout():
            fit = stan_model.sampling(
                data=stan_data,
                seed=seed,
                **conf.mc_full_opt
            )
        samp = fit.extract(pars='phi')['phi']
        nsamp = samp.shape[0]
        m_phi_full = samp.mean(axis=0)
        # Centre into a new array so the extracted samples are not mutated.
        centered = samp - m_phi_full
        cov_phi_full = centered.T.dot(centered)
        cov_phi_full /= nsamp - 1

        # Mean stepsize
        steps = [np.mean(p['stepsize__'])
                 for p in fit.get_sampler_params()]
        print(' mean stepsize: {:.4}'.format(np.mean(steps)))
        # Max Rhat (from all but last row in the last column)
        print(' max Rhat: {:.4}'.format(
            np.max(fit.summary()['summary'][:-1, -1])
        ))

        # Get mean and var of inferred variables
        presults = {}
        for pname in pnames:
            psamp = fit.extract(pname)[pname]
            presults['m_'+pname+'_full'] = np.mean(psamp, axis=0)
            presults['var_'+pname+'_full'] = np.var(psamp, axis=0, ddof=1)

        # Save results
        if conf.save_res:
            np.savez(
                _res_path('res_f'),
                conf=conf.__dict__,
                m_phi_full=m_phi_full,
                cov_phi_full=cov_phi_full,
                **presults
            )
            print("Full model results saved.")
def main(filename='res_full.npz'):
    """Simulate hierarchical data, fit the full Stan model and save results.

    Group intercepts are drawn as ``MU + TAU * randn``. Inputs ``X`` and
    targets ``y`` are drawn from normal distributions and redrawn until they
    are non-negative. The Stan model ``'model'`` is then sampled on the
    whole data set and the sample mean and variance of ``phi`` are written
    to ``filename`` with ``np.savez``.

    All settings (``SEED_DATA``, ``SEED_MCMC``, ``J``, ``K``, ``NPG``,
    ``MU``, ``TAU``, ``BETA``, ``SIGMA``, ``X_MU``, ``X_STD``, ``M0``,
    ``V0``, ``CHAINS``, ``ITER``, ``WARMUP``, ``THIN``, ``TMP_FIX_32BIT``)
    are read from module-level names.

    Parameters
    ----------
    filename : str, optional
        Target of ``np.savez``.
    """
    # ------------------------------------------------------
    #     Simulate data
    # ------------------------------------------------------

    # Set seed
    rnd_data = np.random.RandomState(seed=SEED_DATA)

    # Parameters
    # Number of observations for each group
    if hasattr(NPG, '__getitem__') and len(NPG) == 2:
        Nj = rnd_data.randint(NPG[0], NPG[1] + 1, size=J)
    else:
        Nj = NPG * np.ones(J, dtype=np.int64)
    # Total number of observations
    N = np.sum(Nj)
    # Group indices for each sample: group j repeated Nj[j] times
    j_ind = np.repeat(np.arange(J, dtype=np.int64), Nj)

    # Assign parameters
    alpha_j = MU + rnd_data.randn(J) * TAU
    phi_true = np.log([MU, TAU, BETA, SIGMA])
    dphi = 4  # Number of shared parameters

    # Simulate data
    # Truncated normal rejection sampling
    X = X_MU + rnd_data.randn(N) * X_STD
    xneg = X < 0
    while np.any(xneg):
        X[xneg] = X_MU + rnd_data.randn(np.count_nonzero(xneg)) * X_STD
        xneg = X < 0
    f = alpha_j[j_ind] + X * BETA
    y = f + rnd_data.randn(N) * SIGMA
    yneg = y < 0
    while np.any(yneg):
        y[yneg] = f[yneg] + rnd_data.randn(np.count_nonzero(yneg)) * SIGMA
        yneg = y < 0

    # ------------------------------------------------------
    #     Prior
    # ------------------------------------------------------

    # Prior mean
    m0 = M0
    # Prior precision (transposed in order to get F-contiguous)
    Q0 = np.diag(np.ones(dphi) / V0).T

    # ------------------------------------------------------
    #     Full model
    # ------------------------------------------------------

    print("Full model...")

    # Set seed
    rnd_mcmc = np.random.RandomState(seed=SEED_MCMC)

    data = dict(
        N=N,
        J=J,
        X=X,
        y=y,
        j_ind=j_ind + 1,  # shift the group indices to start from one
        mu_phi=m0,
        Omega_phi=Q0.T  # Q0 transposed in order to get C-contiguous
    )
    # Sample and extract parameters
    model = load_stan('model')
    fit = model.sampling(
        data=data,
        # Temp fix for the RandomState seed problem with pystan in 32bit
        # Python: pass a plain integer instead of the RandomState
        seed=(rnd_mcmc.randint(2**31 - 1) if TMP_FIX_32BIT else rnd_mcmc),
        chains=CHAINS,
        iter=ITER,
        warmup=WARMUP,
        thin=THIN)
    samp = fit.extract(pars='phi')['phi']
    m_phi_full = samp.mean(axis=0)
    var_phi_full = samp.var(axis=0, ddof=1)

    print("Full model sampled:")
    print(" exp(phi) = {}"
          .format(np.array2string(np.exp(m_phi_full), precision=1)))
    print("True values:")
    print(" exp(phi) = {}".format([MU, TAU, BETA, SIGMA]))

    # ------------------------------------------------------
    #     Save results
    # ------------------------------------------------------

    np.savez(filename,
             seed_data=SEED_DATA,
             seed_mcmc=SEED_MCMC,
             J=J,
             K=K,
             Nj=Nj,
             N=N,
             dphi=dphi,
             m0=M0,
             V0=V0,
             phi_true=phi_true,
             m_phi_full=m_phi_full,
             var_phi_full=var_phi_full)