# Module-level imports implied by the body below (the original module's imports are not shown).
import emcee
import pymc as mc
from numpy import empty, inf, mean, random


def sample_emcee(model=None, nwalkers=500, samples=1000, burn=500, thin=10):
    import pymc.progressbar as pbar

    # This is the likelihood function for emcee
    def lnprob(vals):
        try:
            # Write the proposed values into the pymc stochastics and
            # return the joint log-probability of the model.
            for val, var in zip(vals, model.stochastics):
                var.value = val
            return model.logp
        except mc.ZeroProbability:
            return -1 * inf

    # emcee parameters
    ndim = len(model.stochastics)

    # Find the MAP estimate to initialize the walkers.
    mc.MAP(model).fit()
    start = empty(ndim)
    for i, var in enumerate(model.stochastics):
        start[i] = var.value

    # Sample starting points for the walkers around the MAP.
    p0 = random.randn(ndim * nwalkers).reshape((nwalkers, ndim)) + start

    # Instantiate the sampler, passing in the pymc likelihood function.
    sampler = emcee.EnsembleSampler(nwalkers, ndim, lnprob)

    bar = pbar.progress_bar(burn + samples)
    i = 0

    # Burn-in
    for pos, prob, state in sampler.sample(p0, iterations=burn):
        i += 1
        bar.update(i)

    sampler.reset()

    # Sample
    try:
        for p, lnprob, lnlike in sampler.sample(pos, iterations=samples, thin=thin):
            i += 1
            bar.update(i)
    # except KeyboardInterrupt:
    #     pass
    finally:
        print("\nMean acceptance fraction during sampling: {}".format(
            mean(sampler.acceptance_fraction)))

    # Write the emcee chain back into a pymc MCMC object so the usual trace
    # machinery can be used downstream.
    mcmc = mc.MCMC(model)  # MCMC instance for the model
    mcmc.sample(1, progress_bar=False)  # This call is only to set up the chains.
    for i, var in enumerate(model.stochastics):
        var.trace._trace[0] = sampler.flatchain[:, i]

    return mcmc
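
# A minimal usage sketch for the standalone sample_emcee above (not part of the
# original code). The toy model is hypothetical and assumes the pymc 2.x API; the
# node names 'mu' and 'y' are illustrative only.
def _example_sample_emcee():
    import numpy as np
    import pymc as mc

    observed = np.random.randn(100) + 1.0
    mu = mc.Normal('mu', mu=0.0, tau=1e-3)
    y = mc.Normal('y', mu=mu, tau=1.0, value=observed, observed=True)
    model = mc.Model([mu, y])

    # Returns a pymc MCMC object whose traces hold the flattened emcee chain.
    mcmc = sample_emcee(model=model, nwalkers=100, samples=500, burn=200)
    return mcmc.trace('mu')[:]
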
def sample_emcee(
    self,
    nwalkers=500,
    samples=10,
    dispersion=0.1,
    burn=5,
    thin=1,
    stretch_width=2.0,
    anneal_stretch=True,
    pool=None,
):
    import emcee
    import pymc.progressbar as pbar

    # This is the likelihood function for emcee
    lnprob = LnProb(self)

    # init
    self.mcmc()

    # get current values
    stochs = self.get_stochastics()
    start = [node_descr["node"].value for name, node_descr in stochs.iterrows()]
    ndim = len(start)

    def init_from_priors():
        p0 = np.empty((nwalkers, ndim))
        i = 0
        while i != nwalkers:
            self.mc.draw_from_prior()
            try:
                self.mc.logp
                p0[i, :] = [node_descr["node"].value for name, node_descr in stochs.iterrows()]
                i += 1
            except pm.ZeroProbability:
                continue
        return p0

    if hasattr(self, "emcee_dispersions"):
        scale = np.empty_like(start)
        for i, (name, node_descr) in enumerate(stochs.iterrows()):
            knode_name = node_descr["knode_name"].replace("_subj", "")
            scale[i] = self.emcee_dispersions.get(knode_name, 0.1)
    else:
        scale = 0.1

    p0 = np.random.randn(ndim * nwalkers).reshape((nwalkers, ndim)) * scale * dispersion + start
    # p0 = init_from_priors()

    # instantiate sampler passing in the pymc likelihood function
    sampler = emcee.EnsembleSampler(nwalkers, ndim, lnprob, a=stretch_width, pool=pool)

    bar = pbar.progress_bar(burn + samples)
    i = 0
    annealing = np.linspace(stretch_width, 2, burn)
    sys.stdout.flush()

    for pos, prob, state in sampler.sample(p0, iterations=burn):
        if anneal_stretch:
            sampler.a = annealing[i]
        i += 1
        bar.update(i)
    # print("\nMean acceptance fraction during burn-in: {}".format(np.mean(sampler.acceptance_fraction)))

    sampler.reset()

    # sample
    try:
        for p, lnprob, lnlike in sampler.sample(pos, iterations=samples, thin=thin):
            i += 1
            bar.update(i)
    except KeyboardInterrupt:
        pass
    finally:
        print("\nMean acceptance fraction during sampling: {}".format(
            np.mean(sampler.acceptance_fraction)))

    # restore state
    for val, (name, node_descr) in zip(start, stochs.iterrows()):
        node_descr["node"].set_value(val)

    # Save samples back to pymc model
    self.mc.sample(1, progress_bar=False)  # This call is to set up the chains
    for pos, (name, node) in enumerate(stochs.iterrows()):
        node["node"].trace._trace[0] = sampler.flatchain[:, pos]

    return sampler
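
# LnProb is referenced above but not defined in this snippet. Below is a minimal
# sketch of what such a callable plausibly looks like, modeled on the closure-style
# lnprob in the standalone sample_emcee earlier in this file: write the proposed
# values into the model's stochastics, return the joint log-probability, and map
# ZeroProbability to -inf. Using a class instead of a closure keeps it picklable for
# emcee's `pool` argument. The attribute names are assumptions; it reuses the same
# get_stochastics(), set_value(), and self.mc.logp accessors seen above, plus
# module-level `import numpy as np` and `import pymc as pm`.
class LnProb(object):
    def __init__(self, model):
        self.model = model
        self.stochs = model.get_stochastics()

    def __call__(self, vals):
        try:
            for val, (name, node_descr) in zip(vals, self.stochs.iterrows()):
                node_descr["node"].set_value(val)
            return self.model.mc.logp
        except pm.ZeroProbability:
            return -np.inf
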
# Assumes module-level imports of pandas as `pd` and pymc.progressbar as `pbar`;
# `_post_pred_generate` is a sibling helper defined elsewhere in the module.
def post_pred_gen(model, groupby=None, samples=500, append_data=False, add_model_parameters=False, progress_bar=True):
    """Run posterior predictive check on a model.

    :Arguments:
        model : kabuki.Hierarchical
            Kabuki model over which to compute the ppc.

    :Optional:
        samples : int
            How many samples to generate for each node.
        groupby : list
            Alternative grouping of the data. If not supplied, uses splitting
            of the model (as provided by depends_on).
        append_data : bool (default=False)
            Whether to append the observed data of each node to the replications.
        add_model_parameters : bool (default=False)
            Whether to include the model parameters used for generation in the
            returned data sets.
        progress_bar : bool (default=True)
            Display progress bar

    :Returns:
        Hierarchical pandas.DataFrame with multiple sampled RT data sets.
        1st level: wfpt node
        2nd level: posterior predictive sample
        3rd level: original data index

    :See also:
        post_pred_stats
    """
    results = {}

    # Progress bar
    if progress_bar:
        n_iter = len(model.get_observeds())
        bar = pbar.progress_bar(n_iter)
        bar_iter = 0
    else:
        print("Sampling...")

    if groupby is None:
        iter_data = ((name, model.data.iloc[obs['node'].value.index]) for name, obs in model.iter_observeds())
    else:
        iter_data = model.data.groupby(groupby)

    for name, data in iter_data:
        node = model.get_data_nodes(data.index)

        if progress_bar:
            bar.update(bar_iter)
            bar_iter += 1

        if node is None or not hasattr(node, 'random'):
            continue  # Skip

        # If we used data grouping, `name` is a tuple which doesn't play well with
        # pd.concat later on, so we exchange it for the name of the observed node
        # we are currently processing.
        if groupby is not None:
            new_name = node.__str__()
        else:
            # if groupby was None --> keep name as is
            new_name = name

        # Sample and generate stats
        datasets = _post_pred_generate(
            node,
            samples=samples,
            data=data,
            append_data=append_data,
            add_model_parameters=add_model_parameters,
        )
        results[new_name] = pd.concat(datasets, names=['sample'], keys=list(range(len(datasets))))

    return pd.concat(results, names=['node'])
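
# A minimal usage sketch for post_pred_gen above (not part of the original code).
# The model construction is hypothetical: any fitted kabuki.Hierarchical model
# works; here an HDDM model is assumed, and 'mydata.csv' is a placeholder file
# with the usual 'rt' and 'response' columns.
def _example_post_pred_gen():
    import hddm

    data = hddm.load_csv('mydata.csv')
    m = hddm.HDDM(data)
    m.sample(2000, burn=200)

    # One set of replicated data per observed node, indexed by
    # (node, posterior predictive sample, original data index).
    ppc = post_pred_gen(m, samples=200, append_data=True)
    return ppc.head()
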
def post_pred_gen(model, groupby=None, samples=500, append_data=False, progress_bar=True):
    """Run posterior predictive check on a model.

    :Arguments:
        model : kabuki.Hierarchical
            Kabuki model over which to compute the ppc.

    :Optional:
        samples : int
            How many samples to generate for each node.
        groupby : list
            Alternative grouping of the data. If not supplied, uses splitting
            of the model (as provided by depends_on).
        append_data : bool (default=False)
            Whether to append the observed data of each node to the replications.
        progress_bar : bool (default=True)
            Display progress bar

    :Returns:
        Hierarchical pandas.DataFrame with multiple sampled RT data sets.
        1st level: wfpt node
        2nd level: posterior predictive sample
        3rd level: original data index

    :See also:
        post_pred_stats
    """
    results = {}

    # Progress bar
    if progress_bar:
        n_iter = len(model.get_observeds())
        bar = pbar.progress_bar(n_iter)
        bar_iter = 0
    else:
        print("Sampling...")

    if groupby is None:
        iter_data = ((name, model.data.iloc[obs['node'].value.index]) for name, obs in model.iter_observeds())
    else:
        iter_data = model.data.groupby(groupby)

    for name, data in iter_data:
        node = model.get_data_nodes(data.index)

        if progress_bar:
            bar_iter += 1
            bar.update(bar_iter)

        if node is None or not hasattr(node, 'random'):
            continue  # Skip

        ##############################
        # Sample and generate stats
        datasets = _post_pred_generate(node, samples=samples, data=data, append_data=append_data)
        results[name] = pd.concat(datasets, names=['sample'], keys=list(range(len(datasets))))

    if progress_bar:
        bar_iter += 1
        bar.update(bar_iter)

    return pd.concat(results, names=['node'])
def post_pred_gen(model, groupby=None, samples=500, append_data=False, progress_bar=True):
    """Run posterior predictive check on a model.

    :Arguments:
        model : kabuki.Hierarchical
            Kabuki model over which to compute the ppc.

    :Optional:
        samples : int
            How many samples to generate for each node.
        groupby : list
            Alternative grouping of the data. If not supplied, uses splitting
            of the model (as provided by depends_on).
        append_data : bool (default=False)
            Whether to append the observed data of each node to the replications.
        progress_bar : bool (default=True)
            Display progress bar

    :Returns:
        Hierarchical pandas.DataFrame with multiple sampled RT data sets.
        1st level: wfpt node
        2nd level: posterior predictive sample
        3rd level: original data index

    :See also:
        post_pred_stats
    """
    results = {}

    # Progress bar
    if progress_bar:
        n_iter = len(model.get_observeds())
        bar = pbar.progress_bar(n_iter)
        bar_iter = 0
    else:
        print("Sampling...")

    if groupby is None:
        iter_data = ((name, model.data.iloc[obs['node'].value.index]) for name, obs in model.iter_observeds())
    else:
        iter_data = model.data.groupby(groupby)

    for name, data in iter_data:
        node = model.get_data_nodes(data.index)

        # New addition: reset the index for non-regression models
        if str(type(model)) == "<class 'hddm.models.hddm_info.HDDM'>":
            data = data.reset_index()

        if progress_bar:
            bar_iter += 1
            bar.update(bar_iter)

        if node is None or not hasattr(node, 'random'):
            continue  # Skip

        ##############################
        # Sample and generate stats
        datasets = _post_pred_generate(node, samples=samples, data=data, append_data=append_data)
        results[name] = pd.concat(datasets, names=['sample'], keys=list(range(len(datasets))))

    # New addition: convert the results dict keys to single strings for regression
    # models with different conditions (tuple keys don't play well with pd.concat).
    if list(results.keys())[0] != 'wfpt':
        if not isinstance(list(results.keys())[0], str):
            results = {'(' + ",".join(map(str, x)) + ')': results[x] for x in results.keys()}
            results = {key.replace('.0', ''): value for key, value in results.items()}
        else:
            results = {'(' + x + ')': results[x] for x in results.keys()}

    if progress_bar:
        bar_iter += 1
        bar.update(bar_iter)

    return pd.concat(results, names=['node'])
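
# Illustration of the tuple-key flattening performed above (not part of the original
# code): a key such as ('stim', 1.0), as produced when grouping on several columns,
# is turned into the plain string '(stim,1)' so every node label is a single string
# before the final pd.concat.
key = ('stim', 1.0)
flat_key = ('(' + ",".join(map(str, key)) + ')').replace('.0', '')
assert flat_key == '(stim,1)'
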
def post_pred_gen(model, groupby=None, samples=500, append_data=False, progress_bar=True):
    """Run posterior predictive check on a model.

    :Arguments:
        model : kabuki.Hierarchical
            Kabuki model over which to compute the ppc.

    :Optional:
        samples : int
            How many samples to generate for each node.
        groupby : list
            Alternative grouping of the data. If not supplied, uses splitting
            of the model (as provided by depends_on).
        append_data : bool (default=False)
            Whether to append the observed data of each node to the replications.
        progress_bar : bool (default=True)
            Display progress bar

    :Returns:
        Hierarchical pandas.DataFrame with multiple sampled RT data sets.
        1st level: wfpt node
        2nd level: posterior predictive sample
        3rd level: original data index

    :See also:
        post_pred_stats
    """
    results = {}

    # Progress bar
    if progress_bar:
        n_iter = len(model.get_observeds())
        print('printing observeds')
        print(model.get_observeds())
        print('printing len of observeds')
        print(len(model.get_observeds()))
        print('printing dir of observeds')
        print(dir(model.get_observeds()))
        bar = pbar.progress_bar(n_iter)  # Why would n_iter be related to the number of observeds?
        bar_iter = 0
    else:
        print("Sampling...")

    if groupby is None:
        # Generates a sequence of (node label, dataframe) tuples, each containing
        # subject-specific data and the names of the nodes (or, more generally, the
        # observeds by unit of decomposition --> usually subjects and/or conditions).
        iter_data = ((name, model.data.iloc[obs['node'].value.index]) for name, obs in model.iter_observeds())
    else:
        iter_data = model.data.groupby(groupby)

    print('Print iter_data')
    print(iter_data)

    for name, data in iter_data:
        print('Printing data, implies one pass through the for loop')
        print(data)
        print('printing name for the data')
        print(name)

        node = model.get_data_nodes(data.index)  # CHECK WHAT THIS DOES!

        if progress_bar:
            bar_iter += 1
            bar.update(bar_iter)

        if node is None or not hasattr(node, 'random'):
            continue  # Skip

        ##############################
        # Sample and generate stats
        datasets = _post_pred_generate(
            node, samples=samples, data=data, append_data=append_data)  # CHECK WHAT THIS DOES!
        results[name] = pd.concat(datasets, names=['sample'], keys=list(range(len(datasets))))

    if progress_bar:
        bar_iter += 1
        bar.update(bar_iter)

    return pd.concat(results, names=['node'])