Example No. 1
def test_convergence(run_model, chains, saveplots=False):
    """
    Uses an ipyparallel Client to fit <chains> independent chains of the model, computes the
    R-hat (Gelman-Rubin) convergence statistic on the resulting traces, and finally concatenates
    the models and plots their posteriors for visual assessment of convergence.
    """
    from ipyparallel import Client
    c = Client()[:] # direct view over all engines of the running ipyparallel cluster
    jobs = c.map(run_model, range(chains)) # dispatch one model fit per chain
    models = jobs.get() # block until all chains finish and collect the fitted models
    
    # Calculate convergence statistics
    from kabuki.analyze import gelman_rubin
    rhat = gelman_rubin(models)
    
    # Create a new model that contains all traces concatenated from the individual models
    from kabuki.utils import concat_models
    combined_model = concat_models(models)
    
    # Plot posteriors
    combined_model.plot_posteriors(save=saveplots)
    return rhat
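# A minimal usage sketch (assumed, not part of the source): `my_run_model` is a hypothetical
# chain-fitting function and 'mydata.csv' a hypothetical data file. The imports live inside
# the function so the ipyparallel engines (e.g. started with `ipcluster start -n 4`) can run it.
def my_run_model(chain_id):
    import hddm
    data = hddm.load_csv('mydata.csv')  # hypothetical data file
    m = hddm.HDDM(data)
    m.find_starting_values()
    m.sample(5000, burn=1000, dbname='traces_%d.db' % chain_id, db='pickle')
    return m

rhat = test_convergence(my_run_model, chains=4, saveplots=True)
print(rhat)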
# ************************************************************
# create group maps
pool = Pool()
#mp_results = pool.map(assess_convergence, hddm_fun_dict.keys())
mp_results = pool.map(assess_convergence, ['stroop'])
pool.close() 
pool.join()

results = {}
for d in mp_results:
    results.update(d)

# save plots of traces
# make sure the output directory exists (assumes `from os import makedirs, rename, path`)
makedirs(path.join('hddm_output', 'Plots'), exist_ok=True)
for k, v in results.items():
    gelman_vals[k+'_base'] = gelman_rubin([i[0] for i in v])
    # plot posteriors
    v[0][0].plot_posteriors(['a', 't', 'v'], save=True)
    plots = glob('*png')
    for p in plots:
        rename(p, path.join('hddm_output', 'Plots', '%s_base_%s' % (k,p)))
    
    if v[0][1] is not None:
        gelman_vals[k+'_condition'] = gelman_rubin([i[1] for i in v])
        
        v[0][1].plot_posteriors(['a', 't', 'v'], save=True)
        plots = glob('*png')
        for p in plots:
            rename(p, path.join('hddm_output', 'Plots', '%s_condition_%s' % (k,p)))

# save gelman vals
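# A hedged sketch of the "save gelman vals" step (the filename and CSV layout are assumptions,
# not recovered from the source script).
import csv
with open(path.join('hddm_output', 'gelman_rubin_values.csv'), 'w') as gr_file:
    writer = csv.writer(gr_file)
    for model_label, rhats in gelman_vals.items():
        for param, rhat in rhats.items():
            writer.writerow([model_label, param, rhat])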
Example No. 3
                ss.sample(samples,burn=burnIn,thin=thinning,tune_throughout=False, db='pickle', dbname='local_traces' + str(i+1) + '.db')
                local_models.append(ss)
                if i == (n_runs-1):
                        print('Waiting for the other chains to finish...')

        if num_remote > 0:
                models = results.get()
        else:
                models = []  # no remote chains were launched, so start from an empty list

        for i in range(n_runs):
                models.append(local_models[i])

        #print(len(models))
        print "Finished sampling!"

        if numberOfChains > 1:
                Rhat = gelman_rubin(models)
                print('\n Gelman-Rubin Rhat diagnostic:')
                for key in Rhat.keys():
                        print((key, Rhat[key]))

        name_dataFile = dataFile.replace(".csv","")
      
        for i in range(numberOfChains):
                save_csv(get_traces(models[i]), 'parameters'+str(i+1)+'.csv', sep = ';')

        print "Posterior samples are saved to file."    

        if deviance == True:
                for i in range(numberOfChains):
                        dev = models[i].mc.db.trace('deviance')()
                        numpy.savetxt('deviance'+str(i+1)+'.csv', dev, delimiter=";") 
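        # Hedged sketch, not in the source script: approximate DIC from the saved deviance
        # samples using pD ~= var(deviance)/2 (Gelman et al.); note that HDDM's own
        # model.dic attribute may estimate pD differently.
        if deviance:
                dev1 = numpy.loadtxt('deviance1.csv', delimiter=';')
                dic_approx = dev1.mean() + dev1.var() / 2.0
                print('Approximate DIC from chain 1 deviance samples: %f' % dic_approx)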
Example No. 4
# Monkey-patch a simple pickle-based save method onto HDDM models (used below as m.savePatch(filename))
def savePatch(self, fname):
    with open(fname, 'wb') as f:
        pickle.dump(self, f)
hddm.HDDM.savePatch = savePatch

docdir = 'D:\\all hddm analysis\\BL\\informative priors 40K\\ic'
saved_model_name = 'BL_ic_no_tttv'
os.chdir(docdir)

models = []

for i in range(12):
    print(i)
    m = pickle.load(open('submodel'+str(i), 'rb'))
    models.append(m)
print('Thats it!!')

gelman_rubin_stats = gelman_rubin(models)
with open('gelman_rubin_stats.csv', 'w') as csv_file:
    writer = csv.writer(csv_file)
    for key, value in gelman_rubin_stats.items():
        writer.writerow([key, value])

# The combined model:
running_model = kabuki.utils.concat_models(models)
# Saving the model:
running_model.savePatch(saved_model_name)
# Saving the traces:
running_model.get_traces().to_csv('traces.csv') 
# Saving the model's stats:
running_model_stats = running_model.gen_stats()
running_model_stats.to_csv('stats_csv_'+saved_model_name+'.csv')
Example No. 5
models = []
for i in range(5):
    m = hddm.models.HDDMRegressor(data,
                                  'v ~ other_poss_loss',
                                  depends_on={
                                      'v': 'condition',
                                      'z': 'condition',
                                      'a': 'condition'
                                  },
                                  bias=True,
                                  include=['z', 'st', 'sz', 'sv'],
                                  p_outlier=0.05)
    m.find_starting_values()
    m.sample(7000, burn=5000, dbname='traces.db', db='pickle')
    models.append(m)
gelman_rubin(models)
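# Optional follow-up (assumed, not in the source): keep the R-hat values and flag parameters
# above the conventional 1.1 convergence threshold instead of discarding the result.
rhats = gelman_rubin(models)
not_converged = {param: val for param, val in rhats.items() if val > 1.1}
if not_converged:
    print('Parameters with R-hat > 1.1:', not_converged)
else:
    print('All parameters have R-hat <= 1.1')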

# retrieve plausible t-values by repeatedly sampling from posterior distribution
# pick some new samples to calculate with these plausible values
new_sample_n = 5000
new_sample_burn = 10
new_samples = m_all_vza.sample(new_sample_n, burn=new_sample_burn)

# new_samples = m_all_vza
# for every sample create a data frame
# and compare the conditions, save the t values (example: z-parameter)
t_values_z_mm_con = []
t_values_z_mm_emp = []
t_values_z_mm_rec = []
for s in range(0, new_sample_n - new_sample_burn):
    this_z_sample_df = pd.DataFrame()
Example No. 6
ppc_compare = hddm.utils.post_pred_stats(data, ppc_data)
ppc_stats = hddm.utils.post_pred_stats(data, ppc_data, call_compare=False)
print(ppc_compare)
ppc_data.to_csv(os.path.join(fig_dir, 'diagnostics', 'ppc_210518.csv'))

#Get Gelman-Rubin Stats
models = []
for i in range(5):
    m = hddm.HDDMStimCoding(data, include={'z'}, stim_col='stimulus', split_param='z',
                            depends_on={'v': 'cond_v', 'a': 'session',
                                        't': 'session', 'z': 'session'},
                            p_outlier=.05)
    m.find_starting_values()
    m.sample(5000, burn=2000)
    models.append(m)
model_base_name = '2018_GNG_HDDM'
fig_dir = os.path.join(model_base_name)

gelman = gelman_rubin(models)
gelman = pd.DataFrame.from_dict(gelman,orient='index')
gelman.to_csv(os.path.join(fig_dir, 'diagnostics', 'gelman_GNG.csv'))


##### The code below saves the data and creates the figures

model_base_name = '2018_GNG_HDDM'

fig_dir = os.path.join(model_base_name)
results = model.gen_stats() # check the outputs in comparison with the true parameters
results.to_csv(os.path.join(fig_dir, 'diagnostics', 'results_ARC.csv'))

#data.to_csv(os.path.join(fig_dir, 'diagnostics', 'simdataTest.csv'))

text_file = open(os.path.join(fig_dir, 'diagnostics', 'DIC.txt'), 'w')
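# Hedged completion (assumed, not in the source): write the fitted model's DIC into the
# opened DIC.txt file and close it; `model` is the same object used for gen_stats() above.
text_file.write('DIC: %f\n' % model.dic)
text_file.close()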
Example No. 7
if not os.path.exists(outputPath):
    os.makedirs(outputPath)

def run_model(id):

    m = hddm.HDDMRegressor(data, modelList,bias=includeBias,
        include='p_outlier',group_only_regressors = False)

    m.find_starting_values()
    m.sample(nSample, burn=nBurned, dbname=outputPath + '/' + modelName+'_'+str(id)+'.db', db='pickle')
    m.savePatch(outputPath + '/' +modelName+'_'+str(id))
    return m


pool = multiprocessing.Pool()
models = pool.map(run_model, range(5))
pool.close()

m_rhat = gelman_rubin(models)
pd.DataFrame.from_dict(m_rhat, orient='index').to_csv(outputPath + '/'+modelName+'_RHat.csv')

m_comb = concat_models(models)
m_comb_export = m_comb.get_traces()
m_comb_export.to_csv(outputPath + '/' + modelName+'_traces.csv')
print("DIC: %f" %m_comb.dic)

results = m_comb.get_traces()
results.to_csv(outputPath + '/' + modelName+'_Results.csv')
summary = m_comb.gen_stats()
summary.to_csv(outputPath + '/' + modelName+'_Summary.csv')
Example No. 8
                    except Exception as e:
                        print(e)
                        if str(e) == 'unsupported pickle protocol: 3':
                            try:
                                convert(mod + '_chain' + str(chain) + '_' +
                                        code + 'Code.model')
                                mods.append(this_model)
                                traces = this_model.get_traces()
                                traces.to_csv(outputdir + '/../diagnostics/' +
                                              mod + '_traces_' + str(chain) +
                                              '.csv')
                            except Exception as ee:
                                print(ee)

                try:
                    gel_rub = pd.DataFrame(gelman_rubin(mods).items(),
                                           columns=['parameter', 'rhat'])
                    gel_rub.to_csv(outputdir + '/../diagnostics/gr_' + mod +
                                   '.csv')
                    dic = this_model.dic
                    print(dic)
                    dics = dics.append(
                        pd.DataFrame([[mod, dic]], columns=['model', 'DIC']))

                    dics.to_csv(outputdir + '/../diagnostics/dics_all.csv')
                except Exception as e:
                    print('Ran into issues with these settings: ' + samp +
                          ' ' + task + ' ' + samp_size)
