def load_data_all(data_dir, all_data_path, pred_path, gloveFile, first_run, load_all):
    """Load word embeddings, split the dataset, and build the model matrices.

    Parameters
    ----------
    data_dir : directory handed to the train/test split helper.
    all_data_path : path to the file holding the full labelled dataset.
    pred_path : unused here; kept for interface compatibility with callers.
    gloveFile : path to the full GloVe embedding file.
    first_run : unused here; kept for interface compatibility with callers.
    load_all : if truthy, load the full GloVe file; otherwise load the
        pre-filtered vocabulary file.

    Returns
    -------
    train_x, train_y, test_x, test_y, val_x, val_y, weight_matrix, word_idx
    """
    # Load embeddings: either the full GloVe file or a filtered subset.
    # NOTE(review): `filtered_glove_path` is not defined in this function --
    # it must exist at module level; confirm before calling with load_all=False.
    if load_all:
        weight_matrix, word_idx = uf.load_embeddings(gloveFile)
    else:
        weight_matrix, word_idx = uf.load_embeddings(filtered_glove_path)

    # Create train / validation / test splits (80% train).
    all_data = uf.read_data(all_data_path)
    train_data, test_data, dev_data = uf.training_data_split(
        all_data, 0.8, data_dir)
    train_data = train_data.reset_index()
    dev_data = dev_data.reset_index()
    test_data = test_data.reset_index()

    # Derive the sequence length from the data instead of hard-coding it.
    maxSeqLength, avg_words, sequence_length = uf.maxSeqLen(all_data)

    # Build the input matrices for each split.
    train_x = uf.tf_data_pipeline_nltk(train_data, word_idx, weight_matrix,
                                       maxSeqLength)
    test_x = uf.tf_data_pipeline_nltk(test_data, word_idx, weight_matrix,
                                      maxSeqLength)
    val_x = uf.tf_data_pipeline_nltk(dev_data, word_idx, weight_matrix,
                                     maxSeqLength)

    # Build the one-hot/label matrices for each split.
    train_y = uf.labels_matrix(train_data)
    val_y = uf.labels_matrix(dev_data)
    test_y = uf.labels_matrix(test_data)

    # Summarize sizes.  The class count is the label-matrix width; the
    # original wrapped it in np.unique(), which on a scalar is a no-op.
    print("Training data: ")
    print(train_x.shape)
    print(train_y.shape)
    print("Classes: ")
    print(train_y.shape[1])

    return train_x, train_y, test_x, test_y, val_x, val_y, weight_matrix, word_idx
def run_scipy(): theta_pass = 6e-13, 4.4e-14, 1e-9, 1e10 ssfr, snr, snr_err = util.read_data() nll = lambda *args: -lnlike(*args) result = opt.minimize(nll, [theta_pass], args=(ssfr, snr, snr_err), options={'disp': True}) print "ML parameters:", result.x #a_fit, b_fit, c_fit, d_fit = result.x[0], result.x[1], result.x[2], result.x[3] #theta_pass = a_fit, b_fit, c_fit, d_fit theta_pass = result.x return theta_pass
def run_scipy(ssfr, snr, snr_err):
    """Maximum-likelihood fit using downhill-simplex then conjugate-gradient.

    Returns the parameter vector from the final (fmin_cg) optimiser run.
    """
    # NOTE(review): the ssfr/snr/snr_err parameters are immediately
    # overwritten by the reads below, so the arguments are effectively unused.
    logssfr, ssfr, snr, snr_err = util.read_data_with_log()
    # Starting point for the optimisers (a, k, s0, alpha).
    theta_pass = 1.1183673469387756e-13, 0.49081632653061219, 1.6653061224489797e-10, 0.7265306122448979
    # Parameter box limits -- only used by the commented-out bounded
    # SLSQP call below; the fmin/fmin_cg calls ignore `bnds`.
    a_min, a_max = 1e-13, 7.e-13
    k_min, k_max = 0.1, 1.
    s0_min, s0_max = 1e-10, 2e-10
    alpha_min, alpha_max = 0.6, 9.
    bnds = ((a_min,a_max),(k_min,k_max),(s0_min,s0_max),(alpha_min,alpha_max))
    ssfr, snr, snr_err = util.read_data()
    # Negative log-likelihood, since scipy optimisers minimise.
    nll = lambda *args: -lnlike(*args)
    #result = opt.minimize(nll, [theta_pass],args=(ssfr,snr,snr_err),options={'disp': True},bounds=bnds,method='SLSQP')
    # NOTE(review): the fmin result is discarded -- it is overwritten by the
    # fmin_cg call on the next line; presumably only one was meant to run.
    result = opt.fmin(nll, [theta_pass],args=(ssfr,snr,snr_err),ftol=0.000001,xtol=0.00001)
    result = opt.fmin_cg(nll, [theta_pass],args=(ssfr,snr,snr_err))
    print "ML parameters:", result
    #a_fit, b_fit, c_fit, d_fit = result.x[0], result.x[1], result.x[2], result.x[3]
    #theta_pass = a_fit, b_fit, c_fit, d_fit
    theta_pass = result
    return theta_pass
def run_emcee(): if os.path.isfile(root_dir + 'Data/MCMC_abnew.pkl'): print 'Chains already exist, using existing chains' pkl_data_file = open(root_dir + 'Data/MCMC_abnew.pkl','rb') samples = pick.load(pkl_data_file) pkl_data_file.close() print np.shape(samples) else: print 'Chains do not exist, computing chains...' # Setting parameter top hat priors k1_min, k1_max = 1e-13, 9e-13 k2_min, k2_max = 1e-5, 7e-4 x1_min, x1_max = 1e-10, 8e-10 x2_min, x2_max = 1e-11, 8e-11 ndim = 4 nwalkers = 300 nburn = 20 nsample = 150 # These functions define the prior and the function to apply prior to likelihood def lnprior(theta): k1, k2, x1, x2 = theta if (k1_min < k1 < k1_max) and (k2_min < k2 < k2_max) and (x1_min < x1 < x1_max) and (x2_min < x2 < x2_max): return 0. return -np.inf def lnprob(theta, logssfr, logsnr, snr_err): lp = lnprior(theta) if not np.isfinite(lp): return -np.inf return lp + lnlike(theta, logssfr, logsnr, snr_err) # Reading in data ssfr, snr, snr_err = util.read_data() # Setting initial position of walkers pos_min = np.array([k1_min, k2_min, x1_min, x2_min]) pos_max = np.array([k1_max, k2_max, x1_max, x2_max]) psize = pos_max - pos_min psize = pos_max - pos_min pos = [pos_min + psize*np.random.rand(ndim) for ii in range(nwalkers)] #pos = [np.array([4.628e-13, 6.105e-11, 2.885e-10, 1.008e-11, 6.100e-4]) + 1e-4*psize*np.random.randn(ndim) for i in range(nwalkers)] # Defining sampler sampler = emcee.EnsembleSampler(nwalkers, ndim, lnprob, args=(ssfr, snr, snr_err), threads=1) # Performing burn in pos, prob, state = sampler.run_mcmc(pos, nburn) sampler.reset() # Main sampling run pos, prob, state = sampler.run_mcmc(pos, nsample) # These plots are for diagnostics use plt.figure() ax = plt.subplot() ax.set_yscale("log") #ax.set_xscale("log") plt.plot(sampler.chain[:,:,0].T,'b',alpha=0.05) plt.xlabel('par0') plt.figure() ax = plt.subplot() ax.set_yscale("log") #ax.set_xscale("log") plt.plot(sampler.chain[:,:,3].T,'b',alpha=0.05) plt.xlabel('par3') plt.figure() ax = 
plt.subplot() #ax.set_yscale("log") #ax.set_xscale("log") plt.plot(sampler.lnprobability[:,:].T,'b',alpha=0.05) plt.xlabel('lnprob') # Formatting and saving output samples = sampler.flatchain output = open(root_dir + 'Data/MCMC_piecewise.pkl','wb') pick.dump(samples,output) output.close() print np.shape(samples) plt.figure() ax = plt.subplot() ax.set_xscale("log") plt.hist(samples[:,0],bins=100) plt.xlabel('k1') plt.figure() ax = plt.subplot() ax.set_xscale("log") plt.hist(samples[:,1],bins=100) plt.xlabel('k2') plt.figure() ax = plt.subplot() ax.set_xscale("log") plt.hist(samples[:,2],bins=100) plt.xlabel('x1') plt.figure() ax = plt.subplot() ax.set_xscale("log") plt.hist(samples[:,3],bins=100) plt.xlabel('x2') c = ChainConsumer() c.add_chain(samples, parameters=["$k_1$", "$k_2$", "$sSFR_1$", "$sSFR_2$", "$sSFR_a$"]) c.configure(smooth=False,bins=300,sigmas=[0,1,2,3]) #figw = c.plotter.plot_walks() fig = c.plotter.plot() summary = c.analysis.get_summary() k1_fit = summary["$k_1$"][1] k2_fit = summary["$k_2$"][1] ssfr1_fit = summary["$sSFR_1$"][1] ssfr2_fit = summary["$sSFR_2$"][1] ssfra_fit = summary["$sSFR_a$"][1] theta_pass = k1_fit, k2_fit, x1_fit, x2_fit print 'k1', k1_fit print 'k2', k2_fit print 'x1', x1_fit print 'x2', x2_fit return theta_pass
def run_grid():
    """Brute-force likelihood grid for the 4-parameter piecewise model.

    Reuses a pickled likelihood grid when one exists; otherwise evaluates
    the likelihood on a resolution^4 grid, normalises it, and pickles
    (resolution, likelihoods, parameters, theta_max).  Returns the grid
    point with the highest likelihood.
    """
    if util.does_grid_exist(model_name,root_dir):
        print 'Grid already exists, using existing grid...'
        resolution, likelihoods, parameters, theta_max = util.read_grid(model_name,root_dir)
        k1_par, k2_par, x1_par, x2_par = parameters
    else:
        print 'Grid does not exist, computing grid...'
        resolution = 100

        # Grid limits for each of the four parameters.
        k1_min, k1_max = 0.4, 0.8
        k2_min, k2_max = 0.1e-8, 90e-8
        x1_min, x1_max = 0.1e-11, 4e-11
        x2_min, x2_max = 0.1e-9, 3e-9

        # Reading in data
        ssfr, snr, snr_err = util.read_data()

        k1_par = np.linspace(k1_min,k1_max,resolution)
        k2_par = np.linspace(k2_min,k2_max,resolution)
        x1_par = np.linspace(x1_min,x1_max,resolution)
        x2_par = np.linspace(x2_min,x2_max,resolution)

        likelihoods = np.ones((resolution,resolution,resolution,resolution))
        max_like = 0.

        # Evaluate the likelihood at every grid point, tracking the maximum.
        for ii in np.arange(resolution):
            if ii%2 == 0:
                # Coarse progress indicator over the outermost loop.
                print np.round((float(ii) / resolution) * 100.,2), "% Done"
            for jj in np.arange(resolution):
                for kk in np.arange(resolution):
                    for ll in np.arange(resolution):
                        theta = k1_par[ii], k2_par[jj], x1_par[kk], x2_par[ll]
                        likelihoods[ii,jj,kk,ll] = np.exp(lnlike(theta,ssfr,snr,snr_err))
                        if likelihoods[ii,jj,kk,ll] > max_like:
                            max_like = likelihoods[ii,jj,kk,ll]
                            theta_max = k1_par[ii], k2_par[jj], x1_par[kk], x2_par[ll]
                            #print "New max like:", max_like
                            #print theta_max, "\n"

        # Normalise so the grid sums to one, then cache it to disk.
        likelihoods /= np.sum(likelihoods)
        output = open(root_dir + 'Data/MCMC_piecewise_grid.pkl','wb')
        parameters = k1_par, k2_par, x1_par, x2_par
        result = resolution, likelihoods, parameters, theta_max
        pick.dump(result,output)
        output.close()

    # Marginalise the 4-D likelihood into one 1-D distribution per parameter.
    k1_like = np.zeros(resolution)
    k2_like = np.zeros(resolution)
    x1_like = np.zeros(resolution)
    x2_like = np.zeros(resolution)
    for ii in np.arange(resolution):
        k1_like[ii] = np.sum(likelihoods[ii,:,:,:])
        k2_like[ii] = np.sum(likelihoods[:,ii,:,:])
        x1_like[ii] = np.sum(likelihoods[:,:,ii,:])
        x2_like[ii] = np.sum(likelihoods[:,:,:,ii])

    '''
    plt.figure()
    ax = plt.subplot()
    ax.set_xscale("log")
    plt.plot(k1_par,k1_like,'x')
    plt.xlabel('slope')

    plt.figure()
    ax = plt.subplot()
    ax.set_xscale("log")
    plt.plot(k2_par,k2_like,'x')
    plt.xlabel('offset')

    plt.figure()
    ax = plt.subplot()
    ax.set_xscale("log")
    plt.plot(x1_par,x1_like,'x')
    plt.xlabel('x1')

    plt.figure()
    ax = plt.subplot()
    ax.set_xscale("log")
    plt.plot(x2_par,x2_like,'x')
    plt.xlabel('x2')
    '''

    # These are the marginalised maximum likelihood parameters
    k1_fit = k1_par[np.argmax(k1_like)]
    k2_fit = k2_par[np.argmax(k2_like)]
    x1_fit = x1_par[np.argmax(x1_like)]
    x2_fit = x2_par[np.argmax(x2_like)]

    print "ML parameters:"
    # The joint-grid maximum is returned, not the marginalised fits.
    #theta_pass = k1_fit, k2_fit, x1_fit, x2_fit
    theta_pass = theta_max
    print theta_pass

    return theta_pass
4) Calculate the derivatives needed for Fisher sum 5) Calculate Fisher sum by doing (N'*(C**2))*N where N is the numerical derivative matrix and C is the inverse of the diagonal covariance matrix. Or alternatively just sum over all the individual components of the FIsher matrix. ''' start = timer() #To track program runtime #---------------------------------- INPUTS ----------------------------------- #1) The function that we wish to take derivatives of def inputFunction(dataPoints,baseParameters): return nicelog.nicelog_snr(np.log10(dataPoints),baseParameters) #2) The datapoints where we wish to know the derivative ssfr, snr, snr_err = util.read_data() dataPoints = ssfr dataPointsUncertainties = snr_err #3) The parameter values of the function #baseParameters = np.array([5.222222e-14, 0.2272727, 1.30909e-10, 0.9449494],dtype=np.float64) #baseParametersOriginal = np.array([5.222222e-14, 0.2272727, 1.309090e-10, 0.9449494],dtype=np.float64) baseParameters = np.array([1.1183673469387756e-13, 0.49081632653061219, 1.6653061224489797e-10, 0.7265306122448979],dtype=np.float64) baseParametersOriginal = np.array([1.1183673469387756e-13, 0.49081632653061219, 1.6653061224489797e-10, 0.7265306122448979],dtype=np.float64) #----------------------------- FUNCTION DEFINITIONS--------------------------- def derivativeNumerical(function,stepSize,parameterNumber):
def run_grid():
    """Brute-force likelihood grid for the 5-parameter 'abnew' model.

    Reuses a pickled grid when one exists; otherwise evaluates the
    likelihood on a resolution^5 grid, normalises it, and pickles
    (resolution, likelihoods, parameters, theta_max).  Returns the grid
    point with the highest likelihood.
    """
    if util.does_grid_exist(model_name, root_dir):
        print 'Grid already exists, using existing grid...'
        resolution, likelihoods, parameters, theta_max = util.read_grid(
            model_name, root_dir)
        k1_par, k2_par, s1_par, s2_par, sa_par = parameters
    else:
        print 'Grid does not exist, computing grid...'
        # Kept low: the grid cost scales as resolution**5.
        resolution = 40

        '''
        # These limits find a decent fit with r.chi2 1.55
        k1_min, k1_max = 3e-13, 6.6e-13
        k2_min, k2_max = 3e-11, 9.5e-11
        ssfr1_min, ssfr1_max = 2e-10, 4e-10
        ssfr2_min, ssfr2_max = 2e-12, 1.35e-10
        ssfra_min, ssfra_max = 4e-4, 9.5e-4
        '''

        # Grid limits for each of the five parameters.
        #theta_pass = 4.628e-13, 2e-4, 1./(0.5e9), 1.008e-11, 2./(12e9)
        k1_min, k1_max = 3e-13, 6e-13
        k2_min, k2_max = 3e-5, 4e-4
        ssfr1_min, ssfr1_max = 2e-10, 6e-10
        ssfr2_min, ssfr2_max = 1.5e-11, 5.8e-11
        ssfra_min, ssfra_max = 5.2e-11, 7.e-10

        # Reading in data
        ssfr, snr, snr_err = util.read_data()

        k1_par = np.linspace(k1_min, k1_max, resolution)
        k2_par = np.linspace(k2_min, k2_max, resolution)
        s1_par = np.linspace(ssfr1_min, ssfr1_max, resolution)
        s2_par = np.linspace(ssfr2_min, ssfr2_max, resolution)
        sa_par = np.linspace(ssfra_min, ssfra_max, resolution)

        # Adding another point by hand
        #4.628e-13, 6.105e-11, 2.885e-10, 1.008e-11, 6.100e-4
        #k1_par = np.sort(np.append(k1_par,4.628e-13))
        #k2_par = np.sort(np.append(k2_par,6.105e-11))
        #s1_par = np.sort(np.append(s1_par,2.885e-10))
        #s2_par = np.sort(np.append(s2_par,1.008e-11))
        #sa_par = np.sort(np.append(sa_par,6.100e-4))
        #resolution += 1

        likelihoods = np.ones(
            (resolution, resolution, resolution, resolution, resolution))
        max_like = 0.

        # Evaluate the likelihood at every grid point, tracking the maximum.
        for ii in np.arange(resolution):
            if ii % 2 == 0:
                # Coarse progress indicator over the outermost loop.
                print np.round((float(ii) / resolution) * 100., 2), "% Done"
            for jj in np.arange(resolution):
                for kk in np.arange(resolution):
                    for ll in np.arange(resolution):
                        for mm in np.arange(resolution):
                            theta = k1_par[ii], k2_par[jj], s1_par[kk], s2_par[
                                ll], sa_par[mm]
                            likelihoods[ii, jj, kk, ll, mm] = np.exp(
                                lnlike(theta, ssfr, snr, snr_err))
                            if likelihoods[ii, jj, kk, ll, mm] > max_like:
                                max_like = likelihoods[ii, jj, kk, ll, mm]
                                theta_max = k1_par[ii], k2_par[jj], s1_par[
                                    kk], s2_par[ll], sa_par[mm]
                                #print "New max like:", max_like
                                #print theta_max, "\n"

        # Normalise so the grid sums to one, then cache it to disk.
        likelihoods /= np.sum(likelihoods)
        output = open(root_dir + 'Data/MCMC_abnew_grid.pkl', 'wb')
        parameters = k1_par, k2_par, s1_par, s2_par, sa_par
        result = resolution, likelihoods, parameters, theta_max
        pick.dump(result, output)
        output.close()

    # Marginalise the 5-D likelihood into one 1-D distribution per parameter.
    k1_like = np.zeros(resolution)
    k2_like = np.zeros(resolution)
    s1_like = np.zeros(resolution)
    s2_like = np.zeros(resolution)
    sa_like = np.zeros(resolution)
    for ii in np.arange(resolution):
        k1_like[ii] = np.sum(likelihoods[ii, :, :, :, :])
        k2_like[ii] = np.sum(likelihoods[:, ii, :, :, :])
        s1_like[ii] = np.sum(likelihoods[:, :, ii, :, :])
        s2_like[ii] = np.sum(likelihoods[:, :, :, ii, :])
        sa_like[ii] = np.sum(likelihoods[:, :, :, :, ii])

    '''
    plt.figure()
    ax = plt.subplot()
    ax.set_xscale("log")
    plt.plot(k1_par,k1_like,'x')
    plt.xlabel('k1')

    plt.figure()
    ax = plt.subplot()
    ax.set_xscale("log")
    plt.plot(k2_par,k2_like,'x')
    plt.xlabel('k2')

    plt.figure()
    ax = plt.subplot()
    ax.set_xscale("log")
    plt.plot(s1_par,s1_like,'x')
    plt.xlabel('ssfr1')

    plt.figure()
    ax = plt.subplot()
    ax.set_xscale("log")
    plt.plot(s2_par,s2_like,'x')
    plt.xlabel('ssfr2')

    plt.figure()
    ax = plt.subplot()
    ax.set_xscale("log")
    plt.plot(sa_par,sa_like,'x')
    plt.xlabel('ssfra')
    '''

    # These are the marginalised maximum likelihood parameters
    k1_fit = k1_par[np.argmax(k1_like)]
    k2_fit = k2_par[np.argmax(k2_like)]
    s1_fit = s1_par[np.argmax(s1_like)]
    s2_fit = s2_par[np.argmax(s2_like)]
    sa_fit = sa_par[np.argmax(sa_like)]

    print "ML parameters:"
    # The joint-grid maximum is returned, not the marginalised fits.
    #theta_pass = k1_fit, k2_fit, s1_fit, s2_fit, sa_fit
    theta_pass = theta_max
    print theta_pass

    return theta_pass
def run_grid():
    """Brute-force likelihood grid for the 4-parameter 'shortabnew' model.

    Reuses a pickled grid when one exists; otherwise evaluates the
    likelihood on a resolution^4 grid, normalises it, and pickles
    (resolution, likelihoods, parameters, theta_max).  Returns the grid
    point with the highest likelihood.
    """
    if util.does_grid_exist(model_name,root_dir):
        print 'Grid already exists, using existing grid...'
        resolution, likelihoods, parameters, theta_max = util.read_grid(model_name,root_dir)
        k2_par, s1_par, s2_par, sa_par = parameters
    else:
        print 'Grid does not exist, computing grid...'
        resolution = 100

        # Grid limits for each of the four parameters.
        #theta_pass = 4.628e-13, 2e-4, 1./(0.5e9), 1.008e-11, 2./(12e9)
        k2_min, k2_max = 4e-5, 1.2e-4
        ssfr1_min, ssfr1_max = 1e-8, 5e-7
        ssfr2_min, ssfr2_max = 7e-12, 2.e-10
        ssfra_min, ssfra_max = 2e-10, 1.e-9

        # Reading in data
        ssfr, snr, snr_err = util.read_data()

        k2_par = np.linspace(k2_min,k2_max,resolution)
        s1_par = np.linspace(ssfr1_min,ssfr1_max,resolution)
        s2_par = np.linspace(ssfr2_min,ssfr2_max,resolution)
        sa_par = np.linspace(ssfra_min,ssfra_max,resolution)

        likelihoods = np.ones((resolution,resolution,resolution,resolution))
        max_like = 0.

        # Evaluate the likelihood at every grid point, tracking the maximum.
        for ii in np.arange(resolution):
            if ii%2 == 0:
                # Coarse progress indicator over the outermost loop.
                print np.round((float(ii) / resolution) * 100.,2), "% Done"
            for jj in np.arange(resolution):
                for kk in np.arange(resolution):
                    for ll in np.arange(resolution):
                        theta = k2_par[ii], s1_par[jj], s2_par[kk], sa_par[ll]
                        likelihoods[ii,jj,kk,ll] = np.exp(lnlike(theta,ssfr,snr,snr_err))
                        if likelihoods[ii,jj,kk,ll] > max_like:
                            max_like = likelihoods[ii,jj,kk,ll]
                            theta_max = k2_par[ii], s1_par[jj], s2_par[kk], sa_par[ll]
                            #print "New max like:", max_like
                            #print theta_max, "\n"

        # Normalise so the grid sums to one, then cache it to disk.
        likelihoods /= np.sum(likelihoods)
        output = open(root_dir + 'Data/MCMC_shortabnew_grid.pkl','wb')
        parameters = k2_par, s1_par, s2_par, sa_par
        result = resolution, likelihoods, parameters, theta_max
        pick.dump(result,output)
        output.close()

    # Marginalise the 4-D likelihood into one 1-D distribution per parameter.
    k2_like = np.zeros(resolution)
    s1_like = np.zeros(resolution)
    s2_like = np.zeros(resolution)
    sa_like = np.zeros(resolution)
    for ii in np.arange(resolution):
        k2_like[ii] = np.sum(likelihoods[ii,:,:,:])
        s1_like[ii] = np.sum(likelihoods[:,ii,:,:])
        s2_like[ii] = np.sum(likelihoods[:,:,ii,:])
        sa_like[ii] = np.sum(likelihoods[:,:,:,ii])

    '''
    plt.figure()
    ax = plt.subplot()
    ax.set_xscale("log")
    plt.plot(k2_par,k2_like,'x')
    plt.xlabel('k2')

    plt.figure()
    ax = plt.subplot()
    ax.set_xscale("log")
    plt.plot(s1_par,s1_like,'x')
    plt.xlabel('ssfr1')

    plt.figure()
    ax = plt.subplot()
    ax.set_xscale("log")
    plt.plot(s2_par,s2_like,'x')
    plt.xlabel('ssfr2')

    plt.figure()
    ax = plt.subplot()
    ax.set_xscale("log")
    plt.plot(sa_par,sa_like,'x')
    plt.xlabel('ssfra')
    '''

    # These are the marginalised maximum likelihood parameters
    k2_fit = k2_par[np.argmax(k2_like)]
    s1_fit = s1_par[np.argmax(s1_like)]
    s2_fit = s2_par[np.argmax(s2_like)]
    sa_fit = sa_par[np.argmax(sa_like)]

    print "ML parameters:"
    # The joint-grid maximum is returned, not the marginalised fits.
    #theta_pass = k2_fit, s1_fit, s2_fit, sa_fit
    theta_pass = theta_max
    print theta_pass

    return theta_pass
def run_grid():
    """Brute-force likelihood grid for the 4-parameter sigmoid model.

    Reuses a pickled grid when one exists; otherwise evaluates the
    likelihood on a resolution^4 grid, normalises it, and pickles
    (resolution, likelihoods, parameters, theta_max).  Returns the grid
    point with the highest likelihood (theta_max).
    """
    if util.does_grid_exist(model_name, root_dir):
        print 'Grid already exists, using existing chains...'
        resolution, likelihoods, parameters, theta_max = util.read_grid(
            model_name, root_dir)
        a_par, b_par, c_par, d_par = parameters
    else:
        print 'Grid does not exist, computing grid...'
        resolution = 100

        '''
        # These limits give a pretty decent overview of this local extrema
        a_min, a_max = 1e-13, 9e-13
        b_min, b_max = 1e-17, 6e-14
        c_min, c_max = 0.1e-10, 1e-9
        d_min, d_max = 1e9, 50e9
        '''

        # These limits focus in on the local extrema above
        a_min, a_max = 2e-13, 6e-13
        b_min, b_max = 6e-15, 6e-14
        c_min, c_max = 6e-11, 3e-10
        d_min, d_max = 7e9, 80e9

        # Reading in data
        # NOTE(review): names suggest log-space quantities, but util.read_data()
        # is the same reader the sibling functions use for linear data -- confirm.
        logssfr, logsnr, snr_err = util.read_data()

        a_par = np.linspace(a_min, a_max, resolution)
        b_par = np.linspace(b_min, b_max, resolution)
        c_par = np.linspace(c_min, c_max, resolution)
        d_par = np.linspace(d_min, d_max, resolution)

        likelihoods = np.ones((resolution, resolution, resolution, resolution))
        max_like = 0.

        # Evaluate the likelihood at every grid point, tracking the maximum.
        for ii in np.arange(resolution):
            if ii % 5 == 0:
                # Coarse progress indicator over the outermost loop.
                print np.round((float(ii) / resolution) * 100., 2), "% Done"
            for jj in np.arange(resolution):
                for kk in np.arange(resolution):
                    for ll in np.arange(resolution):
                        theta = a_par[ii], b_par[jj], c_par[kk], d_par[ll]
                        likelihoods[ii, jj, kk, ll] = np.exp(
                            lnlike(theta, logssfr, logsnr, snr_err))
                        if likelihoods[ii, jj, kk, ll] > max_like:
                            max_like = likelihoods[ii, jj, kk, ll]
                            theta_max = a_par[ii], b_par[jj], c_par[kk], d_par[
                                ll]
                            #print "New max like:", max_like
                            #print theta_max, "\n"

        # Normalise so the grid sums to one, then cache it to disk.
        likelihoods /= np.sum(likelihoods)
        output = open(root_dir + 'Data/MCMC_sigmoid_grid.pkl', 'wb')
        parameters = a_par, b_par, c_par, d_par
        result = resolution, likelihoods, parameters, theta_max
        pick.dump(result, output)
        output.close()

    # Marginalise the 4-D likelihood into one 1-D distribution per parameter.
    a_like = np.zeros(resolution)
    b_like = np.zeros(resolution)
    c_like = np.zeros(resolution)
    d_like = np.zeros(resolution)
    for ii in np.arange(resolution):
        a_like[ii] = np.sum(likelihoods[ii, :, :, :])
        b_like[ii] = np.sum(likelihoods[:, ii, :, :])
        c_like[ii] = np.sum(likelihoods[:, :, ii, :])
        d_like[ii] = np.sum(likelihoods[:, :, :, ii])

    '''
    plt.figure()
    ax = plt.subplot()
    ax.set_xscale("log")
    plt.plot(a_par,a_like)
    plt.xlabel('a')

    plt.figure()
    ax = plt.subplot()
    ax.set_xscale("log")
    plt.plot(b_par,b_like)
    plt.xlabel('b')

    plt.figure()
    ax = plt.subplot()
    ax.set_xscale("log")
    plt.plot(c_par,c_like)
    plt.xlabel('c')

    plt.figure()
    ax = plt.subplot()
    ax.set_xscale("log")
    plt.plot(d_par,d_like)
    plt.xlabel('d')
    '''

    # Marginalised maximum-likelihood parameters (printed below).
    a_fit = a_par[np.argmax(a_like)]
    b_fit = b_par[np.argmax(b_like)]
    c_fit = c_par[np.argmax(c_like)]
    d_fit = d_par[np.argmax(d_like)]

    print "ML parameters:"
    theta_pass = a_fit, b_fit, c_fit, d_fit
    print theta_pass

    # NOTE(review): this returns theta_max (the joint-grid maximum) while
    # printing theta_pass (the marginalised fits); the sibling run_grid
    # variants return the value they print -- confirm which is intended.
    return theta_max
def run_grid():
    """Brute-force likelihood grid for the 4-parameter 'nicelog' model.

    Reuses a pickled grid when one exists; otherwise evaluates the
    likelihood on a resolution^4 grid, normalises it, and pickles
    (resolution, likelihoods, parameters, theta_max).  Returns the grid
    point with the highest likelihood.
    """
    if util.does_grid_exist(model_name,root_dir):
        print 'Grid already exists, using existing grid...'
        resolution, likelihoods, parameters, theta_max = util.read_grid(model_name,root_dir)
        a_par, k_par, s0_par, alpha_par = parameters
    else:
        print 'Grid does not exist, computing grid...'
        resolution = 50

        # Grid limits for each of the four parameters (a, k, s0, alpha).
        #theta_pass = 4.2e-14, 0.272, 3.8e-11, 0.9
        a_min, a_max = 2e-14, 20e-14
        k_min, k_max = 0.05, 1.4
        s0_min, s0_max = 1e-11, 60e-11
        alpha_min, alpha_max = 0.3, 1.4

        #a_min, a_max = 2e-14, 13e-14
        #k_min, k_max = 0.05, 2
        #s0_min, s0_max = 1e-11, 20e-11
        #alpha_min, alpha_max = 0.55, 1.4

        # Reading in data
        ssfr, snr, snr_err = util.read_data()

        a_par = np.linspace(a_min,a_max,resolution)
        k_par = np.linspace(k_min,k_max,resolution)
        s0_par = np.linspace(s0_min,s0_max,resolution)
        alpha_par = np.linspace(alpha_min,alpha_max,resolution)

        likelihoods = np.ones((resolution,resolution,resolution,resolution))
        max_like = 0.

        # Evaluate the likelihood at every grid point, tracking the maximum.
        for ii in np.arange(resolution):
            if ii%2 == 0:
                # Coarse progress indicator over the outermost loop.
                print np.round((float(ii) / resolution) * 100.,2), "% Done"
            for jj in np.arange(resolution):
                for kk in np.arange(resolution):
                    for ll in np.arange(resolution):
                        theta = a_par[ii], k_par[jj], s0_par[kk], alpha_par[ll]
                        likelihoods[ii,jj,kk,ll] = np.exp(lnlike(theta,ssfr,snr,snr_err))
                        if likelihoods[ii,jj,kk,ll] > max_like:
                            max_like = likelihoods[ii,jj,kk,ll]
                            theta_max = a_par[ii], k_par[jj], s0_par[kk], alpha_par[ll]
                            #print "New max like:", max_like
                            #print theta_max, "\n"

        # Normalise so the grid sums to one, then cache it to disk.
        likelihoods /= np.sum(likelihoods)
        output = open(root_dir + 'Data/MCMC_nicelog_grid.pkl','wb')
        parameters = a_par, k_par, s0_par, alpha_par
        result = resolution, likelihoods, parameters, theta_max
        pick.dump(result,output)
        output.close()

    # Marginalise the 4-D likelihood into one 1-D distribution per parameter.
    a_like = np.zeros(resolution)
    k_like = np.zeros(resolution)
    s0_like = np.zeros(resolution)
    alpha_like = np.zeros(resolution)
    for ii in np.arange(resolution):
        a_like[ii] = np.sum(likelihoods[ii,:,:,:])
        k_like[ii] = np.sum(likelihoods[:,ii,:,:])
        s0_like[ii] = np.sum(likelihoods[:,:,ii,:])
        alpha_like[ii] = np.sum(likelihoods[:,:,:,ii])

    # Optional corner plot of the grid posterior (disabled by default).
    yes_chainconsumer = False
    if yes_chainconsumer:
        print "Defining chainconsumer"
        c = ChainConsumer()
        print "Adding chain"
        c.add_chain([a_par, k_par, s0_par, alpha_par], parameters=["a","k","s0","alpha"],weights=likelihoods,grid=True)
        print "Doing plot"
        fig = c.plotter.plot()

    '''
    plt.figure()
    ax = plt.subplot()
    ax.set_xscale("log")
    plt.plot(a_par,a_like,'x')
    plt.xlabel('a')

    plt.figure()
    ax = plt.subplot()
    ax.set_xscale("log")
    plt.plot(k_par,k_like,'x')
    plt.xlabel('k')

    plt.figure()
    ax = plt.subplot()
    ax.set_xscale("log")
    plt.plot(s0_par,s0_like,'x')
    plt.xlabel('ssfr0')

    plt.figure()
    ax = plt.subplot()
    ax.set_xscale("log")
    plt.plot(alpha_par,alpha_like,'x')
    plt.xlabel('alpha')
    '''

    # These are the marginalised maximum likelihood parameters
    a_fit = a_par[np.argmax(a_like)]
    k_fit = k_par[np.argmax(k_like)]
    s0_fit = s0_par[np.argmax(s0_like)]
    alpha_fit = alpha_par[np.argmax(alpha_like)]

    print "ML parameters:"
    # The joint-grid maximum is returned, not the marginalised fits.
    #theta_pass = a_fit, k_fit, s0_fit, alpha_fit
    theta_pass = theta_max
    print theta_pass

    return theta_pass
def bootstrap_uncertainties():
    """Bootstrap the grid fit to estimate parameter uncertainties.

    Resamples the data with replacement n_runs times, refits each
    resample with run_grid_fast, histograms the resulting parameters,
    and writes them (plus the wall-clock runtime) to text files.
    """
    time_start = time.time()

    # Histogram ranges for the four parameters (a, k, s0, alpha); alpha_min
    # is also used below to detect fits pinned to the grid edge.
    a_min, a_max = 0.1e-13, 4.0e-13
    k_min, k_max = 0.1, 2.0
    s0_min, s0_max = 0.05e-10, 7e-10
    alpha_min, alpha_max = 0.1, 1.2

    n_runs = 400
    pars = np.zeros((4,n_runs))

    ssfr, snr, snr_err = util.read_data()
    ndata = len(ssfr)
    index_array = np.arange(ndata)

    #plt.figure()
    #ax = plt.subplot()

    # while-loop rather than for-loop: a run whose alpha lands on the grid
    # edge is rejected and redone (counter is decremented then incremented,
    # so the same slot is refilled).
    counter = 0
    while counter < n_runs:
        ii = counter
        print ii
        #snr_shift = snr + np.random.normal(size=len(snr)) * snr_err
        # Resample the data with replacement (standard bootstrap).
        indices = np.random.choice(index_array,size=ndata,replace=True)
        ssfr_sub = ssfr[indices]
        snr_sub = snr[indices]
        snr_err_sub = snr_err[indices]
        pars[:,ii] = run_grid_fast(ssfr_sub, snr_sub, snr_err_sub)
        # Reject fits where alpha hit the lower grid boundary: redo this run.
        if np.isclose(pars[:,ii][3]-alpha_min,0.):
            counter -= 1
            #util.plot_data(root_dir, model_name, pars[:,ii], nicelog_snr)
            #plt.show()
        #plt.plot(np.log10(ssfr_sub),snr_sub,'bo',alpha=0.05)
        counter += 1

    #plt.errorbar(np.log10(ssfr),snr,yerr=snr_err,fmt='o',color='k')
    #plt.xlabel('log(sSFR)',size='large')
    #plt.ylabel('sSNR',size='large')
    #ax.set_yscale("log")
    #plt.xlim((-13,-8))
    #plt.ylim((5e-15,6e-12))

    # Histogram each parameter's bootstrap distribution.
    plt.figure()
    plt.hist(pars[0,:],bins=10,range=(a_min,a_max))
    plt.xlabel('a')

    plt.figure()
    plt.hist(pars[1,:],bins=10,range=(k_min,k_max))
    plt.xlabel('k')

    plt.figure()
    plt.hist(pars[2,:],bins=10,range=(s0_min,s0_max))
    plt.xlabel('ssfr0')

    plt.figure()
    plt.hist(pars[3,:],bins=10,range=(alpha_min,alpha_max))
    plt.xlabel('alpha')

    # Persist results: one row per run, plus total runtime in seconds.
    runtime = time.time() - time_start
    np.savetxt('bootstrap_parameters.txt',pars.T)
    np.savetxt('runtime.txt',np.array([runtime]),fmt='%4.2f')
def run_grid():
    """Brute-force likelihood grid for the 5-parameter 'abnewalpha' model.

    Reuses a pickled grid when one exists; otherwise evaluates the
    likelihood on a resolution^5 grid, normalises it, and pickles
    (resolution, likelihoods, parameters, theta_max).  Returns the grid
    point with the highest likelihood.
    """
    if util.does_grid_exist(model_name, root_dir):
        print 'Grid already exists, using existing grid...'
        resolution, likelihoods, parameters, theta_max = util.read_grid(
            model_name, root_dir)
        k1_par, k2_par, s1_par, s2_par, alpha_par = parameters
    else:
        print 'Grid does not exist, computing grid...'
        # Kept low: the grid cost scales as resolution**5.
        resolution = 30

        #k1_min, k1_max = 1.e-12, 4.5e-12
        #k2_min, k2_max = 4e-4, 9e-4
        #ssfr1_min, ssfr1_max = 1e-10, 8e-10
        #ssfr2_min, ssfr2_max = 1.5e-11, 5.5e-11
        #alpha_min, alpha_max = 0.01, 0.15

        # Grid limits for each of the five parameters.
        k1_min, k1_max = 1.e-12, 4.5e-12
        k2_min, k2_max = 1e-4, 5e-4
        ssfr1_min, ssfr1_max = 9e-10, 2e-9
        ssfr2_min, ssfr2_max = 2.5e-11, 6.e-11
        alpha_min, alpha_max = 0.01, 0.15

        # Reading in data
        ssfr, snr, snr_err = util.read_data()

        k1_par = np.linspace(k1_min, k1_max, resolution)
        k2_par = np.linspace(k2_min, k2_max, resolution)
        s1_par = np.linspace(ssfr1_min, ssfr1_max, resolution)
        s2_par = np.linspace(ssfr2_min, ssfr2_max, resolution)
        alpha_par = np.linspace(alpha_min, alpha_max, resolution)

        likelihoods = np.ones(
            (resolution, resolution, resolution, resolution, resolution))
        max_like = 0.

        # Evaluate the likelihood at every grid point, tracking the maximum.
        for ii in np.arange(resolution):
            if ii % 2 == 0:
                # Coarse progress indicator over the outermost loop.
                print np.round((float(ii) / resolution) * 100., 2), "% Done"
            for jj in np.arange(resolution):
                for kk in np.arange(resolution):
                    for ll in np.arange(resolution):
                        for mm in np.arange(resolution):
                            theta = k1_par[ii], k2_par[jj], s1_par[kk], s2_par[
                                ll], alpha_par[mm]
                            likelihoods[ii, jj, kk, ll, mm] = np.exp(
                                lnlike(theta, ssfr, snr, snr_err))
                            if likelihoods[ii, jj, kk, ll, mm] > max_like:
                                max_like = likelihoods[ii, jj, kk, ll, mm]
                                theta_max = k1_par[ii], k2_par[jj], s1_par[
                                    kk], s2_par[ll], alpha_par[mm]
                                #print "New max like:", max_like
                                #print theta_max, "\n"

        # Normalise so the grid sums to one, then cache it to disk.
        likelihoods /= np.sum(likelihoods)
        output = open(root_dir + 'Data/MCMC_abnewalpha_grid.pkl', 'wb')
        parameters = k1_par, k2_par, s1_par, s2_par, alpha_par
        result = resolution, likelihoods, parameters, theta_max
        pick.dump(result, output)
        output.close()

    # Marginalise the 5-D likelihood into one 1-D distribution per parameter.
    k1_like = np.zeros(resolution)
    k2_like = np.zeros(resolution)
    s1_like = np.zeros(resolution)
    s2_like = np.zeros(resolution)
    alpha_like = np.zeros(resolution)
    for ii in np.arange(resolution):
        k1_like[ii] = np.sum(likelihoods[ii, :, :, :, :])
        k2_like[ii] = np.sum(likelihoods[:, ii, :, :, :])
        s1_like[ii] = np.sum(likelihoods[:, :, ii, :, :])
        s2_like[ii] = np.sum(likelihoods[:, :, :, ii, :])
        alpha_like[ii] = np.sum(likelihoods[:, :, :, :, ii])

    # Plot each marginal distribution (live code here, unlike the sibling
    # variants where the equivalent block is commented out).
    plt.figure()
    ax = plt.subplot()
    ax.set_xscale("log")
    plt.plot(k1_par, k1_like, 'x')
    plt.xlabel('k1')

    plt.figure()
    ax = plt.subplot()
    ax.set_xscale("log")
    plt.plot(k2_par, k2_like, 'x')
    plt.xlabel('k2')

    plt.figure()
    ax = plt.subplot()
    ax.set_xscale("log")
    plt.plot(s1_par, s1_like, 'x')
    plt.xlabel('ssfr1')

    plt.figure()
    ax = plt.subplot()
    ax.set_xscale("log")
    plt.plot(s2_par, s2_like, 'x')
    plt.xlabel('ssfr2')

    plt.figure()
    ax = plt.subplot()
    #ax.set_xscale("log")
    plt.plot(alpha_par, alpha_like, 'x')
    plt.xlabel('alpha')

    # These are the marginalised maximum likelihood parameters
    k1_fit = k1_par[np.argmax(k1_like)]
    k2_fit = k2_par[np.argmax(k2_like)]
    s1_fit = s1_par[np.argmax(s1_like)]
    s2_fit = s2_par[np.argmax(s2_like)]
    alpha_fit = alpha_par[np.argmax(alpha_like)]

    print "ML parameters:"
    # The joint-grid maximum is returned, not the marginalised fits.
    #theta_pass = k1_fit, k2_fit, s1_fit, s2_fit, alpha_fit
    theta_pass = theta_max
    print theta_pass

    return theta_pass