def lbfgs_example(verbose):
    """Fit a CDF model to the observed data, plot it and log the cutoff intensity.

    The function works on module-level state: ``exgauss`` selects between
    ``lbfgs_exgauss`` and ``lbfgs_gauss``; ``x_obs``, ``y_obs``, ``w_obs`` and
    ``initial`` are passed to the chosen fitter.  ``plt``, ``np`` and ``sys``
    are expected to be imported by the enclosing module.

    Side effects:
      * prints the initial and fitted coefficients,
      * appends the intensity at ``cdf_cutoff`` plus the three fitted
        parameters to ``intensity_cdf.dat``,
      * saves figure 1 as ``fit_intensities_<seed>.pdf`` where ``<seed>`` is
        taken from the file name given in ``sys.argv[1]``,
      * shows the figures.

    Args:
      verbose: currently unused.

    NOTE(review): ``fit.a[2]`` and ``F2`` are used unconditionally below, so
    only the ``exgauss`` branch can run to completion -- confirm whether the
    plain Gaussian branch is still meant to be supported.
    """
    import os
    from construct_random_datapt import ExGauss

    if exgauss:
        fit = lbfgs_exgauss(x_obs=x_obs, y_obs=y_obs, w_obs=w_obs,
                            initial=initial)
    else:
        fit = lbfgs_gauss(x_obs=x_obs, y_obs=y_obs, w_obs=w_obs,
                          initial=initial)

    # Single-argument print() calls give identical output on Python 2 and 3.
    print("------------------------------------------------------------------------- ")
    print(" Initial and fitted coeffcients, and inverse-curvature e.s.d.'s")
    print("------------------------------------------------------------------------- ")
    for i in range(initial.size()):
        # NOTE(review): the last column repeats fit.a[i]; the header suggests
        # an e.s.d. was intended there -- confirm.
        print("%2d %10.4f %10.4f %10.4f" % (i, initial[i], fit.a[i], fit.a[i]))

    X1 = x_obs.as_numpy_array()
    plt.figure(1)
    plt.plot(x_obs, y_obs, 'o')

    if exgauss:
        F0 = fit.exgauss_cdf_nparray(X1, initial[0], initial[1], initial[2])
        F1 = fit.exgauss_cdf_nparray(X1, fit.a[0], fit.a[1], fit.a[2])
        # Reference curve for a fixed, hard-coded parameter set.
        F2 = fit.exgauss_cdf_nparray(X1, -4000.0, 4000.0, 25000.0)
    else:
        F0 = gauss_cdf_nparray(X1, initial[0], initial[1])
        F1 = gauss_cdf_nparray(X1, fit.a[0], fit.a[1])
    plt.plot(X1, F0, 'r*', linewidth=2.0)
    plt.plot(X1, F1, 'g+', linewidth=2)

    # Trailing "_<seed>" token of the input file name (extension stripped).
    seed = os.path.split(sys.argv[1])[-1].split('.')[0].split('_')[-1]

    EXG = ExGauss(10000, -200000, 200000, fit.a[0], fit.a[1], fit.a[2])
    cdf_cutoff = 0.95
    I_fit = EXG.interpolate_x_value(cdf_cutoff)
    print(I_fit)

    # Context manager guarantees the log file is closed even if write fails.
    with open('intensity_cdf.dat', 'a') as fout:
        fout.write("%12.5f %12.5f %12.5f %12.5f\n"
                   % (I_fit, fit.a[0], fit.a[1], fit.a[2]))

    # Dashed guide lines marking the cutoff and the intensity it maps to.
    plt.plot(X1, [cdf_cutoff] * len(X1), 'r--')
    plt.plot([I_fit] * 100, np.linspace(0, 1, 100), 'r--')
    plt.savefig('fit_intensities_%s.pdf' % seed)

    plt.figure(2)
    plt.plot(X1, F2 - np.array(y_obs), 'o')
    plt.plot(X1, [0.0] * len(X1), 'r--')
    # Raw strings: "\D" is not a valid escape sequence in a normal literal.
    plt.ylabel(r'$\Delta(CDF_{Theory}-CDF_{Calc})$', fontsize=18)
    plt.xlabel('$ Intensity $', fontsize=18)
    # `normed` was removed from matplotlib's hist(); `density` replaces it.
    plt.hist(x_obs, density=True, bins=100)
    plt.show()
def run(self):
    """Fit the intensities, then run the MCMC sampler on them.

    Steps, in order:
      1. ``exgauss_fit(self.datasource)`` is minimised with
         ``exercise_levenberg_marquardt``; the fitted parameters are read
         from ``intensities.x`` as (mu0, sigma0, tau0).
      2. The intensity at ``self.cdf_cutoff`` for the fitted parameters is
         printed, and the sum of squared differences between the fitted CDF
         and ``intensities.y`` is computed.
      3. ``mcmc().sampler`` is started from the fitted parameters.

    Reads ``self.datasource``, ``self.cdf_cutoff``, ``self.plot``,
    ``self.nsteps``, ``self.t_start``, ``self.dt`` and ``self.mcmc_seed``.
    Sets ``self.error_diagonal`` and ``self.bootstrap_errors`` (and
    ``self.c`` when the covariance branch is enabled).

    Returns:
      The tuple ``(I_avg_ideal, I_var_ideal, accept_rate)`` exactly as
      returned by the sampler.
    """
    import matplotlib.pyplot as plt
    from construct_random_datapt import ExGauss

    # Single-argument print() calls give identical output on Python 2 and 3.
    print('--------------------- Minimization ----------------------------------')
    intensities = exgauss_fit(self.datasource)
    exercise_levenberg_marquardt(intensities)
    initial = intensities.x_0
    mu0, sigma0, tau0 = intensities.x
    self.error_diagonal = [1., 1., 1.]
    self.bootstrap_errors = intensities.bootstrap_errors

    # Optional: covariance matrix from the Levenberg-Marquardt normal matrix.
    get_covar_from_LM = False
    if get_covar_from_LM:
        intensities.build_up()
        upper = intensities.step_equations().normal_matrix_packed_u()
        nm_elem = flex.double(9)
        self.c = flex.double(3)
        # Expand the packed upper triangle into a full symmetric 3x3 matrix;
        # the diagonal entries are additionally stored in self.c.
        ctr = 0
        for x in range(3):
            x_0 = ctr
            for y in range(2, x - 1, -1):
                nm_elem[3 * x + y] = upper[x_0 + (y - x)]
                ctr += 1
                if x != y:
                    nm_elem[3 * y + x] = upper[x_0 + (y - x)]
                else:
                    self.c[x] = upper[x_0 + (y - x)]
        NM = sqr(nm_elem)
        error_matrix = NM.inverse()

    # Make sure sigma and tau are sensible after minimization.
    # FIXME: negative values are only reported; the run continues with them.
    if sigma0 < 0.0 or tau0 < 0.0:
        print('Negative sigma or tau values not acceptable')
    print('OK')
    print('Initial Values of params = %10.4f, %10.4f, %10.4f'
          % (initial[0], initial[1], initial[2]))
    print('Final Values of parameters = %10.4f, %10.4f, %10.4f\n'
          % (mu0, sigma0, tau0))

    X1 = intensities.t.as_numpy_array()
    Y1 = intensities.y.as_numpy_array()

    EXG = ExGauss(len(X1), np.min(X1), np.max(X1), mu0, sigma0, tau0)
    I_fit0 = EXG.find_x_from_iter(self.cdf_cutoff)
    print('Initial from fit I_%.2f value =  %s' % (self.cdf_cutoff, I_fit0))

    if self.plot:
        plt.figure(1)
        plt.plot(X1, Y1, '.')

    # CDF at the fitted parameters and its squared misfit to the data.  The
    # former F0/F2 curves were computed but never used, so they are gone.
    F1 = intensities.exgauss_cdf_array(X1, mu0, sigma0, tau0)
    residual = sum(diff * diff for diff in F1 - Y1)
    if get_covar_from_LM:
        self.error_diagonal = [
            math.sqrt(residual * error_matrix(a, a)) for a in range(3)
        ]
        print(' From LevMar: 1./(df/da)*sqrt(residual) =  %s'
              % (self.error_diagonal,))
    print('Final Sum Squared Difference =  %s' % (residual,))

    if self.plot:
        plt.plot(X1, F1, 'g+', linewidth=2)  # Best fit
        plt.ylabel('CDF value')
        plt.xlabel('Intensities')
        # Difference between calculated and observed CDF values.
        plt.figure(2)
        plt.plot(X1, (F1 - Y1), 'o')
        # Raw string: "\D" is not a valid escape sequence in a normal literal.
        plt.ylabel(r'$\Delta(CDF_{calc}-CDF_{obs})$', fontsize=18)
        plt.xlabel('Intensities', fontsize=18)

    print('======================= MCMC stuff beginning ============================')
    maxI = np.max(X1)
    minI = np.min(X1)
    proposal_width = 0.001 * np.abs(maxI - minI)
    mcmc_helper = mcmc()
    I_avg_ideal, I_var_ideal, accept_rate = mcmc_helper.sampler(
        X1,
        samples=self.nsteps,
        mu_init=mu0,
        sigma_init=sigma0,
        tau_init=tau0,
        proposal_width=proposal_width,
        t_start=self.t_start,
        dt=self.dt,
        cdf_cutoff=self.cdf_cutoff,
        plot=False,
        analyse_mcmc=False,
        seed=self.mcmc_seed,
        prior_errors=self.bootstrap_errors,
        residual=residual)

    if self.plot:
        plt.show()
    return I_avg_ideal, I_var_ideal, accept_rate
def mcmc_lbfgs_example(verbose):
    """Fit a CDF model to the observed data, then sample around the fit.

    The function works on module-level state: ``exgauss`` selects between
    ``lbfgs_exgauss`` and ``lbfgs_gauss``; ``x_obs``, ``y_obs``, ``w_obs`` and
    ``initial`` are passed to the chosen fitter.  ``plt``, ``np``, ``sys``,
    ``sampler`` and ``exgauss_cdf`` are expected to be provided by the
    enclosing module.

    Side effects:
      * prints the initial and fitted coefficients,
      * appends the intensity at ``cdf_cutoff`` to ``intensity_cdf.dat``,
      * saves figure 1 as ``fit_intensities_<seed>.pdf``,
      * writes every sampled parameter triple to
        ``params_mcmc_dummy_<seed>.dat``,
      * then terminates the interpreter (see ``dump_params_and_exit``).

    Args:
      verbose: currently unused.
    """
    import os
    from construct_random_datapt import ExGauss

    if exgauss:
        fit = lbfgs_exgauss(x_obs=x_obs, y_obs=y_obs, w_obs=w_obs,
                            initial=initial)
    else:
        fit = lbfgs_gauss(x_obs=x_obs, y_obs=y_obs, w_obs=w_obs,
                          initial=initial)

    # Single-argument print() calls give identical output on Python 2 and 3.
    print("------------------------------------------------------------------------- ")
    print(" Initial and fitted coeffcients, and inverse-curvature e.s.d.'s")
    print("------------------------------------------------------------------------- ")
    for i in range(initial.size()):
        # NOTE(review): the last column repeats fit.a[i]; the header suggests
        # an e.s.d. was intended there -- confirm.
        print("%2d %10.4f %10.4f %10.4f" % (i, initial[i], fit.a[i], fit.a[i]))

    X1 = x_obs.as_numpy_array()
    plt.figure(1)
    plt.plot(x_obs, y_obs, 'o')

    if exgauss:
        F0 = fit.exgauss_cdf_nparray(X1, initial[0], initial[1], initial[2])
        F1 = fit.exgauss_cdf_nparray(X1, fit.a[0], fit.a[1], fit.a[2])
    else:
        F0 = gauss_cdf_nparray(X1, initial[0], initial[1])
        F1 = gauss_cdf_nparray(X1, fit.a[0], fit.a[1])
    plt.plot(X1, F0, 'r*', linewidth=2.0)
    plt.plot(X1, F1, 'g+', linewidth=2)

    # Trailing "_<seed>" token of the input file name (extension stripped).
    seed = os.path.split(sys.argv[1])[-1].split('.')[0].split('_')[-1]

    EXG = ExGauss(10000, -200000, 200000, fit.a[0], fit.a[1], fit.a[2])
    cdf_cutoff = 0.95
    I_fit = EXG.interpolate_x_value(cdf_cutoff)
    print(I_fit)

    # Context manager guarantees the log file is closed even if write fails.
    with open('intensity_cdf.dat', 'a') as fout:
        fout.write("%12.5f\n" % I_fit)

    # Dashed guide lines marking the cutoff and the intensity it maps to.
    plt.plot(X1, [cdf_cutoff] * len(X1), 'r--')
    plt.plot([I_fit] * 100, np.linspace(0, 1, 100), 'r--')
    plt.savefig('fit_intensities_%s.pdf' % seed)

    # =========== NOW DO THE MCMC bit here below ==========
    print('======================= MCMC stuff beginning ============================')
    nsteps = 50000
    maxI = np.max(x_obs)
    minI = np.min(x_obs)
    proposal_width = 0.01 * np.abs(maxI - minI)
    print('initial guesses and proposal width =  %s %s %s %s'
          % (fit.a[0], fit.a[1], fit.a[2], proposal_width))
    params = sampler(x_obs, samples=nsteps, mu_init=fit.a[0],
                     sigma_init=fit.a[1], tau_init=fit.a[2],
                     proposal_width=proposal_width, plot=False)
    mu, sigma, tau = params[-1]
    print('final parameter values  %s %s %s' % (mu, sigma, tau))

    t_start = 0
    dt = 50
    plt.figure(2)
    plt.plot(X1, F1, 'green')            # curve at the fitted parameters
    F1 = exgauss_cdf(X1, mu, sigma, tau)
    plt.plot(X1, F1, 'grey')             # curve at the last sampled parameters

    with open('params_mcmc_dummy_%s.dat' % seed, 'w') as fout:
        for count in range(t_start, nsteps):
            fout.write("%14.6f %14.6f %14.6f\n"
                       % (params[count][0], params[count][1],
                          params[count][2]))

    # The original stopped here with a bare exit(), leaving everything below
    # unreachable.  The stop is kept, but made explicit and switchable;
    # sys.exit() is used because the exit() builtin is only a `site` helper.
    dump_params_and_exit = True
    if dump_params_and_exit:
        sys.exit()

    # Thinned chain -> intensity at the CDF cutoff for each retained sample.
    I_mcmc = []
    for count in range(t_start, nsteps, dt):
        mu, sigma, tau = params[count]
        EXG = ExGauss(10000, -200000, 200000, mu, sigma, tau)
        I_mcmc.append(EXG.interpolate_x_value(cdf_cutoff))
    print('Mean and Stdev of I_mcmc = %12.7f, %12.7f'
          % (np.mean(I_mcmc), np.std(I_mcmc)))

    # Empirical CDF of the observations for visual comparison.
    X2 = np.sort(x_obs)
    F2 = np.array(range(len(x_obs))) / float(len(x_obs))
    plt.plot(X2, F2, 'o')
    plt.show()
def run(self):
    """Fit the intensities in ``self.filename``, sample with MCMC, log the result.

    Steps, in order:
      1. ``exgauss_fit(self.filename)`` is minimised with
         ``exercise_levenberg_marquardt``; the fitted parameters are read
         from ``intensities.x`` as (mu0, sigma0, tau0).
      2. The data, the initial-guess CDF and the fitted CDF are plotted
         (figures 1 and 2).
      3. ``mcmc().sampler`` is started from the fitted parameters and returns
         the chain ``params``.
      4. For every ``self.dt``-th sample from ``self.t_start`` up to
         ``self.nsteps`` the intensity at ``self.cdf_cutoff`` is computed;
         their mean and standard deviation are appended to
         ``intensity_mcmc_<cdf_cutoff>.dat``.

    Reads ``self.filename``, ``self.cdf_cutoff``, ``self.nsteps``,
    ``self.t_start``, ``self.dt``, ``self.mcmc_seed`` and ``self.plot``.
    Figures are always built but only shown when ``self.plot`` is true.

    Returns:
      None.
    """
    import matplotlib.pyplot as plt
    from construct_random_datapt import ExGauss

    # Single-argument print() calls give identical output on Python 2 and 3.
    print('--------------------- Minimization ----------------------------------')
    intensities = exgauss_fit(self.filename)
    exercise_levenberg_marquardt(intensities)
    initial = intensities.x_0
    mu0, sigma0, tau0 = intensities.x
    print('OK')
    print('Initial Values of params = %10.4f, %10.4f, %10.4f'
          % (initial[0], initial[1], initial[2]))
    print('Final Values of parameters = %10.4f, %10.4f, %10.4f\n'
          % (mu0, sigma0, tau0))

    X1 = intensities.t.as_numpy_array()
    Y1 = intensities.y.as_numpy_array()

    EXG = ExGauss(10000, np.min(X1), np.max(X1), mu0, sigma0, tau0)
    I_fit0 = EXG.interpolate_x_value(self.cdf_cutoff)
    print('Initial I_%.2f value =  %s' % (self.cdf_cutoff, I_fit0))

    plt.figure(1)
    plt.plot(X1, Y1, '.')
    F0 = intensities.exgauss_cdf_array(X1, initial[0], initial[1], initial[2])
    F1 = intensities.exgauss_cdf_array(X1, mu0, sigma0, tau0)
    print('Sum Squared Difference =  %s'
          % (sum(diff * diff for diff in F1 - Y1),))
    plt.plot(X1, F0, 'r*', linewidth=2.0)   # initial guess
    plt.plot(X1, F1, 'g+', linewidth=2)     # best fit
    plt.ylabel('CDF value')
    plt.xlabel('Intensities')

    # Difference between calculated and observed CDF values.
    plt.figure(2)
    plt.plot(X1, (F1 - Y1), 'o')
    # Raw string: "\D" is not a valid escape sequence in a normal literal.
    plt.ylabel(r'$\Delta(CDF_{calc}-CDF_{obs})$', fontsize=18)
    plt.xlabel('Intensities', fontsize=18)

    print('======================= MCMC stuff beginning ============================')
    # NOTE(review): for a negative minimum, "min - min/5" moves the lower
    # bound towards zero (narrowing the range) -- confirm this is intended.
    maxI = np.max(X1) + np.max(X1) / 5.
    minI = np.min(X1) - np.min(X1) / 5.
    proposal_width = 0.01 * np.abs(maxI - minI)
    print('initial guesses and proposal width =  %s %s %s %s'
          % (mu0, sigma0, tau0, proposal_width))

    mcmc_helper = mcmc()
    params = mcmc_helper.sampler(X1, samples=self.nsteps, mu_init=mu0,
                                 sigma_init=sigma0, tau_init=tau0,
                                 proposal_width=proposal_width, plot=False,
                                 seed=self.mcmc_seed)
    mu, sigma, tau = params[-1]
    print('final parameter values  %s %s %s' % (mu, sigma, tau))

    plt.figure(3)
    plt.plot(X1, F1, 'green')            # curve at the fitted parameters
    F1 = intensities.exgauss_cdf_array(X1, mu, sigma, tau)
    plt.plot(X1, F1, 'grey')             # curve at the last sampled parameters

    # Thinned chain -> intensity at the CDF cutoff for each retained sample.
    I_mcmc = []
    for count in range(self.t_start, self.nsteps, self.dt):
        mu, sigma, tau = params[count]
        EXG = ExGauss(10000, minI, maxI, mu, sigma, tau)
        I_mcmc.append(EXG.interpolate_x_value(self.cdf_cutoff))

    # Context manager guarantees the log file is closed even if write fails.
    with open('intensity_mcmc_%.2f.dat' % self.cdf_cutoff, 'a') as fout:
        fout.write('Mean and Stdev of I_mcmc = %12.7f, %12.7f in file %s %d\n'
                   % (np.mean(I_mcmc), np.std(I_mcmc), self.filename,
                      self.mcmc_seed))

    if self.plot:
        plt.show()
def mcmc_statistics(self, posterior, I_mcmc, data, cdf_cutoff):
    """Print and plot summary statistics for an ensemble of MCMC curves.

    The following statistics are produced:
      a. I_95_avg (mean and standard deviation of ``I_mcmc``)
      b. I_95_avg - I_95_exp
      c. P(I_95_mcmc) vs delta(I_95_mcmc - I_95_exp)
      d. I_95_mcmc vs t
      e. RMSD(CDF) vs t (only when the local ``rmsd_flag`` is switched on)

    Note that "95" stands for the percentile given by ``cdf_cutoff`` (the
    cutoff value used by mcmc.sampler), so do not take the variable names too
    literally.

    Args:
      posterior: sequence of ``[mu, sigma, tau]`` triples; entry 0 is drawn
        as the reference curve, the remaining entries as grey curves.
      I_mcmc: sequence of cutoff intensities, one per retained sample.
      data: observed intensities; sorted internally for plotting.
      cdf_cutoff: CDF value at which intensities are evaluated.

    Uses ``plt``, ``np`` and ``ExGauss`` from the enclosing module and ends
    with a blocking ``plt.show()``.
    """
    # Single-argument print() calls give identical output on Python 2 and 3.
    print('Printing MCMC summary statistics')
    rmsd_flag = False

    # a. Average cutoff intensity over the chain.
    I_95_avg = np.mean(I_mcmc)
    print('I_95_avg =  %s +/- %s' % (I_95_avg, np.std(I_mcmc)))

    # b. Deviation from the value read off the experimental data.
    I_95_exp, cdf_exp = self.find_x_from_expdata_annlib(data, cdf_cutoff)
    d_I_95_avg = I_95_avg - I_95_exp
    print('Experimental I_95 =  %s' % (I_95_exp,))
    print('Deviation of average I_95 from experimental I_95 =  %s'
          % (d_I_95_avg,))

    plt.figure(3)
    data = np.sort(data)
    n_data = len(data)
    fake_pts = np.linspace(np.min(data), np.max(data), 100)
    # Empirical CDF of the data using the (k - 0.5)/N convention, written as
    # k/N - 0.5/N so the values are bit-identical to the former loop.
    F1 = np.arange(1, n_data + 1) / float(n_data) - 0.5 / n_data
    plt.plot(data, F1, '.', linewidth=3.0)

    # One grey CDF curve and one cutoff marker per posterior sample.
    for mu, sigma, tau in posterior[1:]:
        F1 = self.exgauss_cdf_array(fake_pts, mu, sigma, tau)
        plt.plot(fake_pts, F1, 'grey')
        EXG = ExGauss(10000, np.min(data), np.max(data), mu, sigma, tau)
        I_95 = EXG.find_x_from_iter(cdf_cutoff)
        plt.plot(I_95, 0.05, 'r*')

    # posterior[0] is drawn last so it sits on top of the grey curves.
    F1 = self.exgauss_cdf_array(data, posterior[0][0], posterior[0][1],
                                posterior[0][2])
    plt.plot(data, F1, 'g+', linewidth=3.0)

    # c. Distribution of absolute deviations from the experimental value.
    d_I_95_mcmc = [np.abs(x - I_95_exp) for x in I_mcmc]
    print('Average absolute deviation I_95 mcmc from experimental I_95 %s'
          % (np.mean(d_I_95_mcmc),))
    hist, bin_edges = np.histogram(d_I_95_mcmc, density=True)
    plt.figure(4)
    # density * bin width = probability mass per bin.
    plt.plot(bin_edges[:-1], hist * np.diff(bin_edges), '-*r')
    # Raw string: "\D" is not a valid escape sequence in a normal literal.
    plt.xlabel(r'$\Delta(I_{95_calc}-I_{95_obs})$', fontsize=18)
    plt.ylabel('Probability', fontsize=18)

    # d. Trace of the cutoff intensity along the chain.
    plt.figure(5)
    steps = range(len(I_mcmc))
    plt.plot(steps, I_mcmc, 'b')
    plt.plot(steps, [I_95_exp] * len(I_mcmc), '-*r')
    plt.xlabel('time', fontsize=18)
    plt.ylabel('I_95_mcmc', fontsize=18)

    # e. Optional RMSD trace.
    if rmsd_flag:
        rmsd = self.calc_rmsd(posterior, data, cdf_exp)
        print('Average RMSD of datapoints %s' % (np.mean(rmsd),))
        plt.figure(6)
        plt.plot(range(len(rmsd)), rmsd, '-*r')
        plt.xlabel('time', fontsize=18)
        plt.ylabel('RMSD', fontsize=18)
    plt.show()
def run_sampler(self, samples, data, t_start, dt, mu_current, sigma_current,
                tau_current, cdf_cutoff, analyse_mcmc=False,
                exploratory_run=False):
    """Run a Metropolis-style random walk over (mu, sigma, tau).

    Each step draws a proposal from normal distributions centred on the
    current values, with widths ``self.proposal_factor`` times the respective
    prior standard deviation (sigma and tau proposals are folded to be
    non-negative with ``np.abs``).  The proposal is accepted when
    ``log(u) < (likelihood + prior)_proposal - (likelihood + prior)_current``
    for a uniform random ``u``; a proposal whose likelihood is ``-inf`` is
    always rejected.

    NOTE(review): the acceptance test is done in log space, so this assumes
    ``self.exgauss`` and ``self.gauss_pdf`` both return log-densities --
    confirm against their implementations.

    Args:
      samples: number of steps to run.
      data: observed intensities, passed to ``self.exgauss``.
      t_start: steps with index ``<= t_start`` are not recorded.
      dt: thinning interval; only steps with ``i % dt == 0`` are recorded.
      mu_current, sigma_current, tau_current: starting parameters.
      cdf_cutoff: CDF value whose intensity is recorded for retained steps.
      analyse_mcmc: when true, retained parameter triples are collected and
        handed to ``self.mcmc_statistics`` at the end.
      exploratory_run: currently unused.

    Returns:
      ``(mean, variance, acceptance_rate)`` where mean and variance are taken
      over the recorded cutoff intensities (NaN if none were recorded) and the
      acceptance rate is ``accepted / samples`` (0.0 when ``samples`` is 0
      instead of raising ZeroDivisionError).
    """
    def log_prior(mu, sigma, tau):
        # Sum of the three independent prior terms.
        return (self.gauss_pdf(mu, self.mu_prior_mu, self.mu_prior_sd)
                + self.gauss_pdf(sigma, self.sd_prior_mu, self.sd_prior_sd)
                + self.gauss_pdf(tau, self.tau_prior_mu, self.tau_prior_sd))

    maxI = np.max(data)
    minI = np.min(data)
    accept_counter = 0
    posterior = [[mu_current, sigma_current, tau_current]]
    I_mcmc = []

    # The current state's terms only change on acceptance, so they are
    # carried along instead of being recomputed on every step.
    likelihood_current = self.exgauss(data, mu_current, sigma_current,
                                      tau_current)
    prior_current = log_prior(mu_current, sigma_current, tau_current)

    for i in range(samples):
        # Trial move.
        mu_proposal = np.random.normal(
            mu_current, self.proposal_factor * self.mu_prior_sd)
        sigma_proposal = np.abs(np.random.normal(
            sigma_current, self.proposal_factor * self.sd_prior_sd))
        tau_proposal = np.abs(np.random.normal(
            tau_current, self.proposal_factor * self.tau_prior_sd))

        likelihood_proposal = self.exgauss(data, mu_proposal, sigma_proposal,
                                           tau_proposal)
        prior_proposal = log_prior(mu_proposal, sigma_proposal, tau_proposal)

        if likelihood_proposal == -np.inf:
            # Impossible proposal: reject without drawing a random number.
            accept = False
        else:
            p_current = likelihood_current + prior_current
            p_proposal = likelihood_proposal + prior_proposal
            accept = np.log(np.random.rand()) < p_proposal - p_current

        if accept:
            mu_current = mu_proposal
            sigma_current = sigma_proposal
            tau_current = tau_proposal
            likelihood_current = likelihood_proposal
            prior_current = prior_proposal
            accept_counter += 1

        # Record a thinned sample once past the burn-in.
        if i > t_start and i % dt == 0:
            EXG = ExGauss(10000, minI, maxI, mu_current, sigma_current,
                          tau_current)
            I_mcmc.append(EXG.find_x_from_iter(cdf_cutoff))
            # NOTE(review): the original layout was lost; the posterior is
            # assumed to be recorded for retained samples only -- confirm.
            if analyse_mcmc:
                posterior.append([mu_current, sigma_current, tau_current])

    I_mcmc_avg = np.mean(I_mcmc)
    I_mcmc_var = np.var(I_mcmc)
    if analyse_mcmc:
        self.mcmc_statistics(posterior, I_mcmc, data, cdf_cutoff)
    accept_rate = accept_counter * 1.0 / samples if samples else 0.0
    return I_mcmc_avg, I_mcmc_var, accept_rate
# Script: draw ex-Gaussian random intensities and write their empirical CDF.
#
# Usage: <script> SEED
#   SEED (sys.argv[1]) is passed to ExGauss.rand_annlib and is also used in
#   the output file name exgauss_simulated_intensities_<SEED>.dat.
# Each output line holds one sorted value followed by its CDF value.
from construct_random_datapt import ExGauss
import matplotlib.pyplot as plt
import numpy as np
import sys

seed = int(sys.argv[1])
exgauss = ExGauss(20, -200000, 200000, -4000.0, 4000.0, 25000.0)
exgauss_random_array = exgauss.rand_annlib(seed)
# Single-argument print() calls give identical output on Python 2 and 3.
print('created numbers')

X2 = np.sort(exgauss_random_array)
F2 = (np.array(range(1, len(exgauss_random_array) + 1))
      / float(len(exgauss_random_array)))
# Nick's suggestion on using (n-0.5)/N for cdf to create a buffer region
F2[:] = [x - 0.5 / len(exgauss_random_array) for x in F2]

# Context manager: the original never closed the output file.
with open('exgauss_simulated_intensities_' + sys.argv[1] + '.dat', 'w') as fout:
    for i in range(len(X2)):
        fout.write("%12.7f" % X2[i])
        fout.write("%12.7f\n" % F2[i])

print(exgauss.interpolate_x_value(0.95))