def Bayes_wrapper():
    """
    Calculate the Bayes score for every beta-function parameter value.

    For each entry of ``ModelParameters().list_of_beta_params`` the
    ``beta_param`` line of ``Chempy/parameter.py`` is rewritten in place, the
    cached ``Chempy.parameter`` and ``Chempy.score_function`` modules are
    dropped so that the new value is picked up on re-import, and
    ``Bayes_score`` is evaluated.

    The loop itself is not parallelized (parallelization is done in the MCMC
    already, but not in the integration).

    Output scores are saved and labelled in the ``Scores/`` folder as an
    ``.npz`` file with keys ``beta_param``, ``score`` and ``score_err``.

    Returns:
        tuple: ``(beta_params, score, score_err)`` where ``score`` and
        ``score_err`` are lists with one entry per beta value.
    """
    import fileinput
    import sys
    import time
    from .parameter import ModelParameters

    print('UPDATE NEURAL NETWORK')
    directory = 'Scores/'
    if not os.path.exists(directory):
        os.makedirs(directory)

    a = ModelParameters()
    beta_params = a.list_of_beta_params
    score = []
    score_err = []
    init_time = time.time()

    for i, beta_value in enumerate(beta_params):
        print("Calculating value %d of %d after %.3f seconds" %
              (i + 1, len(beta_params), time.time() - init_time))

        # Modify beta value. With inplace=True everything printed inside the
        # loop is written back to the file; the context manager guarantees
        # that stdout is restored even if the rewrite fails half-way.
        with fileinput.input("Chempy/parameter.py", inplace=True) as parameter_file:
            for line in parameter_file:
                if "\tbeta_param" in line:
                    print("\tbeta_param = %.5f" % beta_value)
                else:
                    print(line, end='')

        # Reimport model parameters for new beta. pop() instead of del so a
        # module that is not cached (yet) does not raise KeyError.
        sys.modules.pop('Chempy.parameter', None)
        sys.modules.pop('Chempy.score_function', None)
        from .parameter import ModelParameters
        from .score_function import preload_params_mcmc
        # Must be reloaded since this depends on beta parameter
        preload_params_mcmc()
        a = ModelParameters()

        # Calculate Bayes score
        integral, integral_err = Bayes_score()
        score.append(integral)
        score_err.append(integral_err)

    # Save output as npz array
    np.savez("Scores/Bayes_score - " + str(a.yield_table_name_sn2) + ", " +
             str(a.yield_table_name_agb) + ", " +
             str(a.yield_table_name_1a) + ".npz",
             beta_param=beta_params,
             score=score,
             score_err=score_err)
    return beta_params, score, score_err
def CV_bash(beta_index):
    """
    Calculate the CV score for one specific beta value only.

    The beta value is taken from position ``beta_index`` of
    ``ModelParameters().list_of_beta_params``. It is written into the
    ``beta_param`` line of ``Chempy/parameter.py``, the cached Chempy modules
    are dropped so the new value is picked up, and ``CV_score`` is evaluated.
    It is not parallelized here (parallelization is done in the MCMC already
    and for element predictions).

    The result is saved to ``Scores/CV<beta_index>.npz`` with keys
    ``beta_param``, ``score`` and ``rescaled_score``.

    Args:
        beta_index: Index into the list of beta parameters; anything accepted
            by ``int()`` (e.g. a string passed from a shell script).

    Returns:
        tuple: ``(beta_value, score, rescaled_score)`` where
        ``rescaled_score`` is ``score ** (1 / len(a.initial_neural_names))``.
    """
    import fileinput
    import os
    import sys
    from .parameter import ModelParameters
    from .score_function import CV_score

    beta_index = int(beta_index)
    print(beta_index)
    a = ModelParameters()
    beta_value = a.list_of_beta_params[beta_index]

    # Modify beta value. The context manager restores stdout even if the
    # in-place rewrite raises.
    with fileinput.input("Chempy/parameter.py", inplace=True) as parameter_file:
        for line in parameter_file:
            if "\tbeta_param" in line:
                print("\tbeta_param = %.5f" % beta_value)
            else:
                print(line, end='')

    # Reimport model parameters for new beta (pop() tolerates a module that
    # is not cached).
    sys.modules.pop('Chempy.parameter', None)
    sys.modules.pop('Chempy.score_function', None)
    from Chempy.parameter import ModelParameters
    from Chempy.score_function import preload_params_mcmc
    a = ModelParameters()
    preload_params_mcmc()

    # Calculate CV score
    score = CV_score()
    rescaled_score = np.power(score, 1. / len(a.initial_neural_names))

    # Make sure the output folder exists before writing into it.
    os.makedirs('Scores/', exist_ok=True)
    np.savez('Scores/CV' + str(beta_index) + '.npz',
             beta_param=beta_value,
             score=score,
             rescaled_score=rescaled_score)
    return beta_value, score, rescaled_score
def CV_stitch():
    """
    Combine the per-beta CV result files into a single file.

    Reads ``Scores/CV<i>.npz`` for every index of
    ``ModelParameters().list_of_beta_params`` and writes the collected
    ``beta_param``, ``score`` and ``rescaled_score`` entries to one ``.npz``
    file in ``Scores/`` labelled with the yield table names.

    Returns:
        tuple: ``(beta, score)`` - the lists of loaded beta values and scores.
    """
    from .score_function import preload_params_mcmc
    preload = preload_params_mcmc()
    from .parameter import ModelParameters
    a = ModelParameters()

    beta, score, rescaled_score = [], [], []
    n_files = len(a.list_of_beta_params)
    for file_index in range(n_files):
        with np.load('Scores/CV%d.npz' % file_index) as stored:
            beta.append(stored['beta_param'])
            score.append(stored['score'])
            rescaled_score.append(stored['rescaled_score'])

    output_name = "Scores/CV_score - %s, %s, %s.npz" % (
        a.yield_table_name_sn2,
        a.yield_table_name_agb,
        a.yield_table_name_1a,
    )
    np.savez(output_name,
             beta_param=beta,
             score=score,
             rescaled_score=rescaled_score)
    return beta, score
def scoring_wrapper():
    """
    NO LONGER USED

    Run the Bayes and the cross-validation scoring one after the other for the
    configured yield set, using the code in score_function.py.
    The neural network must be trained beforehand using training_data and
    create_network.

    Main outputs are labelled .npz files in the Scores/ folder.
    MUST set a.UseNeural = True for this and select correct dataset.
    """
    from Chempy.neural import training_data, create_network
    import time
    from Chempy.parameter import ModelParameters
    from Chempy.score_function import CV_wrapper, Bayes_wrapper

    init_time = time.time()
    a = ModelParameters()

    def elapsed():
        # Seconds since this wrapper was started.
        return time.time() - init_time

    print('Step 1 (at time %.2f s): Calculate Bayes score' % elapsed())
    Bayes_wrapper()

    print('Step 2 (at time %.2f s): Calculate cross-validation score' % elapsed())
    CV_wrapper()

    print('Process complete in time %.2f s' % elapsed())
    return None
def CV_element_predictions(size=10):
    """
    Compute the NORMALISED element predictions from the CV scoring.

    ``overall_CV`` is run ``size`` times to check for scatter. After each run
    the per-element means, sigmas and likelihood factors are read back from
    ``OverallScores/CV_element_likelihoods.npz``. All runs are then stored
    together in ``OverallScores/CV_normalised_element_predictions_<sn2>.npz``.

    Args:
        size: Number of repetitions (default 10).

    Returns:
        None
    """
    from .overall_scores import overall_CV
    import numpy as np
    from .parameter import ModelParameters

    likelihood_file = 'OverallScores/CV_element_likelihoods.npz'
    a = ModelParameters()

    scores, el_means, el_sigmas, el_likelihoods = [], [], [], []
    for run in range(size):
        print('Computing score %d of %s' % (run + 1, size))
        scores.append(np.log10(overall_CV()))
        with np.load(likelihood_file) as el_dat:
            el_means.append(el_dat['element_mean'])
            el_sigmas.append(el_dat['element_sigma'])
            el_likelihoods.append(el_dat['likelihood_factors'])

    # The element names are taken from whatever file is on disk at this point.
    with np.load(likelihood_file) as el_dat:
        el_names = el_dat['elements']

    # Save output as npz file - each will be list of identically simulated data
    output_name = ('OverallScores/CV_normalised_element_predictions_%s.npz'
                   % a.yield_table_name_sn2)
    np.savez(output_name,
             mean=el_means,
             sigma=el_sigmas,
             elements=el_names,
             likelihood=el_likelihoods,
             normalised_scores=scores)
    return None
def CV_wrapper():
    """
    NO LONGER USED

    Calculate the UNNORMALISED CV score for every beta-function parameter
    value (defined in parameter.py).

    For each beta value the ``beta_param`` line of ``Chempy/parameter.py`` is
    rewritten in place, the cached Chempy modules are dropped and re-imported,
    and ``CV_score`` is evaluated. It is not parallelized here
    (parallelization is done in the MCMC already and for element predictions).

    The scores are saved as an ``.npz`` file in ``Scores/`` labelled with the
    yield table names.

    Returns:
        tuple: ``(beta_params, CV_score_list)``.
    """
    import fileinput
    import os
    import sys
    import time
    from .parameter import ModelParameters
    from .score_function import CV_score

    a = ModelParameters()
    beta_params = a.list_of_beta_params
    CV_score_list = []
    init_time = time.time()

    for i, beta_value in enumerate(beta_params):
        print("Calculating value %d of %d after %.3f seconds" %
              (i + 1, len(beta_params), time.time() - init_time))

        # Modify beta value. The context manager restores stdout even if the
        # in-place rewrite raises.
        with fileinput.input("Chempy/parameter.py", inplace=True) as parameter_file:
            for line in parameter_file:
                if "\tbeta_param" in line:
                    print("\tbeta_param = %.5f" % beta_value)
                else:
                    print(line, end='')

        # Reimport model parameters for new beta. Importing score_function
        # again re-executes that module with the updated parameter file.
        sys.modules.pop('Chempy.parameter', None)
        sys.modules.pop('Chempy.score_function', None)
        from Chempy.score_function import preload_params_mcmc
        from Chempy.parameter import ModelParameters
        a = ModelParameters()

        # Calculate CV score
        CV_score_list.append(CV_score())

    # Save output as npz array (create the folder first so savez cannot fail
    # on a missing directory).
    os.makedirs('Scores/', exist_ok=True)
    np.savez("Scores/CV_score - " + str(a.yield_table_name_sn2) + ", " +
             str(a.yield_table_name_agb) + ", " +
             str(a.yield_table_name_1a) + ".npz",
             beta_param=beta_params,
             score=CV_score_list)
    return beta_params, CV_score_list
def runner(index):
    """Run Chempy for the parameter set stored at position ``index``.

    Reads ``param_grid[index]`` and ``norm_grid[index]`` from the module-level
    grids and returns ``(abundances, params, norm_params)``; the element list
    that ``run_Chempy_sample`` also returns is discarded.
    """
    model_parameters = ModelParameters()
    params = param_grid[index]
    norm_params = norm_grid[index]
    abundances, _ = run_Chempy_sample(params, model_parameters)
    del model_parameters
    return abundances, params, norm_params
def runner(index):
    """Function to compute the Chempy predictions for each parameter set.

    Runs ``single_timestep_chempy`` on ``all_params[index]`` with a fresh
    ``ModelParameters`` instance.

    Returns:
        tuple: ``(abundances, params)``. When the model signals a failed run
        by returning ``inf`` the abundances are a zero vector with one entry
        per element in the module-level ``els``.

    Raises:
        ValueError: If the model returns a float other than ``inf``.
            Previously this case fell through and failed with an
            ``UnboundLocalError`` on ``return``.
    """
    b = ModelParameters()
    params = all_params[index]
    output = single_timestep_chempy((params, b))
    del b

    # isinstance (rather than type(...) == float) also recognises NumPy float
    # scalars, which subclass the built-in float.
    if isinstance(output, float):
        if output == np.inf:
            return np.zeros(len(els)), params
        raise ValueError(
            'single_timestep_chempy returned unexpected scalar %r for index %r'
            % (output, index))

    return output[0], params
class preload_params_mcmc():
    """This calculates and stores useful quantities that would be calculated multiple times otherwise for the mcmc run.
    Definitions can be called from this file

    All statements below run once, when the class body is executed; every
    name assigned here (including loop variables) becomes a class attribute.
    Files read: 'Neural/neural_model.npz' and
    'Chempy/input/stars/<stellar_identifier>.npy'.
    """
    import numpy as np
    from Chempy.parameter import ModelParameters
    from scipy.stats import beta  # only referenced by the commented-out error_weight code below
    a = ModelParameters()
    # Traced elements plus two extra entries appended at the end.
    elements_to_trace = list(a.elements_to_trace)
    elements_to_trace.append('Zcorona')
    elements_to_trace.append('SNratio')
    # Neural network coeffs
    coeffs = np.load('Neural/neural_model.npz')
    w_array_0 = coeffs['w_array_0']
    w_array_1 = coeffs['w_array_1']
    b_array_0 = coeffs['b_array_0']
    b_array_1 = coeffs['b_array_1']
    coeffs.close()
    # Beta function calculations
    # flat_model_error_prior is used as (start, stop, number of points) for the error grid.
    model_errors = np.linspace(a.flat_model_error_prior[0], a.flat_model_error_prior[1], a.flat_model_error_prior[2])
    #error_weight = beta.pdf(model_errors, a = a.beta_error_distribution[1], b = a.beta_error_distribution[2])
    #error_weight/= sum(error_weight)
    # Observational data of the star; accessed by field name below, so this
    # is presumably a structured array - verify against the input files.
    wildcard = np.load('Chempy/input/stars/' + a.stellar_identifier + '.npy')
    elements = []
    star_abundance_list = []
    star_error_list = []
    # Keep only the traced elements that are present in the stellar data;
    # entry [0] of a field is used as the abundance and entry [1] as its error.
    for i, item in enumerate(a.elements_to_trace):
        if item in list(wildcard.dtype.names):
            elements.append(item)
            star_abundance_list.append(float(wildcard[item][0]))
            star_error_list.append(float(wildcard[item][1]))
    star_abundance_list = np.hstack(star_abundance_list)
    star_error_list = np.hstack(star_error_list)
    elements = np.hstack(elements)
    # One entry per model error value: the model error and the stellar errors
    # added in quadrature, stored as a column of shape (len(elements), 1).
    err = []
    for i, item in enumerate(model_errors):
        error_temp = np.ones(len(elements)) * item
        err.append(
            np.sqrt(
                np.multiply(error_temp[:, None], error_temp[:, None]).T +
                np.multiply(star_error_list, star_error_list)).T)
def overall_Bayes():
    """
    Convenience function to calculate the overall Bayes score, marginalizing
    over all parameters including beta.

    NB: in parameter.py, we must set the beta_parameter prior. 5+-2.5 is used
    here in linear space.

    The score and its error are written to an ``.npz`` file in
    ``OverallScores/`` (created if missing), labelled with the yield table
    names.

    Returns:
        tuple: ``(integral, integral_err)`` as returned by ``Bayes_score``.
    """
    import numpy as np
    import os
    from .parameter import ModelParameters
    from .score_function import Bayes_score

    directory = 'OverallScores/'
    if not os.path.exists(directory):
        os.makedirs(directory)

    a = ModelParameters()
    integral, integral_err = Bayes_score()

    output_name = "OverallScores/Bayes_score - %s, %s, %s.npz" % (
        a.yield_table_name_sn2,
        a.yield_table_name_agb,
        a.yield_table_name_1a,
    )
    np.savez(output_name, score=integral, score_err=integral_err)
    return integral, integral_err
def CV_errors(size=10):
    """
    Compute the overall CV score with errors.

    ``overall_CV`` is run ``size`` times; the median and the 15.865 / 84.135
    percentiles of ``log10(score)`` are estimated from those runs and saved
    to an ``ErrorCV - ...npz`` file in ``OverallScores/``.

    Args:
        size: Number of repetitions (default 10, the previously hard-coded
            value).

    Returns:
        tuple: ``(median, median - lower, upper - median)``.

    Raises:
        ValueError: If ``size`` is smaller than 1.
    """
    from .overall_scores import overall_CV
    import numpy as np
    from .parameter import ModelParameters

    if size < 1:
        raise ValueError('size must be at least 1, got %r' % (size,))

    a = ModelParameters()
    scores = [np.log10(overall_CV()) for _ in range(size)]

    median = np.median(scores)
    lower = np.percentile(scores, 15.865)
    upper = np.percentile(scores, 100 - 15.865)

    # "+" is the distance to the upper percentile and "-" the distance to the
    # lower one (the two were printed the wrong way round before).
    print("Average LOO-CV score over %d iterations is %.2f + %.2f - %.2f" %
          (size, median, upper - median, median - lower))

    np.savez("OverallScores/ErrorCV - " + str(a.yield_table_name_sn2) + ", " +
             str(a.yield_table_name_agb) + ", " +
             str(a.yield_table_name_1a) + ".npz",
             median=median, lower=lower, upper=upper)
    return median, median - lower, upper - median
# In fact, mass loss and unprocessed mass are the same.
# Caveat: SNIa yields are not metallicity dependent and the unprocessed mass
#         loss is 0, thus we only store the yields once at the lowest
#         metallicity.
# Caveat: direct BH collapse returns only initial element fractions and
#         happens before SNII, so we combine the mass loss with the SNII
#         mass loss.
import numpy as np
import xdrlib
import numpy as np
import multiprocessing as mp
import os

# ---------------- Setting the Chempy parameters ----------------
from Chempy.parameter import ModelParameters
a = ModelParameters()

# Solar abundances: populate the table by calling the Asplund09 method.
from Chempy.solar_abundance import solar_abundances
basic_solar = solar_abundances()
basic_solar.Asplund09()

# Yields: one feedback object per channel, each populated by calling the
# method named after the yield set.
from Chempy.yields import SN2_feedback, AGB_feedback, SN1a_feedback
basic_sn2 = SN2_feedback()
basic_sn2.chieffi04_net()
basic_1a = SN1a_feedback()
basic_1a.Seitenzahl()
basic_agb = AGB_feedback()
basic_agb.Karakas16_net()
from Chempy.parameter import ModelParameters
a=ModelParameters()
import multiprocessing as mp
import numpy as np
import tqdm
import time
from Chempy.cem_function import run_Chempy_sample

# First load parameter dataset:
# (module-level so that runner() can read the grids by index)
training_params=np.load("APOGEE Training Data.npz")
param_grid=training_params['param_grid']
norm_grid = training_params['norm_grid']
N = len(param_grid) # number of samples

def runner(index):
    """Function to compute the Chempy predictions for each parameter set

    Looks up entry ``index`` of the module-level ``param_grid`` and
    ``norm_grid``, runs ``run_Chempy_sample`` on the parameters with a fresh
    ``ModelParameters`` instance and returns ``(abun, params, norm_params)``.
    The second value returned by ``run_Chempy_sample`` is not returned.
    """
    b=ModelParameters()
    params=param_grid[index]
    norm_params=norm_grid[index]
    abun,els=run_Chempy_sample(params,b); del b; return abun,params,norm_params

if __name__=='__main__':
    # Reference time, taken before the first model run.
    init_time=time.time()
    # Compute elements by running code once:
    _,els=run_Chempy_sample(param_grid[0],a)
def overall_CV():
    """
    Convenience function to calculate the overall CV score, running MCMC over
    all parameters including beta.

    NB: must set beta_param / log10_beta priors in the parameter file.

    For every element found in ``preload_params_mcmc().elements`` the
    ``elements_to_trace`` line of ``Chempy/parameter.py`` is rewritten
    without that element, the MCMC is run, the parameter file is restored and
    the left-out element is predicted from the posterior samples. The product
    of the per-element likelihood factors is the overall score.

    Rewriting the parameter file is not an elegant implementation, but the
    other steps are far slower. The original line is restored in a
    ``finally`` clause, so a failing MCMC run does not leave the parameter
    file with an element missing.

    Files written: ``Scores/CV_elements.npy``,
    ``OverallScores/CV_element_likelihoods.npz`` and
    ``OverallScores/CV_score_rescaled - ...npy``.

    Returns:
        The overall score rescaled by ``1 / len(elements_to_trace)``.
    """
    import fileinput
    import multiprocessing as mp
    import os
    import sys

    import matplotlib.pyplot as plt
    import numpy as np
    import tqdm
    from scipy.stats import norm

    from Chempy.parameter import ModelParameters
    from Chempy.plot_mcmc import restructure_chain
    from Chempy.wrapper import single_star_optimization
    from .score_function import preload_params_mcmc

    def set_elements_line(new_line):
        # Replace the elements_to_trace line of the parameter file in place.
        # The context manager restores stdout even if the rewrite fails.
        with fileinput.input("Chempy/parameter.py", inplace=True) as parameter_file:
            for line in parameter_file:
                if "\telements_to_trace" in line:
                    print(new_line)
                else:
                    print(line, end='')

    def reload_configuration():
        # Drop the cached modules so the edited parameter file is re-read.
        sys.modules.pop('Chempy.parameter', None)
        sys.modules.pop('Chempy.score_function', None)
        from Chempy.parameter import ModelParameters
        from .score_function import preload_params_mcmc
        return ModelParameters(), preload_params_mcmc()

    # Both folders are written to below.
    for directory in ('OverallScores/', 'Scores/'):
        if not os.path.exists(directory):
            os.makedirs(directory)

    # Initialise arrays
    element_mean = []
    element_sigma = []
    factors = []
    overall_score = 1.

    # Starting elements (copied from original parameter file)
    b = ModelParameters()
    a = b
    starting_el = b.elements_to_trace
    orig = "\telements_to_trace = " + str(starting_el)  # Original element string

    # Calculate required Chempy elements
    preload = preload_params_mcmc()
    elements_init = np.copy(preload.elements)
    np.save('Scores/CV_elements.npy', elements_init)

    # One replacement line per left-out element. The last element of the
    # list has no trailing ", " to remove.
    newstr = []
    for el in elements_init:
        if el != starting_el[-1]:
            newstr.append(orig.replace("'" + str(el) + "', ", ""))
        else:
            newstr.append(orig.replace("'" + str(el) + "'", ""))

    for index in range(len(elements_init)):  # Iterate over removed element
        set_elements_line(newstr[index])
        try:
            a, preload = reload_configuration()

            # Run MCMC with one element left out.
            print('Running MCMC iteration %d of %d' % (index + 1, len(elements_init)))
            single_star_optimization()

            # Create the posterior PDF and load it
            restructure_chain('mcmc/')
            positions = np.load('mcmc/posteriorPDF.npy')  # Posterior parameter PDF
        finally:
            # Always put the original element list back.
            set_elements_line(orig)
        a, preload = reload_configuration()

        # All elements are used again for the predictions.
        # Multiprocess and calculate elemental predictions for each parameter set
        from .score_function import element_predictor
        indices = np.ones(len(positions)) * index
        pool = mp.Pool()
        try:
            abundance = list(tqdm.tqdm(
                pool.imap_unordered(element_predictor, zip(positions, indices)),
                total=len(positions)))
        finally:
            pool.close()
            pool.join()

        abundance = np.array(abundance)
        mean, sigma = norm.fit(abundance)
        print(mean)
        print(sigma)
        element_mean.append(mean)
        element_sigma.append(sigma)

        if a.plot_hist:
            plt.clf()
            # 'density' replaces the 'normed' keyword, which no longer exists
            # in current matplotlib.
            plt.hist(abundance, bins=40, density=True, alpha=0.6, color='g')

            # Plot the PDF.
            xmin, xmax = plt.xlim()
            x = np.linspace(xmin, xmax, 100)
            pdf = norm.pdf(x, mean, sigma)
            plt.plot(x, pdf, c='k', linewidth=2)
            title = 'Plot of element %d abundance' % (index)
            plt.title(title)
            plt.xlabel('[X/Fe] abundance')
            plt.ylabel('Relative frequency')

        # Observational error and prediction scatter added in quadrature.
        total_err = np.sqrt((preload.star_error_list[index])**2 + sigma**2)
        likelihood_factor = norm.pdf(mean,
                                     loc=preload.star_abundance_list[index],
                                     scale=total_err)
        overall_score *= likelihood_factor
        factors.append(likelihood_factor)
        print("Likelihood contribution from %dth element is %.8f" %
              (index + 1, likelihood_factor))
        print(overall_score)
        sys.stdout.flush()

    np.savez('OverallScores/CV_element_likelihoods.npz',
             elements=elements_init,
             likelihood_factors=factors,
             element_mean=element_mean,
             element_sigma=element_sigma)
    rescaled_score = np.power(overall_score, 1. / len(starting_el))
    np.save("OverallScores/CV_score_rescaled - " + str(a.yield_table_name_sn2) +
            "," + str(a.yield_table_name_agb) + ", " +
            str(a.yield_table_name_1a) + ".npy", rescaled_score)
    return rescaled_score
def single_star_optimization():
    '''
    Quickly optimize the parameters of a single zone and start the MCMC.

    INPUT:

       a = will be loaded from parameter.py (prepare all variables there)

    OUTPUT:

       log_list = a list of intermediate results (so far only for debugging);
       it holds a copy of the initial minimization result and a label.

    Only the initial minimization and the MCMC run are active. The iterative
    global / local minimization that used to sit between the two steps is
    disabled.
    '''
    import time
    from .optimization import minimizer_initial_quick
    from .cem_function import global_optimization_error_returned
    from .parameter import ModelParameters
    from .score_function import preload_params_mcmc

    # For testing: silences every warning for the rest of the process.
    import warnings
    warnings.filterwarnings("ignore")

    a = ModelParameters()
    preload = preload_params_mcmc()
    print(a.stellar_identifier_list)
    start_time = time.time()

    # I: Minimization for each star separately (run serially here).
    star_names = list(a.stellar_identifier_list)
    result = minimizer_initial_quick(star_names)
    log_list = [np.copy(result), 'initial minimization']
    initial = time.time()
    print('first minimization for each star separately took: %2.f seconds' %
          (initial - start_time))

    # V: MCMC run
    # NOTE(review): the second positional argument of np.unique is
    # return_index, so the dtype names act as a truthy flag here and
    # 'elements' is a (values, indices) pair. This looks unintended -
    # confirm against posterior_function_mcmc_quick before changing it.
    elements = np.unique(a.elements_to_trace, preload.wildcard.dtype.names)
    changing_parameter = np.hstack(list(result))

    # The chain is initialised inside mcmc_quick by jittering these parameters.
    mcmc_quick(changing_parameter, elements, preload)
    return log_list
def CV_median():
    """
    Compute the median and the 15.865 / 84.135 percentiles of the posterior
    parameters for each element cross-validation.

    For every element found in ``preload_params_mcmc().elements`` the
    ``elements_to_trace`` line of ``Chempy/parameter.py`` is rewritten
    without that element, the MCMC is run and the posterior statistics of the
    first ``len(a.p0)`` parameters are collected. The value of the beta
    coefficient in the parameter file is used.

    The original ``elements_to_trace`` line is restored in a ``finally``
    clause, so a failing MCMC run does not leave the parameter file with an
    element missing.

    Results are saved to ``Scores/CV_medians<beta_param>.npz`` with keys
    ``elements``, ``median``, ``upper`` and ``lower``.

    Returns:
        None
    """
    import fileinput
    import os
    import sys

    from Chempy.parameter import ModelParameters
    from Chempy.plot_mcmc import restructure_chain
    from Chempy.wrapper import single_star_optimization
    from .score_function import preload_params_mcmc

    def set_elements_line(new_line):
        # Replace the elements_to_trace line of the parameter file in place.
        # The context manager restores stdout even if the rewrite fails.
        with fileinput.input("Chempy/parameter.py", inplace=True) as parameter_file:
            for line in parameter_file:
                if "\telements_to_trace" in line:
                    print(new_line)
                else:
                    print(line, end='')

    def reload_configuration():
        # Drop the cached modules so the edited parameter file is re-read.
        sys.modules.pop('Chempy.parameter', None)
        from Chempy.parameter import ModelParameters
        parameters = ModelParameters()
        sys.modules.pop('Chempy.score_function', None)
        from .score_function import preload_params_mcmc
        return parameters, preload_params_mcmc()

    print('SET BETA PARAMETER IN PARAMETER.PY FILE')

    posterior_med = []
    posterior_up = []
    posterior_low = []

    # Starting elements (copied from original parameter file)
    b = ModelParameters()
    a = b
    starting_el = b.elements_to_trace
    orig = "\telements_to_trace = " + str(starting_el)  # Original element string

    # Calculate required Chempy elements
    preload = preload_params_mcmc()
    elements_init = np.copy(preload.elements)

    # One replacement line per left-out element. The last element of the
    # list has no trailing ", " to remove.
    newstr = []
    for el in elements_init:
        if el != starting_el[-1]:
            newstr.append(orig.replace("'" + str(el) + "', ", ""))
        else:
            newstr.append(orig.replace("'" + str(el) + "'", ""))

    for index in range(len(elements_init)):  # Iterate over removed element
        set_elements_line(newstr[index])
        try:
            a, preload = reload_configuration()

            # Run MCMC with one element left out.
            print('Running MCMC iteration %d of %d' % (index + 1, len(elements_init)))
            single_star_optimization()

            # Create the posterior PDF and load it
            restructure_chain('mcmc/')
            positions = np.load('mcmc/posteriorPDF.npy')  # Posterior parameter PDF

            n_param = len(a.p0)
            posterior_med.append(np.array(
                [np.median(positions[:, j]) for j in range(n_param)]))
            posterior_up.append(np.array(
                [np.percentile(positions[:, j], 100 - 15.865) for j in range(n_param)]))
            posterior_low.append(np.array(
                [np.percentile(positions[:, j], 15.865) for j in range(n_param)]))
        finally:
            # RESET parameter file, also when the MCMC step failed.
            set_elements_line(orig)
        a, preload = reload_configuration()
        sys.stdout.flush()

    # Make sure the output folder exists before writing into it.
    os.makedirs('Scores/', exist_ok=True)
    np.savez('Scores/CV_medians' + str(a.beta_param) + '.npz',
             elements=elements_init,
             median=posterior_med,
             upper=posterior_up,
             lower=posterior_low)
    return None
def mcmc_quick(changing_parameter, elements, preload):
    '''
    Convenience function to use the MCMC for one zone. A subdirectory mcmc/
    will be created in the current directory and the chains will be stored
    there.

    The MCMC will sample the volume of best posterior for the likelihood
    functions that are declared in parameter.py.
    This is a cut down version to speed up MCMC for one star only.

    INPUT:

       changing_parameter = the parameter vector for initialization (will
       usually be found from minimization before). The initial chain will be
       created by jittering slightly the initial parameter guess

       elements = the corresponding element symbols (passed through to the
       posterior function)

       preload = preloaded quantities (passed through to the posterior
       function)

    OUTPUT:

       The function will create a folder and store the chain, the
       log-probabilities, the blobs and the mean / std posterior per step.

    The MCMC stops after a.m rounds, or earlier once the walker-averaged
    posterior of the last step differs by less than a.mcmc_tolerance from
    the values 100 and 200 steps before (only checked after
    a.min_mcmc_iterations rounds).
    '''
    import time
    import os
    import multiprocessing as mp
    from .cem_function import posterior_function_mcmc_quick
    from .parameter import ModelParameters
    import emcee

    a = ModelParameters()
    start1 = time.time()
    directory = 'mcmc/'
    if os.path.exists(directory):
        if a.verbose:
            print('%s already existed. Content might be overwritten' % (directory))
    else:
        os.makedirs(directory)

    nthreads = mp.cpu_count()
    if nthreads == 4:
        nthreads = 2
    ndim = len(changing_parameter)
    a.nwalkers = max(a.nwalkers, int(ndim * 2))

    # Initialise every walker with a slightly jittered copy of the start
    # vector, redrawing the jitter while the posterior is -inf.
    # NOTE(review): if posterior_function_mcmc_quick returns a
    # (posterior, blobs) pair, the comparison with -inf is always False and
    # the first jitter is accepted unconditionally - confirm.
    chain = np.empty(shape=(a.nwalkers, ndim))
    for i in range(a.nwalkers):
        result = -np.inf
        while result == -np.inf:
            jitter = np.random.normal(loc=0, scale=0.001, size=ndim)
            result = posterior_function_mcmc_quick(changing_parameter + jitter,
                                                   elements, preload)
        chain[i] = changing_parameter + jitter

    pool = mp.Pool()
    try:
        sampler = emcee.EnsembleSampler(a.nwalkers,
                                        ndim,
                                        posterior_function_mcmc_quick,
                                        threads=nthreads,
                                        args=[elements, preload],
                                        pool=pool)
        # Burn-in
        pos, prob, state, blobs = sampler.run_mcmc(chain, a.mburn)

        posterior_list = []
        posterior_std_list = []
        for i in range(a.m):
            print('step ', i + 1, 'of ', a.m)
            pos, prob, state, blobs = sampler.run_mcmc(pos,
                                                       a.save_state_every,
                                                       rstate0=state,
                                                       lnprob0=prob,
                                                       blobs0=blobs,
                                                       storechain=True)
            posterior = sampler.lnprobability
            # Walker-averaged posterior per stored step (computed once).
            mean_posterior = np.mean(posterior, axis=0)
            posterior_list.append(mean_posterior[-1])
            posterior_std_list.append(np.std(posterior, axis=0)[-1])
            print(mean_posterior[0], mean_posterior[-1])

            elapsed1 = (time.time() - start1)
            print('calculation so far took', elapsed1, ' seconds')
            if (i > a.min_mcmc_iterations
                    and np.abs(mean_posterior[-1] - mean_posterior[-100]) < a.mcmc_tolerance
                    and np.abs(mean_posterior[-1] - mean_posterior[-200]) < a.mcmc_tolerance):
                break

        np.save('%s/flatchain' % (directory), sampler.chain)
        np.save('%s/flatlnprobability' % (directory), sampler.lnprobability)
        np.save('%s/flatblobs' % (directory), sampler.blobs)
        posterior = sampler.lnprobability
        posterior_list.append(np.mean(posterior, axis=0)[-1])
        posterior_std_list.append(np.std(posterior, axis=0)[-1])
        np.save('%s/flatmeanposterior' % (directory), posterior_list)
        np.save('%s/flatstdposterior' % (directory), posterior_std_list)
    finally:
        # Release the worker processes even if sampling or saving failed.
        pool.close()
        pool.join()

    if a.send_email:
        send_email(nthreads, i,
                   np.mean(posterior, axis=0)[0],
                   np.mean(posterior, axis=0)[-1], a, elapsed1)
def Bayes_score():
    """
    Calculate the Bayes factor score for a specific yield set and choice of
    error parameter, as defined in the parameter file.

    First MCMC is run to determine the centre of the parameter space and
    then the integration is performed by importance sampling: samples are
    drawn from a multivariate normal fitted to the posterior samples (median
    and covariance) and the posterior is divided by that normal density.
    This needs a trained neural network in the Neural/ folder.

    When beta is a fixed parameter (neither 'beta_param' nor 'log10_beta' is
    in ``a.to_optimize``) the integral and its error are additionally saved
    to ``Scores/integral_<beta>.npy`` and ``Scores/integral_err_<beta>.npy``.

    Returns:
        tuple: ``(integral, integral_err)`` - the Bayes score and its
        predicted (1 sigma) error.
    """
    from .parameter import ModelParameters
    from .cem_function import posterior_function_mcmc_quick
    from .score_function import preload_params_mcmc
    from .plot_mcmc import restructure_chain
    from .wrapper import single_star_optimization
    from scipy.stats import multivariate_normal as scinorm
    from numpy.random import multivariate_normal as numnorm
    from skmonaco import mcimport
    import os
    import time

    # Load model parameters
    a = ModelParameters()
    preload = preload_params_mcmc()
    init_time = time.time()

    # Compute posterior + load median values - this automatically uses the
    # neural network!!
    print('After %.3f seconds, finding posterior parameter values' %
          (time.time() - init_time))
    single_star_optimization()
    restructure_chain('mcmc/')
    positions = np.load('mcmc/posteriorPDF.npy')

    n_param = len(a.p0)
    init_param = [np.percentile(positions[:, j], 50) for j in range(n_param)]
    print('After %.3f seconds, initial parameters are:' %
          (time.time() - init_time), init_param)

    # Function to compute posterior (needs a trained neural network)
    def posterior(theta):
        a = ModelParameters()
        post, _ = posterior_function_mcmc_quick(theta, a, preload)
        return np.exp(post)

    # Compute covariance matrix of the first n_param posterior columns in one
    # call (columns are the variables); atleast_2d keeps the matrix 2-D when
    # there is a single parameter.
    print('After %.3f seconds, computing covariance matrix' %
          (time.time() - init_time))
    cov_matrix = np.atleast_2d(np.cov(positions[:, :n_param], rowvar=False))

    def gauss_factor(theta):
        # Returns gaussian fit to data
        return scinorm.pdf(theta, mean=np.array(init_param), cov=cov_matrix)

    def posterior_mod(theta):
        # Returns flattened posterior
        return posterior(theta) / gauss_factor(theta)

    def dist(size):
        # Distribution function for mcmc sampling
        mean = np.array(init_param)
        return numnorm(mean, cov_matrix, size=size)

    # Both membership tests must be spelled out: the previous
    # "'beta_param' or 'log10_beta' in ..." was always true because a
    # non-empty string is truthy.
    beta_is_free = ('beta_param' in a.to_optimize
                    or 'log10_beta' in a.to_optimize)
    if beta_is_free:
        print('After %.3f seconds, starting parameter-space integration' %
              (time.time() - init_time))
    else:
        print('After %.3f seconds, starting parameter-space integration for beta = %.3f'
              % (time.time() - init_time, a.beta_param))

    integral, integral_err = mcimport(posterior_mod,
                                      a.int_samples,
                                      dist,
                                      nprocs=4)  # Quad-core processing
    print('After %.3f seconds, integration is complete' %
          (time.time() - init_time))

    if not beta_is_free:
        # Output is only saved per beta value when beta is fixed.
        os.makedirs('Scores/', exist_ok=True)
        np.save('Scores/integral_' + str(a.beta_param) + '.npy', integral)
        np.save('Scores/integral_err_' + str(a.beta_param) + '.npy',
                integral_err)

    return integral, integral_err
def posterior(theta):
    """Return the exponential of the value computed by
    ``posterior_function_mcmc_quick`` for ``theta``.

    A fresh ``ModelParameters`` instance is created on every call; ``preload``
    is taken from the enclosing scope. The second value returned by the
    posterior function is discarded.
    """
    model_parameters = ModelParameters()
    log_post, _ = posterior_function_mcmc_quick(theta, model_parameters, preload)
    return np.exp(log_post)