def reassign_metadata(new_posterior, original_hdf5):
    # Make sure the output file has the same metadata as the original
    # input HDF5 file.
    base_file = read_samples(original_hdf5)
    mcmc_diagnostics_params = ['nLocalTemps', 'randomSeed']
    meta_dict = {}
    for colname, column in base_file.columns.items():
        meta_dict[colname] = column.meta
    for colname, column in new_posterior.columns.items():
        if colname in mcmc_diagnostics_params:
            # These parameters are fixed within a run, but don't have to
            # be equal between runs.
            column.meta = {'vary': OUTPUT}
        elif colname in meta_dict:
            column.meta = meta_dict[colname]
        elif 'cos' + colname in meta_dict:
            column.meta = meta_dict['cos' + colname]
        elif 'sin' + colname in meta_dict:
            column.meta = meta_dict['sin' + colname]
        elif 'log' + colname in meta_dict:
            column.meta = meta_dict['log' + colname]
        elif colname.startswith('chain_'):
            # Same reasoning as for mcmc_diagnostics_params.
            column.meta = {'vary': OUTPUT}
        else:
            column.meta = {'vary': FIXED}
    return new_posterior
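
# A minimal usage sketch (not part of the original source). It assumes that
# read_samples and the 'vary' constants FIXED/OUTPUT come from lalinference
# as below, and the file names 'combined.hdf5'/'run1.hdf5' are hypothetical.
from lalinference.io import read_samples
from lalinference import LALINFERENCE_PARAM_FIXED as FIXED
from lalinference import LALINFERENCE_PARAM_OUTPUT as OUTPUT

posterior = read_samples('combined.hdf5')
# copy per-column metadata from the original run's file onto the new table
posterior = reassign_metadata(posterior, 'run1.hdf5')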
def read_nested_from_hdf5(nested_path_list, strict_versions=True):
    headers = None
    input_arrays = []
    metadata = {}
    log_noise_evidences = []
    log_max_likelihoods = []
    nlive = []
    from lalinference.io import read_samples, extract_metadata

    for path in nested_path_list:
        if not os.path.isfile(path):
            print('Unable to open %s, skipping file' % path)
            continue
        try:
            tab = read_samples(path, tablename=nested_dset_name)
            input_arrays.append(tab)
        except KeyError:
            print('Unable to read table from %s, skipping' % path)
            continue
        # N.B.: this appends to metadata, log_noise_evidences,
        # log_max_likelihoods and nlive, in addition to returning
        # run_identifier.
        run_identifier = extract_metadata(path, metadata,
                                          log_noise_evidences,
                                          log_max_likelihoods, nlive,
                                          nested_dset_name, True,
                                          strict_versions)

    if len(input_arrays) == 0:
        print('No nested samples could be read from %s'
              % str(nested_path_list))
        raise IOError

    # For metadata stored as a list, take the average of numeric entries
    # and keep only the first string entry.
    for level in metadata:
        for key in metadata[level]:
            if isinstance(metadata[level][key], list) and all(
                    isinstance(x, (int, float))
                    for x in metadata[level][key]):
                metadata[level][key] = mean(metadata[level][key])
            elif isinstance(metadata[level][key], list) and all(
                    isinstance(x, str) for x in metadata[level][key]):
                print('Warning: only keeping the first of the %d entries '
                      'found for metadata %s/%s. You can find the whole '
                      'list in the headers of individual HDF5 output '
                      'files\n' % (len(metadata[level][key]), level, key))
                metadata[level][key] = metadata[level][key][0]

    log_noise_evidence = reduce(logaddexp, log_noise_evidences) - log(
        len(log_noise_evidences))
    log_max_likelihood = max(log_max_likelihoods)

    return (input_arrays, log_noise_evidence, log_max_likelihood, metadata,
            nlive, run_identifier)
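
# A minimal usage sketch (not part of the original source). It assumes the
# module-level names used by the function above are bound as below; the
# dataset name and HDF5 file names are hypothetical.
import os
from functools import reduce
from numpy import mean, log, logaddexp

nested_dset_name = 'nested_samples'  # assumed dataset name

arrays, logz_noise, logl_max, meta, nlive, run_id = read_nested_from_hdf5(
    ['run1.hdf5', 'run2.hdf5'])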
# convert nested samples to posterior samples
outpost = os.path.join(outdir, "{}_post.hdf".format(label))
runcmd = " ".join([n2p, "-p", outpost, outfile])
with sp.Popen(
    runcmd,
    stdout=sp.PIPE,
    stderr=sp.PIPE,
    shell=True,
    bufsize=1,
    universal_newlines=True,
) as p:
    for line in p.stdout:
        print(line, end="")

# get posterior samples
post = read_samples(
    outpost, tablename=LALInferenceHDF5PosteriorSamplesDatasetName)
lp = len(post["H0"])
postsamples = np.zeros((lp, len(priors)))
for i, p in enumerate(priors.keys()):
    postsamples[:, i] = post[p.upper()]

# get evidence
hdf = h5py.File(outpost, "r")
a = hdf["lalinference"]["lalinference_nest"]
evsig = a.attrs["log_evidence"]
evnoise = a.attrs["log_noise_evidence"]
hdf.close()

# run bilby via the pe interface
runner = pe(
    data_file=hetfile,
def read_nested_from_hdf5(nested_path_list, strict_versions=True):
    headers = None
    input_arrays = []
    metadata = {}
    log_noise_evidences = []
    log_max_likelihoods = []
    nlive = []
    from lalinference.io import read_samples

    def update_metadata(level, attrs, collision='raise'):
        """Updates the sub-dictionary 'key' of 'metadata' with the values
        from 'attrs', while enforcing that existing values are equal to
        those with which the dict is updated.
        """
        if level not in metadata:
            metadata[level] = {}
        for key in attrs:
            if key in metadata[level]:
                if collision == 'raise':
                    if attrs[key] != metadata[level][key]:
                        if key == 'version' and not strict_versions:
                            continue
                        else:
                            raise ValueError(
                                'Metadata mismatch on level %r for key %r:'
                                '\n\t%r != %r'
                                % (level, key, attrs[key],
                                   metadata[level][key]))
                elif collision == 'append':
                    if isinstance(metadata[level][key], list):
                        metadata[level][key].append(attrs[key])
                    else:
                        metadata[level][key] = [metadata[level][key],
                                                attrs[key]]
                elif collision == 'ignore':
                    pass
                else:
                    raise ValueError('Invalid value for collision: %r'
                                     % collision)
            else:
                metadata[level][key] = attrs[key]
        return

    for path in nested_path_list:
        if not os.path.isfile(path):
            print('Unable to open %s, skipping file' % path)
            continue
        try:
            tab = read_samples(
                path, path='lalinference/lalinference_nest/nested_samples')
            input_arrays.append(tab)
        except Exception:
            print('Unable to read table from %s, skipping' % path)
            continue
        with h5py.File(path, 'r') as hdf:
            # walk down the groups until the actual data is reached,
            # storing metadata for each step.
            current_level = '/lalinference'
            group = hdf[current_level]
            update_metadata(current_level, group.attrs)

            if len(hdf[current_level].keys()) != 1:
                raise KeyError('Multiple run-identifiers found: %r'
                               % list(hdf[current_level].keys()))
            # we ensured above that there is only one identifier in the group.
            run_identifier = list(hdf[current_level].keys())[0]

            current_level = '/lalinference/' + run_identifier
            group = hdf[current_level]
            update_metadata(current_level, group.attrs, collision='append')

            # store the noise evidence and max likelihood separately
            # for later use
            log_noise_evidences.append(group.attrs['log_noise_evidence'])
            log_max_likelihoods.append(group.attrs['log_max_likelihood'])
            nlive.append(group.attrs['number_live_points'])

            # storing the metadata under the posterior group name simplifies
            # writing it into the output hdf file.
            current_level = ('/lalinference/' + run_identifier + '/'
                             + nested_dset_name)
            current_level_posterior = ('/lalinference/' + run_identifier
                                       + '/' + posterior_dset_name)
            group = hdf[current_level]
            update_metadata(current_level_posterior, group.attrs,
                            collision='ignore')

    # for metadata which is in a list, take the average.
    for level in metadata:
        for key in metadata[level]:
            if isinstance(metadata[level][key], list) and all(
                    isinstance(x, (int, float))
                    for x in metadata[level][key]):
                metadata[level][key] = mean(metadata[level][key])

    log_noise_evidence = reduce(logaddexp, log_noise_evidences) - log(
        len(log_noise_evidences))
    log_max_likelihood = max(log_max_likelihoods)

    return (input_arrays, log_noise_evidence, log_max_likelihood, metadata,
            nlive, run_identifier)
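
# A minimal standalone sketch (not part of the original source) of the
# collision semantics used by update_metadata above, with a plain dict
# standing in for the HDF5 attributes and a hypothetical helper 'merge':
meta = {}

def merge(store, attrs, collision):
    # 'raise': repeated keys must have matching values; 'append': repeated
    # values are collected into a list; 'ignore': the first value wins.
    for key, value in attrs.items():
        if key not in store:
            store[key] = value
        elif collision == 'append':
            if isinstance(store[key], list):
                store[key].append(value)
            else:
                store[key] = [store[key], value]
        elif collision == 'raise' and store[key] != value:
            raise ValueError('Metadata mismatch for %r' % key)

merge(meta, {'nlive': 1024}, 'append')
merge(meta, {'nlive': 2048}, 'append')
assert meta['nlive'] == [1024, 2048]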
args = parser.parse_args()

# Late imports.
import re
from astropy.table import Table
from astropy.utils.misc import NumpyRNGContext
from matplotlib import pyplot as plt
import numpy as np
from lalinference import io
import lalinference.plot
from lalinference.bayestar.postprocess import find_injection_moc
from scipy.interpolate import interp1d

# Read input.
skymap = io.read_sky_map(args.skymap.name, moc=True)
chain = io.read_samples(args.samples.name)

# If required, downselect to a smaller number of posterior samples.
if args.max_points is not None:
    chain = Table(np.random.permutation(chain)[:args.max_points])

# Calculate P-P plot.
contours = np.asarray(args.contour)
result = find_injection_moc(skymap, chain['ra'], chain['dec'],
                            chain['dist'], contours=1e-2 * contours)


def fmt(x, sigfigs, force_scientific=False):
def comparisons(label, outdir, grid, priors, cred=0.9):
    """
    Perform comparisons of the evidence, parameter values, credible
    intervals, and Kolmogorov-Smirnov test between samples produced with
    lalapps_pulsar_parameter_estimation_nested and cwinpy.
    """
    lppenfile = os.path.join(outdir, "{}_post.hdf".format(label))

    # get posterior samples
    post = read_samples(
        lppenfile, tablename=LALInferenceHDF5PosteriorSamplesDatasetName)

    # get the evidence values and the uncertainty on ln(evidence)
    with h5py.File(lppenfile, "r") as hdf:
        attrs = hdf["lalinference"]["lalinference_nest"].attrs
        info = attrs["information_nats"]
        nlive = attrs["number_live_points"]
        evsig = attrs["log_evidence"]
        evnoise = attrs["log_noise_evidence"]
    everr = np.sqrt(info / nlive)  # the uncertainty on the evidence

    # read in cwinpy results
    result = read_in_result(outdir=outdir, label=label)

    # comparison file
    comparefile = os.path.join(outdir, "{}_compare.txt".format(label))

    # get grid-based evidence
    if grid is not None:
        grid_evidence = grid.log_evidence

    # set values to output
    values = 64 * [None]
    values[0:4] = evsig, evnoise, (evsig - evnoise), everr
    values[4:8] = (
        result.log_evidence,
        result.log_noise_evidence,
        result.log_bayes_factor,
        result.log_evidence_err,
    )
    if grid is not None:
        values[8] = "{0:.3f}".format(grid_evidence)
        values[9] = "{0:.3f}".format(grid_evidence
                                     - result.log_noise_evidence)
    else:
        values[8:10] = ("N/A", "N/A")  # no values supplied

    # output parameter means, standard deviations, and credible intervals
    idx = 10
    for method in ["lalapps", "cwinpy"]:
        values[idx + 9] = int(cred * 100)
        for p in priors.keys():
            samples = (post[p.upper()] if method == "lalapps"
                       else result.posterior[p])

            # convert iota to cos(iota)
            if p == "iota":
                samples = np.cos(samples)

            mean = samples.mean()
            std = samples.std()
            low, high = credible_interval(samples, ci=cred)
            if p == "h0":
                exponent = int(np.floor(np.log10(mean)))
                values[idx] = mean / 10**exponent
                values[idx + 1] = std / 10**exponent
                values[idx + 2] = exponent
                values[idx + 10] = low / 10**exponent
                values[idx + 11] = high / 10**exponent
                values[idx + 12] = exponent
                idx += 3
            else:
                values[idx] = mean
                values[idx + 1] = std
                values[idx + 10] = low
                values[idx + 11] = high
                idx += 2
        idx += 10

    # output parameter maximum a-posteriori points
    maxidx = (result.posterior["log_likelihood"]
              + result.posterior["log_prior"]).idxmax()
    maxidxlppen = (post["logL"] + post["logPrior"]).argmax()
    for method in ["lalapps", "cwinpy"]:
        for p in priors.keys():
            maxpval = (post[p.upper()][maxidxlppen] if method == "lalapps"
                       else result.posterior[p][maxidx])
            if p == "h0":
                exponent = int(np.floor(np.log10(maxpval)))
                values[idx] = maxpval / 10**exponent
                values[idx + 1] = exponent
                idx += 2
            else:
                values[idx] = maxpval
                idx += 1

        if result.use_ratio:
            # convert likelihood ratio back to likelihood
            values[idx] = (post["logL"][maxidxlppen]
                           if method == "lalapps"
                           else (result.posterior["log_likelihood"][maxidx]
                                 + result.log_noise_evidence))
        else:
            values[idx] = (post["logL"][maxidxlppen]
                           if method == "lalapps"
                           else result.posterior["log_likelihood"][maxidx])
        idx += 1

    # calculate the Kolmogorov-Smirnov test for each 1d marginalised
    # distribution, and the Jensen-Shannon divergence, from the two codes.
    # Output the combined p-value of the KS test statistic over all
    # parameters, and the maximum Jensen-Shannon divergence over all
    # parameters.
    values[idx] = np.inf
    pvalues = []
    jsvalues = []
    for p in priors.keys():
        _, pvalue = ks_2samp(post[p.upper()], result.posterior[p])
        pvalues.append(pvalue)

        # calculate J-S divergence
        bins = np.linspace(
            np.min([np.min(post[p.upper()]), np.min(result.posterior[p])]),
            np.max([np.max(post[p.upper()]), np.max(result.posterior[p])]),
            100,
        )

        hp, _ = np.histogram(post[p.upper()], bins=bins, density=True)
        hq, _ = np.histogram(result.posterior[p], bins=bins, density=True)

        jsvalues.append(jensenshannon(hp, hq)**2)

    values[idx] = combine_pvalues(pvalues)[1]
    idx += 1
    values[idx] = np.max(jsvalues)
    values[idx + 1] = cwinpy.__version__
    values[idx + 2] = bilby.__version__

    with open(comparefile, "w") as fp:
        fp.write(FILETEXT.format(*values))
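
# A minimal standalone sketch (not part of the original source) of the two
# consistency checks used above, applied to toy Gaussian sample sets. SciPy's
# jensenshannon returns the J-S *distance*, so it is squared to get the
# divergence, as in the function above.
import numpy as np
from scipy.stats import ks_2samp
from scipy.spatial.distance import jensenshannon

a = np.random.normal(0.0, 1.0, 5000)
b = np.random.normal(0.0, 1.0, 5000)

_, pvalue = ks_2samp(a, b)  # large p-value -> distributions are consistent

bins = np.linspace(min(a.min(), b.min()), max(a.max(), b.max()), 100)
ha, _ = np.histogram(a, bins=bins, density=True)
hb, _ = np.histogram(b, bins=bins, density=True)
jsd = jensenshannon(ha, hb) ** 2  # ~0 for well-matched sample sets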
progress.update(-1, 'Preparing projection')

if opts.align_to is None or opts.input.name == opts.align_to.name:
    prob2, mu2, sigma2, norm2 = prob, mu, sigma, norm
else:
    (prob2, mu2, sigma2, norm2), _ = io.read_sky_map(
        opts.align_to.name, distances=True)

if opts.max_distance is None:
    max_distance = 2.5 * marginal_ppf(0.5, prob2, mu2, sigma2, norm2)
else:
    max_distance = opts.max_distance

R = np.ascontiguousarray(principal_axes(prob2, mu2, sigma2))

if opts.chain:
    chain = io.read_samples(opts.chain.name)
    chain = np.dot(R.T, (hp.ang2vec(
        0.5 * np.pi - chain['dec'], chain['ra'])
        * np.atleast_2d(chain['dist']).T).T)

fig = plt.figure(frameon=False)
n = 1 if opts.projection else 2
gs = gridspec.GridSpec(
    n, n, left=0.01, right=0.99, bottom=0.01, top=0.99,
    wspace=0.05, hspace=0.05)

imgwidth = int(opts.dpi * opts.figure_width / n)
s = np.linspace(-max_distance, max_distance, imgwidth)
xx, yy = np.meshgrid(s, s)
dtheta = 0.5 * np.pi / nside / 4
progress.update(-1, 'Preparing projection')

if opts.align_to is None or opts.input.name == opts.align_to.name:
    prob2, mu2, sigma2, norm2 = prob, mu, sigma, norm
else:
    (prob2, mu2, sigma2, norm2), _ = io.read_sky_map(
        opts.align_to.name, distances=True)

if opts.max_distance is None:
    mean, std = parameters_to_marginal_moments(prob2, mu2, sigma2)
    max_distance = mean + 2.5 * std
else:
    max_distance = opts.max_distance

R = np.ascontiguousarray(principal_axes(prob2, mu2, sigma2))

if opts.chain:
    chain = io.read_samples(opts.chain.name)
    chain = np.dot(R.T, (hp.ang2vec(
        0.5 * np.pi - chain['dec'], chain['ra'])
        * np.atleast_2d(chain['dist']).T).T)

fig = plt.figure(frameon=False)
n = 1 if opts.projection else 2
gs = gridspec.GridSpec(
    n, n, left=0.01, right=0.99, bottom=0.01, top=0.99,
    wspace=0.05, hspace=0.05)

imgwidth = int(opts.dpi * opts.figure_width / n)
s = np.linspace(-max_distance, max_distance, imgwidth)
xx, yy = np.meshgrid(s, s)
dtheta = 0.5 * np.pi / nside / 4
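
# A minimal sketch (not part of the original source) of the coordinate
# transform applied to the chain above: (ra, dec, dist) samples are mapped to
# Cartesian points with healpy, then rotated into the sky map's
# principal-axis frame. 'R' here is a placeholder 3x3 rotation matrix
# standing in for principal_axes(prob2, mu2, sigma2).
import numpy as np
import healpy as hp

ra = np.array([0.1, 1.2])        # right ascension (rad)
dec = np.array([-0.3, 0.5])      # declination (rad)
dist = np.array([100.0, 150.0])  # distance (e.g. Mpc)

R = np.eye(3)  # placeholder rotation matrix
xyz = hp.ang2vec(0.5 * np.pi - dec, ra) * dist[:, np.newaxis]  # shape (N, 3)
pts = np.dot(R.T, xyz.T)  # rotated sample positions, shape (3, N)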
# Late imports.
import re
from astropy.table import Table
from astropy.utils.misc import NumpyRNGContext
from matplotlib import pyplot as plt
import numpy as np
from lalinference import io
import lalinference.plot
from lalinference.bayestar.postprocess import find_injection_moc
from scipy.interpolate import interp1d

# Read input.
skymap = io.read_sky_map(args.skymap.name, moc=True)
chain = io.read_samples(args.samples.name)

# If required, downselect to a smaller number of posterior samples.
if args.max_points is not None:
    chain = Table(np.random.permutation(chain)[:args.max_points])

# Calculate P-P plot.
contours = np.asarray(args.contour)
result = find_injection_moc(skymap, chain['ra'], chain['dec'],
                            chain['dist'], contours=1e-2 * contours)


def fmt(x, sigfigs, force_scientific=False):
    """Round and format a number in scientific notation."""
    places = sigfigs - int(np.floor(np.log10(x)))
    x_rounded = np.around(x, places)