def diagnose_not_marginalized(arguments):
    # Load the ratio estimator
    ratio_estimator = load_ratio_estimator(arguments.model, normalize_inputs=False)
    # Prepare the diagnostic
    prior = Prior()
    space = [[1, 50], [3, 7]]  # Integration domain: mass and age bounds
    densities = load_densities(arguments)
    densities = torch.from_numpy(densities).float()
    diagnostic = DensityDiagnostic(space)
    # Iterate through all tests
    for _ in range(arguments.tests):
        # Fetch the density to test
        density = densities[np.random.randint(0, len(densities))].view(1, -1)
        density = density.to(hypothesis.accelerator)

        # Define the pdf function for integration
        def pdf(mass, age):
            mass = torch.tensor(mass).view(1, 1).float()
            age = torch.tensor(age).view(1, 1).float()
            mass = mass.to(hypothesis.accelerator)
            age = age.to(hypothesis.accelerator)
            inputs = torch.cat([mass, age], dim=1)
            log_posterior = prior.log_prob(inputs).sum() + ratio_estimator.log_ratio(inputs=inputs, outputs=density)
            return log_posterior.exp().item()

        # Compute the test
        diagnostic.test(pdf)

    return np.array(diagnostic.areas)
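
# Usage sketch (an assumption, not part of the original pipeline): the function
# above only reads `arguments.model` and `arguments.tests`, plus whatever
# `load_densities` needs -- presumably `arguments.data`, as in `main` below --
# so a plain namespace is enough to drive it outside the original CLI.
def example_diagnose(model_path, data_path, num_tests=100):
    from argparse import Namespace
    arguments = Namespace(model=model_path, data=data_path, tests=num_tests)
    areas = diagnose_not_marginalized(arguments)
    # Each entry is the numerically integrated posterior mass over `space`;
    # values close to 1 indicate the estimated posterior is properly normalized.
    print("Mean integrated area:", areas.mean())
    return areas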
def compute_log_posterior(self, r, observable, resolution=10, extent=[0, 2000, -4, 0, 2, 3.7]):
    # Prepare the grid; subtract a small epsilon to stay inside the
    # half-open interval of the uniform prior.
    epsilon = 0.00001
    p1 = torch.linspace(extent[0], extent[1] - epsilon, resolution)
    p2 = torch.linspace(extent[2], extent[3] - epsilon, resolution)
    p3 = torch.linspace(extent[4], extent[5] - epsilon, resolution)
    p1 = p1.to(hypothesis.accelerator)
    p2 = p2.to(hypothesis.accelerator)
    p3 = p3.to(hypothesis.accelerator)
    g1, g2, g3 = torch.meshgrid(p1.view(-1), p2.view(-1), p3.view(-1))
    # Vectorize the grid into a (resolution ** 3, 3) batch of parameters
    inputs = torch.cat([g1.reshape(-1, 1), g2.reshape(-1, 1), g3.reshape(-1, 1)], dim=1)
    # Evaluate the posterior over the full grid; the prior is already
    # accounted for by the posterior object, so no separate prior term is added.
    posterior = r.build_posterior()
    log_posterior = posterior.log_prob(inputs, observable).view(resolution, resolution, resolution).cpu()
    return log_posterior
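
# Sketch of how the gridded log-posterior above is typically consumed (this
# mirrors `coverage` below, which exponentiates it): normalizing by the
# discrete sum, as `highest_density_level` does internally, turns the grid
# into a probability mass over the cells.
def grid_to_pdf(log_posterior):
    pdf = log_posterior.exp()
    return pdf / pdf.sum()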
class Coverage_class:

    def __init__(self, name=''):
        self.name = name
        self.prior = Prior()

    @torch.no_grad()
    def highest_density_level(self, density, alpha, min_epsilon=10e-16, region=False):
        # Check if a numpy type has been specified
        if type(density).__module__ != np.__name__:
            density = density.cpu().clone().numpy()
        else:
            density = np.array(density)
        density = density.astype(np.float64)
        # Check the discrete sum of the density (for scaling)
        integrand = density.sum()
        density /= integrand
        # Compute the level such that 1 - alpha has been satisfied.
        optimal_level = density.max()
        epsilon = 10e-00  # Current error
        while epsilon >= min_epsilon:
            optimal_level += 2 * epsilon  # Overshoot solution, move back
            epsilon /= 10
            area = 0.0
            while area < (1 - alpha):
                area_under = (density >= optimal_level)
                area = np.sum(area_under * density)
                optimal_level -= epsilon  # Gradient descent to reduce error
        # Rescale to the original integrand
        optimal_level *= integrand
        # Check if the computed mask needs to be returned
        if region:
            return optimal_level, area_under
        else:
            return optimal_level

    @torch.no_grad()
    def compute_log_posterior(self, r, observable, resolution=10, extent=[0, 2000, -4, 0, 2, 3.7]):
        # Prepare the grid; subtract a small epsilon to stay inside the
        # half-open interval of the uniform prior.
        epsilon = 0.00001
        p1 = torch.linspace(extent[0], extent[1] - epsilon, resolution)
        p2 = torch.linspace(extent[2], extent[3] - epsilon, resolution)
        p3 = torch.linspace(extent[4], extent[5] - epsilon, resolution)
        p1 = p1.to(hypothesis.accelerator)
        p2 = p2.to(hypothesis.accelerator)
        p3 = p3.to(hypothesis.accelerator)
        g1, g2, g3 = torch.meshgrid(p1.view(-1), p2.view(-1), p3.view(-1))
        # Vectorize the grid into a (resolution ** 3, 3) batch of parameters
        inputs = torch.cat([g1.reshape(-1, 1), g2.reshape(-1, 1), g3.reshape(-1, 1)], dim=1)
        log_prior_probabilities = self.prior.log_prob(inputs).sum(dim=1).view(-1, 1)
        observables = observable.repeat(resolution ** 3, 1).float()
        observables = observables.to(hypothesis.accelerator)
        # The classifier is queried with num_atoms=2, which duplicates the batch
        # internally; only the logits of the original grid points are retained.
        log_ratios = r._classifier_logits(inputs, observables, num_atoms=2)
        log_posterior = (log_prior_probabilities + log_ratios[:len(inputs)]).view(resolution, resolution, resolution).cpu()
        return log_posterior

    @torch.no_grad()
    def compute_log_pdf(self, r, inputs, outputs):
        inputs = inputs.to(hypothesis.accelerator)
        outputs = outputs.to(hypothesis.accelerator)
        # Duplicate the batch to satisfy num_atoms=2, then keep the first half.
        log_ratios = r._classifier_logits(inputs.repeat(2, 1), outputs.repeat(2, 1), num_atoms=2)
        log_ratios = log_ratios[:len(inputs)]
        log_prior = self.prior.log_prob(inputs).sum(dim=1)
        return (log_prior + log_ratios).squeeze()

    @torch.no_grad()
    def coverage(self, r, inputs, outputs, confidence_level=0.95, resolution=10, extent=[0, 2000, -4, 0, 2, 3.7]):
        n = len(inputs)
        covered = 0
        alpha = 1.0 - confidence_level
        for index in tqdm(range(n), "Coverages evaluated"):
            # Prepare setup
            nominal = inputs[index].squeeze().unsqueeze(0)
            observable = outputs[index].squeeze().unsqueeze(0)
            nominal = nominal.to(hypothesis.accelerator)
            observable = observable.to(hypothesis.accelerator)
            # Evaluate the posterior on the grid and at the nominal value
            pdf = self.compute_log_posterior(r, observable, resolution=resolution, extent=extent).exp().view(resolution, resolution, resolution)
            nominal_pdf = self.compute_log_pdf(r, nominal, observable).exp()
            # Check whether the nominal value lies inside the highest density region
            level = self.highest_density_level(pdf, alpha)
            if nominal_pdf >= level:
                covered += 1
        return covered / n
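
# Self-contained sanity check for `highest_density_level` (a sketch, not part
# of the original pipeline): on a discretized standard normal, the mask
# returned with `region=True` should enclose roughly 1 - alpha of the mass.
def check_highest_density_level(alpha=0.05):
    grid = np.linspace(-5.0, 5.0, 1001)
    density = np.exp(-0.5 * grid ** 2)  # Unnormalized normal pdf
    diagnostic = Coverage_class(name="sanity-check")
    level, region = diagnostic.highest_density_level(density, alpha, region=True)
    covered_mass = (density / density.sum())[region].sum()
    print("Level:", level, "- mass enclosed:", covered_mass)  # Roughly 0.95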
def main(arguments):
    # Load the ratio estimator
    ratio_estimator = load_ratio_estimator(arguments.model)
    # Load the densities
    densities = torch.from_numpy(np.load(arguments.data + "/density-contrasts-cut-noised.npy")).float()
    # Check if the non-marginalized model has been specified
    resolution = arguments.resolution
    if "not-marginalized" in arguments.model:
        prior = Prior()
        degrees_of_freedom = 2
        masses = torch.from_numpy(np.load(arguments.data + "/masses.npy")).view(-1, 1).float()
        ages = torch.from_numpy(np.load(arguments.data + "/ages.npy")).view(-1, 1).float()
        nominals = torch.cat([masses, ages], dim=1)
        masses = torch.linspace(prior.low[0], prior.high[0] - 0.01, resolution).view(-1, 1)
        masses = masses.to(hypothesis.accelerator)
        ages = torch.linspace(prior.low[1], prior.high[1] - 0.01, resolution).view(-1, 1)
        ages = ages.to(hypothesis.accelerator)
        grid_masses, grid_ages = torch.meshgrid(masses.view(-1), ages.view(-1))
        inputs = torch.cat([grid_masses.reshape(-1, 1), grid_ages.reshape(-1, 1)], dim=1)
    else:
        prior = MarginalizedAgePrior()
        degrees_of_freedom = 1
        # Prepare inputs
        nominals = torch.from_numpy(np.load(arguments.data + "/masses.npy")).view(-1, 1).float()
        masses = torch.linspace(prior.low, prior.high - 0.01, resolution).view(-1, 1)
        masses = masses.to(hypothesis.accelerator)
        inputs = masses
    # Prepare the diagnostic
    nominals = nominals.to(hypothesis.accelerator)
    densities = densities.to(hypothesis.accelerator)
    results = []
    indices = np.random.randint(0, len(densities), size=arguments.n)
    for index in indices:
        # Get the current density and nominal value
        nominal = nominals[index].view(1, -1)
        density = densities[index].view(1, -1)
        # Prepare the outputs
        outputs = density.repeat(len(inputs), 1)
        # Check if we have to compute Bayesian credible regions
        if not arguments.frequentist:
            # Compute the Bayesian credible region. The prior is uniform,
            # so the posterior pdf is proportional to the likelihood-to-evidence ratio.
            log_ratios = ratio_estimator.log_ratio(inputs=inputs, outputs=outputs)
            pdf = log_ratios.exp()
            norms = (inputs - nominal).norm(dim=1).cpu().numpy()
            nominal_index = np.argmin(norms)
            nominal_pdf = pdf[nominal_index].item()
            level = highest_density_level(pdf, arguments.level, bias=arguments.bias)
            covered = nominal_pdf >= level
        else:
            # Compute the frequentist confidence interval based on Wilks' theorem:
            # -2 log(likelihood ratio) is asymptotically chi-squared distributed.
            log_ratios = ratio_estimator.log_ratio(inputs=inputs, outputs=outputs)
            max_ratio = log_ratios[log_ratios.argmax()]
            test_statistic = -2 * (log_ratios - max_ratio)
            test_statistic -= test_statistic.min()
            x = chi2.isf(1 - arguments.level, df=degrees_of_freedom)
            norms = (inputs - nominal).norm(dim=1).cpu().numpy()
            nominal_index = np.argmin(norms)
            covered = test_statistic[nominal_index].item() <= x
        results.append(covered)
    # Save the results of the diagnostic.
    np.save(arguments.out, results)
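
# Sketch of the CLI that `main` expects; the flag names are read directly off
# the `arguments.*` attributes used above, but the defaults and help strings
# are illustrative assumptions.
def parse_arguments():
    import argparse
    parser = argparse.ArgumentParser("Coverage diagnostic")
    parser.add_argument("--model", type=str, help="Path of the ratio estimator to load.")
    parser.add_argument("--data", type=str, help="Directory holding the .npy data products.")
    parser.add_argument("--out", type=str, help="Path to store the boolean coverage results.")
    parser.add_argument("--resolution", type=int, default=100, help="Resolution of the parameter grid.")
    parser.add_argument("--n", type=int, default=1000, help="Number of observables to evaluate.")
    parser.add_argument("--level", type=float, default=0.95, help="Credibility or confidence level.")
    parser.add_argument("--bias", type=float, default=0.0, help="Bias term forwarded to highest_density_level.")
    parser.add_argument("--frequentist", action="store_true", help="Use Wilks-based confidence intervals instead of credible regions.")
    return parser.parse_args()


if __name__ == "__main__":
    main(parse_arguments())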
import matplotlib
import torch

from matplotlib import rc
import matplotlib.pyplot as plt

from ratio_estimation import RatioEstimator
from scipy.stats import chi2
from sklearn.neighbors import KernelDensity
from util import MarginalizedAgePrior
from util import Prior

# Matplotlib settings
matplotlib.rcParams["text.latex.preamble"] = r"\usepackage{amssymb}"
plt.rcParams.update({"font.size": 16})
rc("font", **{"family": "serif", "serif": ["Computer Modern"]})
plt.rcParams["text.usetex"] = True

# Priors
prior_1d = MarginalizedAgePrior()
prior_2d = Prior()

# Plotting defaults
default_resolution = 100
masses_xticks = [0, 10, 20, 30, 40, 50]
extent = [  # I know, this isn't very nice :(
    prior_2d.low[0].item(), prior_2d.high[0].item(),
    prior_2d.low[1].item(), prior_2d.high[1].item()
]


@torch.no_grad()
def plot_stream(ax, phi, stream):
    ax.step(phi, stream.reshape(-1), lw=2, color="black")
    ax.set_ylim([0, 2])
    ax.minorticks_on()
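
# Usage sketch for `plot_stream` (an assumption consistent with the signature
# above): `phi` is the angular coordinate along the stream and `stream` the
# relative density contrast evaluated at those angles.
def example_stream_figure(phi, stream, path="stream.pdf"):
    figure, ax = plt.subplots(figsize=(6, 4))
    plot_stream(ax, phi, stream)
    ax.set_xlabel(r"$\phi$")
    ax.set_ylabel("Relative density")
    figure.tight_layout()
    figure.savefig(path)
    plt.close(figure)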