def diagnose_not_marginalized(arguments):
    # Load the ratio estimator
    ratio_estimator = load_ratio_estimator(arguments.model,
                                           normalize_inputs=False)
    # Prepare the diagnostic
    prior = Prior()
    space = [[1, 50], [3, 7]]  # Integration bounds for (mass, age)
    densities = load_densities(arguments)
    densities = torch.from_numpy(densities).float()
    diagnostic = DensityDiagnostic(space)
    # Iterate through all tests
    for _ in range(arguments.tests):
        # Fetch the density to test
        density = densities[np.random.randint(0, len(densities))].view(1, -1)
        density = density.to(hypothesis.accelerator)

        # Define the pdf function for integration
        def pdf(mass, age):
            mass = torch.tensor(mass).view(1, 1).float()
            age = torch.tensor(age).view(1, 1).float()
            mass = mass.to(hypothesis.accelerator)
            age = age.to(hypothesis.accelerator)
            inputs = torch.cat([mass, age], dim=1)
            log_posterior = prior.log_prob(inputs).sum() \
                + ratio_estimator.log_ratio(inputs=inputs, outputs=density)
            return log_posterior.exp().item()

        # Compute the test
        diagnostic.test(pdf)

    return np.array(diagnostic.areas)
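
A hedged usage sketch for the diagnostic above. The `Namespace` fields mirror the attribute accesses in the function (`model`, `tests`); the model path is an illustrative assumption:

from argparse import Namespace

# Hypothetical invocation; the model path and test count are assumptions.
arguments = Namespace(model="models/not-marginalized/ratio-estimator.th", tests=100)
areas = diagnose_not_marginalized(arguments)
# For a well-calibrated estimator each integrated area should be close to 1.
print("Mean integrated area:", areas.mean())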
    # Method excerpt (note the `self` parameter): a variant that delegates the
    # prior-weighted evaluation to a posterior object built from the ratio
    # estimator, instead of summing log-prior and classifier logits by hand.
    def compute_log_posterior(self,
                              r,
                              observable,
                              resolution=10,
                              extent=[0, 2000, -4, 0, 2, 3.7]):
        # Prepare the evaluation grid; epsilon keeps the endpoints inside
        # the half-open support of the uniform prior.
        epsilon = 0.00001
        p1 = torch.linspace(extent[0], extent[1] - epsilon, resolution)
        p2 = torch.linspace(extent[2], extent[3] - epsilon, resolution)
        p3 = torch.linspace(extent[4], extent[5] - epsilon, resolution)
        p1 = p1.to(hypothesis.accelerator)
        p2 = p2.to(hypothesis.accelerator)
        p3 = p3.to(hypothesis.accelerator)
        g1, g2, g3 = torch.meshgrid(p1.view(-1), p2.view(-1), p3.view(-1))
        # Vectorize the grid into a (resolution ** 3, 3) batch of parameters.
        inputs = torch.cat(
            [g1.reshape(-1, 1),
             g2.reshape(-1, 1),
             g3.reshape(-1, 1)], dim=1)
        # Evaluate the posterior log-density over the grid.
        posterior = r.build_posterior()
        log_posterior = posterior.log_prob(inputs, observable).view(
            resolution, resolution, resolution).cpu()
        return log_posterior
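
The grid construction here reappears in Coverage_class.compute_log_posterior below; a minimal, self-contained sketch of the same vectorization pattern, with an illustrative extent:

import torch

extent = [0, 2000, -4, 0, 2, 3.7]
resolution = 10
epsilon = 0.00001  # Keep endpoints inside the half-open uniform support
p1 = torch.linspace(extent[0], extent[1] - epsilon, resolution)
p2 = torch.linspace(extent[2], extent[3] - epsilon, resolution)
p3 = torch.linspace(extent[4], extent[5] - epsilon, resolution)
g1, g2, g3 = torch.meshgrid(p1, p2, p3)
inputs = torch.cat([g1.reshape(-1, 1), g2.reshape(-1, 1), g3.reshape(-1, 1)], dim=1)
assert inputs.shape == (resolution ** 3, 3)  # One row per grid point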
class Coverage_class:
    def __init__(self, name=''):
        self.name = name
        self.prior = Prior()

    @torch.no_grad()
    def highest_density_level(self,
                              density,
                              alpha,
                              min_epsilon=10e-16,
                              region=False):
        # Check if a numpy type has been specified
        if type(density).__module__ != np.__name__:
            density = density.cpu().clone().numpy()
        else:
            density = np.array(density)
        density = density.astype(np.float64)
        # Check the discrete sum of the density (for scaling)
        integrand = density.sum()
        density /= integrand
        # Search for the level whose superlevel set encloses 1 - alpha of the mass.
        optimal_level = density.max()
        epsilon = 10e-00  # Current step size of the search
        while epsilon >= min_epsilon:
            optimal_level += 2 * epsilon  # Overshoot the solution, then refine
            epsilon /= 10
            area = 0.0
            while area < (1 - alpha):
                area_under = (density >= optimal_level)
                area = np.sum(area_under * density)
                optimal_level -= epsilon  # Lower the level until enough mass is enclosed
        # Rescale to original
        optimal_level *= integrand
        # Check if the computed mask needs to be returned
        if region:
            return optimal_level, area_under
        else:
            return optimal_level
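
    # Note on highest_density_level: the grid density is normalized to unit
    # sum, then the threshold is lowered from the maximum in progressively
    # finer steps (overshoot, then refine by a factor of ten) until the mass
    # of the superlevel set {density >= level} reaches 1 - alpha. With
    # alpha = 0.05, the returned level therefore bounds a ~95% highest
    # density region on the discrete grid.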

    @torch.no_grad()
    def compute_log_posterior(self,
                              r,
                              observable,
                              resolution=10,
                              extent=[0, 2000, -4, 0, 2, 3.7]):
        # Prepare the evaluation grid; epsilon keeps the endpoints inside
        # the half-open support of the uniform prior.
        epsilon = 0.00001
        p1 = torch.linspace(extent[0], extent[1] - epsilon, resolution)
        p2 = torch.linspace(extent[2], extent[3] - epsilon, resolution)
        p3 = torch.linspace(extent[4], extent[5] - epsilon, resolution)
        p1 = p1.to(hypothesis.accelerator)
        p2 = p2.to(hypothesis.accelerator)
        p3 = p3.to(hypothesis.accelerator)
        g1, g2, g3 = torch.meshgrid(p1.view(-1), p2.view(-1), p3.view(-1))
        # Vectorize the grid into a (resolution ** 3, 3) batch of parameters.
        inputs = torch.cat(
            [g1.reshape(-1, 1),
             g2.reshape(-1, 1),
             g3.reshape(-1, 1)], dim=1)
        log_prior_probabilities = self.prior.log_prob(inputs).sum(axis=1).view(-1, 1)
        # Pair every grid point with the same observable.
        observables = observable.repeat(resolution ** 3, 1).float()
        observables = observables.to(hypothesis.accelerator)
        log_ratios = r._classifier_logits(inputs, observables, num_atoms=2)
        # Keep only the logits of the original (input, observable) pairs.
        log_posterior = (log_prior_probabilities +
                         log_ratios[:len(inputs)]).view(
                             resolution, resolution, resolution).cpu()
        return log_posterior

    @torch.no_grad()
    def compute_log_pdf(self, r, inputs, outputs):
        inputs = inputs.to(hypothesis.accelerator)
        outputs = outputs.to(hypothesis.accelerator)
        # _classifier_logits expects num_atoms contrasting pairs per example,
        # so duplicate the batch and keep only the first half of the logits.
        log_ratios = r._classifier_logits(inputs.repeat(2, 1),
                                          outputs.repeat(2, 1),
                                          num_atoms=2)
        log_ratios = log_ratios[:len(inputs)]
        log_prior = self.prior.log_prob(inputs).sum(axis=1)
        return (log_prior + log_ratios).squeeze()

    @torch.no_grad()
    def coverage(self,
                 r,
                 inputs,
                 outputs,
                 confidence_level=0.95,
                 resolution=10,
                 extent=[0, 2000, -4, 0, 2, 3.7]):
        n = len(inputs)
        covered = 0
        alpha = 1.0 - confidence_level
        for index in tqdm(range(n), "Coverages evaluated"):
            # Prepare setup
            nominal = inputs[index].squeeze().unsqueeze(0)
            observable = outputs[index].squeeze().unsqueeze(0)

            nominal = nominal.to(hypothesis.accelerator)
            observable = observable.to(hypothesis.accelerator)
            pdf = self.compute_log_posterior(r,
                                             observable,
                                             resolution=resolution,
                                             extent=extent).exp().view(
                                                 resolution, resolution,
                                                 resolution)
            nominal_pdf = self.compute_log_pdf(r, nominal, observable).exp()
            level = self.highest_density_level(pdf, alpha)
            if nominal_pdf >= level:
                covered += 1

        return covered / n
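
A hedged usage sketch; `r`, `inputs`, and `outputs` stand in for a trained ratio estimator and held-out (parameter, observable) pairs produced elsewhere in the pipeline:

# Sketch only: r, inputs, and outputs are placeholders, not defined here.
diagnostic = Coverage_class(name="coverage-check")
empirical_coverage = diagnostic.coverage(r, inputs, outputs, confidence_level=0.95)
# For a calibrated posterior this should be close to the confidence level.
print("Empirical coverage:", empirical_coverage)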
Example #7
def main(arguments):
    # Load the ratio estimator
    ratio_estimator = load_ratio_estimator(arguments.model)
    # Load the densities
    densities = torch.from_numpy(np.load(arguments.data + "/density-contrasts-cut-noised.npy")).float()
    # Check if the non-marginalized model has been specified
    resolution = arguments.resolution
    if "not-marginalized" in arguments.model:
        prior = Prior()
        degrees_of_freedom = 2
        masses = torch.from_numpy(np.load(arguments.data + "/masses.npy")).view(-1, 1).float()
        ages = torch.from_numpy(np.load(arguments.data + "/ages.npy")).view(-1, 1).float()
        nominals = torch.cat([masses, ages], dim=1)
        masses = torch.linspace(prior.low[0], prior.high[0] - 0.01, resolution).view(-1, 1)
        masses = masses.to(hypothesis.accelerator)
        ages = torch.linspace(prior.low[1], prior.high[1] - 0.01, resolution).view(-1, 1)
        ages = ages.to(hypothesis.accelerator)
        grid_masses, grid_ages = torch.meshgrid(masses.view(-1), ages.view(-1))
        inputs = torch.cat([grid_masses.reshape(-1,1), grid_ages.reshape(-1, 1)], dim=1)
    else:
        prior = MarginalizedAgePrior()
        degrees_of_freedom = 1
        # Prepare inputs
        nominals = torch.from_numpy(np.load(arguments.data + "/masses.npy")).view(-1, 1).float()
        masses = torch.linspace(prior.low, prior.high - 0.01, resolution).view(-1, 1)
        masses = masses.to(hypothesis.accelerator)
        inputs = masses
    # Prepare the diagnostic
    nominals = nominals.to(hypothesis.accelerator)
    densities = densities.to(hypothesis.accelerator)
    results = []
    indices = np.random.randint(0, len(densities), size=arguments.n)
    for index in indices:
        # Get current density and nominal value
        nominal = nominals[index].view(1, -1)
        density = densities[index].view(1, -1)
        # Prepare the outputs
        outputs = density.repeat(len(inputs), 1)
        # Check if we have to compute Bayesian credible regions
        if not arguments.frequentist:
            # Compute Bayesian credible region
            # Compute the posterior pdf
            log_ratios = ratio_estimator.log_ratio(inputs=inputs, outputs=outputs)
            log_pdf = log_ratios  # Uniform prior: the constant log-prior does not shift the credible region
            pdf = log_pdf.exp()
            norms = (inputs - nominal).norm(dim=1).cpu().numpy()
            nominal_index = np.argmin(norms)
            nominal_pdf = pdf[nominal_index].item()
            level = highest_density_level(pdf, arguments.level, bias=arguments.bias)
            covered = nominal_pdf >= level
        else:
            # Compute Frequentist confidence interval based on Wilks' theorem.
            # Compute the maximum theta
            log_ratios = ratio_estimator.log_ratio(inputs=inputs, outputs=outputs)
            max_ratio = log_ratios[log_ratios.argmax()]
            test_statistic = -2 * (log_ratios - max_ratio)
            test_statistic -= test_statistic.min()
            x = chi2.isf(1 - arguments.level, df=degrees_of_freedom)
            norms = (inputs - nominal).norm(dim=1).cpu().numpy()
            nominal_index = np.argmin(norms)
            covered = test_statistic[nominal_index].item() <= x
        results.append(covered)
    # Save the results of the diagnostic.
    np.save(arguments.out, results)
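
A hedged sketch of the argument parser this `main` appears to expect; the flag names mirror the attribute accesses above (`model`, `data`, `resolution`, `n`, `level`, `bias`, `frequentist`, `out`), while the defaults are illustrative assumptions:

import argparse

def parse_arguments():
    # Hypothetical parser; attribute names match main()'s usage above,
    # defaults are illustrative assumptions.
    parser = argparse.ArgumentParser()
    parser.add_argument("--model", type=str, help="Path of the ratio estimator.")
    parser.add_argument("--data", type=str, help="Directory with the .npy files.")
    parser.add_argument("--resolution", type=int, default=100)
    parser.add_argument("--n", type=int, default=1000)
    parser.add_argument("--level", type=float, default=0.95)
    parser.add_argument("--bias", type=float, default=0.0)
    parser.add_argument("--frequentist", action="store_true")
    parser.add_argument("--out", type=str, help="Output path for the results.")
    return parser.parse_args()

if __name__ == "__main__":
    main(parse_arguments())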
Example #8
import matplotlib
import matplotlib.pyplot as plt
import torch

from matplotlib import rc
from ratio_estimation import RatioEstimator
from scipy.stats import chi2
from sklearn.neighbors import KernelDensity
from util import MarginalizedAgePrior
from util import Prior

matplotlib.rcParams["text.latex.preamble"] = r"\usepackage{amssymb}"

# Matplotlib settings
plt.rcParams.update({"font.size": 16})
rc("font", **{"family": "serif", "serif": ["Computer Modern"]})
plt.rcParams["text.usetex"] = True

# Priors
prior_1d = MarginalizedAgePrior()
prior_2d = Prior()

# Plotting defaults
default_resolution = 100
masses_xticks = [0, 10, 20, 30, 40, 50]
extent = [  # I know, this isn't very nice :(
    prior_2d.low[0].item(), prior_2d.high[0].item(), prior_2d.low[1].item(),
    prior_2d.high[1].item()
]


@torch.no_grad()
def plot_stream(ax, phi, stream):
    ax.step(phi, stream.reshape(-1), lw=2, color="black")
    ax.set_ylim([0, 2])
    ax.minorticks_on()
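
A minimal sketch of calling `plot_stream` with synthetic data; the real `phi` and `stream` arrays come from the project's pipeline, so the ranges below are purely illustrative:

import numpy as np

# Synthetic stand-in data for illustration only.
phi = np.linspace(-20, 20, 100)
stream = 1.0 + 0.1 * np.random.randn(100)
fig, ax = plt.subplots()
plot_stream(ax, phi, stream)
plt.show()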