示例#1
0
 def testLoads(self):
     """Building MCSamples from several chain arrays must reproduce the
     norm-weighted mean of the individual chains."""
     # Three independent Gaussian chains of slightly different lengths.
     chains = [
         Gaussian2D([1.5, -2], np.diagflat([1, 2])).MCSamples(1001 + 10 * k, names=['x', 'y'])
         for k in range(3)
     ]
     combined = MCSamples(samples=[chain.samples for chain in chains], names=['x', 'y'])
     # Expected mean: each chain's mean of 'x' weighted by that chain's norm.
     weighted_sum = np.sum([chain.norm * chain.mean('x') for chain in chains])
     total_norm = np.sum([chain.norm for chain in chains])
     expected = weighted_sum / total_norm
     self.assertAlmostEqual(expected, combined.mean('x'))
示例#2
0
 def testLoads(self):
     """Check that an MCSamples object initialised from a list of chain
     arrays has the same 'x' mean as the norm-weighted average of its parts."""
     parts = [
         Gaussian2D([1.5, -2], np.diagflat([1, 2])).MCSamples(1001 + 10 * n, names=['x', 'y'])
         for n in range(3)
     ]
     merged = MCSamples(samples=[part.samples for part in parts], names=['x', 'y'])
     numerator = np.sum([part.norm * part.mean('x') for part in parts])
     denominator = np.sum([part.norm for part in parts])
     pooled_mean = numerator / denominator
     merged_mean = merged.mean('x')
     self.assertAlmostEqual(pooled_mean, merged_mean)
示例#3
0
 def _init_samples(self,
                   mcmc_steps=5000,
                   mcmc_batch_size=5,
                   ignore_rows=0.3):
     """
     Run a batch of simple random-walk Metropolis chains, save them, and
     read them back as an MCSamples object.

     :param mcmc_steps: number of proposal steps taken by every chain
     :param mcmc_batch_size: number of chains advanced in parallel
     :param ignore_rows: forwarded to MCSamples as ``ignore_rows``
     :return: MCSamples instance after ``readChains`` on the saved chain files
     """
     # Starting points are drawn uniformly from [-1, 1) in every dimension.
     u = 2 * (np.random.uniform(size=(mcmc_batch_size, self.x_dim)) - 0.5)
     # NOTE(review): self.transform presumably maps these coordinates to the
     # parameter space that self.loglike expects -- confirm.
     v = self.transform(u)
     logl = self.loglike(v)
     samples = []
     likes = []
     for i in range(mcmc_steps):
         # Gaussian random-walk proposal with a fixed step scale of 0.1.
         du = np.random.standard_normal(u.shape) * 0.1
         u_prime = u + du
         v_prime = self.transform(u_prime)
         log_ratio_1 = np.zeros(mcmc_batch_size)
         # Flag a chain if any coordinate of the current or proposed point
         # lies outside [-1, 1]; such proposals get log-ratio -inf.
         prior = np.logical_or(np.abs(u) > 1, np.abs(u_prime) > 1)
         idx = np.where([np.any(p) for p in prior])
         log_ratio_1[idx] = -np.inf
         rnd_u = np.random.rand(mcmc_batch_size)
         # ratio is exactly 0 (flagged) or 1 (not flagged), so this first mask
         # only removes the out-of-bounds proposals.
         ratio = np.clip(np.exp(log_ratio_1), 0, 1)
         mask = (rnd_u < ratio).astype(int)
         logl_prime = np.full(mcmc_batch_size, logl)
         # Likelihood part of the acceptance test, evaluated only for chains
         # that passed the bounds check. The same uniform draw rnd_u is reused.
         for idx, im in enumerate(mask):
             if im:
                 lp = self.loglike(v_prime[idx])
                 if lp >= logl[idx]:
                     logl_prime[idx] = lp
                 elif rnd_u[idx] < np.clip(np.exp(lp - logl[idx]), 0, 1):
                     logl_prime[idx] = lp
                 else:
                     mask[idx] = 0
         # Accepted chains (mask == 1) move to the proposal, the rest stay put.
         m = mask[:, None]
         u = u_prime * m + u * (1 - m)
         v = v_prime * m + v * (1 - m)
         logl = logl_prime * mask + logl * (1 - mask)
         # The current state is recorded after every step, accepted or not.
         samples.append(v)
         likes.append(logl)
     # Swap the first two axes so that the chain index comes before the step index.
     samples = np.transpose(np.array(samples), axes=[1, 0, 2])
     # Stored values are the negated log-likelihoods.
     loglikes = -np.transpose(np.array(likes), axes=[1, 0])
     weights = np.ones(loglikes.shape)
     self._chain_stats(samples)
     self._save_samples(samples, weights, loglikes)
     names = ['p%i' % i for i in range(int(self.x_dim))]
     labels = [r'x_%i' % i for i in range(int(self.x_dim))]
     # NOTE(review): assumes _save_samples wrote one file per chain under the
     # 'chain' root in self.logs['chains'], numbered from 1 -- confirm.
     files = chainFiles(os.path.join(self.logs['chains'], 'chain'),
                        first_chain=1,
                        last_chain=mcmc_batch_size)
     mc = MCSamples(self.logs['chains'],
                    names=names,
                    labels=labels,
                    ignore_rows=ignore_rows)
     mc.readChains(files)
     return mc
示例#4
0
 def _read_samples(self, fileroot, match='', ignore_rows=0.3):
     """
     Load existing chain files into an MCSamples object.

     :param fileroot: root passed to MCSamples and used to locate the files
     :param match: optional glob pattern joined onto ``fileroot``; when empty,
         the files are found with ``chainFiles(fileroot)`` instead
     :param ignore_rows: forwarded to MCSamples as ``ignore_rows``
     :return: MCSamples instance after ``readChains`` on the located files
     """
     indices = range(int(self.num_params))
     param_names = ['p%i' % k for k in indices]
     param_labels = [r'x_%i' % k for k in indices]
     chain_paths = glob.glob(os.path.join(fileroot, match)) if match else chainFiles(fileroot)
     loaded = MCSamples(fileroot,
                        names=param_names,
                        labels=param_labels,
                        ignore_rows=ignore_rows)
     loaded.readChains(chain_paths)
     return loaded
示例#5
0
    def MCSamples(self,
                  size,
                  names=None,
                  logLikes=False,
                  random_state=None,
                  **kwargs):
        """
        Draw independent samples from the mixture and return them wrapped in a
        :class:`.mcsamples.MCSamples` object, ready for plotting etc.

        :param size: number of samples to draw
        :param names: set to override existing names
        :param logLikes: if True, store minus the log of the pdf at each sample as its
                         likelihood value; if False, no log likelihoods are stored
        :param random_state: random seed or Generator, passed on to ``sim``
        :param kwargs: extra keyword arguments passed to the MCSamples constructor
        :return: a new :class:`.mcsamples.MCSamples` instance
        """
        draws = self.sim(size, random_state=random_state)
        minus_log_pdf = -np.log(self.pdf(draws)) if logLikes else None
        # The parameter names are deep-copied, so the result holds its own copy.
        names_copy = copy.deepcopy(self.paramNames)
        return MCSamples(samples=draws,
                         loglikes=minus_log_pdf,
                         paramNamesFile=names_copy,
                         names=names,
                         ranges=self.lims,
                         **kwargs)
示例#6
0
def MCSamplesFromCobaya(info, collections, name_tag=None,
                        ignore_rows=0, ini=None, settings=None):
    """
    Creates a set of samples from Cobaya's output.
    Parameter names, ranges and labels are taken from the "info" dictionary
    (always use the "updated" one generated by `cobaya.run`).

    For a description of the various analysis settings and default values see
    `analysis_defaults.ini <https://getdist.readthedocs.org/en/latest/analysis_settings.html>`_.

    :param info: info dictionary, common to all collections
                 (use the "updated" one, returned by `cobaya.run`)
    :param collections: collection(s) of samples from Cobaya; a single collection
                        (anything with a ``data`` attribute) is wrapped in a list
    :param name_tag: name for this sample to be shown in the plots' legend
    :param ignore_rows: initial samples to skip, number (`int>=1`) or fraction (`float<1`)
    :param ini: The name of a .ini file with analysis settings to use
    :param settings: dictionary of analysis settings to override defaults
    :return: The :class:`MCSamples` instance
    :raises TypeError: if the first collection has no ``data`` attribute
    :raises ValueError: if the collections do not all have the same columns
    """

    if hasattr(collections, "data"):
        collections = [collections]
    # Check consistency between collections
    try:
        columns = list(collections[0].data)
    except AttributeError:
        raise TypeError(
            "The second argument does not appear to be a (list of) samples `Collection`.")
    if not all(list(c.data) == columns for c in collections[1:]):
        raise ValueError("The given collections don't have the same columns.")
    # Check consistency with info
    info_params = get_info_params(info)
    # if skip burn in *has already been done*
    skip = info.get(_post, {}).get("skip", 0)
    if ignore_rows != 0 and skip != 0:
        logging.warning("You are asking for rows to be ignored (%r), but some (%r) were "
                        "already ignored in the original chain.", ignore_rows, skip)
    var_params = [k for k, v in info_params.items() if is_sampled_param(v) or is_derived_param(v)]
    # The first two columns are not parameters (they are read separately below
    # through the `_weight` and `_minuslogpost` keys).
    param_columns = columns[2:]
    assert set(param_columns) == set(var_params), (
            "Info and collection(s) are not compatible, because their parameters differ: "
            "the collection(s) have %r and the info has %r. " % (param_columns, var_params) +
            "Are you sure that you are using an *updated* info dictionary "
            "(i.e. the output of `cobaya.run`)?")
    # We need to use *collection* sorting, not info sorting!
    names = [p + ("*" if is_derived_param(info_params[p]) else "")
             for p in param_columns]
    labels = [(info_params[p] or {}).get(_p_label, p) for p in param_columns]
    ranges = {p: get_range(info_params[p]) for p in info_params}  # include fixed parameters not in columns
    # Guard against a `None` parameter entry, exactly as done for the labels
    # above; a bare `.get` on it would raise AttributeError.
    renames = {p: (info_params.get(p) or {}).get(_p_renames, []) for p in param_columns}
    samples = [c[c.data.columns[2:]].values for c in collections]
    weights = [c[_weight].values for c in collections]
    # The stored column is negated before being passed on as `loglikes`.
    loglikes = [-c[_minuslogpost].values for c in collections]
    sampler = get_sampler_type(info)
    label = get_sample_label(info)
    # Imported here rather than at module level, as in the original code.
    from getdist.mcsamples import MCSamples
    return MCSamples(samples=samples, weights=weights, loglikes=loglikes, sampler=sampler,
                     names=names, labels=labels, ranges=ranges, renames=renames,
                     ignore_rows=ignore_rows, name_tag=name_tag, label=label, ini=ini,
                     settings=settings)
示例#7
0
    def testDensitySymmetries(self):
        """Mirroring the samples (and their ranges) must mirror the estimated density."""
        # 1D case: negate x and compare against the reversed density grid.
        samps = Gaussian1D(0, 1, xmin=-1, xmax=4).MCSamples(12000)
        reference = samps.get1DDensity('x')
        samps.samples[:, 0] *= -1
        samps = MCSamples(samples=samps.samples, names=['x'], ranges={'x': [-4, 1]})
        mirrored = samps.get1DDensity('x')
        matches_1d = np.allclose(reference.P, mirrored.P[::-1])
        self.assertTrue(matches_1d)

        # 2D case, step 1: negate x only.
        samps = Gaussian2D([0, 0], np.diagflat([1, 2]), xmin=-1, xmax=2, ymin=0, ymax=3).MCSamples(12000)
        reference = samps.get2DDensity('x', 'y')
        samps.samples[:, 0] *= -1
        x_flipped_ranges = {'x': [-2, 1], 'y': [0, 3]}
        samps = MCSamples(samples=samps.samples, names=['x', 'y'], ranges=x_flipped_ranges)
        mirrored = samps.get2DDensity('x', 'y')
        matches_x_flip = np.allclose(reference.P, mirrored.P[:, ::-1])
        self.assertTrue(matches_x_flip)

        # 2D case, step 2: restore x and negate y instead.
        samps.samples[:, 0] *= -1
        samps.samples[:, 1] *= -1
        y_flipped_ranges = {'x': [-1, 2], 'y': [-3, 0]}
        samps = MCSamples(samples=samps.samples, names=['x', 'y'], ranges=y_flipped_ranges)
        mirrored = samps.get2DDensity('x', 'y')
        matches_y_flip = np.allclose(reference.P, mirrored.P[::-1, ::], atol=1e-5)
        self.assertTrue(matches_y_flip)
示例#8
0
    def MCSamples(self, size, names=None, logLikes=False, **kwargs):
        """
        Gets a set of independent samples from the mixture as a :class:`.mcsamples.MCSamples` object
        ready for plotting etc.

        :param size: number of samples
        :param names: set to override existing names
        :param logLikes: if True set the sample likelihood values from the pdf (stored as minus
                         the log of the pdf), if False, don't store log likelihoods
        :param kwargs: extra keyword arguments passed to the MCSamples constructor
        :return: a new :class:`.mcsamples.MCSamples` instance
        """
        # Function-scope import so the block does not depend on a module-level one.
        import copy

        samples = self.sim(size)
        loglikes = -np.log(self.pdf(samples)) if logLikes else None
        # Hand over a deep copy of the parameter names so the returned samples
        # object does not share this mixture's own paramNames instance.
        return MCSamples(samples=samples, loglikes=loglikes,
                         paramNamesFile=copy.deepcopy(self.paramNames), names=names,
                         ranges=self.lims, **kwargs)
示例#9
0
    def testDensitySymmetries(self):
        """Check that flipping the sign of the samples gives the flipped density."""
        # --- one dimension ---
        samps = Gaussian1D(0, 1, xmin=-1, xmax=4).MCSamples(12000)
        original_density = samps.get1DDensity('x')
        samps.samples[:, 0] *= -1
        samps = MCSamples(samples=samps.samples, names=['x'], ranges={'x': [-4, 1]})
        flipped_density = samps.get1DDensity('x')
        same = np.allclose(original_density.P, flipped_density.P[::-1])
        self.assertTrue(same)

        # --- two dimensions: flip x ---
        samps = Gaussian2D([0, 0], np.diagflat([1, 2]), xmin=-1, xmax=2, ymin=0, ymax=3).MCSamples(12000)
        original_density = samps.get2DDensity('x', 'y')
        samps.samples[:, 0] *= -1
        samps = MCSamples(samples=samps.samples,
                          names=['x', 'y'],
                          ranges={'x': [-2, 1], 'y': [0, 3]})
        flipped_density = samps.get2DDensity('x', 'y')
        same = np.allclose(original_density.P, flipped_density.P[:, ::-1])
        self.assertTrue(same)

        # --- two dimensions: undo the x flip, flip y ---
        samps.samples[:, 0] *= -1
        samps.samples[:, 1] *= -1
        samps = MCSamples(samples=samps.samples,
                          names=['x', 'y'],
                          ranges={'x': [-1, 2], 'y': [-3, 0]})
        flipped_density = samps.get2DDensity('x', 'y')
        same = np.allclose(original_density.P, flipped_density.P[::-1, ::], atol=1e-5)
        self.assertTrue(same)
示例#10
0
    def run(self,
            train_iters=200,
            mcmc_steps=5000,
            bootstrap_iters=1,
            bootstrap_mcmc_steps=5000,
            bootstrap_fileroot='',
            bootstrap_match='',
            bootstrap_batch_size=5,
            alpha=0,
            single_thin=1,
            ignore_rows=0.3):
        """
        Bootstrap-train the trainer on standardised samples, then draw the
        final chain with it.

        :param train_iters: passed to ``trainer.train`` as ``max_iters``
        :param mcmc_steps: number of MCMC steps for the final sampling call
        :param bootstrap_iters: number of bootstrap rounds; must be at least 1
        :param bootstrap_mcmc_steps: MCMC steps used to generate samples in
            each bootstrap round
        :param bootstrap_fileroot: if non-empty, the first round reads existing
            chains from this root instead of generating initial samples
        :param bootstrap_match: glob pattern forwarded to ``_read_samples``
        :param bootstrap_batch_size: number of chains for ``_init_samples``
        :param alpha: passed to ``trainer.sample``; 0 selects
            ``1 / sqrt(x_dim)``
        :param single_thin: passed to ``makeSingleSamples``
        :param ignore_rows: passed on wherever samples are loaded into MCSamples
        :raises ValueError: if ``bootstrap_iters`` is smaller than 1
        """
        # Without at least one bootstrap round `transform` is never defined and
        # the final sampling call below would fail with an UnboundLocalError.
        if bootstrap_iters < 1:
            raise ValueError('bootstrap_iters must be at least 1')

        if alpha == 0.0:
            alpha = 1 / self.x_dim**0.5

        if self.log:
            self.logger.info('Alpha [%5.4f]' % (alpha))

        for t in range(bootstrap_iters):

            if t == 0:
                # First round: start from chains on disk or from fresh chains.
                if bootstrap_fileroot:
                    mc = self._read_samples(bootstrap_fileroot,
                                            match=bootstrap_match,
                                            ignore_rows=ignore_rows)
                else:
                    mc = self._init_samples(
                        mcmc_steps=bootstrap_mcmc_steps,
                        mcmc_batch_size=bootstrap_batch_size,
                        ignore_rows=ignore_rows)
            else:
                # Later rounds: sample with the trainer from the previous
                # round. `transform` here is the closure defined at the end of
                # the previous iteration and still sees that round's mean/std.
                samples, likes, scale, nc = self.trainer.sample(
                    loglike=self.loglike,
                    transform=transform,
                    mcmc_steps=bootstrap_mcmc_steps,
                    alpha=alpha,
                    dynamic=False,
                    show_progress=True)
                samples = transform(samples)
                self._chain_stats(samples)
                loglikes = -np.array(likes)
                weights = np.ones(loglikes.shape)
                # Only the first entry along the leading axis is used here.
                mc = MCSamples(samples=[samples[0]],
                               weights=[weights[0]],
                               loglikes=[loglikes[0]],
                               ignore_rows=ignore_rows)

            samples = mc.makeSingleSamples(single_thin=single_thin)
            samples = samples[:, :self.x_dim]
            # Standardise each dimension before training.
            mean = np.mean(samples, axis=0)
            std = np.std(samples, axis=0)
            samples = (samples - mean) / std
            self.trainer.train(samples, max_iters=train_iters, noise=-1)

            # Inverse of the standardisation above; reads the current mean/std
            # from the enclosing scope when called.
            def transform(x):
                return x * std + mean

        samples, likes, scale, nc = self.trainer.sample(
            loglike=self.loglike,
            transform=transform,
            mcmc_steps=mcmc_steps,
            alpha=alpha,
            dynamic=False,
            show_progress=True,
            out_chain=os.path.join(self.logs['chains'], 'chain'))
        samples = transform(samples)
        self._chain_stats(samples)
示例#11
0
def MCSamplesFromCosmosis(chain_root,
                          chain_min_root=None,
                          param_name_dict=None,
                          param_label_dict=None,
                          name_tag=None,
                          settings=None):
    """
    Function to import Cosmosis chains in GetDist.

    :param chain_root: the name and path to the chain or the path to the
        folder that contains it.
    :param chain_min_root: (optional) name of the file containing the
        explicit best fit.
    :param param_name_dict: (optional) a dictionary with the mapping between
        cosmosis names and reasonable parameter names.
    :param param_label_dict: (optional) dictionary with the mapping between
        parameter names and parameter labels, since Cosmosis does not save
        the labels in the chain.
    :param name_tag: (optional) a string with the name tag for the chain.
    :param settings: (optional) dictionary of analysis settings to override
        getdist defaults
    :return: The :class:`~getdist.mcsamples.MCSamples` instance
    :raises ValueError: if no chain file can be located or the sampler type
        is not one of 'nested', 'mcmc' or 'uncorrelated'
    """
    # decide if the user passed a folder or a chain:
    if os.path.isfile(chain_root + '.txt'):
        chain_file = chain_root + '.txt'
    elif os.path.isdir(chain_root):
        # look for the chain file:
        temp = [fname for fname in os.listdir(chain_root) if 'chain.txt' in fname]
        if len(temp) == 0:
            raise ValueError('No chain file found in folder', chain_root)
        chain_file = os.path.join(chain_root, temp[0])
    else:
        raise ValueError('Input chain root is not a folder nor a file.')
    # get all the commented lines in the chain file:
    info = get_cosmosis_info(chain_file)
    # get the parameter names:
    param_names = get_param_names(info)
    # get the parameter labels from the user provided dictionary:
    param_labels = get_param_labels(info, param_names, param_label_dict)
    # get the sampler:
    sampler = get_sampler_type(info)
    # get the name tag:
    if name_tag is None:
        name_tag = get_name_tag(info)
    # get the samples weights and likelihood:
    chain = loadNumpyTxt(chain_file, skiprows=0)
    # parse the chain depending on the sampler that produced it:
    if sampler == 'nested':
        # get number of samples to use:
        nsample_line = [line for line in info if 'nsample=' in line][0]
        nsamples = int(nsample_line.replace(' ', '').split('=')[1])
        # keep only the trailing `nsamples` rows of the chain:
        chain = chain[-nsamples:]
        weight_idx = param_names.index('weight')
        post_idx = param_names.index('post')
        keep = [i for i in range(len(param_names))
                if i != weight_idx and i != post_idx]
        samples = chain[:, keep]
        weights = chain[:, weight_idx]
        loglike = chain[:, post_idx]
    elif sampler in ('mcmc', 'uncorrelated'):
        post_idx = param_names.index('post')
        keep = [i for i in range(len(param_names)) if i != post_idx]
        samples = chain[:, keep]
        loglike = chain[:, post_idx]
        if sampler == 'mcmc':
            # Cosmosis does not weight samples: collapse repeated rows and
            # use their multiplicity as the weight.
            samples, idx, weights = np.unique(samples,
                                              return_index=True,
                                              return_counts=True,
                                              axis=0)
            loglike = loglike[idx]
        else:
            weights = None
    else:
        raise ValueError('Unknown sampler')
    # delete the weights and likelihood from names and labels. Both lists are
    # filtered with the same column indices, so labels stay aligned with names
    # whatever the relative order of the 'weight' and 'post' columns.
    if param_labels is not None:
        param_labels = [param_labels[i] for i in keep]
    param_names = [param_names[i] for i in keep]
    # get the ranges:
    ranges = get_ranges(info, param_names)
    # transform param names:
    if param_name_dict is not None:
        for i, name in enumerate(param_names):
            if name in param_name_dict:
                param_names[i] = param_name_dict[name]
                if name in ranges:
                    ranges[param_name_dict[name]] = ranges.pop(name)
    # initialize the samples:
    mc_samples = MCSamples(samples=samples,
                           weights=weights,
                           loglikes=-2. * loglike,
                           sampler=sampler,
                           names=param_names,
                           labels=param_labels,
                           ranges=ranges,
                           ignore_rows=0,
                           name_tag=name_tag,
                           settings=settings)
    # set running parameters: anything without a range is flagged as derived.
    for name in mc_samples.getParamNames().parsWithNames(
            mc_samples.getParamNames().list()):
        name.isDerived = name.name not in ranges
    # polish the samples removing nans:
    mc_samples = polish_samples(mc_samples)
    # get the best fit:
    if chain_min_root is not None:
        # since getdist does not cache the best fit we have to override the
        # method in this brute way:
        funcType = types.MethodType
        mc_samples.getBestFit = funcType(
            functools.partial(get_maximum_likelihood,
                              chain_min_root=chain_min_root,
                              param_name_dict=param_name_dict,
                              param_label_dict=param_label_dict), mc_samples)
    # update statistics:
    mc_samples.updateBaseStatistics()
    #
    return mc_samples
示例#12
0
    def __init__(self,
                 root,
                 base_dir,
                 use_nestcheck,
                 transform=None,
                 overwrite_transformed=False,
                 **kwargs):
        """
        Load a nested-sampling run and set up its analysis backends.

        :param str root: root filename of the sample file collection
        :param str base_dir: directory containing the sample file collection
        :param bool use_nestcheck: also build the nestcheck backend?
        :param callable transform: optional function applied to every
            parameter vector (called with ``old_API=True``); transformed copies
            of the sample files are written with a ``_transformed`` root
        :param bool overwrite_transformed: rewrite transformed files even if
            they already exist
        :param kwargs: passed to the parent class; an ``implementation`` entry
            ('multinest' or 'polychord') selects the nestcheck reader
        :raises ValueError: if ``implementation`` is given but not recognised
        """
        filerootpath = _os.path.join(base_dir, root)
        # Root of the original files; ``filerootpath`` is redirected to the
        # transformed copies below when a transform is supplied.
        _filerootpath = filerootpath

        if transform is not None:
            samples = _np.loadtxt(filerootpath + '.txt')
            # The two leading columns are copied through unchanged; the rest
            # of each row is the vector handed to ``transform``.
            ndims = samples.shape[1] - 2
            temp = transform(samples[0, 2:], old_API=True)
            # Number of entries the transform returns beyond its input length.
            ntransform = len(temp) - ndims

            _exists = _os.path.isfile(filerootpath + '_transformed.txt')
            if not _exists or overwrite_transformed:
                transformed = _np.zeros(
                    (samples.shape[0], samples.shape[1] + ntransform))
                transformed[:, :2] = samples[:, :2]
                for i in range(samples.shape[0]):
                    transformed[i, 2:] = transform(samples[i, 2:],
                                                   old_API=True)
                _np.savetxt(filerootpath + '_transformed.txt', transformed)

            filerootpath += '_transformed'
            root += '_transformed'

        super(NestedBackend, self).__init__(filepath=filerootpath + '.txt',
                                            **kwargs)

        if getdist is not None:
            # getdist backend
            self._gd_bcknd = MCSamples(
                root=filerootpath,
                settings=self.kde_settings,
                sampler='nested',
                names=self.names,
                ranges=self.bounds,
                labels=[self.labels[name] for name in self.names])
            self._gd_bcknd.readChains(getdist.chains.chainFiles(filerootpath))

        self.use_nestcheck = use_nestcheck

        if self.use_nestcheck:  # nestcheck backend
            if transform is not None:
                for ext in ['dead-birth.txt', 'phys_live-birth.txt']:
                    _exists = _os.path.isfile(filerootpath + ext)
                    if not _exists or overwrite_transformed:
                        samples = _np.loadtxt(_filerootpath + ext)
                        transformed = _np.zeros(
                            (samples.shape[0], samples.shape[1] + ntransform))
                        # In these files the parameter vector comes first; the
                        # trailing columns are copied through unchanged.
                        transformed[:, ndims + ntransform:] = samples[:,
                                                                      ndims:]
                        for i in range(samples.shape[0]):
                            transformed[i, :ndims + ntransform] = \
                                transform(samples[i, :ndims], old_API=True)

                        _np.savetxt(filerootpath + ext, transformed)

                # .stats file with same root needed, but do not need to modify
                # the .stats file contents
                if not _os.path.isfile(filerootpath + '.stats'):
                    if _os.path.isfile(_filerootpath + '.stats'):
                        from shutil import copyfile as _copyfile
                        _copyfile(_filerootpath + '.stats',
                                  filerootpath + '.stats')

            if 'implementation' not in kwargs:
                # The %r placeholder is now filled in with the root name.
                print('Root %r sampling implementation not specified... '
                      'assuming MultiNest for nestcheck...' % root)
                self._nc_bcknd = process_multinest_run(root, base_dir=base_dir)
            elif kwargs['implementation'] == 'multinest':
                self._nc_bcknd = process_multinest_run(root,
                                                       base_dir=base_dir)
            elif kwargs['implementation'] == 'polychord':
                self._nc_bcknd = process_polychord_run(root,
                                                       base_dir=base_dir)
            else:
                raise ValueError('Cannot process with nestcheck.')
示例#13
0
class NestedBackend(Run):
    """
    Container for nested samples generated by a single run, and backends
    for analysis of the run.

    The other keyword arguments are generic properties passed to the parent
    class, such as the identification (ID) string of the run. An
    ``implementation`` keyword ('multinest' or 'polychord') selects the
    :mod:`nestcheck` reader; when it is absent MultiNest is assumed.

    :param str root:
        The root filename of the sample file collection.

    :param str base_dir:
        The directory containing the sample file collection.

    :param bool use_nestcheck:
        Invoke :mod:`nestcheck` for nested sampling error analysis?

    :param callable transform:
        A function to transform the parameter vector to another space.
        Transformed copies of the sample files are written with a
        ``_transformed`` root.

    :param bool overwrite_transformed:
        Rewrite the transformed files even if they already exist?

    """
    def __init__(self,
                 root,
                 base_dir,
                 use_nestcheck,
                 transform=None,
                 overwrite_transformed=False,
                 **kwargs):
        filerootpath = _os.path.join(base_dir, root)
        # Root of the original files; ``filerootpath`` is redirected to the
        # transformed copies below when a transform is supplied.
        _filerootpath = filerootpath

        if transform is not None:
            samples = _np.loadtxt(filerootpath + '.txt')
            # The two leading columns are copied through unchanged; the rest
            # of each row is the vector handed to ``transform``.
            ndims = samples.shape[1] - 2
            temp = transform(samples[0, 2:], old_API=True)
            # Number of entries the transform returns beyond its input length.
            ntransform = len(temp) - ndims

            _exists = _os.path.isfile(filerootpath + '_transformed.txt')
            if not _exists or overwrite_transformed:
                transformed = _np.zeros(
                    (samples.shape[0], samples.shape[1] + ntransform))
                transformed[:, :2] = samples[:, :2]
                for i in range(samples.shape[0]):
                    transformed[i, 2:] = transform(samples[i, 2:],
                                                   old_API=True)
                _np.savetxt(filerootpath + '_transformed.txt', transformed)

            filerootpath += '_transformed'
            root += '_transformed'

        super(NestedBackend, self).__init__(filepath=filerootpath + '.txt',
                                            **kwargs)

        if getdist is not None:
            # getdist backend
            self._gd_bcknd = MCSamples(
                root=filerootpath,
                settings=self.kde_settings,
                sampler='nested',
                names=self.names,
                ranges=self.bounds,
                labels=[self.labels[name] for name in self.names])
            self._gd_bcknd.readChains(getdist.chains.chainFiles(filerootpath))

        self.use_nestcheck = use_nestcheck

        if self.use_nestcheck:  # nestcheck backend
            if transform is not None:
                for ext in ['dead-birth.txt', 'phys_live-birth.txt']:
                    _exists = _os.path.isfile(filerootpath + ext)
                    if not _exists or overwrite_transformed:
                        samples = _np.loadtxt(_filerootpath + ext)
                        transformed = _np.zeros(
                            (samples.shape[0], samples.shape[1] + ntransform))
                        # In these files the parameter vector comes first; the
                        # trailing columns are copied through unchanged.
                        transformed[:, ndims + ntransform:] = samples[:,
                                                                      ndims:]
                        for i in range(samples.shape[0]):
                            transformed[i, :ndims + ntransform] = \
                                transform(samples[i, :ndims], old_API=True)

                        _np.savetxt(filerootpath + ext, transformed)

                # .stats file with same root needed, but do not need to modify
                # the .stats file contents
                if not _os.path.isfile(filerootpath + '.stats'):
                    if _os.path.isfile(_filerootpath + '.stats'):
                        from shutil import copyfile as _copyfile
                        _copyfile(_filerootpath + '.stats',
                                  filerootpath + '.stats')

            if 'implementation' not in kwargs:
                # The %r placeholder is now filled in with the root name.
                print('Root %r sampling implementation not specified... '
                      'assuming MultiNest for nestcheck...' % root)
                self._nc_bcknd = process_multinest_run(root, base_dir=base_dir)
            elif kwargs['implementation'] == 'multinest':
                self._nc_bcknd = process_multinest_run(root,
                                                       base_dir=base_dir)
            elif kwargs['implementation'] == 'polychord':
                self._nc_bcknd = process_polychord_run(root,
                                                       base_dir=base_dir)
            else:
                raise ValueError('Cannot process with nestcheck.')

    @property
    def getdist_backend(self):
        """ Get the :class:`getdist.mcsamples.MCSamples` instance. """
        return self._gd_bcknd

    @property
    def nestcheck_backend(self):
        """ Get the :mod:`nestcheck` backend for the nested samples. """
        return self._nc_bcknd

    @property
    def margeStats(self):
        """ Return the marginal statistics using :mod:`getdist`. """
        # The getdist backend is stored as ``_gd_bcknd`` by ``__init__``.
        return self._gd_bcknd.getMargeStats()
示例#14
0
    def _add_prior_density(self, plotter, posterior,
                           ndraws, normalize,
                           KL_divergence, KL_base,
                           bootstrap, n_simulate):
        """ Crudely estimate the prior density.

        Kullback-Leibler divergence estimated in bits for a combined run or
        the same run for which the credible intervals are calculated.

        """
        run = posterior.subset_to_plot[0]

        yield 'Plotting prior for posterior %s...' % posterior.ID

        l = posterior.likelihood

        if l is None:
            return # quietly do nothing
        elif not hasattr(l, 'prior'):
            return
        elif not hasattr(l.prior, 'draw'):
            return
        elif not callable(l.prior.draw):
            return

        samples, _ = l.prior.draw(ndraws, transform=True)

        color, lw = (run.contours[key] for key in ('color', 'lw'))

        quantiles = [None] * 3

        with verbose(KL_divergence,
                     'Estimating 1D marginal KL-divergences in %s' % KL_base,
                     'Estimated 1D marginal KL-divergences') as condition:
            for i, ax in enumerate([plotter.subplots[i,i] \
                                for i in range(plotter.subplots.shape[0])]):

                name = self.params.names[i]
                bounds = {name: posterior.bounds[name]}
                settings = {'fine_bins': 1024,
                            'smooth_scale_1D': 0.3,
                            'boundary_correction_order': 1,
                            'mult_bias_correction_order': 1} # adopt from posterior settings or take custom input?

                idx = l.index(name)
                if idx is None: idx = l.prior.index(name)

                bcknd = MCSamples(sampler='uncorrelated',
                                  samples=samples[:,idx],
                                  weights=None,
                                  names=[name],
                                  ranges=bounds,
                                  settings=settings)

                if normalize:
                    bcknd.get1DDensity(name).normalize(by='integral',
                                                       in_place=True)

                x = _np.linspace(ax.xaxis.get_view_interval()[0],
                                 ax.xaxis.get_view_interval()[1],
                                 1000)

                ax.plot(x, bcknd.get1DDensity(name).Prob(x),
                        ls='-.', color=color, lw=lw)

                if not condition: continue # go to next iteration if no KL

                # a prototype Kullback-Leibler divergence callback
                # information in bits
                def KL(ns_run, logw):
                    x = ns_run['theta'][:,posterior.get_index(name)]
                    w_rel = _np.exp(logw - logw.max())
                    where = w_rel > run.kde_settings.get('min_weight_ratio',
                                                         1.0e-30)
                    prior = bcknd.get1DDensity(name).Prob(x[where])
                    p = getdist_kde(x[where], x, w_rel,
                                        ranges=[posterior.bounds[name]],
                                        idx=0,
                                        normalize=normalize,
                                        settings=run.kde_settings)
                    # Due to spline interpolation, very small densities can be
                    # negative, so manually give a small postive value which
                    # does not affect KL integral approximation
                    p[p<=0.0] = p[p>0.0].min()

                    KL = _np.sum(w_rel[where] \
                                   * (_np.log(p) - _np.log(prior))) \
                                   /_np.sum(w_rel[where])

                    if KL_base == 'bits':
                        return KL / _np.log(2.0)
                    elif KL_base == 'nats':
                        return KL
                    else:
                        raise ValueError('Invalid base for KL-divergence.')

                if bootstrap:
                    for j, cred_int in enumerate([0.025, 0.5, 0.975]):
                        quantiles[j] = run_ci_bootstrap(run.nestcheck_backend,
                                                     estimator_list=[KL],
                                                     cred_int=cred_int,
                                                     n_simulate=n_simulate,
                                                     simulate_weights=True,
                                                     flip_skew=True)
                    # KL in bits
                    interval = r'$D_{\mathrm{KL}}=%.2f_{-%.2f}^{+%.2f}$' \
                                                  % (quantiles[1],
                                                     quantiles[1] - quantiles[0],
                                                     quantiles[2] - quantiles[1])

                    yield ('%s KL-divergence = %.4f/-%.4f/+%.4f'
                            % (name,
                               quantiles[1],
                               quantiles[1] - quantiles[0],
                               quantiles[2] - quantiles[1]))

                    if not rcParams['text.usetex']:
                        fontsize = plotter.settings.lab_fontsize - 1
                    else:
                        fontsize = plotter.settings.lab_fontsize

                    ax.set_title(interval, color=color,
                                 fontsize=fontsize)
                else:
                    where = run.samples[:,0] > 0.0

                    ns_run = {'theta': run.samples[where,2:]}
                    divergence = KL(ns_run, _np.log(run.samples[where,0]))

                    yield ('%s KL-divergence = %.4f' % (name, divergence))

                    divergence = (r'$D_{\mathrm{KL}}=%.2f$' % divergence)

                    if not rcParams['text.usetex']:
                        fontsize = plotter.settings.lab_fontsize - 1
                    else:
                        fontsize = plotter.settings.lab_fontsize

                    ax.set_title(divergence, color=color,
                                 fontsize=fontsize)

        yield None
示例#15
0
    temp = bl[:, :, list(blobs_names).index('temperature')]
    print('Last temperature: %s' % temp[-1, -1])
else:
    temp = np.ones(ln.shape)

####################################################################################
####################################################################################

# If requested, export the chain as a getdist-formatted text chain.
# NOTE(review): assumes `ch` and `bl` are 3-D arrays that agree on their first
# two axes, with parameters / blobs along the last axis — TODO confirm where
# they are built (outside this excerpt).
if args.output_getdist:
    # Imported here so getdist is only needed when the export is requested.
    from getdist.mcsamples import MCSamples
    gdist = MCSamples(
        # One [lbs[i], ubs[i]] pair per sampled parameter, keyed by its name
        # (presumably lower/upper bounds — verify against their definition).
        ranges={par_names[i]: [lbs[i], ubs[i]]
                for i in range(n_par)},
        # Stack parameters and blobs along the third axis, then flatten to
        # one row per sample with n_par + n_blobs columns.
        samples=np.dstack((ch, bl)).reshape(-1, n_par + n_blobs),
        # Sign-flipped `ln`, flattened so it lines up with the sample rows.
        loglikes=(-1. * ln).reshape(-1),
        # Blob columns follow the parameter columns, matching the stacking
        # order used for `samples` above.
        names=list(par_names) + list(blobs_names),
        labels=list(par_labels) + list(blobs_labels),
    )
    # Output root is the ini file name (without suffix) plus '_gdist'.
    gdist.saveAsText(ini_fname_nosuffix + '_gdist')

####################################################################################
####################################################################################

if 1 in plot:
    fig1, ax1 = plt.subplots(1, 1)
    ax1.plot(-1. * ln, alpha=0.1)
    for p, ls in zip([2.5, 16, 50, 84, 97.5], ['-.', '--', '-', '--', '-.']):
        ax1.axhline(
            np.percentile(-1. * ln, p, axis=1)[n_steps // 2:].mean(),
            color='blue',
示例#16
0
    def bootstrap(
            self,
            mcmc_steps,
            num_walkers,
            iters=1,
            thin=10,
            stats_interval=10,
            output_interval=None,
            initial_jitter=0.01,
            final_jitter=0.01,
            init_samples=None,
            moves=None):
        """
        Seed the trainer with an emcee run, then alternate training and sampling.

        An initial ``emcee.EnsembleSampler`` run, backed by ``emcee.h5`` inside
        ``self.log_dir``, provides the first training set. Each iteration then
        standardises the training samples, trains ``self.trainer`` on them,
        draws a new ensemble with ``self._ensemble_sample`` and thins the
        result into the training set for the next iteration.

        Args:
            mcmc_steps: Number of steps for the initial emcee run and for each
                ``_ensemble_sample`` call.
            num_walkers: Number of walkers for emcee and ``_ensemble_sample``.
            iters: Number of train/sample iterations to perform.
            thin: Passed as ``single_thin`` to ``MCSamples.makeSingleSamples``
                when building the next training set.
            stats_interval: Forwarded to ``_ensemble_sample``.
            output_interval: Forwarded to ``_ensemble_sample``.
            initial_jitter: Jitter handed to the trainer on the first iteration.
            final_jitter: Jitter handed to the trainer on the last iteration;
                intermediate iterations interpolate linearly between the two.
            init_samples: Initial state for the emcee run. When ``None`` and no
                ``emcee.h5`` file exists yet, ``self.sample_prior(num_walkers)``
                is used instead.
            moves: Optional mapping from move name (``'stretch'``, ``'kde'``,
                ``'de'`` or ``'snooker'``, case-insensitive) to its weight.
                Unrecognised names are skipped with a warning. Defaults to a
                single stretch move.

        Returns:
            The training samples produced by the last iteration, or the thinned
            emcee chain when ``iters`` is less than one.

        Raises:
            ValueError: If initial samples are needed but ``self.sample_prior``
                is ``None``.
            ImportError: If emcee cannot be imported.
        """

        def log_prob(x):
            logl, der = self.loglike(x)
            return logl + self.prior(x), der

        backend_path = os.path.join(self.log_dir, 'emcee.h5')

        # Initial samples are only needed when there is no existing backend
        # file to pick up from.
        if not os.path.isfile(backend_path) and init_samples is None:
            if self.sample_prior is not None:
                init_samples = self.sample_prior(num_walkers)
            else:
                raise ValueError('Prior does not have sample method')

        try:
            import emcee
        except ImportError as err:
            # Keep the exception type but say what is missing and why.
            raise ImportError('emcee is required for bootstrap sampling') from err

        if moves is not None:
            move_factories = {
                'stretch': emcee.moves.StretchMove,
                'kde': emcee.moves.KDEMove,
                'de': emcee.moves.DEMove,
                'snooker': emcee.moves.DESnookerMove,
            }
            ensemble_moves = []
            for move_name, weight in moves.items():
                factory = move_factories.get(move_name.lower())
                if factory is None:
                    # Still skipped as before, but no longer silently.
                    self.logger.warning('Ignoring unknown emcee move [%s]' % move_name)
                    continue
                ensemble_moves.append((factory(), weight))
        else:
            ensemble_moves = [(emcee.moves.StretchMove(), 1.0)]

        self.logger.info('Performing initial emcee run with [%d] walkers' % (num_walkers))
        sampler = emcee.EnsembleSampler(num_walkers, self.x_dim, log_prob, moves=ensemble_moves,
                                        backend=emcee.backends.HDFBackend(backend_path))
        sampler.run_mcmc(init_samples, mcmc_steps)
        self.logger.info('Initial acceptance [%5.4f]' % (np.mean(sampler.acceptance_fraction)))
        self._chain_stats(np.transpose(sampler.get_chain(), axes=[1, 0, 2]))

        tau = sampler.get_autocorr_time()
        # A thinning step of 0 is not a valid slice step, so clamp to 1 when
        # the shortest autocorrelation time is below two steps.
        chain_thin = max(1, int(0.5 * np.min(tau)))
        training_samples = sampler.get_chain(discard=int(2 * np.max(tau)), flat=True, thin=chain_thin)

        for it in range(1, iters + 1):

            # Linear jitter schedule from initial_jitter to final_jitter.
            if iters > 1:
                jitter = initial_jitter + (it - 1) * (final_jitter - initial_jitter) / (iters - 1)
            else:
                jitter = initial_jitter

            mean = np.mean(training_samples, axis=0)
            std = np.std(training_samples, axis=0)
            # Normalise samples
            training_samples = (training_samples - mean) / std
            # Maps standardised coordinates back to the original ones.
            self.transform = lambda x: x * std + mean
            self.trainer.train(training_samples, jitter=jitter)

            # Each iteration starts from scratch; the previous ensemble's
            # final state is not carried over.
            samples, latent_samples, derived_samples, loglikes, ncall = self._ensemble_sample(
                mcmc_steps, num_walkers, init_samples=None,
                init_loglikes=None, init_derived=None, stats_interval=stats_interval,
                output_interval=output_interval)

            samples = self.transform(samples)
            self._chain_stats(samples)

            # One chain per leading index of `samples`; loglikes are passed
            # to MCSamples with their sign flipped.
            mc = MCSamples(samples=[samples[i, :, :].squeeze() for i in range(samples.shape[0])],
                           loglikes=[-loglikes[i, :].squeeze() for i in range(loglikes.shape[0])])
            training_samples = mc.makeSingleSamples(single_thin=thin)

        return training_samples