Пример #1
0
 def _init_data(self):
     """Fill in the mass arrays from the stored densities, if they are missing.

     Overrides `Sample_Grid._init_data()`: the `idx` and `csum` values are deliberately not
     calculated here because they are not needed yet.
     """
     # Primary mass: only computed when it has not been provided/cached already
     if self._mass is None:
         self._mass = utils.trapz_dens_to_mass(self._dens, self._edges, axis=None)

     # Scalar mass: nothing to do if it already exists, or if there is no scalar density
     if (self._scalar_mass is not None) or (self._scalar_dens is None):
         return

     self._scalar_mass = utils.trapz_dens_to_mass(self._scalar_dens, self._edges, axis=None)
     return
Пример #2
0
    def _init_data(self):
        """Prepare the cached arrays derived from the stored densities.

        Computes `self._mass` (and `self._scalar_mass`, when a scalar density is present) if
        they are not already set, then stores the two values returned by
        `_data_to_cumulative(self._mass)` as `self._idx` and `self._csum`.
        """
        # Primary mass: only computed when it has not been provided/cached already
        if self._mass is None:
            self._mass = utils.trapz_dens_to_mass(self._dens, self._edges, axis=None)

        # Scalar mass: requires a scalar density to integrate
        if self._scalar_mass is None:
            if self._scalar_dens is not None:
                self._scalar_mass = utils.trapz_dens_to_mass(
                    self._scalar_dens, self._edges, axis=None)

        self._idx, self._csum = _data_to_cumulative(self._mass)
        return
Пример #3
0
    def _test_sample(self, kernel):
        """Compare samples drawn from a kernel against its own PDF and CDF.

        A histogram of the samples is compared with `kern.evaluate`, and the cumulative sum of
        the trapezoid-integrated PDF is compared with `kern.cdf`.  Each comparison uses a
        chi-squared-like statistic per degree of freedom, which is passed to `utils.alltrue`
        after being compared with a tolerance.

        Arguments
        ---------
        kernel : callable
            Called with no arguments to construct the kernel instance under test.

        """
        kern = kernel()

        num_samples = int(1e6)
        bandwidth = 1.0
        padding = 4.0
        xe, xc, _ = kale.utils.bins(-padding * bandwidth, padding * bandwidth, 100)
        samples = kern.sample(num_samples)

        # Density of the samples, and the kernel's own PDF at the bin centers
        hist = np.histogram(samples, xe, density=True)[0]
        pdf = kern.evaluate(xc)

        # Integrate the PDF between centers and accumulate, starting from zero
        mass = utils.trapz_dens_to_mass(pdf, xc)
        cum_pdf = np.append([0.0], np.cumsum(mass))
        cdf = kern.cdf(xc)

        # Compare 'analytic' PDF/CDF with distribution of samples
        # CDF tend not to match as well, so use larger tolerance
        comparisons = (
            (hist, pdf, 'pdf', 1e-2),
            (cum_pdf, cdf, 'cdf', 1e-1),
        )
        for measured, expected, name, tol in comparisons:
            # Only use points where both values are positive
            sel = (measured > 0.0) & (expected > 0.0)
            dof = np.count_nonzero(sel) - 1
            resid = measured[sel] - expected[sel]
            x2 = np.sum(np.square(resid) / expected[sel]**2) / dof

            print("Distribution: {} :: {} : x2/dof = {:.4e}".format(
                kern.name(), name, x2))
            print("\t" + kale.utils.array_str(measured[sel]))
            print("\t" + kale.utils.array_str(expected[sel]))
            utils.alltrue(x2 < tol)

        return
Пример #4
0
    def _test_ndim_a2(self, ndim):
        """Check `utils.trapz_dens_to_mass` when integrating over a pair of axes.

        A grid with random, irregularly spaced edges is built in `ndim` dimensions.  For every
        ordered pair of distinct axes, a density is constructed that is constant along the
        integrated axes (it varies only along the remaining ones), so the expected mass in
        each cell is `norm` times the product of the bin widths along the integrated axes.
        Both the output shape and the output values are passed to `utils.alltrue`.

        Arguments
        ---------
        ndim : int
            Number of dimensions of the test grid.
            NOTE(review): with `ndim == 2` there are no non-integrated axes; confirm whether
            callers exercise that case.

        """
        from kalepy import utils

        BIN_SIZE_RANGE = [10, 30]
        num_bins = np.random.randint(*BIN_SIZE_RANGE, ndim)

        # Irregular, monotonically increasing edges in each dimension
        edges = []
        for nb in num_bins:
            ee = np.cumsum(np.random.uniform(0.0, 2.0, nb))
            edges.append(ee)

        grid = np.meshgrid(*edges, indexing='ij')
        shp = np.array([len(ee) for ee in edges])

        for axis in np.ndindex(*([ndim] * 2)):
            # Skip pairs that repeat the same axis
            if len(np.unique(axis)) != len(axis):
                continue

            axis = np.asarray(axis)
            # Sorted so the order matches the broadcast slots filled below; the explicit integer
            # dtype keeps this usable as an index array even when it is empty
            not_axis = np.array(sorted(set(range(ndim)) - set(axis)), dtype=int)
            print("\nndim = {}, axis = {}, other = {}".format(
                ndim, axis, not_axis))

            # Normalization varies only along the axes that are NOT integrated over
            bcast_norm = [np.newaxis] * ndim
            for na in not_axis:
                bcast_norm[na] = slice(None)

            bcast_norm = tuple(bcast_norm)
            norm = np.random.uniform(0.0, 10.0, shp[not_axis])[bcast_norm]

            widths = []
            for ii in range(ndim):
                dim_len_inn = shp[ii]
                # Integrated axes contribute their bin widths; the others contribute unity
                if ii in axis:
                    wid = np.diff(edges[ii])
                else:
                    wid = np.ones(dim_len_inn)

                # Create new axes along all but the current dimension, slice along the current dimension
                cut = [np.newaxis] * ndim
                cut[ii] = slice(None)
                widths.append(wid[tuple(cut)])

            # Broadcast-multiply the per-dimension widths, left to right.  `np.product` (used
            # previously) was removed in NumPy 2.0; multiplying directly also avoids building a
            # ragged object array.
            wids = widths[0]
            for ww in widths[1:]:
                wids = wids * ww
            wids = wids.astype(float)

            pdf = np.ones_like(grid[0]) * norm
            pmf = utils.trapz_dens_to_mass(pdf, edges, axis=axis)

            # Each integrated axis goes from edges to bins, i.e. loses one element
            new_shp = [ss for ss in shp]
            for aa in axis:
                new_shp[aa] -= 1

            utils.alltrue(
                np.shape(pmf) == np.array(new_shp),
                "Output shape is {fail:}correct")
            utils.alltrue(pmf == norm * wids, 'Values do {fail:}match')

        return
Пример #5
0
    def _test_ndim_a1(self, ndim):
        """Check `utils.trapz_dens_to_mass` when integrating over a single axis.

        A grid with random, irregularly spaced edges is built in `ndim` dimensions.  For each
        axis, the density is made to vary only along one *other* axis, so the expected mass is
        `norm` times the bin widths along the integrated axis.  Both the output shape and the
        output values are passed to `utils.alltrue`.

        Arguments
        ---------
        ndim : int
            Number of dimensions of the test grid.

        """
        from kalepy import utils

        BIN_SIZE_RANGE = [10, 30]
        num_bins = np.random.randint(*BIN_SIZE_RANGE, ndim)

        # Irregular, monotonically increasing edges in each dimension
        edges = [np.cumsum(np.random.uniform(0.0, 2.0, nb)) for nb in num_bins]

        grid = np.meshgrid(*edges, indexing='ij')
        shp = [len(ee) for ee in edges]

        def _only_along(dim):
            # Index tuple that keeps `dim` and inserts new axes for every other dimension
            return tuple(slice(None) if jj == dim else np.newaxis for jj in range(ndim))

        for axis in range(ndim):
            # Use the next dimension (cyclically) as the one the density varies along
            not_axis = (axis + 1) % ndim
            print("\nndim = {}, axis = {}, other = {}".format(
                ndim, axis, not_axis))

            norm = np.random.uniform(0.0, 10.0, shp[not_axis])[_only_along(not_axis)]
            wids = np.diff(edges[axis])[_only_along(axis)]

            pdf = np.ones_like(grid[0]) * norm
            pmf = utils.trapz_dens_to_mass(pdf, edges, axis=axis)

            # The integrated axis goes from edges to bins, i.e. loses one element
            new_shp = list(shp)
            new_shp[axis] -= 1
            utils.alltrue(
                np.shape(pmf) == np.array(new_shp),
                "Output shape is {fail:}correct")

            utils.alltrue(pmf == norm * wids, 'Values do {fail:}match')

        return
Пример #6
0
    def _test_ndim(self, ndim):
        """Check the total mass from `utils.trapz_dens_to_mass` over all axes.

        The density is `norm * min_i(x_i / L_i)` on a box with side lengths `L_i`, whose
        integral is `norm * prod(L_i) / (ndim + 1)`.  The summed output of
        `utils.trapz_dens_to_mass` is compared, via `utils.allclose`, with that analytic
        value and with the result of `utils.trapz_nd` on the same density.

        Arguments
        ---------
        ndim : int
            Number of dimensions of the test grid.

        """
        from kalepy import utils

        print("`ndim` = {}".format(ndim))

        BIN_SIZE_RANGE = [10, 30]

        # Each dimension spans [0, L_i] with a random upper extent
        extr = [[0.0, np.random.uniform(0.0, 2.0)] for ii in range(ndim)]
        norm = np.random.uniform(0.0, 10.0)

        edges = [
            np.linspace(*ex, np.random.randint(*BIN_SIZE_RANGE)) for ex in extr
        ]
        grid = np.meshgrid(*edges, indexing='ij')

        lengths = np.max(extr, axis=-1)

        # Smallest fractional coordinate at each grid point
        xx = np.min(np.moveaxis(grid, 0, -1) / lengths, axis=-1)

        pdf = norm * xx
        # `np.prod` replaces `np.product`, which was removed in NumPy 2.0
        area = np.prod(lengths)
        pmf = utils.trapz_dens_to_mass(pdf, edges)

        # Known area of a pyramid in ndim
        vol = area * norm / (ndim + 1)
        tot = np.sum(pmf)
        print("Volume = {:.4e}, Total Mass = {:.4e};  ratio = {:.4e}".format(
            vol, tot, tot / vol))
        utils.allclose(vol,
                       tot,
                       rtol=1e-2,
                       msg="total volume does {fail:}match analytic value")

        test = utils.trapz_nd(pdf, edges)
        print("Volume = {:.4e}, Total Mass = {:.4e};  ratio = {:.4e}".format(
            test, tot, tot / test))
        # Compare against the `trapz_nd` result; this previously re-checked `vol` by mistake,
        # so `test` was never actually verified
        utils.allclose(test,
                       tot,
                       rtol=1e-2,
                       msg="total volume does {fail:}match `trapz_nd` value")

        return
Пример #7
0
    def sample(self, nsamp=None, interpolate=True, return_scalar=None):
        """Sample from the probability distribution.

        Bins are chosen with `self._random_bins`, and each sample is then placed inside its
        bin, one dimension at a time: either uniformly, or (when interpolating) using the
        gradient of the density across the bin.

        Arguments
        ---------
        nsamp : scalar or None
            Number of samples to draw; converted with `int()`.  If `None`, the sum of
            `self._mass` is used.
        interpolate : bool
            If True, place samples within bins using the density gradient; otherwise place
            them uniformly within each bin.  Forced to False (with an info-level log message)
            when `self._dens` is None.
        return_scalar : bool or None
            Whether to also return scalar values for each sample.  If `None`, scalars are
            returned exactly when `self._scalar_dens` is set.  If True but `self._scalar_dens`
            is None, a warning is logged and no scalars are returned.

        Returns
        -------
        vals : (D, N) ndarray of scalar
            Sample positions; same shape as the `intrabin_locs` array from `_random_bins`.
        scalar_values : ndarray
            Only returned (as the second element of a tuple) when scalars are being sampled.

        """
        dens = self._dens
        scalar_dens = self._scalar_dens
        edges = self._edges
        ndim = self._ndim

        # ---- initialize parameters
        if interpolate and (dens is None):
            logging.info("`dens` is None, cannot interpolate sampling")
            interpolate = False

        # If no number of samples is given, assume that the units of `self._mass` are number of samples, and choose
        # the total number of samples to be the total of this
        if nsamp is None:
            nsamp = self._mass.sum()
        nsamp = int(nsamp)

        # Default to returning scalars whenever a scalar density is available
        if return_scalar is None:
            return_scalar = (scalar_dens is not None)
        elif return_scalar and (scalar_dens is None):
            return_scalar = False
            logging.warning(
                "WARNING: no `scalar` initialized, but `return_scalar`=True!")

        # ---- Get generalized sampling locations

        # Choose random bins, proportionally to `mass`, and positions within bins (uniformly distributed)
        #     `bin_numbers_flat` are index numbers into the flattened array of bins
        #         (presumably one per sample, i.e. shape (N,) -- confirm against `_random_bins`)
        #     `intrabin_locs` (D, N) are position [0.0, 1.0] within each bin for each sample in each dimension
        bin_numbers_flat, intrabin_locs = self._random_bins(nsamp)
        # Convert from flat indices into ND indices;  (D, N) for D dimensions, N samples (`nsamp`)
        bin_numbers = np.unravel_index(bin_numbers_flat, self._shape_bins)

        # If scalars are also being sampled: start from the `_scalar_mass` value of each sample's bin
        #     this will be updated/improved if `interpolate=True`
        if return_scalar:
            scalar_mass = self._scalar_mass
            # NOTE(review): indexing with the tuple of index arrays should give a new array, so the
            #     in-place update below is not expected to modify `self._scalar_mass` -- confirm
            scalar_values = scalar_mass[bin_numbers]

        # ---- Place samples in each dimension

        vals = np.zeros_like(intrabin_locs)
        for dim, (edge, bidx) in enumerate(zip(edges, bin_numbers)):
            # Width of bins in this dimension
            wid = np.diff(edge)

            # Random location, in this dimension, for each bin. Relative position, i.e. between [0.0, 1.0]
            loc = intrabin_locs[dim]

            # Uniform / no-interpolation :: random-uniform within each bin
            if (not interpolate):
                vals[dim, :] = edge[bidx] + wid[bidx] * loc

            # Interpolated :: random-linear proportional to bin gradients (i.e. slope across bin in each dimension)
            else:
                # Calculate normalization for gradients; needs to be done for each dimension specifically
                #    This normalization is needed to ensure that the pdf values are unitary when integrating in each dim
                norm = utils.trapz_dens_to_mass(dens, edges, axis=dim)
                # Take midpoints of the normalization along every dimension other than this one
                others = np.arange(ndim).tolist()
                others.pop(dim)
                norm = utils.midpoints(norm, axis=others)

                edge = np.asarray(edge)

                # Find the gradient along this dimension (using center-values in other dimensions)
                grad = _grad_along(dens, dim) / norm
                # get the gradient for each sample
                #    (flat indexing assumes `grad` is laid out like the bin grid -- TODO confirm)
                grad = grad.flat[bin_numbers_flat] * wid[bidx]
                # interpolate edge values in this dimension (returns values [0.0, 1.0])
                temp = _intrabin_linear_interp(loc, grad)
                # convert from intrabin positions to overall positions by linearly rescaling
                vals[dim, :] = edge[bidx] + temp * wid[bidx]

            # interpolate scalar values also
            if return_scalar and interpolate:
                grad = _grad_along(scalar_dens, dim)
                grad = grad.flat[bin_numbers_flat]
                # shift `loc` (location within bin) to center point
                scalar_values += grad * (loc - 0.5)

        if return_scalar:
            return vals, scalar_values

        return vals