Example #1
 def __getitem__(self, index):
     cls = type(self)
     if isinstance(index, slice):
         return cls(self.data[index], axis=1)
     if isinstance(index, Integral):
         return self.data[index]
     if is_sequence(index):
         return cls(itemgetter(*index)(self), axis=1)
     raise NddError('%s is not a valid index type' % type(index))
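
A minimal sketch of the sequence-index branch above (plain Python, no ndd classes): operator.itemgetter selects several items at once, which is how a tuple or list of indices picks multiple variables from the container.

from operator import itemgetter

data = ['a', 'b', 'c', 'd']
# a tuple index such as (0, 2) is unpacked into itemgetter,
# which returns the selected items as a tuple
print(itemgetter(*(0, 2))(data))  # ('a', 'c')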
Example #2
    def __init__(self, entropy=Nsb()):
        """Default entropy estimator is NSB."""
        super(DivergenceEstimator, self).__init__()
        self.input_data_ndim = 2

        estimator_name = type(entropy).__name__
        if estimator_name not in entropy_estimators:
            raise NddError('%s is not a valid entropy estimator' %
                           estimator_name)

        self.entropy_estimator = entropy
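
An illustrative sketch (not ndd code) of the class-name validation above: the estimator's class name is looked up in a registry mapping; the registry and the class used here are hypothetical stand-ins.

# hypothetical registry standing in for ndd's entropy_estimators mapping
entropy_estimators = {'Nsb': None, 'Plugin': None}

class NotAnEstimator:
    pass

name = type(NotAnEstimator()).__name__
print(name in entropy_estimators)  # False: __init__ above would raise NddError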
Example #3
 def __init__(self, ar, axis=0, ks=None, k=None):
     # set data
     if not isinstance(ar, self.__class__):
         ar = numpy.atleast_2d(ar)
         if not ar.size:
             raise NddError('Empty data array')
         if ar.ndim > 2:
             raise NddError('Input array has %s dimensions; must be 2D' %
                            ar.ndim)
         # a 1D input becomes a (1, n) row after atleast_2d and is kept
         # as a single variable with n samples; otherwise, with axis=0
         # (samples along rows), transpose to variables-by-samples
         if ar.shape[0] > 1 and axis == 0:
             ar = ar.T
         self._data = ar
     else:
         self._data = ar.data
     self._ks = None
     self._k = k
     if ks is not None:
         self.ks = ks
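
A numpy-only sketch of the shape handling above (no ndd classes): a 1D input becomes a single (1, n) row, while an (n, p) samples-by-variables array passed with axis=0 is transposed to variables-by-samples.

import numpy

ar = numpy.atleast_2d([1, 2, 3, 4])    # shape (1, 4): one variable, 4 samples
print(ar.shape)                        # (1, 4) -> no transpose needed
ar2 = numpy.arange(12).reshape(6, 2)   # 6 samples of 2 variables along axis 0
print(ar2.T.shape)                     # (2, 6): variables-by-samples after transpose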
Example #4
    def counts(self, key=None, k=None):
        """Return counts.
        counts(key) computes and caches the statistics for indices `key`
        if `key` is not already in the statistics dict.

        Parameters
        ----------
        key : int or tuple or 'full'
            Return statistics for the set of features in `key`.
            Default: return the statistics for the full set of features.
        k : int or dict or None
            Cardinality. If k is a dict, set k = k[key].
            If `key not in k` and `key` is a tuple, set k to the product
            of `(k[x] for x in key)`. No effect if `stat == 'counts'`.

        Returns
        -------
        keys, values

        """
        if key is None:
            key = 'full'

        if key not in self.statistics:  # compute statistics
            if key == 'full':
                data, order = self.data, 0
            else:
                index, order = self.array_index(key)
                data = self.data[index]

            stats = self._counts(data)
            if order <= self.order:  # save statistics
                self.statistics[key] = stats
        else:
            stats = self.statistics[key]

        keys, values = stats

        if self.stat == 'multiplicities' and k is not None:
            # append statistics for non-observed bins
            if isinstance(k, collections.abc.Mapping):
                try:
                    k = k[key]
                except KeyError:
                    if order > 1:  # use combinatorics
                        try:
                            k = numpy.prod([k[x] for x in key])
                        except KeyError:
                            raise NddError('counts(): check k dictionary')
            k = k - sum(values)
            keys.append(0)
            values.append(k)

        return keys, values
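
A small numeric sketch of the k bookkeeping described in the docstring (hypothetical numbers, plain numpy, not the ndd API): for a tuple key the joint cardinality is the product of the per-feature cardinalities, and the number of never-observed bins is appended as an extra multiplicity for count 0.

import numpy

k = {0: 4, 1: 3}                           # hypothetical per-feature cardinalities
key = (0, 1)
joint_k = numpy.prod([k[x] for x in key])  # 4 * 3 = 12 possible joint bins
keys, values = [1, 2], [5, 2]              # 5 bins seen once, 2 bins seen twice
n_unobserved = joint_k - sum(values)       # 12 - 7 = 5 bins never observed
keys.append(0)
values.append(n_unobserved)
print(keys, values)                        # [1, 2, 0] [5, 2, 5]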
Example #5
    def fit(self, nk, k=None, zk=None):
        if k is None:
            raise NddError('Wolpert-Wolf estimator needs k')
        if k == 1:
            self.estimate_, self.err_ = PZERO, PZERO
            return self

        if zk is not None:
            self.estimate_, self.err_ = ndd.fnsb.ww_from_multiplicities(
                nk, zk, k, self.alpha)
        else:
            self.estimate_, self.err_ = ndd.fnsb.ww(nk, k, self.alpha)
        return self
Example #6
 def __init__(self, *, nk=None, zk=None, k=None):
     self.nk = None
     self.k = None
     self.zk = None
     self._n = None
     self._k1 = None
     self.counts = None
     if (nk is None) != (zk is None):
         raise NddError('nk and zk should be passed together.')
     if nk is not None:
         self.nk = as_counts_array(nk)
         self.zk = as_counts_array(zk)
         self._n = numpy.sum(self.zk * self.nk)
         self._k1 = numpy.sum(self.zk[self.nk > 0])
     if k is not None:
         self.k = check_k(k)
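
A numeric sketch of the multiplicity representation handled above (hypothetical data, plain numpy): nk lists the distinct count values and zk how many bins show each count, so the total sample size and the number of observed bins follow from the two sums computed in __init__.

import numpy

nk = numpy.array([0, 1, 2])   # distinct counts per bin
zk = numpy.array([5, 3, 1])   # 5 empty bins, 3 singletons, 1 bin observed twice
n = numpy.sum(zk * nk)        # total samples: 3*1 + 1*2 = 5
k1 = numpy.sum(zk[nk > 0])    # observed bins: 3 + 1 = 4
print(n, k1)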
Example #7
    def fit(self, nk, k=None, zk=None):
        if k is None:
            raise NddError('NSB estimator needs k')
        if k == 1:
            self.estimate_, self.err_ = PZERO, PZERO
            return self

        if self.alpha is None:
            if zk is not None:
                self.estimate_, self.err_ = ndd.fnsb.nsb_from_multiplicities(
                    nk, zk, k)
            else:
                self.estimate_, self.err_ = ndd.fnsb.nsb(nk, k)
        else:  # Wolpert-Wolf estimator
            estimator = WolpertWolf(self.alpha).fit(nk=nk, k=k, zk=zk)
            self.estimate_ = estimator.estimate_
            self.err_ = estimator.err_
        return self
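
A hedged usage sketch of the two fit() methods above, assuming the Nsb and WolpertWolf classes they belong to are in scope; the counts array is hypothetical.

import numpy

nk = numpy.array([4, 2, 1, 0, 0])    # counts per bin, hypothetical sample
k = nk.size                          # known alphabet size
nsb = Nsb().fit(nk, k=k)             # full NSB: alpha is integrated over
ww = WolpertWolf(1.0).fit(nk, k=k)   # fixed pseudocount alpha = 1.0
print(nsb.estimate_, nsb.err_)
print(ww.estimate_, ww.err_)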
Example #8
def as_estimator(estimator):
    """Return an entropy estimator object from class/class name.

    Parameters
    ----------
    estimator : str or estimator class or estimator object

    Returns
    -------
    estimator object

    """
    if isinstance(estimator, str):  # estimator name or label
        name = as_class_name(estimator)
        if name not in ndd.entropy_estimators:
            raise NddError('%s is not a valid entropy estimator' % name)
        return ndd.entropy_estimators[name]()
    if isclass(estimator):
        return estimator()
    return estimator
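
A brief sketch of the class and instance branches of as_estimator, assuming the function and the Nsb class are in scope; the string branch would additionally go through ndd's registry lookup shown above.

print(type(as_estimator(Nsb)).__name__)   # 'Nsb': a class is instantiated
est = Nsb()
print(as_estimator(est) is est)           # True: an instance passes through unchanged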
Example #9
    def fit(self, nk, k=None, zk=None):
        if zk is None:
            counts = CountsDistribution().fit(nk)
        else:
            counts = CountsDistribution(nk=nk, zk=zk)

        if not counts.coincidences:
            raise NddError('AsymptoticNSB estimator: no coincidences '
                           'in the data.')
        if counts.sampling_ratio > 0.1:
            logger.info('The AsymptoticNSB estimator should only be used '
                        'in the under-sampled regime.')
        if k == 1:
            self.estimate_, self.err_ = PZERO, PZERO
            return self

        self.estimate_ = (euler_gamma - numpy.log(2) +
                          2.0 * numpy.log(counts.n) -
                          ndd.fnsb.gamma0(counts.coincidences))
        self.err_ = numpy.sqrt(ndd.fnsb.gamma1(counts.coincidences))
        return self
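
A hedged restatement of the estimate computed above, assuming (not shown in this snippet) that ndd.fnsb.gamma0 and gamma1 are the digamma and trigamma functions; scipy stands in for them here, with hypothetical sample numbers.

import numpy
from scipy.special import digamma, polygamma  # assumed stand-ins for ndd.fnsb.gamma0/gamma1

n, coincidences = 100, 7                      # hypothetical sample size and coincidence count
estimate = (numpy.euler_gamma - numpy.log(2) +
            2.0 * numpy.log(n) - digamma(coincidences))
err = numpy.sqrt(polygamma(1, coincidences))  # trigamma
print(estimate, err)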
Example #10
def check_k(k):
    """
    Check and normalize the cardinality k.

    If k is a scalar, check that it is a positive whole number;
    if k is a 1D array, set k = prod(k); if k is None, return None.

    Raises
    ------
    NddError
        If k is not valid (wrong type, negative, too large...)

    """
    MAX_LOGK = 200 * numpy.log(2)

    if k is None:
        return k
    try:
        k = numpy.float64(k)
    except ValueError:
        raise NddError('%r is not a valid cardinality' % k)
    if k.ndim:
        # if k is a sequence, set k = prod(k)
        if k.ndim > 1:
            raise NddError('k must be a scalar or 1D array')
        logk = numpy.sum(numpy.log(k))
        if logk > MAX_LOGK:
            # too large a number; backoff to n_bins?
            # TODO: log warning
            raise NddError('k is too large (%e). '
                           'Must be < 2^200.' % numpy.exp(logk))
        k = numpy.prod(k)
    else:
        # if a scalar check size
        if k <= 0:
            raise NddError('k must be > 0 (%r)' % k)
        if numpy.log(k) > MAX_LOGK:
            raise NddError('k is too large (%e). Must be < 2^200.' % k)
    if not k.is_integer():
        raise NddError('k must be a whole number (got %r).' % k)

    return k
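
A hedged usage sketch, assuming check_k is importable from the module above: a 1D array of per-variable cardinalities is reduced to its product.

print(check_k(4))        # 4.0 (scalar, returned as numpy.float64)
print(check_k([4, 3]))   # 12.0: product of the per-variable cardinalities
print(check_k(None))     # None
# check_k(-1) or check_k(2.5) would raise NddError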