def __getitem__(self, index):
    """Return the item(s) of the data selected by `index`.

    Parameters
    ----------
    index : slice or integer or sequence
        - slice: a new instance of the same class is built from
          `self.data[index]` (passed with `axis=1`).
        - integer (`numbers.Integral`): `self.data[index]` is returned
          directly.
        - sequence (as decided by `is_sequence`): the listed items are
          fetched from `self` with `itemgetter` and wrapped in a new
          instance of the same class (passed with `axis=1`).

    Returns
    -------
    A new instance of `type(self)` for slices and sequences, or the raw
    entry `self.data[index]` for an integer index.

    Raises
    ------
    NddError
        If `index` is none of the supported types.

    """
    cls = type(self)
    if isinstance(index, slice):
        return cls(self.data[index], axis=1)
    if isinstance(index, Integral):
        return self.data[index]
    if is_sequence(index):
        # itemgetter(*index) re-enters __getitem__ once per requested item
        return cls(itemgetter(*index)(self), axis=1)
    raise NddError('%s is not a valid index type' % type(index))
def __init__(self, entropy=None):
    """Default entropy estimator is NSB.

    Parameters
    ----------
    entropy : entropy estimator object, optional
        Estimator used internally. When omitted (None), a new `Nsb()`
        instance is created for this object; a default instance built in
        the signature would be shared by every object and mutated by each
        call to its `fit` method.

    Raises
    ------
    NddError
        If the class name of `entropy` is not in `entropy_estimators`.

    """
    super(DivergenceEstimator, self).__init__()
    self.input_data_ndim = 2

    if entropy is None:
        # build the default per instance, not once at definition time
        entropy = Nsb()

    estimator_name = type(entropy).__name__
    if estimator_name not in entropy_estimators:
        raise NddError('%s is not a valid entropy estimator' %
                       estimator_name)

    self.entropy_estimator = entropy
def __init__(self, ar, axis=0, ks=None, k=None):
    """Store the input data and the optional cardinalities.

    Parameters
    ----------
    ar : array-like or instance of this class
        If `ar` is already an instance of this class, its `data` attribute
        is reused as is. Otherwise it is converted with
        `numpy.atleast_2d`.
    axis : int, optional
        When `axis == 0` and the 2D array has more than one row, the
        array is transposed before being stored. Ignored when `ar` is an
        instance of this class.
    ks : optional
        If not None, assigned to `self.ks`.
    k : optional
        Stored as `self._k`.

    Raises
    ------
    NddError
        If the converted array is empty or has more than two dimensions.

    """
    if isinstance(ar, self.__class__):
        # copy-construct: share the underlying data of the other object
        self._data = ar.data
    else:
        array = numpy.atleast_2d(ar)
        if not array.size:
            raise NddError('Empty data array')
        if array.ndim > 2:
            raise NddError('Input array has %s dimensions; must be 2D' %
                           array.ndim)
        # a 1D input becomes a single row here and is never transposed
        if array.shape[0] > 1 and axis == 0:
            array = array.T
        self._data = array

    self._ks = None
    self._k = k
    if ks is not None:
        self.ks = ks
def counts(self, key=None, k=None):
    """Return counts.

    counts(key) will update the statistics for indices `key`
    if key not in statistics dict.

    Parameters
    ----------
    key : int or tuple or `full`
        Return statistics for the set of features in `key`.
        Defaults: return the statistics for the full set of features.
    k : int or dict or None
        Cardinality. If k is a dict, set k = k[key].
        If `key not in k` and key is a tuple,
        then set k to the product of `(k[x] for x in key)`.
        Only used when `self.stat == 'multiplicities'`.

    Returns
    -------
    keys, values
        When `k` is used, the returned sequences are new lists with one
        extra entry (key 0, value `k - sum(values)`); the statistics
        stored in `self.statistics` are left untouched.

    Raises
    ------
    NddError
        If `k` is a mapping from which no cardinality can be derived
        for `key`.

    """
    # collections.Mapping was removed in Python 3.10
    from collections.abc import Mapping

    if key is None:
        key = 'full'

    order = None  # unknown until the index for `key` has been resolved
    if key in self.statistics:
        stats = self.statistics[key]
    else:
        # compute statistics
        if key == 'full':
            data, order = self.data, 0
        else:
            index, order = self.array_index(key)
            data = self.data[index]
        stats = self._counts(data)
        if order <= self.order:
            # save statistics
            self.statistics[key] = stats

    keys, values = stats

    if self.stat == 'multiplicities' and k is not None:
        # append statistics for non-observed bins
        if isinstance(k, Mapping):
            try:
                k = k[key]
            except KeyError:
                if order is None:
                    # statistics came from the cache: resolve the order now
                    order = 0 if key == 'full' else self.array_index(key)[1]
                if order <= 1:
                    # nothing to combine: the mapping has no usable entry
                    raise NddError('counts(): check k dictionary')
                # use combinatorics
                try:
                    # a list, not a generator: numpy.prod does not
                    # iterate over generators
                    k = numpy.prod([k[x] for x in key])
                except KeyError:
                    raise NddError('counts(): check k dictionary')
        k = k - sum(values)
        # build new lists so that the cached statistics are not mutated
        keys = list(keys) + [0]
        values = list(values) + [k]

    return keys, values
def fit(self, nk, k=None, zk=None):
    """Compute the estimate and its error and store them on the object.

    Parameters
    ----------
    nk : array-like
        Passed as first argument to the `ndd.fnsb` routine.
    k : int
        Cardinality. Required.
    zk : array-like, optional
        When given, `ndd.fnsb.ww_from_multiplicities(nk, zk, k, alpha)`
        is used instead of `ndd.fnsb.ww(nk, k, alpha)`.

    Returns
    -------
    self
        With `estimate_` and `err_` set. For `k == 1` both are set to
        `PZERO` without calling the numerical routine.

    Raises
    ------
    NddError
        If `k` is None.

    """
    if k is None:
        raise NddError('Wolpert-Wolf estimator needs k')

    if k == 1:
        self.estimate_, self.err_ = PZERO, PZERO
        return self

    if zk is not None:
        self.estimate_, self.err_ = ndd.fnsb.ww_from_multiplicities(
            nk, zk, k, self.alpha)
    else:
        self.estimate_, self.err_ = ndd.fnsb.ww(nk, k, self.alpha)
    return self
def __init__(self, *, nk=None, zk=None, k=None):
    """Initialise the object, optionally from `nk`/`zk` arrays.

    Parameters
    ----------
    nk, zk : array-like, optional
        Must be passed together. Both are converted with
        `as_counts_array`; `_n` is set to `sum(zk * nk)` and `_k1` to
        the sum of the `zk` entries for which `nk > 0`.
    k : optional
        If not None, validated with `check_k` and stored as `self.k`.

    Raises
    ------
    NddError
        If only one of `nk` and `zk` is given.

    """
    # start from an empty state
    self.nk = None
    self.k = None
    self.zk = None
    self._n = None
    self._k1 = None
    self.counts = None

    has_nk = nk is not None
    has_zk = zk is not None
    if has_nk != has_zk:
        raise NddError('nk and zk should be passed together.')

    if has_nk:
        self.nk = as_counts_array(nk)
        self.zk = as_counts_array(zk)
        observed = self.nk > 0
        self._n = numpy.sum(self.zk * self.nk)
        self._k1 = numpy.sum(self.zk[observed])

    if k is not None:
        self.k = check_k(k)
def fit(self, nk, k=None, zk=None):
    """Compute the estimate and its error and store them on the object.

    Parameters
    ----------
    nk : array-like
        Passed as first argument to the underlying routine.
    k : int
        Cardinality. Required.
    zk : array-like, optional
        When given (and `self.alpha` is None),
        `ndd.fnsb.nsb_from_multiplicities(nk, zk, k)` is used instead of
        `ndd.fnsb.nsb(nk, k)`.

    Returns
    -------
    self
        With `estimate_` and `err_` set. For `k == 1` both are set to
        `PZERO`. If `self.alpha` is not None, the values are taken from
        a `WolpertWolf(self.alpha)` estimator fitted on the same input.

    Raises
    ------
    NddError
        If `k` is None.

    """
    if k is None:
        raise NddError('NSB estimator needs k')

    if k == 1:
        self.estimate_, self.err_ = PZERO, PZERO
        return self

    if self.alpha is not None:
        # fixed alpha: delegate to the wolpert-wolf estimator
        delegate = WolpertWolf(self.alpha).fit(nk=nk, k=k, zk=zk)
        self.estimate_ = delegate.estimate_
        self.err_ = delegate.err_
        return self

    if zk is None:
        result = ndd.fnsb.nsb(nk, k)
    else:
        result = ndd.fnsb.nsb_from_multiplicities(nk, zk, k)
    self.estimate_, self.err_ = result
    return self
def as_estimator(estimator):
    """Build an entropy estimator object from a name, a class or an object.

    Parameters
    ----------
    estimator : str or estimator class or estimator object
        A string is converted with `as_class_name` and looked up in
        `ndd.entropy_estimators`; a class is instantiated with no
        arguments; anything else is returned unchanged.

    Returns
    -------
    estimator object

    Raises
    ------
    NddError
        If a string does not map to a key of `ndd.entropy_estimators`.

    """
    if not isinstance(estimator, str):
        # classes are instantiated, ready-made objects pass through
        return estimator() if isclass(estimator) else estimator

    # estimator name or label
    class_name = as_class_name(estimator)
    if class_name in ndd.entropy_estimators:
        return ndd.entropy_estimators[class_name]()
    raise NddError('%s is not a valid entropy estimator' % class_name)
def fit(self, nk, k=None, zk=None):
    """Compute the estimate and its error and store them on the object.

    Parameters
    ----------
    nk : array-like
        Input passed to `CountsDistribution`.
    k : int, optional
        Only checked against 1: in that case `estimate_` and `err_` are
        both set to `PZERO`.
    zk : array-like, optional
        When given, the distribution is built as
        `CountsDistribution(nk=nk, zk=zk)`; otherwise
        `CountsDistribution().fit(nk)` is used.

    Returns
    -------
    self
        With `estimate_` and `err_` set.

    Raises
    ------
    NddError
        If the counts distribution reports no coincidences.

    """
    if zk is not None:
        counts = CountsDistribution(nk=nk, zk=zk)
    else:
        counts = CountsDistribution().fit(nk)

    if not counts.coincidences:
        raise NddError('AsymptoticNSB estimator: no coincidences in the data.')

    if counts.sampling_ratio > 0.1:
        # informational only: the computation proceeds anyway
        logger.info('The AsymptoticNSB estimator should only be used in the '
                    'under-sampled regime.')

    if k == 1:
        self.estimate_, self.err_ = PZERO, PZERO
        return self

    estimate = (euler_gamma - numpy.log(2) + 2.0 * numpy.log(counts.n) -
                ndd.fnsb.gamma0(counts.coincidences))
    error = numpy.sqrt(ndd.fnsb.gamma1(counts.coincidences))
    self.estimate_ = estimate
    self.err_ = error
    return self
def check_k(k):
    """Validate a cardinality.

    if k is an integer, just check
    if an array set k = prod(k)
    if None, return

    Parameters
    ----------
    k : scalar or 1D sequence or None
        Cardinality, or a sequence of cardinalities to be multiplied.

    Returns
    -------
    None if `k` is None, otherwise the (product of the) cardinality as a
    numpy float.

    Raises
    ------
    NddError
        If k is not valid (wrong type, more than one dimension,
        non-positive, too large, not a whole number).

    """
    MAX_LOGK = 200 * numpy.log(2)

    if k is None:
        return k

    try:
        k = numpy.float64(k)
    except (TypeError, ValueError):
        # TypeError: non-numeric objects; ValueError: unparsable strings
        raise NddError('%r is not a valid cardinality' % (k,))

    if k.ndim:
        # if k is a sequence, set k = prod(k)
        if k.ndim > 1:
            raise NddError('k must be a scalar or 1D array')
        # check every factor: a negative pair would give a positive product
        if numpy.any(k <= 0):
            raise NddError('k must be > 0 (%r)' % (k,))
        # work in log space so that the size check cannot overflow
        logk = numpy.sum(numpy.log(k))
        if logk > MAX_LOGK:
            # too large a number; backoff to n_bins?
            # TODO: log warning
            raise NddError('k is too large (%e). '
                           'Must be < 2^200' % numpy.exp(logk))
        k = numpy.prod(k)
    else:
        # if a scalar check size
        if k <= 0:
            raise NddError('k must be > 0 (%r)' % k)
        if numpy.log(k) > MAX_LOGK:
            raise NddError('k is too large (%e). '
                           'Must be < 2^200' % k)

    if not k.is_integer():
        raise NddError('k must be a whole number (got %r).' % k)

    return k