def __getitem__(self, key):
    # Just integer indices (no slices) for now
    if not isinstance(key, tuple):
        key = (key,)
    key = tuple(operator.index(i) for i in key)
    if len(key) > len(self._dshape) - 1:
        raise IndexError('Cannot have more indices than dimensions')
    # Apply each index in turn
    ds = self._dshape
    c_strides = ds.c_strides
    ptr = self._ptr
    for i, idx in enumerate(key):
        dim_size = operator.index(ds[i])
        # Implement Python's negative indexing
        if idx >= 0:
            if idx >= dim_size:
                raise IndexError(('Index %d is out of range '
                                  'in dimension sized %d') % (idx, dim_size))
        else:
            if idx < -dim_size:
                raise IndexError(('Index %d is out of range '
                                  'in dimension sized %d') % (idx, dim_size))
            idx += dim_size
        ptr = ptr + idx * c_strides[i]
    # Create the data shape of the result
    ds = ds.subarray(len(key))
    return MemBufDataDescriptor(ptr, self.ptr_owner, ds, self._writable)
def cat_dshapes(dslist):
    """
    Concatenates a list of dshapes together along the first axis.
    Raises an error if there is a mismatch along another axis
    or the measures are different.

    Requires that the leading dimension be a known size for
    all data shapes.
    TODO: Relax this restriction to support streaming dimensions.
    """
    if len(dslist) == 0:
        raise ValueError('Cannot concatenate an empty list of dshapes')
    elif len(dslist) == 1:
        return dslist[0]

    outer_dim_size = operator.index(dslist[0][0])
    inner_ds = dslist[0][1:]
    for ds in dslist[1:]:
        outer_dim_size += operator.index(ds[0])
        if ds[1:] != inner_ds:
            raise ValueError(('The datashapes to concatenate must'
                              ' all match after the first dimension'
                              ' (%s vs %s)') % (inner_ds, ds[1:]))
    return coretypes.DataShape(*[coretypes.Fixed(outer_dim_size)] + list(inner_ds))
def test_basic(self):
    self.o.ind = -2
    self.n.ind = 2
    import operator
    assert operator.index(self.o) == -2
    assert operator.index(self.n) == 2
    raises(TypeError, operator.index, self.o_no_index)
    raises(TypeError, operator.index, self.n_no_index)
def check_shape(args, current_shape=None):
    """Imitate numpy.matrix handling of shape arguments"""
    if len(args) == 0:
        raise TypeError("function missing 1 required positional argument: "
                        "'shape'")
    elif len(args) == 1:
        try:
            shape_iter = iter(args[0])
        except TypeError:
            new_shape = (operator.index(args[0]), )
        else:
            new_shape = tuple(operator.index(arg) for arg in shape_iter)
    else:
        new_shape = tuple(operator.index(arg) for arg in args)

    if current_shape is None:
        if len(new_shape) != 2:
            raise ValueError('shape must be a 2-tuple of positive integers')
        elif new_shape[0] < 0 or new_shape[1] < 0:
            raise ValueError("'shape' elements cannot be negative")
    else:
        # Check the current size only if needed
        current_size = np.prod(current_shape, dtype=int)

        # Check for negatives
        negative_indexes = [i for i, x in enumerate(new_shape) if x < 0]
        if len(negative_indexes) == 0:
            new_size = np.prod(new_shape, dtype=int)
            if new_size != current_size:
                raise ValueError('cannot reshape array of size {} into shape {}'
                                 .format(new_size, new_shape))
        elif len(negative_indexes) == 1:
            skip = negative_indexes[0]
            specified = np.prod(new_shape[0:skip] + new_shape[skip+1:])
            unspecified, remainder = divmod(current_size, specified)
            if remainder != 0:
                err_shape = tuple('newshape' if x < 0 else x for x in new_shape)
                raise ValueError('cannot reshape array of size {} into shape {}'
                                 ''.format(current_size, err_shape))
            new_shape = new_shape[0:skip] + (unspecified,) + new_shape[skip+1:]
        else:
            raise ValueError('can only specify one unknown dimension')

    # Add and remove ones like numpy.matrix.reshape
    if len(new_shape) != 2:
        new_shape = tuple(arg for arg in new_shape if arg != 1)

        if len(new_shape) == 0:
            new_shape = (1, 1)
        elif len(new_shape) == 1:
            new_shape = (1, new_shape[0])

    if len(new_shape) > 2:
        raise ValueError('shape too large to be a matrix')

    return new_shape
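# Usage sketch for check_shape above, assuming numpy is imported as np in this
# module: a 1-tuple is padded to 2-D like numpy.matrix.reshape, and a single
# -1 entry is inferred from the current size.
assert check_shape((6,), current_shape=(2, 3)) == (1, 6)
assert check_shape((-1, 2), current_shape=(2, 3)) == (3, 2)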
def _get_item_ptr(self, idx):
    if len(idx) != self.nindex:
        raise IndexError('Incorrect number of indices (got %d, require %d)' %
                         (len(idx), self.nindex))
    idx = tuple(operator.index(i) for i in idx)
    if self.nindex > 0:
        c_strides = self._mbdd.dshape.c_strides[:self.nindex]
        offset = sum(stride * i for stride, i in zip(c_strides, idx))
        return self._mbdd.ptr + offset
    else:
        return self._mbdd.ptr
def __new__(cls, width, height):
    if width <= 0:
        raise ValueError("width must be greater than 0")
    try:
        width = index(width)
    except TypeError:
        raise TypeError("width must be of integral value or provide "
                        "an __index__ method")
    if height <= 0:
        raise ValueError("height must be greater than 0")
    try:
        height = index(height)
    except TypeError:
        raise TypeError("height must be of integral value or provide "
                        "an __index__ method")
    return super(ImageSize, cls).__new__(cls, width, height)
def binom(n, k):
    """
    Binomial coefficient (n over k)
    """
    n = operator.index(n)
    k = operator.index(k)
    if not 0 <= k <= n:
        return 0
    m = n + 1
    num = 1
    den = 1
    for j in range(1, min(k, n - k) + 1):
        num *= m - j
        den *= j
    return num // den
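# Quick self-check of binom against the standard library (Python 3.8+);
# math.comb serves as the reference here.
import math

assert binom(10, 3) == math.comb(10, 3) == 120
assert binom(5, 7) == 0          # out-of-range k returns 0 rather than raising
assert binom(True, False) == 1   # any integer-like input passes operator.index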
def index(self, value, start=None, stop=None):
    start = index(start) if start is not None else 0
    if stop is not None:
        stop = index(stop)
    if not (start == 0 and stop is None):
        l = self[start:stop]
    else:
        l = self
    idx = start
    while l is not nil:
        if l.car == value:
            return idx
        l = l.cdr
        idx += 1
    raise ValueError("'%s' not in list" % value)
def __new__(cls, numerator=0, denominator=1):
    """Constructs a Fraction.

    Takes a string like '3/2' or '1.5', another Fraction, or a
    numerator/denominator pair.
    """
    self = super(Fraction, cls).__new__(cls)

    if type(numerator) not in (int, long) and denominator == 1:
        if isinstance(numerator, basestring):
            # Handle construction from strings.
            input = numerator
            m = _RATIONAL_FORMAT.match(input)
            if m is None:
                raise ValueError('Invalid literal for Fraction: %r' % input)
            numerator = m.group('num')
            decimal = m.group('decimal')
            if decimal:
                # The literal is a decimal number.
                numerator = int(numerator + decimal)
                denominator = 10 ** len(decimal)
            else:
                # The literal is an integer or fraction.
                numerator = int(numerator)
                # Default denominator to 1.
                denominator = int(m.group('denom') or 1)
            if m.group('sign') == '-':
                numerator = -numerator
        elif isinstance(numerator, Rational):
            # Handle copies from other rationals. Integrals get
            # caught here too, but it doesn't matter because
            # denominator is already 1.
            other_rational = numerator
            numerator = other_rational.numerator
            denominator = other_rational.denominator

    if denominator == 0:
        raise ZeroDivisionError('Fraction(%s, 0)' % numerator)

    numerator = operator.index(numerator)
    denominator = operator.index(denominator)
    g = gcd(numerator, denominator)
    self._numerator = numerator // g
    self._denominator = denominator // g
    return self
def slice_indices(slice, length):
    """
    Reference implementation for the slice.indices method.
    """
    # Compute step and length as integers.
    length = operator.index(length)
    step = 1 if slice.step is None else evaluate_slice_index(slice.step)

    # Raise ValueError for negative length or zero step.
    if length < 0:
        raise ValueError("length should not be negative")
    if step == 0:
        raise ValueError("slice step cannot be zero")

    # Find lower and upper bounds for start and stop.
    lower = -1 if step < 0 else 0
    upper = length - 1 if step < 0 else length

    # Compute start.
    if slice.start is None:
        start = upper if step < 0 else lower
    else:
        start = evaluate_slice_index(slice.start)
        start = max(start + length, lower) if start < 0 else min(start, upper)

    # Compute stop.
    if slice.stop is None:
        stop = lower if step < 0 else upper
    else:
        stop = evaluate_slice_index(slice.stop)
        stop = max(stop + length, lower) if stop < 0 else min(stop, upper)

    return start, stop, step
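# A minimal sanity check for the reference implementation above, assuming
# evaluate_slice_index reduces to operator.index for plain integers (its role
# in the surrounding module): the result should agree with the built-in
# slice.indices method.
import operator

evaluate_slice_index = operator.index

for s in (slice(None, None, -1), slice(2, 10, 3), slice(-7, None), slice(5, 1, -2)):
    assert slice_indices(s, 5) == s.indices(5)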
def to_int(self, v):
    try:
        int_pack(index(v))
    except (struct_error, TypeError):
        raise TypeError('32-bit integer expected')
    return int(v)
def _check_rank(rank):
    """Check rank parameter and deal with deprecation."""
    err_msg = ('rank must be None, dict, "full", or int, '
               'got %s (type %s)' % (rank, type(rank)))
    if isinstance(rank, str):
        # XXX we can use rank='' to deprecate to get to None eventually:
        # if rank == '':
        #     warn('The rank parameter default in 0.18 of "full" will change '
        #          'to None in 0.19, set it explicitly to avoid this warning',
        #          DeprecationWarning)
        #     rank = 'full'
        if rank not in ['full', 'info']:
            raise ValueError('rank, if str, must be "full" or "info", '
                             'got %s' % (rank,))
    elif isinstance(rank, bool):
        raise TypeError(err_msg)
    elif rank is not None and not isinstance(rank, dict):
        try:
            rank = int(operator.index(rank))
        except TypeError:
            raise TypeError(err_msg)
        else:
            warn('rank as int is deprecated and will be removed in 0.19. '
                 'use rank=dict(meg=...) instead.', DeprecationWarning)
            rank = dict(meg=rank)
    return rank
def test_index(self):
    import operator
    assert operator.index(42) == 42
    assert operator.__index__(42) == 42
    raises(TypeError, operator.index, "abc")
    exc = raises(TypeError, operator.index, "abc")
    assert str(exc.value) == "'str' object cannot be interpreted as an integer"
def __init__(self, text_node):
    self.index = index(text_node)
    self.msg_dict = {'type': 'setTextNode',
                     'childTag': text_node.tag,
                     'parentId': text_node.parentNode.id,
                     'index': self.index,
                     'text': text_node.text}
async def receive_some(self, max_bytes):
    async with self._receive_conflict_detector:
        # Argument validation
        max_bytes = operator.index(max_bytes)
        if max_bytes < 1:
            raise ValueError("max_bytes must be >= 1")
        # State validation
        if self._receiver_closed:
            raise ClosedStreamError
        # Wake wait_send_all_might_not_block and wait for data
        self._receiver_waiting = True
        self._something_happened()
        try:
            await self._wait_for(lambda: self._data or self._sender_closed)
        finally:
            self._receiver_waiting = False
        # Get data, possibly waking send_all
        if self._data:
            got = self._data[:max_bytes]
            del self._data[:max_bytes]
            self._something_happened()
            return got
        else:
            assert self._sender_closed
            return b""
def test_index_returns_int_subclass(self):
    class BadInt:
        def __index__(self):
            return True

    class BadInt2(int):
        def __index__(self):
            return True

    bad_int = BadInt()
    n = operator.index(bad_int)
    self.assertEqual(n, 1)

    bad_int = BadInt2()
    n = operator.index(bad_int)
    self.assertEqual(n, 0)
def __getitem__(self, key):
    """This function reports the record of case number <key>.
    For example: firstRecord = SavReader(savFileName)[0].
    The <key> argument may also be a slice, for example:
    firstfiveRecords = SavReader(savFileName)[:5].
    You can also do stuff like (numpy required!):
    savReader(savFileName)[1:5, 1]"""
    is_slice = isinstance(key, slice)
    is_array_slice = key is Ellipsis or isinstance(key, tuple)

    if is_slice:
        start, stop, step = key.indices(self.nCases)
    elif is_array_slice:
        return self._get_array_slice(key, self.nCases, len(self.header))
    else:
        key = operator.index(key)
        start = key + self.nCases if key < 0 else key
        if not 0 <= start < self.nCases:
            raise IndexError("Index out of bounds")
        stop = start + 1
        step = 1

    records = self._items(start, stop, step)
    if is_slice:
        return list(records)
    return next(records)
def test_proxy_index(self):
    class C:
        def __index__(self):
            return 10

    o = C()
    p = weakref.proxy(o)
    self.assertEqual(operator.index(p), 10)
def interpret(func, env, args, storage=None, **kwds):
    assert len(args) == len(func.args)

    # Make a copy, since we're going to mutate our IR!
    func, _ = copy_function(func)

    # If it's a BLZ output, we want an interpreter that streams
    # the processing through in chunks
    if storage is not None:
        if len(func.type.restype.shape) == 0:
            raise TypeError('Require an array, not a scalar, for outputting to BLZ')
        env['stream-outer'] = True
        result_ndim = env['result-ndim'] = len(func.type.restype.shape)
    else:
        # Convert any persistent inputs to memory
        # TODO: should stream the computation in this case
        for i, arg in enumerate(args):
            if isinstance(arg._data, BLZDataDescriptor):
                args[i] = arg[:]

    # Update environment with dynd type information
    dynd_types = dict((arg, get_dynd_type(array))
                      for arg, array in zip(func.args, args)
                      if isinstance(array._data, DyNDDataDescriptor))
    env['dynd-types'] = dynd_types

    # Lift ckernels
    func, env = run_pipeline(func, env, run_time_passes)

    if storage is None:
        # Evaluate once
        values = dict(zip(func.args, args))
        interp = CKernelInterp(values)
        visit(interp, func)
        return interp.result
    else:
        res_shape, res_dt = datashape.to_numpy(func.type.restype)
        dim_size = operator.index(res_shape[0])
        row_size = ndt.type(str(func.type.restype.subarray(1))).data_size
        chunk_size = min(max(1, (1024*1024) // row_size), dim_size)
        # Evaluate by streaming the outermost dimension,
        # and using the BLZ data descriptor's append
        dst_dd = BLZDataDescriptor(blz.zeros((0,) + res_shape[1:], res_dt,
                                             rootdir=storage.path))
        # Loop through all the chunks
        for chunk_start in range(0, dim_size, chunk_size):
            # Tell the interpreter which chunk size to use (last
            # chunk might be smaller)
            chunk_size = min(chunk_size, dim_size - chunk_start)
            # Evaluate the chunk
            args_chunk = [arg[chunk_start:chunk_start+chunk_size]
                          if len(arg.dshape.shape) == result_ndim else arg
                          for arg in args]
            values = dict(zip(func.args, args_chunk))
            interp = CKernelChunkInterp(values, chunk_size, result_ndim)
            visit(interp, func)
            chunk = interp.result._data.dynd_arr()
            dst_dd.append(chunk)
        return blaze.Array(dst_dd)
def __init__(self, table, index):
    self._table = table
    self._index = operator.index(index)
    n = len(table)
    if index < -n or index >= n:
        raise IndexError('index {0} out of range for table with length {1}'
                         .format(index, len(table)))
def test_index_returns_int_subclass(self):
    class BadInt:
        def __index__(self):
            return True

    class BadInt2(int):
        def __index__(self):
            return True

    bad_int = BadInt()
    with self.assertWarns(DeprecationWarning):
        n = operator.index(bad_int)
    self.assertEqual(n, 1)

    bad_int = BadInt2()
    n = operator.index(bad_int)
    self.assertEqual(n, 0)
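# What the two variants of this test pin down: operator.index never consults
# __index__ on objects that are already int instances, so the int subclass
# BadInt2 yields its own value (0); for non-int objects the __index__ result
# is normalized to an exact int (True becomes 1), and recent CPython also
# emits a DeprecationWarning when __index__ returns anything but an exact
# int. A standalone check, assuming CPython 3.10+ warning behaviour:
import operator
import warnings

class Dubious:
    def __index__(self):
        return True  # legal but deprecated: not an exact int

with warnings.catch_warnings(record=True) as caught:
    warnings.simplefilter("always")
    assert operator.index(Dubious()) == 1
print([w.category.__name__ for w in caught])  # ['DeprecationWarning'] on 3.10+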
def _get_abs_string_index(self, idx):
    """Get the absolute index for the list of modules"""
    idx = operator.index(idx)
    if not (-len(self) <= idx < len(self)):
        raise IndexError('index {} is out of range'.format(idx))
    if idx < 0:
        idx += len(self)
    return str(idx)
def _get_item_by_idx(self, iterator, idx):
    """Get the idx-th item of the iterator"""
    size = len(self)
    idx = operator.index(idx)
    if not -size <= idx < size:
        raise IndexError('index {} is out of range'.format(idx))
    idx %= size
    return next(islice(iterator, idx, None))
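# Why these container helpers use operator.index instead of int(): int()
# silently truncates floats, while operator.index only accepts true
# integer-likes, mirroring how built-in sequences validate indices.
import operator

assert int(2.7) == 2          # silent truncation -- would mask caller bugs
try:
    operator.index(2.7)
except TypeError:
    pass                      # floats are refused outright, as in list indexing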
def __init__(self, i):
    # Use operator.index, so Python integers, numpy int scalars, etc. work
    i = operator.index(i)
    if i < 0:
        raise ValueError('Fixed dimensions must be positive')
    self.val = i
def __getitem__(self, key):
    if not isinstance(key, slice):
        child_sbvalue = (self.__sbvalue.GetValueForExpressionPath(
            "[%i]" % operator.index(key)))
        if child_sbvalue and child_sbvalue.IsValid():
            return Value(child_sbvalue)
        raise IndexError("Index '%d' is out of range" % key)
    else:
        return [self[i] for i in xrange(*key.indices(len(self)))]
def bind_func(estruct, dst_dd, src_dd_list):
    for i, (ds, dd) in enumerate(
            izip(self.dshapes, src_dd_list + [dst_dd])):
        shape = [operator.index(dim)
                 for dim in dd.dshape[-len(ds):-1]]
        cshape = getattr(estruct, 'operand_%d' % i)
        for j, dim_size in enumerate(shape):
            cshape[j] = dim_size
def __eq__(self, item):
    """A User is equal to another User if the ids match"""
    if isinstance(item, User):
        return self.id == item.id
    try:
        return self.id == index(item)
    except TypeError:
        return NotImplemented
def _ensure_int(x, name='unknown', must_be='an int'):
    """Ensure a variable is an integer."""
    # This is preferred over numbers.Integral, see:
    # https://github.com/scipy/scipy/pull/7351#issuecomment-299713159
    try:
        x = int(operator.index(x))
    except TypeError:
        raise TypeError('%s must be %s, got %s' % (name, must_be, type(x)))
    return x
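# Behaviour sketch for _ensure_int, assuming numpy is available: integer-like
# scalars (numpy ints, bools) pass through as plain ints, while floats are
# rejected with the contextual message.
import numpy as np

assert _ensure_int(np.int64(3), name='n_jobs') == 3
try:
    _ensure_int(3.5, name='n_jobs')
except TypeError as err:
    assert 'n_jobs must be an int' in str(err)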
def __getitem__(self, key):
    # Just integer indices (no slices) for now
    if not isinstance(key, tuple):
        key = (key,)
    key = tuple(operator.index(i) for i in key)
    blzarr = self.blzarr
    # The returned arrays are temporary buffers,
    # so must be flagged as readonly.
    return DyNDDataDescriptor(nd.array(blzarr[key], access='readonly'))
def __init__(self, mbdd):
    if len(mbdd.dshape) <= 1:
        raise IndexError('Need at least one dimension for iteration')
    self._outer_stride = mbdd.dshape.c_strides[0]
    self._mbdd = mbdd
    self._dshape = mbdd.dshape.subarray(1)
    self._ptr = mbdd.ptr
    self._end = (self._ptr +
                 self._outer_stride * operator.index(mbdd.dshape[0]))
def sanitize_smiles_df(
    df: pd.DataFrame,
    column_levels: SupportsIndex = 2,
    column_padding: Hashable = None,
) -> pd.DataFrame:
    """Sanitize the passed dataframe, canonicalizing the SMILES in its index,
    converting the columns into a MultiIndex and removing duplicate entries.

    Examples
    --------
    .. code-block:: python

        >>> import pandas as pd
        >>> from nanoCAT.recipes import sanitize_smiles_df

        >>> df: pd.DataFrame = ...
        >>> print(df)
                 a
        smiles
        CCCO[H]  1
        CCO[H]   2
        CO[H]    3

        >>> sanitize_smiles_df(df)
                a
              NaN
        smiles
        CCCO    1
        CCO     2
        CO      3

    Parameters
    ----------
    df : :class:`pd.DataFrame <pandas.DataFrame>`
        The dataframe in question.
        The dataframe's index should consist of smiles strings.
    column_levels : :class:`int`
        The number of multiindex column levels that should be in the
        to-be returned dataframe.
    column_padding : :class:`~collections.abc.Hashable`
        The object used as padding for the multiindex levels (where appropriate).

    Returns
    -------
    :class:`pd.DataFrame <pandas.DataFrame>`
        The newly sanitized dataframe.
        Returns either the initially passed dataframe or a copy thereof.

    """  # noqa: E501
    # Sanitize arguments
    column_levels = operator.index(column_levels)
    if column_levels < 1:
        raise ValueError("`column_levels` must be larger than or equal to 1")
    elif isinstance(df.columns, pd.MultiIndex) and len(df.columns.levels) > column_levels:
        raise ValueError("`column_levels` must be larger than or equal to the "
                         "number of MultiIndex levels in `df`")
    elif not isinstance(column_padding, Hashable):
        raise TypeError("`column_padding` expected a hashable object")

    # Sanitize the index
    index = pd.Index(
        [_canonicalize_smiles(i) for i in df.index],
        dtype=df.index.dtype, name=df.index.name,
    )

    # Create or pad a MultiIndex
    padding = (column_levels - 1) * (column_padding,)
    if not isinstance(df.columns, pd.MultiIndex):
        columns = pd.MultiIndex.from_tuples(
            [(i, *padding) for i in df.columns],
            names=(df.columns.name, *padding),
        )
    elif len(df.columns.levels) < column_levels:
        columns = pd.MultiIndex.from_tuples(
            [(*j, *padding) for j in df.columns],
            names=(*df.columns.names, *padding),
        )
    else:
        columns = df.columns.copy()

    mask = ~df.index.duplicated(keep='first') & (df.index != None)  # noqa: E711
    ret = df[mask]
    ret.index = index[mask]
    ret.columns = columns
    return ret
def _reg_pinv(x, reg=0, rank='full', rcond=1e-15):
    """Compute a regularized pseudoinverse of Hermitian matrices.

    Regularization is performed by adding a constant value to each diagonal
    element of the matrix before inversion. This is known as "diagonal
    loading". The loading factor is computed as ``reg * np.trace(x) / len(x)``.

    The pseudo-inverse is computed through SVD decomposition and inverting the
    singular values. When the matrix is rank deficient, some singular values
    will be close to zero and will not be used during the inversion. The
    number of singular values to use can either be manually specified or
    automatically estimated.

    Parameters
    ----------
    x : ndarray, shape (..., n, n)
        Square, Hermitian matrices to invert.
    reg : float
        Regularization parameter. Defaults to 0.
    rank : int | None | 'full'
        This controls the effective rank of the covariance matrix when
        computing the inverse. The rank can be set explicitly by specifying an
        integer value. If ``None``, the rank will be automatically estimated.
        Since applying regularization will always make the covariance matrix
        full rank, the rank is estimated before regularization in this case.
        If 'full', the rank will be estimated after regularization and hence
        will mean using the full rank, unless ``reg=0`` is used.
        Defaults to 'full'.
    rcond : float | 'auto'
        Cutoff for detecting small singular values when attempting to estimate
        the rank of the matrix (``rank='auto'``). Singular values smaller than
        the cutoff are set to zero. When set to 'auto', a cutoff based on
        floating point precision will be used. Defaults to 1e-15.

    Returns
    -------
    x_inv : ndarray, shape (..., n, n)
        The inverted matrix.
    loading_factor : float
        Value added to the diagonal of the matrix during regularization.
    rank : int
        If ``rank`` was set to an integer value, this value is returned,
        else the estimated rank of the matrix, before regularization, is
        returned.
    """
    from ..rank import _estimate_rank_from_s
    if rank is not None and rank != 'full':
        rank = int(operator.index(rank))
    if x.ndim < 2 or x.shape[-2] != x.shape[-1]:
        raise ValueError('Input matrix must be square.')
    if not np.allclose(x, x.conj().swapaxes(-2, -1)):
        raise ValueError('Input matrix must be Hermitian (symmetric)')
    assert x.ndim >= 2 and x.shape[-2] == x.shape[-1]
    n = x.shape[-1]

    # Decompose the matrix, not necessarily positive semidefinite
    from mne.fixes import svd
    U, s, Vh = svd(x, hermitian=True)

    # Estimate the rank before regularization
    tol = 'auto' if rcond == 'auto' else rcond * s[..., :1]
    rank_before = _estimate_rank_from_s(s, tol)

    # Decompose the matrix again after regularization
    loading_factor = reg * np.mean(s, axis=-1)
    if reg:
        U, s, Vh = svd(
            x + loading_factor[..., np.newaxis, np.newaxis] * np.eye(n),
            hermitian=True)

    # Estimate the rank after regularization
    tol = 'auto' if rcond == 'auto' else rcond * s[..., :1]
    rank_after = _estimate_rank_from_s(s, tol)

    # Warn the user if all parameters were kept at their defaults and the
    # matrix is rank deficient.
    if (rank_after < n).any() and reg == 0 and \
            rank == 'full' and rcond == 1e-15:
        warn('Covariance matrix is rank-deficient and no regularization is '
             'done.')
    elif isinstance(rank, int) and rank > n:
        raise ValueError('Invalid value for the rank parameter (%d) given '
                         'the shape of the input matrix (%d x %d).'
                         % (rank, x.shape[0], x.shape[1]))

    # Pick the requested number of singular values
    mask = np.arange(s.shape[-1]).reshape((1,) * (x.ndim - 2) + (-1,))
    if rank is None:
        cmp = ret = rank_before
    elif rank == 'full':
        cmp = rank_after
        ret = rank_before
    else:
        cmp = ret = rank
    mask = mask < np.asarray(cmp)[..., np.newaxis]
    mask &= s > 0

    # Invert only non-zero singular values
    s_inv = np.zeros(s.shape)
    s_inv[mask] = 1. / s[mask]

    # Compute the pseudo inverse
    x_inv = np.matmul(U * s_inv[..., np.newaxis, :], Vh)

    return x_inv, loading_factor, ret
def make_lsq_spline(x, y, t, k=3, w=None, axis=0, check_finite=True):
    r"""Compute the (coefficients of) an LSQ B-spline.

    The result is a linear combination

    .. math::

            S(x) = \sum_j c_j B_j(x; t)

    of the B-spline basis elements, :math:`B_j(x; t)`, which minimizes

    .. math::

        \sum_{j} \left( w_j \times (S(x_j) - y_j) \right)^2

    Parameters
    ----------
    x : array_like, shape (m,)
        Abscissas.
    y : array_like, shape (m, ...)
        Ordinates.
    t : array_like, shape (n + k + 1,).
        Knots.
        Knots and data points must satisfy Schoenberg-Whitney conditions.
    k : int, optional
        B-spline degree. Default is cubic, k=3.
    w : array_like, shape (m,), optional
        Weights for spline fitting. Must be positive. If ``None``,
        then weights are all equal.
        Default is ``None``.
    axis : int, optional
        Interpolation axis. Default is zero.
    check_finite : bool, optional
        Whether to check that the input arrays contain only finite numbers.
        Disabling may give a performance gain, but may result in problems
        (crashes, non-termination) if the inputs do contain infinities or NaNs.
        Default is True.

    Returns
    -------
    b : a BSpline object of the degree `k` with knots `t`.

    Notes
    -----
    The number of data points must be larger than the spline degree `k`.

    Knots `t` must satisfy the Schoenberg-Whitney conditions,
    i.e., there must be a subset of data points ``x[j]`` such that
    ``t[j] < x[j] < t[j+k+1]``, for ``j=0, 1,...,n-k-2``.

    Examples
    --------
    Generate some noisy data:

    >>> x = np.linspace(-3, 3, 50)
    >>> y = np.exp(-x**2) + 0.1 * np.random.randn(50)

    Now fit a smoothing cubic spline with a pre-defined internal knots.
    Here we make the knot vector (k+1)-regular by adding boundary knots:

    >>> from scipy.interpolate import make_lsq_spline, BSpline
    >>> t = [-1, 0, 1]
    >>> k = 3
    >>> t = np.r_[(x[0],)*(k+1),
    ...           t,
    ...           (x[-1],)*(k+1)]
    >>> spl = make_lsq_spline(x, y, t, k)

    For comparison, we also construct an interpolating spline for the same
    set of data:

    >>> from scipy.interpolate import make_interp_spline
    >>> spl_i = make_interp_spline(x, y)

    Plot both:

    >>> import matplotlib.pyplot as plt
    >>> xs = np.linspace(-3, 3, 100)
    >>> plt.plot(x, y, 'ro', ms=5)
    >>> plt.plot(xs, spl(xs), 'g-', lw=3, label='LSQ spline')
    >>> plt.plot(xs, spl_i(xs), 'b-', lw=3, alpha=0.7, label='interp spline')
    >>> plt.legend(loc='best')
    >>> plt.show()

    **NaN handling**: If the input arrays contain ``nan`` values, the result is
    not useful since the underlying spline fitting routines cannot deal with
    ``nan``. A workaround is to use zero weights for not-a-number data points:

    >>> y[8] = np.nan
    >>> w = np.isnan(y)
    >>> y[w] = 0.
    >>> tck = make_lsq_spline(x, y, t, w=~w)

    Notice the need to replace a ``nan`` by a numerical value (precise value
    does not matter as long as the corresponding weight is zero.)

    See Also
    --------
    BSpline : base class representing the B-spline objects
    make_interp_spline : a similar factory function for interpolating splines
    LSQUnivariateSpline : a FITPACK-based spline fitting routine
    splrep : a FITPACK-based fitting routine
    """
    x = _as_float_array(x, check_finite)
    y = _as_float_array(y, check_finite)
    t = _as_float_array(t, check_finite)
    if w is not None:
        w = _as_float_array(w, check_finite)
    else:
        w = np.ones_like(x)
    k = operator.index(k)

    axis = normalize_axis_index(axis, y.ndim)

    y = np.rollaxis(y, axis)    # now internally interp axis is zero

    if x.ndim != 1 or np.any(x[1:] - x[:-1] <= 0):
        raise ValueError("Expect x to be a 1-D sorted array_like.")
    if x.shape[0] < k+1:
        raise ValueError("Need more x points.")
    if k < 0:
        raise ValueError("Expect non-negative k.")
    if t.ndim != 1 or np.any(t[1:] - t[:-1] < 0):
        raise ValueError("Expect t to be a 1-D sorted array_like.")
    if x.size != y.shape[0]:
        raise ValueError('x & y are incompatible.')
    if k > 0 and np.any((x < t[k]) | (x > t[-k])):
        raise ValueError('Out of bounds w/ x = %s.' % x)
    if x.size != w.size:
        raise ValueError('Incompatible weights.')

    # number of coefficients
    n = t.size - k - 1

    # construct A.T @ A and rhs with A the collocation matrix, and
    # rhs = A.T @ y for solving the LSQ problem ``A.T @ A @ c = A.T @ y``
    lower = True
    extradim = prod(y.shape[1:])
    ab = np.zeros((k+1, n), dtype=np.float_, order='F')
    rhs = np.zeros((n, extradim), dtype=y.dtype, order='F')
    _bspl._norm_eq_lsq(x, t, k,
                       y.reshape(-1, extradim),
                       w,
                       ab, rhs)
    rhs = rhs.reshape((n,) + y.shape[1:])

    # have observation matrix & rhs, can solve the LSQ problem
    cho_decomp = cholesky_banded(ab, overwrite_ab=True, lower=lower,
                                 check_finite=check_finite)
    c = cho_solve_banded((cho_decomp, lower), rhs, overwrite_b=True,
                         check_finite=check_finite)

    c = np.ascontiguousarray(c)
    return BSpline.construct_fast(t, c, k, axis=axis)
def nchan(self, nchan):
    self['OBSNCHAN'] = operator.index(nchan)
def _check_max_bytes(self, max_bytes):
    if max_bytes is None:
        return
    max_bytes = operator.index(max_bytes)
    if max_bytes < 1:
        raise ValueError("max_bytes must be >= 1")
def __setitem__(self, idx, module):
    idx = operator.index(idx)
    return setattr(self, str(idx), module)
def __setitem__(self, idx, param):
    idx = operator.index(idx)
    return self.register_parameter(str(idx), param)
def eye(N, M=None, k=0, dtype=float, order='C', *, like=None):
    """
    Return a 2-D array with ones on the diagonal and zeros elsewhere.

    Parameters
    ----------
    N : int
        Number of rows in the output.
    M : int, optional
        Number of columns in the output. If None, defaults to `N`.
    k : int, optional
        Index of the diagonal: 0 (the default) refers to the main diagonal,
        a positive value refers to an upper diagonal, and a negative value
        to a lower diagonal.
    dtype : data-type, optional
        Data-type of the returned array.
    order : {'C', 'F'}, optional
        Whether the output should be stored in row-major (C-style) or
        column-major (Fortran-style) order in memory.

        .. versionadded:: 1.14.0

    ${ARRAY_FUNCTION_LIKE}

        .. versionadded:: 1.20.0

    Returns
    -------
    I : ndarray of shape (N,M)
        An array where all elements are equal to zero, except for the `k`-th
        diagonal, whose values are equal to one.

    See Also
    --------
    identity : (almost) equivalent function
    diag : diagonal 2-D array from a 1-D array specified by the user.

    Examples
    --------
    >>> np.eye(2, dtype=int)
    array([[1, 0],
           [0, 1]])
    >>> np.eye(3, k=1)
    array([[0.,  1.,  0.],
           [0.,  0.,  1.],
           [0.,  0.,  0.]])

    """
    if like is not None:
        return _eye_with_like(N, M=M, k=k, dtype=dtype, order=order, like=like)
    if M is None:
        M = N
    m = zeros((N, M), dtype=dtype, order=order)
    if k >= M:
        return m
    # Ensure M and k are integers, so we don't get any surprise casting
    # results in the expressions `M-k` and `M+1` used below. This avoids
    # a problem with inputs with type (for example) np.uint64.
    M = operator.index(M)
    k = operator.index(k)
    if k >= 0:
        i = k
    else:
        i = (-k) * M
    m[:M - k].flat[i::M + 1] = 1
    return m
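# An illustration of the surprise the operator.index calls in eye() guard
# against: mixing signed and unsigned numpy integers promotes to float64
# (there is no common integer type that holds both), so expressions such as
# M - k would silently stop being exact integers.
import operator
import numpy as np

print(np.result_type(np.int64, np.uint64))  # float64
M = operator.index(np.uint64(4))            # back to a plain Python int
print(M - 1, M + 1)                         # 3 5 -- exact integer arithmetic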
def as_int(n, strict=True):
    """
    Convert the argument to a builtin integer.

    The return value is guaranteed to be equal to the input. ValueError is
    raised if the input has a non-integral value. When ``strict`` is True, this
    uses `__index__ <https://docs.python.org/3/reference/datamodel.html#object.__index__>`_
    and when it is False it uses ``int``.

    Examples
    ========

    >>> from sympy.core.compatibility import as_int
    >>> from sympy import sqrt, S

    The function is primarily concerned with sanitizing input for
    functions that need to work with builtin integers, so anything that
    is unambiguously an integer should be returned as an int:

    >>> as_int(S(3))
    3

    Floats, being of limited precision, are not assumed to be exact and
    will raise an error unless the ``strict`` flag is False. This
    precision issue becomes apparent for large floating point numbers:

    >>> big = 1e23
    >>> type(big) is float
    True
    >>> big == int(big)
    True
    >>> as_int(big)
    Traceback (most recent call last):
    ...
    ValueError: ... is not an integer
    >>> as_int(big, strict=False)
    99999999999999991611392

    Input that might be a complex representation of an integer value is
    also rejected by default:

    >>> one = sqrt(3 + 2*sqrt(2)) - sqrt(2)
    >>> int(one) == 1
    True
    >>> as_int(one)
    Traceback (most recent call last):
    ...
    ValueError: ... is not an integer
    """
    if strict:
        try:
            if type(n) is bool:
                raise TypeError
            return operator.index(n)
        except TypeError:
            raise ValueError('%s is not an integer' % (n,))
    else:
        try:
            result = int(n)
        except TypeError:
            raise ValueError('%s is not an integer' % (n,))
        if n != result:
            raise ValueError('%s is not an integer' % (n,))
        return result
def __getitem__(self, key):
    if isinstance(key, slice):
        cls = type(self)
        return cls(self._components[key])
    index = operator.index(key)
    return self._components[index]
def random_bcoo(key, shape, *, dtype=jnp.float_, indices_dtype=jnp.int_,
                nse=0.2, n_batch=0, n_dense=0, unique_indices=True,
                sorted_indices=False, generator=random.uniform, **kwds):
    """Generate a random BCOO matrix.

    Args:
      key : random.PRNGKey to be passed to ``generator`` function.
      shape : tuple specifying the shape of the array to be generated.
      dtype : dtype of the array to be generated.
      indices_dtype: dtype of the BCOO indices.
      nse : number of specified elements in the matrix, or if 0 < nse < 1, a
        fraction of sparse dimensions to be specified (default: 0.2).
      n_batch : number of batch dimensions. must satisfy ``n_batch >= 0`` and
        ``n_batch + n_dense <= len(shape)``.
      n_dense : number of dense dimensions. must satisfy ``n_dense >= 0`` and
        ``n_batch + n_dense <= len(shape)``.
      unique_indices : boolean specifying whether indices should be unique
        (default: True).
      sorted_indices : boolean specifying whether indices should be row-sorted
        in lexicographical order (default: False).
      generator : function for generating random values accepting a key, shape,
        and dtype. It defaults to :func:`jax.random.uniform`, and may be any
        function with a similar signature.
      **kwds : additional keyword arguments to pass to ``generator``.

    Returns:
      arr : a sparse.BCOO array with the specified properties.
    """
    shape = tuple(map(operator.index, shape))
    n_batch = operator.index(n_batch)
    n_dense = operator.index(n_dense)
    if n_batch < 0 or n_dense < 0 or n_batch + n_dense > len(shape):
        raise ValueError(
            f"Invalid n_batch={n_batch}, n_dense={n_dense} for shape={shape}")
    n_sparse = len(shape) - n_batch - n_dense
    batch_shape, sparse_shape, dense_shape = map(
        tuple, split_list(shape, [n_batch, n_sparse]))
    batch_size = np.prod(batch_shape)
    sparse_size = np.prod(sparse_shape)
    if not 0 <= nse < sparse_size:
        raise ValueError(
            f"got nse={nse}, expected to be between 0 and {sparse_size}")
    if 0 < nse < 1:
        nse = int(np.ceil(nse * sparse_size))
    nse = operator.index(nse)

    data_shape = batch_shape + (nse,) + dense_shape
    indices_shape = batch_shape + (nse, n_sparse)

    @vmap
    def _indices(key):
        if not sparse_shape:
            return jnp.empty((nse, n_sparse), dtype=int)
        flat_ind = random.choice(key, sparse_size, shape=(nse,),
                                 replace=not unique_indices)
        return jnp.column_stack(jnp.unravel_index(flat_ind, sparse_shape))

    keys = random.split(key, batch_size + 1)
    data_key, index_keys = keys[0], keys[1:]
    data = generator(data_key, shape=data_shape, dtype=dtype, **kwds)
    indices = _indices(index_keys).reshape(indices_shape).astype(indices_dtype)
    mat = sparse.BCOO((data, indices), shape=shape)
    return mat.sort_indices() if sorted_indices else mat
def _get_bin_edges(a, bins, range, weights):
    """
    Compute the bins used internally by `histogram`.

    Parameters
    ==========
    a : ndarray
        Ravelled data array
    bins, range
        Forwarded arguments from `histogram`.
    weights : ndarray, optional
        Ravelled weights array, or None

    Returns
    =======
    bin_edges : ndarray
        Array of bin edges
    uniform_bins : (Number, Number, int):
        The upper bound, lowerbound, and number of bins, used in the optimized
        implementation of `histogram` that works on uniform bins.
    """
    # parse the overloaded bins argument
    n_equal_bins = None
    bin_edges = None

    if isinstance(bins, str):
        bin_name = bins
        # if `bins` is a string for an automatic method,
        # this will replace it with the number of bins calculated
        if bin_name not in _hist_bin_selectors:
            raise ValueError(
                "{!r} is not a valid estimator for `bins`".format(bin_name))
        if weights is not None:
            raise TypeError("Automated estimation of the number of "
                            "bins is not supported for weighted data")

        first_edge, last_edge = _get_outer_edges(a, range)

        # truncate the range if needed
        if range is not None:
            keep = a >= first_edge
            keep &= a <= last_edge
            if not np.logical_and.reduce(keep):
                a = a[keep]

        if a.size == 0:
            n_equal_bins = 1
        else:
            # Do not call selectors on empty arrays
            width = _hist_bin_selectors[bin_name](a, (first_edge, last_edge))
            if width:
                n_equal_bins = int(np.ceil(
                    _unsigned_subtract(last_edge, first_edge) / width))
            else:
                # Width can be zero for some estimators, e.g. FD when
                # the IQR of the data is zero.
                n_equal_bins = 1

    elif np.ndim(bins) == 0:
        try:
            n_equal_bins = operator.index(bins)
        except TypeError as e:
            raise TypeError(
                "`bins` must be an integer, a string, or an array") from e
        if n_equal_bins < 1:
            raise ValueError("`bins` must be positive, when an integer")

    else:
        raise ValueError("`bins` must be 1d, when an array")

    return bin_edges, n_equal_bins
def tabulate_neighbors(L, kind):
    r"""Tabulate the root-2 neighbors on the 3D cubic lattice with PBC.

    Parameters
    ----------
    L : int or length-3 array
        the lattice is L \times L \times L if L is a scalar,
        otherwise L[0]*L[1]*L[2]
    kind : callable or str
        ``kind(site, L)`` returns a list of neighbors of ``site``.

    Returns
    -------
    neighbors : array, shape(Nsite, NUM_NEIGHB + 1)

    Notes
    -----
    The sites of the lattice are indexed by a flat index,
    ``site = 0, 1, ..., Nsite-1``, where ``Nsite = L**3``.

    The ``neighbors`` array is: ``nk = neighbors[site, 0]`` is the number
    of neighbors of ``site`` and ``neighbors[site, 1:nk+1]`` are the
    neighbor sites.

    E.g., for the following 2D arrangement, the format is

            4
            |
        2 - 1 - 3
            |
            5

    ``neighbors[1, 0] == 4`` since there are four neighbors, and
    ``neighbors[1, 1:5] == [2, 3, 4, 5]`` (the order is not guaranteed).
    """
    try:
        L = operator.index(L)
        L = (L,) * dimension(kind)
    except TypeError:
        # TODO: allow 2D w/ kind='sc' and L=(3, 4)
        assert len(L) == dimension(kind)

    if callable(kind):
        get_neighbors = kind
    else:
        get_neighbors = get_neighbors_selector(kind)

    # total # of sites
    Nsite = 1
    for ll in L:
        Nsite *= ll

    # construct lists of neighbors for each site
    n_lst = []
    for site in range(Nsite):
        lst = get_neighbors(site, L)
        n_lst.append([len(lst)] + lst)

    # max coordination number
    max_num_neighb = max([e[0] for e in n_lst])

    # copy neighbors lists into a numpy array; rows with fewer neighbors
    # than the maximum are zero-padded
    neighb = np.zeros((Nsite, max_num_neighb + 1), dtype=int)
    for site in range(Nsite):
        neighb[site, :len(n_lst[site])] = n_lst[site]

    return neighb
def _get_bin_edges(a, bins, range, weights):
    """
    Computes the bins used internally by `histogram`.

    Parameters
    ==========
    a : ndarray
        Ravelled data array
    bins, range
        Forwarded arguments from `histogram`.
    weights : ndarray, optional
        Ravelled weights array, or None

    Returns
    =======
    bin_edges : ndarray
        Array of bin edges
    uniform_bins : (Number, Number, int):
        The upper bound, lowerbound, and number of bins, used in the optimized
        implementation of `histogram` that works on uniform bins.
    """
    # parse the overloaded bins argument
    n_equal_bins = None
    bin_edges = None

    if isinstance(bins, str):
        bin_name = bins
        # if `bins` is a string for an automatic method,
        # this will replace it with the number of bins calculated
        if bin_name not in _hist_bin_selectors:
            raise ValueError(
                "{!r} is not a valid estimator for `bins`".format(bin_name))
        if weights is not None:
            raise TypeError("Automated estimation of the number of "
                            "bins is not supported for weighted data")

        first_edge, last_edge = _get_outer_edges(a, range)

        # truncate the range if needed
        if range is not None:
            keep = (a >= first_edge)
            keep &= (a <= last_edge)
            if not np.logical_and.reduce(keep):
                a = a[keep]

        if a.size == 0:
            n_equal_bins = 1
        else:
            # Do not call selectors on empty arrays
            width = _hist_bin_selectors[bin_name](a, (first_edge, last_edge))
            if width:
                n_equal_bins = int(np.ceil(
                    _unsigned_subtract(last_edge, first_edge) / width))
            else:
                # Width can be zero for some estimators, e.g. FD when
                # the IQR of the data is zero.
                n_equal_bins = 1

    elif np.ndim(bins) == 0:
        try:
            n_equal_bins = operator.index(bins)
        except TypeError:
            raise TypeError('`bins` must be an integer, a string, or an array')
        if n_equal_bins < 1:
            raise ValueError('`bins` must be positive, when an integer')

        first_edge, last_edge = _get_outer_edges(a, range)

    elif np.ndim(bins) == 1:
        bin_edges = np.asarray(bins)
        if np.any(bin_edges[:-1] > bin_edges[1:]):
            raise ValueError(
                '`bins` must increase monotonically, when an array')

    else:
        raise ValueError('`bins` must be 1d, when an array')

    if n_equal_bins is not None:
        # gh-10322 means that type resolution rules are dependent on array
        # shapes. To avoid this causing problems, we pick a type now and
        # stick with it throughout.
        bin_type = np.result_type(first_edge, last_edge, a)
        if np.issubdtype(bin_type, np.integer):
            bin_type = np.result_type(bin_type, float)

        # bin edges must be computed
        bin_edges = np.linspace(
            first_edge, last_edge, n_equal_bins + 1,
            endpoint=True, dtype=bin_type)
        return bin_edges, (first_edge, last_edge, n_equal_bins)
    else:
        return bin_edges, None
def overlap(self, overlap):
    self['OVERLAP'] = operator.index(overlap)
def as_integer_or_none(value):
    return None if value is None else operator.index(value)
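# Behaviour sketch: None passes through untouched, integer-likes are
# normalized through __index__, and anything else raises TypeError.
assert as_integer_or_none(None) is None
assert as_integer_or_none(True) == 1  # bool is an int subclass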
def _canonicalize_dimension(dim):
    if type(dim) in _DIMENSION_TYPES:
        return dim
    else:
        return operator.index(dim)
def linspace(start, stop, num=50, endpoint=True, retstep=False, dtype=None,
             axis=0):
    """
    Return evenly spaced numbers over a specified interval.

    Returns `num` evenly spaced samples, calculated over the
    interval [`start`, `stop`].

    The endpoint of the interval can optionally be excluded.

    .. versionchanged:: 1.16.0
        Non-scalar `start` and `stop` are now supported.

    Parameters
    ----------
    start : array_like
        The starting value of the sequence.
    stop : array_like
        The end value of the sequence, unless `endpoint` is set to False.
        In that case, the sequence consists of all but the last of ``num + 1``
        evenly spaced samples, so that `stop` is excluded. Note that the step
        size changes when `endpoint` is False.
    num : int, optional
        Number of samples to generate. Default is 50. Must be non-negative.
    endpoint : bool, optional
        If True, `stop` is the last sample. Otherwise, it is not included.
        Default is True.
    retstep : bool, optional
        If True, return (`samples`, `step`), where `step` is the spacing
        between samples.
    dtype : dtype, optional
        The type of the output array. If `dtype` is not given, infer the data
        type from the other input arguments.

        .. versionadded:: 1.9.0

    axis : int, optional
        The axis in the result to store the samples. Relevant only if start
        or stop are array-like. By default (0), the samples will be along a
        new axis inserted at the beginning. Use -1 to get an axis at the end.

        .. versionadded:: 1.16.0

    Returns
    -------
    samples : ndarray
        There are `num` equally spaced samples in the closed interval
        ``[start, stop]`` or the half-open interval ``[start, stop)``
        (depending on whether `endpoint` is True or False).
    step : float, optional
        Only returned if `retstep` is True

        Size of spacing between samples.

    See Also
    --------
    arange : Similar to `linspace`, but uses a step size (instead of the
             number of samples).
    geomspace : Similar to `linspace`, but with numbers spaced evenly on a
                log scale (a geometric progression).
    logspace : Similar to `geomspace`, but with the end points specified as
               logarithms.

    Examples
    --------
    >>> np.linspace(2.0, 3.0, num=5)
    array([2.  , 2.25, 2.5 , 2.75, 3.  ])
    >>> np.linspace(2.0, 3.0, num=5, endpoint=False)
    array([2. , 2.2, 2.4, 2.6, 2.8])
    >>> np.linspace(2.0, 3.0, num=5, retstep=True)
    (array([2.  , 2.25, 2.5 , 2.75, 3.  ]), 0.25)

    Graphical illustration:

    >>> import matplotlib.pyplot as plt
    >>> N = 8
    >>> y = np.zeros(N)
    >>> x1 = np.linspace(0, 10, N, endpoint=True)
    >>> x2 = np.linspace(0, 10, N, endpoint=False)
    >>> plt.plot(x1, y, 'o')
    [<matplotlib.lines.Line2D object at 0x...>]
    >>> plt.plot(x2, y + 0.5, 'o')
    [<matplotlib.lines.Line2D object at 0x...>]
    >>> plt.ylim([-0.5, 1])
    (-0.5, 1)
    >>> plt.show()

    """
    try:
        num = operator.index(num)
    except TypeError:
        raise TypeError(
            "object of type {} cannot be safely interpreted as an integer."
            .format(type(num)))

    if num < 0:
        raise ValueError("Number of samples, %s, must be non-negative." % num)
    div = (num - 1) if endpoint else num

    # Convert float/complex array scalars to float, gh-3504
    # and make sure one can use variables that have an __array_interface__,
    # gh-6634
    start = asanyarray(start) * 1.0
    stop = asanyarray(stop) * 1.0

    dt = result_type(start, stop, float(num))
    if dtype is None:
        dtype = dt

    delta = stop - start
    y = _nx.arange(0, num, dtype=dt).reshape((-1,) + (1,) * ndim(delta))
    # In-place multiplication y *= delta/div is faster, but prevents the
    # multiplicant from overriding what class is produced, and thus prevents,
    # e.g. use of Quantities, see gh-7142. Hence, we multiply in place only
    # for standard scalar types.
    _mult_inplace = _nx.isscalar(delta)
    if div > 0:
        step = delta / div
        if _nx.any(step == 0):
            # Special handling for denormal numbers, gh-5437
            y /= div
            if _mult_inplace:
                y *= delta
            else:
                y = y * delta
        else:
            if _mult_inplace:
                y *= step
            else:
                y = y * step
    else:
        # sequences with 0 items or 1 item with endpoint=True (i.e. div <= 0)
        # have an undefined step
        step = NaN
        # Multiply with delta to allow possible override of output class.
        y = y * delta

    y += start

    if endpoint and num > 1:
        y[-1] = stop

    if axis != 0:
        y = _nx.moveaxis(y, 0, axis)

    if retstep:
        return y.astype(dtype, copy=False), step
    else:
        return y.astype(dtype, copy=False)
def _quadratic_assignment_faq(
    A: np.ndarray,
    B: np.ndarray,
    maximize: bool = False,
    partial_match: Optional[np.ndarray] = None,
    S: Optional[np.ndarray] = None,
    rng: Optional[Union[int, np.random.RandomState, np.random.Generator]] = None,
    P0: Union[Literal["barycenter", "randomized"], np.ndarray] = "barycenter",
    shuffle_input: bool = False,
    maxiter: int = 30,
    tol: float = 0.03,
) -> OptimizeResult:
    r"""Solve the quadratic assignment problem (approximately).

    This function solves the Quadratic Assignment Problem (QAP) and the
    Graph Matching Problem (GMP) using the Fast Approximate QAP Algorithm
    (FAQ) [1]_.

    Quadratic assignment solves problems of the following form:

    .. math::

        \min_P & \ {\ \text{trace}(A^T P B P^T)}\\
        \mbox{s.t. } & {P \ \epsilon \ \mathcal{P}}\\

    where :math:`\mathcal{P}` is the set of all permutation matrices, and
    :math:`A` and :math:`B` are square matrices.

    Graph matching tries to *maximize* the same objective function.
    This algorithm can be thought of as finding the alignment of the
    nodes of two graphs that minimizes the number of induced edge
    disagreements, or, in the case of weighted graphs, the sum of squared
    edge weight differences.

    Note that the quadratic assignment problem is NP-hard, is not
    known to be solvable in polynomial time, and is computationally
    intractable. Therefore, the results given are approximations,
    not guaranteed to be exact solutions.

    Parameters
    ----------
    A : 2d-array, square
        The square matrix :math:`A` in the objective function above.
    B : 2d-array, square
        The square matrix :math:`B` in the objective function above.
    method : str in {'faq', '2opt'} (default: 'faq')
        The algorithm used to solve the problem. This is the method-specific
        documentation for 'faq'. :ref:`'2opt' <optimize.qap-2opt>` is also
        available.

    Options
    -------
    maximize : bool (default = False)
        Setting `maximize` to ``True`` solves the Graph Matching Problem (GMP)
        rather than the Quadratic Assignment Problem (QAP). This is
        accomplished through trivial negation of the objective function.
    rng : {None, int, `~np.random.RandomState`, `~np.random.Generator`}
        This parameter defines the object to use for drawing random
        variates.
        If `rng` is ``None`` the `~np.random.RandomState` singleton is used.
        If `rng` is an int, a new ``RandomState`` instance is used, seeded
        with `rng`.
        If `rng` is already a ``RandomState`` or ``Generator`` instance, then
        that object is used.
        Default is None.
    partial_match : 2d-array of integers, optional, (default = None)
        Allows the user to fix part of the matching between the two matrices.
        In the literature, a partial match is also known as a "seed".

        Each row of `partial_match` specifies the indices of a pair of
        corresponding nodes, that is, node ``partial_match[i, 0]`` of `A` is
        matched to node ``partial_match[i, 1]`` of `B`. Accordingly,
        ``partial_match`` is an array of size ``(m, 2)``, where ``m`` is not
        greater than the number of nodes, :math:`n`.
    S : 2d-array, square
        A similarity matrix. Should be same shape as ``A`` and ``B``.

        Note: the scale of `S` may affect the weight placed on the term
        :math:`\text{trace}(S^T P)` relative to :math:`\text{trace}(A^T PBP^T)`
        during the optimization process.
    P0 : 2d-array, "barycenter", or "randomized" (default = "barycenter")
        The initial (guess) permutation matrix or search "position" `P0`.

        `P0` need not be a proper permutation matrix;
        however, it must be :math:`m' x m'`, where :math:`m' = n - m`,
        and it must be doubly stochastic: each of its rows and columns must
        sum to 1.

        If unspecified or ``"barycenter"``, the non-informative "flat doubly
        stochastic matrix" :math:`J = 1*1^T/m'`, where :math:`1` is a
        :math:`m' \times 1` array of ones, is used. This is the "barycenter"
        of the search space of doubly-stochastic matrices.

        If ``"randomized"``, the algorithm will start from the randomized
        initial search position :math:`P_0 = (J + K)/2`, where :math:`J` is
        the "barycenter" and :math:`K` is a random doubly stochastic matrix.
    shuffle_input : bool (default = False)
        To avoid artificially high or low matching due to inherent sorting of
        input matrices, gives users the option to shuffle the nodes. Results
        are then unshuffled so that the returned results correspond with the
        node order of inputs. Shuffling may cause the algorithm to be
        non-deterministic, unless a random seed is set or an `rng` option is
        provided.
    maxiter : int, positive (default = 30)
        Integer specifying the max number of Frank-Wolfe iterations performed.
    tol : float (default = 0.03)
        A threshold for the stopping criterion. Frank-Wolfe
        iteration terminates when the change in search position between
        iterations is sufficiently small, that is, when the relative Frobenius
        norm, :math:`\frac{||P_{i}-P_{i+1}||_F}{\sqrt{len(P_{i})}} \leq tol`,
        where :math:`i` is the iteration number.

    Returns
    -------
    res : OptimizeResult
        A :class:`scipy.optimize.OptimizeResult` containing the following
        fields.

        col_ind : 1-D array
            An array of column indices corresponding with the best
            permutation of the nodes of `B` found.
        fun : float
            The corresponding value of the objective function.
        nit : int
            The number of Frank-Wolfe iterations performed.

    Notes
    -----
    The algorithm may be sensitive to the initial permutation matrix (or
    search "position") due to the possibility of several local minima
    within the feasible region. A barycenter initialization is more likely to
    result in a better solution than a single random initialization. However,
    calling ``quadratic_assignment`` several times with different random
    initializations may result in a better optimum at the cost of longer
    total execution time.

    Examples
    --------
    As mentioned above, a barycenter initialization often results in a better
    solution than a single random initialization.

    >>> np.random.seed(0)
    >>> n = 15
    >>> A = np.random.rand(n, n)
    >>> B = np.random.rand(n, n)
    >>> res = quadratic_assignment(A, B)  # FAQ is default method
    >>> print(res.fun)
    46.871483385480545  # may vary

    >>> options = {"P0": "randomized"}  # use randomized initialization
    >>> res = quadratic_assignment(A, B, options=options)
    >>> print(res.fun)
    47.224831071310625  # may vary

    However, consider running from several randomized initializations and
    keeping the best result.

    >>> res = min([quadratic_assignment(A, B, options=options)
    ...            for i in range(30)], key=lambda x: x.fun)
    >>> print(res.fun)
    46.671852533681516  # may vary

    The '2-opt' method can be used to further refine the results.

    >>> options = {"partial_guess": np.array([np.arange(n), res.col_ind]).T}
    >>> res = quadratic_assignment(A, B, method="2opt", options=options)
    >>> print(res.fun)
    46.47160735721583  # may vary

    References
    ----------
    .. [1] J.T. Vogelstein, J.M. Conroy, V. Lyzinski, L.J. Podrazik,
           S.G. Kratzer, E.T. Harley, D.E. Fishkind, R.J. Vogelstein, and
           C.E. Priebe, "Fast approximate quadratic programming for graph
           matching," PLOS one, vol. 10, no. 4, p. e0121002, 2015,
           :doi:`10.1371/journal.pone.0121002`

    .. [2] D. Fishkind, S. Adali, H. Patsolic, L. Meng, D. Singh, V. Lyzinski,
           C. Priebe, "Seeded graph matching", Pattern Recognit. 87 (2019):
           203-215, :doi:`10.1016/j.patcog.2018.09.014`
    """
    maxiter = operator.index(maxiter)

    # ValueError check
    A, B, partial_match_value = _common_input_validation(A, B, partial_match)

    msg = None
    if isinstance(P0, str) and P0 not in {"barycenter", "randomized"}:
        msg = "Invalid 'P0' parameter string"
    elif maxiter <= 0:
        msg = "'maxiter' must be a positive integer"
    elif tol <= 0:
        msg = "'tol' must be a positive float"
    if msg is not None:
        raise ValueError(msg)

    if not isinstance(S, np.ndarray):
        raise ValueError("`S` must be an ndarray")
    elif S.shape != (S.shape[0], S.shape[0]):
        raise ValueError("`S` must be square")
    elif S.shape != A.shape:
        raise ValueError("`S`, `A`, and `B` matrices must be of equal size")
    else:
        s_value = S

    rng = check_random_state(rng)
    n = A.shape[0]  # number of vertices in graphs
    n_seeds = partial_match_value.shape[0]  # number of seeds
    n_unseed = n - n_seeds

    # check outlier cases
    if n == 0 or partial_match_value.shape[0] == n:
        # Cannot assume partial_match is sorted.
        partial_match_value = np.row_stack(
            sorted(partial_match_value, key=lambda x: x[0]))
        score = _calc_score(A, B, s_value, partial_match_value[:, 1])
        res = {"col_ind": partial_match_value[:, 1], "fun": score, "nit": 0}
        return OptimizeResult(res)

    obj_func_scalar = 1
    if maximize:
        obj_func_scalar = -1

    nonseed_B = np.setdiff1d(range(n), partial_match_value[:, 1])
    perm_S = np.copy(nonseed_B)
    if shuffle_input:
        nonseed_B = rng.permutation(nonseed_B)
        # shuffle_input to avoid results from inputs that were already matched

    nonseed_A = np.setdiff1d(range(n), partial_match_value[:, 0])
    perm_A = np.concatenate([partial_match_value[:, 0], nonseed_A])
    perm_B = np.concatenate([partial_match_value[:, 1], nonseed_B])

    s_value = s_value[:, perm_B]

    # definitions according to Seeded Graph Matching [2].
    A11, A12, A21, A22 = _split_matrix(A[perm_A][:, perm_A], n_seeds)
    B11, B12, B21, B22 = _split_matrix(B[perm_B][:, perm_B], n_seeds)
    S22 = s_value[perm_S, n_seeds:]

    # [1] Algorithm 1 Line 1 - choose initialization
    if isinstance(P0, str):
        # initialize J, a doubly stochastic barycenter
        J = np.ones((n_unseed, n_unseed)) / n_unseed
        if P0 == "barycenter":
            P = J
        elif P0 == "randomized":
            # generate a nxn matrix where each entry is a random number [0, 1]
            # would use rand, but Generators don't have it
            # would use random, but old mtrand.RandomStates don't have it
            K = rng.uniform(size=(n_unseed, n_unseed))
            # Sinkhorn balancing
            K = _doubly_stochastic(K)
            P = J * 0.5 + K * 0.5
    elif isinstance(P0, np.ndarray):
        P0 = np.atleast_2d(P0)
        _check_init_input(P0, n_unseed)
        invert_inds = np.argsort(nonseed_B)
        perm_nonseed_B = np.argsort(invert_inds)
        P = P0[:, perm_nonseed_B]
    else:
        msg = "`init` must either be of type str or np.ndarray."
        raise TypeError(msg)

    const_sum = A21 @ B21.T + A12.T @ B12 + S22

    # [1] Algorithm 1 Line 2 - loop while stopping criteria not met
    for n_iter in range(1, maxiter + 1):
        # [1] Algorithm 1 Line 3 - compute the gradient of f(P) = -tr(APB^tP^t)
        grad_fp = const_sum + A22 @ P @ B22.T + A22.T @ P @ B22
        # [1] Algorithm 1 Line 4 - get direction Q by solving Eq. 8
        _, cols = linear_sum_assignment(grad_fp, maximize=maximize)
        Q = np.eye(n_unseed)[cols]

        # [1] Algorithm 1 Line 5 - compute the step size
        # Noting that e.g. trace(Ax) = trace(A)*x, expand and re-collect
        # terms as ax**2 + bx + c. c does not affect location of minimum
        # and can be ignored. Also, note that trace(A@B) = (A.T*B).sum();
        # apply where possible for efficiency.
        R = P - Q
        b21 = ((R.T @ A21) * B21).sum()
        b12 = ((R.T @ A12.T) * B12.T).sum()
        AR22 = A22.T @ R
        BR22 = B22 @ R.T
        b22a = (AR22 * B22.T[cols]).sum()
        b22b = (A22 * BR22[cols]).sum()
        s = (S22 * R).sum()
        a = (AR22.T * BR22).sum()
        b = b21 + b12 + b22a + b22b + s
        # critical point of ax^2 + bx + c is at x = -b/(2*a)
        # if a * obj_func_scalar > 0, it is a minimum
        # if minimum is not in [0, 1], only endpoints need to be considered
        if a * obj_func_scalar > 0 and 0 <= -b / (2 * a) <= 1:
            alpha = -b / (2 * a)
        else:
            alpha = np.argmin([0, (b + a) * obj_func_scalar])

        # [1] Algorithm 1 Line 6 - Update P
        P_i1 = alpha * P + (1 - alpha) * Q
        if np.linalg.norm(P - P_i1) / np.sqrt(n_unseed) < tol:
            P = P_i1
            break
        P = P_i1
    # [1] Algorithm 1 Line 7 - end main loop

    # [1] Algorithm 1 Line 8 - project onto the set of permutation matrices
    _, col = linear_sum_assignment(-P)
    perm = np.concatenate((np.arange(n_seeds), col + n_seeds))

    unshuffled_perm = np.zeros(n, dtype=int)
    unshuffled_perm[perm_A] = perm_B[perm]

    score = _calc_score(A, B, s_value, unshuffled_perm)

    res = {"col_ind": unshuffled_perm, "fun": score, "nit": n_iter}

    return OptimizeResult(res)
def test_uid_index(self):
    self.assertEqual(operator.index(UID(1)), 1)
def make_interp_spline(x, y, k=3, t=None, bc_type=None, axis=0, check_finite=True): """Compute the (coefficients of) interpolating B-spline. Parameters ---------- x : array_like, shape (n,) Abscissas. y : array_like, shape (n, ...) Ordinates. k : int, optional B-spline degree. Default is cubic, k=3. t : array_like, shape (nt + k + 1,), optional. Knots. The number of knots needs to agree with the number of datapoints and the number of derivatives at the edges. Specifically, ``nt - n`` must equal ``len(deriv_l) + len(deriv_r)``. bc_type : 2-tuple or None Boundary conditions. Default is None, which means choosing the boundary conditions automatically. Otherwise, it must be a length-two tuple where the first element sets the boundary conditions at ``x[0]`` and the second element sets the boundary conditions at ``x[-1]``. Each of these must be an iterable of pairs ``(order, value)`` which gives the values of derivatives of specified orders at the given edge of the interpolation interval. Alternatively, the following string aliases are recognized: * ``"clamped"``: The first derivatives at the ends are zero. This is equivalent to ``bc_type=([(1, 0.0)], [(1, 0.0)])``. * ``"natural"``: The second derivatives at ends are zero. This is equivalent to ``bc_type=([(2, 0.0)], [(2, 0.0)])``. * ``"not-a-knot"`` (default): The first and second segments are the same polynomial. This is equivalent to having ``bc_type=None``. axis : int, optional Interpolation axis. Default is 0. check_finite : bool, optional Whether to check that the input arrays contain only finite numbers. Disabling may give a performance gain, but may result in problems (crashes, non-termination) if the inputs do contain infinities or NaNs. Default is True. Returns ------- b : a BSpline object of the degree ``k`` and with knots ``t``. Examples -------- Use cubic interpolation on Chebyshev nodes: >>> def cheb_nodes(N): ... jj = 2.*np.arange(N) + 1 ... x = np.cos(np.pi * jj / 2 / N)[::-1] ... return x >>> x = cheb_nodes(20) >>> y = np.sqrt(1 - x**2) >>> from scipy.interpolate import BSpline, make_interp_spline >>> b = make_interp_spline(x, y) >>> np.allclose(b(x), y) True Note that the default is a cubic spline with a not-a-knot boundary condition >>> b.k 3 Here we use a 'natural' spline, with zero 2nd derivatives at edges: >>> l, r = [(2, 0.0)], [(2, 0.0)] >>> b_n = make_interp_spline(x, y, bc_type=(l, r)) # or, bc_type="natural" >>> np.allclose(b_n(x), y) True >>> x0, x1 = x[0], x[-1] >>> np.allclose([b_n(x0, 2), b_n(x1, 2)], [0, 0]) True Interpolation of parametric curves is also supported. 
    As an example, we compute a discretization of a snail curve in polar
    coordinates

    >>> phi = np.linspace(0, 2.*np.pi, 40)
    >>> r = 0.3 + np.cos(phi)
    >>> x, y = r*np.cos(phi), r*np.sin(phi)  # convert to Cartesian coordinates

    Build an interpolating curve, parameterizing it by the angle

    >>> from scipy.interpolate import make_interp_spline
    >>> spl = make_interp_spline(phi, np.c_[x, y])

    Evaluate the interpolant on a finer grid (note that we transpose the
    result to unpack it into a pair of x- and y-arrays)

    >>> phi_new = np.linspace(0, 2.*np.pi, 100)
    >>> x_new, y_new = spl(phi_new).T

    Plot the result

    >>> import matplotlib.pyplot as plt
    >>> plt.plot(x, y, 'o')
    >>> plt.plot(x_new, y_new, '-')
    >>> plt.show()

    See Also
    --------
    BSpline : base class representing the B-spline objects
    CubicSpline : a cubic spline in the polynomial basis
    make_lsq_spline : a similar factory function for spline fitting
    UnivariateSpline : a wrapper over FITPACK spline fitting routines
    splrep : a wrapper over FITPACK spline fitting routines

    """
    # convert string aliases for the boundary conditions
    if bc_type is None or bc_type == 'not-a-knot':
        deriv_l, deriv_r = None, None
    elif isinstance(bc_type, str):
        deriv_l, deriv_r = bc_type, bc_type
    else:
        try:
            deriv_l, deriv_r = bc_type
        except TypeError:
            raise ValueError("Unknown boundary condition: %s" % bc_type)

    y = np.asarray(y)

    axis = normalize_axis_index(axis, y.ndim)

    # special-case k=0 right away
    if k == 0:
        if any(_ is not None for _ in (t, deriv_l, deriv_r)):
            raise ValueError("Too much info for k=0: t and bc_type can only "
                             "be None.")
        x = _as_float_array(x, check_finite)
        t = np.r_[x, x[-1]]
        c = np.asarray(y)
        c = np.rollaxis(c, axis)
        c = np.ascontiguousarray(c, dtype=_get_dtype(c.dtype))
        return BSpline.construct_fast(t, c, k, axis=axis)

    # special-case k=1 (e.g., Lyche and Morken, Eq.(2.16))
    if k == 1 and t is None:
        if not (deriv_l is None and deriv_r is None):
            raise ValueError("Too much info for k=1: bc_type can only be "
                             "None.")
        x = _as_float_array(x, check_finite)
        t = np.r_[x[0], x, x[-1]]
        c = np.asarray(y)
        c = np.rollaxis(c, axis)
        c = np.ascontiguousarray(c, dtype=_get_dtype(c.dtype))
        return BSpline.construct_fast(t, c, k, axis=axis)

    x = _as_float_array(x, check_finite)
    y = _as_float_array(y, check_finite)
    k = operator.index(k)

    # come up with a sensible knot vector, if needed
    if t is None:
        if deriv_l is None and deriv_r is None:
            if k == 2:
                # OK, it's a bit ad hoc: Greville sites + omit
                # 2nd and 2nd-to-last points, a la not-a-knot
                t = (x[1:] + x[:-1]) / 2.
                t = np.r_[(x[0],)*(k+1),
                          t[1:-1],
                          (x[-1],)*(k+1)]
            else:
                t = _not_a_knot(x, k)
        else:
            t = _augknt(x, k)

    t = _as_float_array(t, check_finite)

    y = np.rollaxis(y, axis)    # now internally interp axis is zero

    if x.ndim != 1 or np.any(x[1:] < x[:-1]):
        raise ValueError("Expect x to be a 1-D sorted array_like.")
    if np.any(x[1:] == x[:-1]):
        raise ValueError("Expect x to not have duplicates")
    if k < 0:
        raise ValueError("Expect non-negative k.")
    if t.ndim != 1 or np.any(t[1:] < t[:-1]):
        raise ValueError("Expect t to be a 1-D sorted array_like.")
    if x.size != y.shape[0]:
        raise ValueError('x and y are incompatible.')
    if t.size < x.size + k + 1:
        raise ValueError('Got %d knots, need at least %d.' %
                         (t.size, x.size + k + 1))
    if (x[0] < t[k]) or (x[-1] > t[-k]):
        raise ValueError('Out of bounds w/ x = %s.' % x)

    # Here : deriv_l, r = [(nu, value), ...]
    deriv_l = _convert_string_aliases(deriv_l, y.shape[1:])
    deriv_l_ords, deriv_l_vals = _process_deriv_spec(deriv_l)
    nleft = deriv_l_ords.shape[0]

    deriv_r = _convert_string_aliases(deriv_r, y.shape[1:])
    deriv_r_ords, deriv_r_vals = _process_deriv_spec(deriv_r)
    nright = deriv_r_ords.shape[0]

    # have `n` conditions for `nt` coefficients; need nt-n derivatives
    n = x.size
    nt = t.size - k - 1

    if nt - n != nleft + nright:
        raise ValueError("The number of derivatives at boundaries does not "
                         "match: expected %s, got %s+%s" %
                         (nt - n, nleft, nright))

    # set up the LHS: the collocation matrix + derivatives at boundaries
    kl = ku = k
    ab = np.zeros((2*kl + ku + 1, nt), dtype=np.float_, order='F')
    _bspl._colloc(x, t, k, ab, offset=nleft)
    if nleft > 0:
        _bspl._handle_lhs_derivatives(t, k, x[0], ab, kl, ku, deriv_l_ords)
    if nright > 0:
        _bspl._handle_lhs_derivatives(t, k, x[-1], ab, kl, ku, deriv_r_ords,
                                      offset=nt-nright)

    # set up the RHS: values to interpolate (+ derivative values, if any)
    extradim = prod(y.shape[1:])
    rhs = np.empty((nt, extradim), dtype=y.dtype)
    if nleft > 0:
        rhs[:nleft] = deriv_l_vals.reshape(-1, extradim)
    rhs[nleft:nt - nright] = y.reshape(-1, extradim)
    if nright > 0:
        rhs[nt - nright:] = deriv_r_vals.reshape(-1, extradim)

    # solve Ab @ x = rhs; this is the relevant part of linalg.solve_banded
    if check_finite:
        ab, rhs = map(np.asarray_chkfinite, (ab, rhs))
    gbsv, = get_lapack_funcs(('gbsv',), (ab, rhs))
    lu, piv, c, info = gbsv(kl, ku, ab, rhs,
                            overwrite_ab=True, overwrite_b=True)

    if info > 0:
        raise LinAlgError("Collocation matrix is singular.")
    elif info < 0:
        raise ValueError('illegal value in %d-th argument of internal gbsv'
                         % -info)

    c = np.ascontiguousarray(c.reshape((nt,) + y.shape[1:]))
    return BSpline.construct_fast(t, c, k, axis=axis)
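# A short usage sketch, not part of the source above: the "clamped" string
# alias pins the first derivative to zero at both edges, so the fitted
# spline's first derivatives at x[0] and x[-1] should vanish up to round-off.
import numpy as np
from scipy.interpolate import make_interp_spline

x = np.linspace(0, 4, 9)
y = np.sin(x)
b_c = make_interp_spline(x, y, bc_type="clamped")
# evaluate the first derivative (nu=1) at both endpoints
assert np.allclose([b_c(x[0], 1), b_c(x[-1], 1)], [0, 0])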
def __getitem__(self, k):
    """Return a single result or slice of results from the query.
    """
    # NOTE: only supports the default result constructor.
    # are we already paginated? if so, we'll apply this getitem to the
    # paginated result - else we'll apply it to the whole.
    offset = 0 if self.paginator.start is None else self.paginator.start
    if isinstance(k, slice):
        # calculate solr pagination options for the requested slice
        step = operator.index(k.step) if k.step is not None else 1
        if step == 0:
            raise ValueError("slice step cannot be zero")
        if step > 0:
            s1 = k.start
            s2 = k.stop
            inc = 0
        else:
            s1 = k.stop
            s2 = k.start
            inc = 1
        if s1 is not None:
            start = operator.index(s1)
            if start < 0:
                start += self.count()
                start = max(0, start)
            start += inc
        else:
            start = 0
        if s2 is not None:
            stop = operator.index(s2)
            if stop < 0:
                stop += self.count()
                stop = max(0, stop)
            stop += inc
        else:
            stop = self.count()

        rows = stop - start
        if self.paginator.rows is not None:
            rows = min(rows, self.paginator.rows)
        if rows <= 0:
            return []
        start += offset
        return self.paginate(start=start, rows=rows).execute()[::step]
    else:
        # if not a slice, a single result is being requested
        k = operator.index(k)
        if k < 0:
            k += self.count()
            if k < 0:
                raise IndexError("list index out of range")
        # Otherwise do the query anyway, don't count() to avoid an extra
        # Solr call
        k += offset
        response = self.paginate(start=k, rows=1).execute()
        if response.result.numFound < k:
            raise IndexError("list index out of range")
        return response.result.docs[0]
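# A minimal worked sketch (hypothetical helper, not from the source) of the
# slice-to-pagination arithmetic above, for non-negative bounds only: a
# positive step maps directly to a (start, rows) window, while a negative
# step swaps the bounds and shifts both by one so the half-open interval is
# preserved before the final [::step] reversal.
def _slice_to_window(start, stop, step, total):
    if step > 0:
        s1, s2, inc = start, stop, 0
    else:
        s1, s2, inc = stop, start, 1
    lo = 0 if s1 is None else s1 + inc
    hi = total if s2 is None else s2 + inc
    return lo, hi - lo   # (start, rows) to request from Solr

assert _slice_to_window(2, 7, 1, 100) == (2, 5)    # q[2:7] -> rows 2..6
assert _slice_to_window(7, 2, -1, 100) == (3, 5)   # q[7:2:-1] -> rows 3..7, reversed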
def _ensure_index_tuple(x: Any) -> Tuple[int, ...]:
    """Convert x to a tuple of indices."""
    try:
        return (operator.index(x),)
    except TypeError:
        return tuple(map(operator.index, x))
def test_basic(self):
    self.o.ind = -2
    self.n.ind = 2
    self.assertEqual(operator.index(self.o), -2)
    self.assertEqual(operator.index(self.n), 2)
def _ensure_index(x: Any) -> Union[int, Tuple[int, ...]]:
    """Ensure x is either an index or a tuple of indices."""
    try:
        return operator.index(x)
    except TypeError:
        return tuple(map(operator.index, x))
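# A brief usage sketch, not part of the source, assuming the two helpers
# above are in scope: `_ensure_index` keeps scalars as plain ints while
# `_ensure_index_tuple` always produces a tuple; both accept anything with
# an __index__ method and reject everything else with a TypeError.
import numpy as np

assert _ensure_index(np.int64(3)) == 3
assert _ensure_index([1, 2]) == (1, 2)
assert _ensure_index_tuple(3) == (3,)
try:
    _ensure_index(1.5)          # floats are not valid indices
except TypeError:
    pass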
def _interpret_indexing(self, keys):
    """Internal routine used by __getitem__ and __setitem__"""

    maxlen = len(self.shape)
    shape = (maxlen,)
    startl = numpy.empty(shape=shape, dtype=SizeType)
    stopl = numpy.empty(shape=shape, dtype=SizeType)
    stepl = numpy.empty(shape=shape, dtype=SizeType)
    stop_None = numpy.zeros(shape=shape, dtype=SizeType)
    if not isinstance(keys, tuple):
        keys = (keys,)
    nkeys = len(keys)
    dim = 0
    # Here is some problem when dealing with [...,...] params
    # but this is a bit weird way to pass parameters anyway
    for key in keys:
        ellipsis = 0  # Sentinel
        if isinstance(key, type(Ellipsis)):
            ellipsis = 1
            for diml in range(dim, len(self.shape) - (nkeys - dim) + 1):
                startl[dim] = 0
                stopl[dim] = self.shape[diml]
                stepl[dim] = 1
                dim += 1
        elif dim >= maxlen:
            raise IndexError("Too many indices for object '%s'" %
                             self._v_pathname)
        elif is_idx(key):
            key = operator.index(key)

            # Protection for index out of range
            if key >= self.shape[dim]:
                raise IndexError("Index out of range")
            if key < 0:
                # To support negative values (Fixes bug #968149)
                key += self.shape[dim]
            start, stop, step = self._process_range(
                key, key + 1, 1, dim=dim)
            stop_None[dim] = 1
        elif isinstance(key, slice):
            start, stop, step = self._process_range(
                key.start, key.stop, key.step, dim=dim)
        else:
            raise TypeError("Non-valid index or slice: %s" % key)
        if not ellipsis:
            startl[dim] = start
            stopl[dim] = stop
            stepl[dim] = step
            dim += 1

    # Complete the other dimensions, if needed
    if dim < len(self.shape):
        for diml in range(dim, len(self.shape)):
            startl[dim] = 0
            stopl[dim] = self.shape[diml]
            stepl[dim] = 1
            dim += 1

    # Compute the shape for the container properly. Fixes #1288792
    shape = []
    for dim in range(len(self.shape)):
        # The negative division operates differently with python scalars
        # and numpy scalars (which are similar to C conventions). See:
        # http://www.python.org/doc/faq/programming.html#why-does-22-10-return-3
        # and
        # http://www.peterbe.com/Integer-division-in-programming-languages
        # for more info on this issue.
        # I've finally decided to rely on the len(xrange) function.
        # F. Alted 2006-09-25
        # Switch to `lrange` to allow long ranges (see #99).
        # use xrange, since it supports large integers as of Python 2.6
        # see github #181
        new_dim = len(range(startl[dim], stopl[dim], stepl[dim]))
        if not (new_dim == 1 and stop_None[dim]):
            shape.append(new_dim)

    return startl, stopl, stepl, shape
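# A minimal illustration, not part of the source: the routine above reduces
# every key to a (start, stop, step) triple per dimension and derives the
# result shape with len(range(...)), which handles negative steps and empty
# selections without any manual ceiling division.
assert len(range(0, 10, 3)) == 4      # indices 0, 3, 6, 9
assert len(range(9, -1, -2)) == 5     # indices 9, 7, 5, 3, 1
assert len(range(5, 5, 1)) == 0       # empty selection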
def __init__(self, arg1, shape=None, dtype=None, copy=False):
    _data_matrix.__init__(self)

    if isinstance(arg1, tuple):
        if isshape(arg1):
            M, N = arg1
            self._shape = check_shape((M, N))
            idx_dtype = get_index_dtype(maxval=max(M, N))
            self.row = np.array([], dtype=idx_dtype)
            self.col = np.array([], dtype=idx_dtype)
            self.data = np.array([], getdtype(dtype, default=float))
            self.has_canonical_format = True
        else:
            try:
                obj, (row, col) = arg1
            except (TypeError, ValueError) as e:
                raise TypeError('invalid input format') from e

            if shape is None:
                if len(row) == 0 or len(col) == 0:
                    raise ValueError('cannot infer dimensions from zero '
                                     'sized index arrays')
                M = operator.index(np.max(row)) + 1
                N = operator.index(np.max(col)) + 1
                self._shape = check_shape((M, N))
            else:
                # Use 2 steps to ensure shape has length 2.
                M, N = shape
                self._shape = check_shape((M, N))

            idx_dtype = get_index_dtype(maxval=max(self.shape))
            self.row = np.array(row, copy=copy, dtype=idx_dtype)
            self.col = np.array(col, copy=copy, dtype=idx_dtype)
            self.data = np.array(obj, copy=copy)
            self.has_canonical_format = False
    else:
        if isspmatrix(arg1):
            if isspmatrix_coo(arg1) and copy:
                self.row = arg1.row.copy()
                self.col = arg1.col.copy()
                self.data = arg1.data.copy()
                self._shape = check_shape(arg1.shape)
            else:
                coo = arg1.tocoo()
                self.row = coo.row
                self.col = coo.col
                self.data = coo.data
                self._shape = check_shape(coo.shape)
            self.has_canonical_format = False
        else:
            # dense argument
            M = np.atleast_2d(np.asarray(arg1))

            if M.ndim != 2:
                raise TypeError('expected dimension <= 2 array or matrix')

            self._shape = check_shape(M.shape)
            if shape is not None:
                if check_shape(shape) != self._shape:
                    raise ValueError('inconsistent shapes: %s != %s' %
                                     (shape, self._shape))

            self.row, self.col = M.nonzero()
            self.data = M[self.row, self.col]
            self.has_canonical_format = True

    if dtype is not None:
        self.data = self.data.astype(dtype, copy=False)

    self._check()
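# A short usage sketch, not from the source: the (data, (row, col)) triple
# form exercises the shape-inference branch above, where the shape becomes
# (max(row) + 1, max(col) + 1) when no explicit shape is given.
import numpy as np
from scipy.sparse import coo_matrix

data = np.array([4, 5, 7])
row = np.array([0, 3, 1])
col = np.array([0, 3, 1])
m = coo_matrix((data, (row, col)))
assert m.shape == (4, 4)
assert m.toarray()[3, 3] == 5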
def __index__(self):
    return operator.index(self.__wrapped__)
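# A minimal sketch of why this delegation matters; the Wrapped class here is
# hypothetical scaffolding around the one-line method above. Defining
# __index__ lets a proxy object be used anywhere a real integer index is
# required, e.g. in sequence indexing or range().
import operator

class Wrapped:
    def __init__(self, wrapped):
        self.__wrapped__ = wrapped

    def __index__(self):
        return operator.index(self.__wrapped__)

assert operator.index(Wrapped(3)) == 3
assert "abcdef"[Wrapped(2)] == "c"   # str indexing calls __index__ on the key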
def _parse_einsum_input(operands):
    """
    A reproduction of einsum's C-side parsing in Python.

    Returns
    -------
    input_strings : str
        Parsed input strings
    output_string : str
        Parsed output string
    operands : list of array_like
        The operands to use in the numpy contraction

    Examples
    --------
    The operand list is simplified to reduce printing:

    >>> np.random.seed(123)
    >>> a = np.random.rand(4, 4)
    >>> b = np.random.rand(4, 4, 4)
    >>> _parse_einsum_input(('...a,...a->...', a, b))
    ('za,xza', 'xz', [a, b]) # may vary

    >>> _parse_einsum_input((a, [Ellipsis, 0], b, [Ellipsis, 0]))
    ('za,xza', 'xz', [a, b]) # may vary
    """

    if len(operands) == 0:
        raise ValueError("No input operands")

    if isinstance(operands[0], str):
        subscripts = operands[0].replace(" ", "")
        operands = [asanyarray(v) for v in operands[1:]]

        # Ensure all characters are valid
        for s in subscripts:
            if s in '.,->':
                continue
            if s not in einsum_symbols:
                raise ValueError("Character %s is not a valid symbol." % s)

    else:
        tmp_operands = list(operands)
        operand_list = []
        subscript_list = []
        for p in range(len(operands) // 2):
            operand_list.append(tmp_operands.pop(0))
            subscript_list.append(tmp_operands.pop(0))

        output_list = tmp_operands[-1] if len(tmp_operands) else None
        operands = [asanyarray(v) for v in operand_list]
        subscripts = ""
        last = len(subscript_list) - 1
        for num, sub in enumerate(subscript_list):
            for s in sub:
                if s is Ellipsis:
                    subscripts += "..."
                else:
                    try:
                        s = operator.index(s)
                    except TypeError as e:
                        raise TypeError("For this input type lists must "
                                        "contain either int or Ellipsis") from e
                    subscripts += einsum_symbols[s]
            if num != last:
                subscripts += ","

        if output_list is not None:
            subscripts += "->"
            for s in output_list:
                if s is Ellipsis:
                    subscripts += "..."
                else:
                    try:
                        s = operator.index(s)
                    except TypeError as e:
                        raise TypeError("For this input type lists must "
                                        "contain either int or Ellipsis") from e
                    subscripts += einsum_symbols[s]

    # Check for proper "->"
    if ("-" in subscripts) or (">" in subscripts):
        invalid = (subscripts.count("-") > 1) or (subscripts.count(">") > 1)
        if invalid or (subscripts.count("->") != 1):
            raise ValueError("Subscripts can only contain one '->'.")

    # Parse ellipses
    if "." in subscripts:
        used = subscripts.replace(".", "").replace(",", "").replace("->", "")
        unused = list(einsum_symbols_set - set(used))
        ellipse_inds = "".join(unused)
        longest = 0

        if "->" in subscripts:
            input_tmp, output_sub = subscripts.split("->")
            split_subscripts = input_tmp.split(",")
            out_sub = True
        else:
            split_subscripts = subscripts.split(',')
            out_sub = False

        for num, sub in enumerate(split_subscripts):
            if "." in sub:
                if (sub.count(".") != 3) or (sub.count("...") != 1):
                    raise ValueError("Invalid Ellipses.")

                # Take into account numerical values
                if operands[num].shape == ():
                    ellipse_count = 0
                else:
                    ellipse_count = max(operands[num].ndim, 1)
                    ellipse_count -= (len(sub) - 3)

                if ellipse_count > longest:
                    longest = ellipse_count

                if ellipse_count < 0:
                    raise ValueError("Ellipses lengths do not match.")
                elif ellipse_count == 0:
                    split_subscripts[num] = sub.replace('...', '')
                else:
                    rep_inds = ellipse_inds[-ellipse_count:]
                    split_subscripts[num] = sub.replace('...', rep_inds)

        subscripts = ",".join(split_subscripts)
        if longest == 0:
            out_ellipse = ""
        else:
            out_ellipse = ellipse_inds[-longest:]

        if out_sub:
            subscripts += "->" + output_sub.replace("...", out_ellipse)
        else:
            # Special care for outputless ellipses
            output_subscript = ""
            tmp_subscripts = subscripts.replace(",", "")
            for s in sorted(set(tmp_subscripts)):
                if s not in einsum_symbols:
                    raise ValueError("Character %s is not a valid symbol."
                                     % s)
                if tmp_subscripts.count(s) == 1:
                    output_subscript += s
            normal_inds = ''.join(sorted(set(output_subscript) -
                                         set(out_ellipse)))

            subscripts += "->" + out_ellipse + normal_inds

    # Build output string if does not exist
    if "->" in subscripts:
        input_subscripts, output_subscript = subscripts.split("->")
    else:
        input_subscripts = subscripts
        # Build output subscripts
        tmp_subscripts = subscripts.replace(",", "")
        output_subscript = ""
        for s in sorted(set(tmp_subscripts)):
            if s not in einsum_symbols:
                raise ValueError("Character %s is not a valid symbol." % s)
            if tmp_subscripts.count(s) == 1:
                output_subscript += s

    # Make sure output subscripts are in the input
    for char in output_subscript:
        if char not in input_subscripts:
            raise ValueError("Output character %s did not appear in the input"
                             % char)

    # Make sure the number of operands matches the number of terms
    if len(input_subscripts.split(',')) != len(operands):
        raise ValueError("Number of einsum subscripts must be equal to the "
                         "number of operands.")

    return (input_subscripts, output_subscript, operands)
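# A brief usage sketch, not part of the source, assuming the module-level
# helpers the function relies on (einsum_symbols, asanyarray) are in scope:
# subscripts that appear exactly once across the inputs become the
# (alphabetically sorted) implicit output, so 'ij,jk' contracts over the
# repeated index j.
import numpy as np

a = np.ones((2, 3))
b = np.ones((3, 4))
in_subs, out_subs, ops = _parse_einsum_input(('ij,jk', a, b))
assert (in_subs, out_subs) == ('ij,jk', 'ik')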
def binned_statistic_dd(sample, values, statistic='mean',
                        bins=10, range=None, expand_binnumbers=False,
                        binned_statistic_result=None):
    """
    Compute a multidimensional binned statistic for a set of data.

    This is a generalization of the histogramdd function.  A histogram
    divides the space into bins, and returns the count of the number of
    points in each bin.  This function allows the computation of the sum,
    mean, median, or other statistic of the values within each bin.

    Parameters
    ----------
    sample : array_like
        Data to histogram passed as a sequence of N arrays of length D, or
        as an (N,D) array.
    values : (N,) array_like or list of (N,) array_like
        The data on which the statistic will be computed.  This must be
        the same shape as `sample`, or a list of sequences - each with the
        same shape as `sample`.  If `values` is such a list, the statistic
        will be computed on each independently.
    statistic : string or callable, optional
        The statistic to compute (default is 'mean').
        The following statistics are available:

          * 'mean' : compute the mean of values for points within each bin.
            Empty bins will be represented by NaN.
          * 'median' : compute the median of values for points within each
            bin. Empty bins will be represented by NaN.
          * 'count' : compute the count of points within each bin.  This is
            identical to an unweighted histogram.  `values` array is not
            referenced.
          * 'sum' : compute the sum of values for points within each bin.
            This is identical to a weighted histogram.
          * 'std' : compute the standard deviation within each bin. This
            is implicitly calculated with ddof=0. If the number of values
            within a given bin is 0 or 1, the computed standard deviation
            value will be 0 for the bin.
          * 'min' : compute the minimum of values for points within each bin.
            Empty bins will be represented by NaN.
          * 'max' : compute the maximum of values for points within each bin.
            Empty bins will be represented by NaN.
          * function : a user-defined function which takes a 1D array of
            values, and outputs a single numerical statistic. This function
            will be called on the values in each bin.  Empty bins will be
            represented by function([]), or NaN if this returns an error.

    bins : sequence or positive int, optional
        The bin specification must be in one of the following forms:

          * A sequence of arrays describing the bin edges along each
            dimension.
          * The number of bins for each dimension (nx, ny, ... = bins).
          * The number of bins for all dimensions (nx = ny = ... = bins).

    range : sequence, optional
        A sequence of lower and upper bin edges to be used if the edges are
        not given explicitly in `bins`. Defaults to the minimum and maximum
        values along each dimension.
    expand_binnumbers : bool, optional
        'False' (default): the returned `binnumber` is a shape (N,) array of
        linearized bin indices.
        'True': the returned `binnumber` is 'unraveled' into a shape (D,N)
        ndarray, where each row gives the bin numbers in the corresponding
        dimension.
        See the `binnumber` returned value, and the `Examples` section of
        `binned_statistic_2d`.
    binned_statistic_result : binnedStatisticddResult
        Result of a previous call to the function in order to reuse bin
        edges and bin numbers with new values and/or a different statistic.
        To reuse bin numbers, `expand_binnumbers` must have been set to
        False (the default)

        .. versionadded:: 0.17.0

    Returns
    -------
    statistic : ndarray, shape(nx1, nx2, nx3,...)
        The values of the selected statistic in each bin.
    bin_edges : list of ndarrays
        A list of D arrays describing the (nxi + 1) bin edges for each
        dimension.
    binnumber : (N,) array of ints or (D,N) ndarray of ints
        This assigns to each element of `sample` an integer that represents
        the bin in which this observation falls.  The representation depends
        on the `expand_binnumbers` argument. See `Notes` for details.

    See Also
    --------
    numpy.digitize, numpy.histogramdd, binned_statistic, binned_statistic_2d

    Notes
    -----
    Bin edges:
    All but the last (righthand-most) bin is half-open in each dimension.
    In other words, if `bins` is ``[1, 2, 3, 4]``, then the first bin is
    ``[1, 2)`` (including 1, but excluding 2) and the second ``[2, 3)``.
    The last bin, however, is ``[3, 4]``, which *includes* 4.

    `binnumber`:
    This returned argument assigns to each element of `sample` an integer
    that represents the bin in which it belongs.  The representation depends
    on the `expand_binnumbers` argument. If 'False' (default): The returned
    `binnumber` is a shape (N,) array of linearized indices mapping each
    element of `sample` to its corresponding bin (using row-major ordering).
    If 'True': The returned `binnumber` is a shape (D,N) ndarray where
    each row indicates bin placements for each dimension respectively.  In
    each dimension, a binnumber of `i` means the corresponding value is
    between (bin_edges[D][i-1], bin_edges[D][i]), for each dimension 'D'.

    .. versionadded:: 0.11.0

    Examples
    --------
    >>> from scipy import stats
    >>> import matplotlib.pyplot as plt
    >>> from mpl_toolkits.mplot3d import Axes3D

    Take an array of 600 (x, y) coordinates as an example.
    `binned_statistic_dd` can handle arrays of higher dimension `D`. But a
    plot of dimension `D+1` is required.

    >>> mu = np.array([0., 1.])
    >>> sigma = np.array([[1., -0.5],[-0.5, 1.5]])
    >>> multinormal = stats.multivariate_normal(mu, sigma)
    >>> data = multinormal.rvs(size=600, random_state=235412)
    >>> data.shape
    (600, 2)

    Create bins and count how many arrays fall in each bin:

    >>> N = 60
    >>> x = np.linspace(-3, 3, N)
    >>> y = np.linspace(-3, 4, N)
    >>> ret = stats.binned_statistic_dd(data, np.arange(600), bins=[x, y],
    ...                                 statistic='count')
    >>> bincounts = ret.statistic

    Set the volume and the location of bars:

    >>> dx = x[1] - x[0]
    >>> dy = y[1] - y[0]
    >>> x, y = np.meshgrid(x[:-1]+dx/2, y[:-1]+dy/2)
    >>> z = 0

    >>> bincounts = bincounts.ravel()
    >>> x = x.ravel()
    >>> y = y.ravel()

    >>> fig = plt.figure()
    >>> ax = fig.add_subplot(111, projection='3d')
    >>> with np.errstate(divide='ignore'):   # silence random axes3d warning
    ...     ax.bar3d(x, y, z, dx, dy, bincounts)

    Reuse bin numbers and bin edges with new values:

    >>> ret2 = stats.binned_statistic_dd(data, -np.arange(600),
    ...                                  binned_statistic_result=ret,
    ...                                  statistic='mean')
    """
    known_stats = ['mean', 'median', 'count', 'sum', 'std', 'min', 'max']
    if not callable(statistic) and statistic not in known_stats:
        raise ValueError('invalid statistic %r' % (statistic,))

    try:
        bins = index(bins)
    except TypeError:
        # bins is not an integer
        pass
    # If bins was an integer-like object, now it is an actual Python int.

    # NOTE: for _bin_edges(), see e.g. gh-11365
    if isinstance(bins, int) and not np.isfinite(sample).all():
        raise ValueError('%r contains non-finite values.' % (sample,))

    # `Ndim` is the number of dimensions (e.g. `2` for `binned_statistic_2d`)
    # `Dlen` is the length of elements along each dimension.
    # This code is based on np.histogramdd
    try:
        # `sample` is an ND-array.
        Dlen, Ndim = sample.shape
    except (AttributeError, ValueError):
        # `sample` is a sequence of 1D arrays.
        sample = np.atleast_2d(sample).T
        Dlen, Ndim = sample.shape

    # Store initial shape of `values` to preserve it in the output
    values = np.asarray(values)
    input_shape = list(values.shape)
    # Make sure that `values` is 2D to iterate over rows
    values = np.atleast_2d(values)
    Vdim, Vlen = values.shape

    # Make sure `values` match `sample`
    if statistic != 'count' and Vlen != Dlen:
        raise AttributeError('The number of `values` elements must match the '
                             'length of each `sample` dimension.')

    try:
        M = len(bins)
        if M != Ndim:
            raise AttributeError('The dimension of bins must be equal '
                                 'to the dimension of the sample x.')
    except TypeError:
        bins = Ndim * [bins]

    if binned_statistic_result is None:
        nbin, edges, dedges = _bin_edges(sample, bins, range)
        binnumbers = _bin_numbers(sample, nbin, edges, dedges)
    else:
        edges = binned_statistic_result.bin_edges
        # +1 accounts for the outlier bins added on each side
        nbin = np.array([len(edges[i]) + 1 for i in builtins.range(Ndim)])
        dedges = [np.diff(edges[i]) for i in builtins.range(Ndim)]
        binnumbers = binned_statistic_result.binnumber

    result = np.empty([Vdim, nbin.prod()], float)

    if statistic == 'mean':
        result.fill(np.nan)
        flatcount = np.bincount(binnumbers, None)
        a = flatcount.nonzero()
        for vv in builtins.range(Vdim):
            flatsum = np.bincount(binnumbers, values[vv])
            result[vv, a] = flatsum[a] / flatcount[a]
    elif statistic == 'std':
        result.fill(0)
        flatcount = np.bincount(binnumbers, None)
        a = flatcount.nonzero()
        for vv in builtins.range(Vdim):
            for i in np.unique(binnumbers):
                # NOTE: take std dev by bin, np.std() is 2-pass and stable
                binned_data = values[vv, binnumbers == i]
                # calc std only when binned data is 2 or more for speed up.
                if len(binned_data) >= 2:
                    result[vv, i] = np.std(binned_data)
    elif statistic == 'count':
        result.fill(0)
        flatcount = np.bincount(binnumbers, None)
        a = np.arange(len(flatcount))
        result[:, a] = flatcount[np.newaxis, :]
    elif statistic == 'sum':
        result.fill(0)
        for vv in builtins.range(Vdim):
            flatsum = np.bincount(binnumbers, values[vv])
            a = np.arange(len(flatsum))
            result[vv, a] = flatsum
    elif statistic == 'median':
        result.fill(np.nan)
        for i in np.unique(binnumbers):
            for vv in builtins.range(Vdim):
                result[vv, i] = np.median(values[vv, binnumbers == i])
    elif statistic == 'min':
        result.fill(np.nan)
        for i in np.unique(binnumbers):
            for vv in builtins.range(Vdim):
                result[vv, i] = np.min(values[vv, binnumbers == i])
    elif statistic == 'max':
        result.fill(np.nan)
        for i in np.unique(binnumbers):
            for vv in builtins.range(Vdim):
                result[vv, i] = np.max(values[vv, binnumbers == i])
    elif callable(statistic):
        with np.errstate(invalid='ignore'), suppress_warnings() as sup:
            sup.filter(RuntimeWarning)
            try:
                null = statistic([])
            except Exception:
                null = np.nan
        result.fill(null)
        for i in np.unique(binnumbers):
            for vv in builtins.range(Vdim):
                result[vv, i] = statistic(values[vv, binnumbers == i])

    # Shape into a proper matrix
    result = result.reshape(np.append(Vdim, nbin))

    # Remove outliers (indices 0 and -1 for each bin-dimension).
    core = tuple([slice(None)] + Ndim * [slice(1, -1)])
    result = result[core]

    # Unravel binnumbers into an ndarray, each row the bins for each dimension
    if expand_binnumbers and Ndim > 1:
        binnumbers = np.asarray(np.unravel_index(binnumbers, nbin))

    if np.any(result.shape[1:] != nbin - 2):
        raise RuntimeError('Internal Shape Error')

    # Reshape to have output (`result`) match input (`values`) shape
    result = result.reshape(input_shape[:-1] + list(nbin - 2))

    return BinnedStatisticddResult(result, edges, binnumbers)
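# A short usage sketch, not part of the source: `bins` may be any
# integer-like object, since the function normalizes it with operator.index()
# before use; here np.int64 stands in for a plain int.
import numpy as np
from scipy import stats

sample = np.array([[1.0, 1.0], [2.0, 2.0], [3.0, 3.0]])
vals = np.array([10.0, 20.0, 30.0])
res = stats.binned_statistic_dd(sample, vals, statistic='sum',
                                bins=np.int64(2))   # integer-like, not int
assert res.statistic.shape == (2, 2)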