Example #1
def in1d(ar1, ar2, assume_unique=False, invert=False):
    """Tests whether each element of a 1-D array is also present in a second
    array.

    Returns a boolean array the same length as ``ar1`` that is ``True``
    where an element of ``ar1`` is in ``ar2`` and ``False`` otherwise.

    Args:
        ar1 (cupy.ndarray): Input array.
        ar2 (cupy.ndarray): The values against which to test each value of
            ``ar1``.
        assume_unique (bool, optional): Ignored
        invert (bool, optional): If ``True``, the values in the returned array
            are inverted (that is, ``False`` where an element of ``ar1`` is in
            ``ar2`` and ``True`` otherwise). Default is ``False``.

    Returns:
        cupy.ndarray, bool: The values ``ar1[in1d]`` are in ``ar2``.

    """
    # Ravel both arrays, behavior for the first array could be different
    ar1 = ar1.ravel()
    ar2 = ar2.ravel()
    if ar1.size == 0 or ar2.size == 0:
        if invert:
            return cupy.ones(ar1.shape, dtype=cupy.bool_)
        else:
            return cupy.zeros(ar1.shape, dtype=cupy.bool_)
    # Use brilliant searchsorted trick
    # https://github.com/cupy/cupy/pull/4018#discussion_r495790724
    ar2 = cupy.sort(ar2)
    v1 = cupy.searchsorted(ar2, ar1, 'left')
    v2 = cupy.searchsorted(ar2, ar1, 'right')
    return v1 == v2 if invert else v1 != v2
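
A minimal usage sketch (not from the original source), assuming CuPy is available:

import cupy

a = cupy.array([0, 1, 2, 5, 0])
b = cupy.array([0, 2])
print(in1d(a, b))                # [ True False  True False  True]
print(in1d(a, b, invert=True))   # [False  True False  True False]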
Example #2
    def do_map(inputs, output):
        """labels must be sorted"""
        nidx = sorted_index.size

        # Find boundaries for each stretch of constant labels
        # This could be faster, but we already paid N log N to sort labels.
        lo = cupy.searchsorted(labels, sorted_index, side='left')
        hi = cupy.searchsorted(labels, sorted_index, side='right')

        for i, low, high in zip(range(nidx), lo, hi):
            if low == high:
                continue
            output[i] = func(*[inp[low:high] for inp in inputs])
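
The boundary trick is easiest to see on a toy input; a sketch assuming the labels are already sorted, as the docstring requires:

import cupy

labels = cupy.array([0, 0, 1, 1, 1, 3])    # sorted labels
sorted_index = cupy.array([0, 1, 2, 3])    # label values to look up
lo = cupy.searchsorted(labels, sorted_index, side='left')   # [0 2 5 5]
hi = cupy.searchsorted(labels, sorted_index, side='right')  # [2 5 5 6]
# hi - lo is the run length of each label; label 2 is absent, so lo == hi there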
Example #3
    def score(self, input_data):
        """
        Calculate anomaly scores using negative likelihood across n_random_cuts histograms.

        :param input_data: NxD training sample
        :type input_data: cupy.ndarray

        Examples
        --------
        >>> from clx.analytics.loda import Loda
        >>> import cupy as cp
        >>> x = cp.random.randn(100,5) # 5-D multivariate synthetic dataset
        >>> loda_ad = Loda(n_bins=None, n_random_cuts=100)
        >>> loda_ad.fit(x)
        >>> loda_ad.score(x)
        array([0.04295848, 0.02853553, 0.04587308, 0.03750692, 0.05050418,
        0.02671958, 0.03538646, 0.05606504, 0.03418612, 0.04040502,
        0.03542846, 0.02801463, 0.04884918, 0.02943411, 0.02741364,
        0.02702433, 0.03064191, 0.02575712, 0.03957355, 0.02729784,
        ...
        0.03943715, 0.02701243, 0.02880341, 0.04086408, 0.04365477])
        """
        if cp.ndim(input_data) < 2:
            input_data = input_data.reshape(1, -1)
        pred_scores = cp.zeros([input_data.shape[0], 1])
        for i in range(self._n_random_cuts):
            projected_data = self._projections[i, :].dot(input_data.T)
            inds = cp.searchsorted(self._limits[i, :self._n_bins - 1],
                                   projected_data,
                                   side='left')
            pred_scores[:, 0] += -self._weights[i] * cp.log(
                self._histograms[i, inds])
        pred_scores /= self._n_random_cuts
        return pred_scores.ravel()
Example #4
def _csr_row_slice(start_maj, step_maj, Ap, Aj, Ax, Bp):
    """Populate indices and data arrays of sparse matrix by slicing the
    rows of an input sparse matrix

    Args
        start_maj : starting row
        step_maj : step increment size
        Ap : indptr array of input sparse matrix
        Aj : indices array of input sparse matrix
        Ax : data array of input sparse matrix
        Bp : indptr array of output sparse matrix

    Returns
        Bj : indices array of output sparse matrix
        Bx : data array of output sparse matrix
    """

    in_rows = cupy.arange(start_maj,
                          start_maj + (Bp.size - 1) * step_maj,
                          step_maj,
                          dtype=Bp.dtype)
    offsetsB = Ap[in_rows] - Bp[:-1]
    B_size = int(Bp[-1])
    offsetsA = offsetsB[
        cupy.searchsorted(Bp, cupy.arange(B_size, dtype=Bp.dtype), 'right') -
        1]
    offsetsA += cupy.arange(offsetsA.size, dtype=offsetsA.dtype)
    Bj = Aj[offsetsA]
    Bx = Ax[offsetsA]
    return Bj, Bx
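
This helper backs stepped row slicing of CSR matrices; a hedged sketch of the equivalent public interface (the helper itself is internal to cupyx.scipy.sparse):

import cupy
from cupyx.scipy import sparse

A = sparse.csr_matrix(cupy.arange(12, dtype=cupy.float32).reshape(3, 4))
B = A[0::2]            # every other row; internally routed through helpers like the one above
print(B.toarray())     # rows 0 and 2 of the dense equivalent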
Example #5
    def transform(self, X):
        """[summary].

        Args:
            X (cupy.ndarray): [description].
        Returns:
            cupy.ndarray: [description].
        """
        check_is_fitted(self, "class_means_")
        # TODO(smly):
        # X = column_or_1d(X, warn=True)

        # Label encoding if necessary
        if self._label_encoding_uniques is not None:
            X = self._label_encoding_uniques.get_indexer(X.to_pandas())
        X = cupy.asarray(X)

        missing_mask = cupy.isnan(X)
        encode_mask = cupy.invert(missing_mask)
        unseen_mask = cupy.bitwise_xor(
            cupy.isin(X, self.classes_, invert=True), missing_mask)

        X = X.copy()
        X[unseen_mask] = cupy.max(self.classes_)

        indices = _get_index_cupy(self.classes_, X[encode_mask])

        _classes_index_list = cupy.searchsorted(self.lut_[:, 0], self.classes_)
        encoded_values = cupy.zeros(X.shape[0], dtype=cupy.float32)
        encoded_values[encode_mask] = cupy.take(
            self.lut_[:, 1], cupy.take(_classes_index_list, indices))

        encoded_values[unseen_mask] = self.default_unseen_
        return encoded_values
Example #6
def _csr_column_index1(col_idxs, Ap, Aj):
    """Construct indptr and components for populating indices and data of
    output sparse array
    Args
        col_idxs : column indices to index from input indices
        Ap : indptr of input sparse matrix
        Aj : indices of input sparse matrix
    Returns
        Bp : indptr of output sparse matrix
        Aj_mask : Input indices array with all cols not matching the
                  index masked out with -1.
        col_counts : Number of times each unique index occurs in Aj
        sort_idxs : Indices sorted to preserve original order of idxs
    """

    idx_map, sort_idxs = cupy.unique(col_idxs, return_index=True)
    sort_idxs = sort_idxs.astype(idx_map.dtype)
    idxs = cupy.searchsorted(idx_map, col_idxs)

    col_counts = cupy.zeros(idx_map.size, dtype=col_idxs.dtype)
    cupyx.scatter_add(col_counts, idxs, 1)

    Bp, Aj_mask = _csr_column_index1_indptr(idx_map, sort_idxs, col_counts, Ap,
                                            Aj)

    return Bp, Aj_mask, col_counts, sort_idxs
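
The searchsorted-plus-scatter_add counting pattern used above, isolated on a toy array (a sketch, not from the source):

import cupy
import cupyx

col_idxs = cupy.array([3, 1, 3, 7, 1, 3])
idx_map = cupy.unique(col_idxs)              # [1 3 7]
idxs = cupy.searchsorted(idx_map, col_idxs)  # position of each value in idx_map
col_counts = cupy.zeros(idx_map.size, dtype=col_idxs.dtype)
cupyx.scatter_add(col_counts, idxs, 1)       # [2 3 1]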
Example #7
    def sample(self, n_samples=1, random_state=None):
        """
        Generate random samples from the model.
        Currently, this is implemented only for gaussian and tophat kernels,
        and the Euclidean metric.

        Parameters
        ----------
        n_samples : int, default=1
            Number of samples to generate.
        random_state : int, cupy RandomState instance or None, default=None

        Returns
        -------
        X : cupy array of shape (n_samples, n_features)
            List of samples.
        """
        if not hasattr(self, "X_"):
            raise NotFittedError()

        supported_kernels = ["gaussian", "tophat"]
        if (self.kernel not in supported_kernels
                or self.metric != "euclidean"):
            raise NotImplementedError(
                "Only {} kernels, and the euclidean"
                " metric are supported.".format(supported_kernels))

        if isinstance(random_state, cp.random.RandomState):
            rng = random_state
        else:
            rng = cp.random.RandomState(random_state)

        u = rng.uniform(0, 1, size=n_samples)
        if self.sample_weight_ is None:
            i = (u * self.X_.shape[0]).astype(np.int64)
        else:
            cumsum_weight = cp.cumsum(self.sample_weight_)
            sum_weight = cumsum_weight[-1]
            i = cp.searchsorted(cumsum_weight, u * sum_weight)
        if self.kernel == "gaussian":
            return cp.atleast_2d(rng.normal(self.X_[i], self.bandwidth))

        elif self.kernel == "tophat":
            # we first draw points from a d-dimensional normal distribution,
            # then use an incomplete gamma function to map them to a uniform
            # d-dimensional tophat distribution.
            has_scipy(raise_if_unavailable=True)
            dim = self.X_.shape[1]
            X = rng.normal(size=(n_samples, dim))
            s_sq = cp.einsum("ij,ij->i", X, X).get()

            # do this on the CPU because we don't have
            # a gammainc function readily available
            correction = cp.array(
                gammainc(0.5 * dim, 0.5 * s_sq)**(1.0 / dim) * self.bandwidth /
                np.sqrt(s_sq))
            return self.X_[i] + X * correction[:, np.newaxis]
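
The weighted branch is inverse-CDF sampling: the cumulative weights form an unnormalized CDF, and searchsorted inverts it. A standalone sketch of just that step (variable names are illustrative):

import cupy as cp

weights = cp.array([0.1, 0.5, 0.4])
cumsum_weight = cp.cumsum(weights)    # [0.1 0.6 1.0]
u = cp.random.uniform(0, 1, size=5)
i = cp.searchsorted(cumsum_weight, u * cumsum_weight[-1])
# i == 1 about 50% of the time, i == 2 about 40%, i == 0 about 10%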
Example #8
def _preprocess(labels):

    label_values, inv_idx = cp.unique(labels, return_inverse=True)
    if not (label_values == 0).any():
        warn('Random walker only segments unlabeled areas, where '
             'labels == 0. No zero valued areas in labels were '
             'found. Returning provided labels.',
             stacklevel=2)

        return labels, None, None, None, None

    # If some labeled pixels are isolated inside pruned zones, prune them
    # as well and keep the labels for the final output

    null_mask = labels == 0
    pos_mask = labels > 0
    mask = labels >= 0

    fill = ndi.binary_propagation(null_mask, mask=mask)
    isolated = cp.logical_and(pos_mask, cp.logical_not(fill))

    pos_mask[isolated] = False

    # If the array has pruned zones, be sure that no isolated pixels
    # exist between pruned zones (they could not be determined)
    if label_values[0] < 0 or cp.any(isolated):  # synchronize!
        isolated = cp.logical_and(
            cp.logical_not(ndi.binary_propagation(pos_mask, mask=mask)),
            null_mask)

        labels[isolated] = -1
        if cp.all(isolated[null_mask]):
            warn('All unlabeled pixels are isolated, they could not be '
                 'determined by the random walker algorithm.',
                 stacklevel=2)
            return labels, None, None, None, None

        mask[isolated] = False
        mask = cp.atleast_3d(mask)

    else:
        mask = None

    # Reorder label values to have consecutive integers (no gaps)
    zero_idx = cp.searchsorted(label_values, cp.array(0))
    labels = cp.atleast_3d(inv_idx.reshape(labels.shape) - zero_idx)

    nlabels = label_values[zero_idx + 1:].shape[0]

    inds_isolated_seeds = cp.nonzero(isolated)
    isolated_values = labels[inds_isolated_seeds]

    return labels, nlabels, mask, inds_isolated_seeds, isolated_values
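
The zero_idx trick near the end remaps arbitrary label values onto consecutive integers while keeping 0 at 0; a sketch on a toy label array:

import cupy as cp

labels = cp.array([0, 5, 2, 0, -1])
label_values, inv_idx = cp.unique(labels, return_inverse=True)  # [-1 0 2 5]
zero_idx = cp.searchsorted(label_values, cp.array(0))           # 1
print(inv_idx - zero_idx)  # [ 0  2  1  0 -1]: unlabeled -> 0, seeds -> 1.., pruned -> -1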
Example #9
    def score(self, X):
        if cp.ndim(X) < 2:
            X = X.reshape(1, -1)
        pred_scores = cp.zeros([X.shape[0], 1])
        for i in range(self.n_random_cuts):
            projected_data = self.projections[i, :].dot(X.T)
            inds = cp.searchsorted(self.limits[i, :self.n_bins - 1],
                                   projected_data, side='left')
            pred_scores[:, 0] += -self.weights[i] * cp.log(
                self.histograms[i, inds])
        pred_scores /= self.n_random_cuts
        return pred_scores.ravel()
Example #10
def _csr_column_index1_indptr(unique_idxs, sort_idxs, col_counts, Ap, Aj):
    """Construct output indptr by counting column indices
    in input matrix for each row.
    Args
        unique_idxs : Unique set of indices sorted in ascending order
        sort_idxs : Indices sorted to preserve original order of unique_idxs
        col_counts : Number of times each unique index occurs in Aj
        Ap : indptr array of input sparse matrix
        Aj : indices array of input sparse matrix
    Returns
        Bp : Output indptr
        Aj_mask : Input indices array with all cols not matching the
                  index masked out with -1.
    """
    out_col_sum = cupy.zeros((Aj.size + 1, ), dtype=col_counts.dtype)

    index = cupy.argsort(unique_idxs)
    sorted_index = cupy.searchsorted(unique_idxs, Aj)

    yindex = cupy.take(index, sorted_index)
    mask = unique_idxs[yindex] == Aj

    idxs_adj = _csr_column_inv_idx(unique_idxs)
    out_col_sum[1:][mask] = col_counts[idxs_adj[Aj[mask]]]

    Aj_mask = out_col_sum[1:].copy()
    Aj_mask[Aj_mask == 0] = -1

    Aj_mask[Aj_mask > 0] = Aj[Aj_mask > 0]
    Aj_mask[Aj_mask > 0] = cupy.searchsorted(unique_idxs, Aj_mask[Aj_mask > 0])

    Aj_mask[Aj_mask >= 0] = sort_idxs[Aj_mask[Aj_mask >= 0]]

    cupy.cumsum(out_col_sum, out=out_col_sum)
    Bp = out_col_sum[Ap]
    Bp[1:] -= Bp[:-1]
    cupy.cumsum(Bp, out=Bp)

    return Bp, Aj_mask
Example #11
    def instance_score(self, x, projection_index):
        """
        Return scores from selected projection index.
        x (cupy.ndarray) : D x 1 feature instance.
        """
        if cp.ndim(x) < 2:
            x = x.reshape(1, -1)
        pred_scores = cp.zeros([x.shape[0], len(projection_index)])
        for i in projection_index:
            projected_data = self.projections[i, :].dot(x.T)
            inds = cp.searchsorted(self.limits[i, :self.n_bins - 1],
                                   projected_data, side='left')
            pred_scores[:, i] = -self.weights[i] * cp.log(
                self.histograms[i, inds])
        return pred_scores
Example #12
    def decompose(
            self,
            lhs: List[int],
            rhs: List[int],
            mergeV: bool = True,
            cutoff: float = 1e-12,
            maxdim: int = 2147483648
    ) -> Tuple["Tensor", "Tensor", xp.array, int]:
        lhs_size = reduce(lambda x, y: x * y,
                          [self._indices[i].size for i in lhs])
        rhs_size = reduce(lambda x, y: x * y,
                          [self._indices[i].size for i in rhs])
        self.transpose(lhs + rhs)
        u, s, v = xp.linalg.svd(self._data.reshape([lhs_size, rhs_size]),
                                full_matrices=False,
                                compute_uv=True)

        s_norm = xp.linalg.norm(s)
        s_cutoff = (1 - cutoff) * s_norm * s_norm
        s_squared_cumsum = xp.cumsum(xp.power(s, 2))

        # dim = 0
        # for i in range(s.size):
        #     dim += 1
        #     if s_squared_cumsum[i] >= s_cutoff or (dim + 1) > maxdim:
        #         break
        dim = int(xp.searchsorted(s_squared_cumsum[:maxdim], s_cutoff)) + 1
        dim = min(dim, s.size, maxdim)

        u = u[:, :dim]
        s = xp.clip(s[:dim] * s_norm / xp.sqrt(s_squared_cumsum[dim - 1]),
                    a_min=1e-32,
                    a_max=None)
        v = v[:dim, :]

        if mergeV:
            v = xp.diag(s) @ v
        else:
            u = u @ xp.diag(s)

        a = Index(dim)
        lhs_indices = self._indices[:len(lhs)] + [a]
        rhs_indices = [a] + self._indices[len(lhs):]
        lhs_tensor = Tensor(lhs_indices,
                            u.reshape([idx.size for idx in lhs_indices]))
        rhs_tensor = Tensor(rhs_indices,
                            v.reshape([idx.size for idx in rhs_indices]))
        return lhs_tensor, rhs_tensor, s, dim
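
The searchsorted call replaces the commented-out loop: the cumulative squared singular values are non-decreasing, so the smallest rank that retains the requested energy can be found by bisection. A plain-CuPy sketch (xp stands in for the array module):

import cupy as xp

s = xp.array([3.0, 2.0, 1.0, 0.1])        # singular values, descending
s_squared_cumsum = xp.cumsum(s ** 2)      # [9. 13. 14. 14.01]
s_cutoff = (1 - 1e-2) * s_squared_cumsum[-1]   # keep 99% of the squared norm
dim = int(xp.searchsorted(s_squared_cumsum, s_cutoff)) + 1
print(dim)                                # 3: the smallest singular value is dropped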
Example #13
    def _find_indices(self, xi):
        # find relevant edges between which xi are situated
        indices = []
        # compute distance to lower edge in unity units
        norm_distances = []
        # check for out of bounds xi
        out_of_bounds = cp.zeros((xi.shape[1]), dtype=bool)
        # iterate through dimensions
        for x, grid in zip(xi, self.grid):
            i = cp.searchsorted(grid, x) - 1
            i[i < 0] = 0
            i[i > grid.size - 2] = grid.size - 2
            indices.append(i)
            norm_distances.append((x - grid[i]) / (grid[i + 1] - grid[i]))
            if not self.bounds_error:
                out_of_bounds += x < grid[0]
                out_of_bounds += x > grid[-1]
        return indices, norm_distances, out_of_bounds
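
The per-dimension lookup in isolation: searchsorted finds the grid cell, and the clamping keeps out-of-range points usable for extrapolation. A sketch:

import cupy as cp

grid = cp.array([0.0, 1.0, 2.0, 3.0])
x = cp.array([-0.5, 1.5, 3.5])
i = cp.searchsorted(grid, x) - 1    # [-1  1  3]
i[i < 0] = 0
i[i > grid.size - 2] = grid.size - 2
print(i)                            # [0 1 2]
norm = (x - grid[i]) / (grid[i + 1] - grid[i])  # [-0.5 0.5 1.5]; values outside [0, 1] flag out-of-bounds points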
Example #14
    def _insert_many(self, i, j, x):
        """Inserts new nonzero at each (i, j) with value x
        Here (i,j) index major and minor respectively.
        i, j and x must be non-empty, 1d arrays.
        Inserts each major group (e.g. all entries per row) at a time.
        Maintains has_sorted_indices property.
        Modifies i, j, x in place.
        """

        order = cupy.argsort(i)  # stable for duplicates
        i = i.take(order)
        j = j.take(order)
        x = x.take(order)

        # Update index data type

        idx_dtype = sputils.get_index_dtype(
            (self.indices, self.indptr), maxval=(
                self.nnz + x.size))

        self.indptr = self.indptr.astype(idx_dtype)
        self.indices = self.indices.astype(idx_dtype)
        self.data = self.data.astype(self.dtype)

        indptr_inserts, indices_inserts, data_inserts = \
            _index._select_last_indices(i, j, x, idx_dtype)

        rows, ui_indptr = cupy.unique(indptr_inserts, return_index=True)

        to_add = cupy.empty(ui_indptr.size+1, ui_indptr.dtype)
        to_add[-1] = j.size
        to_add[:-1] = ui_indptr
        ui_indptr = to_add

        # Compute the counts for each row in the insertion array
        row_counts = cupy.zeros(ui_indptr.size-1, dtype=idx_dtype)
        cupyx.scatter_add(
            row_counts, cupy.searchsorted(rows, indptr_inserts), 1)

        self._perform_insert(indices_inserts, data_inserts,
                             rows, row_counts, idx_dtype)
Example #15
    def get_contacts(self, indices):
        """
        Returns all contacts of the given indices
        """
        indices = cp.sort(indices)

        res = []

        for start_vertices, end_vertices in self:
            _indices = cp.searchsorted(start_vertices, indices)
            _indices = _indices[_indices != 0]

            if len(_indices) == 0:
                continue

            res.append(end_vertices[_indices])

        if len(res) == 0:
            return cp.array([])

        else:
            return cp.hstack(res)
Example #16
    def _sort_by_city_ids(self,
                          city_ids,
                          values,
                          dtype=None,
                          default=0,
                          as_json=False) -> tuple:
        """
        Sorts the inputs such that they are ordered by the city_ids as defined in the constructor

        :param as_json:         if True returns a json-serializable values, otherwise cp/np.ndarray

        :param city_ids:        city ids as returned by the unique functions

        :param values:          values as returned by the unique functions

        :param default:         default value for city ids not included in the input city ids

        :return:                city_ids, corresponding_values
        """
        indexes = cp.searchsorted(self._city_id_array, city_ids)

        if default == 0:
            res = cp.zeros(len(self.city_ids))

        else:
            res = cp.ones(len(self.city_ids)) * default

        res[indexes] = values

        if dtype is not None:
            res = res.astype(dtype)

        if as_json:
            res = res.tolist()

        return self.city_ids, res
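
A sketch of the scatter step on its own, assuming a sorted master id array as built in the constructor (names follow the method above):

import cupy as cp

city_id_array = cp.array([10, 20, 30, 40])  # sorted master list
city_ids = cp.array([20, 40])               # ids present in this batch
values = cp.array([7.0, 9.0])
indexes = cp.searchsorted(city_id_array, city_ids)  # [1 3]
res = cp.zeros(city_id_array.size)
res[indexes] = values
print(res)  # [0. 7. 0. 9.]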
Example #17
def interp(x, xp, fp, left=None, right=None, period=None):
    """ One-dimensional linear interpolation.

    Args:
        x (cupy.ndarray): a 1D array of points on which the interpolation
            is performed.
        xp (cupy.ndarray): a 1D array of points on which the function values
            (``fp``) are known.
        fp (cupy.ndarray): a 1D array containing the function values at
            the points ``xp``.
        left (float or complex): value to return if ``x < xp[0]``. Default is
            ``fp[0]``.
        right (float or complex): value to return if ``x > xp[-1]``. Default is
            ``fp[-1]``.
        period (None or float): a period for the x-coordinates. Parameters
            ``left`` and ``right`` are ignored if ``period`` is specified.
            Default is ``None``.

    Returns:
        cupy.ndarray: The interpolated values, same shape as ``x``.

    .. note::
        This function may synchronize if ``left`` or ``right`` is not already
        on the device.

    .. seealso:: :func:`numpy.interp`

    """

    if xp.ndim != 1 or fp.ndim != 1:
        raise ValueError('xp and fp must be 1D arrays')
    if xp.size != fp.size:
        raise ValueError('fp and xp are not of the same length')
    if xp.size == 0:
        raise ValueError('array of sample points is empty')
    if not x.flags.c_contiguous:
        raise NotImplementedError('Non-C-contiguous x is currently not '
                                  'supported')
    x_dtype = cupy.common_type(x, xp)
    if not cupy.can_cast(x_dtype, cupy.float64):
        raise TypeError('Cannot cast array data from'
                        ' {} to {} according to the rule \'safe\''.format(
                            x_dtype, cupy.float64))

    if period is not None:
        # The handling of "period" below is modified from NumPy's

        if period == 0:
            raise ValueError("period must be a non-zero value")
        period = abs(period)
        left = None
        right = None

        x = x.astype(cupy.float64)
        xp = xp.astype(cupy.float64)

        # normalizing periodic boundaries
        x %= period
        xp %= period
        asort_xp = cupy.argsort(xp)
        xp = xp[asort_xp]
        fp = fp[asort_xp]
        xp = cupy.concatenate((xp[-1:] - period, xp, xp[0:1] + period))
        fp = cupy.concatenate((fp[-1:], fp, fp[0:1]))
        assert xp.flags.c_contiguous
        assert fp.flags.c_contiguous

    # NumPy always returns float64 or complex128, so we upcast all values
    # on the fly in the kernel
    out_dtype = 'D' if fp.dtype.kind == 'c' else 'd'
    output = cupy.empty(x.shape, dtype=out_dtype)
    idx = cupy.searchsorted(xp, x, side='right')
    left = fp[0] if left is None else cupy.array(left, fp.dtype)
    right = fp[-1] if right is None else cupy.array(right, fp.dtype)
    kern = _get_interp_kernel(out_dtype == 'D')
    kern(x, idx, xp, fp, xp.size, left, right, output)
    return output
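
A small usage sketch mirroring the semantics documented above:

import cupy

xs = cupy.array([0.0, 1.0, 2.0])
fs = cupy.array([0.0, 10.0, 20.0])
q = cupy.array([-1.0, 0.5, 2.5])
print(interp(q, xs, fs))                     # [ 0.  5. 20.]: clamped to fp[0] / fp[-1]
print(interp(q, xs, fs, left=-1, right=99))  # [-1.  5. 99.]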
Example #18
def histogramdd(sample, bins=10, range=None, weights=None, density=False):
    """
    Compute the multidimensional histogram of some data.

    Parameters
    ----------
    sample : (N, D) array, or (D, N) array_like
        The data to be histogrammed.

        Note the unusual interpretation of sample when an array_like:

        * When an array, each row is a coordinate in a D-dimensional space -
          such as ``histogramdd(cupy.array([p1, p2, p3]))``.
        * When an array_like, each element is the list of values for single
          coordinate - such as ``histogramdd((X, Y, Z))``.

        The first form should be preferred.

    bins : sequence or int, optional
        The bin specification:

        * A sequence of arrays describing the monotonically increasing bin
          edges along each dimension.
        * The number of bins for each dimension (nx, ny, ... =bins)
        * The number of bins for all dimensions (nx=ny=...=bins).

    range : sequence, optional
        A sequence of length D, each an optional (lower, upper) tuple giving
        the outer bin edges to be used if the edges are not given explicitly in
        `bins`.
        An entry of None in the sequence results in the minimum and maximum
        values being used for the corresponding dimension.
        The default, None, is equivalent to passing a tuple of D None values.
    density : bool, optional
        If False, the default, returns the number of samples in each bin.
        If True, returns the probability *density* function at the bin,
        ``bin_count / sample_count / bin_volume``.
    weights : (N,) array_like, optional
        An array of values `w_i` weighing each sample `(x_i, y_i, z_i, ...)`.
        The values of the returned histogram are equal to the sum of the
        weights belonging to the samples falling into each bin.

    Returns
    -------
    H : ndarray
        The multidimensional histogram of sample x. See density and weights
        for the different possible semantics.
    edges : list
        A list of D arrays describing the bin edges for each dimension.

    See Also
    --------
    histogram: 1-D histogram
    histogram2d: 2-D histogram

    Examples
    --------
    >>> r = cupy.random.randn(100,3)
    >>> H, edges = cupy.histogramdd(r, bins = (5, 8, 4))
    >>> H.shape, edges[0].size, edges[1].size, edges[2].size
    ((5, 8, 4), 6, 9, 5)

    """
    if isinstance(sample, cupy.ndarray):
        # Sample is an ND-array.
        if sample.ndim == 1:
            sample = sample[:, cupy.newaxis]
        nsamples, ndim = sample.shape
    else:
        sample = cupy.stack(sample, axis=-1)
        nsamples, ndim = sample.shape

    nbin = numpy.empty(ndim, int)
    edges = ndim * [None]
    dedges = ndim * [None]
    if weights is not None:
        weights = cupy.asarray(weights)

    try:
        nbins = len(bins)
        if nbins != ndim:
            raise ValueError(
                "The dimension of bins must be equal to the dimension of the "
                " sample x.")
    except TypeError:
        # bins is an integer
        bins = ndim * [bins]

    # normalize the range argument
    if range is None:
        range = (None, ) * ndim
    elif len(range) != ndim:
        raise ValueError("range argument must have one entry per dimension")

    # Create edge arrays
    for i in _range(ndim):
        if cnp.ndim(bins[i]) == 0:
            if bins[i] < 1:
                raise ValueError(
                    "`bins[{}]` must be positive, when an integer".format(i))
            smin, smax = _get_outer_edges(sample[:, i], range[i])
            num = int(bins[i] + 1)  # synchronize!
            edges[i] = cupy.linspace(smin, smax, num)
        elif cnp.ndim(bins[i]) == 1:
            edges[i] = cupy.asarray(bins[i])
            if (edges[i][:-1] > edges[i][1:]).any():
                raise ValueError(
                    "`bins[{}]` must be monotonically increasing, when an array"
                    .format(i))
        else:
            raise ValueError(
                "`bins[{}]` must be a scalar or 1d array".format(i))

        nbin[i] = len(edges[i]) + 1  # includes an outlier on each end
        dedges[i] = cupy.diff(edges[i])

    # Compute the bin number each sample falls into.
    ncount = tuple(
        # avoid cupy.digitize to work around gh-11022
        cupy.searchsorted(edges[i], sample[:, i], side="right")
        for i in _range(ndim))

    # Using digitize, values that fall on an edge are put in the right bin.
    # For the rightmost bin, we want values equal to the right edge to be
    # counted in the last bin, and not as an outlier.
    for i in _range(ndim):
        # Find which points are on the rightmost edge.
        on_edge = sample[:, i] == edges[i][-1]
        # Shift these points one bin to the left.
        ncount[i][on_edge] -= 1

    # Compute the sample indices in the flattened histogram matrix.
    # This raises an error if the array is too large.
    xy = cnp.ravel_multi_index(ncount, nbin)

    # Compute the number of repetitions in xy and assign it to the
    # flattened histmat.
    hist = cupy.bincount(xy, weights, minlength=numpy.prod(nbin))

    # Shape into a proper matrix
    hist = hist.reshape(nbin)

    # This preserves the (bad) behavior observed in gh-7845, for now.
    hist = hist.astype(float)  # Note: NumPy uses casting='safe' here too

    # Remove outliers (indices 0 and -1 for each dimension).
    core = ndim * (slice(1, -1), )
    hist = hist[core]

    if density:
        # calculate the probability density function
        s = hist.sum()
        for i in _range(ndim):
            shape = [1] * ndim
            shape[i] = nbin[i] - 2
            hist = hist / dedges[i].reshape(shape)
        hist /= s

    if any(hist.shape != numpy.asarray(nbin) - 2):
        raise RuntimeError("Internal Shape Error")
    return hist, edges
Example #19
def digitize(x, bins):
    return np.searchsorted(bins, x, side='left')
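
Note the side='left' choice: for monotonically increasing bins it matches numpy.digitize(x, bins, right=True), not the default right=False. A quick check:

import numpy as np

bins = np.array([0.0, 1.0, 2.0])
x = np.array([0.5, 1.0, 2.5])
print(digitize(x, bins))                 # [1 1 3]
print(np.digitize(x, bins, right=True))  # [1 1 3] -- same
print(np.digitize(x, bins))              # [1 2 3] -- right=False differs at x == 1.0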
Example #20
def _get_index_cupy(arr: "cupy.array", val: "cupy.array"):
    index = cupy.searchsorted(arr, val)
    return index
Example #21
File: truth.py  Project: toslunar/cupy
def intersect1d(arr1, arr2, assume_unique=False, return_indices=False):
    """Find the intersection of two arrays.
    Returns the sorted, unique values that are in both of the input arrays.

    Parameters
    ----------
    arr1, arr2 : cupy.ndarray
        Input arrays. Arrays will be flattened if they are not already 1-D.
    assume_unique : bool
        By default, False. If set True, the input arrays will be
        assumed to be unique, which speeds up the calculation. If set True,
        but the arrays are not unique, incorrect results and out-of-bounds
        indices could result.
    return_indices : bool
       By default, False. If True, the indices which correspond to the
       intersection of the two arrays are returned.

    Returns
    -------
    intersect1d : cupy.ndarray
        Sorted 1D array of common and unique elements.
    comm1 : cupy.ndarray
        The indices of the first occurrences of the common values
        in `arr1`. Only provided if `return_indices` is True.
    comm2 : cupy.ndarray
        The indices of the first occurrences of the common values
        in `arr2`. Only provided if `return_indices` is True.

    See Also
    --------
    numpy.intersect1d

    """
    if not assume_unique:
        if return_indices:
            arr1, ind1 = cupy.unique(arr1, return_index=True)
            arr2, ind2 = cupy.unique(arr2, return_index=True)
        else:
            arr1 = cupy.unique(arr1)
            arr2 = cupy.unique(arr2)
    else:
        arr1 = arr1.ravel()
        arr2 = arr2.ravel()

    if return_indices:
        arr2_sort_indices = cupy.argsort(arr2)
        arr2 = arr2[arr2_sort_indices]
    else:
        arr2 = cupy.sort(arr2)

    v1 = cupy.searchsorted(arr2, arr1, 'left')
    v2 = cupy.searchsorted(arr2, arr1, 'right')

    mask = v1 != v2
    int1d = arr1[mask]

    if return_indices:
        arr1_indices = cupy.flatnonzero(mask)
        arr2_indices = arr2_sort_indices[v2[mask] - 1]
        if not assume_unique:
            arr1_indices = ind1[arr1_indices]
            arr2_indices = ind2[arr2_indices]

        return int1d, arr1_indices, arr2_indices
    else:
        return int1d
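
A usage sketch (not from the source):

import cupy

a = cupy.array([1, 3, 4, 3])
b = cupy.array([3, 1, 2, 1])
print(intersect1d(a, b))  # [1 3]
common, ia, ib = intersect1d(a, b, return_indices=True)
print(ia, ib)             # [0 1] [1 0]: first occurrences in a and b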
Example #22
    def forward(self, x, adj, timinglist=[], dv="cuda", debug=False):
        # assuming x and adj are both numpy stuff

        ts = []
        N = x.shape[0]

        x = torch.tensor(x)

        x = F.dropout(x, self.dropout, training=self.training)

        # ts.append(time.time())
        # part2nodes, node2part = get_shards(N,25)
        # ts.append(time.time())

        # y = torch.zeros((N,self.nhid * self.nheads))

        # ts.append(time.time())
        # att_ts = []
        times = []
        mvtime = 0
        copytime = 0
        t = time.time()
        row_csr_starts, col_csr = adj
        numvalues = row_csr_starts[-1]

        xp = cp if dv == 'cuda' else np

        t = time.time()
        vidxs = xp.arange(numvalues)
        row_csr_starts = xp.array(row_csr_starts)
        col_csr = xp.array(col_csr)
        mvtime += time.time() - t

        t = time.time()
        d = xp.searchsorted(row_csr_starts - 1, vidxs)
        searchtime = time.time() - t

        # prime the cupy library
        if dv == 'cuda':
            dummyvalues = cp.empty((numvalues, ), dtype=cp.float32)
            upper_csr = cp.sparse.csr_matrix(
                (dummyvalues, col_csr, row_csr_starts),
                shape=(N, N),
                copy=False)
            j = time.time()
            lower_csc = cp.sparse.csc_matrix(
                (dummyvalues, col_csr, row_csr_starts),
                shape=(N, N),
                copy=True)
            lower_csr = lower_csc.tocsr(copy=False)

            low_csc2csr = cp.searchsorted(
                lower_csr.indptr - 1,
                vidxs) + lower_csc.indptr[lower_csr.indices]
            # transformed = src[transformedidxs]
            copytime = time.time() - j
            ys = [
                att(x, (row_csr_starts, col_csr),
                    dv=dv,
                    ts=times,
                    upper_csr=upper_csr,
                    lower_csr=lower_csr,
                    low_csc2csr=low_csc2csr,
                    searchd=d) for att in self.attentions
            ]
        else:
            ys = [
                att(x, adj, dv=dv, ts=times, searchd=d)
                for att in self.attentions
            ]

        t0 = time.time()
        ys = torch.cat(ys, dim=1)

        t1 = time.time()
        # y[nodes] = ys
        y = ys
        t2 = time.time()
        # times[-1][0] += t1 - t0
        times[-1][0] += t2 - t1
        times[-1][-1] += copytime
        times[0][0] += mvtime

        tot = time.time() - t
        times = np.array(times)
        # times = np.sum(times,axis=0) / tot * 100
        times = np.sum(times, axis=0)

        for att in self.attentions:
            att.calculatedWh = False

        # x = F.dropout(y, self.dropout, training=self.training)

        # y = torch.zeros((N,self.nclass))

        # for nodes in part2nodes:
        #     y[nodes] = self.out_att(x, adj, nodes,dv=dv)
        # self.out_att.calculatedWh = False

        # x = F.elu(y)
        # out = F.log_softmax(x, dim=1)

        timinglist.append((tot, times))
Example #23
def _select(input, labels=None, index=None, find_min=False, find_max=False,
            find_min_positions=False, find_max_positions=False,
            find_median=False):
    """Return one or more of: min, max, min position, max position, median.

    If neither `labels` nor `index` is provided, these are the global values
    in `input`. If `index` is None, but `labels` is provided, a global value
    across all non-zero labels is given. When both `labels` and `index` are
    provided, lists of values are provided for each labeled region specified
    in `index`. See further details in :func:`cupyx.scipy.ndimage.minimum`,
    etc.

    Used by minimum, maximum, minimum_position, maximum_position, extrema.
    """
    find_positions = find_min_positions or find_max_positions
    positions = None
    if find_positions:
        positions = cupy.arange(input.size).reshape(input.shape)

    def single_group(vals, positions):
        result = []
        if find_min:
            result += [vals.min()]
        if find_min_positions:
            result += [positions[vals == vals.min()][0]]
        if find_max:
            result += [vals.max()]
        if find_max_positions:
            result += [positions[vals == vals.max()][0]]
        if find_median:
            result += [cupy.median(vals)]
        return result

    if labels is None:
        return single_group(input, positions)

    # ensure input and labels match sizes
    input, labels = cupy.broadcast_arrays(input, labels)

    if index is None:
        mask = labels > 0
        masked_positions = None
        if find_positions:
            masked_positions = positions[mask]
        return single_group(input[mask], masked_positions)

    if cupy.isscalar(index):
        mask = labels == index
        masked_positions = None
        if find_positions:
            masked_positions = positions[mask]
        return single_group(input[mask], masked_positions)

    index = cupy.asarray(index)

    safe_int = _safely_castable_to_int(labels.dtype)
    min_label = labels.min()
    max_label = labels.max()

    # Remap labels to unique integers if necessary, or if the largest label is
    # larger than the number of values.
    if (not safe_int or min_label < 0 or max_label > labels.size):
        # Remap labels, and indexes
        unique_labels, labels = cupy.unique(labels, return_inverse=True)
        idxs = cupy.searchsorted(unique_labels, index)

        # Make all of idxs valid
        idxs[idxs >= unique_labels.size] = 0
        found = unique_labels[idxs] == index
    else:
        # Labels are an integer type, and there aren't too many
        idxs = cupy.asanyarray(index, int).copy()
        found = (idxs >= 0) & (idxs <= max_label)

    idxs[~found] = max_label + 1

    input = input.ravel()
    labels = labels.ravel()
    if find_positions:
        positions = positions.ravel()

    using_cub = _core._accelerator.ACCELERATOR_CUB in \
        cupy._core.get_routine_accelerators()

    if using_cub:
        # Cutoff values below were determined empirically for relatively large
        # input arrays.
        if find_positions or find_median:
            n_label_cutoff = 15
        else:
            n_label_cutoff = 30
    else:
        n_label_cutoff = 0

    if n_label_cutoff and len(idxs) <= n_label_cutoff:
        return _select_via_looping(
            input, labels, idxs, positions, find_min, find_min_positions,
            find_max, find_max_positions, find_median
        )

    order = cupy.lexsort(cupy.stack((input.ravel(), labels.ravel())))
    input = input[order]
    labels = labels[order]
    if find_positions:
        positions = positions[order]

    # Determine indices corresponding to the min or max value for each label
    label_change_index = cupy.searchsorted(labels,
                                           cupy.arange(1, max_label + 2))
    if find_min or find_min_positions or find_median:
        # index corresponding to the minimum value at each label
        min_index = label_change_index[:-1]
    if find_max or find_max_positions or find_median:
        # index corresponding to the maximum value at each label
        max_index = label_change_index[1:] - 1

    result = []
    # the order below matches the order expected by cupy.ndimage.extrema
    if find_min:
        mins = cupy.zeros(int(labels.max()) + 2, input.dtype)
        mins[labels[min_index]] = input[min_index]
        result += [mins[idxs]]
    if find_min_positions:
        minpos = cupy.zeros(labels.max().item() + 2, int)
        minpos[labels[min_index]] = positions[min_index]
        result += [minpos[idxs]]
    if find_max:
        maxs = cupy.zeros(int(labels.max()) + 2, input.dtype)
        maxs[labels[max_index]] = input[max_index]
        result += [maxs[idxs]]
    if find_max_positions:
        maxpos = cupy.zeros(labels.max().item() + 2, int)
        maxpos[labels[max_index]] = positions[max_index]
        result += [maxpos[idxs]]
    if find_median:
        locs = cupy.arange(len(labels))
        lo = cupy.zeros(int(labels.max()) + 2, int)
        lo[labels[min_index]] = locs[min_index]
        hi = cupy.zeros(int(labels.max()) + 2, int)
        hi[labels[max_index]] = locs[max_index]
        lo = lo[idxs]
        hi = hi[idxs]
        # lo is an index to the lowest value in input for each label,
        # hi is an index to the largest value.
        # move them to be either the same ((hi - lo) % 2 == 0) or next
        # to each other ((hi - lo) % 2 == 1), then average.
        step = (hi - lo) // 2
        lo += step
        hi -= step
        if input.dtype.kind in 'iub':
            # fix for https://github.com/scipy/scipy/issues/12836
            result += [(input[lo].astype(float) + input[hi].astype(float)) /
                       2.0]
        else:
            result += [(input[lo] + input[hi]) / 2.0]

    return result
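
The sort-then-searchsorted core of the fast path, reduced to a per-label minimum on a toy input (a sketch, not from the source):

import cupy

vals = cupy.array([5., 3., 1., 4., 2.])
labels = cupy.array([2, 1, 1, 2, 1])
order = cupy.lexsort(cupy.stack((vals, labels)))  # sort by label, then by value
vals, labels = vals[order], labels[order]         # [1. 2. 3. 4. 5.], [1 1 1 2 2]
first = cupy.searchsorted(labels, cupy.array([1, 2]), side='left')
print(vals[first])                                # [1. 4.]: per-label minima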
Example #24
def interp(x, xp, fp, left=None, right=None, period=None):
    """ One-dimensional linear interpolation.
    Args:
        x (cupy.ndarray): a 1-dimensional input on which the interpolation is
            performed.
        xp (cupy.ndarray): a 1-dimensional input on which the function values
            (``fp``) are known.
        fp (cupy.ndarray): a 1-dimensional input containing the function values
            corresponding to the ``xp`` points.
        left (float or complex): value to return if ``x < xp[0]``. Default is
            ``fp[0]``.
        right (float or complex): value to return if ``x > xp[-1]``. Default is
            ``fp[-1]``.
        period (optional): refer to the NumPy documentation for details.
    Returns:
        cupy.ndarray: The one-dimensional piecewise linear interpolant to a
            function with given discrete data points (``xp``, ``fp``),
            evaluated at ``x``.
    .. note::
        This function may synchronize if ``left`` or ``right`` is not on the
        device already.
    .. seealso:: :func:`numpy.interp`
    """

    if xp.ndim != 1 or fp.ndim != 1:
        raise ValueError('xp and fp must be 1D arrays')
    if xp.size != fp.size:
        raise ValueError('fp and xp are not of the same length')
    if xp.size == 0:
        raise ValueError('array of sample points is empty')
    if not x.flags.c_contiguous:
        raise NotImplementedError('Non-C-contiguous x is currently not '
                                  'supported')
    if period is not None:
        # The handling of "period" below is borrowed from NumPy

        if period == 0:
            raise ValueError("period must be a non-zero value")
        period = abs(period)
        left = None
        right = None

        x = x.astype(cupy.float64)
        xp = xp.astype(cupy.float64)

        # normalizing periodic boundaries
        x %= period
        xp %= period
        asort_xp = cupy.argsort(xp)
        xp = xp[asort_xp]
        fp = fp[asort_xp]
        xp = cupy.concatenate((xp[-1:] - period, xp, xp[0:1] + period))
        fp = cupy.concatenate((fp[-1:], fp, fp[0:1]))
        assert xp.flags.c_contiguous
        assert fp.flags.c_contiguous

    output = cupy.empty(x.shape, dtype=fp.dtype)
    idx = cupy.searchsorted(xp, x, side='right')
    left = fp[0] if left is None else cupy.array(left, xp.dtype)
    right = fp[-1] if right is None else cupy.array(right, xp.dtype)
    kern = _get_interp_kernel()
    kern(x, idx, xp, fp, xp.size, left, right, output)
    return output