Example #1
def d_TSV(mat):
    """Gradient of the total squared variation (TSV) with respect to ``mat``."""
    # Horizontal (axis=1) difference terms of the gradient.
    dif_c = 2 * cp.diff(mat, axis=1)
    dif_1 = cp.pad(dif_c, [(0, 0), (1, 0)], mode='constant')
    dif_2 = cp.pad(-dif_c, [(0, 0), (0, 1)], mode='constant')
    # Vertical (axis=0) difference terms of the gradient.
    dif_c = 2 * cp.diff(mat, axis=0)
    dif_3 = cp.pad(dif_c, [(1, 0), (0, 0)], mode='constant')
    dif_4 = cp.pad(-dif_c, [(0, 1), (0, 0)], mode='constant')

    return dif_1 + dif_2 + dif_3 + dif_4
Example #2
def TSV(mat):
    """Total squared variation: sum of squared first differences of ``mat``."""
    # TSV terms from left to right
    dif_c = cp.diff(mat, axis=1)
    sum_tsv1 = cp.sum(dif_c * dif_c)

    # TSV terms from bottom to top
    dif_c = cp.diff(mat, axis=0)
    sum_tsv2 = cp.sum(dif_c * dif_c)

    # Return all TSV terms
    return sum_tsv1 + sum_tsv2
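
Since d_TSV (Example #1) is meant to be the analytic gradient of TSV, a finite-difference comparison is a quick way to validate the pair. A minimal sketch, assuming cupy is installed and both functions above are in scope:

import cupy as cp

mat = cp.random.rand(8, 8)
eps = 1e-6
i, j = 3, 4

# Central finite difference of TSV along the (i, j) coordinate.
bump = cp.zeros_like(mat)
bump[i, j] = eps
numeric = (TSV(mat + bump) - TSV(mat - bump)) / (2 * eps)

# Should closely match the analytic gradient entry.
print(float(numeric), float(d_TSV(mat)[i, j]))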
Example #3
def count_nonzero(X, axis=None, sample_weight=None):
    """A variant of X.getnnz() with extension to weighting on axis 0

    Useful in efficiently calculating multilabel metrics.

    Parameters
    ----------
    X : CSR sparse matrix of shape (n_samples, n_labels)
        Input data.

    axis : None, 0 or 1
        The axis on which the data is aggregated.

    sample_weight : array-like of shape (n_samples,), default=None
        Weight for each row of X.
    """
    if axis == -1:
        axis = 1
    elif axis == -2:
        axis = 0
    # Check the storage format regardless of how the axis was spelled.
    if X.format != 'csr':
        raise TypeError('Expected CSR sparse format, got {0}'.format(X.format))

    # We rely here on the fact that np.diff(Y.indptr) for a CSR
    # will return the number of nonzero entries in each row.
    # A bincount over Y.indices will return the number of nonzeros
    # in each column. See ``csr_matrix.getnnz`` in scipy >= 0.14.
    if axis is None:
        if sample_weight is None:
            return X.nnz
        else:
            return np.dot(np.diff(X.indptr), sample_weight)
    elif axis == 1:
        out = np.diff(X.indptr)
        if sample_weight is None:
            # astype here is for consistency with axis=0 dtype
            return out.astype('intp')
        return out * sample_weight
    elif axis == 0:
        if sample_weight is None:
            return np.bincount(X.indices, minlength=X.shape[1])
        else:
            weights = np.repeat(sample_weight, np.diff(X.indptr))
            return np.bincount(X.indices,
                               minlength=X.shape[1],
                               weights=weights)
    else:
        raise ValueError('Unsupported axis: {0}'.format(axis))
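
The axis conventions above are easiest to see on a small matrix. A minimal usage sketch, assuming scipy is available and count_nonzero is in scope:

import numpy as np
from scipy import sparse

X = sparse.csr_matrix(np.array([[1, 0, 2],
                                [0, 0, 3],
                                [4, 5, 0]]))

print(count_nonzero(X))              # 5 stored nonzeros in total
print(count_nonzero(X, axis=0))      # per column: [2 1 2]
print(count_nonzero(X, axis=1))      # per row:    [2 1 2]
# Row weights [1, 2, 3] turn the column counts into weighted sums: [4. 3. 3.]
print(count_nonzero(X, axis=0, sample_weight=np.array([1., 2., 3.])))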
Example #4
File: util.py Project: bsuryadevara/clx
def average_precision_score(y_true, y_score):
    """
    Compute average precision score using precision and recall computed from cuml. 
    """
    precision, recall, _ = precision_recall_curve(y_true, y_score)
    # return step function integral
    return -cp.sum(cp.diff(recall) * cp.array(precision)[:-1])
Example #5
    def _sparse_fit(self, X, strategy, missing_values, fill_value):
        """Fit the transformer on sparse data."""
        mask_data = _get_mask(X.data, missing_values)
        n_implicit_zeros = X.shape[0] - np.diff(X.indptr)

        statistics = np.empty(X.shape[1])

        if strategy == "constant":
            # for constant strategy, self.statistics_ is used to store
            # fill_value in each column
            statistics.fill(fill_value)
        else:
            for i in range(X.shape[1]):
                column = X.data[X.indptr[i]:X.indptr[i + 1]]
                mask_column = mask_data[X.indptr[i]:X.indptr[i + 1]]
                column = column[~mask_column]

                # combine explicit and implicit zeros
                mask_zeros = _get_mask(column, 0)
                column = column[~mask_zeros]
                n_explicit_zeros = mask_zeros.sum()
                n_zeros = n_implicit_zeros[i] + n_explicit_zeros

                if strategy == "mean":
                    s = column.size + n_zeros
                    statistics[i] = np.nan if s == 0 else column.sum() / s

                elif strategy == "median":
                    statistics[i] = _get_median(column, n_zeros)

                elif strategy == "most_frequent":
                    statistics[i] = _most_frequent(column, 0, n_zeros)
        return statistics
Example #6
    def transform(self, X) -> SparseCumlArray:
        """Impute all missing values in X.

        Parameters
        ----------
        X : {array-like, sparse matrix}, shape (n_samples, n_features)
            The input data to complete.
        """
        check_is_fitted(self)

        X = self._validate_input(X, in_fit=False)
        X_indicator = super()._transform_indicator(X)

        statistics = self.statistics_

        if X.shape[1] != statistics.shape[0]:
            raise ValueError("X has %d features per sample, expected %d" %
                             (X.shape[1], self.statistics_.shape[0]))

        # Delete the invalid columns if strategy is not constant
        if self.strategy == "constant":
            valid_statistics = statistics
        else:
            # same as np.isnan but also works for object dtypes
            invalid_mask = _get_mask(statistics, np.nan)
            valid_mask = np.logical_not(invalid_mask)
            valid_statistics = statistics[valid_mask]
            valid_statistics_indexes = np.flatnonzero(valid_mask)

            if invalid_mask.any():
                missing = np.arange(X.shape[1])[invalid_mask]
                if self.verbose:
                    warnings.warn("Deleting features without "
                                  "observed values: %s" % missing)
                X = X[:, valid_statistics_indexes]

        # Do actual imputation
        if sparse.issparse(X):
            if self.missing_values == 0:
                raise ValueError("Imputation not possible when missing_values "
                                 "== 0 and input is sparse. Provide a dense "
                                 "array instead.")
            else:
                mask = _get_mask(X.data, self.missing_values)
                indexes = np.repeat(np.arange(len(X.indptr) - 1, dtype=np.intp),
                                    np.diff(X.indptr).tolist())[mask]

                X.data[mask] = valid_statistics[indexes].astype(X.dtype,
                                                                copy=False)
        else:
            mask = _get_mask(X, self.missing_values)
            if self.strategy == "constant":
                X[mask] = valid_statistics[0]
            else:
                for i, vi in enumerate(valid_statistics_indexes):
                    feature_idxs = np.flatnonzero(mask[:, vi])
                    X[feature_idxs, vi] = valid_statistics[i]

        X = super()._concatenate_indicator(X, X_indicator)
        return X
Example #7
    def _major_slice(self, idx, copy=False):
        """Index along the major axis where idx is a slice object.
        """

        if idx == slice(None):
            return self.copy() if copy else self

        M, N = self._swap(*self.shape)
        start, stop, step = idx.indices(M)
        M = len(range(start, stop, step))
        new_shape = self._swap(M, N)
        if M == 0:
            return self.__class__(new_shape)

        row_nnz = cupy.diff(self.indptr)
        idx_dtype = self.indices.dtype
        res_indptr = cupy.zeros(M + 1, dtype=idx_dtype)

        cupy.cumsum(row_nnz[idx], out=res_indptr[1:])

        if step == 1:
            idx_start = self.indptr[start]
            idx_stop = self.indptr[stop]
            res_indices = cupy.array(self.indices[idx_start:idx_stop],
                                     copy=copy)
            res_data = cupy.array(self.data[idx_start:idx_stop], copy=copy)
        else:
            res_indices, res_data = _index._csr_row_slice(
                start, step, self.indptr, self.indices, self.data, res_indptr)

        return self.__class__((res_data, res_indices, res_indptr),
                              shape=new_shape,
                              copy=False)
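
The indptr bookkeeping used here (and in several of the examples that follow) is easier to see on a tiny CSR matrix: diff(indptr) yields per-row nonzero counts, and a cumulative sum over a row selection rebuilds the sliced indptr. A NumPy-only sketch of that idea:

import numpy as np

# indptr of a 4-row CSR matrix whose rows hold 2, 0, 3 and 1 nonzeros.
indptr = np.array([0, 2, 2, 5, 6])
row_nnz = np.diff(indptr)            # [2 0 3 1]

# Select rows 2 and 0; the cumulative sum of their sizes is the new indptr.
idx = np.array([2, 0])
res_indptr = np.zeros(len(idx) + 1, dtype=indptr.dtype)
np.cumsum(row_nnz[idx], out=res_indptr[1:])
print(res_indptr)                    # [0 3 5]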
Example #8
def _csr_row_index(rows, Ap, Aj, Ax):
    """Populate indices and data arrays from the given row index

    Args:
        rows (cupy.ndarray): index array of rows to populate
        Ap (cupy.ndarray): indptr array from input sparse matrix
        Aj (cupy.ndarray): indices array from input sparse matrix
        Ax (cupy.ndarray): data array from input sparse matrix

    Returns:
        Bp (cupy.ndarray): indptr array for output sparse matrix
        Bj (cupy.ndarray): indices array of output sparse matrix
        Bx (cupy.ndarray): data array of output sparse matrix

    """
    row_nnz = cupy.diff(Ap)
    Bp = cupy.empty(rows.size + 1, dtype=Ap.dtype)
    Bp[0] = 0
    cupy.cumsum(row_nnz[rows], out=Bp[1:])
    nnz = int(Bp[-1])

    out_rows = cupy.empty(nnz, dtype=numpy.int32)

    # Build a COO row array from output CSR indptr.
    # Calling backend cusparse API directly to avoid
    # constructing a whole COO object.
    handle = device.get_cusparse_handle()
    cusparse.xcsr2coo(handle, Bp.data.ptr, nnz, Bp.size - 1, out_rows.data.ptr,
                      cusparse.CUSPARSE_INDEX_BASE_ZERO)

    Bj, Bx = _csr_row_index_ker(out_rows, rows, Ap, Aj, Ax, Bp)
    return Bp, Bj, Bx
Example #9
def _basic_simps(y, start, stop, x, dx, axis):
    import cupy

    nd = len(y.shape)
    if start is None:
        start = 0
    step = 2
    slice_all = (slice(None), ) * nd
    slice0 = tupleset(slice_all, axis, slice(start, stop, step))
    slice1 = tupleset(slice_all, axis, slice(start + 1, stop + 1, step))
    slice2 = tupleset(slice_all, axis, slice(start + 2, stop + 2, step))

    if x is None:  # Even spaced Simpson's rule.
        result = cupy.sum(dx / 3.0 * (y[slice0] + 4 * y[slice1] + y[slice2]),
                          axis=axis)
    else:
        # Account for possibly different spacings.
        #    Simpson's rule changes a bit.
        h = cupy.diff(x, axis=axis)
        sl0 = tupleset(slice_all, axis, slice(start, stop, step))
        sl1 = tupleset(slice_all, axis, slice(start + 1, stop + 1, step))
        h0 = h[sl0]
        h1 = h[sl1]
        hsum = h0 + h1
        hprod = h0 * h1
        h0divh1 = h0 / h1
        tmp = hsum / 6.0 * (y[slice0] * (2 - 1.0 / h0divh1) +
                            y[slice1] * hsum * hsum / hprod + y[slice2] *
                            (2 - h0divh1))
        result = cupy.sum(tmp, axis=axis)
    return result
Example #10
def _min_or_max_axis(X, axis, min_or_max):
    N = X.shape[axis]
    if N == 0:
        raise ValueError("zero-size array to reduction operation")
    M = X.shape[1 - axis]
    mat = X.tocsc() if axis == 0 else X.tocsr()
    mat.sum_duplicates()
    major_index, value = _minor_reduce(mat, min_or_max)
    not_full = np.diff(mat.indptr)[major_index] < N
    if min_or_max == 'min':
        min_or_max = np.fmin
    else:
        min_or_max = np.fmax
    value[not_full] = min_or_max(value[not_full], 0)
    mask = value != 0
    major_index = np.compress(mask, major_index)
    value = np.compress(mask, value)

    if axis == 0:
        res = gpu_sp.coo_matrix((value, (np.zeros(len(value)), major_index)),
                                dtype=X.dtype,
                                shape=(1, M))
    else:
        res = gpu_sp.coo_matrix((value, (major_index, np.zeros(len(value)))),
                                dtype=X.dtype,
                                shape=(M, 1))
    return res.A.ravel()
Example #11
def wint(n, t):

    N = len(t)
    s = cp.linspace(1e-40, 1, n)
    # Inverse Vandermonde matrix
    tmp1 = cp.arange(n)
    tmp2 = cp.arange(1, n + 2)
    iv = cp.linalg.inv(cp.exp(cp.outer(tmp1, cp.log(s))))
    u = cp.diff(
        cp.exp(cp.outer(tmp2, cp.log(s))) *
        cp.tile(1.0 / tmp2[..., cp.newaxis],
                [1, n]))  # integration over short intervals
    W1 = cp.matmul(iv, u[1:n + 1, :])  # x*pn(x) term
    W2 = cp.matmul(iv, u[0:n, :])  # const*pn(x) term

    # Compensate for overlapping short intervals
    tmp1 = cp.arange(1, n)
    tmp2 = (n - 1) * cp.ones((N - 2 * (n - 1) - 1))
    tmp3 = cp.arange(n - 1, 0, -1)
    p = 1 / cp.concatenate((tmp1, tmp2, tmp3))
    w = cp.zeros(N)
    for j in range(N - n + 1):
        # Change coordinates, and constant and linear parts
        W = ((t[j + n - 1] - t[j])**2) * W1 + (t[j + n - 1] - t[j]) * t[j] * W2

        for k in range(n - 1):
            w[j:j + n] = w[j:j + n] + p[j + k] * W[:, k]

    wn = w
    wn[-40:] = (w[-40]) / (N - 40) * cp.arange(N - 40, N)
    return wn
Example #12
    def _perform_insert(self, indices_inserts, data_inserts,
                        rows, row_counts, idx_dtype):
        """Insert new elements into current sparse matrix in sorted order"""
        indptr_diff = cupy.diff(self.indptr)
        indptr_diff[rows] += row_counts

        new_indptr = cupy.empty(self.indptr.shape, dtype=idx_dtype)
        new_indptr[0] = idx_dtype(0)
        new_indptr[1:] = indptr_diff

        # Build output arrays
        cupy.cumsum(new_indptr, out=new_indptr)
        out_nnz = int(new_indptr[-1])

        new_indices = cupy.empty(out_nnz, dtype=idx_dtype)
        new_data = cupy.empty(out_nnz, dtype=self.data.dtype)

        # Build an indexed indptr that contains the offsets for each
        # row, but only for the rows that receive inserts (i, j, x).
        new_indptr_lookup = cupy.zeros(new_indptr.size, dtype=idx_dtype)
        new_indptr_lookup[1:][rows] = row_counts
        cupy.cumsum(new_indptr_lookup, out=new_indptr_lookup)

        _index._insert_many_populate_arrays(
            indices_inserts, data_inserts, new_indptr_lookup,
            self.indptr, self.indices, self.data, new_indptr, new_indices,
            new_data, size=self.indptr.size-1)

        self.indptr = new_indptr
        self.indices = new_indices
        self.data = new_data
Example #13
def run_bootstrap(v, number_samples=2, block_size=60, number_of_threads=256):
    """
    @v, stock price matrix. [time, stocks]
    @number_samples, number of samples
    @block_size, sample block size
    """
    length, assets = v.shape  # get the time length and the number of assets
    init_prices = v[0, :].reshape(1, -1, 1)  # initial prices for all assets
    v = cupy.log(v)
    # compute the price difference, dimension of [length -1, assets]
    ref = cupy.diff(v, axis=0)
    # output results
    output = cupy.zeros((number_samples, assets, length))
    # sample starting position, exclusive
    sample_range = length - block_size
    # number of positions to sample to cover the whole seq length
    num_positions = (length - 2) // block_size + 1
    sample_positions = cupy.random.randint(
        0, sample_range,
        num_positions * number_samples)  # compute random starting positions
    number_of_blocks = len(sample_positions)
    boot_strap[(number_of_blocks, ),
               (number_of_threads, )](output, ref.T, block_size, num_positions,
                                      sample_positions)
    # reshape the results [number_samples, number assets, time]
    # output = output.reshape(number_samples, assets, length)
    # convert it into prices
    return (cupy.exp(output.cumsum(axis=2)) * init_prices)
Example #14
def cumulative_trapezoid(y, x=None, dx=1.0, axis=-1, initial=None):
    if _GPU_ENABLED:
        y = cp.asarray(y)
        if x is None:
            d = dx
        else:
            x = cp.asarray(x)
            if x.ndim == 1:
                d = cp.diff(x)
                shape = [1] * y.ndim
                shape[axis] = -1
                d = d.reshape(shape)
            elif len(x.shape) != len(y.shape):
                raise ValueError("If given, shape of x must be 1-D or the "
                                 "same as y.")
            else:
                d = cp.diff(x, axis=axis)
            # This check only applies when x was given (d is a scalar otherwise).
            if d.shape[axis] != y.shape[axis] - 1:
                raise ValueError("If given, length of x along axis must be the "
                                 "same as y.")

        def tupleset(t, i, value):
            l = list(t)
            l[i] = value
            return tuple(l)

        nd = len(y.shape)
        slice1 = tupleset((slice(None), ) * nd, axis, slice(1, None))
        slice2 = tupleset((slice(None), ) * nd, axis, slice(None, -1))

        res = cp.cumsum(d * (y[slice1] + y[slice2]) / 2.0, axis=axis)
        if initial is not None:
            if not np.isscalar(initial):
                raise ValueError("`initial` parameter should be a scalar.")

            shape = list(res.shape)
            shape[axis] = 1
            res = cp.concatenate(
                [cp.full(shape, initial, dtype=res.dtype), res], axis=axis)

        return res
    else:
        try:
            from scipy.integrate import cumulative_trapezoid as ctz
        except ImportError:
            from scipy.integrate import cumtrapz as ctz
        return ctz(y=y, x=x, dx=dx, axis=axis, initial=initial)
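
A minimal usage sketch, assuming the function above is in scope (it dispatches to cupy when available and falls back to scipy, so the result may be a device array):

import numpy as np

# Cumulative integral of y = 2t on [0, 1]; the exact antiderivative is t**2.
t = np.linspace(0.0, 1.0, 101)
y = 2 * t
F = cumulative_trapezoid(y, x=t, initial=0.0)
print(float(abs(F[-1] - 1.0)))  # close to zero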
Example #15
def _filter_cells(sparse_gpu_array, min_genes, max_genes, barcodes=None):
    # Number of detected genes per cell is the per-row nonzero count.
    degrees = cp.diff(sparse_gpu_array.indptr)
    query = ((min_genes <= degrees) & (degrees <= max_genes))
    query = query.get()
    if barcodes is None:
        return sparse_gpu_array.get()[query]
    else:
        return sparse_gpu_array.get()[query], barcodes[query]
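
A minimal usage sketch for the cell filter, assuming cupy with cupyx.scipy.sparse (the .get() calls mean the result is a host-side SciPy matrix):

import cupy as cp
from cupyx.scipy import sparse as gpu_sparse

# 3 cells x 4 genes; only cells with 1..3 detected genes pass.
counts = gpu_sparse.csr_matrix(cp.array([[1., 0., 2., 0.],
                                         [0., 0., 0., 0.],
                                         [3., 1., 0., 4.]]))
filtered = _filter_cells(counts, min_genes=1, max_genes=3)
print(filtered.shape)  # (2, 4): the empty cell is dropped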
Example #16
def euler(func, x0, t, args=()):
    """Fixed-step forward Euler integration of ``func`` over the grid ``t``."""
    # ``args`` defaults to an empty tuple; the original ``args=None`` default
    # would raise a TypeError when unpacked into ``func``.
    solution = cp.empty(shape=(len(t), len(x0), len(x0[0])))
    solution[0] = x0
    x = x0
    for i, dt in enumerate(cp.diff(t)):
        x = cp.add(x, cp.multiply(dt, func(x, t[i], *args)))
        solution[i + 1] = x
    return solution
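
A minimal usage sketch, assuming cupy and the euler stepper above: integrating dx/dt = -x from a 2x2 batch of unit initial states.

import cupy as cp

def decay(x, t):
    return -x                    # right-hand side of dx/dt = -x

x0 = cp.ones((2, 2))
t = cp.linspace(0.0, 1.0, 101)
path = euler(decay, x0, t)
print(float(path[-1, 0, 0]))     # roughly exp(-1) ~ 0.368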
Example #17
def csr_polynomial_expansion(X, interaction_only, degree):
    """Apply polynomial expansion on CSR matrix

    Parameters
    ----------
    X : sparse CSR matrix
        Input array

    Returns
    -------
    New expanded matrix
    """
    assert degree in (2, 3)

    interaction_only = 1 if interaction_only else 0

    d = X.shape[1]
    if degree == 2:
        expanded_dimensionality = int((d**2 + d) / 2 - interaction_only * d)
    else:
        expanded_dimensionality = int((d**3 + 3 * d**2 + 2 * d) / 6 -
                                      interaction_only * d**2)
    if expanded_dimensionality == 0:
        return None
    assert expanded_dimensionality > 0

    nnz = cp.diff(X.indptr)
    if degree == 2:
        total_nnz = (nnz**2 + nnz) / 2 - interaction_only * nnz
    else:
        total_nnz = ((nnz**3 + 3 * nnz**2 + 2 * nnz) / 6 -
                     interaction_only * nnz**2)
    del nnz
    nnz_cumsum = total_nnz.cumsum(dtype=cp.int64)
    total_nnz_max = int(total_nnz.max())
    total_nnz = int(total_nnz.sum())

    num_rows = X.indptr.shape[0] - 1

    expanded_data = cp.empty(shape=total_nnz, dtype=X.data.dtype)
    expanded_indices = cp.empty(shape=total_nnz, dtype=X.indices.dtype)
    expanded_indptr = cp.empty(shape=num_rows + 1, dtype=X.indptr.dtype)
    expanded_indptr[0] = X.indptr[0]
    expanded_indptr[1:] = nnz_cumsum

    tpb = (32, 32)
    bpg_x = ceil(X.indptr.shape[0] / tpb[0])
    bpg_y = ceil(total_nnz_max / tpb[1])
    bpg = (bpg_x, bpg_y)
    perform_expansion[bpg, tpb](X.indptr, X.indices, X.data, expanded_data,
                                expanded_indices, d, interaction_only, degree,
                                expanded_indptr)

    return cp.sparse.csr_matrix(
        (expanded_data, expanded_indices, expanded_indptr),
        shape=(num_rows, expanded_dimensionality))
Example #18
def _minor_reduce(X, min_or_max):
    fminmax = ufunc_dic[min_or_max]

    major_index = np.flatnonzero(np.diff(X.indptr))
    values = cpu_np.zeros(major_index.shape[0], dtype=X.dtype)
    ptrs = X.indptr[major_index]

    start = ptrs[0]
    for i, end in enumerate(ptrs[1:]):
        values[i] = fminmax(X.data[start:end])
        start = end
    # Use ``start`` rather than ``end``: it also covers the single-row case,
    # where the loop body never runs and ``end`` would be unbound.
    values[-1] = fminmax(X.data[start:])

    return major_index, np.array(values)
Example #19
def _compute_weights_3d(data, spacing, beta, eps, multichannel):
    # Weight calculation is the main difference in the multispectral version:
    # the original gradient**2 is replaced with a sum of gradients**2.
    gradients = cp.concatenate(
        [cp.diff(data[..., 0], axis=ax).ravel() / spacing[ax]
         for ax in [2, 1, 0] if data.shape[ax] > 1], axis=0)
    gradients *= gradients
    for channel in range(1, data.shape[-1]):
        grad = cp.concatenate(
            [cp.diff(data[..., channel], axis=ax).ravel() / spacing[ax]
             for ax in [2, 1, 0] if data.shape[ax] > 1], axis=0)
        grad *= grad
        gradients += grad

    # All channels considered together in this standard deviation
    scale_factor = -beta / (10 * data.std())
    if multichannel:
        # New final term in beta to give == results in trivial case where
        # multiple identical spectra are passed.
        scale_factor /= math.sqrt(data.shape[-1])
    weights = cp.exp(scale_factor * gradients)
    weights += eps
    return -weights
Example #20
def inplace_csr_row_scale(X, scale):
    """ Inplace row scaling of a CSR matrix.

    Scale each sample of the data matrix by multiplying with specific scale
    provided by the caller assuming a (n_samples, n_features) shape.

    Parameters
    ----------
    X : CSR sparse matrix, shape (n_samples, n_features)
        Matrix to be scaled.

    scale : float array with shape (n_samples,)
        Array of precomputed sample-wise values to use for scaling.
    """
    assert scale.shape[0] == X.shape[0]
    X.data *= np.repeat(scale, np.diff(X.indptr).tolist())
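
A minimal usage sketch, assuming scipy and the function above in scope:

import numpy as np
from scipy import sparse

X = sparse.csr_matrix(np.array([[1., 2., 0.],
                                [0., 3., 4.]]))
inplace_csr_row_scale(X, np.array([10., 100.]))
print(X.toarray())  # row 0 scaled by 10, row 1 by 100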
Example #21
def average_precision_score(y_true, y_score):
    """
    Compute average precision (AP) from prediction scores.

    .. note:: This implementation can only be used with binary classification.

    Parameters
    ----------
    y_true : array-like of shape (n_samples,)
        True labels. The binary case expects labels
        with shape (n_samples,).
    y_score : array-like of shape (n_samples,)
        Target scores. In the binary case, these can be either
        probability estimates or non-thresholded decision values (as returned
        by `decision_function` on some classifiers). The binary case
        expects a shape (n_samples,), and the scores must be the scores
        of the class with the greater label.

    Returns
    -------
        average_precision : float

    Examples
    --------
    >>> import numpy as np
    >>> from cuml.metrics import average_precision_score
    >>> y_true = np.array([0, 0, 1, 1])
    >>> y_scores = np.array([0.1, 0.4, 0.35, 0.8])
    >>> print(average_precision_score(y_true, y_scores))
    0.83
    """
    # y_true, n_rows, n_cols, ytype = \
    #     input_to_cupy_array(y_true, check_dtype=[np.int32, np.int64,
    #                                              np.float32, np.float64])

    # y_score, _, _, _ = \
    #     input_to_cupy_array(y_score, check_dtype=[np.int32, np.int64,
    #                                              np.float32, np.float64],
    #                         check_rows=n_rows, check_cols=n_cols)

    if cp.unique(y_true).shape[0] == 1:
        raise ValueError("average_precision_score cannot be used when "
                         "only one class present in y_true. Average precision "
                         "score is not defined in that case.")

    precision, recall, thresholds = precision_recall_curve(y_true, y_score)
    return -cp.sum(cp.diff(recall) * cp.array(precision)[:-1])
Example #22
    def __init__(
        self,
        points,
        values,
        method="linear",
        bounds_error=True,
        fill_value=cp.nan,
    ):
        if method not in ["linear", "nearest"]:
            raise ValueError("Method '%s' is not defined" % method)
        self.method = method
        self.bounds_error = bounds_error

        # allow reasonable duck-typed values
        values = cp.asarray(values)

        if len(points) > values.ndim:
            raise ValueError("There are %d point arrays, but values has %d "
                             "dimensions" % (len(points), values.ndim))

        if hasattr(values, "dtype") and hasattr(values, "astype"):
            if not cp.issubdtype(values.dtype, cp.inexact):
                values = values.astype(float)

        self.fill_value = fill_value
        if fill_value is not None:
            fill_value_dtype = cp.asarray(fill_value).dtype
            if hasattr(values, "dtype") and not cp.can_cast(
                    fill_value_dtype, values.dtype, casting="same_kind"):
                raise ValueError("fill_value must be either 'None' or "
                                 "of a type compatible with values")

        for i, p in enumerate(points):
            if not cp.all(cp.diff(p) > 0.0):
                raise ValueError("The points in dimension %d must be strictly "
                                 "ascending" % i)
            if not cp.asarray(p).ndim == 1:
                raise ValueError("The points in dimension %d must be "
                                 "1-dimensional" % i)
            if not values.shape[i] == len(p):
                raise ValueError("There are %d points and %d values in "
                                 "dimension %d" % (len(p), values.shape[i], i))
        self.grid = tuple([cp.asarray(p) for p in points])
        self.values = values
Example #23
    def _major_index_fancy(self, idx):
        """Index along the major axis where idx is an array of ints.
        """
        _, N = self._swap(*self.shape)
        M = len(idx)
        new_shape = self._swap(M, N)
        if M == 0:
            return self.__class__(new_shape)

        row_nnz = cupy.diff(self.indptr)
        idx_dtype = self.indices.dtype
        res_indptr = cupy.zeros(M + 1, dtype=idx_dtype)
        cupy.cumsum(row_nnz[idx], out=res_indptr[1:])

        res_indices, res_data = _index._csr_row_index(idx, self.indptr,
                                                      self.indices, self.data,
                                                      res_indptr)

        return self.__class__((res_data, res_indices, res_indptr),
                              shape=new_shape,
                              copy=False)
Example #24
File: _index.py Project: carterbox/cupy
def _csr_row_index(Ax, Aj, Ap, rows):
    """Populate indices and data arrays from the given row index
    Args:
        Ax (cupy.ndarray): data array from input sparse matrix
        Aj (cupy.ndarray): indices array from input sparse matrix
        Ap (cupy.ndarray): indptr array from input sparse matrix
        rows (cupy.ndarray): index array of rows to populate
    Returns:
        Bx (cupy.ndarray): data array of output sparse matrix
        Bj (cupy.ndarray): indices array of output sparse matrix
        Bp (cupy.ndarray): indptr array for output sparse matrix
    """
    row_nnz = cupy.diff(Ap)
    Bp = cupy.empty(rows.size + 1, dtype=Ap.dtype)
    Bp[0] = 0
    cupy.cumsum(row_nnz[rows], out=Bp[1:])
    nnz = int(Bp[-1])

    out_rows = _csr_indptr_to_coo_rows(nnz, Bp)

    Bj, Bx = _csr_row_index_ker(out_rows, rows, Ap, Aj, Ax, Bp)
    return Bx, Bj, Bp
Example #25
def inplace_csr_row_normalize_l1(X):
    """Normalize CSR matrix inplace with L1 norm

    Parameters
    ----------
    X : sparse CSR matrix
        Input array

    Returns
    -------
    None. ``X`` is normalized in place.
    """
    n_rows = X.indptr.shape[0]  # number of rows plus one
    max_nnz = cp.diff(X.indptr).max()
    tpb = (32, 32)
    bpg_x = ceil(n_rows / tpb[0])
    bpg_y = ceil(max_nnz / tpb[1])
    bpg = (bpg_x, bpg_y)

    norm = cp.zeros(n_rows - 1, dtype=X.dtype)
    l1_step1_k[bpg, tpb](X.indptr, X.data, norm)
    norm_step2_k[bpg, tpb](X.indptr, X.data, norm)
Example #26
def _minor_reduce(X, min_or_max):
    if min_or_max == 'min':
        min_or_max = np.min
    else:
        min_or_max = np.max

    major_index = np.flatnonzero(np.diff(X.indptr))

    # reduceat tries casts X.indptr to intp, which errors
    # if it is int64 on a 32 bit system.
    # Reinitializing prevents this where possible, see #13737
    X = type(X)((X.data, X.indices, X.indptr), shape=X.shape)

    # Reduce each row's data segment. Only rows with at least one stored
    # value (``major_index``) are visited, mirroring ``ufunc.reduceat``;
    # looping over all of ``indptr`` would reduce empty slices and fail.
    ptrs = X.indptr[major_index]
    value = cpu_np.zeros(len(ptrs), dtype=X.dtype)

    start = ptrs[0]
    for i, end in enumerate(ptrs[1:]):
        value[i] = min_or_max(X.data[start:end])
        start = end
    value[-1] = min_or_max(X.data[start:])

    value = np.array(value)
    return major_index, value
Example #27
def diag_indices_from(arr):
    """
    Return the indices to access the main diagonal of an n-dimensional array.
    See `diag_indices` for full details.

    Args:
        arr (cupy.ndarray): At least 2-D.

    .. seealso:: :func:`numpy.diag_indices_from`

    """
    if not isinstance(arr, cupy.ndarray):
        raise TypeError("Argument must be cupy.ndarray")

    if not arr.ndim >= 2:
        raise ValueError("input array must be at least 2-d")
    # For more than d=2, the strided formula is only valid for arrays with
    # all dimensions equal, so we check first.
    if not cupy.all(cupy.diff(arr.shape) == 0):
        raise ValueError("All dimensions of input must be of equal length")

    return diag_indices(arr.shape[0], arr.ndim)
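
A minimal usage sketch, assuming cupy and the function above in scope (cupy also ships the same call as cupy.diag_indices_from):

import cupy

a = cupy.zeros((3, 3), dtype=cupy.float32)
rows, cols = diag_indices_from(a)
a[rows, cols] = 1.0  # write onto the main diagonal
print(a)             # identity pattern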
Example #28
def cumsum(x, Kahan=0):
    """
    Wrapper for exclusive prefix sum computation with an optional
    refinement step using an approach similar to Kahan summation.
    This function is not exposed to the user.

    Arguments:
    -------
      x: cupy.core.core.ndarray
        the input array of length n to be scanned with operation +
      Kahan: int
        non-negative number of Kahan summation adjustment rounds
    Returns
    -------
    cupy.core.core.ndarray
        the computed exclusive prefix scan of length n+1

    """

    assert isinstance(Kahan, int) and Kahan >= 0

    # allocate an empty array with leading 0
    y = cp.empty(len(x)+1, dtype=x.dtype)
    y[0] = 0

    # compute the inclusive prefix sum starting at entry 1
    cp.cumsum(x, out=y[1:])
    
    # Basically exploit that (d/dt int f(t) dt) - f(t) = r = 0 for all f(t);
    # in case delta is non-vanishing due to numeric inaccuracies, we add
    # the prefix scan of r to the final result (inaccuracies might add up)
    if Kahan:
        r = x - cp.diff(y)
        if cp.max(cp.abs(r)):
            y += cumsum(r, Kahan - 1)
    return y
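
A minimal usage sketch, assuming cupy and the cumsum wrapper above: the exclusive scan has length n+1, and one Kahan round drives the reconstruction residual toward zero.

import cupy as cp

x = cp.random.rand(1000).astype(cp.float32)
y = cumsum(x, Kahan=1)
print(y.shape)                                 # (1001,), leading 0
print(float(cp.max(cp.abs(cp.diff(y) - x))))   # small residual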
Example #29
def trapz(y, x=None, dx=1.0, axis=-1):
    """
    Lifted from `numpy <https://github.com/numpy/numpy/blob/v1.15.1/numpy/lib/function_base.py#L3804-L3891>`_.

    Integrate along the given axis using the composite trapezoidal rule.
    Integrate `y` (`x`) along given axis.

    Parameters
    ==========
    y : array_like
        Input array to integrate.
    x : array_like, optional
        The sample points corresponding to the `y` values. If `x` is None,
        the sample points are assumed to be evenly spaced `dx` apart. The
        default is None.
    dx : scalar, optional
        The spacing between sample points when `x` is None. The default is 1.
    axis : int, optional
        The axis along which to integrate.

    Returns
    =======
    trapz : float
        Definite integral as approximated by trapezoidal rule.


    References
    ==========
    .. [1] Wikipedia page: http://en.wikipedia.org/wiki/Trapezoidal_rule

    Examples
    ========
    >>> trapz([1,2,3])
    4.0
    >>> trapz([1,2,3], x=[4,6,8])
    8.0
    >>> trapz([1,2,3], dx=2)
    8.0
    >>> a = xp.arange(6).reshape(2, 3)
    >>> a
    array([[0, 1, 2],
           [3, 4, 5]])
    >>> trapz(a, axis=0)
    array([ 1.5,  2.5,  3.5])
    >>> trapz(a, axis=1)
    array([ 2.,  8.])
    """
    y = xp.asanyarray(y)
    if x is None:
        d = dx
    else:
        x = xp.asanyarray(x)
        if x.ndim == 1:
            d = xp.diff(x)
            # reshape to correct shape
            shape = [1] * y.ndim
            shape[axis] = d.shape[0]
            d = d.reshape(shape)
        else:
            d = xp.diff(x, axis=axis)
    ndim = y.ndim
    slice1 = [slice(None)] * ndim
    slice2 = [slice(None)] * ndim
    slice1[axis] = slice(1, None)
    slice2[axis] = slice(None, -1)
    product = d * (y[tuple(slice1)] + y[tuple(slice2)]) / 2.0
    try:
        ret = product.sum(axis)
    except ValueError:
        ret = xp.add.reduce(product, axis)
    return ret
Example #30
File: _tfidf.py Project: vinaydes/cuml
def _sparse_document_frequency(X):
    """Count the number of non-zero values for each feature in sparse X."""
    if cupyx.scipy.sparse.isspmatrix_csr(X):
        return cp.bincount(X.indices, minlength=X.shape[1])
    else:
        return cp.diff(X.indptr)
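
A minimal usage sketch, assuming cupy with cupyx.scipy.sparse and the helper above:

import cupy as cp
from cupyx.scipy import sparse

# 3 documents x 4 terms: document frequency counts documents containing a term.
X = sparse.csr_matrix(cp.array([[1., 0., 2., 0.],
                                [0., 0., 1., 0.],
                                [1., 1., 0., 0.]]))
print(_sparse_document_frequency(X))  # [2 1 2 0]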