Example #1
def test_array_split(type, test_size, train_size, shuffle):
    X = np.zeros((100, 10)) + np.arange(100).reshape(100, 1)
    y = np.arange(100).reshape(100, 1)

    if type == 'cupy':
        X = cp.asarray(X)
        y = cp.asarray(y)

    if type == 'numba':
        X = cuda.to_device(X)
        y = cuda.to_device(y)

    if type == 'rmm':
        X = rmm.to_device(X)
        y = rmm.to_device(y)

    X_train, X_test, y_train, y_test = train_test_split(X,
                                                        y,
                                                        train_size=train_size,
                                                        test_size=test_size,
                                                        shuffle=shuffle,
                                                        random_state=0)

    if type == 'cupy':
        assert isinstance(X_train, cp.ndarray)
        assert isinstance(X_test, cp.ndarray)
        assert isinstance(y_train, cp.ndarray)
        assert isinstance(y_test, cp.ndarray)

    if type in ['numba', 'rmm']:
        assert cuda.devicearray.is_cuda_ndarray(X_train)
        assert cuda.devicearray.is_cuda_ndarray(X_test)
        assert cuda.devicearray.is_cuda_ndarray(y_train)
        assert cuda.devicearray.is_cuda_ndarray(y_test)

    if train_size is not None:
        assert X_train.shape[0] == X.shape[0] * train_size
        assert y_train.shape[0] == y.shape[0] * train_size

    if test_size is not None:
        assert X_test.shape[0] == X.shape[0] * test_size
        assert y_test.shape[0] == y.shape[0] * test_size

    if shuffle is None:
        assert (X_train == X[0:train_size]).all()
        assert (y_train == y[0:train_size]).all()
        assert (X_test == X[-1 * test_size:]).all()
        assert (y_test == y[-1 * test_size:]).all()

        # tnc (presumably a numba device-array check) and
        # PatchedNumbaDeviceArray come from the surrounding test module;
        # the wrapper makes numba device arrays comparable/concatenable
        if tnc(X_train):
            X_train = PatchedNumbaDeviceArray(X_train)
            X_test = PatchedNumbaDeviceArray(X_test)
            y_train = PatchedNumbaDeviceArray(y_train)
            y_test = PatchedNumbaDeviceArray(y_test)

        X_rec = cp.sort(cp.concatenate((X_train, X_test)))
        y_rec = cp.sort(cp.concatenate((y_train, y_test)))

        assert (X_rec == X).all()
        assert (y_rec == y).all()
Example #2
def test_rotated_img():
    """
    The harris filter should yield the same results with an image and it's
    rotation.
    """
    im = img_as_float(cp.asarray(data.astronaut().mean(axis=2)))
    im_rotated = im.T

    # # Moravec
    # results = peak_local_max(corner_moravec(im),
    #                          min_distance=10, threshold_rel=0)
    # results_rotated = peak_local_max(corner_moravec(im_rotated),
    #                                  min_distance=10, threshold_rel=0)
    # assert (cp.sort(results[:, 0]) == cp.sort(results_rotated[:, 1])).all()
    # assert (cp.sort(results[:, 1]) == cp.sort(results_rotated[:, 0])).all()

    # Harris
    results = cp.nonzero(corner_harris(im))
    results_rotated = cp.nonzero(corner_harris(im_rotated))

    assert (cp.sort(results[0]) == cp.sort(results_rotated[1])).all()
    assert (cp.sort(results[1]) == cp.sort(results_rotated[0])).all()

    # Shi-Tomasi
    results = cp.nonzero(corner_shi_tomasi(im))
    results_rotated = cp.nonzero(corner_shi_tomasi(im_rotated))

    assert (cp.sort(results[0]) == cp.sort(results_rotated[1])).all()
    assert (cp.sort(results[1]) == cp.sort(results_rotated[0])).all()
Example #3
    def test_permutation_sort_1dim(self, dtype):
        cupy_random = self._xp_random(cupy)
        a = cupy.arange(10, dtype=dtype)
        b = cupy.copy(a)
        c = cupy_random.permutation(a)
        testing.assert_allclose(a, b)
        testing.assert_allclose(b, cupy.sort(c))
Example #4
def in1d(ar1, ar2, assume_unique=False, invert=False):
    """Tests whether each element of a 1-D array is also present in a second
    array.

    Returns a boolean array the same length as ``ar1`` that is ``True``
    where an element of ``ar1`` is in ``ar2`` and ``False`` otherwise.

    Args:
        ar1 (cupy.ndarray): Input array.
        ar2 (cupy.ndarray): The values against which to test each value of
            ``ar1``.
        assume_unique (bool, optional): Ignored
        invert (bool, optional): If ``True``, the values in the returned array
            are inverted (that is, ``False`` where an element of ``ar1`` is in
            ``ar2`` and ``True`` otherwise). Default is ``False``.

    Returns:
        cupy.ndarray, bool: The values ``ar1[in1d]`` are in ``ar2``.

    """
    # Ravel both arrays, behavior for the first array could be different
    ar1 = ar1.ravel()
    ar2 = ar2.ravel()
    if ar1.size == 0 or ar2.size == 0:
        if invert:
            return cupy.ones(ar1.shape, dtype=cupy.bool_)
        else:
            return cupy.zeros(ar1.shape, dtype=cupy.bool_)
    # Use brilliant searchsorted trick
    # https://github.com/cupy/cupy/pull/4018#discussion_r495790724
    ar2 = cupy.sort(ar2)
    v1 = cupy.searchsorted(ar2, ar1, 'left')
    v2 = cupy.searchsorted(ar2, ar1, 'right')
    return v1 == v2 if invert else v1 != v2
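As a quick check of the trick: an element of ``ar1`` is in ``ar2`` exactly when its left and right insertion points into the sorted ``ar2`` differ. A minimal run (values mirror the ``numpy.in1d`` docstring example):

import cupy

ar1 = cupy.array([0, 1, 2, 5, 0])
ar2 = cupy.array([0, 2])
print(in1d(ar1, ar2))               # [ True False  True False  True]
print(in1d(ar1, ar2, invert=True))  # [False  True False  True False]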
Example #5
    def test_permutation_sort_ndim(self, dtype):
        cupy_random = self._xp_random(cupy)
        a = cupy.arange(15, dtype=dtype).reshape(5, 3)
        b = cupy.copy(a)
        c = cupy_random.permutation(a)
        testing.assert_allclose(a, b)
        testing.assert_allclose(b, cupy.sort(c, axis=0))
Example #6
def generate_negatives(neg_users, true_mat, item_range, sort=False, use_trick=False):
    """ 
    Generate negative samples for data augmentation
    """
    neg_u = []
    neg_i = []

    # If using the shortcut, generate negative items without checking if the associated
    # user has interacted with it. Speeds up training significantly with very low impact
    # on accuracy.
    if use_trick:
        neg_items = cp.random.randint(0, high=item_range, size=neg_users.shape[0])
        return neg_users, neg_items

    # Otherwise, generate negative items, check if associated user has interacted with it,
    # then generate a new one if true
    while len(neg_users) > 0:
        neg_items = cp.random.randint(0, high=item_range, size=neg_users.shape[0])
        neg_mask = true_mat[neg_users, neg_items]
        neg_u.append(neg_users[neg_mask])
        neg_i.append(neg_items[neg_mask])

        neg_users = neg_users[cp.logical_not(neg_mask)]

    neg_users = cp.concatenate(neg_u)
    neg_items = cp.concatenate(neg_i)

    if not sort:
        return neg_users, neg_items

    sorted_users = cp.sort(neg_users)
    sort_indices = cp.argsort(neg_users)

    return sorted_users, neg_items[sort_indices]
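A minimal sketch of driving the rejection loop above, assuming ``true_mat`` is a boolean matrix that is ``True`` wherever a (user, item) pair is an acceptable negative (masked pairs are kept; the rest are re-drawn):

import cupy as cp

n_items = 6
true_mat = cp.ones((4, n_items), dtype=cp.bool_)   # assumption: True == valid negative
true_mat[0, 0] = False  # pretend user 0 has interacted with item 0
users = cp.array([0, 0, 1, 2, 3])

neg_u, neg_i = generate_negatives(users, true_mat, n_items, sort=True)
assert (neg_u == cp.sort(neg_u)).all()      # users come back sorted
assert bool(true_mat[neg_u, neg_i].all())   # every returned pair is a valid negative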
Example #7
def connected_components(csgraph,
                         directed=True,
                         connection='weak',
                         return_labels=True):
    """Analyzes the connected components of a sparse graph
    Args:
        csgraph (cupy.ndarray of cupyx.scipy.sparse.csr_matrix): The adjacency
            matrix representing connectivity among nodes.
        directed (bool): If ``True``, it operates on a directed graph. If
            ``False``, it operates on an undirected graph.
        connection (str): ``'weak'`` or ``'strong'``. For directed graphs, the
            type of connection to use. Nodes i and j are "strongly" connected
            only when a path exists both from i to j and from j to i.
            If ``directed`` is ``False``, this argument is ignored.
        return_labels (bool): If ``True``, it returns the labels for each of
            the connected components.
    Returns:
        tuple of int and cupy.ndarray, or int:
            If ``return_labels`` == ``True``, returns a tuple ``(n, labels)``,
            where ``n`` is the number of connected components and ``labels`` is
            labels of each connected components. Otherwise, returns ``n``.
    .. seealso:: :func:`scipy.sparse.csgraph.connected_components`
    """
    if not _cugraph_available:
        raise RuntimeError('cugraph is not available')

    connection = connection.lower()
    if connection not in ('weak', 'strong'):
        raise ValueError("connection must be 'weak' or 'strong'")

    if not directed:
        connection = 'weak'

    if not cupyx.scipy.sparse.isspmatrix_csr(csgraph):
        csgraph = cupyx.scipy.sparse.csr_matrix(csgraph)
    _util._assert_nd_squareness(csgraph)
    m = csgraph.shape[0]
    if csgraph.nnz == 0:
        return m, cupy.arange(m, dtype=csgraph.indices.dtype)
    labels = cupy.empty(m, dtype=csgraph.indices.dtype)

    if connection == 'strong':
        cugraph.strongly_connected_components(csgraph, labels)
    else:
        csgraph += csgraph.T
        if not cupyx.scipy.sparse.isspmatrix_csr(csgraph):
            csgraph = cupyx.scipy.sparse.csr_matrix(csgraph)
        cugraph.weakly_connected_components(csgraph, labels)
        # Note: In the case of weak connection, cuGraph creates labels with a
        # start number of 1, so decrement the label number.
        labels -= 1

    count = cupy.zeros((1, ), dtype=csgraph.indices.dtype)
    root_labels = cupy.empty((m, ), dtype=csgraph.indices.dtype)
    _cupy_count_components(labels, count, root_labels, size=m)
    n = int(count[0])
    if not return_labels:
        return n
    _cupy_adjust_labels(n, cupy.sort(root_labels[:n]), labels)
    return n, labels
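For reference, a small undirected example (hypothetical values; requires cugraph, per the RuntimeError above):

import cupy
import cupyx.scipy.sparse

# edge 0-1 only, so {0, 1} and {2} form two components
adj = cupyx.scipy.sparse.csr_matrix(cupy.array([[0, 1, 0],
                                                [1, 0, 0],
                                                [0, 0, 0]], dtype='f'))
n, labels = connected_components(adj, directed=False)
print(n)       # 2
print(labels)  # e.g. [0 0 1]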
Example #8
    def __init__(self, arg1, shape=None, dtype=None, copy=False):
        if isinstance(arg1, tuple):
            data, offsets = arg1
            if shape is None:
                raise ValueError('expected a shape argument')

        else:
            raise ValueError('unrecognized form for dia_matrix constructor')

        data = cupy.array(data, dtype=dtype, copy=copy)
        data = cupy.atleast_2d(data)
        offsets = cupy.array(offsets, dtype='i', copy=copy)
        offsets = cupy.atleast_1d(offsets)

        if offsets.ndim != 1:
            raise ValueError('offsets array must have rank 1')

        if data.ndim != 2:
            raise ValueError('data array must have rank 2')

        if data.shape[0] != len(offsets):
            raise ValueError(
                'number of diagonals (%d) does not match the number of '
                'offsets (%d)' % (data.shape[0], len(offsets)))

        sorted_offsets = cupy.sort(offsets)
        if (sorted_offsets[:-1] == sorted_offsets[1:]).any():
            raise ValueError('offset array contains duplicate values')

        self.data = data
        self.offsets = offsets
        self._shape = shape
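The ``(data, offsets)`` form mirrors ``scipy.sparse.dia_matrix``. A small construction, assuming the class is exposed as ``cupyx.scipy.sparse.dia_matrix`` (values from the SciPy docs example):

import cupy
import cupyx.scipy.sparse

data = cupy.array([[1, 2, 3, 4]] * 3, dtype='f')
offsets = cupy.array([0, -1, 2], dtype='i')
m = cupyx.scipy.sparse.dia_matrix((data, offsets), shape=(4, 4))
print(m.toarray())
# [[1. 0. 3. 0.]
#  [1. 2. 0. 4.]
#  [0. 2. 3. 0.]
#  [0. 0. 3. 4.]]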
Example #9
def quantile_bin_array(data, bins=6):
    """Returns symbolified array with equal-quantile binning.

    Parameters
    ----------
    data : array
        Data array of shape (time, variables).

    bins : int, optional (default: 6)
        Number of bins.

    Returns
    -------
    symb_array : array
        Converted data of integer type.
    """
    T, N = data.shape

    # get the bin quantile steps
    bin_edge = int(np.ceil(T / float(bins)))

    symb_array = np.zeros((T, N), dtype='int32')

    # get the lower edges of the bins for every time series
    edges = np.sort(data, axis=0)[::bin_edge, :].T
    bins = edges.shape[1]

    # This gives the symbolic time series
    symb_array = (data.reshape(T, N, 1) >= edges.reshape(1, N, bins)).sum(
        axis=2) - 1

    return symb_array.astype('int32')
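For example, with ``T = 6`` samples and ``bins = 3``, every column is mapped to equal-count symbols 0, 0, 1, 1, 2, 2:

import numpy as np

data = np.arange(12, dtype=float).reshape(6, 2)  # T=6, N=2
print(quantile_bin_array(data, bins=3).T)
# [[0 0 1 1 2 2]
#  [0 0 1 1 2 2]]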
Example #10
def _masked_column_median(arr, masked_value):
    """Compute the median of each column in the 2D array arr, ignoring any
    instances of masked_value"""
    mask = _get_mask(arr, masked_value)
    if arr.size == 0:
        return cp.full(arr.shape[1], cp.nan)
    arr_sorted = arr.copy()
    if not cp.isnan(masked_value):
        # If nan is not the missing value, any column with nans should
        # have a median of nan
        nan_cols = cp.any(cp.isnan(arr), axis=0)
        arr_sorted[mask] = cp.nan
    else:
        nan_cols = cp.full(arr.shape[1], False)
    # nans are always sorted to end of array
    arr_sorted = cp.sort(arr_sorted, axis=0)

    count_missing_values = mask.sum(axis=0)
    # Ignore missing values in determining "halfway" index of sorted
    # array
    n_elems = arr.shape[0] - count_missing_values

    # If no elements remain after removing missing values, the median for
    # that column is nan
    nan_cols = cp.logical_or(nan_cols, n_elems <= 0)

    col_index = cp.arange(arr_sorted.shape[1])
    median = (arr_sorted[cp.floor_divide(n_elems - 1, 2), col_index] +
              arr_sorted[cp.floor_divide(n_elems, 2), col_index]) / 2

    median[nan_cols] = cp.nan
    return median
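A small example of the column-wise masked median, assuming ``_get_mask(arr, v)`` returns the elementwise ``arr == v`` mask (as in scikit-learn's helper):

import cupy as cp

arr = cp.array([[1., -1., 3.],
                [2., -1., -1.],
                [3., 5., 4.]])
# -1 marks missing entries; columns keep 3, 1, and 2 real values
print(_masked_column_median(arr, -1.))  # [2.  5.  3.5]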
Example #11
def test_modify_generated_cupy():
    b = Test_Cupy.test_custom_cupy_object_creator_1d()
    print()
    print("Test 15, 1D")
    print(b)
    print("First Element + 10")
    b[0] = b[0] + 10
    print(b)
    print("Last Element + 50")
    b[b.size - 1] = b[b.size - 1] + 50
    print(b)
    print("Create new cupy from python")
    c = cp.ones(42)
    print(c)
    print("Add test 15 cupy with new cupy")
    b = b + c
    print(b)
    print("Get Sum")
    print(cp.sum(b))
    print("Sort")
    b = cp.sort(b)
    print(b)
    print("Copy to CPU")
    b_cpu = cp.asnumpy(b)
    print(b_cpu)
Example #12
def test_split_array_single_argument(type, test_size, train_size, shuffle):
    X = np.zeros((100, 10)) + np.arange(100).reshape(100, 1)
    if type == 'cupy':
        X = cp.asarray(X)

    if type == 'numba':
        X = cuda.to_device(X)
    X_train, X_test = train_test_split(X,
                                       train_size=train_size,
                                       test_size=test_size,
                                       shuffle=shuffle,
                                       random_state=0)

    if type == 'cupy':
        assert isinstance(X_train, cp.ndarray)
        assert isinstance(X_test, cp.ndarray)

    if type in ['numba', 'rmm']:
        assert cuda.devicearray.is_cuda_ndarray(X_train)
        assert cuda.devicearray.is_cuda_ndarray(X_test)

    if train_size is not None:
        assert X_train.shape[0] == int(X.shape[0] * train_size)

    if test_size is not None:
        assert X_test.shape[0] == int(X.shape[0] * test_size)

    if shuffle is None:
        assert (X_train == X[0:train_size]).all()
        assert (X_test == X[-1 * test_size:]).all()

        X_rec = cp.sort(cp.concatenate((X_train, X_test)))

        assert (X_rec == X).all()
Example #13
  def local_cov_bet_class_NN(self, key, label, nb_class, batchsize, k):
    key_broadcast = cp.broadcast_to(key, (batchsize, batchsize, key.shape[1]))
    key_broadcast_transpose = cp.transpose(
        cp.broadcast_to(key, (batchsize, batchsize, key.shape[1])),
        axes=(1, 0, 2))
    sub_key_broadcast = key_broadcast - key_broadcast_transpose
    norm_sub_broadcast = cp.linalg.norm(sub_key_broadcast, axis=2)
    sorted_d = cp.sort(norm_sub_broadcast, axis=0)
    kth_d = sorted_d[k]
    kth_d = kth_d.reshape([batchsize, 1])
    sigma = cp.matmul(kth_d, cp.transpose(kth_d))

    batchsize_per_class = batchsize // nb_class
    index = cp.arange(key.shape[0])
    xx, yy = cp.meshgrid(index, index)
    sub = key[xx] - key[yy]
    norm_sub = cp.linalg.norm(sub, axis=2)
    a1 = cp.exp(-norm_sub * norm_sub / sigma)
    lindex = cp.arange(label.shape[0])
    lx, ly = cp.meshgrid(lindex, lindex)
    l = (label[lx] == label[ly])
    a1 = a1 * l * (1.0 / (batchsize * nb_class) - 1.0 / batchsize_per_class)
    l2 = (label[lx] != label[ly])
    a2 = l2 * (1.0 / batchsize)
    a = a1 + a2
    a = a.reshape([a.shape[0], a.shape[1], 1])
    a_sub = a * sub
    Sb = cp.einsum('ijk,ijl->kl', a_sub, sub, dtype='float32') * 0.5
    return Sb
Example #14
        def sortAC0001() -> cp.core.core.ndarray:
            """sortAC0001.

            :rtype: cp.core.core.ndarray
            """

            X_AUX = cp.copy(X)
            return X_AUX[y.argsort()], cp.sort(y)
Example #15
def assert_cluster_counts(sk_agg, cuml_agg, digits=25):
    sk_unique, sk_counts = np.unique(sk_agg.labels_, return_counts=True)
    sk_counts = np.sort(sk_counts)

    cu_unique, cu_counts = cp.unique(cuml_agg.labels_, return_counts=True)
    cu_counts = cp.sort(cu_counts).get()

    np.testing.assert_almost_equal(sk_counts, cu_counts, decimal=-1 * digits)
Example #16
def median_filter_blobs(pix, s, picked="center"):
    ph, pw = pix.shape[0], pix.shape[1]

    pick = 0
    if picked == "center":
        pick = s
    if picked == "end":
        pick = s * 2 - 1

    temp = pix.copy()
    r = cup.zeros((ph, s * 2, 4), dtype=np.float32)
    for x in range(pw):
        if x - s >= 0 and x + s <= pw:
            r[:, :, :] = temp[:, x - s:x + s, :]

        if x - s < 0:
            dp = s - x
            r[:, dp:, :] = temp[:, :x + s, :]
            r[:, :dp, :] = temp[:, -dp:, :]

        if x + s > pw:
            dp = x + s - pw
            r[:, :-dp, :] = temp[:, x - s:, :]
            r[:, -dp:, :] = temp[:, :dp, :]

        pix[:, x, :] = cup.sort(r, axis=1)[:, pick, :]

    temp = pix.copy()
    r = cup.zeros((s * 2, pw, 4), dtype=np.float32)
    for y in range(ph):
        if y - s >= 0 and y + s <= ph:
            r[:, :, :] = temp[y - s:y + s, :, :]

        if y - s < 0:
            dp = s - y
            r[dp:, :, :] = temp[:y + s, :, :]
            r[:dp, :, :] = temp[-dp:, :, :]

        if y + s > ph:
            dp = y + s - ph
            r[:-dp, :, :] = temp[y - s:, :, :]
            r[-dp:, :, :] = temp[:dp, :, :]

        pix[y, :, :] = cup.sort(r, axis=0)[pick, :, :]

    return pix
Example #17
    def assemble_sparse(self, ke, tri, perm, n_pts, ref=0):
        '''
        function that assembles the global stiffness matrix from all element stiffness matrices

        takes:

        ke - stiffness on each element matrix - array shape (n_triangles, n_vertices, n_vertices)
        tri - array with all indices (in pts array) of triangle vertices - shape (num_triangles, 3)
        perm - array with permittivity in each element - array shape (num_triangles,)
        n_pts - number of nodes - int
        ref - electrode on which reference value is placed

        returns:

        A - system matrix, shape (n_pts + n_electrodes, n_pts + n_electrodes);
            its upper-left (n_pts, n_pts) block is the global stiffness matrix K
        '''
        n_tri, n_vertices = tri.shape
        row = cp.tile(tri, (1, n_vertices))
        i = cp.array([0, 3, 6, 1, 4, 7, 2, 5, 8])
        row = row[:, i].ravel()
        col = cp.tile(tri, (n_vertices)).reshape(
            (tri.shape[0] * tri.shape[1] * n_vertices))
        admittanceMatrixC2 = self.admittanceMatrixC2()
        data = cp.multiply(ke[:], perm[:, None, None])
        indexElectrode = cp.sort(self.tri[self.twoFromElectrode][self.isValid],
                                 axis=1)[:, 0] // self.n_per_el
        data[self.twoFromElectrode][self.isValid] = (
            data[self.twoFromElectrode][self.isValid] +
            ((1 / self.z[indexElectrode]))[:, None, None] * admittanceMatrixC2)
        data = data.ravel()
        ind = cp.argsort(row)
        row = row[ind]
        col = col[ind]
        data = data[ind]
        unique, counts = cp.unique(row, return_counts=True)
        index_pointer = cp.zeros(n_pts + 1)
        sum_count = cp.cumsum(counts)
        index_pointer[unique[:] + 1] = sum_count[:]

        K = sp.csr_matrix((data, col, index_pointer),
                          shape=(n_pts, n_pts),
                          dtype=perm.dtype)

        K = K.toarray()

        A = cp.empty((self.n_pts + self.ne, self.n_pts + self.ne), dtype='f8')

        if 0 <= self.ref < n_pts:
            K[self.ref, :] = 0.
            K[:, self.ref] = 0.
            K[self.ref, self.ref] = 1.

        A[:self.n_pts, :self.n_pts] = K[:]
        admittanceMatrixE = self.admittanceMatrixE()
        A[self.n_pts:, :self.n_pts] = admittanceMatrixE.T
        A[:self.n_pts, self.n_pts:] = admittanceMatrixE
        A[self.n_pts:, self.n_pts:] = self.admittanceMatrixD()
        return A
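The COO-to-CSR step in the middle of assemble_sparse (argsort by row, then cumulative counts as the index pointer) can be reproduced in isolation. Note it assumes every row index occurs at least once, which holds when every node belongs to some triangle. A standalone sketch:

import cupy as cp
import cupyx.scipy.sparse as sp

n_pts = 3
row = cp.array([2, 0, 2, 1])
col = cp.array([0, 1, 2, 1], dtype=cp.int32)
data = cp.array([10., 20., 30., 40.])

# sort triplets by row, then turn per-row counts into the CSR index pointer
ind = cp.argsort(row)
row, col, data = row[ind], col[ind], data[ind]
unique, counts = cp.unique(row, return_counts=True)
index_pointer = cp.zeros(n_pts + 1, dtype=cp.int32)
index_pointer[unique + 1] = cp.cumsum(counts)

K = sp.csr_matrix((data, col, index_pointer), shape=(n_pts, n_pts))
print(K.toarray())
# [[ 0. 20.  0.]
#  [ 0. 40.  0.]
#  [10.  0. 30.]]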
Example #18
def get_spectral_gap_transition_matrix(G):
    Isinvertible = False
    if (len(G) > 0):
        # Checking if the Diagonal Degree Matrix is invertible
        nodes_degrees = G.degree
        c = 0
        for i in nodes_degrees:

            if (i[1] == 0):
                c += 1
        # If is invertible
        if (c == 0):
            Isinvertible = True
            # Calculating the sparse Adj Matrix
            A = nx.to_numpy_matrix(G)
            # Calculating the sparse Degree Diagonal Matrix
            n, m = A.shape
            diags = A.sum(axis=1).flatten()
            # Inverting D
            invDiags = []
            for i in diags:
                invDiags.append(1 / i)
            I = np.identity(n)
            invD = invDiags * I
            # Getting the Transition Matrix

            #P =  invD* A
            cpInvD = cp.asarray(invD)
            cpA = cp.asarray(A)

            P = cp.matmul(cpInvD, cpA)
            #check = P.sum(axis=1).flatten()
            # Getting the spectral gap of P
            #spectrumP = np.linalg.eigvals(P)
            # note: eigh assumes a Hermitian matrix; a transition matrix is
            # not symmetric in general
            spectrumP, v = cp.linalg.eigh(P)
            cp.cuda.Stream.null.synchronize()

            # The first eigenvalue of the transition matrix is always 1
            #lamba1 = 1

            # Getting the second Eigenvalue
            #ordered_spectrum = sorted(spectrumP,reverse = True)
            # sort the full (ascending) spectrum; spectrumP[0] would be a
            # scalar and could not be sorted or indexed below
            ordered_spectrum = cp.sort(spectrumP)
            lambda1 = ordered_spectrum[-1].item()
            #lambda2 =ordered_spectrum[1]
            #lambda_n = ordered_spectrum[-1]
            #lambda_n_1 = ordered_spectrum[-2]
            lambda2 = ordered_spectrum[-2].item()
            if (np.iscomplex(lambda2)):
                lambda2 = lambda2.real
            spectralGap = float(lambda1 - lambda2)
            # Getting the n-th Eigenvalue
            lambdaN = ordered_spectrum[-2]
            lambdaNGap = ordered_spectrum[-1] - lambdaN
            if isinstance(spectralGap, complex):
                return (Isinvertible, 0, 0)
            return (Isinvertible, spectralGap, lambdaNGap)
    return (Isinvertible, 0, 0)
Example #19
    def setup_method(self, method):
        np.random.seed(12345)
        N = 32
        L = 20
        self.U = cp.ones((N, N, N))
        self.U[:, 0:(old_div(N, 2))] = -1
        self.V = cp.asarray(np.random.randn(N, N, N))
        t = cp.sort(cp.abs(self.V).ravel())[self.V.size - L]
        self.V[cp.abs(self.V) < t] = 0
        self.D = self.U + self.V
Example #20
    def _dense_fit(self, X, strategy, missing_values, fill_value):
        """Fit the transformer on dense data."""
        mask = _get_mask(X, missing_values)

        # Mean
        if strategy == "mean":
            count_missing_values = mask.sum(axis=0)
            n_elems = X.shape[0] - count_missing_values
            mean = np.nansum(X, axis=0)
            mean -= (count_missing_values * missing_values)
            mean /= n_elems
            return mean

        # Median
        elif strategy == "median":
            count_missing_values = mask.sum(axis=0)
            n_elems = X.shape[0] - count_missing_values
            middle, is_odd = np.divmod(n_elems, 2)
            is_odd = is_odd.astype(np.bool_)
            middle += count_missing_values
            X_sorted = X.copy()
            X_sorted[mask] = np.nan
            X_sorted = np.sort(X_sorted, axis=0)
            median = np.empty(X.shape[1], dtype=X.dtype)
            wis_odd = np.argwhere(is_odd).squeeze()
            wnot_odd = np.argwhere(~is_odd).squeeze()
            median[wis_odd] = X_sorted[middle[wis_odd], wis_odd]
            elm1 = X_sorted[middle[wnot_odd] - 1, wnot_odd]
            elm2 = X_sorted[middle[wnot_odd], wnot_odd]
            median[wnot_odd] = (elm1 + elm2) / 2.
            return median

        # Most frequent
        elif strategy == "most_frequent":
            n_features = X.shape[1]
            most_frequent = cpu_np.empty(n_features, dtype=X.dtype)
            for i in range(n_features):
                feature_mask_idxs = np.where(~mask[:, i])[0]
                values, counts = np.unique(X[feature_mask_idxs, i],
                                           return_counts=True)
                count_max = counts.max()
                if count_max > 0:
                    value = values[counts == count_max].min()
                else:
                    value = np.nan
                most_frequent[i] = value
            return np.array(most_frequent)

        # Constant
        elif strategy == "constant":
            return np.full(X.shape[1], fill_value, dtype=X.dtype)
Example #21
    def test_qid(self):
        import cupy as cp
        rng = cp.random.RandomState(1994)
        rows = 100
        cols = 10
        X, y = rng.randn(rows, cols), rng.randn(rows)
        qid = rng.randint(low=0, high=10, size=rows, dtype=np.uint32)
        qid = cp.sort(qid)

        Xy = xgb.DMatrix(X, y)
        Xy.set_info(qid=qid)
        group_ptr = Xy.get_uint_info('group_ptr')
        assert group_ptr[0] == 0
        assert group_ptr[-1] == rows
Example #22
def _approximate_mode(class_counts, n_draws, rng):
    """
    CuPy implementataiton based on scikit-learn approximate_mode method.
    https://github.com/scikit-learn/scikit-learn/blob/master/sklearn/utils/__init__.py#L984

    It is the mostly likely outcome of drawing n_draws many
    samples from the population given by class_counts.

    Parameters
    ----------
    class_counts : ndarray of int
        Population per class.
    n_draws : int
        Number of draws (samples to draw) from the overall population.
    rng : random state
        Used to break ties.

    Returns
    -------
    sampled_classes : cupy array of int
        Number of samples drawn from each class.
        np.sum(sampled_classes) == n_draws
    """
    # this computes a bad approximation to the mode of the
    # multivariate hypergeometric given by class_counts and n_draws
    continuous = n_draws * class_counts / class_counts.sum()
    # floored means we don't overshoot n_samples, but probably undershoot
    floored = cp.floor(continuous)
    # we add samples according to how much "left over" probability
    # they had, until we arrive at n_samples
    need_to_add = int(n_draws - floored.sum())
    if need_to_add > 0:
        remainder = continuous - floored
        values = cp.sort(cp.unique(remainder))[::-1]
        # add according to remainder, but break ties
        # randomly to avoid biases
        for value in values:
            inds, = cp.where(remainder == value)
            # if we need_to_add less than what's in inds
            # we draw randomly from them.
            # if we need to add more, we add them all and
            # go to the next value
            add_now = min(len(inds), need_to_add)
            inds = rng.choice(inds, size=add_now, replace=False)
            floored[inds] += 1
            need_to_add -= add_now
            if need_to_add == 0:
                break
    return floored.astype(int)
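The same worked example as scikit-learn's docstring: drawing 3 samples from class counts ``[4, 2]`` splits exactly as ``[2, 1]``, so no tie-breaking draw is needed:

import cupy as cp

# continuous split is 3 * [4, 2] / 6 = [2., 1.], already integral
print(_approximate_mode(cp.array([4, 2]), n_draws=3, rng=cp.random))  # [2 1]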
Example #23
def test_nearest_neighbors_rbc(distance, n_neighbors, nrows):
    X, y = make_blobs(n_samples=nrows,
                      centers=25,
                      shuffle=True,
                      n_features=2,
                      cluster_std=3.0,
                      random_state=42)

    knn_cu = cuKNN(metric=distance, algorithm="rbc")
    knn_cu.fit(X)

    query_rows = int(nrows / 2)

    rbc_d, rbc_i = knn_cu.kneighbors(X[:query_rows, :],
                                     n_neighbors=n_neighbors)

    if distance == 'euclidean':
        # Need to use unexpanded euclidean distance
        pw_dists = cuPW(X, metric="l2")
        brute_i = cp.argsort(pw_dists, axis=1)[:query_rows, :n_neighbors]
        brute_d = cp.sort(pw_dists, axis=1)[:query_rows, :n_neighbors]
    else:
        knn_cu_brute = cuKNN(metric=distance, algorithm="brute")
        knn_cu_brute.fit(X)

        brute_d, brute_i = knn_cu_brute.kneighbors(X[:query_rows, :],
                                                   n_neighbors=n_neighbors)

    rbc_i = cp.sort(rbc_i, axis=1)
    brute_i = cp.sort(brute_i, axis=1)

    # TODO: These are failing with 1 or 2 mismatched elements
    # for very small values of k:
    # https://github.com/rapidsai/cuml/issues/4262
    assert len(brute_d[brute_d != rbc_d]) <= 1
    assert len(brute_i[brute_i != rbc_i]) <= 1
Example #24
def _get_median(data, n_zeros):
    """Compute the median of data with n_zeros additional zeros.
    This function is used to support sparse matrices; it modifies data in-place
    """
    n_elems = len(data) + n_zeros
    if not n_elems:
        return np.nan
    n_negative = (data < 0).sum()
    middle, is_odd = divmod(n_elems, 2)
    data = np.sort(data)
    if is_odd:
        return _get_elem_at_rank(middle, data, n_negative, n_zeros)
    elm1 = _get_elem_at_rank(middle - 1, data, n_negative, n_zeros)
    elm2 = _get_elem_at_rank(middle, data, n_negative, n_zeros)
    return (elm1 + elm2) / 2.
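With two implicit zeros, the median of ``[1, 3]`` becomes the median of the virtual array ``[0, 0, 1, 3]``, assuming ``_get_elem_at_rank`` (not shown) resolves ranks against negatives, implicit zeros, and positives as in scikit-learn:

import numpy as np

# virtual array [0, 0, 1, 3] -> median (0 + 1) / 2
print(_get_median(np.array([1., 3.]), n_zeros=2))  # 0.5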
Example #25
    def setup(self):
        self.e = np.arange(10000, dtype=np.float32)
        self.o = np.arange(10001, dtype=np.float32)
        np.random.seed(25)
        np.random.shuffle(self.o)
        # quicksort implementations can have issues with equal elements
        self.equal = np.ones(10000)
        self.many_equal = np.sort(np.arange(10000) % 10)

        # quicksort median of 3 worst case
        self.worst = np.arange(1000000)
        x = self.worst
        while x.size > 3:
            mid = x.size // 2
            x[mid], x[-2] = x[-2], x[mid]
            x = x[:-2]
Example #26
def multiple_point_crossover(parent_pairs, x_probability):
    # define random probability sequence
    SEQUENCE = cupy.random.uniform(0, 1, parent_pairs.shape[0])
    # define new generation population variable
    population_hat = None
    # perform two-point crossover
    for i in range(parent_pairs.shape[0]):
        X, Y = parent_pairs[i]
        # check chromosomes' compatibility in case there is an error
        compatible_chromosomes = X.shape == Y.shape
        # define max index boundary
        chromosome_shape = X.shape[0]
        # initialize new chromosome
        a, b = cupy.zeros((2, chromosome_shape), dtype=cupy.int64)
        if not compatible_chromosomes:
            print(
                "Error [13]: Incompatible chromosomes (at: multiple point crossover)\nExiting..."
            )
            exit()
        else:
            # crossover random point
            x_idx, y_idx = cupy.sort(
                cupy.random.randint(0, chromosome_shape, 2))
            # first child chromosome
            a = cupy.concatenate((X[:x_idx], Y[x_idx:y_idx], X[y_idx:]))
            # second child chromosome
            b = cupy.concatenate((Y[:x_idx], X[x_idx:y_idx], Y[y_idx:]))
            # crossover with respect to the crossover probability
            if SEQUENCE[i] < x_probability:
                # append children to form the new population
                if i == 0:
                    # on the first iteration, initialize the generation population
                    population_hat = cupy.stack((a, b))
                else:
                    # afterwards, stack the children onto the generation population
                    population_hat = cupy.vstack(
                        (population_hat, cupy.stack((a, b))))
            else:
                # append parents to the new population
                if i == 0:
                    # on the first iteration, initialize the generation population
                    population_hat = cupy.stack((X, Y))
                else:
                    # afterwards, stack the parents onto the generation population
                    population_hat = cupy.vstack(
                        (population_hat, cupy.stack((X, Y))))
    return population_hat
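A quick shape check (hypothetical values): with two parent pairs of 6-gene chromosomes and crossover probability 1.0, every pair contributes two children.

import cupy

parents = cupy.arange(24, dtype=cupy.int64).reshape(2, 2, 6)
children = multiple_point_crossover(parents, x_probability=1.0)
print(children.shape)  # (4, 6): two children per pair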
Example #27
def GetSASE_gpu(CentralEnergy, dE_FWHM, dt_FWHM, samples=0, onlyT=False):
    from cupy import interp
    import cupy as cp
    h = 4.135667662  #in eV*fs
    dE = dE_FWHM / 2.355  #in eV, converts to sigma
    dt = dt_FWHM / 2.355  #in fs, converts to sigma
    if samples == 0:
        samples = int(600. * dt * CentralEnergy / h)
    else:
        if (samples < 400. * dt * CentralEnergy / h):
            print(
                "Number of samples is a little small, proceeding anyway. Got",
                samples, "prefer more than", 400. * dt * CentralEnergy / h)

    EnAxis = cp.linspace(0.,
                         20. * CentralEnergy,
                         num=samples,
                         dtype=cp.float32)
    newEaxis = cp.linspace(0, 140, 32 * 1024)
    #     EnInput=cp.zeros(samples, dtype=cp.complex64)
    #     for i in range(samples):
    EnInput = cp.exp(-(EnAxis - CentralEnergy)**2 / 2. / dE**2 +
                     2 * cp.pi * 1j * cp.random.random(size=samples),
                     dtype=cp.complex64)
    EnInput = interp(newEaxis, EnAxis, EnInput)

    newTaxis = cp.fft.fftfreq(32 * 1024, d=140 / (32 * 1024)) * h
    TOutput = cp.exp(-newTaxis**2 / 2. / dt**2) * cp.fft.fft(EnInput)

    #     sort TOutput and newTaxis
    ind = cp.argsort(newTaxis, axis=0)
    newTaxis = cp.sort(newTaxis)
    TOutput = cp.take_along_axis(TOutput, ind, axis=0)

    #     En_FFT=cp.fft.fft(EnInput)
    #     TAxis=cp.fft.fftfreq(samples,d=(20.*CentralEnergy)/samples)*h
    #     TOutput=cp.exp(-TAxis**2/2./dt**2)*En_FFT
    if onlyT:
        return newTaxis, TOutput
    else:
        EnOutput = cp.fft.ifft(TOutput)
        return newEaxis, EnOutput, newTaxis, TOutput
Example #28
    def __init__(self, arg1, shape=None, dtype=None, copy=False):
        if _scipy_available and scipy.sparse.issparse(arg1):
            x = arg1.todia()
            data = x.data
            offsets = x.offsets
            shape = x.shape
            dtype = x.dtype
            copy = False
        elif isinstance(arg1, tuple):
            data, offsets = arg1
            if shape is None:
                raise ValueError('expected a shape argument')

        else:
            raise ValueError(
                'unrecognized form for dia_matrix constructor')

        data = cupy.array(data, dtype=dtype, copy=copy)
        data = cupy.atleast_2d(data)
        offsets = cupy.array(offsets, dtype='i', copy=copy)
        offsets = cupy.atleast_1d(offsets)

        if offsets.ndim != 1:
            raise ValueError('offsets array must have rank 1')

        if data.ndim != 2:
            raise ValueError('data array must have rank 2')

        if data.shape[0] != len(offsets):
            raise ValueError(
                'number of diagonals (%d) does not match the number of '
                'offsets (%d)'
                % (data.shape[0], len(offsets)))

        sorted_offsets = cupy.sort(offsets)
        if (sorted_offsets[:-1] == sorted_offsets[1:]).any():
            raise ValueError('offset array contains duplicate values')

        self.data = data
        self.offsets = offsets
        if not util.isshape(shape):
            raise ValueError('invalid shape (must be a 2-tuple of int)')
        self._shape = int(shape[0]), int(shape[1])
Example #29
    def test_second_order_accurate(self):
        # Testing that the relative numerical error is less that 3% for
        # this example problem. This corresponds to second order
        # accurate finite differences for all interior and boundary
        # points.
        x = cp.linspace(0, 1, 10)
        dx = x[1] - x[0]
        y = 2 * x**3 + 4 * x**2 + 2 * x
        analytical = 6 * x**2 + 8 * x + 2
        num_error = cp.abs((gradient(y, dx, edge_order=2) / analytical) - 1)
        assert cp.all(num_error < 0.03).item() is True

        # test with unevenly spaced
        cp.random.seed(0)
        x = cp.sort(cp.random.random(10))
        y = 2 * x**3 + 4 * x**2 + 2 * x
        analytical = 6 * x**2 + 8 * x + 2
        num_error = cp.abs((gradient(y, x, edge_order=2) / analytical) - 1)
        assert cp.all(num_error < 0.03).item() is True
Example #30
def _label(x, structure, y):
    elems = numpy.where(structure != 0)
    vecs = [elems[dm] - 1 for dm in range(x.ndim)]
    offset = vecs[0]
    for dm in range(1, x.ndim):
        offset = offset * 3 + vecs[dm]
    indxs = numpy.where(offset < 0)[0]
    dirs = [[vecs[dm][dr] for dm in range(x.ndim)] for dr in indxs]
    dirs = cupy.array(dirs, dtype=numpy.int32)
    ndirs = indxs.shape[0]
    y_shape = cupy.array(y.shape, dtype=numpy.int32)
    count = cupy.zeros(2, dtype=numpy.int32)
    _kernel_init()(x, y)
    _kernel_connect()(y_shape, dirs, ndirs, x.ndim, y, size=y.size)
    _kernel_count()(y, count, size=y.size)
    maxlabel = int(count[0])
    labels = cupy.empty(maxlabel, dtype=numpy.int32)
    _kernel_labels()(y, count, labels, size=y.size)
    _kernel_finalize()(maxlabel, cupy.sort(labels), y, size=y.size)
    return maxlabel