Exemplo n.º 1
0
def count_nonzero(X, axis=None, sample_weight=None):
    """A variant of X.getnnz() with extension to weighting on axis 0

    Useful in efficiently calculating multilabel metrics.

    Parameters
    ----------
    X : CSR sparse matrix of shape (n_samples, n_labels)
        Input data.

    axis : None, 0 or 1
        The axis on which the data is aggregated. ``-1`` and ``-2`` are
        accepted as aliases for ``1`` and ``0`` respectively.

    sample_weight : array-like of shape (n_samples,), default=None
        Weight for each row of X.

    Returns
    -------
    count : int, float or ndarray
        With ``axis=None`` a scalar (``X.nnz``, or the weighted total when
        ``sample_weight`` is given); with ``axis=1`` an array with one entry
        per row; with ``axis=0`` an array with one entry per column.

    Raises
    ------
    TypeError
        If ``X`` is not in CSR format.
    ValueError
        If ``axis`` is not one of None, 0, 1, -1, -2.
    """
    # Map the negative aliases onto the canonical axis numbers.
    if axis == -1:
        axis = 1
    elif axis == -2:
        axis = 0

    # This check must run for every axis value. Everything below reads
    # ``indptr``/``indices`` with CSR semantics, so e.g. a CSC matrix would
    # otherwise silently produce counts along the wrong axis.
    if X.format != 'csr':
        raise TypeError('Expected CSR sparse format, got {0}'.format(X.format))

    # We rely here on the fact that np.diff(X.indptr) for a CSR
    # will return the number of stored entries in each row.
    # A bincount over X.indices will return the number of stored entries
    # in each column. See ``csr_matrix.getnnz`` in scipy >= 0.14.
    if axis is None:
        if sample_weight is None:
            return X.nnz
        return np.dot(np.diff(X.indptr), sample_weight)
    elif axis == 1:
        out = np.diff(X.indptr)
        if sample_weight is None:
            # astype here is for consistency with axis=0 dtype
            return out.astype('intp')
        return out * sample_weight
    elif axis == 0:
        if sample_weight is None:
            return np.bincount(X.indices, minlength=X.shape[1])
        # Expand the per-row weight to one weight per stored entry so it
        # lines up with X.indices.
        weights = np.repeat(sample_weight, np.diff(X.indptr))
        return np.bincount(X.indices,
                           minlength=X.shape[1],
                           weights=weights)
    else:
        raise ValueError('Unsupported axis: {0}'.format(axis))
Exemplo n.º 2
0
 def counts(y):
     """Return the fraction of samples in each distinct class of ``y``.

     The result is a plain Python list with one entry per unique value of
     ``y`` (in the order produced by ``cp.unique``), each rounded to two
     decimals.
     """
     inverse = cp.unique(y, return_inverse=True)[1]
     per_class = cp.bincount(inverse)
     n_total = cp.sum(per_class).item()
     return [
         cp.around(float(n_in_class) / n_total, decimals=2).item()
         for n_in_class in per_class
     ]
Exemplo n.º 3
0
def remove_small_objects_gpu(mask: cupy.ndarray, min_size: int) -> None:
    """ Zero out every labelled component smaller than ``min_size``.

    See scikit-image remove_small_objects()

    N.B.
        Input array must be a labeled mask.
        This is an inplace operation; nothing is returned.
    """
    # sizes[k] is the number of elements carrying label k.
    sizes = cupy.bincount(mask.ravel())
    # Indexing the per-label flags with the mask itself yields a
    # per-element flag of the same shape as ``mask``.
    undersized = (sizes < min_size)[mask]
    mask[undersized] = 0
Exemplo n.º 4
0
def remove_small_objects_gpu(mask: cupy.ndarray, min_size: int) -> None:
    """ Zero out every connected component smaller than ``min_size``.

    See scikit-image remove_small_objects()

    N.B.
        Input array can be a binary mask (bool type) or
        labeled mask (int type).
        This is an inplace operation; nothing is returned.
    """
    _check_dtype_supported(mask)

    # A conditional expression binds looser than the tuple unpacking target,
    # so ``ccs, _ = label(mask) if ... else mask`` would try to unpack an
    # int mask itself. Only the ``label`` result is a 2-tuple.
    if mask.dtype == bool:
        ccs, _ = label(mask)
    else:
        ccs = mask

    # component_sizes[k] is the number of elements carrying label k.
    component_sizes = cupy.bincount(ccs.ravel())
    too_small = component_sizes < min_size
    # Per-element flag, obtained by looking up each element's label.
    too_small_mask = too_small[ccs]
    mask[too_small_mask] = 0
Exemplo n.º 5
0
def keep_largest_connected_component_gpu(mask: cupy.ndarray, ) -> None:
    """ Keep the largest connected component.

    Remove small connected components, only keep the largest
    connected component (excluding background).

    N.B.
        Input array can be a binary mask (bool type) or
        labeled mask (int type).

        This is an inplace operation; nothing is returned.
    """
    _check_dtype_supported(mask)

    # A conditional expression binds looser than the tuple unpacking target,
    # so ``ccs, _ = label(mask) if ... else mask`` would try to unpack an
    # int mask itself. Only the ``label`` result is a 2-tuple.
    if mask.dtype == bool:
        ccs, _ = label(mask)
    else:
        ccs = mask

    component_sizes = cupy.bincount(ccs.ravel())
    if len(component_sizes) == 1:  # just background
        return
    # Skip index 0 (background) when searching, then shift back by one to
    # recover the actual label value.
    largest_cc_index = cupy.argmax(component_sizes[1:]) + 1
    mask[ccs != largest_cc_index] = 0
Exemplo n.º 6
0
def _bincount_histogram(image, source_range):
    """
    Histogram an integer image with one bin per integer value.

    Built on cupy.bincount, which is significantly more efficient than
    cupy.histogram but restricts this function to images of integers.

    Parameters
    ----------
    image : array
        Input image.
    source_range : string
        'image' takes the value range from the minimum and maximum of the
        input image.
        'dtype' takes the value range from the limits of the image's data
        type.

    Returns
    -------
    hist : array
        The values of the histogram.
    bin_centers : array
        The values at the center of the bins.

    Raises
    ------
    ValueError
        If `source_range` is neither 'image' nor 'dtype'.
    """
    if source_range not in ("image", "dtype"):
        raise ValueError(
            "Incorrect value for `source_range` argument: {}".format(
                source_range
            )
        )

    range_from_image = source_range == "image"
    if range_from_image:
        lowest = int(image.min().astype(np.int64))
        highest = int(image.max().astype(np.int64))
    else:
        lowest, highest = dtype_limits(image, clip_negative=False)

    # The offset returned alongside the shifted image is not needed here.
    image, _ = _offset_array(image, lowest, highest)

    n_values = highest - lowest + 1
    hist = cp.bincount(image.ravel(), minlength=n_values)
    bin_centers = cp.arange(lowest, highest + 1)

    if range_from_image:
        # Drop the leading bins below a non-negative image minimum.
        hist = hist[max(lowest, 0):]
    return hist, bin_centers
Exemplo n.º 7
0
def perimeter(image, neighbourhood=4):
    """Estimate the total perimeter of all objects in a binary image.

    Parameters
    ----------
    image : (N, M) ndarray
        2D binary image.
    neighbourhood : 4 or 8, optional
        Neighborhood connectivity for border pixel determination. It is used to
        compute the contour. A higher neighbourhood widens the border on which
        the perimeter is computed. Any value other than 4 is treated as 8.

    Returns
    -------
    perimeter : float
        Total perimeter of all objects in binary image.

    Raises
    ------
    NotImplementedError
        If `image` is not 2-dimensional.

    References
    ----------
    .. [1] K. Benkrid, D. Crookes. Design and FPGA Implementation of
           a Perimeter Estimator. The Queen's University of Belfast.
           http://www.cs.qub.ac.uk/~d.crookes/webpubs/papers/perimeter.doc

    Examples
    --------
    >>> from skimage import data, util
    >>> from skimage.measure import label
    >>> # coins image (binary)
    >>> img_coins = data.coins() > 110
    >>> # total perimeter of all objects in the image
    >>> perimeter(img_coins, neighbourhood=4)  # doctest: +ELLIPSIS
    7796.867...
    >>> perimeter(img_coins, neighbourhood=8)  # doctest: +ELLIPSIS
    8806.268...

    """
    if image.ndim != 2:
        raise NotImplementedError("`perimeter` supports 2D images only")

    footprint = cp.asarray(STREL_4 if neighbourhood == 4 else STREL_8)

    # Border pixels are the ones that erosion removes.
    image = image.astype(cp.uint8)
    eroded = ndi.binary_erosion(image, footprint, border_value=0)
    border = image - eroded

    # Length contribution for each neighbourhood code produced by the
    # convolution below. Codes are 1*centre + 2*(edge neighbours) +
    # 10*(diagonal neighbours), so they never exceed 49.
    root_two = sqrt(2)
    code_weights = cp.zeros(50, dtype=cp.double)
    code_weights[[5, 7, 15, 17, 25, 27]] = 1
    code_weights[[21, 33]] = root_two
    code_weights[[13, 23]] = (1 + root_two) / 2

    coding_kernel = cp.asarray([[10, 2, 10], [2, 1, 2], [10, 2, 10]])
    coded = ndi.convolve(
        border,
        coding_kernel,
        mode="constant",
        cval=0,
    )

    # Equivalent to ``code_weights[coded].sum()``, but counting the codes
    # first and taking a dot product was measured to be about 5x faster.
    code_histogram = cp.bincount(coded.ravel(), minlength=50)
    return code_histogram @ code_weights
Exemplo n.º 8
0
def _stratify_split(X, stratify, labels, n_train, n_test, x_numba, y_numba,
                    random_state):
    """
    Function to perform a stratified split based on stratify column.
    Based on scikit-learn stratified split implementation.

    Parameters
    ----------
    X: Shuffled input data (cudf.DataFrame or an object exposing
        ``__cuda_array_interface__``)
    stratify: column to be stratified on (cudf.Series, single-column
        cudf.DataFrame or an object exposing ``__cuda_array_interface__``)
    labels: Shuffled labels (same accepted types as ``stratify``)
    n_train: Number of samples in train set
    n_test: number of samples in test set
    x_numba: Determines whether the data should be converted to numba
    y_numba: Determines whether the labels should be converted to numba
    random_state: random generator used to break ties and to permute the
        samples of each class; a ``np.random.RandomState`` is converted to a
        ``cp.random.RandomState``

    Returns
    -------
    X_train, X_test: Data X divided into train and test sets
    y_train, y_test: Labels divided into train and test sets

    Raises
    ------
    ValueError
        If ``labels`` or ``stratify`` is a DataFrame with more than one
        column, if any class has fewer than 2 members, or if ``n_train`` is
        smaller than the number of classes.
    """
    x_cudf = False
    labels_cudf = False

    if isinstance(X, cudf.DataFrame):
        x_cudf = True
    elif hasattr(X, "__cuda_array_interface__"):
        X = cp.asarray(X)
        x_order = _strides_to_order(X.__cuda_array_interface__['strides'],
                                    cp.dtype(X.dtype))
    # NOTE(review): ``x_order`` is only bound in the branch above, so the
    # ``cupyx.scipy.sparse.csr_matrix`` case tested further down looks like
    # it cannot work unless the matrix also exposes
    # ``__cuda_array_interface__`` - confirm with the caller.

    # labels and stratify will be only cp arrays
    if isinstance(labels, cudf.Series):
        labels_cudf = True
        labels = labels.values
    elif hasattr(labels, "__cuda_array_interface__"):
        labels = cp.asarray(labels)
    elif isinstance(labels, cudf.DataFrame):
        # ensuring it has just one column
        if labels.shape[1] != 1:
            # ``shape`` is a tuple, so it is formatted with %s and wrapped
            # in a 1-tuple; %d would raise TypeError instead.
            raise ValueError('Expected one column for labels, but found df '
                             'with shape = %s' % (labels.shape,))
        labels_cudf = True
        labels = labels[0].values

    labels_order = _strides_to_order(
                        labels.__cuda_array_interface__['strides'],
                        cp.dtype(labels.dtype))

    # Converting to cupy array removes the need to add an if-else block
    # for stratify column
    if isinstance(stratify, cudf.Series):
        stratify = stratify.values
    elif hasattr(stratify, "__cuda_array_interface__"):
        stratify = cp.asarray(stratify)
    elif isinstance(stratify, cudf.DataFrame):
        # ensuring it has just one column
        if stratify.shape[1] != 1:
            raise ValueError('Expected one column, but found column '
                             'with shape = %s' % (stratify.shape,))
        stratify = stratify[0].values

    classes, stratify_indices = cp.unique(stratify, return_inverse=True)

    n_classes = classes.shape[0]
    class_counts = cp.bincount(stratify_indices)
    if cp.min(class_counts) < 2:
        raise ValueError("The least populated class in y has only 1"
                         " member, which is too few. The minimum"
                         " number of groups for any class cannot"
                         " be less than 2.")

    if n_train < n_classes:
        raise ValueError('The train_size = %d should be greater or '
                         'equal to the number of classes = %d' % (n_train,
                                                                  n_classes))

    # Sorting the class ids groups sample positions by class; cutting at the
    # cumulative class counts yields one index array per class.
    class_indices = cp.split(cp.argsort(stratify_indices),
                             cp.cumsum(class_counts)[:-1].tolist())

    X_train = None

    # random_state won't be None or int, that's handled earlier
    if isinstance(random_state, np.random.RandomState):
        random_state = cp.random.RandomState(seed=random_state.get_state()[1])

    # Break ties
    n_i = _approximate_mode(class_counts, n_train, random_state)
    class_counts_remaining = class_counts - n_i
    t_i = _approximate_mode(class_counts_remaining, n_test, random_state)

    for i in range(n_classes):
        permutation = random_state.permutation(class_counts[i].item())
        perm_indices_class_i = class_indices[i].take(permutation)

        # First n_i[i] permuted samples go to train, the next t_i[i] to test.
        y_train_i = cp.array(labels[perm_indices_class_i[:n_i[i]]],
                             order=labels_order)
        y_test_i = cp.array(labels[perm_indices_class_i[n_i[i]:n_i[i] +
                                                        t_i[i]]],
                            order=labels_order)
        if hasattr(X, "__cuda_array_interface__") or \
           isinstance(X, cupyx.scipy.sparse.csr_matrix):

            X_train_i = cp.array(X[perm_indices_class_i[:n_i[i]]],
                                 order=x_order)
            X_test_i = cp.array(X[perm_indices_class_i[n_i[i]:n_i[i] +
                                                       t_i[i]]],
                                order=x_order)

            if X_train is None:
                X_train = cp.array(X_train_i, order=x_order)
                y_train = cp.array(y_train_i, order=labels_order)
                X_test = cp.array(X_test_i, order=x_order)
                y_test = cp.array(y_test_i, order=labels_order)
            else:
                X_train = cp.concatenate([X_train, X_train_i], axis=0)
                X_test = cp.concatenate([X_test, X_test_i], axis=0)
                y_train = cp.concatenate([y_train, y_train_i], axis=0)
                y_test = cp.concatenate([y_test, y_test_i], axis=0)

        elif x_cudf:
            X_train_i = X.iloc[perm_indices_class_i[:n_i[i]]]
            X_test_i = X.iloc[perm_indices_class_i[n_i[i]:n_i[i] + t_i[i]]]

            if X_train is None:
                X_train = X_train_i
                y_train = y_train_i
                X_test = X_test_i
                y_test = y_test_i
            else:
                X_train = cudf.concat([X_train, X_train_i], ignore_index=False)
                X_test = cudf.concat([X_test, X_test_i], ignore_index=False)
                y_train = cp.concatenate([y_train, y_train_i], axis=0)
                y_test = cp.concatenate([y_test, y_test_i], axis=0)

    if x_numba:
        X_train = cuda.as_cuda_array(X_train)
        X_test = cuda.as_cuda_array(X_test)
    elif x_cudf:
        X_train = cudf.DataFrame(X_train)
        X_test = cudf.DataFrame(X_test)

    if y_numba:
        y_train = cuda.as_cuda_array(y_train)
        y_test = cuda.as_cuda_array(y_test)
    elif labels_cudf:
        y_train = cudf.Series(y_train)
        y_test = cudf.Series(y_test)

    return X_train, X_test, y_train, y_test
Exemplo n.º 9
0
def _stratify_split(X, y, n_train, n_test, x_numba, y_numba, random_state):
    """
    Function to perform a stratified split based on y labels.
    Based on scikit-learn stratified split implementation.

    Parameters
    ----------
    X, y: Shuffled input data and labels
    n_train: Number of samples in train set
    n_test: number of samples in test set
    x_numba: Determines whether the data should be converted to numba
    y_numba: Determines whether the labels should be converted to numba
    random_state: random generator used to break ties and to permute the
        samples of each class; a ``np.random.RandomState`` is converted to a
        ``cp.random.RandomState``

    Returns
    -------
    X_train, X_test: Data X divided into train and test sets
    y_train, y_test: Labels divided into train and test sets

    Raises
    ------
    ValueError
        If ``y`` is a DataFrame with more than one column, or if ``n_train``
        or ``n_test`` is smaller than the number of classes.
    """
    x_cudf = False
    y_cudf = False

    if isinstance(X, cudf.DataFrame):
        x_cudf = True
    elif hasattr(X, "__cuda_array_interface__"):
        X = cp.asarray(X)
        x_order = _strides_to_order(X.__cuda_array_interface__['strides'],
                                    cp.dtype(X.dtype))

    if isinstance(y, cudf.Series):
        y_cudf = True
    elif hasattr(y, "__cuda_array_interface__"):
        y = cp.asarray(y)
        y_order = _strides_to_order(y.__cuda_array_interface__['strides'],
                                    cp.dtype(y.dtype))
    elif isinstance(y, cudf.DataFrame):
        y_cudf = True
        # ensuring it has just one column
        if y.shape[1] != 1:
            # ``shape`` is a tuple, so it is formatted with %s and wrapped
            # in a 1-tuple; %d would raise TypeError instead.
            raise ValueError('Expected one label, but found y '
                             'with shape = %s' % (y.shape,))
    # NOTE(review): ``x_order`` / ``y_order`` are only bound for array
    # inputs and ``y.iloc`` is only valid for cudf inputs, so mixing a cudf
    # X with an array y (or vice versa) looks unsupported here - confirm
    # that callers always pass matching kinds.

    classes, y_indices = cp.unique(y.values if y_cudf else y,
                                   return_inverse=True)

    n_classes = classes.shape[0]
    class_counts = cp.bincount(y_indices)
    if n_train < n_classes:
        raise ValueError('The train_size = %d should be greater or '
                         'equal to the number of classes = %d' %
                         (n_train, n_classes))
    if n_test < n_classes:
        raise ValueError('The test_size = %d should be greater or '
                         'equal to the number of classes = %d' %
                         (n_test, n_classes))

    # Sorting the class ids groups sample positions by class. The sorted
    # positions must be cut at the cumulative class counts: an equal-sized
    # split only matches the class boundaries when every class has exactly
    # the same number of members.
    class_indices = cp.split(cp.argsort(y_indices),
                             cp.cumsum(class_counts)[:-1].tolist())

    X_train = None

    # random_state won't be None or int, that's handled earlier
    if isinstance(random_state, np.random.RandomState):
        random_state = cp.random.RandomState(seed=random_state.get_state()[1])

    # Break ties
    n_i = _approximate_mode(class_counts, n_train, random_state)
    class_counts_remaining = class_counts - n_i
    t_i = _approximate_mode(class_counts_remaining, n_test, random_state)

    for i in range(n_classes):
        permutation = random_state.permutation(class_counts[i].item())
        perm_indices_class_i = class_indices[i].take(permutation)

        # First n_i[i] permuted samples go to train, the next t_i[i] to test.
        if hasattr(X, "__cuda_array_interface__") or \
           isinstance(X, cupyx.scipy.sparse.csr_matrix):

            X_train_i = cp.array(X[perm_indices_class_i[:n_i[i]]],
                                 order=x_order)
            X_test_i = cp.array(X[perm_indices_class_i[n_i[i]:n_i[i] +
                                                       t_i[i]]],
                                order=x_order)

            y_train_i = cp.array(y[perm_indices_class_i[:n_i[i]]],
                                 order=y_order)
            y_test_i = cp.array(y[perm_indices_class_i[n_i[i]:n_i[i] +
                                                       t_i[i]]],
                                order=y_order)

            if X_train is None:
                X_train = cp.array(X_train_i, order=x_order)
                y_train = cp.array(y_train_i, order=y_order)
                X_test = cp.array(X_test_i, order=x_order)
                y_test = cp.array(y_test_i, order=y_order)
            else:
                X_train = cp.concatenate([X_train, X_train_i], axis=0)
                X_test = cp.concatenate([X_test, X_test_i], axis=0)
                y_train = cp.concatenate([y_train, y_train_i], axis=0)
                y_test = cp.concatenate([y_test, y_test_i], axis=0)

        elif x_cudf:
            X_train_i = X.iloc[perm_indices_class_i[:n_i[i]]]
            X_test_i = X.iloc[perm_indices_class_i[n_i[i]:n_i[i] + t_i[i]]]

            y_train_i = y.iloc[perm_indices_class_i[:n_i[i]]]
            y_test_i = y.iloc[perm_indices_class_i[n_i[i]:n_i[i] + t_i[i]]]

            if X_train is None:
                X_train = X_train_i
                y_train = y_train_i
                X_test = X_test_i
                y_test = y_test_i
            else:
                X_train = cudf.concat([X_train, X_train_i], ignore_index=False)
                X_test = cudf.concat([X_test, X_test_i], ignore_index=False)
                y_train = cudf.concat([y_train, y_train_i], ignore_index=False)
                y_test = cudf.concat([y_test, y_test_i], ignore_index=False)

    if x_numba:
        X_train = cuda.as_cuda_array(X_train)
        X_test = cuda.as_cuda_array(X_test)
    elif x_cudf:
        X_train = cudf.DataFrame(X_train)
        X_test = cudf.DataFrame(X_test)

    if y_numba:
        y_train = cuda.as_cuda_array(y_train)
        y_test = cuda.as_cuda_array(y_test)
    elif y_cudf:
        y_train = cudf.DataFrame(y_train)
        y_test = cudf.DataFrame(y_test)

    return X_train, X_test, y_train, y_test
Exemplo n.º 10
0
def histogramdd(sample, bins=10, range=None, weights=None, density=False):
    """
    Compute the multidimensional histogram of some data.

    Parameters
    ----------
    sample : (N, D) array, or (D, N) array_like
        The data to be histogrammed.

        Note the unusual interpretation of sample when an array_like:

        * When an array, each row is a coordinate in a D-dimensional space -
          such as ``histogramdd(cupy.array([p1, p2, p3]))``. A 1-D array is
          treated as N points in one dimension.
        * When an array_like, each element is the list of values for single
          coordinate - such as ``histogramdd((X, Y, Z))``.

        The first form should be preferred.

    bins : sequence or int, optional
        The bin specification:

        * A sequence of arrays describing the monotonically increasing bin
          edges along each dimension.
        * The number of bins for each dimension (nx, ny, ... =bins)
        * The number of bins for all dimensions (nx=ny=...=bins).

    range : sequence, optional
        A sequence of length D, each an optional (lower, upper) tuple giving
        the outer bin edges to be used if the edges are not given explicitly in
        `bins`.
        An entry of None in the sequence results in the minimum and maximum
        values being used for the corresponding dimension.
        The default, None, is equivalent to passing a tuple of D None values.
    weights : (N,) array_like, optional
        An array of values `w_i` weighing each sample `(x_i, y_i, z_i, ...)`.
        The values of the returned histogram are equal to the sum of the
        weights belonging to the samples falling into each bin.
    density : bool, optional
        If False, the default, returns the number of samples in each bin.
        If True, returns the probability *density* function at the bin,
        ``bin_count / sample_count / bin_volume``.

    Returns
    -------
    H : ndarray
        The multidimensional histogram of sample x, as floats. See density
        and weights for the different possible semantics.
    edges : list
        A list of D arrays describing the bin edges for each dimension.

    Raises
    ------
    ValueError
        If `bins` or `range` does not have one entry per dimension, or if an
        entry of `bins` is a non-positive integer, a non-monotonic edge
        array, or has more than one dimension.

    See Also
    --------
    histogram: 1-D histogram
    histogram2d: 2-D histogram

    Examples
    --------
    >>> r = cupy.random.randn(100,3)
    >>> H, edges = cupy.histogramdd(r, bins = (5, 8, 4))
    >>> H.shape, edges[0].size, edges[1].size, edges[2].size
    ((5, 8, 4), 6, 9, 5)

    """
    # Bring the input into (N, D) layout.
    if not isinstance(sample, cupy.ndarray):
        # A sequence of per-coordinate value lists: stack them as columns.
        sample = cupy.stack(sample, axis=-1)
    elif sample.ndim == 1:
        sample = sample[:, cupy.newaxis]
    _, ndim = sample.shape

    nbin = numpy.empty(ndim, int)
    edges = [None] * ndim
    bin_widths = [None] * ndim
    if weights is not None:
        weights = cupy.asarray(weights)

    try:
        n_bin_specs = len(bins)
    except TypeError:
        # bins is an integer: use it for every dimension
        bins = [bins] * ndim
    else:
        if n_bin_specs != ndim:
            raise ValueError(
                "The dimension of bins must be equal to the dimension of the "
                " sample x.")

    # normalize the range argument
    if range is None:
        range = (None, ) * ndim
    elif len(range) != ndim:
        raise ValueError("range argument must have one entry per dimension")

    # Create edge arrays, one dimension at a time
    for dim in _range(ndim):
        spec = bins[dim]
        spec_rank = cnp.ndim(spec)
        if spec_rank == 0:
            if spec < 1:
                raise ValueError(
                    "`bins[{}]` must be positive, when an integer".format(dim))
            lower, upper = _get_outer_edges(sample[:, dim], range[dim])
            n_edges = int(spec + 1)  # synchronize!
            edges[dim] = cupy.linspace(lower, upper, n_edges)
        elif spec_rank == 1:
            given_edges = cupy.asarray(spec)
            edges[dim] = given_edges
            if (given_edges[:-1] > given_edges[1:]).any():
                raise ValueError(
                    "`bins[{}]` must be monotonically increasing, when an array"
                    .format(dim))
        else:
            raise ValueError(
                "`bins[{}]` must be a scalar or 1d array".format(dim))

        nbin[dim] = len(edges[dim]) + 1  # includes an outlier on each end
        bin_widths[dim] = cupy.diff(edges[dim])

    # Compute the bin number each sample falls into.
    per_dim_bin = []
    for dim in _range(ndim):
        coords = sample[:, dim]
        # avoid cupy.digitize to work around gh-11022
        bin_index = cupy.searchsorted(edges[dim], coords, side="right")
        # With side="right", values that fall on an edge are put in the
        # right bin. Values equal to the rightmost edge must be counted in
        # the last bin and not as an outlier, so shift them one bin left.
        bin_index[coords == edges[dim][-1]] -= 1
        per_dim_bin.append(bin_index)
    ncount = tuple(per_dim_bin)

    # Compute the sample indices in the flattened histogram matrix.
    # This raises an error if the array is too large.
    flat_index = cnp.ravel_multi_index(ncount, nbin)

    # Count the repetitions of each flat index, then shape the counts into
    # a proper matrix.
    hist = cupy.bincount(flat_index, weights, minlength=numpy.prod(nbin))
    hist = hist.reshape(nbin)

    # This preserves the (bad) behavior observed in gh-7845, for now.
    hist = hist.astype(float)  # Note: NumPy uses casting='safe' here too

    # Remove outliers (indices 0 and -1 for each dimension).
    inner = (slice(1, -1), ) * ndim
    hist = hist[inner]

    if density:
        # calculate the probability density function
        total = hist.sum()
        for dim in _range(ndim):
            # Reshape the widths so they broadcast along axis ``dim`` only.
            broadcast_shape = [1] * ndim
            broadcast_shape[dim] = nbin[dim] - 2
            hist = hist / bin_widths[dim].reshape(broadcast_shape)
        hist /= total

    if any(hist.shape != numpy.asarray(nbin) - 2):
        raise RuntimeError("Internal Shape Error")
    return hist, edges
Exemplo n.º 11
0
def test_qn(loss, dtype, penalty, l1_strength, l2_strength, fit_intercept):
    """Check the QN solver against a baseline (softmax) or reference values
    (sigmoid).

    For ``loss == 'softmax'`` the fitted model must beat a majority-class
    baseline on a held-out split and reach at least 0.50 accuracy.

    For ``loss == 'sigmoid'`` the model is fitted on the precomputed data and
    its objective and coefficients are compared with stored reference values.
    The reference depends only on ``fit_intercept`` and on whether each of
    ``l1_strength`` / ``l2_strength`` is zero; ``penalty`` only decides
    whether a comparison is made at all.
    """
    if penalty == "none" and (l1_strength > 0 or l2_strength > 0):
        pytest.skip("`none` penalty does not take l1/l2_strength")

    tol = 1e-6

    qn = cuQN(loss=loss,
              fit_intercept=fit_intercept,
              l1_strength=l1_strength,
              l2_strength=l2_strength,
              tol=1e-8,
              output_type="cupy")

    if loss == 'softmax':
        X, y = make_classification(n_samples=5000,
                                   n_informative=10,
                                   n_features=20,
                                   n_classes=4,
                                   dtype=dtype)

        stratify = y.astype(dtype)
        X_train, X_test, y_train, y_test = train_test_split(X.astype(dtype),
                                                            y.astype(dtype),
                                                            stratify=stratify)
        most_class = cp.unique(y)[cp.argmax(cp.bincount(y))]

        # Always predicting the most frequent class is the bar to beat.
        baseline_preds = cp.array([most_class] * y_test.shape[0], dtype=dtype)
        baseline_score = accuracy_score(y_test, baseline_preds)

        y_pred = qn.fit(X_train, y_train).predict(X_test)
        cuml_score = accuracy_score(y_test, y_pred)

        assert (cuml_score > baseline_score)
        assert (cuml_score >= 0.50)

    elif loss == 'sigmoid':
        X = np.array(precomputed_X, dtype=dtype)
        y = np.array(precomputed_y_log, dtype=dtype)
        qn.fit(X, y)
        print(qn.objective)
        print(qn.coef_)

        # (fit_intercept, l1 is zero, l2 is zero) -> (objective, coef)
        reference = {
            (True, True, True): (0.40263831615448,
                                 [[-2.1088872], [2.4812558]]),
            (True, False, True): (0.44295936822891235,
                                  [[-1.6899368], [1.9021575]]),
            (True, True, False): (0.43780848383903503,
                                  [[-1.5337948], [1.678699]]),
            (True, False, False): (0.467987984418869,
                                   [[-1.3727235], [1.4639963]]),
            (False, True, True): (0.4317452311515808,
                                  [[-2.120777], [3.056865]]),
            (False, False, True): (0.4769895672798157,
                                   [[-1.6214856], [2.3650239]]),
            (False, True, False): (0.4750209450721741,
                                   [[-1.3931049], [2.0140104]]),
            (False, False, False): (0.5067970156669617,
                                    [[-1.2102532], [1.752459]]),
        }

        no_l1 = l1_strength == 0.0
        no_l2 = l2_strength == 0.0

        # Single-penalty runs are only compared when the other strength is
        # zero; elasticnet is compared for every strength combination.
        has_reference = (
            penalty == 'elasticnet'
            or (penalty == 'none' and no_l1 and no_l2)
            or (penalty == 'l1' and no_l2)
            or (penalty == 'l2' and no_l1)
        )
        if not has_reference:
            return

        expected_objective, expected_coef = reference[
            (bool(fit_intercept), no_l1, no_l2)]

        # NOTE(review): this check is one-sided - an objective below the
        # reference always passes. That may be intentional for a minimiser;
        # use abs() here if a two-sided tolerance is wanted.
        assert (qn.objective - expected_objective) < tol
        cp.testing.assert_array_almost_equal(qn.coef_,
                                             np.array(expected_coef),
                                             decimal=3)
Exemplo n.º 12
0
 def time_weights(self):
     """Run a weighted ``np.bincount`` of ``self.d`` using ``self.e``.

     The result is discarded; only the call itself matters.
     """
     values, weights = self.d, self.e
     np.bincount(values, weights=weights)
Exemplo n.º 13
0
def perimeter_crofton(image, directions=4):
    """Estimate the total Crofton perimeter of all objects in a 2D image.

    Parameters
    ----------
    image : (N, M) ndarray
        2D image. Every value strictly greater than zero is treated as part
        of an object, so label images are accepted as well as binary ones.
    directions : 2 or 4, optional
        Number of directions used to approximate the Crofton perimeter. The
        default, 4, should be more accurate than 2. Computation time is the
        same in both cases. Any value other than 2 selects the 4-direction
        coefficients.

    Returns
    -------
    perimeter : float
        Total perimeter of all objects in the image, obtained as the dot
        product of the coefficient table with the configuration histogram.

    Raises
    ------
    NotImplementedError
        If `image` is not two-dimensional.

    Notes
    -----
    This measure is based on Crofton formula [1], which is a measure from
    integral geometry. It is defined for general curve length evaluation via
    a double integral along all directions. In a discrete
    space, 2 or 4 directions give a quite good approximation, 4 being more
    accurate than 2 for more complex shapes.

    Similar to :func:`~.measure.perimeter`, this function returns an
    approximation of the perimeter in continuous space.

    References
    ----------
    .. [1] https://en.wikipedia.org/wiki/Crofton_formula
    .. [2] S. Rivollier. Analyse d’image geometrique et morphometrique par
           diagrammes de forme et voisinages adaptatifs generaux. PhD thesis,
           2010.
           Ecole Nationale Superieure des Mines de Saint-Etienne.
           https://tel.archives-ouvertes.fr/tel-00560838
    """
    if image.ndim != 2:
        raise NotImplementedError(
            "`perimeter_crofton` supports 2D images only")

    # Reduce a possible label image to a 0/1 mask and surround it with a
    # one-pixel background border.
    mask = cp.pad((image > 0).astype(cp.uint8), pad_width=1, mode="constant")

    # The four non-zero weights (1, 2, 4, 8) turn every 2x2 neighbourhood
    # into a code in the 0-15 range; the histogram counts each code.
    weights = cp.array([[0, 0, 0], [0, 1, 4], [0, 2, 8]])
    codes = ndi.convolve(mask, weights, mode="constant", cval=0)
    histogram = cp.bincount(codes.ravel(), minlength=16)

    # Look-up table: perimeter contribution of each of the 16 codes.
    # fmt: off
    if directions == 2:
        half_pi = np.pi / 2
        lut = [
            0, half_pi, 0, 0,
            0, half_pi, 0, 0,
            half_pi, np.pi, 0, 0,
            half_pi, np.pi, 0, 0,
        ]
    else:
        sq2 = math.sqrt(2)
        quarter_pi = np.pi / 4
        corner = np.pi / 4 * (1 + 1 / sq2)
        diagonal = np.pi / (4 * sq2)
        lut = [
            0, corner, diagonal, np.pi / (2 * sq2),
            0, corner, 0, diagonal,
            quarter_pi, np.pi / 2, diagonal, diagonal,
            quarter_pi, np.pi / 2, 0, 0,
        ]
    # fmt: on

    return cp.asarray(lut) @ histogram
Exemplo n.º 14
0
def euler_number(image, connectivity=None):
    """Compute the Euler characteristic of the objects in a binary image.

    For 2D objects, the Euler number is the number of objects minus the number
    of holes. For 3D objects, the Euler number is obtained as the number of
    objects plus the number of holes, minus the number of tunnels, or loops.

    Parameters
    ----------
    image : (N, M) ndarray or (N, M, D) ndarray
        2D or 3D image.
        If image is not binary, all values strictly greater than zero
        are considered as the object.
    connectivity : int, optional
        Maximum number of orthogonal hops to consider a pixel/voxel
        as a neighbor.
        Accepted values range from 1 to ``image.ndim``. If ``None``, a full
        connectivity of ``image.ndim`` is used.
        4 or 8 neighborhoods are defined for 2D images (connectivity 1 and 2,
        respectively).
        6 or 26 neighborhoods are defined for 3D images (connectivity 1 and 3,
        respectively). Connectivity 2 is not defined.

    Returns
    -------
    euler_number : int
        Euler characteristic of the set of all objects in the image. For
        non-2D input the value is converted to a Python ``int``; for 2D input
        the dot product of the coefficient table with the configuration
        histogram is returned without conversion.

    Raises
    ------
    NotImplementedError
        If `image` is not 2D and ``connectivity`` is 2.

    Notes
    -----
    The Euler characteristic is an integer number that describes the
    topology of the set of all objects in the input image. If object is
    4-connected, then background is 8-connected, and conversely.

    The computation of the Euler characteristic is based on an integral
    geometry formula in discretized space. In practice, a neighbourhood
    configuration is constructed, and a LUT is applied for each
    configuration. The coefficients used are the ones of Ohser et al.

    It can be useful to compute the Euler characteristic for several
    connectivities. A large relative difference between results
    for different connectivities suggests that the image resolution
    (with respect to the size of objects and holes) is too low.

    References
    ----------
    .. [1] S. Rivollier. Analyse d’image geometrique et morphometrique par
           diagrammes de forme et voisinages adaptatifs generaux. PhD thesis,
           2010. Ecole Nationale Superieure des Mines de Saint-Etienne.
           https://tel.archives-ouvertes.fr/tel-00560838
    .. [2] Ohser J., Nagel W., Schladitz K. (2002) The Euler Number of
           Discretized Sets - On the Choice of Adjacency in Homogeneous
           Lattices. In: Mecke K., Stoyan D. (eds) Morphology of Condensed
           Matter. Lecture Notes in Physics, vol 600. Springer, Berlin,
           Heidelberg.

    Examples
    --------
    >>> import cupy as cp
    >>> SAMPLE = cp.zeros((100, 100, 100))
    >>> SAMPLE[40:60, 40:60, 40:60] = 1
    >>> euler_number(SAMPLE) # doctest: +ELLIPSIS
    1...
    >>> SAMPLE[45:55, 45:55, 45:55] = 0
    >>> euler_number(SAMPLE) # doctest: +ELLIPSIS
    2...
    >>> SAMPLE = cp.array([[0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0],
    ...                    [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0],
    ...                    [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0],
    ...                    [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0],
    ...                    [0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0],
    ...                    [0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0],
    ...                    [0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0],
    ...                    [1, 0, 1, 0, 0, 1, 1, 0, 1, 1, 0, 0, 1, 1, 1, 1, 1, 0],
    ...                    [0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0, 0, 0, 1, 1, 1, 1],
    ...                    [0, 1, 1, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1]])
    >>> int(euler_number(SAMPLE))
    0
    >>> int(euler_number(SAMPLE, connectivity=1))
    2
    """  # noqa

    # The input may be a label image: binarize it, then surround it with a
    # one-element background border.
    binary = cp.pad((image > 0).astype(int), pad_width=1, mode="constant")
    ndim = binary.ndim

    # Full connectivity is the default.
    if connectivity is None:
        connectivity = ndim

    # `weights` assigns a distinct power of two to every cell of the 2x2
    # (2D) or 2x2x2 (3D) neighbourhood; `lut` holds the coefficient that
    # each resulting configuration contributes to the Euler characteristic.
    if ndim == 2:
        weights = cp.array([[0, 0, 0], [0, 1, 4], [0, 2, 8]])
        lut = EULER_COEFS2D_4 if connectivity == 1 else EULER_COEFS2D_8
        bins = 16
    else:  # every non-2D input is handled as a 3D image
        if connectivity == 2:
            raise NotImplementedError(
                "For 3D images, Euler number is implemented "
                "for connectivities 1 and 3 only")

        # fmt: off
        weights = cp.array([[[0, 0, 0], [0, 0, 0], [0, 0, 0]],
                            [[0, 0, 0], [0, 1, 4], [0, 2, 8]],
                            [[0, 0, 0], [0, 16, 64], [0, 32, 128]]])
        # fmt: on
        # The 6-connected case reuses the 26-connected table in reverse.
        if connectivity == 1:
            lut = EULER_COEFS3D_26[::-1]
        else:
            lut = EULER_COEFS3D_26
        bins = 256

    # `codes` lies in the 0-255 range in 3D and in the 0-15 range in 2D,
    # one value per binary configuration of the weighted neighbourhood.
    codes = ndi.convolve(binary, weights, mode="constant", cval=0)
    histogram = cp.bincount(codes.ravel(), minlength=bins)

    lut = cp.asarray(lut)
    if ndim == 2:
        return lut @ histogram
    return int(0.125 * lut @ histogram)
Exemplo n.º 15
0
def remove_small_objects(ar, min_size=64, connectivity=1, in_place=False):
    """Zero out every object that has fewer than `min_size` elements.

    `ar` is expected to hold labeled objects. A bool array is labeled
    first, which can give different results for bool and 0-and-1 integer
    arrays.

    Parameters
    ----------
    ar : ndarray (arbitrary shape, int or bool type)
        The array containing the objects of interest. If the array type is
        int, the ints must be non-negative.
    min_size : int, optional (default: 64)
        The smallest allowable object size. With ``min_size == 0`` the
        (possibly copied) input is returned untouched.
    connectivity : int, {1, 2, ..., ar.ndim}, optional (default: 1)
        The connectivity defining the neighborhood of a pixel. Used during
        labelling if `ar` is bool.
    in_place : bool, optional (default: False)
        If ``True``, remove the objects in the input array itself.
        Otherwise, make a copy.

    Raises
    ------
    TypeError
        If the input array is of an invalid type, such as float or string.
    ValueError
        If the input array contains negative values.

    Warns
    -----
    A warning is issued when a non-bool input yields exactly two size bins,
    i.e. only a single label was provided.

    Returns
    -------
    out : ndarray, same shape and type as input `ar`
        The input array with small connected components removed.

    Examples
    --------
    >>> import cupy as cp
    >>> from cupyimg.skimage import morphology
    >>> a = cp.array([[0, 0, 0, 1, 0],
    ...               [1, 1, 1, 0, 0],
    ...               [1, 1, 1, 0, 1]], bool)
    >>> b = morphology.remove_small_objects(a, 6)
    >>> b
    array([[False, False, False, False, False],
           [ True,  True,  True, False, False],
           [ True,  True,  True, False, False]])
    >>> c = morphology.remove_small_objects(a, 7, connectivity=2)
    >>> c
    array([[False, False, False,  True, False],
           [ True,  True,  True, False, False],
           [ True,  True,  True, False, False]])
    >>> d = morphology.remove_small_objects(a, 6, in_place=True)
    >>> d is a
    True

    """
    # Only int and bool inputs are accepted; the helper rejects the rest.
    _check_dtype_supported(ar)

    out = ar if in_place else ar.copy()

    # Nothing can be smaller than zero elements, so there is nothing to do.
    if min_size == 0:
        return out

    needs_labeling = out.dtype == bool
    if needs_labeling:
        # Bool input: derive connected-component labels into a new array.
        footprint = ndi.generate_binary_structure(ar.ndim, connectivity)
        labels = cp.zeros_like(ar, dtype=cp.int32)
        ndi.label(ar, footprint, output=labels)
    else:
        # Integer input already carries the labels.
        labels = out

    try:
        sizes = cp.bincount(labels.ravel())
    except ValueError:
        raise ValueError("Negative value labels are not supported. Try "
                         "relabeling the input with `scipy.ndimage.label` or "
                         "`skimage.morphology.label`.")

    if not needs_labeling and len(sizes) == 2:
        warn("Only one label was provided to `remove_small_objects`. "
             "Did you mean to use a boolean array?")

    # Look up, per element, whether its label's size falls below the
    # threshold, then clear those elements.
    discard = (sizes < min_size)[labels]
    out[discard] = 0

    return out
Exemplo n.º 16
0
 def mode(a):
     """Return the most frequent value of `a` after casting to ``int64``.

     The values are histogrammed with ``xp.bincount`` and the index of the
     largest bin is returned, so on ties the smallest value wins.
     """
     return xp.argmax(xp.bincount(a.astype(xp.int64)))
Exemplo n.º 17
0
def _sparse_document_frequency(X):
    """Count the non-zero (stored) values of each feature in sparse X.

    For a CSR matrix the column indices are histogrammed with at least
    ``X.shape[1]`` bins. Any other input is handled by differencing
    ``X.indptr``; presumably X is then column-compressed (CSC) so that
    ``indptr`` delimits features -- verify against callers.
    """
    is_csr = cupyx.scipy.sparse.isspmatrix_csr(X)
    if not is_csr:
        return cp.diff(X.indptr)
    n_features = X.shape[1]
    return cp.bincount(X.indices, minlength=n_features)
Exemplo n.º 18
0
 def time_bincount(self):
     """Run ``np.bincount`` over ``self.d`` without weights.

     The computed histogram is discarded and nothing is returned; presumably
     this is a timing benchmark, given the ``time_`` prefix.
     """
     values = self.d
     np.bincount(values)