Example #1
def _triu(x, k=0):
    m, n = x.shape
    u = cupy.arange(m).reshape(m, 1)
    v = cupy.arange(n).reshape(1, n)
    mask = v - u >= k
    x *= mask
    return x
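
A minimal usage sketch of the helper above; note that `_triu` zeroes its argument in place through the broadcast mask:

import cupy

x = cupy.ones((3, 3), dtype=cupy.float32)
print(_triu(x, k=0))
# [[1. 1. 1.]
#  [0. 1. 1.]
#  [0. 0. 1.]]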
Example #2
def test_cupy_to_chainerx_contiguous():
    dtype = numpy.float32
    a_cupy = cupy.arange(6, dtype=dtype).reshape((2, 3))
    a_cupy_refcount_before = sys.getrefcount(a_cupy)

    a_chx = _fromrawpointer(
        a_cupy.data.mem.ptr,
        a_cupy.shape,
        a_cupy.dtype,
        a_cupy.strides,
        'cuda:0',
        0,
        a_cupy)

    assert sys.getrefcount(a_cupy) == a_cupy_refcount_before + 1
    assert a_chx.device.name == 'cuda:0'
    chainerx.testing.assert_array_equal_ex(a_chx, a_cupy.get())

    # Write to a_cupy
    a_cupy[0, 1] = 8
    chainerx.testing.assert_array_equal_ex(
        a_chx, numpy.array([[0, 8, 2], [3, 4, 5]], dtype))

    # Write to a_chx
    a_chx += 1
    chainerx.testing.assert_array_equal_ex(
        a_cupy.get(), numpy.array([[1, 9, 3], [4, 5, 6]], dtype))
Example #3
def test_getitem_int(self):
    x = cupy.arange(24).reshape((2, 3, 4)).astype('i')
    y = cupy.empty_like(x)
    y = cupy.ElementwiseKernel(
        'raw T x', 'int32 y', 'y = x[i]', 'test_carray_getitem_int',
    )(x, y)
    testing.assert_array_equal(y, x)
Example #4
def test_cupy_to_chainerx_noncontiguous_with_offset():
    dtype = numpy.float32
    a_cupy = cupy.arange(12, dtype=dtype).reshape((2, 6))[::-1, ::2]
    offset = a_cupy.data.ptr - a_cupy.data.mem.ptr

    # test preconditions
    assert offset > 0
    assert not a_cupy.flags.c_contiguous

    a_chx = _fromrawpointer(
        a_cupy.data.mem.ptr,
        a_cupy.shape,
        a_cupy.dtype,
        a_cupy.strides,
        'cuda:0',
        offset,
        a_cupy)

    assert a_chx.strides == a_cupy.strides
    chainerx.testing.assert_array_equal_ex(
        a_chx, a_cupy.get(), strides_check=False)

    a_cupy[1, 1] = 53

    assert a_chx.strides == a_cupy.strides
    chainerx.testing.assert_array_equal_ex(
        a_chx, a_cupy.get(), strides_check=False)
Example #5
def test_cupy_to_chainerx_noncontiguous_without_offset():
    # This test includes access to an address before the given pointer
    # (because of a negative stride).
    dtype = numpy.float32
    a_cupy = cupy.arange(12, dtype=dtype).reshape((2, 6))[::-1, ::2]

    # test preconditions
    assert a_cupy.data.mem.ptr < a_cupy.data.ptr
    assert not a_cupy.flags.c_contiguous

    a_chx = _fromrawpointer(
        a_cupy.data.ptr,
        a_cupy.shape,
        a_cupy.dtype,
        a_cupy.strides,
        'cuda:0',
        0,
        a_cupy)

    assert a_chx.strides == a_cupy.strides
    chainerx.testing.assert_array_equal_ex(
        a_chx, a_cupy.get(), strides_check=False)

    a_cupy[1, 1] = 53

    assert a_chx.strides == a_cupy.strides
    chainerx.testing.assert_array_equal_ex(
        a_chx, a_cupy.get(), strides_check=False)
Example #6
def test_strides(self):
    x = cupy.arange(6).reshape((2, 3)).astype('i')
    y = cupy.ElementwiseKernel(
        'raw int32 x', 'int32 y', 'y = x.strides()[i]',
        'test_carray_strides',
    )(x, size=2)
    testing.assert_array_equal(y, (12, 4))
Example #7
def test_getitem_idx(self):
    x = cupy.arange(24).reshape((2, 3, 4)).astype('i')
    y = cupy.empty_like(x)
    y = cupy.ElementwiseKernel(
        'raw T x', 'int32 y',
        'int idx[] = {i / 12, i / 4 % 3, i % 4}; y = x[idx]',
        'test_carray_getitem_idx',
    )(x, y)
    testing.assert_array_equal(y, x)
Example #8
    def test_scan(self, dtype):
        element_num = 10000

        if dtype in {cupy.int8, cupy.uint8}:
            element_num = 100

        a = cupy.ones((element_num,), dtype=dtype)
        prefix_sum = cupy.core.core.scan(a)
        expect = cupy.arange(start=1, stop=element_num + 1).astype(dtype)

        testing.assert_array_equal(prefix_sum, expect)
Example #9
def test_cupy_to_chainerx_invalid_device():
    dtype = numpy.float32
    with cupy.cuda.Device(1):
        a_cupy = cupy.arange(6, dtype=dtype).reshape((2, 3))
    with pytest.raises(chainerx.ChainerxError):
        _fromrawpointer(
            a_cupy.data.mem.ptr,
            a_cupy.shape,
            a_cupy.dtype,
            a_cupy.strides,
            'cuda:0',
            0,
            a_cupy)
Example #10
    def test_scan_out(self, dtype):
        element_num = 10000

        if dtype in {cupy.int8, cupy.uint8, cupy.float16}:
            element_num = 100

        a = cupy.ones((element_num,), dtype=dtype)
        b = cupy.zeros_like(a)
        cupy.core.core.scan(a, b)
        expect = cupy.arange(start=1, stop=element_num + 1).astype(dtype)

        testing.assert_array_equal(b, expect)

        cupy.core.core.scan(a, a)
        testing.assert_array_equal(a, expect)
Example #11
def test_cupy_to_chainerx_nondefault_device():
    dtype = numpy.float32
    with cupy.cuda.Device(1):
        a_cupy = cupy.arange(6, dtype=dtype).reshape((2, 3))
    a_chx = _fromrawpointer(
        a_cupy.data.mem.ptr,
        a_cupy.shape,
        a_cupy.dtype,
        a_cupy.strides,
        'cuda:1',
        0,
        a_cupy)

    assert a_chx.device.name == 'cuda:1'
    chainerx.testing.assert_array_equal_ex(a_chx, a_cupy.get())
Example #12
    def test_reshape_contiguity(self):
        a = cupy.arange(6).reshape(2, 3)
        self.assertTrue(a.flags.c_contiguous)
        self.assertFalse(a.flags.f_contiguous)

        a = a.reshape(1, 6, 1)
        self.assertTrue(a.flags.c_contiguous)
        self.assertTrue(a.flags.f_contiguous)

        b = a.T.reshape(1, 6, 1)
        self.assertTrue(b.flags.c_contiguous)
        self.assertTrue(b.flags.f_contiguous)

        b = a.T.reshape(2, 3)
        self.assertTrue(b.flags.c_contiguous)
        self.assertFalse(b.flags.f_contiguous)
Example #13
def test_cupy_to_chainerx_delete_chainerx_first():
    dtype = numpy.float32
    a_cupy = cupy.arange(6, dtype=dtype).reshape((2, 3))
    a_chx = _fromrawpointer(
        a_cupy.data.mem.ptr,
        a_cupy.shape,
        a_cupy.dtype,
        a_cupy.strides,
        'cuda:0',
        0,
        a_cupy)

    del a_chx

    a_cupy += 1
    chainerx.testing.assert_array_equal_ex(
        a_cupy.get(), numpy.array([[1, 2, 3], [4, 5, 6]], dtype))
Example #14
def fliplr(a):
    """Flip array in the left/right direction.

    Flip the entries in each row in the left/right direction. Columns
    are preserved, but appear in a different order than before.

    Args:
        a (~cupy.ndarray): Input array.

    Returns:
        ~cupy.ndarray: Output array.

    .. seealso:: :func:`numpy.fliplr`

    """
    if a.ndim < 2:
        raise ValueError('Input must be >= 2-d')
    return cupy.take(a, cupy.arange(a.shape[1] - 1, -1, -1), axis=1)
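
For reference, a quick behavior check (cupy.fliplr implements the same take-based logic as the function above):

import cupy

a = cupy.arange(6).reshape(2, 3)
print(cupy.fliplr(a))
# [[2 1 0]
#  [5 4 3]]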
Example #15
def flipud(a):
    """Flip array in the up/down direction.

    Flip the entries in each column in the up/down direction. Rows are
    preserved, but appear in a different order than before.

    Args:
        a (~cupy.ndarray): Input array.

    Returns:
        ~cupy.ndarray: Output array.

    .. seealso:: :func:`numpy.flipud`

    """
    if a.ndim < 1:
        raise ValueError('Input must be >= 1-d')
    return cupy.take(a, cupy.arange(a.shape[0] - 1, -1, -1), axis=0)
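
And the analogous check for the up/down flip:

import cupy

a = cupy.arange(6).reshape(2, 3)
print(cupy.flipud(a))
# [[3 4 5]
#  [0 1 2]]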
Example #16
def forward_gpu(self, inputs):
    a, b = inputs
    c = cp.zeros_like(a, 'float32')
    chainer.cuda.elementwise(
        'int32 j, raw T a, raw T b',
        'raw T c',
        '''
            float* ap = &a[j * 3];
            float* bp = &b[j * 3];
            float* cp = &c[j * 3];
            cp[0] = ap[1] * bp[2] - ap[2] * bp[1];
            cp[1] = ap[2] * bp[0] - ap[0] * bp[2];
            cp[2] = ap[0] * bp[1] - ap[1] * bp[0];
        ''',
        'function',
    )(
        cp.arange(a.size // 3).astype('int32'), a, b, c,
    )
    return c,
Example #17
def _non_maximum_suppression_gpu(bbox, thresh, score=None, limit=None):
    if len(bbox) == 0:
        return cp.zeros((0,), dtype=np.int32)

    n_bbox = bbox.shape[0]

    if score is not None:
        order = score.argsort()[::-1].astype(np.int32)
    else:
        order = cp.arange(n_bbox, dtype=np.int32)

    sorted_bbox = bbox[order, :]
    selec, n_selec = _call_nms_kernel(
        sorted_bbox, thresh)
    selec = selec[:n_selec]
    selec = order[selec]
    if limit is not None:
        selec = selec[:limit]
    return selec
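
A hedged usage sketch (the boxes and threshold are made up, and _call_nms_kernel is assumed to be available from the surrounding module):

import numpy as np
import cupy as cp

bbox = cp.array([[0, 0, 10, 10], [1, 1, 11, 11], [50, 50, 60, 60]], dtype=cp.float32)
score = cp.array([0.9, 0.8, 0.7], dtype=cp.float32)
keep = _non_maximum_suppression_gpu(bbox, thresh=0.5, score=score)
# indices of the kept boxes, ordered by descending score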
Example #18
File: indexing.py  Project: toslunar/cupy
def take_along_axis(a, indices, axis):
    """Take values from the input array by matching 1d index and data slices.

    Args:
        a (cupy.ndarray): Array to extract elements.
        indices (cupy.ndarray): Indices to take along each 1d slice of ``a``.
        axis (int): The axis to take 1d slices along.

    Returns:
        cupy.ndarray: The indexed result.

    .. seealso:: :func:`numpy.take_along_axis`
    """

    if indices.dtype.kind not in ('i', 'u'):
        raise IndexError('`indices` must be an integer array')

    if axis is None:
        a = a.ravel()
        axis = 0

    ndim = a.ndim

    axis = internal._normalize_axis_index(axis, ndim)

    if ndim != indices.ndim:
        raise ValueError(
            '`indices` and `a` must have the same number of dimensions')

    fancy_index = []
    for i, n in enumerate(a.shape):
        if i == axis:
            fancy_index.append(indices)
        else:
            ind_shape = (1,) * i + (-1,) + (1,) * (ndim - i - 1)
            fancy_index.append(cupy.arange(n).reshape(ind_shape))

    return a[tuple(fancy_index)]
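
A short usage sketch: combined with argsort, take_along_axis sorts each row of a 2-d array, mirroring numpy.take_along_axis:

import cupy

a = cupy.array([[10, 30, 20], [60, 40, 50]])
idx = cupy.argsort(a, axis=1)
print(cupy.take_along_axis(a, idx, axis=1))
# [[10 20 30]
#  [40 50 60]]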
Example #19
def _bincount_histogram(image, source_range):
    """
    Efficient histogram calculation for an image of integers.

    This function is significantly more efficient than cupy.histogram but
    works only on images of integers. It is based on cupy.bincount.

    Parameters
    ----------
    image : array
        Input image.
    source_range : string
        'image' determines the range from the input image.
        'dtype' determines the range from the expected range of the images
        of that data type.

    Returns
    -------
    hist : array
        The values of the histogram.
    bin_centers : array
        The values at the center of the bins.
    """
    if source_range not in ['image', 'dtype']:
        raise ValueError('Incorrect value for `source_range` argument: '
                         f'{source_range}')
    if source_range == 'image':
        image_min = int(image.min().astype(np.int64))
        image_max = int(image.max().astype(np.int64))
    elif source_range == 'dtype':
        image_min, image_max = dtype_limits(image, clip_negative=False)
    image, offset = _offset_array(image, image_min, image_max)
    hist = cp.bincount(image.ravel(), minlength=image_max - image_min + 1)
    bin_centers = cp.arange(image_min, image_max + 1)
    if source_range == 'image':
        idx = max(image_min, 0)
        hist = hist[idx:]
    return hist, bin_centers
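
A hedged usage sketch, assuming the module's _offset_array and dtype_limits helpers behave as described in the docstring:

import cupy as cp

img = cp.array([[0, 1, 1], [2, 2, 2]], dtype=cp.uint8)
hist, centers = _bincount_histogram(img, 'image')
# hist -> [1 2 3] (counts of the values 0, 1, 2), centers -> [0 1 2]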
Example #20
    def __init__(
        self,
        dataset,
        cat_names,
        cont_names,
        label_names,
        batch_size,
        shuffle,
        seed_fn=None,
        parts_per_chunk=1,
        device=None,
        global_size=None,
        global_rank=None,
        drop_last=False,
    ):
        self.data = dataset
        self.indices = cp.arange(dataset.to_ddf().npartitions)
        self.drop_last = drop_last
        self.device = device or 0

        self.global_size = global_size or 1
        self.global_rank = global_rank or 0

        self.cat_names = cat_names or []
        self.cont_names = cont_names or []
        self.label_names = label_names
        self.batch_size = batch_size
        self.shuffle = shuffle
        self.seed_fn = seed_fn

        self.num_rows_processed = 0

        # set the chunk queue size to 1; we only want one chunk in the queue at a time
        self._buff = ChunkQueue(self, 1, num_parts=parts_per_chunk, shuffle=shuffle)
        # compute once instead of every time __len__ is called
        self._buff_len = len(self._buff)
        self._batch_itr = None
        self._workers = None
Example #21
    def find(self, data):
        ret = cp.ascontiguousarray(cp.zeros(data.shape[:-1], 'int32')) - 1
        data = cp.ascontiguousarray(data)
        loop_indices = cp.arange(data.size // self.dim).astype('int32')
        chainer.cuda.elementwise(
            'int32 j, raw int32 data, raw int32 indices, raw int32 values, raw int32 ret',
            '',
            string.Template('''
                /* */
                int* value = &data[j * ${dim}];

                /* compute initial key */
                unsigned int key = 0;
                for (int k = 0; k < ${dim}; k++) key = (key + value[k]) * ${hash_factor};
                key = key % ${table_size};

                while (1) {
                    if (indices[key] < 0) {
                        ret[j] = -1;
                        break;
                    }
                    bool match = true;
                    for (int k = 0; k < ${dim}; k++) if (values[key * ${dim} + k] != value[k]) match = false;
                    if (match) {
                        ret[j] = indices[key];
                        break;
                    } else {
                        key = (key + 1) % ${table_size};
                    }
                }
            ''').substitute(
                table_size=self.table_size,
                hash_factor=self.hash_factor,
                dim=self.dim,
            ),
            'function',
        )(loop_indices, data, self.indices, self.values, ret)
        return ret
Example #22
File: norms.py  Project: oesteban/cupy
def _slogdet_one(a):
    util._assert_rank2(a)
    util._assert_nd_squareness(a)
    dtype = a.dtype

    handle = device.get_cusolver_handle()
    m = len(a)
    ipiv = cupy.empty(m, 'i')
    info = cupy.empty((), 'i')

    # Need to make a copy because getrf works inplace
    a_copy = a.copy(order='F')

    if dtype == 'f':
        getrf_bufferSize = cusolver.sgetrf_bufferSize
        getrf = cusolver.sgetrf
    else:
        getrf_bufferSize = cusolver.dgetrf_bufferSize
        getrf = cusolver.dgetrf

    buffersize = getrf_bufferSize(handle, m, m, a_copy.data.ptr, m)
    workspace = cupy.empty(buffersize, dtype=dtype)
    getrf(handle, m, m, a_copy.data.ptr, m, workspace.data.ptr,
          ipiv.data.ptr, info.data.ptr)

    if info[()] == 0:
        diag = cupy.diag(a_copy)
        # ipiv is 1-origin
        non_zero = (cupy.count_nonzero(ipiv != cupy.arange(1, m + 1)) +
                    cupy.count_nonzero(diag < 0))
        # Note: sign == (-1) ** (non_zero % 2)
        sign = (non_zero % 2) * -2 + 1
        logdet = cupy.log(abs(diag)).sum()
    else:
        sign = cupy.array(0.0, dtype=dtype)
        logdet = cupy.array(float('-inf'), dtype)

    return sign, logdet
Example #23
def indices(dimensions, dtype=int):
    """Returns an array representing the indices of a grid.

    Computes an array where the subarrays contain index values 0,1,...
    varying only along the corresponding axis.

    Args:
        dimensions: The shape of the grid.
        dtype: Data type specifier. It is int by default.

    Returns:
        ndarray:
        The array of grid indices,
        ``grid.shape = (len(dimensions),) + tuple(dimensions)``.

    Examples
    --------
    >>> grid = cupy.indices((2, 3))
    >>> grid.shape
    (2, 2, 3)
    >>> grid[0]        # row indices
    array([[0, 0, 0],
           [1, 1, 1]])
    >>> grid[1]        # column indices
    array([[0, 1, 2],
           [0, 1, 2]])

    .. seealso:: :func:`numpy.indices`

    """
    dimensions = tuple(dimensions)
    N = len(dimensions)
    shape = (1, ) * N
    res = cupy.empty((N, ) + dimensions, dtype=dtype)
    for i, dim in enumerate(dimensions):
        res[i] = cupy.arange(dim, dtype=dtype).reshape(shape[:i] + (dim, ) +
                                                       shape[i + 1:])
    return res
Example #24
def select_groups(labels, groups_order_subset='all'):
    """Get subset of groups in adata.obs[key].
    """

    adata_obs_key = labels
    groups_order = labels.cat.categories
    groups_masks = cp.zeros(
        (len(labels.cat.categories), len(labels.cat.codes)), dtype=bool)
    for iname, name in enumerate(labels.cat.categories):
        # if the name is not found, fallback to index retrieval
        if labels.cat.categories[iname] in labels.cat.codes:
            mask = labels.cat.categories[iname] == labels.cat.codes
        else:
            mask = iname == labels.cat.codes
        groups_masks[iname] = mask.values
    groups_ids = list(range(len(groups_order)))
    if groups_order_subset != 'all':
        groups_ids = []
        for name in groups_order_subset:
            groups_ids.append(
                cp.where(
                    cp.array(labels.cat.categories.to_array().astype("int32"))
                    == int(name))[0][0])
        if len(groups_ids) == 0:
            # fallback to index retrieval
            groups_ids = cp.where(
                cp.in1d(
                    cp.arange(len(labels.cat.categories)).astype(str),
                    cp.array(groups_order_subset),
                ))[0]

        groups_ids = [groups_id.item() for groups_id in groups_ids]
        groups_masks = groups_masks[groups_ids]
        groups_order_subset = labels.cat.categories[groups_ids].to_array(
        ).astype(int)
    else:
        groups_order_subset = groups_order.to_array()
    return groups_order_subset, groups_masks
Example #25
    def __call__(self, input_ids, input_mask, token_type_ids, masked_lm_positions, masked_lm_ids, masked_lm_weights,
                 next_sentence_labels):
        sequence_output, pooled_output = self.bert.get_sequence_output_and_pooled_output(
            input_ids,
            input_mask,
            token_type_ids)
        # check whether backprop still flows when the parameter is fetched directly and used in matmul
        embedding_table = self.bert.get_embedding_table()

        """Gathers the vectors at the specific positions over a minibatch."""
        batch_size, seq_length, width = sequence_output.shape
        flat_offsets = np.reshape(np.arange(0, batch_size, dtype=np.int32) * seq_length, [-1, 1])
        flat_positions = np.reshape(masked_lm_positions + flat_offsets, [-1])
        flat_sequence_output = np.reshape(sequence_output,
                                          [batch_size * seq_length, width])
        x = flat_sequence_output[[flat_positions]]

        """Get loss for the masked LM."""
        normed = self.layer_norm(self.activate(self.masked_lm_dense(x)))
        masked_lm_logits = F.matmul(normed, embedding_table.T) + self.mask_bias
        label_ids = F.reshape(masked_lm_ids, [-1])
        masked_lm_loss = F.softmax_cross_entropy(masked_lm_logits, label_ids, ignore_label=0)

        chainer.report({'masked_lm_loss': masked_lm_loss}, self)
        chainer.report({'masked_lm_accuracy': F.accuracy(masked_lm_logits, label_ids)}, self)

        """Get loss for the next sentence."""
        next_sentence_logits = F.matmul(pooled_output, self.next_sentence_weights.T) + self.next_sentence_bias
        labels = F.reshape(next_sentence_labels, [-1])
        next_sentence_loss = F.softmax_cross_entropy(next_sentence_logits, labels)

        chainer.report({'next_sentence_loss': next_sentence_loss}, self)
        chainer.report({'next_sentence_accuracy': F.accuracy(next_sentence_logits, labels)}, self)

        loss = masked_lm_loss + next_sentence_loss
        chainer.report({'loss': loss}, self)

        return loss
Example #26
def step(self, state, momentum, rng, **args):
    q = state.copy()
    p = self.draw_momentum(rng)
    q_new = deepcopy(q)
    p_new = deepcopy(p)
    epsilon = self.step_size
    path_length = cp.ceil(2 * cp.random.rand() * self.path_length /
                          epsilon)
    grad_q = self.model.grad(q, **args)
    # SG-HMC leapfrog step
    for _ in cp.arange(path_length - 1):
        for var in self.start.keys():
            dim = (cp.array(q_new[var])).size
            rvar = rng.normal(0, 2 * epsilon, dim).reshape(q[var].shape)
            q_new[var] += epsilon * p_new[var]
            grad_q = self.model.grad(q_new, **args)
            p_new[var] = (
                1 - epsilon) * p_new[var] + epsilon * grad_q[var] + rvar
    acceptprob = self.accept(q, q_new, p, p_new, **args)
    if cp.isfinite(acceptprob) and (cp.random.rand() < acceptprob):
        q = q_new.copy()
        p = p_new.copy()
    return q, p, acceptprob
Example #27
def do_nadam(self, X, Y, update, learning_rate, **kwargs):
    layers = len(self.structure) - 1
    grads = self.calculate_grads(X, Y, kwargs["l2_reg_param"])
    for ii in cp.arange(1, layers + 1):
        update["mw" + str(ii)] = kwargs["beta1"] * update.get(
            "mw" + str(ii), 0) + (1 - kwargs["beta1"]) * cp.sum(
                grads["w" + str(ii)], axis=0)
        update["mb" + str(ii)] = kwargs["beta1"] * update.get(
            "mb" + str(ii), 0) + (1 - kwargs["beta1"]) * cp.sum(
                grads["b" + str(ii)], axis=1).reshape(-1, 1)
        update["vw" + str(ii)] = kwargs["beta2"] * update.get(
            "vw" + str(ii), 0) + (1 - kwargs["beta2"]) * cp.square(
                cp.sum(grads["w" + str(ii)], axis=0))
        update["vb" + str(ii)] = kwargs["beta2"] * update.get(
            "vb" + str(ii), 0) + (1 - kwargs["beta2"]) * cp.square(
                cp.sum(grads["b" + str(ii)], axis=1).reshape(-1, 1))
        self.params["w" + str(ii)] -= cp.multiply(
            learning_rate / cp.sqrt(kwargs["epsilon"] + (
                update["vw" + str(ii)] /
                (1 - kwargs["beta2"]**kwargs["step_num"]))),
            kwargs["beta1"] * (
                update["mw" + str(ii)] /
                (1 - kwargs["beta1"]**kwargs["step_num"]) +
                ((1 - kwargs["beta1"]) /
                 (1 - kwargs["beta1"]**kwargs["step_num"])) *
                cp.sum(grads["w" + str(ii)], axis=0)))
        self.params["b" + str(ii)] -= cp.multiply(
            learning_rate / cp.sqrt(kwargs["epsilon"] + (
                update["vb" + str(ii)] /
                (1 - kwargs["beta2"]**kwargs["step_num"]))),
            kwargs["beta1"] * (
                update["mb" + str(ii)] /
                (1 - kwargs["beta1"]**kwargs["step_num"]) +
                ((1 - kwargs["beta1"]) /
                 (1 - kwargs["beta1"]**kwargs["step_num"])) *
                cp.sum(grads["b" + str(ii)], axis=1).reshape(-1, 1)))
    return update
Example #29
def test_view_as_windows_2D():
    A = cp.arange(5 * 4).reshape(5, 4)
    window_shape = (4, 3)
    B = view_as_windows(A, window_shape)
    assert B.shape == (2, 2, 4, 3)
    # fmt: off
    cp.testing.assert_array_equal(
        B, cp.array([[[[0,  1,  2],
                       [4,  5,  6],
                       [8,  9, 10],
                       [12, 13, 14]],
                      [[1,  2,  3],
                       [5,  6,  7],
                       [9, 10, 11],
                       [13, 14, 15]]],
                     [[[4,  5,  6],
                       [8,  9, 10],
                       [12, 13, 14],
                       [16, 17, 18]],
                      [[5,  6,  7],
                       [9, 10, 11],
                       [13, 14, 15],
                       [17, 18, 19]]]]))
Example #30
    def forward(self, x, t):
        if x.ndim == 2:  # when using mini-batches
            x = x - x.max(axis=1, keepdims=True)
            x = cp.exp(x)
            y = x / x.sum(axis=1, keepdims=True)
        elif x.ndim == 1:
            x = x - cp.max(x)
            y = cp.exp(x) / cp.sum(cp.exp(x))

        if y.ndim == 1:
            t = t.reshape(1, t.size)
            y = y.reshape(1, y.size)

        # if the teacher labels are one-hot vectors, convert them to
        # indices of the correct class
        if t.size == y.size:
            t = t.argmax(axis=1)

        batch_size = y.shape[0]
        # mean cross-entropy over the batch, indexing each sample's
        # probability of its correct class
        loss = -1.0 * cp.sum(
            cp.log(y[cp.arange(batch_size), t] + 1e-7)) / batch_size
        self.y = y
        self.t = t
        return loss
Example #31
    def inverse_transform(self, y: cudf.Series) -> cudf.Series:
        """
        Revert ordinal label to original label

        Parameters
        ----------
        y : cudf.Series, dtype=int32
            Ordinal labels to be reverted

        Returns
        -------
        reverted : cudf.Series
            Reverted labels
        """
        # check LabelEncoder is fitted
        self._check_is_fitted()
        # check input type is cudf.Series
        if not isinstance(y, cudf.Series):
            raise TypeError(
                'Input of type {} is not cudf.Series'.format(type(y)))

        # check if ord_label out of bound
        ord_label = y.unique()
        category_num = len(self.classes_)
        if self.handle_unknown == 'error':
            for ordi in ord_label.values_host:
                if ordi < 0 or ordi >= category_num:
                    raise ValueError(
                        'y contains previously unseen label {}'.format(ordi))

        y = y.astype(self.dtype)

        ran_idx = cudf.Series(cp.arange(len(self.classes_))).astype(self.dtype)

        reverted = y._column.find_and_replace(ran_idx, self.classes_, False)

        return cudf.Series(reverted)
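
A round-trip sketch, assuming this method belongs to cuml's LabelEncoder:

import cudf
from cuml.preprocessing import LabelEncoder

le = LabelEncoder()
encoded = le.fit_transform(cudf.Series(['a', 'c', 'b', 'a']))  # -> 0, 2, 1, 0
decoded = le.inverse_transform(encoded)                        # -> 'a', 'c', 'b', 'a'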
Example #32
def ds(kx, ky, qx, qy, om, d):

    topkq = -complex(0, 1) * V0 * ((kx + qx) - complex(0, 1) * (ky + qy))
    botkq = complex(0, 1) * V0 * ((kx + qx) + complex(0, 1) * (ky + qy))
    innkq = om + complex(0, 1) * Gamm - A * ((kx + qx)**2 + (ky + qy)**2) - V2

    topk = -complex(0, 1) * V0 * (kx - complex(0, 1) * ky)
    botk = complex(0, 1) * V0 * (kx + complex(0, 1) * ky)
    innk = om + complex(0, 1) * Gamm - A * (kx**2 + ky**2) - V2

    cent = cp.arange(-(N - 1) / 2, (N - 1) / 2 + 1, 1)

    # cp.diag takes no dtype argument; build the diagonal, then cast
    d = hOmg * cp.diag(cent).astype(cp.float32)

    Ginkq = cp.eye(N, N, k=1) * topkq + cp.eye(
        N, N, k=-1) * botkq + innkq * cp.eye(N, N) - d
    Gink = cp.eye(N, N, k=1) * topk + cp.eye(
        N, N, k=-1) * botk + innk * cp.eye(N, N) - d

    Grkq = cp.linalg.inv(Ginkq)
    Gakq = cp.transpose(cp.conj(Grkq))

    Grk = cp.linalg.inv(Gink)
    Gak = cp.transpose(cp.conj(Grk))

    fer = cp.heaviside(-(d + cp.eye(N, N) * (om - mu)), 0)

    in1 = cp.matmul(Grkq, cp.matmul(Grk, cp.matmul(fer, Gak)))
    in2 = cp.matmul(Grkq, cp.matmul(fer, cp.matmul(Gakq, Gak)))
    tr = cp.trace(in1 + in2)
    # divide by DOS, multiply by 2 for spin, and divide by (2pi)^3
    dchi = -(4) * Gamm * tr / math.pi**2

    return dchi
Example #33
    def inverse_transform(self, y: cudf.Series) -> cudf.Series:
        """
        Revert ordinal label to original label

        Parameters
        ----------
        y : cudf.Series, pandas.Series, cupy.ndarray or numpy.ndarray
            dtype=int32
            Ordinal labels to be reverted

        Returns
        -------
        reverted : the same type as y
            Reverted labels
        """
        # check LabelEncoder is fitted
        self._check_is_fitted()
        # check input type is cudf.Series
        y = self._to_cudf_series(y)

        # check if ord_label out of bound
        ord_label = y.unique()
        category_num = len(self.classes_)
        if self.handle_unknown == 'error':
            for ordi in ord_label.values_host:
                if ordi < 0 or ordi >= category_num:
                    raise ValueError(
                        'y contains previously unseen label {}'.format(ordi))

        y = y.astype(self.dtype)

        ran_idx = cudf.Series(cp.arange(len(self.classes_))).astype(self.dtype)

        reverted = y._column.find_and_replace(ran_idx, self.classes_, False)

        res = cudf.Series(reverted)
        return res
Example #34
def test_mask():
    vol = cp.zeros((30, 30, 30))
    vol[15, 15, 15] = 1
    struct = generate_binary_structure(3, 1)
    # TODO: remove brute_force=True once non-brute force implemented for CuPy
    voln = binary_dilation(vol,
                           structure=struct,
                           iterations=4,
                           brute_force=True).astype("f4")
    initial = cp.sum(voln > 0)
    mask = voln.copy()
    thresh = otsu(mask)
    mask = mask > thresh
    initial_otsu = cp.sum(mask > 0)
    assert_array_equal(initial_otsu, initial)

    mins, maxs = bounding_box(mask)
    voln_crop = crop(mask, mins, maxs)
    initial_crop = cp.sum(voln_crop > 0)
    assert_array_equal(initial_crop, initial)

    applymask(voln, mask)
    final = cp.sum(voln > 0)
    assert_array_equal(final, initial)

    # Test multi_median.
    img = cp.arange(25).reshape(5, 5)
    img_copy = img.copy()
    medianradius = 2
    median_test = multi_median(img, medianradius, 3)
    assert_array_equal(img, img_copy)

    medarr = ((medianradius * 2) + 1, ) * img.ndim
    median_control = median_filter(img, medarr)
    median_control = median_filter(median_control, medarr)
    median_control = median_filter(median_control, medarr)
    assert_array_equal(median_test, median_control)
Example #35
def _run_cupy_natural_break(data, num_sample, k):
    num_data = data.size

    if num_sample is not None and num_sample < num_data:
        generator = cupy.random.RandomState(1234567890)
        idx = [i for i in range(0, data.size)]
        generator.shuffle(idx)
        sample_idx = idx[:num_sample]
        sample_data = data.flatten()[sample_idx]
    else:
        sample_data = data.flatten()

    # warn if the total number of data points used to fit the model exceeds 40k
    if sample_data.size >= 40000:
        warnings.warn(
            'natural_breaks Warning: Natural break classification '
            '(Jenks) has a complexity of O(n^2), '
            'your classification with {} data points may take '
            'a long time.'.format(sample_data.size), Warning)

    uv = cupy.unique(sample_data)
    uvk = len(uv)

    if uvk < k:
        warnings.warn(
            'natural_breaks Warning: Not enough unique values '
            'in data array for {} classes. '
            'n_samples={} should be >= n_clusters={}. '
            'Using k={} instead.'.format(k, uvk, k, uvk), Warning)
        uv.sort()
        bins = uv
    else:
        centroids = _run_cupy_jenks(sample_data, k)
        bins = cupy.array(centroids[1:])

    out = _bin(data, bins, cupy.arange(uvk))
    return out
Example #36
    def transform(self, columns: ColumnNames,
                  gdf: cudf.DataFrame) -> cudf.DataFrame:
        # Add temporary column for sorting
        tmp = "__tmp__"
        gdf[tmp] = cupy.arange(len(gdf), dtype="int32")

        fit_folds = self.kfold > 1
        if fit_folds:
            gdf[self.fold_name] = _add_fold(gdf.index, self.kfold,
                                            self.fold_seed)

        # Need mean of continuous target column
        y_mean = self.target_mean or self.means

        # Loop over categorical-column groups and apply logic
        new_gdf = None
        for ind, cat_group in enumerate(columns):
            if isinstance(cat_group, tuple):
                cat_group = list(cat_group)
            elif isinstance(cat_group, str):
                cat_group = [cat_group]

            if new_gdf is None:
                new_gdf = self._op_group_logic(cat_group, gdf, y_mean,
                                               fit_folds, ind)
            else:
                _df = self._op_group_logic(cat_group, gdf, y_mean, fit_folds,
                                           ind)
                new_gdf = cudf.concat([new_gdf, _df], axis=1)

        # Drop temporary columns
        gdf.drop(columns=[tmp, "__fold__"]
                 if fit_folds and self.drop_folds else [tmp],
                 inplace=True)
        if fit_folds and not self.drop_folds:
            new_gdf[self.fold_name] = gdf[self.fold_name]
        return new_gdf
Example #37
def apply_boxcar_drift(data, metadata):
    """ Apply boxcar filter to compensate for doppler smearing
    
    An optimal boxcar is applied per row of drift rate. This retrieves
    a sensitivity increase of sqrt(boxcar_size) for a smeared signal.
    (Stil down a sqrt(boxcar_size) compared to no smearing case).
    
    Args:
        data (np or cp array): 
        metadata (dict): Dictionary of metadata values
    
    Returns:
        data, metadata (array and dict): Data array with filter applied.
    """
    logger.debug(
        f"apply_boxcar_drift: Applying moving average based on drift rate.")
    metadata = deepcopy(metadata)
    # Compute drift rates from metadata
    dr0, ddr = metadata['drift_rate_start'].value, metadata[
        'drift_rate_step'].value
    df = metadata['frequency_step'].to('Hz').value
    dt = metadata['integration_time'].to('s').value
    drates = dr0 + ddr * cp.arange(data.shape[0])

    # Compute smearing (array of n_channels smeared for given driftrate)
    smearing_nchan = cp.abs(dt * drates / df).astype('int32')
    smearing_nchan_max = cp.asnumpy(cp.max(smearing_nchan))

    # Apply boxcar filter to compensate for smearing
    for boxcar_size in range(2, smearing_nchan_max + 1):
        idxs = cp.where(smearing_nchan == boxcar_size)
        # 1. uniform_filter1d computes a mean; we want a sum, so *= boxcar_size
        # 2. we want the noise level to stay the same, so divide by sqrt(boxcar_size)
        # combined, 1 and 2 give a net sqrt(boxcar_size) factor
        data[idxs] = uniform_filter1d(data[idxs], size=boxcar_size,
                                      axis=2) * np.sqrt(boxcar_size)
    return data, metadata
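
For intuition, a small numeric sketch of the smearing computation (all values made up):

import cupy as cp

dt, df = 10.0, 2.0                  # integration time (s), channel width (Hz)
drates = cp.array([0.0, 0.5, 1.1])  # drift rate per row (Hz/s)
print(cp.abs(dt * drates / df).astype('int32'))
# [0 2 5] -> the second and third rows get boxcars of size 2 and 5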
Example #38
    def _major_slice(self, idx, copy=False):
        """Index along the major axis where idx is a slice object.
        """

        if idx == slice(None):
            return self.copy() if copy else self

        M, N = self._swap(*self.shape)
        start, stop, step = idx.indices(M)
        M = len(range(start, stop, step))
        new_shape = self._swap(M, N)
        if M == 0:
            return self.__class__(new_shape)

        row_nnz = cupy.diff(self.indptr)
        idx_dtype = self.indices.dtype
        res_indptr = cupy.zeros(M + 1, dtype=idx_dtype)

        cupy.cumsum(row_nnz[idx], out=res_indptr[1:])

        if step == 1:
            idx_start = self.indptr[start]
            idx_stop = self.indptr[stop]
            res_indices = cupy.array(self.indices[idx_start:idx_stop],
                                     copy=copy)
            res_data = cupy.array(self.data[idx_start:idx_stop], copy=copy)
        else:
            rows = cupy.arange(start,
                               start + (res_indptr.size - 1) * step,
                               step,
                               dtype=res_indptr.dtype)
            res_indices, res_data = _index._csr_row_index(
                rows, self.indptr, self.indices, self.data, res_indptr)

        return self.__class__((res_data, res_indices, res_indptr),
                              shape=new_shape,
                              copy=False)
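
A hedged usage sketch: a stepped row slice on a CSR matrix is what exercises the strided branch above:

import cupy
import cupyx.scipy.sparse as sparse

m = sparse.csr_matrix(cupy.arange(12, dtype=cupy.float32).reshape(3, 4))
print(m[::2].toarray())  # rows 0 and 2, taken via the major-axis slice path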
Example #39
def top_k(array, k, axis=0, biggest=True):
    """ Return the topK index along the specified dimension,
        The returned indices are such that their array values are sorted
        
        -Input:
        array: 1d or 2d array
        k: the top `k` (k>0, integer)
        axis: futile if array is 1d, otherwise sorting along the specified axis
              default to 0
        biggest: whether the top-k biggest or smallest, default to True

        -Output:
        inds: indices
        vals: array values at the indices
    """
    assert array.ndim == 1 or array.ndim == 2
    assert axis == 0 or axis == 1
    if biggest:
        array = -array
    
    if array.ndim == 1:
        inds = xp.argpartition(array, k)[:k]
        vals = array[inds]
        sort_inds = xp.argsort(vals)
        inds = inds[sort_inds]
        vals = vals[sort_inds]

    elif axis == 0:
        inds = xp.argpartition(array, k, axis=0)[:k, :]
        vals = array[inds, xp.arange(array.shape[1])[None, :]]
        sort_inds = xp.argsort(vals, axis=0)
        inds = inds[sort_inds, xp.arange(array.shape[1])[None, :]]
        vals = vals[sort_inds, xp.arange(array.shape[1])[None, :]]

    else:
        inds = xp.argpartition(array, k, axis=1)[:, :k]
        vals = array[xp.arange(array.shape[0])[:, None], inds]
        sort_inds = xp.argsort(vals, axis=1)
        inds = inds[xp.arange(array.shape[0])[:, None], sort_inds]
        vals = vals[xp.arange(array.shape[0])[:, None], sort_inds]

    if biggest:
        vals = -vals
    return inds, vals
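
A minimal sketch, assuming xp is bound to cupy (or numpy) as in the surrounding module:

import cupy as xp

a = xp.array([5.0, 1.0, 9.0, 3.0])
inds, vals = top_k(a, 2)
# inds -> [2 0], vals -> [9. 5.] (the two largest, sorted descending)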
Example #40
def get_session_id_from_session_boundry(session_change_df, last_session_len):
    """
        This function returns session starts given a session change df
    """
    import cudf

    ## we don't really need the `session_id` to start from 0
    ## the total number of sessions per partition should be fairly limited
    ## and we really should not hit 2,147,483,647 sessions per partition
    ## Can switch to vec_arange code to match spark 1-1
    ## see previously committed code
    ## https://github.com/rapidsai/tpcx-bb/blob/8394f2b8d62540b4077c606c8b687dee96b4f5d3/tpcx-bb1.3.1/tools/sessionization.py

    user_session_ids = cp.arange(len(session_change_df), dtype=np.int32)

    ### shift the session-length df up by one
    session_len = session_change_df["t_index"].diff().reset_index(drop=True)
    session_len = session_len.shift(-1)
    session_len.iloc[-1] = last_session_len

    session_id_final_series = (
        cudf.Series(user_session_ids).repeat(session_len).reset_index(
            drop=True))
    return session_id_final_series
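
A toy illustration of the repeat trick (the session lengths are hypothetical):

import cudf
import cupy as cp

ids = cp.arange(3, dtype=cp.int32)  # one id per session
lens = cudf.Series([2, 1, 3])       # clicks per session
print(cudf.Series(ids).repeat(lens).reset_index(drop=True))
# 0 0 1 2 2 2 -> one session id per click row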
Example #41
def _encode(name, path, gdf, cat_cache, na_sentinel=-1, freq_threshold=0):
    value = None
    if path:
        if cat_cache is not None:
            cat_cache = cat_cache.get(name, "disk")
            cache = _get_cache()
            if cache:
                value = cache.get_categories(name, path, cache=cat_cache)
        else:
            value = cudf.io.read_parquet(path, index=False, columns=[name])
            value.index.name = "labels"
            value.reset_index(drop=False, inplace=True)

    vals = gdf[name].copy(deep=False)
    if value is None:
        value = cudf.DataFrame({name: [None]})
        value[name] = value[name].astype(vals.dtype)
        value.index.name = "labels"
        value.reset_index(drop=False, inplace=True)

    if freq_threshold > 0:
        codes = cudf.DataFrame({
            name: vals.copy(),
            "order": cp.arange(len(vals))
        })
        codes = codes.merge(value, on=name,
                            how="left").sort_values("order")["labels"]
        codes.fillna(na_sentinel, inplace=True)
        return codes.values
    else:
        # Use `searchsorted` if we are using a "full" encoding
        labels = value[name].searchsorted(vals,
                                          side="left",
                                          na_position="first")
        labels[labels >= len(value[name])] = na_sentinel
        return labels
Example #42
def create_tokenized_df(str_series, delimiter=" ", ngram_range=(1, 1)):
    """ 
        creates a tokenized df from a string column
        where each row is like [token,doc_id], 
        token = 'string' and doc_id = index from which the word came
        Also returns a  empty doc_id series
    """
    import cudf
    token_count_sr = str_series.str.token_count(delimiter=delimiter)
    doc_id_ar = cp.arange(start=0, stop=len(str_series), dtype=np.int32)

    doc_id_sr = cudf.Series(doc_id_ar)

    tokenized_df_ls = []
    for n in range(ngram_range[0], ngram_range[1] + 1):
        ngram_ser = get_ngram(str_series, n, doc_id_sr, token_count_sr,
                              delimiter)
        tokenized_df_ls.append(ngram_ser)

    tokenized_df = cudf.concat(tokenized_df_ls)
    tokenized_df = tokenized_df.reset_index(drop=True)

    empty_doc_ids = doc_id_sr[token_count_sr == 0]
    return tokenized_df, empty_doc_ids
Example #43
def forward_all_cells(self):
    """ move all agents in map one time step forward """
    agents_durations = self.durations[
        cp.arange(0, self.durations.shape[0]),
        self.current_state_ids].flatten()
    print(f'DEBUG: agents_durations.shape: {agents_durations.shape}, '
          f'self.durations.shape: {self.durations.shape}, '
          f'self.current_state_ids.shape: {self.current_state_ids.shape}')
    to_transit = (self.current_state_durations == agents_durations)
    self.current_state_durations += 1
    to_transit = self.agent_ids[to_transit]
    self.transit_states(to_transit)
    # Contamination at home by end of the period
    self.contaminate(self.agent_ids, self.home_cell_ids)
    # Update r and associated variables
    r = (self.n_infected_period / self.n_diseased_period
         if self.n_diseased_period > 0 else 0)
    r = cp.array([r])
    if self.verbose > 1:
        print(f'period {self.current_period}: r={r}')
    self.r_factors = append(self.r_factors, r)
    self.n_diseased_period = self.get_n_diseased()
    self.n_infected_period = 0
    # Move one period forward
    self.current_period += 1
Example #44
def get_predicted_traces(matrix_U, matrix_W, sorting_result, time_limits):
    W = cp.asarray(matrix_W, dtype=np.float32)
    U = cp.asarray(matrix_U, dtype=np.float32)

    buffer = W.shape[0]

    predicted_traces = cp.zeros(
        (U.shape[0], 4 * buffer + (time_limits[1] - time_limits[0])),
        dtype=np.int16)

    sorting_result = cp.asarray(sorting_result)

    all_spike_times = sorting_result[:, 0]
    included_spike_pos = cp.asarray(
        (time_limits[0] - buffer // 2 < all_spike_times)
        & (all_spike_times < time_limits[1] + buffer // 2)).nonzero()[0]

    spike_times = all_spike_times[included_spike_pos].astype(np.int32)
    spike_templates = sorting_result[included_spike_pos, 1].astype(np.int32)
    spike_amplitudes = sorting_result[included_spike_pos, 2]

    for s, spike in enumerate(spike_times):
        amplitude = spike_amplitudes[s]
        U_i = U[:, spike_templates[s], :]
        W_i = W[:, spike_templates[s], :]

        addendum = cp.ascontiguousarray(cp.matmul(U_i, W_i.T) * amplitude,
                                        dtype=np.int16)

        pred_pos = cp.arange(
            buffer) + spike - time_limits[0] + buffer + buffer // 2
        predicted_traces[:, pred_pos] += addendum

    output = predicted_traces[:, buffer * 2:-buffer * 2]

    return cp.asnumpy(output).T
Example #45
def get_session_id(df):
    """
        This function creates a session id column for each click
        The session id grows in incremeant for each user's susbequent session
        Session boundry is defined by the time_out
    """

    df["user_change_flag"] = df["wcs_user_sk"].diff(periods=1) != 0
    df["session_change_flag"] = df["review_flag"] | df["user_change_flag"]

    df = df.reset_index(drop=True)
    df["t_index"] = cp.arange(start=0, stop=len(df), dtype=np.int32)

    session_change_df = df[df["session_change_flag"]].reset_index(drop=True)
    try:
        last_session_len = len(df) - session_change_df["t_index"].iloc[-1]
    except (AssertionError, IndexError) as e:  # IndexError in numba >= 0.48
        last_session_len = 0

    session_ids = get_session_id_from_session_boundary(session_change_df,
                                                       last_session_len)

    assert len(session_ids) == len(df)
    return session_ids
Example #46
def create_test_dataset():
    cp = np  # cpu mode only here
    s1 = np.load(test_path.joinpath('my_conv2_input.npy'))
    s0 = np.copy(s1)
    tmax = np.ceil(4 * sig)
    dt = cp.arange(-tmax, tmax + 1)
    gauss = cp.exp(-dt**2 / (2 * sig**2))
    gauss = (gauss / cp.sum(gauss)).astype(np.float32)

    cNorm = lfilter_cpu(gauss, 1., np.r_[np.ones(s1.shape[0]),
                                         np.zeros(int(tmax))])
    cNorm = cNorm[int(tmax):]

    s1 = lfilter_cpu(gauss,
                     1,
                     np.r_[s1, np.zeros((int(tmax), s1.shape[1]))],
                     axis=0)
    s1 = s1[int(tmax):] / cNorm[:, np.newaxis]

    # import matplotlib.pyplot as plt
    # plt.plot(s0)
    # plt.plot(s1)
    # np.save(test_path.joinpath('my_conv2_input.npy'), s0)
    np.save(test_path.joinpath('my_conv2_output.npy'), s1)
Example #47
    def test_texture_input(self):
        width, height, depth = self.dimensions
        dim = 3 if depth != 0 else 2 if height != 0 else 1

        texobj = self._prep_texture()
        ker = getattr(self, f'_prep_kernel{dim}D')()

        # prepare input
        args = [None, texobj]
        size = width
        if height > 0:
            size *= height
            args.append(width)
        if depth > 0:
            size *= depth
            args.append(height)
        in_arr = cupy.arange(size, dtype=cupy.float32)
        in_arr = in_arr.reshape(self.shape)
        args[0] = in_arr

        # compute and validate output
        out_arr = ker(*args)
        expected = in_arr + self.data
        testing.assert_allclose(out_arr, expected)
Example #48
def create_texture_image(textures, texture_size_out=16):
    num_faces, texture_size_in = textures.shape[:2]
    tile_width = int((num_faces - 1.) ** 0.5) + 1
    tile_height = int((num_faces - 1.) / tile_width) + 1
    image = np.zeros((tile_height * texture_size_out, tile_width * texture_size_out, 3), 'float32')

    vertices = np.zeros((num_faces, 3, 2), 'float32')  # [:, :, XY]
    face_nums = np.arange(num_faces)
    column = face_nums % tile_width
    row = face_nums // tile_width
    vertices[:, 0, 0] = column * texture_size_out
    vertices[:, 0, 1] = row * texture_size_out
    vertices[:, 1, 0] = column * texture_size_out
    vertices[:, 1, 1] = (row + 1) * texture_size_out - 1
    vertices[:, 2, 0] = (column + 1) * texture_size_out - 1
    vertices[:, 2, 1] = (row + 1) * texture_size_out - 1

    image = chainer.cuda.to_gpu(image)
    vertices = chainer.cuda.to_gpu(vertices)
    textures = chainer.cuda.to_gpu(textures)

    loop = cp.arange(image.size // 3).astype('int32')
    chainer.cuda.elementwise(
        'int32 j, raw float32 image, raw float32 vertices_all, raw float32 textures',
        '',
        string.Template('''
            const int x = i % (${tile_width} * ${texture_size_out});
            const int y = i / (${tile_width} * ${texture_size_out});
            const int row = x / ${texture_size_out};
            const int column = y / ${texture_size_out};
            const int fn = row + column * ${tile_width};
            const int tsi = ${texture_size_in};

            const float* texture = &textures[fn * tsi * tsi * tsi * 3];
            const float* vertices = &vertices_all[fn * 3 * 2];
            const float* p0 = &vertices[2 * 0];
            const float* p1 = &vertices[2 * 1];
            const float* p2 = &vertices[2 * 2];

            /* */
            // if ((y % ${texture_size_out}) < (x % ${texture_size_out})) continue;

            /* compute face_inv */
            float face_inv[9] = {
                p1[1] - p2[1], p2[0] - p1[0], p1[0] * p2[1] - p2[0] * p1[1],
                p2[1] - p0[1], p0[0] - p2[0], p2[0] * p0[1] - p0[0] * p2[1],
                p0[1] - p1[1], p1[0] - p0[0], p0[0] * p1[1] - p1[0] * p0[1]};
            float face_inv_denominator = (
                p2[0] * (p0[1] - p1[1]) +
                p0[0] * (p1[1] - p2[1]) +
                p1[0] * (p2[1] - p0[1]));
            for (int k = 0; k < 9; k++) face_inv[k] /= face_inv_denominator;

            /* compute w = face_inv * p */
            float weight[3];
            float weight_sum = 0;
            for (int k = 0; k < 3; k++) {
                weight[k] = face_inv[3 * k + 0] * x + face_inv[3 * k + 1] * y + face_inv[3 * k + 2];
                weight_sum += weight[k];
            }
            for (int k = 0; k < 3; k++) weight[k] /= (weight_sum + ${eps});

            /* get texture index (float) */
            float texture_index_float[3];
            for (int k = 0; k < 3; k++) {
                float tif = weight[k] * (tsi - 1);
                tif = max(tif, 0.);
                tif = min(tif, tsi - 1 - ${eps});
                texture_index_float[k] = tif;
            }

            /* blend */
            float new_pixel[3] = {0, 0, 0};
            for (int pn = 0; pn < 8; pn++) {
                float w = 1;                         // weight
                int texture_index_int[3];            // index in source (int)
                for (int k = 0; k < 3; k++) {
                    if ((pn >> k) % 2 == 0) {
                        w *= 1 - (texture_index_float[k] - (int)texture_index_float[k]);
                        texture_index_int[k] = (int)texture_index_float[k];
                    } else {
                        w *= texture_index_float[k] - (int)texture_index_float[k];
                        texture_index_int[k] = (int)texture_index_float[k] + 1;
                    }
                }
                int isc = texture_index_int[0] * tsi * tsi + texture_index_int[1] * tsi + texture_index_int[2];
                for (int k = 0; k < 3; k++) new_pixel[k] += w * texture[isc * 3 + k];
            }
            for (int k = 0; k < 3; k++) image[i * 3 + k] = new_pixel[k];
        ''').substitute(
            num_faces=num_faces,
            texture_size_in=texture_size_in,
            texture_size_out=texture_size_out,
            tile_width=tile_width,
            eps=1e-5,
        ),
        'function',
    )(loop, image, vertices, textures)

    chainer.cuda.elementwise(
        'int32 j, raw float32 image, raw float32 vertices_all, raw float32 textures',
        '',
        string.Template('''
            const int x = i % (${tile_width} * ${texture_size_out});
            const int y = i / (${tile_width} * ${texture_size_out});
            const int row = x / ${texture_size_out};
            const int column = y / ${texture_size_out};
            const int fn = row + column * ${tile_width};
            const int tsi = ${texture_size_in};

            const float* texture = &textures[fn * tsi * tsi * tsi * 3];
            const float* vertices = &vertices_all[fn * 3 * 2];
            const float* p0 = &vertices[2 * 0];
            const float* p1 = &vertices[2 * 1];
            const float* p2 = &vertices[2 * 2];

            /* */
            if ((y % ${texture_size_out} + 1) == (x % ${texture_size_out})) {
                for (int k = 0; k < 3; k++) image[i * 3 + k] = image[
                    (y * ${tile_width} * ${texture_size_out} + (x - 1))  * 3 + k];
            }

        ''').substitute(
            num_faces=num_faces,
            texture_size_in=texture_size_in,
            texture_size_out=texture_size_out,
            tile_width=tile_width,
            eps=1e-5,
        ),
        'function',
    )(loop, image, vertices, textures)

    vertices[:, :, 0] /= (image.shape[1] - 1)
    vertices[:, :, 1] /= (image.shape[0] - 1)

    image = image[::-1, ::1]
    image = image.get()
    vertices = vertices.get()
    return image, vertices
Example #49
# For example, for the P4 (rotation-translation) conv, the input image is a function on Z2,
# which we may think of as a function on P4 that is right-invariant to rotation.
# A right-rotation-invariant P4 function has the same value at (r, u, v) as it has at (r', u, v).
# Naturally, we don't store this invariant P4 function, but we store an array with a length-1 axis for the rotation
# coordinate.
# This is consistent with the numpy convention that length-1 axes get broadcast automatically.
# So for Z2 filters, we get the following shapes:
# Filter shape: (output_channels, input_channels, 1, nu, nv)
# Index shape (one per coordinate t, u, v): (output_transforms, 1, nu, nv)
# Result shape: (output_channels, output_transforms, input_channels, 1, nu, nv)


import cupy
from cupy.core.core import compile_with_cache

x = cupy.arange(2, dtype='f')  # WORKAROUND - currently, cupy compile_with_cache fails if no cupy code is executed first

# This computes input[..., T, U, V].swapaxes(1, 2)
_index_group_func_str = \
    """
    extern "C" __global__ void indexing_kernel(
        CArray<{0}, 5> input,
        CArray<int, 4> T,
        CArray<int, 4> U,
        CArray<int, 4> V,
        CArray<{0}, 6> output)
    {{
        CUPY_FOR(i, output.size()) {{

            const int* oshape = output.shape();
            const int* ostrides = output.strides();
Example #50
def test_size(self):
    x = cupy.arange(3).astype('i')
    y = cupy.ElementwiseKernel(
        'raw int32 x', 'int32 y', 'y = x.size()', 'test_carray_size',
    )(x, size=1)
    self.assertEqual(int(y[0]), 3)