def _triu(x, k=0):
    m, n = x.shape
    u = cupy.arange(m).reshape(m, 1)
    v = cupy.arange(n).reshape(1, n)
    mask = v - u >= k
    x *= mask
    return x
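

# Hedged usage sketch (added for illustration; assumes `cupy` is imported at
# module level, as _triu above requires). _triu zeroes, in place, every
# element below the k-th diagonal, mirroring numpy.triu:
def _example_triu_usage():
    a = cupy.ones((3, 3), dtype=cupy.float32)
    out = _triu(a, k=1)
    expected = cupy.asarray([[0., 1., 1.],
                             [0., 0., 1.],
                             [0., 0., 0.]], dtype=cupy.float32)
    assert (out == expected).all()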
def test_cupy_to_chainerx_contiguous():
    dtype = numpy.float32
    a_cupy = cupy.arange(6, dtype=dtype).reshape((2, 3))
    a_cupy_refcount_before = sys.getrefcount(a_cupy)

    a_chx = _fromrawpointer(
        a_cupy.data.mem.ptr, a_cupy.shape, a_cupy.dtype, a_cupy.strides,
        'cuda:0', 0, a_cupy)

    assert sys.getrefcount(a_cupy) == a_cupy_refcount_before + 1
    assert a_chx.device.name == 'cuda:0'
    chainerx.testing.assert_array_equal_ex(a_chx, a_cupy.get())

    # Write to a_cupy
    a_cupy[0, 1] = 8
    chainerx.testing.assert_array_equal_ex(
        a_chx, numpy.array([[0, 8, 2], [3, 4, 5]], dtype))

    # Write to a_chx
    a_chx += 1
    chainerx.testing.assert_array_equal_ex(
        a_cupy.get(), numpy.array([[1, 9, 3], [4, 5, 6]], dtype))
def test_getitem_int(self):
    x = cupy.arange(24).reshape((2, 3, 4)).astype('i')
    y = cupy.empty_like(x)
    y = cupy.ElementwiseKernel(
        'raw T x', 'int32 y',
        'y = x[i]',
        'test_carray_getitem_int',
    )(x, y)
    testing.assert_array_equal(y, x)
def test_cupy_to_chainerx_noncontiguous_with_offset():
    dtype = numpy.float32
    a_cupy = cupy.arange(12, dtype=dtype).reshape((2, 6))[::-1, ::2]
    offset = a_cupy.data.ptr - a_cupy.data.mem.ptr

    # test preconditions
    assert offset > 0
    assert not a_cupy.flags.c_contiguous

    a_chx = _fromrawpointer(
        a_cupy.data.mem.ptr, a_cupy.shape, a_cupy.dtype, a_cupy.strides,
        'cuda:0', offset, a_cupy)

    assert a_chx.strides == a_cupy.strides
    chainerx.testing.assert_array_equal_ex(
        a_chx, a_cupy.get(), strides_check=False)

    a_cupy[1, 1] = 53

    assert a_chx.strides == a_cupy.strides
    chainerx.testing.assert_array_equal_ex(
        a_chx, a_cupy.get(), strides_check=False)
def test_cupy_to_chainerx_noncontiguous_without_offset():
    # This test includes access to an address before the given pointer
    # (because of a negative stride).
    dtype = numpy.float32
    a_cupy = cupy.arange(12, dtype=dtype).reshape((2, 6))[::-1, ::2]

    # test preconditions
    assert a_cupy.data.mem.ptr < a_cupy.data.ptr
    assert not a_cupy.flags.c_contiguous

    a_chx = _fromrawpointer(
        a_cupy.data.ptr, a_cupy.shape, a_cupy.dtype, a_cupy.strides,
        'cuda:0', 0, a_cupy)

    assert a_chx.strides == a_cupy.strides
    chainerx.testing.assert_array_equal_ex(
        a_chx, a_cupy.get(), strides_check=False)

    a_cupy[1, 1] = 53

    assert a_chx.strides == a_cupy.strides
    chainerx.testing.assert_array_equal_ex(
        a_chx, a_cupy.get(), strides_check=False)
def test_strides(self):
    x = cupy.arange(6).reshape((2, 3)).astype('i')
    y = cupy.ElementwiseKernel(
        'raw int32 x', 'int32 y',
        'y = x.strides()[i]',
        'test_carray_strides',
    )(x, size=2)
    testing.assert_array_equal(y, (12, 4))
def test_getitem_idx(self):
    x = cupy.arange(24).reshape((2, 3, 4)).astype('i')
    y = cupy.empty_like(x)
    y = cupy.ElementwiseKernel(
        'raw T x', 'int32 y',
        'int idx[] = {i / 12, i / 4 % 3, i % 4}; y = x[idx]',
        'test_carray_getitem_idx',
    )(x, y)
    testing.assert_array_equal(y, x)
def test_scan(self, dtype):
    element_num = 10000
    if dtype in {cupy.int8, cupy.uint8}:
        element_num = 100
    a = cupy.ones((element_num,), dtype=dtype)
    prefix_sum = cupy.core.core.scan(a)
    expect = cupy.arange(start=1, stop=element_num + 1).astype(dtype)
    testing.assert_array_equal(prefix_sum, expect)
def test_cupy_to_chainerx_invalid_device():
    dtype = numpy.float32
    with cupy.cuda.Device(1):
        a_cupy = cupy.arange(6, dtype=dtype).reshape((2, 3))
    with pytest.raises(chainerx.ChainerxError):
        _fromrawpointer(
            a_cupy.data.mem.ptr, a_cupy.shape, a_cupy.dtype, a_cupy.strides,
            'cuda:0', 0, a_cupy)
def test_scan_out(self, dtype):
    element_num = 10000
    if dtype in {cupy.int8, cupy.uint8, cupy.float16}:
        element_num = 100
    a = cupy.ones((element_num,), dtype=dtype)
    b = cupy.zeros_like(a)
    cupy.core.core.scan(a, b)
    expect = cupy.arange(start=1, stop=element_num + 1).astype(dtype)
    testing.assert_array_equal(b, expect)

    cupy.core.core.scan(a, a)
    testing.assert_array_equal(a, expect)
def test_cupy_to_chainerx_nondefault_device():
    dtype = numpy.float32
    with cupy.cuda.Device(1):
        a_cupy = cupy.arange(6, dtype=dtype).reshape((2, 3))
    a_chx = _fromrawpointer(
        a_cupy.data.mem.ptr, a_cupy.shape, a_cupy.dtype, a_cupy.strides,
        'cuda:1', 0, a_cupy)
    assert a_chx.device.name == 'cuda:1'
    chainerx.testing.assert_array_equal_ex(a_chx, a_cupy.get())
def test_reshape_contiguity(self):
    a = cupy.arange(6).reshape(2, 3)
    self.assertTrue(a.flags.c_contiguous)
    self.assertFalse(a.flags.f_contiguous)

    a = a.reshape(1, 6, 1)
    self.assertTrue(a.flags.c_contiguous)
    self.assertTrue(a.flags.f_contiguous)

    b = a.T.reshape(1, 6, 1)
    self.assertTrue(b.flags.c_contiguous)
    self.assertTrue(b.flags.f_contiguous)

    b = a.T.reshape(2, 3)
    self.assertTrue(b.flags.c_contiguous)
    self.assertFalse(b.flags.f_contiguous)
def test_cupy_to_chainerx_delete_chainerx_first():
    dtype = numpy.float32
    a_cupy = cupy.arange(6, dtype=dtype).reshape((2, 3))
    a_chx = _fromrawpointer(
        a_cupy.data.mem.ptr, a_cupy.shape, a_cupy.dtype, a_cupy.strides,
        'cuda:0', 0, a_cupy)
    del a_chx
    a_cupy += 1
    chainerx.testing.assert_array_equal_ex(
        a_cupy.get(), numpy.array([[1, 2, 3], [4, 5, 6]], dtype))
def fliplr(a):
    """Flip array in the left/right direction.

    Flip the entries in each row in the left/right direction. Columns
    are preserved, but appear in a different order than before.

    Args:
        a (~cupy.ndarray): Input array.

    Returns:
        ~cupy.ndarray: Output array.

    .. seealso:: :func:`numpy.fliplr`

    """
    if a.ndim < 2:
        raise ValueError('Input must be >= 2-d')
    return cupy.take(a, cupy.arange(a.shape[1] - 1, -1, -1), axis=1)
def flipud(a):
    """Flip array in the up/down direction.

    Flip the entries in each column in the up/down direction. Rows are
    preserved, but appear in a different order than before.

    Args:
        a (~cupy.ndarray): Input array.

    Returns:
        ~cupy.ndarray: Output array.

    .. seealso:: :func:`numpy.flipud`

    """
    if a.ndim < 1:
        raise ValueError('Input must be >= 1-d')
    return cupy.take(a, cupy.arange(a.shape[0] - 1, -1, -1), axis=0)
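

# Hedged usage sketch (added for illustration; assumes `cupy` is imported):
# fliplr reverses the column order, flipud reverses the row order.
def _example_flip_usage():
    a = cupy.arange(6).reshape(2, 3)  # [[0, 1, 2], [3, 4, 5]]
    assert (fliplr(a) == cupy.asarray([[2, 1, 0], [5, 4, 3]])).all()
    assert (flipud(a) == cupy.asarray([[3, 4, 5], [0, 1, 2]])).all()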
def forward_gpu(self, inputs):
    a, b = inputs
    c = cp.zeros_like(a, 'float32')
    chainer.cuda.elementwise(
        'int32 j, raw T a, raw T b',
        'raw T c',
        '''
            float* ap = &a[j * 3];
            float* bp = &b[j * 3];
            float* cp = &c[j * 3];
            cp[0] = ap[1] * bp[2] - ap[2] * bp[1];
            cp[1] = ap[2] * bp[0] - ap[0] * bp[2];
            cp[2] = ap[0] * bp[1] - ap[1] * bp[0];
        ''',
        'function',
    )(
        # Integer division: one loop index per 3-vector. Float `/` would
        # give arange a float argument and fail on Python 3.
        cp.arange(a.size // 3).astype('int32'), a, b, c,
    )
    return c,
def _non_maximum_suppression_gpu(bbox, thresh, score=None, limit=None):
    if len(bbox) == 0:
        return cp.zeros((0,), dtype=np.int32)

    n_bbox = bbox.shape[0]

    if score is not None:
        order = score.argsort()[::-1].astype(np.int32)
    else:
        order = cp.arange(n_bbox, dtype=np.int32)

    sorted_bbox = bbox[order, :]
    selec, n_selec = _call_nms_kernel(sorted_bbox, thresh)
    selec = selec[:n_selec]
    selec = order[selec]
    if limit is not None:
        selec = selec[:limit]
    return selec
def take_along_axis(a, indices, axis):
    """Take values from the input array by matching 1d index and data slices.

    Args:
        a (cupy.ndarray): Array to extract elements.
        indices (cupy.ndarray): Indices to take along each 1d slice of ``a``.
        axis (int): The axis to take 1d slices along.

    Returns:
        cupy.ndarray: The indexed result.

    .. seealso:: :func:`numpy.take_along_axis`
    """
    if indices.dtype.kind not in ('i', 'u'):
        raise IndexError('`indices` must be an integer array')

    if axis is None:
        a = a.ravel()
        axis = 0

    ndim = a.ndim
    axis = internal._normalize_axis_index(axis, ndim)

    if ndim != indices.ndim:
        raise ValueError(
            '`indices` and `a` must have the same number of dimensions')

    fancy_index = []
    for i, n in enumerate(a.shape):
        if i == axis:
            fancy_index.append(indices)
        else:
            ind_shape = (1,) * i + (-1,) + (1,) * (ndim - i - 1)
            fancy_index.append(cupy.arange(n).reshape(ind_shape))

    return a[tuple(fancy_index)]
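

# Hedged usage sketch (added for illustration; mirrors numpy.take_along_axis
# semantics): pairing argsort with take_along_axis sorts each 1d slice.
def _example_take_along_axis_usage():
    a = cupy.asarray([[30, 10, 20], [60, 40, 50]])
    idx = cupy.argsort(a, axis=1)
    sorted_a = take_along_axis(a, idx, axis=1)
    assert (sorted_a == cupy.asarray([[10, 20, 30], [40, 50, 60]])).all()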
def _bincount_histogram(image, source_range):
    """
    Efficient histogram calculation for an image of integers.

    This function is significantly more efficient than cupy.histogram but
    works only on images of integers. It is based on cupy.bincount.

    Parameters
    ----------
    image : array
        Input image.
    source_range : string
        'image' determines the range from the input image.
        'dtype' determines the range from the expected range of the images
        of that data type.

    Returns
    -------
    hist : array
        The values of the histogram.
    bin_centers : array
        The values at the center of the bins.
    """
    if source_range not in ['image', 'dtype']:
        raise ValueError('Incorrect value for `source_range` argument: '
                         f'{source_range}')
    if source_range == 'image':
        image_min = int(image.min().astype(np.int64))
        image_max = int(image.max().astype(np.int64))
    elif source_range == 'dtype':
        image_min, image_max = dtype_limits(image, clip_negative=False)
    image, offset = _offset_array(image, image_min, image_max)
    hist = cp.bincount(image.ravel(), minlength=image_max - image_min + 1)
    bin_centers = cp.arange(image_min, image_max + 1)
    if source_range == 'image':
        idx = max(image_min, 0)
        hist = hist[idx:]
    return hist, bin_centers
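

# Hedged usage sketch (added for illustration; assumes `cp` is cupy and that
# the helpers _offset_array / dtype_limits used above are available). For an
# all-non-negative uint8 image, the histogram is a plain bincount:
def _example_bincount_histogram_usage():
    image = cp.asarray([[0, 1, 1], [2, 2, 2]], dtype=cp.uint8)
    hist, bin_centers = _bincount_histogram(image, source_range='image')
    assert (hist == cp.asarray([1, 2, 3])).all()
    assert (bin_centers == cp.asarray([0, 1, 2])).all()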
def __init__(
        self,
        dataset,
        cat_names,
        cont_names,
        label_names,
        batch_size,
        shuffle,
        seed_fn=None,
        parts_per_chunk=1,
        device=None,
        global_size=None,
        global_rank=None,
        drop_last=False,
):
    self.data = dataset
    self.indices = cp.arange(dataset.to_ddf().npartitions)
    self.drop_last = drop_last
    self.device = device or 0
    self.global_size = global_size or 1
    self.global_rank = global_rank or 0

    self.cat_names = cat_names or []
    self.cont_names = cont_names or []
    self.label_names = label_names
    self.batch_size = batch_size
    self.shuffle = shuffle
    self.seed_fn = seed_fn

    self.num_rows_processed = 0

    # we set the size of the chunk queue to 1 because we only want one chunk
    # in the queue at a time
    self._buff = ChunkQueue(self, 1, num_parts=parts_per_chunk, shuffle=shuffle)
    # run once instead of every time len() is called
    self._buff_len = len(self._buff)
    self._batch_itr = None
    self._workers = None
def find(self, data):
    ret = cp.ascontiguousarray(cp.zeros(data.shape[:-1], 'int32')) - 1
    data = cp.ascontiguousarray(data)
    # Integer division: one loop index per dim-sized vector. Float `/`
    # would give arange a float argument and fail on Python 3.
    loop_indices = cp.arange(data.size // self.dim).astype('int32')
    chainer.cuda.elementwise(
        'int32 j, raw int32 data, raw int32 indices, raw int32 values, '
        'raw int32 ret',
        '',
        string.Template('''
            /* */
            int* value = &data[j * ${dim}];

            /* compute initial key */
            unsigned int key = 0;
            for (int k = 0; k < ${dim}; k++)
                key = (key + value[k]) * ${hash_factor};
            key = key % ${table_size};

            while (1) {
                if (indices[key] < 0) {
                    ret[j] = -1;
                    break;
                }
                bool match = true;
                for (int k = 0; k < ${dim}; k++)
                    if (values[key * ${dim} + k] != value[k]) match = false;
                if (match) {
                    ret[j] = indices[key];
                    break;
                } else {
                    key = (key + 1) % ${table_size};
                }
            }
        ''').substitute(
            table_size=self.table_size,
            hash_factor=self.hash_factor,
            dim=self.dim,
        ),
        'function',
    )(loop_indices, data, self.indices, self.values, ret)
    return ret
def _slogdet_one(a):
    util._assert_rank2(a)
    util._assert_nd_squareness(a)
    dtype = a.dtype

    handle = device.get_cusolver_handle()
    m = len(a)
    ipiv = cupy.empty(m, 'i')
    info = cupy.empty((), 'i')

    # Need to make a copy because getrf works inplace
    a_copy = a.copy(order='F')

    if dtype == 'f':
        getrf_bufferSize = cusolver.sgetrf_bufferSize
        getrf = cusolver.sgetrf
    else:
        getrf_bufferSize = cusolver.dgetrf_bufferSize
        getrf = cusolver.dgetrf

    buffersize = getrf_bufferSize(handle, m, m, a_copy.data.ptr, m)
    workspace = cupy.empty(buffersize, dtype=dtype)
    getrf(handle, m, m, a_copy.data.ptr, m, workspace.data.ptr,
          ipiv.data.ptr, info.data.ptr)

    if info[()] == 0:
        diag = cupy.diag(a_copy)
        # ipiv is 1-origin
        non_zero = (cupy.count_nonzero(ipiv != cupy.arange(1, m + 1)) +
                    cupy.count_nonzero(diag < 0))
        # Note: sign == -1 ** (non_zero % 2)
        sign = (non_zero % 2) * -2 + 1
        logdet = cupy.log(abs(diag)).sum()
    else:
        sign = cupy.array(0.0, dtype=dtype)
        logdet = cupy.array(float('-inf'), dtype)
    return sign, logdet
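

# Hedged sanity-check sketch (added for illustration; assumes `cupy` and
# `numpy` are both imported): _slogdet_one should agree with
# numpy.linalg.slogdet on a small invertible matrix.
def _example_slogdet_one_usage():
    a_cpu = numpy.array([[1., 2.], [3., 4.]])  # det = -2
    sign, logdet = _slogdet_one(cupy.asarray(a_cpu))
    sign_np, logdet_np = numpy.linalg.slogdet(a_cpu)
    assert float(sign) == sign_np
    assert abs(float(logdet) - logdet_np) < 1e-6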
def indices(dimensions, dtype=int):
    """Returns an array representing the indices of a grid.

    Computes an array where the subarrays contain index values 0, 1, ...
    varying only along the corresponding axis.

    Args:
        dimensions: The shape of the grid.
        dtype: Data type specifier. It is int by default.

    Returns:
        ndarray: The array of grid indices,
        ``grid.shape = (len(dimensions),) + tuple(dimensions)``.

    Examples
    --------
    >>> grid = cupy.indices((2, 3))
    >>> grid.shape
    (2, 2, 3)
    >>> grid[0]  # row indices
    array([[0, 0, 0],
           [1, 1, 1]])
    >>> grid[1]  # column indices
    array([[0, 1, 2],
           [0, 1, 2]])

    .. seealso:: :func:`numpy.indices`

    """
    dimensions = tuple(dimensions)
    N = len(dimensions)
    shape = (1,) * N
    res = cupy.empty((N,) + dimensions, dtype=dtype)
    for i, dim in enumerate(dimensions):
        res[i] = cupy.arange(dim, dtype=dtype).reshape(
            shape[:i] + (dim,) + shape[i + 1:])
    return res
def select_groups(labels, groups_order_subset='all'):
    """Get subset of groups in adata.obs[key]."""
    adata_obs_key = labels
    groups_order = labels.cat.categories
    groups_masks = cp.zeros(
        (len(labels.cat.categories), len(labels.cat.codes)), dtype=bool)
    for iname, name in enumerate(labels.cat.categories):
        # if the name is not found, fall back to index retrieval
        if labels.cat.categories[iname] in labels.cat.codes:
            mask = labels.cat.categories[iname] == labels.cat.codes
        else:
            mask = iname == labels.cat.codes
        groups_masks[iname] = mask.values
    groups_ids = list(range(len(groups_order)))
    if groups_order_subset != 'all':
        groups_ids = []
        for name in groups_order_subset:
            groups_ids.append(
                cp.where(
                    cp.array(labels.cat.categories.to_array().astype("int32"))
                    == int(name))[0][0])
        if len(groups_ids) == 0:
            # fall back to index retrieval
            groups_ids = cp.where(
                cp.in1d(
                    cp.arange(len(labels.cat.categories)).astype(str),
                    cp.array(groups_order_subset),
                ))[0]
            groups_ids = [groups_id.item() for groups_id in groups_ids]
        groups_masks = groups_masks[groups_ids]
        groups_order_subset = labels.cat.categories[groups_ids].to_array(
        ).astype(int)
    else:
        groups_order_subset = groups_order.to_array()
    return groups_order_subset, groups_masks
def __call__(self, input_ids, input_mask, token_type_ids,
             masked_lm_positions, masked_lm_ids, masked_lm_weights,
             next_sentence_labels):
    sequence_output, pooled_output = \
        self.bert.get_sequence_output_and_pooled_output(
            input_ids, input_mask, token_type_ids)
    # Check whether backprop still works when the parameter is fetched
    # directly and used in matmul.
    embedding_table = self.bert.get_embedding_table()

    # Gather the vectors at the specific positions over a minibatch.
    batch_size, seq_length, width = sequence_output.shape
    flat_offsets = np.reshape(
        np.arange(0, batch_size, dtype=np.int32) * seq_length, [-1, 1])
    flat_positions = np.reshape(masked_lm_positions + flat_offsets, [-1])
    flat_sequence_output = np.reshape(
        sequence_output, [batch_size * seq_length, width])
    x = flat_sequence_output[flat_positions]

    # Get loss for the masked LM.
    normed = self.layer_norm(self.activate(self.masked_lm_dense(x)))
    masked_lm_logits = F.matmul(normed, embedding_table.T) + self.mask_bias
    label_ids = F.reshape(masked_lm_ids, [-1])
    masked_lm_loss = F.softmax_cross_entropy(
        masked_lm_logits, label_ids, ignore_label=0)
    chainer.report({'masked_lm_loss': masked_lm_loss}, self)
    chainer.report(
        {'masked_lm_accuracy': F.accuracy(masked_lm_logits, label_ids)},
        self)

    # Get loss for the next sentence.
    next_sentence_logits = F.matmul(
        pooled_output,
        self.next_sentence_weights.T) + self.next_sentence_bias
    labels = F.reshape(next_sentence_labels, [-1])
    next_sentence_loss = F.softmax_cross_entropy(
        next_sentence_logits, labels)
    chainer.report({'next_sentence_loss': next_sentence_loss}, self)
    chainer.report(
        {'next_sentence_accuracy':
         F.accuracy(next_sentence_logits, labels)}, self)

    loss = masked_lm_loss + next_sentence_loss
    chainer.report({'loss': loss}, self)
    return loss
def step(self, state, momentum, rng, **args):
    q = state.copy()
    p = self.draw_momentum(rng)
    q_new = deepcopy(q)
    p_new = deepcopy(p)
    epsilon = self.step_size
    path_length = cp.ceil(
        2 * cp.random.rand() * self.path_length / epsilon)
    grad_q = self.model.grad(q, **args)
    # SG-HMC leapfrog step
    for _ in cp.arange(path_length - 1):
        for var in self.start.keys():
            dim = (cp.array(q_new[var])).size
            rvar = rng.normal(0, 2 * epsilon, dim).reshape(q[var].shape)
            q_new[var] += epsilon * p_new[var]
            grad_q = self.model.grad(q_new, **args)
            p_new[var] = ((1 - epsilon) * p_new[var]
                          + epsilon * grad_q[var] + rvar)
    acceptprob = self.accept(q, q_new, p, p_new, **args)
    if cp.isfinite(acceptprob) and (cp.random.rand() < acceptprob):
        q = q_new.copy()
        p = p_new.copy()
    return q, p, acceptprob
def do_nadam(self, X, Y, update, learning_rate, **kwargs):
    layers = len(self.structure) - 1
    grads = self.calculate_grads(X, Y, kwargs["l2_reg_param"])
    for ii in cp.arange(1, layers + 1):
        update["mw" + str(ii)] = (
            kwargs["beta1"] * update.get("mw" + str(ii), 0)
            + (1 - kwargs["beta1"]) * cp.sum(grads["w" + str(ii)], axis=0))
        update["mb" + str(ii)] = (
            kwargs["beta1"] * update.get("mb" + str(ii), 0)
            + (1 - kwargs["beta1"])
            * cp.sum(grads["b" + str(ii)], axis=1).reshape(-1, 1))
        update["vw" + str(ii)] = (
            kwargs["beta2"] * update.get("vw" + str(ii), 0)
            + (1 - kwargs["beta2"])
            * cp.square(cp.sum(grads["w" + str(ii)], axis=0)))
        update["vb" + str(ii)] = (
            kwargs["beta2"] * update.get("vb" + str(ii), 0)
            + (1 - kwargs["beta2"])
            * cp.square(cp.sum(grads["b" + str(ii)], axis=1).reshape(-1, 1)))
        self.params["w" + str(ii)] -= cp.multiply(
            learning_rate / cp.sqrt(
                kwargs["epsilon"]
                + update["vw" + str(ii)]
                / (1 - kwargs["beta2"] ** kwargs["step_num"])),
            kwargs["beta1"] * (
                update["mw" + str(ii)]
                / (1 - kwargs["beta1"] ** kwargs["step_num"])
                + ((1 - kwargs["beta1"])
                   / (1 - kwargs["beta1"] ** kwargs["step_num"]))
                * cp.sum(grads["w" + str(ii)], axis=0)))
        self.params["b" + str(ii)] -= cp.multiply(
            learning_rate / cp.sqrt(
                kwargs["epsilon"]
                + update["vb" + str(ii)]
                / (1 - kwargs["beta2"] ** kwargs["step_num"])),
            kwargs["beta1"] * (
                update["mb" + str(ii)]
                / (1 - kwargs["beta1"] ** kwargs["step_num"])
                + ((1 - kwargs["beta1"])
                   / (1 - kwargs["beta1"] ** kwargs["step_num"]))
                * cp.sum(grads["b" + str(ii)], axis=1).reshape(-1, 1)))
    return update
def test_view_as_windows_2D():
    A = cp.arange(5 * 4).reshape(5, 4)
    window_shape = (4, 3)
    B = view_as_windows(A, window_shape)
    assert B.shape == (2, 2, 4, 3)
    # fmt: off
    cp.testing.assert_array_equal(
        B,
        cp.array([[[[0,  1,  2],
                    [4,  5,  6],
                    [8,  9, 10],
                    [12, 13, 14]],
                   [[1,  2,  3],
                    [5,  6,  7],
                    [9, 10, 11],
                    [13, 14, 15]]],
                  [[[4,  5,  6],
                    [8,  9, 10],
                    [12, 13, 14],
                    [16, 17, 18]],
                   [[5,  6,  7],
                    [9, 10, 11],
                    [13, 14, 15],
                    [17, 18, 19]]]]))
    # fmt: on
def forward(self, x, t):
    if x.ndim == 2:  # when using mini-batches
        x = x - x.max(axis=1, keepdims=True)
        x = cp.exp(x)
        y = x / x.sum(axis=1, keepdims=True)
    elif x.ndim == 1:
        x = x - cp.max(x)
        y = cp.exp(x) / cp.sum(cp.exp(x))

    if y.ndim == 1:
        t = t.reshape(1, t.size)
        y = y.reshape(1, y.size)

    # If the teacher labels are one-hot vectors, convert them to the indices
    # of the correct classes
    if t.size == y.size:
        t = t.argmax(axis=1)

    batch_size = y.shape[0]
    # Cross-entropy over the probabilities of the correct classes. `t` holds
    # class indices here, so it must not be multiplied into the sum.
    loss = -1.0 * cp.sum(
        cp.log(y[cp.arange(batch_size), t] + 1e-7)) / batch_size

    self.y = y
    self.t = t
    return loss
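

# Hedged numeric sketch (added for illustration; the surrounding class is not
# shown, so `SoftmaxWithLoss` is an assumed name for it):
#
#     layer = SoftmaxWithLoss()
#     x = cp.zeros((2, 3))   # uniform logits -> softmax is 1/3 per class
#     t = cp.asarray([0, 2])
#     layer.forward(x, t)    # == log(3) ~= 1.0986 (up to the 1e-7 term)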
def inverse_transform(self, y: cudf.Series) -> cudf.Series:
    """
    Revert ordinal label to original label

    Parameters
    ----------
    y : cudf.Series, dtype=int32
        Ordinal labels to be reverted

    Returns
    -------
    reverted : cudf.Series
        Reverted labels
    """
    # check LabelEncoder is fitted
    self._check_is_fitted()
    # check input type is cudf.Series
    if not isinstance(y, cudf.Series):
        raise TypeError(
            'Input of type {} is not cudf.Series'.format(type(y)))
    # check if ord_label out of bound
    ord_label = y.unique()
    category_num = len(self.classes_)
    if self.handle_unknown == 'error':
        for ordi in ord_label.values_host:
            if ordi < 0 or ordi >= category_num:
                raise ValueError(
                    'y contains previously unseen label {}'.format(ordi))

    y = y.astype(self.dtype)
    ran_idx = cudf.Series(cp.arange(len(self.classes_))).astype(self.dtype)
    reverted = y._column.find_and_replace(ran_idx, self.classes_, False)
    return cudf.Series(reverted)
def ds(kx, ky, qx, qy, om, d):
    topkq = -complex(0, 1) * V0 * ((kx + qx) - complex(0, 1) * (ky + qy))
    botkq = complex(0, 1) * V0 * ((kx + qx) + complex(0, 1) * (ky + qy))
    innkq = om + complex(0, 1) * Gamm - A * ((kx + qx)**2 + (ky + qy)**2) - V2

    topk = -complex(0, 1) * V0 * (kx - complex(0, 1) * ky)
    botk = complex(0, 1) * V0 * (kx + complex(0, 1) * ky)
    innk = om + complex(0, 1) * Gamm - A * (kx**2 + ky**2) - V2

    cent = cp.arange(-(N - 1) / 2, (N - 1) / 2 + 1, 1)
    # cp.diag takes no dtype argument; cast the diagonal explicitly instead
    d = hOmg * cp.diag(cent.astype(cp.float32))

    Ginkq = (np.eye(N, N, k=1) * topkq + np.eye(N, N, k=-1) * botkq
             + innkq * np.eye(N, N) - d)
    Gink = (np.eye(N, N, k=1) * topk + np.eye(N, N, k=-1) * botk
            + innk * np.eye(N, N) - d)

    Grkq = np.linalg.inv(Ginkq)
    Gakq = np.transpose(np.conj(Grkq))
    Grk = np.linalg.inv(Gink)
    Gak = np.transpose(np.conj(Grk))

    fer = np.heaviside(-(d + np.eye(N, N) * (om - mu)), 0)

    in1 = np.matmul(Grkq, np.matmul(Grk, np.matmul(fer, Gak)))
    in2 = np.matmul(Grkq, np.matmul(fer, np.matmul(Gakq, Gak)))

    tr = np.trace(in1 + in2)
    # Here I will divide by DOS, multiply by 2 for spin, and divide by (2pi)^3
    dchi = -(4) * Gamm * tr / math.pi**2
    return dchi
def inverse_transform(self, y: cudf.Series) -> cudf.Series:
    """
    Revert ordinal label to original label

    Parameters
    ----------
    y : cudf.Series, pandas.Series, cupy.ndarray or numpy.ndarray,
        dtype=int32
        Ordinal labels to be reverted

    Returns
    -------
    reverted : the same type as y
        Reverted labels
    """
    # check LabelEncoder is fitted
    self._check_is_fitted()
    # convert the input into a cudf.Series
    y = self._to_cudf_series(y)
    # check if ord_label out of bound
    ord_label = y.unique()
    category_num = len(self.classes_)
    if self.handle_unknown == 'error':
        for ordi in ord_label.values_host:
            if ordi < 0 or ordi >= category_num:
                raise ValueError(
                    'y contains previously unseen label {}'.format(ordi))

    y = y.astype(self.dtype)
    ran_idx = cudf.Series(cp.arange(len(self.classes_))).astype(self.dtype)
    reverted = y._column.find_and_replace(ran_idx, self.classes_, False)
    res = cudf.Series(reverted)
    return res
def test_mask():
    vol = cp.zeros((30, 30, 30))
    vol[15, 15, 15] = 1
    struct = generate_binary_structure(3, 1)
    # TODO: remove brute_force=True once non-brute force implemented for CuPy
    voln = binary_dilation(
        vol, structure=struct, iterations=4, brute_force=True).astype("f4")
    initial = cp.sum(voln > 0)
    mask = voln.copy()
    thresh = otsu(mask)
    mask = mask > thresh
    initial_otsu = cp.sum(mask > 0)
    assert_array_equal(initial_otsu, initial)

    mins, maxs = bounding_box(mask)
    voln_crop = crop(mask, mins, maxs)
    initial_crop = cp.sum(voln_crop > 0)
    assert_array_equal(initial_crop, initial)

    applymask(voln, mask)
    final = cp.sum(voln > 0)
    assert_array_equal(final, initial)

    # Test multi_median.
    img = cp.arange(25).reshape(5, 5)
    img_copy = img.copy()
    medianradius = 2
    median_test = multi_median(img, medianradius, 3)
    assert_array_equal(img, img_copy)

    medarr = ((medianradius * 2) + 1,) * img.ndim
    median_control = median_filter(img, medarr)
    median_control = median_filter(median_control, medarr)
    median_control = median_filter(median_control, medarr)
    assert_array_equal(median_test, median_control)
def _run_cupy_natural_break(data, num_sample, k):
    num_data = data.size

    if num_sample is not None and num_sample < num_data:
        generator = cupy.random.RandomState(1234567890)
        idx = [i for i in range(0, data.size)]
        generator.shuffle(idx)
        sample_idx = idx[:num_sample]
        sample_data = data.flatten()[sample_idx]
    else:
        sample_data = data.flatten()

    # warn if the total number of data points to fit the model exceeds 40k
    if sample_data.size >= 40000:
        warnings.warn(
            'natural_breaks Warning: Natural break classification '
            '(Jenks) has a complexity of O(n^2), '
            'your classification with {} data points may take '
            'a long time.'.format(sample_data.size), Warning)

    uv = cupy.unique(sample_data)
    uvk = len(uv)

    if uvk < k:
        warnings.warn(
            'natural_breaks Warning: Not enough unique values '
            'in data array for {} classes. '
            'n_samples={} should be >= n_clusters={}. '
            'Using k={} instead.'.format(k, uvk, k, uvk), Warning)
        uv.sort()
        bins = uv
    else:
        centroids = _run_cupy_jenks(sample_data, k)
        bins = cupy.array(centroids[1:])

    out = _bin(data, bins, cupy.arange(uvk))
    return out
def transform(self, columns: ColumnNames, gdf: cudf.DataFrame) -> cudf.DataFrame:
    # Add temporary column for sorting
    tmp = "__tmp__"
    gdf[tmp] = cupy.arange(len(gdf), dtype="int32")

    fit_folds = self.kfold > 1
    if fit_folds:
        gdf[self.fold_name] = _add_fold(gdf.index, self.kfold, self.fold_seed)

    # Need mean of continuous target column
    y_mean = self.target_mean or self.means

    # Loop over categorical-column groups and apply logic
    new_gdf = None
    for ind, cat_group in enumerate(columns):
        if isinstance(cat_group, tuple):
            cat_group = list(cat_group)
        elif isinstance(cat_group, str):
            cat_group = [cat_group]

        if new_gdf is None:
            new_gdf = self._op_group_logic(cat_group, gdf, y_mean, fit_folds, ind)
        else:
            _df = self._op_group_logic(cat_group, gdf, y_mean, fit_folds, ind)
            new_gdf = cudf.concat([new_gdf, _df], axis=1)

    # Drop temporary columns
    gdf.drop(
        columns=[tmp, "__fold__"] if fit_folds and self.drop_folds else [tmp],
        inplace=True)
    if fit_folds and not self.drop_folds:
        new_gdf[self.fold_name] = gdf[self.fold_name]
    return new_gdf
def apply_boxcar_drift(data, metadata):
    """ Apply boxcar filter to compensate for doppler smearing

    An optimal boxcar is applied per row of drift rate. This retrieves
    a sensitivity increase of sqrt(boxcar_size) for a smeared signal.
    (Still down a sqrt(boxcar_size) compared to the no-smearing case.)

    Args:
        data (np or cp array): Data array to filter.
        metadata (dict): Dictionary of metadata values

    Returns:
        data, metadata (array and dict): Data array with filter applied.
    """
    logger.debug(
        "apply_boxcar_drift: Applying moving average based on drift rate.")
    metadata = deepcopy(metadata)

    # Compute drift rates from metadata
    dr0, ddr = (metadata['drift_rate_start'].value,
                metadata['drift_rate_step'].value)
    df = metadata['frequency_step'].to('Hz').value
    dt = metadata['integration_time'].to('s').value
    drates = dr0 + ddr * cp.arange(data.shape[0])

    # Compute smearing (array of n_channels smeared for given driftrate)
    smearing_nchan = cp.abs(dt * drates / df).astype('int32')
    smearing_nchan_max = cp.asnumpy(cp.max(smearing_nchan))

    # Apply boxcar filter to compensate for smearing
    for boxcar_size in range(2, smearing_nchan_max + 1):
        idxs = cp.where(smearing_nchan == boxcar_size)
        # 1. uniform_filter1d computes a mean. We want a sum, so *= boxcar_size
        # 2. we want the noise to stay the same, so divide by sqrt(boxcar_size)
        # combined, 1 and 2 give a sqrt(boxcar_size) factor
        data[idxs] = uniform_filter1d(
            data[idxs], size=boxcar_size, axis=2) * np.sqrt(boxcar_size)
    return data, metadata
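

# Hedged numeric sketch (added for illustration, not from the original
# source): shows why scaling the boxcar mean by sqrt(boxcar_size) preserves
# the noise level, assuming the `uniform_filter1d` used above is
# cupyx.scipy.ndimage's.
def _example_boxcar_noise_scaling():
    from cupyx.scipy.ndimage import uniform_filter1d
    n = 4
    noise = cp.random.standard_normal((1, 1, 4096), dtype=cp.float32)
    filtered = uniform_filter1d(noise, size=n, axis=2) * np.sqrt(n)
    # The mean over n i.i.d. unit-variance samples has std 1/sqrt(n); the
    # sqrt(n) factor restores unit std, while a signal smeared over n
    # channels gains a factor of sqrt(n).
    assert abs(float(filtered.std()) - 1.0) < 0.1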
def _major_slice(self, idx, copy=False):
    """Index along the major axis where idx is a slice object.
    """
    if idx == slice(None):
        return self.copy() if copy else self

    M, N = self._swap(*self.shape)
    start, stop, step = idx.indices(M)
    M = len(range(start, stop, step))
    new_shape = self._swap(M, N)
    if M == 0:
        return self.__class__(new_shape)

    row_nnz = cupy.diff(self.indptr)
    idx_dtype = self.indices.dtype
    res_indptr = cupy.zeros(M + 1, dtype=idx_dtype)
    cupy.cumsum(row_nnz[idx], out=res_indptr[1:])

    if step == 1:
        idx_start = self.indptr[start]
        idx_stop = self.indptr[stop]
        res_indices = cupy.array(self.indices[idx_start:idx_stop],
                                 copy=copy)
        res_data = cupy.array(self.data[idx_start:idx_stop], copy=copy)
    else:
        rows = cupy.arange(
            start, start + (res_indptr.size - 1) * step, step,
            dtype=res_indptr.dtype)
        res_indices, res_data = _index._csr_row_index(
            rows, self.indptr, self.indices, self.data, res_indptr)

    return self.__class__(
        (res_data, res_indices, res_indptr), shape=new_shape, copy=False)
def top_k(array, k, axis=0, biggest=True):
    """
    Return the top-k indices along the specified dimension. The returned
    indices are such that their array values are sorted.

    -Input:
        array: 1d or 2d array
        k: the top `k` (k>0, integer)
        axis: ignored if array is 1d, otherwise sort along the specified
            axis, default to 0
        biggest: whether to take the top-k biggest or smallest, default
            to True
    -Output:
        inds: indices
        vals: array values at the indices
    """
    assert array.ndim == 1 or array.ndim == 2
    assert axis == 0 or axis == 1

    if biggest:
        array = -array

    if array.ndim == 1:
        inds = xp.argpartition(array, k)[:k]
        vals = array[inds]
        sort_inds = xp.argsort(vals)
        inds = inds[sort_inds]
        vals = vals[sort_inds]
    elif axis == 0:
        inds = xp.argpartition(array, k, axis=0)[:k, :]
        vals = array[inds, xp.arange(array.shape[1])[None, :]]
        sort_inds = xp.argsort(vals, axis=0)
        inds = inds[sort_inds, xp.arange(array.shape[1])[None, :]]
        vals = vals[sort_inds, xp.arange(array.shape[1])[None, :]]
    else:
        inds = xp.argpartition(array, k, axis=1)[:, :k]
        vals = array[xp.arange(array.shape[0])[:, None], inds]
        sort_inds = xp.argsort(vals, axis=1)
        inds = inds[xp.arange(array.shape[0])[:, None], sort_inds]
        vals = vals[xp.arange(array.shape[0])[:, None], sort_inds]

    if biggest:
        vals = -vals
    return inds, vals
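

# Hedged usage sketch (added for illustration; assumes `xp` is bound to cupy
# or numpy at module level, as the body of top_k implies):
def _example_top_k_usage():
    arr = xp.asarray([5., 1., 9., 3.])
    inds, vals = top_k(arr, 2, biggest=True)
    assert [int(v) for v in vals.tolist()] == [9, 5]
    assert [int(i) for i in inds.tolist()] == [2, 0]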
def get_session_id_from_session_boundary(session_change_df, last_session_len):
    """
    This function returns session starts given a session change df
    """
    import cudf

    ## we don't really need the `session_id` to start from 0
    ## the total number of sessions per partition should be fairly limited
    ## and we really should not hit 2,147,483,647 sessions per partition
    ## Can switch to vec_arange code to match spark 1-1
    ## see previously committed code
    ## https://github.com/rapidsai/tpcx-bb/blob/8394f2b8d62540b4077c606c8b687dee96b4f5d3/tpcx-bb1.3.1/tools/sessionization.py
    user_session_ids = cp.arange(len(session_change_df), dtype=np.int32)

    ### up shift the session length df
    session_len = session_change_df["t_index"].diff().reset_index(drop=True)
    session_len = session_len.shift(-1)
    session_len.iloc[-1] = last_session_len

    session_id_final_series = (
        cudf.Series(user_session_ids).repeat(session_len)
        .reset_index(drop=True))
    return session_id_final_series
def _encode(name, path, gdf, cat_cache, na_sentinel=-1, freq_threshold=0):
    value = None
    if path:
        if cat_cache is not None:
            cat_cache = cat_cache.get(name, "disk")
            cache = _get_cache()
            if cache:
                value = cache.get_categories(name, path, cache=cat_cache)
        else:
            value = cudf.io.read_parquet(path, index=False, columns=[name])
            value.index.name = "labels"
            value.reset_index(drop=False, inplace=True)

    vals = gdf[name].copy(deep=False)
    if value is None:
        value = cudf.DataFrame({name: [None]})
        value[name] = value[name].astype(vals.dtype)
        value.index.name = "labels"
        value.reset_index(drop=False, inplace=True)

    if freq_threshold > 0:
        codes = cudf.DataFrame({
            name: vals.copy(),
            "order": cp.arange(len(vals)),
        })
        codes = codes.merge(
            value, on=name, how="left").sort_values("order")["labels"]
        codes.fillna(na_sentinel, inplace=True)
        return codes.values
    else:
        # Use `searchsorted` if we are using a "full" encoding
        labels = value[name].searchsorted(
            vals, side="left", na_position="first")
        labels[labels >= len(value[name])] = na_sentinel
        return labels
def create_tokenized_df(str_series, delimiter=" ", ngram_range=(1, 1)):
    """
    Creates a tokenized df from a string column where each row is like
    [token, doc_id], token = 'string' and doc_id = index from which the
    word came.

    Also returns an empty doc_id series
    """
    import cudf

    token_count_sr = str_series.str.token_count(delimiter=delimiter)
    doc_id_ar = cp.arange(start=0, stop=len(str_series), dtype=np.int32)
    doc_id_sr = cudf.Series(doc_id_ar)

    tokenized_df_ls = []
    for n in range(ngram_range[0], ngram_range[1] + 1):
        ngram_ser = get_ngram(
            str_series, n, doc_id_sr, token_count_sr, delimiter)
        tokenized_df_ls.append(ngram_ser)

    tokenized_df = cudf.concat(tokenized_df_ls)
    tokenized_df = tokenized_df.reset_index(drop=True)

    empty_doc_ids = doc_id_sr[doc_id_sr[token_count_sr == 0]]
    return tokenized_df, empty_doc_ids
def forward_all_cells(self):
    """ move all agents in map one time step forward """
    agents_durations = self.durations[
        cp.arange(0, self.durations.shape[0]),
        self.current_state_ids].flatten()
    print(
        f'DEBUG: agents_durations.shape: {agents_durations.shape}, '
        f'self.durations.shape: {self.durations.shape}, '
        f'self.current_state_ids.shape: {self.current_state_ids.shape}')
    to_transit = (self.current_state_durations == agents_durations)
    self.current_state_durations += 1
    to_transit = self.agent_ids[to_transit]
    self.transit_states(to_transit)

    # Contamination at home by end of the period
    self.contaminate(self.agent_ids, self.home_cell_ids)

    # Update r and associated variables
    r = (self.n_infected_period / self.n_diseased_period
         if self.n_diseased_period > 0 else 0)
    r = cp.array([r])
    if self.verbose > 1:
        print(f'period {self.current_period}: r={r}')
    self.r_factors = append(self.r_factors, r)
    self.n_diseased_period = self.get_n_diseased()
    self.n_infected_period = 0

    # Move one period forward
    self.current_period += 1
def get_predicted_traces(matrix_U, matrix_W, sorting_result, time_limits):
    W = cp.asarray(matrix_W, dtype=np.float32)
    U = cp.asarray(matrix_U, dtype=np.float32)

    buffer = W.shape[0]
    predicted_traces = cp.zeros(
        (U.shape[0], 4 * buffer + (time_limits[1] - time_limits[0])),
        dtype=np.int16)

    sorting_result = cp.asarray(sorting_result)
    all_spike_times = sorting_result[:, 0]
    included_spike_pos = cp.asarray(
        (time_limits[0] - buffer // 2 < all_spike_times)
        & (all_spike_times < time_limits[1] + buffer // 2)).nonzero()[0]

    spike_times = all_spike_times[included_spike_pos].astype(np.int32)
    spike_templates = sorting_result[included_spike_pos, 1].astype(np.int32)
    spike_amplitudes = sorting_result[included_spike_pos, 2]

    for s, spike in enumerate(spike_times):
        amplitude = spike_amplitudes[s]
        U_i = U[:, spike_templates[s], :]
        W_i = W[:, spike_templates[s], :]
        addendum = cp.ascontiguousarray(
            cp.matmul(U_i, W_i.T) * amplitude, dtype=np.int16)
        pred_pos = (cp.arange(buffer) + spike - time_limits[0]
                    + buffer + buffer // 2)
        predicted_traces[:, pred_pos] += addendum

    output = predicted_traces[:, buffer * 2:-buffer * 2]
    return cp.asnumpy(output).T
def get_session_id(df):
    """
    This function creates a session id column for each click
    The session id grows in increments for each user's subsequent session
    Session boundary is defined by the time_out
    """
    df["user_change_flag"] = df["wcs_user_sk"].diff(periods=1) != 0
    df["session_change_flag"] = df["review_flag"] | df["user_change_flag"]

    df = df.reset_index(drop=True)
    df["t_index"] = cp.arange(start=0, stop=len(df), dtype=np.int32)

    session_change_df = df[df["session_change_flag"]].reset_index(drop=True)
    try:
        last_session_len = len(df) - session_change_df["t_index"].iloc[-1]
    except (AssertionError, IndexError) as e:  # IndexError in numba >= 0.48
        last_session_len = 0

    session_ids = get_session_id_from_session_boundary(
        session_change_df, last_session_len)
    assert len(session_ids) == len(df)
    return session_ids
def create_test_dataset():
    cp = np  # cpu mode only here
    s1 = np.load(test_path.joinpath('my_conv2_input.npy'))
    s0 = np.copy(s1)

    tmax = np.ceil(4 * sig)
    dt = cp.arange(-tmax, tmax + 1)
    gauss = cp.exp(-dt**2 / (2 * sig**2))
    gauss = (gauss / cp.sum(gauss)).astype(np.float32)

    cNorm = lfilter_cpu(
        gauss, 1., np.r_[np.ones(s1.shape[0]), np.zeros(int(tmax))])
    cNorm = cNorm[int(tmax):]

    s1 = lfilter_cpu(
        gauss, 1, np.r_[s1, np.zeros((int(tmax), s1.shape[1]))], axis=0)
    s1 = s1[int(tmax):] / cNorm[:, np.newaxis]

    # import matplotlib.pyplot as plt
    # plt.plot(s0)
    # plt.plot(s1)

    # np.save(test_path.joinpath('my_conv2_input.npy'), s0)
    np.save(test_path.joinpath('my_conv2_output.npy'), s1)
def test_texture_input(self):
    width, height, depth = self.dimensions
    dim = 3 if depth != 0 else 2 if height != 0 else 1

    texobj = self._prep_texture()
    ker = getattr(self, f'_prep_kernel{dim}D')()

    # prepare input
    args = [None, texobj]
    size = width
    if height > 0:
        size *= height
        args.append(width)
    if depth > 0:
        size *= depth
        args.append(height)
    in_arr = cupy.arange(size, dtype=cupy.float32)
    in_arr = in_arr.reshape(self.shape)
    args[0] = in_arr

    # compute and validate output
    out_arr = ker(*args)
    expected = in_arr + self.data
    testing.assert_allclose(out_arr, expected)
def create_texture_image(textures, texture_size_out=16):
    num_faces, texture_size_in = textures.shape[:2]
    tile_width = int((num_faces - 1.) ** 0.5) + 1
    tile_height = int((num_faces - 1.) / tile_width) + 1
    image = np.zeros(
        (tile_height * texture_size_out, tile_width * texture_size_out, 3),
        'float32')
    vertices = np.zeros((num_faces, 3, 2), 'float32')  # [:, :, XY]
    face_nums = np.arange(num_faces)
    column = face_nums % tile_width
    # integer division: `/` would yield float tile-row indices on Python 3
    row = face_nums // tile_width
    vertices[:, 0, 0] = column * texture_size_out
    vertices[:, 0, 1] = row * texture_size_out
    vertices[:, 1, 0] = column * texture_size_out
    vertices[:, 1, 1] = (row + 1) * texture_size_out - 1
    vertices[:, 2, 0] = (column + 1) * texture_size_out - 1
    vertices[:, 2, 1] = (row + 1) * texture_size_out - 1
    image = chainer.cuda.to_gpu(image)
    vertices = chainer.cuda.to_gpu(vertices)
    textures = chainer.cuda.to_gpu(textures)
    # one loop index per RGB pixel (integer division, not float)
    loop = cp.arange(image.size // 3).astype('int32')

    chainer.cuda.elementwise(
        'int32 j, raw float32 image, raw float32 vertices_all, '
        'raw float32 textures',
        '',
        string.Template('''
            const int x = i % (${tile_width} * ${texture_size_out});
            const int y = i / (${tile_width} * ${texture_size_out});
            const int row = x / ${texture_size_out};
            const int column = y / ${texture_size_out};
            const int fn = row + column * ${tile_width};
            const int tsi = ${texture_size_in};
            const float* texture = &textures[fn * tsi * tsi * tsi * 3];
            const float* vertices = &vertices_all[fn * 3 * 2];
            const float* p0 = &vertices[2 * 0];
            const float* p1 = &vertices[2 * 1];
            const float* p2 = &vertices[2 * 2];

            /* */
            // if ((y % ${texture_size_out}) < (x % ${texture_size_out})) continue;

            /* compute face_inv */
            float face_inv[9] = {
                p1[1] - p2[1], p2[0] - p1[0], p1[0] * p2[1] - p2[0] * p1[1],
                p2[1] - p0[1], p0[0] - p2[0], p2[0] * p0[1] - p0[0] * p2[1],
                p0[1] - p1[1], p1[0] - p0[0], p0[0] * p1[1] - p1[0] * p0[1]};
            float face_inv_denominator = (
                p2[0] * (p0[1] - p1[1]) +
                p0[0] * (p1[1] - p2[1]) +
                p1[0] * (p2[1] - p0[1]));
            for (int k = 0; k < 9; k++) face_inv[k] /= face_inv_denominator;

            /* compute w = face_inv * p */
            float weight[3];
            float weight_sum = 0;
            for (int k = 0; k < 3; k++) {
                weight[k] = face_inv[3 * k + 0] * x
                          + face_inv[3 * k + 1] * y
                          + face_inv[3 * k + 2];
                weight_sum += weight[k];
            }
            for (int k = 0; k < 3; k++) weight[k] /= (weight_sum + ${eps});

            /* get texture index (float) */
            float texture_index_float[3];
            for (int k = 0; k < 3; k++) {
                float tif = weight[k] * (tsi - 1);
                tif = max(tif, 0.);
                tif = min(tif, tsi - 1 - ${eps});
                texture_index_float[k] = tif;
            }

            /* blend */
            float new_pixel[3] = {0, 0, 0};
            for (int pn = 0; pn < 8; pn++) {
                float w = 1;               // weight
                int texture_index_int[3]; // index in source (int)
                for (int k = 0; k < 3; k++) {
                    if ((pn >> k) % 2 == 0) {
                        w *= 1 - (texture_index_float[k]
                                  - (int)texture_index_float[k]);
                        texture_index_int[k] = (int)texture_index_float[k];
                    } else {
                        w *= texture_index_float[k]
                             - (int)texture_index_float[k];
                        texture_index_int[k] = (int)texture_index_float[k] + 1;
                    }
                }
                int isc = texture_index_int[0] * tsi * tsi
                        + texture_index_int[1] * tsi
                        + texture_index_int[2];
                for (int k = 0; k < 3; k++)
                    new_pixel[k] += w * texture[isc * 3 + k];
            }
            for (int k = 0; k < 3; k++) image[i * 3 + k] = new_pixel[k];
        ''').substitute(
            num_faces=num_faces,
            texture_size_in=texture_size_in,
            texture_size_out=texture_size_out,
            tile_width=tile_width,
            eps=1e-5,
        ),
        'function',
    )(loop, image, vertices, textures)

    chainer.cuda.elementwise(
        'int32 j, raw float32 image, raw float32 vertices_all, '
        'raw float32 textures',
        '',
        string.Template('''
            const int x = i % (${tile_width} * ${texture_size_out});
            const int y = i / (${tile_width} * ${texture_size_out});
            const int row = x / ${texture_size_out};
            const int column = y / ${texture_size_out};
            const int fn = row + column * ${tile_width};
            const int tsi = ${texture_size_in};
            const float* texture = &textures[fn * tsi * tsi * tsi * 3];
            const float* vertices = &vertices_all[fn * 3 * 2];
            const float* p0 = &vertices[2 * 0];
            const float* p1 = &vertices[2 * 1];
            const float* p2 = &vertices[2 * 2];

            /* */
            if ((y % ${texture_size_out} + 1) == (x % ${texture_size_out})) {
                for (int k = 0; k < 3; k++) image[i * 3 + k] = image[
                    (y * ${tile_width} * ${texture_size_out} + (x - 1)) * 3 + k];
            }
        ''').substitute(
            num_faces=num_faces,
            texture_size_in=texture_size_in,
            texture_size_out=texture_size_out,
            tile_width=tile_width,
            eps=1e-5,
        ),
        'function',
    )(loop, image, vertices, textures)

    vertices[:, :, 0] /= (image.shape[1] - 1)
    vertices[:, :, 1] /= (image.shape[0] - 1)
    image = image[::-1, ::1]
    image = image.get()
    vertices = vertices.get()
    return image, vertices
# For example, for the P4 (rotation-translation) conv, the input image is a
# function on Z2, which we may think of as a function on P4 that is
# right-invariant to rotation.
# A right-rotation-invariant P4 function has the same value at (r, u, v) as
# it has at (r', u, v).
# Naturally, we don't store this invariant P4 function, but we store an array
# with a length-1 axis for the rotation coordinate.
# This is consistent with the numpy convention that length-1 axes get
# broadcast automatically.
# So for Z2 filters, we get the following shapes:
# Filter shape: (output_channels, input_channels, 1, nu, nv)
# Index shape (one per coordinate t, u, v): (output_transforms, 1, nu, nv)
# Result shape: (output_channels, output_transforms, input_channels, 1, nu, nv)
import cupy
from cupy.core.core import compile_with_cache

# WORKAROUND - currently, cupy compile_with_cache fails if no cupy code is
# executed first
x = cupy.arange(2, dtype='f')

# This computes input[..., T, U, V].swapaxes(1, 2)
_index_group_func_str = \
    """
extern "C" __global__ void indexing_kernel(
    CArray<{0}, 5> input,
    CArray<int, 4> T,
    CArray<int, 4> U,
    CArray<int, 4> V,
    CArray<{0}, 6> output)
{{
    CUPY_FOR(i, output.size()) {{
        const int* oshape = output.shape();
        const int* ostrides = output.strides();
def test_size(self):
    x = cupy.arange(3).astype('i')
    y = cupy.ElementwiseKernel(
        'raw int32 x', 'int32 y',
        'y = x.size()',
        'test_carray_size',
    )(x, size=1)
    self.assertEqual(int(y[0]), 3)