def _csr_column_index1(col_idxs, Ap, Aj):
    """Build the indptr and helper arrays used to index CSR columns.

    Args:
        col_idxs: column indices requested from the input matrix
        Ap: indptr of the input sparse matrix
        Aj: indices of the input sparse matrix

    Returns:
        Bp: first value returned by ``_csr_column_index1_indptr``
            (the indptr of the output sparse matrix)
        Aj_mask: second value returned by ``_csr_column_index1_indptr``
            (input indices with non-matching columns masked out with -1)
        col_counts: for each unique value of ``col_idxs``, the number of
            times it appears in ``col_idxs``
        sort_idxs: index of the first occurrence of each unique value in
            ``col_idxs``, cast to the dtype of the unique values
    """
    unique_cols, first_pos = cupy.unique(col_idxs, return_index=True)
    first_pos = first_pos.astype(unique_cols.dtype)

    # Slot of every requested column inside the sorted unique array.
    slots = cupy.searchsorted(unique_cols, col_idxs)

    # Histogram of the requested columns over the unique values.
    col_counts = cupy.zeros(unique_cols.size, dtype=col_idxs.dtype)
    cupyx.scatter_add(col_counts, slots, 1)

    Bp, Aj_mask = _csr_column_index1_indptr(
        unique_cols, first_pos, col_counts, Ap, Aj)
    return Bp, Aj_mask, col_counts, first_pos
def mut_add(A):
    """Scatter-add ``x`` into ``A`` at ``idx`` and return ``A``.

    ``idx``, ``x`` and ``ocpx`` are not parameters; they are looked up in
    an outer scope that is not visible from this function alone.
    """
    # In the numpy codebase, this used to be:
    #     onp.add.at(A, idx, x)
    # According to
    # https://docs-cupy.chainer.org/en/stable/reference/ufunc.html?highlight=ufunc.at,
    # scatter_add is the correct function to use.
    # TODO: PR into cupy codebase the ability to use scatter_add with float64?
    ocpx.scatter_add(A, idx, x)
    return A
def run_viterbi_estep(self, tag_array, len_array, trans_scores, dec_scores):
    """Record the events of the parse from ``batch_parse`` and score them.

    For every position ``m`` from 1 to ``real_len - 1`` the scores of the
    selected STOP/GO decisions and attachments are added to a running
    total, and the same events are written into ``self.batch_dec_trace``,
    ``self.batch_trans_trace`` and ``self.root_counter``.

    Args:
        tag_array: 2-D array whose shape gives ``(batch_size, real_len)``
        len_array: per-sentence lengths, converted with ``cpasarray``
        trans_scores: transition scores; passed to ``self.function_mask``
            when ``self.function_mask_set`` is not None
        dec_scores: decision scores

    Returns:
        The first element of the accumulated total, fetched with ``.get()``.
    """
    batch_size, real_len = tag_array.shape
    if self.function_mask_set is not None:
        self.function_mask(trans_scores, tag_array)

    heads, head_valences, valences = batch_parse(
        trans_scores, dec_scores, len_array)
    len_array = cpasarray(len_array)
    rows = cp.arange(batch_size)
    total = cpfzeros(1)

    for m in range(1, real_len):
        head = heads[:, m]
        direction = (head <= m).astype(cpi)
        head_val = head_valences[:, m]
        child_val = valences[:, m]
        if self.o.cv > 1:
            trans_val = head_val
        else:
            trans_val = cp.zeros_like(head_val)

        # True where both the token and its head fall inside the sentence.
        valid = (head <= len_array) & (m <= len_array)
        if DEBUG and ((m <= len_array) & (head > len_array)).any():
            print('find bad arc')

        # STOP decision of token m on each of its two sides.
        for side in (0, 1):
            total += cp.sum(
                dec_scores[rows, m, side, child_val[:, side], STOP][valid])
        for side in (0, 1):
            self.batch_dec_trace[
                rows, m - 1, m - 1, side, child_val[:, side], STOP] = valid

        is_root = head == 0
        root_arcs = is_root * valid
        # when use_torch_in_cupy_malloc(), mask.any()=False will raise a
        # NullPointer error, hence the guard before indexing with the mask.
        if root_arcs.any():
            total += cp.sum(trans_scores[:, 0, m, 0][root_arcs])
            cpx.scatter_add(self.root_counter, tag_array[:, m], root_arcs)

        inner_arcs = (~is_root) * valid
        if inner_arcs.any():
            total += cp.sum(
                trans_scores[rows, head, m, trans_val][inner_arcs])
            total += cp.sum(
                dec_scores[rows, head, direction, head_val, GO][inner_arcs])
            self.batch_trans_trace[
                rows, head - 1, m - 1, direction, trans_val] = inner_arcs
            self.batch_dec_trace[
                rows, head - 1, m - 1, direction, head_val, GO] = inner_arcs

    return total.get()[0]
def scatter_add(a, slices, value):
    """Adds given values to specified elements of an array.

    .. deprecated:: 4.0
        Use :func:`cupyx.scatter_add` instead.

    Emits a ``DeprecationWarning`` and then forwards all three arguments,
    unchanged, to :func:`cupyx.scatter_add`. Nothing is returned.

    Args:
        a: array passed through as the first argument.
        slices: index specification passed through as the second argument.
        value: values passed through as the third argument.
    """
    # stacklevel=2 attributes the warning to the code that called this
    # deprecated function rather than to this shim, so the report points at
    # the line that needs migrating and caller-based warning filters apply.
    warnings.warn(
        'cupy.scatter_add is deprecated. Use cupyx.scatter_add instead.',
        DeprecationWarning, stacklevel=2)
    cupyx.scatter_add(a, slices, value)
def test_atomic_add(self, dtype):
    """Compare ``jit.atomic_add`` in a raw kernel with ``cupyx.scatter_add``.

    1024 random values are accumulated into a two-element output, using a
    random 0/1 index per value, once by the kernel and once by
    ``cupyx.scatter_add``; the two results are handed to ``self._check``.
    """
    @jit.rawkernel()
    def f(x, index, out):
        tid = jit.blockDim.x * jit.blockIdx.x + jit.threadIdx.x
        jit.atomic_add(out, index[tid], x[tid])

    values = testing.shaped_random((1024, ), dtype=dtype, seed=0)
    # Random booleans cast to int32 give indices restricted to {0, 1}.
    random_flags = testing.shaped_random((1024, ), dtype=numpy.bool_, seed=1)
    bins = random_flags.astype(numpy.int32)

    # 32 blocks of 32 threads: one thread per input element.
    kernel_result = cupy.zeros((2, ), dtype=dtype)
    f((32, ), (32, ), (values, bins, kernel_result))

    reference = cupy.zeros((2, ), dtype=dtype)
    cupyx.scatter_add(reference, bins, values)

    self._check(kernel_result, reference)
def get_batch_counter_by_tag(self, tag_array, num_tag=None, mode=0):
    """Re-index the stored traces from sentence position to tag id.

    counter[batch, sentence_len, ...] becomes counter[batch, num_tag, ...].

    Args:
        tag_array: 2-D array whose shape gives ``(batch_size, max_len)``
        num_tag: size of the tag axes; ``self.o.num_tag`` when None
        mode: 0 keeps one slice per sentence (leading dimension
            ``batch_size``); any other value accumulates the whole batch
            into a leading dimension of size 1

    Returns:
        ``(dec_out, trans_out)``: decision counts of shape
        ``(lead, num_tag, 2, 2, 2)`` and transition counts of shape
        ``(lead, num_tag, num_tag, 2, self.o.cv)``.

    Raises:
        ValueError: if either trace attribute is None.
    """
    if self.batch_trans_trace is None or self.batch_dec_trace is None:
        raise ValueError("No trace can be used")

    batch_size, max_len = tag_array.shape
    num_tag = self.o.num_tag if num_tag is None else num_tag
    per_sentence = mode == 0
    lead_dim = batch_size if per_sentence else 1

    # ---- decision counts: sum the trace over axis 2, scatter by tag ----
    dec_tail = (2, 2, 2)
    dec_out = cpfzeros((lead_dim, num_tag) + dec_tail)
    if per_sentence:
        sentence_id = cp.tile(
            cp.arange(batch_size).reshape(batch_size, 1), (1, max_len))
        dec_index = (sentence_id.flatten(), tag_array.flatten())
    else:
        dec_index = (0, tag_array.flatten())
    dec_values = cp.sum(self.batch_dec_trace, 2).reshape(-1, *dec_tail)
    cpx.scatter_add(dec_out, dec_index, dec_values)

    # ---- transition counts: scatter by (head tag, child tag) ----
    trans_tail = (2, self.o.cv)
    head_ids = cp.tile(cp.expand_dims(tag_array, 2), (1, 1, max_len))
    child_ids = cp.tile(cp.expand_dims(tag_array, 1), (1, max_len, 1))
    trans_out = cpfzeros((lead_dim, num_tag, num_tag) + trans_tail)
    if per_sentence:
        # One sentence id per (head, child) pair.
        sentence_id = cp.tile(sentence_id, (1, max_len))
        trans_index = (sentence_id.flatten(), head_ids.flatten(),
                       child_ids.flatten())
    else:
        trans_index = (0, head_ids.flatten(), child_ids.flatten())
    trans_values = self.batch_trans_trace.reshape(-1, *trans_tail)
    cpx.scatter_add(trans_out, trans_index, trans_values)

    return dec_out, trans_out
def _insert_many(self, i, j, x):
    """Insert value ``x`` at each ``(i, j)``, one major group at a time.

    ``i`` and ``j`` index the major and minor axis respectively. The three
    inputs are first reordered by ``cupy.argsort(i)``; the reordered arrays
    are new local arrays produced by ``take``.

    ``self.indptr`` and ``self.indices`` are re-cast to the index dtype
    chosen for the post-insertion size, ``self.data`` is re-cast to
    ``self.dtype``, and the actual insertion is delegated to
    ``self._perform_insert``.
    """
    by_major = cupy.argsort(i)  # stable for duplicates
    i, j, x = i.take(by_major), j.take(by_major), x.take(by_major)

    # Choose the index dtype with maxval = current nnz + number of inserts.
    idx_dtype = sputils.get_index_dtype(
        (self.indices, self.indptr), maxval=self.nnz + x.size)
    self.indptr = self.indptr.astype(idx_dtype)
    self.indices = self.indices.astype(idx_dtype)
    self.data = self.data.astype(self.dtype)

    major_ins, minor_ins, data_ins = _index._select_last_indices(
        i, j, x, idx_dtype)

    rows, group_starts = cupy.unique(major_ins, return_index=True)
    # Group start offsets followed by a closing bound; only the length of
    # this array is consumed below.
    bounds = cupy.empty(group_starts.size + 1, group_starts.dtype)
    bounds[:-1] = group_starts
    bounds[-1] = j.size

    # Number of entries to insert into each distinct row.
    row_counts = cupy.zeros(bounds.size - 1, dtype=idx_dtype)
    cupyx.scatter_add(
        row_counts, cupy.searchsorted(rows, major_ins), 1)

    self._perform_insert(minor_ins, data_ins, rows, row_counts, idx_dtype)
def scatter_add(self, a, slices, value):
    """Forward ``a``, ``slices`` and ``value`` to ``cupyx.scatter_add``.

    ``cupyx`` is imported when the method is called rather than at module
    import time. Nothing is returned.
    """
    import cupyx
    cupyx.scatter_add(a, slices, value)
def test_scatter_add(self, dtype):
    """Check that ``cupyx.scatter_add`` accumulates over a repeated index.

    Both updates target element 1, so it must end up holding their sum.
    """
    target = cupy.zeros((3, ), dtype=dtype)
    indices = cupy.array([1, 1], numpy.int32)
    updates = cupy.array([2., 1.], dtype=dtype)

    cupyx.scatter_add(target, indices, updates)

    expected = cupy.array([0, 3, 0], dtype=dtype)
    testing.assert_array_equal(target, expected)
def update_decision(change, norm_counter, pos_array):
    """Accumulate decision counts for every word position and direction.

    ``dec_param`` is not a parameter; it is read from the surrounding
    scope. Both ``dec_param`` and ``norm_counter`` are updated through
    ``cpx.scatter_add``; nothing is returned.

    Args:
        change: indexed as ``change[word, direction]``; a positive entry
            selects the fuller set of updates for that word and direction
        norm_counter: counter receiving the +1/-1 marker updates
        pos_array: 2-D array; column ``i`` is used as the first index of
            every update for word ``i``
    """
    _batch, word_num = pos_array.shape
    for word_idx in range(word_num):
        pos = pos_array[:, word_idx]
        for direction in range(2):
            delta = change[word_idx, direction]
            if delta > 0:
                go_nochild = (pos, direction, NOCHILD, GO)
                go_haschild = (pos, direction, HASCHILD, GO)
                stop_nochild = (pos, direction, NOCHILD, STOP)
                stop_haschild = (pos, direction, HASCHILD, STOP)
                # The + and - signs only serve to tell the cases apart,
                # see self.first_child_update.
                cpx.scatter_add(norm_counter, go_nochild, 1)
                cpx.scatter_add(norm_counter, go_haschild, -1)
                cpx.scatter_add(dec_param, go_haschild, delta)
                cpx.scatter_add(norm_counter, stop_nochild, -1)
                cpx.scatter_add(norm_counter, stop_haschild, 1)
            # Applied whether or not the entry of `change` is positive.
            cpx.scatter_add(dec_param, (pos, direction, NOCHILD, STOP), 1)
def init_param(self, dataset, getter=None):
    """Initialise the root, transition and decision parameters from counts.

    The three parameter arrays obtained through ``torch2cp`` are zeroed,
    filled with distance-weighted counts gathered over
    ``dataset.batch_data``, smoothed with ``self.o.count_smoothing``,
    normalised and finally replaced by their logarithm. All updates are
    applied to the arrays returned by ``torch2cp`` (presumably views that
    share memory with the torch parameters — TODO confirm). Nothing is
    returned.

    Args:
        dataset: object exposing ``batch_data``, an iterable of batches.
        getter: optional callable mapping a batch to its 2-D position
            array; when falsy, ``cpasarray(arrays[1])`` is used instead.
    """
    # require same_len: a single word_num is read from each batch's shape
    # and applied to every sentence in that batch.
    harmonic_sum = [0., 1.]
    dec_param = torch2cp(self.dec_param.data)
    root_param = torch2cp(self.root_param.data)
    trans_param = torch2cp(self.trans_param.data)
    dec_param.fill(0.)
    root_param.fill(0.)
    trans_param.fill(0.)

    def get_harmonic_sum(n):
        """Return 1 + 1/2 + ... + 1/n (0 for n == 0), extending the cache."""
        nonlocal harmonic_sum
        while n >= len(harmonic_sum):
            harmonic_sum.append(harmonic_sum[-1] + 1 / len(harmonic_sum))
        return harmonic_sum[n]

    def update_decision(change, norm_counter, pos_array):
        """Add the decision counts of one batch to dec_param/norm_counter."""
        _, word_num = pos_array.shape
        for i in range(word_num):
            pos = pos_array[:, i]
            for direction in range(2):
                if change[i, direction] > 0:
                    # + and - are only used to tell the cases apart,
                    # see self.first_child_update
                    cpx.scatter_add(norm_counter, (pos, direction, NOCHILD, GO), 1)
                    cpx.scatter_add(norm_counter, (pos, direction, HASCHILD, GO), -1)
                    cpx.scatter_add(dec_param, (pos, direction, HASCHILD, GO), change[i, direction])
                    cpx.scatter_add(norm_counter, (pos, direction, NOCHILD, STOP), -1)
                    cpx.scatter_add(norm_counter, (pos, direction, HASCHILD, STOP), 1)
                    cpx.scatter_add(dec_param, (pos, direction, NOCHILD, STOP), 1)
                else:
                    cpx.scatter_add(dec_param, (pos, direction, NOCHILD, STOP), 1)

    def first_child_update(norm_counter):
        """Return the minimum of -dec_param / norm_counter over all entries.

        Entries where dec_param <= 0 or norm_counter >= 0 are replaced by 1
        before the minimum is taken.
        """
        all_param = dec_param.flatten()
        all_norm = norm_counter.flatten()
        mask = (all_param <= 0) | (0 <= all_norm)
        ratio = -all_param / all_norm
        ratio[mask] = 1.
        return cp.min(ratio)

    # shape same as self.dec_param
    norm_counter = cpfzeros((self.o.num_tag, 2, 2, 2))
    change = cpfzeros((self.o.max_len, 2))
    for arrays in dataset.batch_data:
        if getter:
            pos_array = getter(arrays)
        else:
            pos_array = cpasarray(arrays[1])
        batch_size, word_num = pos_array.shape
        # `change` is reused across batches, so reset it for this batch.
        change.fill(0.)
        # Every position contributes 1 / word_num to the root count.
        cpx.scatter_add(root_param, pos_array.flatten(), 1. / word_num)
        if word_num > 1:
            for child_i in range(word_num):
                # Sum of harmonic numbers for the child_i words on one side
                # and the word_num - child_i - 1 words on the other side.
                child_sum = get_harmonic_sum(child_i - 0) + get_harmonic_sum(word_num - child_i - 1)
                scale = (word_num - 1) / word_num / child_sum
                for head_i in range(word_num):
                    if child_i == head_i:
                        continue
                    direction = 0 if head_i > child_i else 1
                    head_pos = pos_array[:, head_i]
                    child_pos = pos_array[:, child_i]
                    # Weight decreases with the head-child distance.
                    diff = scale / abs(head_i - child_i)
                    cpx.scatter_add(trans_param, (head_pos, child_pos, direction), diff)
                    change[head_i, direction] += diff
        # NOTE(review): called once per batch, including batches where
        # word_num == 1 (change is then all zero) — confirm this placement.
        update_decision(change, norm_counter, pos_array)
    trans_param += self.o.count_smoothing
    dec_param += self.o.count_smoothing
    root_param += self.o.count_smoothing
    # Scale norm_counter by 0.9 times the ratio from first_child_update
    # before folding it into dec_param.
    es = first_child_update(norm_counter)
    norm_counter *= 0.9 * es
    dec_param += norm_counter
    # Normalise: root over all entries, trans over axis 1, dec over axis 3.
    root_param_sum = cp.sum(root_param)
    trans_param_sum = cp.sum(trans_param, axis=1, keepdims=True)
    decision_param_sum = cp.sum(dec_param, axis=3, keepdims=True)
    root_param /= root_param_sum
    trans_param /= trans_param_sum
    dec_param /= decision_param_sum
    # Convert to log space in place.
    cp.log(trans_param, out=trans_param)
    cp.log(root_param, out=root_param)
    cp.log(dec_param, out=dec_param)
def init_pretrained(self, dataset, getter=None):
    """Fill the traces from the head annotations in ``dataset`` and run m_step.

    Heads are read from ``instance.misc`` of every instance, valences are
    recovered from them, the events are written into
    ``self.batch_trans_trace``, ``self.batch_dec_trace`` and
    ``self.root_counter``, and the tag-indexed counters are passed to
    ``self.m_step``. Nothing is returned.

    Args:
        dataset: object providing ``instances``, ``batch_data``,
            ``build_batchs``, ``get_len`` and ``len()``.
        getter: optional callable mapping a batch to its tag array;
            defaults to ``cpasarray(x[1])``.
    """
    if getter is None:
        def getter(x):
            return cpasarray(x[1])

    def recovery_one(heads):
        """Recover (valences, head_valences) for one sentence from its heads.

        ``heads`` holds 1-based head positions; the values 0 and
        ``len(heads) + 1`` are treated as ROOT.
        """
        # Leftmost / rightmost child index seen so far for every word;
        # a word with no child on a side keeps its own index.
        left_most = np.arange(len(heads))
        right_most = np.arange(len(heads))
        for idx, each_head in enumerate(heads):
            if each_head in (0, len(heads) + 1):  # skip head is ROOT
                continue
            each_head -= 1
            if idx < left_most[each_head]:
                left_most[each_head] = idx
            if idx > right_most[each_head]:
                right_most[each_head] = idx
        valences = npiempty((len(heads), 2))
        head_valences = npiempty(len(heads))
        for idx, each_head in enumerate(heads):
            each_head -= 1
            valences[idx, 0] = NOCHILD if left_most[idx] == idx else HASCHILD
            valences[idx, 1] = NOCHILD if right_most[idx] == idx else HASCHILD
            # NOTE(review): a head equal to len(heads) + 1 (skipped as ROOT
            # above) becomes len(heads) here and would index left_most out
            # of range — confirm such heads never occur in the data.
            if each_head > idx:  # each_head = -1 `s head_valence is never used
                head_valences[idx] = NOCHILD if left_most[each_head] == idx else HASCHILD
            else:
                head_valences[idx] = NOCHILD if right_most[each_head] == idx else HASCHILD
        return valences, head_valences

    # One row per instance with max_len + 1 columns; the loop below fills
    # columns 1..instance.len only and never writes column 0.
    heads = npiempty((len(dataset), self.o.max_len + 1))
    valences = npiempty((len(dataset), self.o.max_len + 1, 2))
    head_valences = npiempty((len(dataset), self.o.max_len + 1))
    for idx, instance in enumerate(dataset.instances):
        one_heads = npasarray(list(map(int, instance.misc)))
        one_valences, one_head_valences = recovery_one(one_heads)
        heads[idx, 1:instance.len + 1] = one_heads
        valences[idx, 1:instance.len + 1] = one_valences
        head_valences[idx, 1:instance.len + 1] = one_head_valences
    heads = cpasarray(heads)
    valences = cpasarray(valences)
    head_valences = cpasarray(head_valences)
    batch_size, sentence_len = heads.shape
    len_array = cpasarray(dataset.get_len())
    batch_arange = cp.arange(batch_size)
    # Temporarily rebuild the dataset as a single batch to obtain the tags
    # of all instances, then restore the previous batching.
    save_batch_data = dataset.batch_data
    dataset.build_batchs(len(dataset), same_len=False, shuffle=False)
    tag_array = getter(dataset.batch_data[0])
    dataset.batch_data = save_batch_data
    self.reset_root_counter()
    self.batch_trans_trace = cpfzeros((batch_size, self.o.max_len, self.o.max_len, 2, self.o.cv))
    self.batch_dec_trace = cpfzeros((batch_size, self.o.max_len, self.o.max_len, 2, 2, 2))
    for m in range(1, sentence_len):
        h = heads[:, m]
        direction = (h <= m).astype(cpi)
        h_valence = head_valences[:, m]
        m_valence = valences[:, m]
        m_child_valence = h_valence if self.o.cv > 1 else cp.zeros_like(h_valence)
        # True where both the token and its head lie within the sentence.
        len_mask = ((h <= len_array) & (m <= len_array))
        # STOP events of token m on both sides.
        self.batch_dec_trace[batch_arange, m - 1, m - 1, 0, m_valence[:, 0], STOP] = len_mask
        self.batch_dec_trace[batch_arange, m - 1, m - 1, 1, m_valence[:, 1], STOP] = len_mask
        # Tokens whose head is 0 are counted as roots.
        head_mask = h == 0
        mask = head_mask * len_mask
        if mask.any():
            # tag_array is indexed with m - 1 here, whereas heads/valences
            # use m (their column 0 is the unfilled extra column).
            cpx.scatter_add(self.root_counter, tag_array[:, m - 1], mask)
        # Remaining tokens: record the attachment and the head's GO event.
        head_mask = ~head_mask
        mask = head_mask * len_mask
        if mask.any():
            self.batch_trans_trace[batch_arange, h - 1, m - 1, direction, m_child_valence] = mask
            self.batch_dec_trace[batch_arange, h - 1, m - 1, direction, h_valence, GO] = mask
    # mode=1 accumulates over the batch, leaving a leading dimension of 1.
    d, t = self.get_batch_counter_by_tag(tag_array, mode=1)
    self.m_step(t[0], d[0])
def run_em_estep(self, tag_array, len_array, trans_scores, dec_scores):
    """Fill the traces with expected counts and return the batch likelihood.

    Inside and outside tables are obtained from ``batch_inside`` and
    ``batch_outside``; for every (head, child) pair the quantity
    ``exp(inside + outside - prob)`` is written into
    ``self.batch_dec_trace`` / ``self.batch_trans_trace`` or accumulated
    into ``self.root_counter``.

    Note that ``trans_scores`` and ``dec_scores`` are rescaled with ``*=``
    when ``self.o.use_softmax_em`` is set, so the objects passed by the
    caller are modified if they support in-place multiplication.

    Args:
        tag_array: 2-D array whose shape gives ``(batch_size, fake_len)``.
        len_array: per-sentence lengths, converted with ``cpasarray``.
        trans_scores: transition scores.
        dec_scores: decision scores.

    Returns:
        ``cp.sum(prob).get()``, multiplied by
        ``1 - self.softmax_em_current_sigma`` when softmax EM is enabled.
    """
    batch_size, fake_len = tag_array.shape
    if self.o.use_softmax_em:
        # In-place rescaling of the caller's score arrays.
        trans_scores *= 1 / (1 - self.softmax_em_current_sigma)
        dec_scores *= 1 / (1 - self.softmax_em_current_sigma)
    if self.function_mask_set is not None:
        self.function_mask(trans_scores, tag_array)
    if DEBUG:
        assert not cp.isnan(trans_scores).any()
        assert not cp.isnan(dec_scores).any()
    ictable, iitable, prob = batch_inside(trans_scores, dec_scores, len_array)
    octable, oitable = batch_outside(ictable, iitable, trans_scores, dec_scores, len_array)
    if DEBUG:
        assert not cp.isnan(ictable).any()
        assert not cp.isnan(iitable).any()
        assert not cp.isnan(octable).any()
        assert not cp.isnan(oitable).any()
    # Only span2id is used below; the other returned tables are unused here.
    span2id, id2span, ijss, ikcs, ikis, kjcs, kjis, basic_span = constituent_index(fake_len)
    len_array = cpasarray(len_array)
    # Add an axis so prob broadcasts against the per-span table slices.
    prob = cp.expand_dims(prob, 1)
    for h in range(fake_len):
        for m in range(1, fake_len):
            # True where head or child lies beyond the sentence length;
            # counts at those batch rows are zeroed.
            len_mask = ((h > len_array) | (m > len_array))
            if h == m:
                # STOP events of token h, one per direction, taken from the
                # ictable/octable pair.
                for direction in range(2):
                    span_id = span2id[h, h, direction]
                    count = cp.exp(ictable[:, span_id, :] + octable[:, span_id, :] - prob)
                    count[len_mask] = 0.
                    self.batch_dec_trace[:, h - 1, m - 1, direction, :, STOP] = count
            else:
                # direction is 0 when the head index is greater than the
                # child index, 1 otherwise; the span is keyed with the
                # smaller index first.
                direction = 0 if h > m else 1
                span_id = span2id[m, h, direction] if direction == 0 else span2id[h, m, direction]
                count = cp.exp(iitable[:, span_id, :] + oitable[:, span_id, :] - prob)
                count[len_mask] = 0.
                if h == 0:
                    # Head index 0 is counted in root_counter by child tag.
                    cpx.scatter_add(self.root_counter, tag_array[:, m], cp.sum(count, axis=1))
                else:
                    # With cv == 1 the last axis of count is summed (kept as
                    # a size-1 axis) before being stored in the trans trace.
                    self.batch_trans_trace[:, h - 1, m - 1, direction, :] = \
                        cp.sum(count, axis=1, keepdims=True) if self.o.cv == 1 else count
                    self.batch_dec_trace[:, h - 1, m - 1, direction, :, GO] = count
    batch_likelihood = cp.sum(prob).get()
    if self.o.use_softmax_em:
        # Undo the 1 / (1 - sigma) scaling applied to the scores above.
        batch_likelihood *= (1 - self.softmax_em_current_sigma)
    return batch_likelihood