def test_fixed_random_number(self):
    """Check that GPU Bernoulli sampling is reproducible for a fixed seed.

    The GPU random numbers are generated by 'curandStatePhilox4_32_10_t',
    so with the seed fixed the sampled mask must reproduce the recorded
    index sums and the recorded slice exactly, for both float64 and
    float32 inputs. The test returns early when Paddle was not compiled
    with CUDA.
    """
    if not paddle.is_compiled_with_cuda():
        return

    print("Test Fixed Random number on GPU------>")
    paddle.disable_static()
    try:
        paddle.set_device('gpu')
        paddle.seed(100)
        np.random.seed(100)

        x_np = np.random.rand(32, 1024, 1024)

        # (dtype, expected sums of the non-zero indices per axis,
        #  expected values of y[16, 500, 500:510]).
        # The order matters: both draws consume the same seeded generator.
        cases = (
            ('float64', (260028995, 8582429431, 8581445798),
             [0., 0., 0., 0., 0., 0., 0., 1., 1., 1.]),
            ('float32', (260092343, 8583509076, 8582778540),
             [0., 0., 1., 1., 1., 1., 0., 1., 1., 1.]),
        )
        for dtype, index_sums, expect in cases:
            x = paddle.to_tensor(x_np, dtype=dtype)
            y = paddle.bernoulli(x).numpy()
            index0, index1, index2 = np.nonzero(y)
            self.assertEqual(np.sum(index0), index_sums[0])
            self.assertEqual(np.sum(index1), index_sums[1])
            self.assertEqual(np.sum(index2), index_sums[2])
            self.assertTrue(np.array_equal(y[16, 500, 500:510], expect))
    finally:
        # Switch back to static mode even when an assertion above fails,
        # so a failure here does not leave dygraph mode on for other tests.
        paddle.enable_static()
def _mask_tokens(self,
                 inputs,
                 special_tokens_mask,
                 mask_token_id,
                 token_len,
                 mlm_prob=0.15,
                 ignore_label=-100):
    """
    Build masked-language-modeling inputs and labels.

    Each position is selected with probability `mlm_prob`; positions
    indexed by `special_tokens_mask` get probability 0. Of the selected
    positions, 80% are replaced with `mask_token_id`, 10% with an id drawn
    by `paddle.randint(low=0, high=token_len)`, and 10% are left as they
    are. `inputs` is modified in place and also returned.

    Returns:
        tuple: `(inputs, labels)`; `labels` is a clone of the original
        inputs with `ignore_label` at every position that was not selected.
    """
    labels = inputs.clone()
    shape = labels.shape

    # Decide which positions take part in the loss.
    select_prob = paddle.full(shape, mlm_prob)
    select_prob[special_tokens_mask] = 0
    selected = paddle.cast(paddle.bernoulli(select_prob), dtype=bool)
    # Loss is only computed on the selected tokens.
    labels[~selected] = ignore_label

    # 80% of the selected tokens become the mask token.
    mask_draw = paddle.cast(
        paddle.bernoulli(paddle.full(shape, 0.8)), dtype=bool)
    use_mask_token = mask_draw & selected
    inputs[use_mask_token] = mask_token_id

    # Half of the remaining selected tokens (10% overall) become a random
    # word.
    random_draw = paddle.cast(
        paddle.bernoulli(paddle.full(shape, 0.5)), dtype=bool)
    use_random_token = random_draw & selected & ~use_mask_token
    replacement_ids = paddle.randint(low=0, high=token_len, shape=shape)
    inputs[use_random_token] = replacement_ids[use_random_token]

    # The last 10% of the selected tokens keep their original id.
    return inputs, labels
def mask_tokens(self, batch_data):
    """
    Apply whole-word masking to a batch for masked language modeling.

    Each item of `batch_data` provides token ids at index 0 and suffix
    flags at index 1. Word-initial tokens are sampled with the
    probabilities returned by `add_special_tokens_and_set_maskprob`, and
    the suffix tokens that directly follow a sampled token are masked with
    it. Of the masked tokens, 80% become `self._ids['mask']`, 10% become a
    random id below `self.tokenizer.vocab_size`, and 10% are unchanged.

    Returns:
        tuple: `(masked_token_ids, token_ids, labels)`; `labels` holds -100
        at every position that was not masked.
    """
    raw_ids = [sample[0] for sample in batch_data]
    raw_suffix = [sample[1] for sample in batch_data]

    # The probability matrix is self.mlm_prob for real tokens and zero for
    # the others (as produced by the helper).
    token_ids, is_suffix, prob_matrix = \
        self.add_special_tokens_and_set_maskprob(raw_ids, raw_suffix)
    token_ids = paddle.to_tensor(
        token_ids, dtype='int64', stop_gradient=True)
    masked_token_ids = token_ids.clone()
    labels = token_ids.clone()

    # Sample only word-initial tokens; suffix tokens never start a mask.
    prob_matrix = prob_matrix * (1 - is_suffix)
    word_mask = np.random.binomial(1, prob_matrix).astype('float')
    suffix_positions = (is_suffix == 1)

    # Propagate each mask one position to the right for as long as that
    # position is a suffix, so every piece of a chosen word is masked.
    zero_column = np.zeros((word_mask.shape[0], 1))
    carry = word_mask
    while carry.sum() > 0:
        carry = np.concatenate([zero_column, carry[:, :-1]], axis=1)
        carry = carry * suffix_positions
        word_mask += carry
    word_mask = word_mask.astype('bool')

    labels[~word_mask] = -100

    # 80% replaced with [MASK].
    mask_draw = paddle.bernoulli(paddle.full(labels.shape,
                                             0.8)).astype('bool').numpy()
    token_mask_index = mask_draw & word_mask
    masked_token_ids[token_mask_index] = self._ids['mask']

    # 10% replaced with random token ids.
    random_draw = paddle.bernoulli(paddle.full(labels.shape,
                                               0.5)).astype('bool').numpy()
    token_random_index = paddle.to_tensor(random_draw & word_mask &
                                          ~token_mask_index)
    random_tokens = paddle.randint(
        low=0,
        high=self.tokenizer.vocab_size,
        shape=labels.shape,
        dtype='int64')
    masked_token_ids = paddle.where(token_random_index, random_tokens,
                                    masked_token_ids)

    return masked_token_ids, token_ids, labels
def test_dygraph(self):
    """Sample `paddle.bernoulli` in dygraph mode and check its histogram.

    The two arrays returned by `output_hist` must agree within an
    absolute tolerance of 0.01.
    """
    paddle.disable_static()
    probs = paddle.rand([1024, 1024])
    sampled = paddle.bernoulli(probs)
    paddle.enable_static()

    hist, prob = output_hist(sampled.numpy())
    within_tolerance = np.allclose(hist, prob, rtol=0, atol=0.01)
    self.assertTrue(within_tolerance, "hist: " + str(hist))
def test_static(self):
    """Run `paddle.bernoulli` through a CPU executor and check its histogram.

    The default main program is executed on `paddle.CPUPlace()`; the two
    arrays returned by `output_hist` for the fetched result must agree
    within an absolute tolerance of 0.01.
    """
    probs = paddle.rand([1024, 1024])
    sampled = paddle.bernoulli(probs)

    executor = paddle.static.Executor(paddle.CPUPlace())
    fetched = executor.run(paddle.static.default_main_program(),
                           fetch_list=[sampled.name])

    hist, prob = output_hist(fetched[0])
    within_tolerance = np.allclose(hist, prob, rtol=0, atol=0.01)
    self.assertTrue(within_tolerance, "hist: " + str(hist))
def mask_tokens(self, examples):
    """
    Build masked-language-modeling inputs and labels for a batch.

    Positions are sampled from the probability matrix returned by
    `add_special_tokens_and_set_maskprob`. Of the sampled positions, 80%
    are replaced with the tokenizer's mask token id, 10% with a random id
    below `self.tokenizer.vocab_size`, and 10% are left unchanged.

    Returns:
        tuple: `(inputs, raw_inputs, labels)`; `labels` holds -100 at every
        position that was not sampled.

    Raises:
        ValueError: if the tokenizer has no mask token.
    """
    if self.tokenizer.mask_token is None:
        raise ValueError(
            "the tokenizer does not have mask_token, please check!")
    mask_token_id = self.tokenizer.convert_tokens_to_ids(
        self.tokenizer.mask_token)

    raw_inputs, probability_matrix = self.add_special_tokens_and_set_maskprob(
        examples, True, self.max_seq_length)
    raw_inputs = self.tensorize_batch(raw_inputs, "int64")
    probability_matrix = self.tensorize_batch(probability_matrix, "float32")
    inputs = raw_inputs.clone()
    labels = raw_inputs.clone()

    # Positions that take part in the loss; all others get label -100.
    total_indices = paddle.bernoulli(probability_matrix).astype("bool")
    unuse_labels = paddle.full(labels.shape, -100).astype("int64")
    labels = paddle.where(total_indices, labels, unuse_labels)

    # 80% MASK
    mask_draw = paddle.bernoulli(paddle.full(labels.shape,
                                             0.8)).astype("bool")
    indices_mask = paddle.logical_and(mask_draw, total_indices)
    masked_inputs = paddle.full(inputs.shape, mask_token_id).astype("int64")
    inputs = paddle.where(indices_mask, masked_inputs, inputs)

    # 10% Random: half of the sampled positions that were not masked above.
    random_draw = paddle.bernoulli(paddle.full(labels.shape,
                                               0.5)).astype("bool")
    indices_random = paddle.logical_and(
        paddle.logical_and(random_draw, total_indices),
        paddle.logical_not(indices_mask))
    random_words = paddle.randint(
        low=0,
        high=self.tokenizer.vocab_size,
        shape=labels.shape,
        dtype="int64")
    inputs = paddle.where(indices_random, random_words, inputs)

    # 10% Original
    return inputs, raw_inputs, labels
def _sample(self, n_samples=1, **kwargs):
    """Draw Bernoulli samples from this distribution's probabilities.

    Args:
        n_samples (int): number of samples to draw. When greater than 1,
            the probabilities are broadcast to
            `[n_samples] + batch_shape` before sampling.
        **kwargs: not used by this method.

    Returns:
        The sampled tensor cast to `self.dtype`. The same tensor is also
        stored in `self.sample_cache`.
    """
    if n_samples > 1:
        # Build the shape from plain Python ints. Concatenating through
        # numpy promotes the result to float when batch_shape is empty.
        sample_shape_ = [int(n_samples)] + [int(d) for d in self.batch_shape]
        _probs = self._probs * paddle.ones(sample_shape_)
    else:
        _probs = self._probs

    # Zero out probabilities greater than 1. The multiply is out-of-place
    # on purpose: `_probs` may be the very tensor stored in `self._probs`,
    # and the stored parameter must not be modified by sampling.
    _probs = _probs * paddle.cast(_probs <= 1, self.param_dtype)

    sample_ = paddle.bernoulli(_probs)
    sample_ = paddle.cast(sample_, dtype=self.dtype)
    self.sample_cache = sample_
    return sample_
def decode(self, x_tree_vecs, prob_decode):
    """
    Decode a tree structure from the tree latent space.

    Args:
        x_tree_vecs(tensor): tree latent representation; its first
            dimension must be 1.
        prob_decode(bool): if true, the stop decision is sampled from a
            bernoulli distribution and the candidate words from a
            multinomial distribution; otherwise the stop score sign and the
            sorted word scores are used.

    Returns:
        root node and all nodes.
    """
    assert x_tree_vecs.shape[0] == 1

    init_hiddens = paddle.zeros([1, self.hidden_size])
    zero_pad = paddle.zeros([1, 1, self.hidden_size])
    contexts = paddle.zeros([1]).astype('int64')

    def gather_messages(messages):
        # Stack the messages into shape [1, n, hidden_size]; fall back to
        # the zero padding when there are none.
        if len(messages) > 0:
            return paddle.reshape(
                paddle.stack(messages, axis=0),
                shape=[1, -1, self.hidden_size])
        return zero_pad

    # Pick the root word with the highest score.
    root_score = self.aggregate(init_hiddens, contexts, x_tree_vecs, 'word')
    root_wid = int(paddle.argmax(root_score, axis=1).numpy())

    root = MolTreeNode(self.vocab.get_smiles(root_wid))
    root.wid = root_wid
    root.idx = 0

    stack = [(root, self.vocab.get_slots(root.wid))]
    all_nodes = [root]
    h = {}  # messages keyed by (source node idx, target node idx)

    for _ in range(MAX_DECODE_LEN):
        node_x, fa_slot = stack[-1]
        cur_h_nei = gather_messages(
            [h[(nei.idx, node_x.idx)] for nei in node_x.neighbors])

        cur_x = self.embedding(paddle.to_tensor([node_x.wid]))
        cur_h = paddle.sum(cur_h_nei, axis=1)
        stop_hiddens = F.relu(
            self.U_i(paddle.concat([cur_x, cur_h], axis=1)))
        stop_score = self.aggregate(stop_hiddens, contexts, x_tree_vecs,
                                    'stop')

        # Decide whether to go back to the parent or to expand a child.
        if prob_decode:
            backtrack = (
                paddle.bernoulli(F.sigmoid(stop_score)).item() == 0)
        else:
            backtrack = (float(stop_score.numpy()) < 0)

        if not backtrack:
            new_h = GRU(cur_x, cur_h_nei, self.W_z, self.W_r, self.U_r,
                        self.W_h)
            pred_score = self.aggregate(new_h, contexts, x_tree_vecs,
                                        'word')

            if prob_decode:
                sort_wid = paddle.multinomial(
                    F.softmax(pred_score, axis=1).squeeze(), 5)
            else:
                sort_wid = paddle.argsort(
                    pred_score, axis=1, descending=True).squeeze()

            # Take the first of the top five candidates that fits the
            # parent's slots and can be assembled with the current node.
            chosen = None
            for wid in sort_wid[:5]:
                slots = self.vocab.get_slots(wid)
                candidate = MolTreeNode(self.vocab.get_smiles(wid))
                if have_slots(fa_slot, slots) and can_assemble(node_x,
                                                               candidate):
                    chosen = (wid, slots)
                    break

            if chosen is None:
                # No candidate fits, so treat this step as a backtrack.
                backtrack = True
            else:
                next_wid, next_slots = chosen
                node_y = MolTreeNode(self.vocab.get_smiles(next_wid))
                node_y.wid = int(next_wid.numpy())
                node_y.idx = len(all_nodes)
                node_y.neighbors.append(node_x)
                h[(node_x.idx, node_y.idx)] = new_h[0]
                stack.append((node_y, next_slots))
                all_nodes.append(node_y)

        if backtrack:
            if len(stack) == 1:
                # Only the root is left on the stack: decoding is finished.
                break

            node_fa = stack[-2][0]
            # Message to the parent uses every neighbor except the parent.
            cur_h_nei = gather_messages([
                h[(nei.idx, node_x.idx)] for nei in node_x.neighbors
                if nei.idx != node_fa.idx
            ])
            new_h = GRU(cur_x, cur_h_nei, self.W_z, self.W_r, self.U_r,
                        self.W_h)
            h[(node_x.idx, node_fa.idx)] = new_h[0]
            node_fa.neighbors.append(node_x)
            stack.pop()

    return root, all_nodes
def bernoulli_(self, p):
    """Draw Bernoulli(p) samples shaped like this tensor and return `self`.

    The samples are handed to `paddorch.copy` together with `self`
    (presumably copying them into `self` in place; confirm against
    `paddorch.copy`).
    """
    probs = paddle.ones_like(self) * p
    drawn = paddle.bernoulli(probs, name=None)
    paddorch.copy(drawn, self)
    return self