Example #1
    def test_fixed_random_number(self):
        # Test the fixed random numbers generated on the GPU by 'curandStatePhilox4_32_10_t'.
        if not paddle.is_compiled_with_cuda():
            return

        print("Test Fixed Random number on GPU------>")
        paddle.disable_static()
        paddle.set_device('gpu')
        paddle.seed(100)
        np.random.seed(100)

        x_np = np.random.rand(32, 1024, 1024)

        x = paddle.to_tensor(x_np, dtype='float64')
        y = paddle.bernoulli(x).numpy()
        index0, index1, index2 = np.nonzero(y)
        self.assertEqual(np.sum(index0), 260028995)
        self.assertEqual(np.sum(index1), 8582429431)
        self.assertEqual(np.sum(index2), 8581445798)
        expect = [0., 0., 0., 0., 0., 0., 0., 1., 1., 1.]
        self.assertTrue(np.array_equal(y[16, 500, 500:510], expect))

        x = paddle.to_tensor(x_np, dtype='float32')
        y = paddle.bernoulli(x).numpy()
        index0, index1, index2 = np.nonzero(y)
        self.assertEqual(np.sum(index0), 260092343)
        self.assertEqual(np.sum(index1), 8583509076)
        self.assertEqual(np.sum(index2), 8582778540)
        expect = [0., 0., 1., 1., 1., 1., 0., 1., 1., 1.]
        self.assertTrue(np.array_equal(y[16, 500, 500:510], expect))

        paddle.enable_static()
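
The test above can pin exact sums and slices because, once paddle.seed is fixed, the Philox counter-based generator produces the same draws on every run. A minimal reproducibility sketch (an illustration, not part of the test; it assumes a CUDA build, as the test does):

import numpy as np
import paddle

paddle.set_device('gpu')
x = paddle.full([4, 4], 0.5)

paddle.seed(100)
a = paddle.bernoulli(x).numpy()
paddle.seed(100)
b = paddle.bernoulli(x).numpy()
# Re-seeding restarts the generator state, so the two draws are identical.
assert np.array_equal(a, b)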
Example #2
    def _mask_tokens(self,
                     inputs,
                     special_tokens_mask,
                     mask_token_id,
                     token_len,
                     mlm_prob=0.15,
                     ignore_label=-100):
        """
        Prepare masked tokens inputs/labels for masked language modeling: 80% MASK, 10% random, 10% original.
        """
        labels = inputs.clone()
        probability_matrix = paddle.full(labels.shape, mlm_prob)
        probability_matrix[special_tokens_mask] = 0

        masked_indices = paddle.cast(
            paddle.bernoulli(probability_matrix), dtype=bool)
        # We only compute loss on masked tokens.
        labels[~masked_indices] = ignore_label

        # 80% of the time, we replace masked input tokens with tokenizer.mask_token ([MASK])
        indices_replaced = paddle.cast(
            paddle.bernoulli(paddle.full(labels.shape, 0.8)),
            dtype=bool) & masked_indices
        inputs[indices_replaced] = mask_token_id

        # 10% of the time, we replace masked input tokens with a random word
        indices_random = paddle.cast(
            paddle.bernoulli(paddle.full(labels.shape, 0.5)),
            dtype=bool) & masked_indices & ~indices_replaced
        random_words = paddle.randint(low=0, high=token_len, shape=labels.shape)
        inputs[indices_random] = random_words[indices_random]

        # The rest of the time (10% of the time) we keep the masked input tokens unchanged
        return inputs, labels
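
Note how the 80/10/10 split described in the docstring falls out of the two Bernoulli draws: 80% of the masked tokens get [MASK], and a 0.5 draw over the remaining ~20% leaves ~10% replaced by random tokens and ~10% unchanged. A quick numpy-only sanity check of that arithmetic (illustrative, not part of the class):

import numpy as np

rng = np.random.default_rng(0)
n = 1_000_000
masked = np.ones(n, dtype=bool)                          # pretend every token was selected for MLM
replaced = (rng.random(n) < 0.8) & masked                # -> [MASK]
randomized = (rng.random(n) < 0.5) & masked & ~replaced  # -> random token
kept = masked & ~replaced & ~randomized                  # -> unchanged
print(replaced.mean(), randomized.mean(), kept.mean())   # roughly 0.8, 0.1, 0.1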
Example #3
    def mask_tokens(self, batch_data):

        token_ids = [x[0] for x in batch_data]
        is_suffix = [x[1] for x in batch_data]

        # Create probability matrix where the probability of real tokens is
        # self.mlm_prob, while that of others is zero.
        data = self.add_special_tokens_and_set_maskprob(token_ids, is_suffix)
        token_ids, is_suffix, prob_matrix = data
        token_ids = paddle.to_tensor(token_ids,
                                     dtype='int64',
                                     stop_gradient=True)
        masked_token_ids = token_ids.clone()
        labels = token_ids.clone()

        # Create whole-word masks: if the head piece '百' of the word '百度' is
        # masked, its suffix piece '度' must be masked as well.
        prob_matrix = prob_matrix * (1 - is_suffix)
        word_mask_index = np.random.binomial(1, prob_matrix).astype('float')
        is_suffix_mask = (is_suffix == 1)
        word_mask_index_tmp = word_mask_index
        while word_mask_index_tmp.sum() > 0:
            word_mask_index_tmp = np.concatenate(
                [np.zeros((word_mask_index.shape[0], 1)), word_mask_index_tmp[:, :-1]],
                axis=1)
            word_mask_index_tmp = word_mask_index_tmp * is_suffix_mask
            word_mask_index += word_mask_index_tmp
        word_mask_index = word_mask_index.astype('bool')
        labels[~word_mask_index] = -100

        # 80% replaced with [MASK].
        token_mask_index = paddle.bernoulli(paddle.full(
            labels.shape, 0.8)).astype('bool').numpy() & word_mask_index
        masked_token_ids[token_mask_index] = self._ids['mask']

        # 10% replaced with random token ids.
        token_random_index = paddle.to_tensor(
            paddle.bernoulli(paddle.full(labels.shape, 0.5)).astype(
                'bool').numpy() & word_mask_index & ~token_mask_index)
        random_tokens = paddle.randint(low=0,
                                       high=self.tokenizer.vocab_size,
                                       shape=labels.shape,
                                       dtype='int64')
        masked_token_ids = paddle.where(token_random_index, random_tokens,
                                        masked_token_ids)

        return masked_token_ids, token_ids, labels
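
The while loop above implements the whole-word masking: the mask sampled on a word's head piece is shifted right by one position per iteration and kept only where it lands on suffix pieces, so every sub-word of a masked word ends up masked. A toy numpy trace of that propagation (hypothetical tokens, for illustration only):

import numpy as np

# tokens:       [百, 度, 是, 一, 家, 公, 司]
# is_suffix:    [0,  1,  0,  0,  0,  0,  1]   (1 marks a non-leading sub-word piece)
# initial mask: [1,  0,  0,  0,  0,  1,  0]   (sampled on word heads only)
is_suffix_mask = np.array([[0, 1, 0, 0, 0, 0, 1]], dtype=bool)
word_mask_index = np.array([[1, 0, 0, 0, 0, 1, 0]], dtype=float)

tmp = word_mask_index
while tmp.sum() > 0:
    # shift right by one and keep only the bits that fall on suffix positions
    tmp = np.concatenate([np.zeros((word_mask_index.shape[0], 1)), tmp[:, :-1]], axis=1)
    tmp = tmp * is_suffix_mask
    word_mask_index += tmp

print(word_mask_index.astype(bool))  # [[ True  True False False False  True  True]]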
Example #4
    def test_dygraph(self):
        paddle.disable_static()
        x = paddle.rand([1024, 1024])
        out = paddle.bernoulli(x)
        paddle.enable_static()
        hist, prob = output_hist(out.numpy())
        self.assertTrue(np.allclose(hist, prob, rtol=0, atol=0.01),
                        "hist: " + str(hist))
Example #5
    def test_static(self):
        x = paddle.rand([1024, 1024])
        out = paddle.bernoulli(x)
        exe = paddle.static.Executor(paddle.CPUPlace())
        out = exe.run(paddle.static.default_main_program(),
                      fetch_list=[out.name])
        hist, prob = output_hist(out[0])
        self.assertTrue(np.allclose(hist, prob, rtol=0, atol=0.01),
                        "hist: " + str(hist))
Example #6
    def mask_tokens(self, examples):
        if self.tokenizer.mask_token is None:
            raise ValueError(
                "The tokenizer does not have a mask_token; please check.")
        mask_token_id = self.tokenizer.convert_tokens_to_ids(
            self.tokenizer.mask_token)

        raw_inputs, probability_matrix = self.add_special_tokens_and_set_maskprob(
            examples, True, self.max_seq_length)
        raw_inputs = self.tensorize_batch(raw_inputs, "int64")
        probability_matrix = self.tensorize_batch(probability_matrix,
                                                  "float32")
        inputs = raw_inputs.clone()
        labels = raw_inputs.clone()

        total_indices = paddle.bernoulli(probability_matrix).astype("bool")
        unuse_labels = paddle.full(labels.shape, -100).astype("int64")
        labels = paddle.where(total_indices, labels, unuse_labels)

        # 80% MASK
        indices_mask = paddle.bernoulli(paddle.full(
            labels.shape, 0.8)).astype("bool").logical_and(total_indices)
        masked_inputs = paddle.full(inputs.shape,
                                    mask_token_id).astype("int64")
        inputs = paddle.where(indices_mask, masked_inputs, inputs)

        # 10% Random
        indices_random = paddle.bernoulli(paddle.full(
            labels.shape,
            0.5)).astype("bool").logical_and(total_indices).logical_and(
                indices_mask.logical_not())
        random_words = paddle.randint(low=0,
                                      high=self.tokenizer.vocab_size,
                                      shape=labels.shape,
                                      dtype="int64")
        inputs = paddle.where(indices_random, random_words, inputs)

        # 10% Original
        return inputs, raw_inputs, labels
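
Unlike Example #2, this collator avoids boolean-mask assignment and builds each output with paddle.where, selecting element-wise between the replacement tensor and the current inputs. A small illustration of the pattern (standalone values chosen only for the sketch):

import paddle

ids = paddle.to_tensor([[5, 6, 7, 8]], dtype="int64")
mask = paddle.to_tensor([[True, False, True, False]])
mask_id = paddle.full(ids.shape, 103, dtype="int64")  # 103 is just a stand-in mask token id

# Take mask_id where the condition is True, keep the original ids elsewhere.
out = paddle.where(mask, mask_id, ids)  # -> [[103, 6, 103, 8]]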
Example #7
    def _sample(self, n_samples=1, **kwargs):
        if n_samples > 1:
            sample_shape_ = np.concatenate([[n_samples], self.batch_shape],
                                           axis=0).tolist()
            _probs = self._probs * paddle.ones(sample_shape_)
        else:
            _probs = self._probs

        # _probs = paddle.cast(_probs, self.param_dtype)
        _probs *= paddle.cast(_probs <= 1, self.param_dtype)
        sample_ = paddle.bernoulli(_probs)
        sample_ = paddle.cast(sample_, dtype=self.dtype)

        self.sample_cache = sample_
        return sample_
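
The n_samples > 1 branch above broadcasts the probabilities to the sample shape so that a single paddle.bernoulli call produces n_samples independent draws per parameter; the _probs <= 1 cast then zeroes out any probability above one, which paddle.bernoulli expects to lie in [0, 1]. A small standalone illustration of the broadcast step (values are made up):

import paddle

probs = paddle.to_tensor([0.2, 0.5, 0.9])
n_samples = 4
# Broadcast the per-parameter probabilities to [n_samples, batch_shape] so each row
# is an independent draw from the same Bernoulli parameters.
tiled = probs * paddle.ones([n_samples, 3])
samples = paddle.bernoulli(tiled)
print(samples.shape)  # [4, 3]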
Example #8
    def decode(self, x_tree_vecs, prob_decode):
        """
        Decode tree structre from tree latent space.
        Args:
            x_tree_mess(tensor): tree latent represenation.
            prob_decode(bool): using bernoulli distribution in tree decode if prob_decode=true.
        Returns:
            root node and all nodes.
        """
        assert x_tree_vecs.shape[0] == 1
        stack = []
        init_hiddens = paddle.zeros([1, self.hidden_size])
        zero_pad = paddle.zeros([1, 1, self.hidden_size])
        contexts = paddle.zeros([1]).astype('int64')

        root_score = self.aggregate(init_hiddens, contexts, x_tree_vecs, 'word')
        root_wid = paddle.argmax(root_score, axis=1)
        root_wid = int(root_wid.numpy())

        root = MolTreeNode(self.vocab.get_smiles(root_wid))
        root.wid = root_wid
        root.idx = 0
        stack.append((root, self.vocab.get_slots(root.wid)))

        all_nodes = [root]
        h = {}
        for step in range(MAX_DECODE_LEN):
            node_x, fa_slot = stack[-1]
            cur_h_nei = [h[(node_y.idx, node_x.idx)] for node_y in node_x.neighbors]
            if len(cur_h_nei) > 0:
                cur_h_nei = paddle.reshape(paddle.stack(cur_h_nei, axis=0), shape=[1, -1, self.hidden_size])
            else:
                cur_h_nei = zero_pad

            cur_x = paddle.to_tensor([node_x.wid])
            cur_x = self.embedding(cur_x)
            cur_h = paddle.sum(cur_h_nei, axis=1)
            stop_hiddens = paddle.concat([cur_x, cur_h], axis=1)
            stop_hiddens = F.relu(self.U_i(stop_hiddens))
            stop_score = self.aggregate(stop_hiddens, contexts, x_tree_vecs, 'stop')

            if prob_decode:
                backtrack = (paddle.bernoulli(F.sigmoid(stop_score)).item() == 0)
            else:
                backtrack = (float(stop_score.numpy()) < 0)

            if not backtrack:
                new_h = GRU(cur_x, cur_h_nei, self.W_z, self.W_r, self.U_r, self.W_h)
                pred_score = self.aggregate(new_h, contexts, x_tree_vecs, 'word')

                if prob_decode:
                    sort_wid = paddle.multinomial(F.softmax(pred_score, axis=1).squeeze(), 5)
                else:
                    sort_wid = paddle.argsort(
                        pred_score, axis=1, descending=True)
                    sort_wid = sort_wid.squeeze()

                next_wid = None
                for wid in sort_wid[:5]:
                    slots = self.vocab.get_slots(wid)
                    node_y = MolTreeNode(self.vocab.get_smiles(wid))
                    if have_slots(fa_slot, slots) and can_assemble(node_x, node_y):
                        next_wid = wid
                        next_slots = slots
                        break

                if next_wid is None:
                    backtrack = True
                else:
                    node_y = MolTreeNode(self.vocab.get_smiles(next_wid))
                    node_y.wid = int(next_wid.numpy())
                    node_y.idx = len(all_nodes)
                    node_y.neighbors.append(node_x)
                    h[(node_x.idx, node_y.idx)] = new_h[0]
                    stack.append((node_y, next_slots))
                    all_nodes.append(node_y)

            if backtrack:
                if len(stack) == 1:
                    break

                node_fa, _ = stack[-2]
                cur_h_nei = [h[(node_y.idx, node_x.idx)] for node_y in node_x.neighbors if node_y.idx != node_fa.idx]
                if len(cur_h_nei) > 0:
                    cur_h_nei = paddle.reshape(paddle.stack(cur_h_nei, axis=0), shape=[1, -1, self.hidden_size])
                else:
                    cur_h_nei = zero_pad

                new_h = GRU(cur_x, cur_h_nei, self.W_z, self.W_r, self.U_r, self.W_h)
                h[(node_x.idx, node_fa.idx)] = new_h[0]
                node_fa.neighbors.append(node_x)
                stack.pop()

        return root, all_nodes
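
The two stop rules above agree in expectation: sigmoid(stop_score) < 0.5 exactly when stop_score < 0, so the greedy branch picks the more likely outcome of the Bernoulli that the stochastic branch samples from. A tiny standalone check (illustrative only):

import paddle
import paddle.nn.functional as F

stop_score = paddle.to_tensor([[-0.7]])
# Greedy rule: backtrack when the raw score is negative.
greedy_backtrack = float(stop_score.numpy()) < 0
# Stochastic rule: sigmoid(-0.7) ≈ 0.33, so sampling 0 (backtrack) happens about 2/3 of the time.
stochastic_backtrack = paddle.bernoulli(F.sigmoid(stop_score)).item() == 0
print(greedy_backtrack, stochastic_backtrack)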
Example #9
    def bernoulli_(self, p):
        paddorch.copy(paddle.bernoulli(paddle.ones_like(self) * p, name=None), self)
        return self
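
For context, this mirrors torch.Tensor.bernoulli_(p): paddle.ones_like(self) * p builds a tensor of probabilities, paddle.bernoulli samples it, and paddorch.copy writes the result back into self in place. A hypothetical sketch of the computation (only the paddle calls are library API; the in-place copy is the wrapper's job):

import paddle

# What bernoulli_(0.3) computes before copying back into the tensor:
t = paddle.zeros([2, 3])
draws = paddle.bernoulli(paddle.ones_like(t) * 0.3)  # independent Bernoulli(0.3) per element
print(draws)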