Example #1
    def infer(self, inputs, outputs):
        """Run model inference.

        Only supports generation for now.
        """
        if self.do_generation:
            return self.generator.inference(self, inputs, outputs)
        else:
            tgt_logits = self._calc_logits(outputs["enc_out"], inputs["tgt_idx"])
            tgt_lm_loss = layers.softmax_with_cross_entropy(
                logits=tgt_logits, label=inputs["tgt_label"])
            lm_loss = layers.fill_constant_batch_size_like(
                outputs["enc_out"], [-1], self.dtype, 0)
            lm_loss = layers.scatter(lm_loss, inputs["tgt_idx"][:, 0], tgt_lm_loss[:, 0], overwrite=False)
            tokens_num = layers.fill_constant_batch_size_like(
                outputs["enc_out"], [-1], self.dtype, 0)
            tgt_tokens_num = layers.fill_constant_batch_size_like(
                tgt_lm_loss, [-1], self.dtype, 1)
            tokens_num = layers.scatter(tokens_num, inputs["tgt_idx"][:, 0], tgt_tokens_num, overwrite=False)
            predictions = {
                "lm_loss": lm_loss,
                "tokens_num": tokens_num,
                "data_id": inputs["data_id"]
            }
            return predictions
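
The recurring primitive in these examples is fill_constant_batch_size_like. Below is a minimal numpy sketch of its documented semantics (my own illustration, not part of the listing): it builds a tensor of the requested shape, except that the dimension at output_dim_idx is replaced by the batch size read from input.shape[input_dim_idx], and fills it with a constant.

import numpy as np

def fill_constant_batch_size_like_np(ref, shape, dtype, value,
                                     input_dim_idx=0, output_dim_idx=0):
    # Copy the (dynamic) batch size from the reference tensor into the
    # placeholder position of `shape`; all other dims are taken literally.
    out_shape = list(shape)
    out_shape[output_dim_idx] = ref.shape[input_dim_idx]
    return np.full(out_shape, value, dtype=dtype)

tokens = np.zeros((4, 16, 1), dtype="int64")            # batch_size = 4
ones = fill_constant_batch_size_like_np(tokens, [-1, 1, 1], "float32", 1.0)
print(ones.shape)  # (4, 1, 1)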
Example #2
 def forward(self, src_word, src_pos, src_slf_attn_bias, trg_src_attn_bias):
     enc_output = self.encoder(src_word, src_pos, src_slf_attn_bias)
     # init states (caches) for the transformer; these are updated to follow the selected beams
     caches = [{
         "k": layers.fill_constant_batch_size_like(
             input=enc_output,
             shape=[-1, self.n_head, 0, self.d_key],
             dtype=enc_output.dtype,
             value=0),
         "v": layers.fill_constant_batch_size_like(
             input=enc_output,
             shape=[-1, self.n_head, 0, self.d_value],
             dtype=enc_output.dtype,
             value=0),
     } for i in range(self.n_layer)]
     enc_output = TransformerBeamSearchDecoder.tile_beam_merge_with_batch(
         enc_output, self.beam_size)
     trg_src_attn_bias = TransformerBeamSearchDecoder.tile_beam_merge_with_batch(
         trg_src_attn_bias, self.beam_size)
     static_caches = self.decoder.decoder.prepare_static_cache(enc_output)
     rs, _ = self.beam_search_decoder(
         inits=caches,
         enc_output=enc_output,
         trg_src_attn_bias=trg_src_attn_bias,
         static_caches=static_caches)
     return rs
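
Note the 0-sized third dimension in the caches above: it creates an empty per-layer key/value buffer whose time axis grows as decoding proceeds. A numpy sketch of that growth (shapes are illustrative, not taken from the model):

import numpy as np

batch, n_head, d_key = 2, 8, 64
k_cache = np.zeros((batch, n_head, 0, d_key), dtype="float32")  # empty cache
for step in range(3):
    new_k = np.random.rand(batch, n_head, 1, d_key).astype("float32")
    k_cache = np.concatenate([k_cache, new_k], axis=2)  # time axis: 0 -> 3
print(k_cache.shape)  # (2, 8, 3, 64)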
Example #3
    def forward(self, inputs, is_infer=False):
        """
        Run the model's main forward pass.
        """
        outputs = {}
        if is_infer:
            self.generation_caches = [{
                "k":
                layers.fill_constant_batch_size_like(
                    input=inputs["token_ids"],
                    shape=[-1, 0, self.d_key * self.n_head],
                    dtype=self.dtype,
                    value=0),
                "v":
                layers.fill_constant_batch_size_like(
                    input=inputs["token_ids"],
                    shape=[-1, 0, self.d_value * self.n_head],
                    dtype=self.dtype,
                    value=0),
            } for i in range(self.n_layer)]
        else:
            self.generation_caches = None

        outputs["enc_out"], generation_checkpoints = self._generation_network(
            token_ids=inputs["token_ids"],
            type_ids=inputs["type_ids"],
            pos_ids=inputs["pos_ids"],
            generation_mask=inputs["generation_mask"],
            gather_idx=inputs.get("parent_idx", None))

        if not is_infer:
            outputs["checkpoints"] = generation_checkpoints
        return outputs
Example #4
    def _init_generation_caches(self, src_ids):
        # caches already initialized, or fusion disabled: return current caches (None when fusion is off)
        if self._init_gen_cache or self._fuse is False:
            return self.generation_caches

        self.generation_caches = []
        num_heads = self.gpt.num_attention_heads
        num_layers = self.gpt.num_hidden_layers
        mp_n_head = num_heads // self.gpt.topo.mp_info.size
        hidden_size = self.gpt.hidden_size
        head_size = hidden_size // num_heads
        for i in range(num_layers):
            if self._fuse:
                kv = layers.fill_constant_batch_size_like(
                    input=src_ids,
                    shape=[2, -1, mp_n_head, 0, head_size],
                    dtype=self._dtype,
                    value=0,
                    output_dim_idx=1)
                self.generation_caches.append(
                    TransformerDecoderLayer.Cache(kv))
            else:
                k = layers.fill_constant_batch_size_like(
                    input=src_ids,
                    shape=[-1, mp_n_head, 0, head_size],
                    dtype=self._dtype,
                    value=0)
                v = layers.fill_constant_batch_size_like(
                    input=src_ids,
                    shape=[-1, mp_n_head, 0, head_size],
                    dtype=self._dtype,
                    value=0)
                self.generation_caches.append(MultiHeadAttention.Cache(k, v))
        self._init_gen_cache = True
        return self.generation_caches
Example #5
    def _prepare_timestep_input(self, state, step_idx):
        model_input = {"gather_idx": state["parent_idx"]}

        # token ids
        pre_ids = layers.array_read(array=state["tgt_ids"], i=step_idx)
        model_input["token_ids"] = layers.unsqueeze(pre_ids, 1)

        # position ids
        pre_pos = layers.array_read(array=state["tgt_pos"], i=step_idx)
        model_input["pos_ids"] = layers.gather(pre_pos, state["parent_idx"])

        pre_scores = layers.array_read(array=state["scores"], i=step_idx)

        # generation_mask
        tgt_generation_mask = layers.array_read(state["tgt_generation_mask"], i=step_idx)
        append_mask = layers.fill_constant_batch_size_like(pre_ids, [-1, 1, 1], "float32", 1.0)
        tgt_generation_mask = layers.concat([tgt_generation_mask, append_mask], axis=2)

        model_input["generation_mask"] = pre_mask = layers.gather(tgt_generation_mask, state["parent_idx"])

        model_input["type_ids"] = layers.fill_constant_batch_size_like(pre_mask, [-1, 1, 1], "int64", 1)
        if self.use_role:
            model_input["role_ids"] = layers.fill_constant_batch_size_like(pre_mask, [-1, 1, 1], "int64", 0)

        return model_input, pre_ids, pre_scores
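
A numpy sketch of the mask update in _prepare_timestep_input above (shapes are illustrative): at each decoding step a column of ones is appended so the newly generated token can be attended to.

import numpy as np

batch, prev_len = 3, 7
tgt_generation_mask = np.ones((batch, 1, prev_len), dtype="float32")
append_mask = np.ones((batch, 1, 1), dtype="float32")
tgt_generation_mask = np.concatenate([tgt_generation_mask, append_mask],
                                     axis=2)
print(tgt_generation_mask.shape)  # (3, 1, 8)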
Example #6
    def forward(self, inputs, is_infer=False):
        """
        Run the model's main forward pass.
        """
        outputs = {}
        if is_infer:
            self.generation_caches = [{
                "k":
                layers.fill_constant_batch_size_like(
                    input=inputs["token_ids"],
                    shape=[-1, 0, self.d_key * self.n_head],
                    dtype=self.dtype,
                    value=0),
                "v":
                layers.fill_constant_batch_size_like(
                    input=inputs["token_ids"],
                    shape=[-1, 0, self.d_value * self.n_head],
                    dtype=self.dtype,
                    value=0),
            } for i in range(self.n_layer)]
        else:
            self.generation_caches = None

        latent_embeddings = layers.create_parameter(
            shape=[self.emb_size, self.latent_type_size],
            dtype=self.dtype,
            attr=fluid.ParamAttr(name=self.latent_emb_name,
                                 initializer=self.param_initializer))

        if is_infer:
            latent_id = inputs["latent_id"]
            weights = layers.one_hot(latent_id, self.latent_type_size)
        else:
            logits, recognition_checkpoints = self._recognition_network(
                token_ids=inputs["token_ids"],
                type_ids=inputs["type_ids"],
                pos_ids=inputs["pos_ids"],
                role_ids=inputs.get("role_ids", None),
                recognition_mask=inputs["recognition_mask"],
            )
            outputs["post_probs"] = layers.softmax(logits)
            weights = self._gumbel_softmax(logits)
            outputs["checkpoints"] = recognition_checkpoints

        latent_emb = layers.matmul(x=weights,
                                   y=latent_embeddings,
                                   transpose_y=True)
        outputs["enc_out"], generation_checkpoints = self._generation_network(
            token_ids=inputs["token_ids"],
            type_ids=inputs["type_ids"],
            pos_ids=inputs["pos_ids"],
            role_ids=inputs.get("role_ids", None),
            generation_mask=inputs["generation_mask"],
            aux_emb=layers.unsqueeze(latent_emb, axes=[1]),
            gather_idx=inputs.get("parent_idx", None),
        )

        if not is_infer:
            outputs["checkpoints"].extend(generation_checkpoints)
        return outputs
Example #7
 def _get_statistics(self, inputs, outputs):
     statistics = {}
     if "tgt_label" in inputs:
         statistics["tokens_num"] = layers.reduce_sum(
             layers.fill_constant_batch_size_like(input=inputs["tgt_label"], value=1.0, shape=[-1], dtype="int64"))
     statistics["batch_size"] = layers.reduce_sum(
         layers.fill_constant_batch_size_like(input=inputs["token_ids"], value=1.0, shape=[-1], dtype="int64"))
     return statistics
Example #8
def fluid_sequence_first_step(lodtensor):
    """
    return a lod tensor
    """
    offset = layers.fill_constant_batch_size_like(lodtensor, shape=[-1,1], value=0, dtype='int64')
    length = layers.fill_constant_batch_size_like(lodtensor, shape=[-1,1], value=1, dtype='int64')
    res = layers.sequence_slice(lodtensor, offset=offset, length=length)
    return res
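
A plain-Python sketch of what fluid_sequence_first_step computes for a 1-level LoDTensor (my reading of offset=0, length=1 per sequence):

data = [1, 2, 3, 4, 5]
lod = [0, 3, 5]                      # two sequences: [1, 2, 3] and [4, 5]
first_steps = [data[start] for start in lod[:-1]]
print(first_steps)                   # [1, 4]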
Example #9
    def forward(self):
        """ forward
        """
        src, dst = L.read_file(self.pyreader)

        if self.is_sparse:
            # sparse mode uses 2-D input.
            src = L.reshape(src, [-1, 1])
            dst = L.reshape(dst, [-1, 1])

        src_embed = split_embedding(src, self.num_nodes, self.hidden_size,
                                    self.embed_init, "weight", self.num_part,
                                    self.is_sparse)

        dst_embed = split_embedding(dst, self.num_nodes, self.hidden_size,
                                    self.embed_init, "weight", self.num_part,
                                    self.is_sparse)

        if self.is_sparse:
            src_embed = L.reshape(src_embed,
                                  [-1, 1, self.num_featuers, self.hidden_size])
            dst_embed = L.reshape(
                dst_embed,
                [-1, self.neg_num + 1, self.num_featuers, self.hidden_size])

        src_embed = L.reduce_mean(src_embed, 2)
        dst_embed = L.reduce_mean(dst_embed, 2)

        logits = L.matmul(src_embed, dst_embed,
                          transpose_y=True)  # [batch_size, 1, neg_num+1]

        pos_label = L.fill_constant_batch_size_like(logits, [-1, 1, 1],
                                                    "float32", 1)
        neg_label = L.fill_constant_batch_size_like(logits,
                                                    [-1, 1, self.neg_num],
                                                    "float32", 0)
        label = L.concat([pos_label, neg_label], -1)

        pos_weight = L.fill_constant_batch_size_like(logits, [-1, 1, 1],
                                                     "float32", self.neg_num)
        neg_weight = L.fill_constant_batch_size_like(logits,
                                                     [-1, 1, self.neg_num],
                                                     "float32", 1)
        weight = L.concat([pos_weight, neg_weight], -1)

        weight.stop_gradient = True
        label.stop_gradient = True

        loss = L.sigmoid_cross_entropy_with_logits(logits, label)
        loss = loss * weight
        loss = L.reduce_mean(loss)
        loss = loss * ((self.neg_num + 1) / 2 / self.neg_num)
        loss.persistable = True
        self.loss = loss
        return loss
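
A numpy sketch of the label/weight layout built above for one positive and neg_num negative candidates (sizes are made up): the positive column is weighted by neg_num so that positives and negatives contribute comparably to the loss.

import numpy as np

batch, neg_num = 2, 4
label = np.concatenate([np.ones((batch, 1, 1)),
                        np.zeros((batch, 1, neg_num))], axis=-1)
weight = np.concatenate([np.full((batch, 1, 1), float(neg_num)),
                         np.ones((batch, 1, neg_num))], axis=-1)
print(label.shape, weight.shape)  # (2, 1, 5) (2, 1, 5)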
Example #10
        def erniesage_v2_aggregator(gw, feature, hidden_size, act, initializer,
                                    learning_rate, name):
            feature = L.unsqueeze(feature, [-1])
            msg = gw.send(ernie_send, nfeat_list=[("term_ids", feature)])
            neigh_feature = gw.recv(
                msg,
                lambda feat: F.layers.sequence_pool(feat, pool_type="sum"))

            term_ids = feature
            cls = L.fill_constant_batch_size_like(term_ids, [-1, 1, 1],
                                                  "int64", 1)
            term_ids = L.concat([cls, term_ids], 1)
            term_ids.stop_gradient = True
            ernie = ErnieModel(term_ids,
                               L.zeros_like(term_ids),
                               config=self.config.ernie_config)
            self_feature = ernie.get_pooled_output()

            self_feature = L.fc(
                self_feature,
                hidden_size,
                act=act,
                param_attr=F.ParamAttr(name=name + "_l",
                                       learning_rate=learning_rate),
            )
            neigh_feature = L.fc(
                neigh_feature,
                hidden_size,
                act=act,
                param_attr=F.ParamAttr(name=name + "_r",
                                       learning_rate=learning_rate),
            )
            output = L.concat([self_feature, neigh_feature], axis=1)
            output = L.l2_normalize(output, axis=1)
            return output
Example #11
def _apply_rule(condition, inputs, gmr_mask, grammar, name=None):
    """apply_rule.

    Args:
        condition (TYPE): NULL
        inputs (Variable): shape = [batch_size, max_len, hidden_size]. At inference time, max_len is always 1.
        gmr_mask (TYPE): NULL
        grammar (TYPE): NULL

    Returns: TODO

    Raises: NULL
    """
    fc_name = None
    if name is not None:
        fc_name = name + '_apply_rule_fc'

    condition = layers.cast(condition, dtype='float32')
    gmr_output = layers.fc(inputs,
                           size=grammar.grammar_size,
                           **nn_utils.param_attr(fc_name,
                                                 INIT_SCALE,
                                                 need_bias=True))
    gmr_output_masked = layers.elementwise_add(gmr_output, gmr_mask)

    zeros = layers.fill_constant_batch_size_like(
        gmr_output_masked,
        shape=[-1, grammar.MAX_TABLE + grammar.MAX_COLUMN + grammar.MAX_VALUE],
        dtype='float32',
        value=-INF)
    final_output = tensor.concat([gmr_output_masked, zeros], axis=-1)
    true_final_output = layers.elementwise_mul(final_output, condition, axis=0)
    return true_final_output
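
A numpy sketch of the padding step in _apply_rule above (sizes invented): the rule logits are extended with -INF columns for the table/column/value slots, so a later softmax over the full action space assigns those positions zero probability.

import numpy as np

INF = 1e9
batch, grammar_size, extra_slots = 2, 4, 3
gmr_output_masked = np.random.rand(batch, grammar_size).astype("float32")
zeros = np.full((batch, extra_slots), -INF, dtype="float32")
final_output = np.concatenate([gmr_output_masked, zeros], axis=-1)
print(final_output.shape)  # (2, 7)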
Example #12
 def ernie_pool(self, term_ids):
     cls = L.fill_constant_batch_size_like(term_ids, [-1, 1], "int64",
                                           self.config.cls_id)
     term_ids = L.concat([cls, term_ids], 1)
     ernie_model = ErnieModel(self.config.ernie_config, "")
     feature, _ = ernie_model(term_ids)
     return feature
Example #13
    def _recognition_network(self,
                             token_ids,
                             type_ids,
                             pos_ids,
                             role_ids,
                             input_mask):
        """Run recognition network.

        Args:
            token_ids: represents the token id of each token, shape is [batch_size, max_seq_len, 1]
            type_ids: represents the type of each token, shape is [batch_size, max_seq_len, 1]
            pos_ids: represents the position of each token, shape is [batch_size, max_seq_len, 1]
            input_mask: represents the attention mask matrix in each Transformer block,
                shape is [batch_size, max_seq_len + 1, max_seq_len + 1]

        Returns:
            A tuple containing the output embeddings of the Transformer and the checkpoints of this pass.
        """
        mask_id = layers.fill_constant_batch_size_like(
            input=token_ids, shape=[-1, 1, 1], value=self.mask_id, dtype="int64")
        mask_emb = layers.embedding(
            input=mask_id,
            size=[self.vocab_size, self.emb_size],
            dtype=self.dtype,
            param_attr=fluid.ParamAttr(
                name=self.token_emb_name, initializer=self.param_initializer))
        emb_out, attn_bias = self._gen_input(
            token_ids, type_ids, pos_ids, role_ids, input_mask, aux_emb=mask_emb)

        return self._encode(emb_out, attn_bias)
Example #14
def fluid_sequence_index(input, index):
    """
    index: (batch_size, 1)
    """
    ones = layers.fill_constant_batch_size_like(input, shape=[-1,1], value=1, dtype='int64')
    output = layers.sequence_slice(input, offset=index, length=ones)
    return output
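
A plain-Python sketch of fluid_sequence_index for a 1-level LoDTensor (my reading of sequence_slice with a per-sequence offset and length 1):

data = [1, 2, 3, 4, 5]
lod = [0, 3, 5]                      # two sequences: [1, 2, 3] and [4, 5]
index = [2, 0]                       # per-sequence offsets
picked = [data[lod[i] + index[i]] for i in range(len(index))]
print(picked)                        # [3, 4]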
Example #15
    def _recognition_network(self,
                             token_ids,
                             type_ids,
                             pos_ids,
                             role_ids,
                             recognition_mask):
        mask_id = layers.fill_constant_batch_size_like(
            input=token_ids, shape=[-1, 1, 1], value=self.mask_id, dtype="int64")
        mask_emb = layers.embedding(
            input=mask_id,
            size=[self.vocab_size, self.emb_size],
            dtype=self.dtype,
            param_attr=fluid.ParamAttr(
                name=self.token_emb_name, initializer=self.param_initializer))
        emb_out, n_head_self_attn_mask = self._gen_input(
            token_ids, type_ids, pos_ids, role_ids, recognition_mask, aux_emb=mask_emb)

        recognition_out, checkpoints = self._encode(emb_out, n_head_self_attn_mask)

        recognition_feat = layers.slice(
            input=recognition_out, axes=[1], starts=[0], ends=[1])
        recognition_feat = layers.fc(
            input=recognition_feat,
            size=self.hidden_size,
            act="tanh",
            param_attr=fluid.ParamAttr(
                name="recognition_fc.w_0", initializer=self.param_initializer),
            bias_attr="recognition_fc.b_0")
        logits = layers.fc(
            input=recognition_feat,
            size=self.latent_type_size,
            param_attr=fluid.ParamAttr(
                name=self.latent_emb_name, initializer=self.param_initializer),
            bias_attr="recognition_bias")
        return logits, checkpoints
Example #16
    def forward(self, feat):
        """
        Args:
            feat: input feature with shape [batch, n_edges, dim].
        
        Return:
            output_feat: output feature of set2set pooling with shape [batch, 2*dim].
        """

        seqlen = 1
        h = L.fill_constant_batch_size_like(
            feat, [1, self.n_layers, self.input_dim], "float32", 0)
        h = L.transpose(h, [1, 0, 2])
        c = h

        # [seqlen, batch, dim]
        q_star = L.fill_constant_batch_size_like(
            feat, [1, seqlen, self.output_dim], "float32", 0)
        q_star = L.transpose(q_star, [1, 0, 2])

        for _ in range(self.n_iters):

            # q [seqlen, batch, dim]
            # h [layer, batch, dim]
            q, h, c = L.lstm(
                q_star,
                h,
                c,
                seqlen,
                self.input_dim,
                self.n_layers,
                is_bidirec=False)

            # e [batch, seqlen, n_edges]
            e = L.matmul(L.transpose(q, [1, 0, 2]), feat, transpose_y=True)
            # alpha [batch, seqlen, n_edges]
            alpha = L.softmax(e)

            # readout [batch, seqlen, dim]
            readout = L.matmul(alpha, feat)
            readout = L.transpose(readout, [1, 0, 2])

            # q_star [seqlen, batch, dim + dim]
            q_star = L.concat([q, readout], -1)

        return L.squeeze(q_star, [0])
Example #17
def fluid_sequence_advance(input, OOV):
    """
    args:
        input.data = [1,2,3, 4,5]
        input.lod = [[0, 3, 5]]
    return:
        output.data = [0,1,2, 0,4]
        output.lod = [[0, 3, 5]]
    """
    seq_len = fluid_sequence_get_seq_len(input)
    zeros = layers.fill_constant_batch_size_like(seq_len, shape=[-1,1], value=0, dtype='int64')
    ones = layers.fill_constant_batch_size_like(seq_len, shape=[-1,1], value=1, dtype='int64')
    oov = layers.sequence_slice(input, zeros, ones) * 0 + OOV
    oov.stop_gradient = True
    input_padded = layers.sequence_concat([oov, input])
    output = layers.sequence_slice(input_padded, zeros, seq_len)
    return output
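
A plain-Python check of the docstring above (OOV = 0): each sequence is shifted right by one position and its first slot is filled with OOV.

def advance(seq, oov):
    return [oov] + seq[:-1]

print(advance([1, 2, 3], 0) + advance([4, 5], 0))  # [0, 1, 2, 0, 4]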
Example #18
    def test_ifelse(self):
        prog = Program()
        startup_prog = Program()
        with program_guard(prog, startup_prog):
            image = layers.data(name='x', shape=[784], dtype='float32')

            label = layers.data(name='y', shape=[1], dtype='int64')

            limit = layers.fill_constant_batch_size_like(input=label,
                                                         dtype='int64',
                                                         shape=[1],
                                                         value=5.0)
            cond = layers.less_than(x=label, y=limit)
            ie = layers.IfElse(cond)

            with ie.true_block():
                true_image = ie.input(image)
                hidden = layers.fc(input=true_image, size=100, act='tanh')
                prob = layers.fc(input=hidden, size=10, act='softmax')
                ie.output(prob)

            with ie.false_block():
                false_image = ie.input(image)
                hidden = layers.fc(input=false_image, size=200, act='tanh')
                prob = layers.fc(input=hidden, size=10, act='softmax')
                ie.output(prob)

            prob = ie()
            loss = layers.cross_entropy(input=prob[0], label=label)
            avg_loss = layers.mean(loss)

            optimizer = MomentumOptimizer(learning_rate=0.001, momentum=0.9)
            optimizer.minimize(avg_loss, startup_prog)
        train_reader = paddle.batch(paddle.reader.shuffle(
            paddle.dataset.mnist.train(), buf_size=8192),
                                    batch_size=200)

        place = core.CPUPlace()
        exe = Executor(place)

        exe.run(startup_prog)
        PASS_NUM = 100
        for pass_id in range(PASS_NUM):
            for data in train_reader():
                x_data = np.array([x[0] for x in data]).astype("float32")
                y_data = np.array([x[1] for x in data]).astype("int64")
                y_data = y_data.reshape((y_data.shape[0], 1))

                outs = exe.run(prog,
                               feed={
                                   'x': x_data,
                                   'y': y_data
                               },
                               fetch_list=[avg_loss])
                print(outs[0])
                if outs[0] < 1.0:
                    return
        self.assertFalse(True)
Example #19
 def body_func(step_idx, pre_ids, pre_scores, gather_idx, caches,
               trg_src_attn_bias):
     # gather cell states corresponding to selected parent
     pre_caches = map_structure(
         lambda x: layers.gather(x, index=gather_idx), caches)
     pre_src_attn_bias = layers.gather(trg_src_attn_bias,
                                       index=gather_idx)
     pre_pos = layers.elementwise_mul(
         x=layers.fill_constant_batch_size_like(
              input=pre_src_attn_bias,  # can't use a LoD tensor here
             value=1,
             shape=[-1, 1],
             dtype=pre_ids.dtype),
         y=step_idx,
         axis=0)
     logits = wrap_decoder((pre_ids, pre_pos, None, pre_src_attn_bias),
                           trg_vocab_size,
                           max_in_len,
                           n_layer,
                           n_head,
                           d_key,
                           d_value,
                           d_model,
                           d_inner_hid,
                           prepostprocess_dropout,
                           attention_dropout,
                           relu_dropout,
                           preprocess_cmd,
                           postprocess_cmd,
                           weight_sharing,
                           enc_output=enc_output,
                           caches=pre_caches,
                           bos_idx=bos_idx)
     # intra-beam topK
     topk_scores, topk_indices = layers.topk(
         input=layers.softmax(logits), k=beam_size)
     accu_scores = layers.elementwise_add(x=layers.log(topk_scores),
                                          y=pre_scores,
                                          axis=0)
     # beam_search op uses lod to differentiate branches.
     accu_scores = layers.lod_reset(accu_scores, pre_ids)
     # topK reduction across beams, also contain special handle of
     # end beams and end sentences(batch reduction)
     selected_ids, selected_scores, gather_idx = layers.beam_search(
         pre_ids=pre_ids,
         pre_scores=pre_scores,
         ids=topk_indices,
         scores=accu_scores,
         beam_size=beam_size,
         end_id=eos_idx,
         return_parent_idx=True)
     step_idx = layers.increment(x=step_idx, value=1.0, in_place=False)
     layers.array_write(selected_ids, i=step_idx, array=ids)
     layers.array_write(selected_scores, i=step_idx, array=scores)
     return (step_idx, selected_ids, selected_scores, gather_idx,
             pre_caches, pre_src_attn_bias)
Example #20
 def gen_batch_like(value,
                    dtype="int64",
                    shape=[-1, 1, 1],
                    is_scalar=True):
     if is_scalar:
         return layers.fill_constant_batch_size_like(
             input=parent_idx,
             value=value,
             shape=shape,
             dtype=dtype)
     else:
         return layers.elementwise_mul(
             x=layers.fill_constant_batch_size_like(
                 input=parent_idx,
                 value=1,
                 shape=shape,
                 dtype=dtype),
             y=value,
             axis=0)
Example #21
        def erniesage_v3_aggregator(gw, feature, hidden_size, act, initializer, learning_rate, name):
            msg = gw.send(copy_send, nfeat_list=[("h", feature)])
            neigh_feature = gw.recv(msg, ernie_recv)
            neigh_feature = L.cast(L.unsqueeze(neigh_feature, [-1]), "int64")

            feature = L.unsqueeze(feature, [-1])
            cls = L.fill_constant_batch_size_like(feature, [-1, 1, 1], "int64", 1)
            term_ids = L.concat([cls, feature[:, :-1], neigh_feature], 1)
            term_ids.stop_gradient = True
            return term_ids
Example #22
    def test_ifelse(self):
        prog = Program()
        startup_prog = Program()
        with program_guard(prog, startup_prog):
            image = layers.data(name='x', shape=[784], dtype='float32')

            label = layers.data(name='y', shape=[1], dtype='int64')

            limit = layers.fill_constant_batch_size_like(
                input=label, dtype='int64', shape=[1], value=5.0)
            cond = layers.less_than(x=label, y=limit)
            ie = layers.IfElse(cond)

            with ie.true_block():
                true_image = ie.input(image)
                hidden = layers.fc(input=true_image, size=100, act='tanh')
                prob = layers.fc(input=hidden, size=10, act='softmax')
                ie.output(prob)

            with ie.false_block():
                false_image = ie.input(image)
                hidden = layers.fc(input=false_image, size=200, act='tanh')
                prob = layers.fc(input=hidden, size=10, act='softmax')
                ie.output(prob)

            prob = ie()
            loss = layers.cross_entropy(input=prob[0], label=label)
            avg_loss = layers.mean(loss)

            optimizer = MomentumOptimizer(learning_rate=0.001, momentum=0.9)
            optimizer.minimize(avg_loss, startup_prog)
        train_reader = paddle.batch(
            paddle.reader.shuffle(
                paddle.dataset.mnist.train(), buf_size=8192),
            batch_size=200)

        place = core.CPUPlace()
        exe = Executor(place)

        exe.run(startup_prog)
        PASS_NUM = 100
        for pass_id in range(PASS_NUM):
            for data in train_reader():
                x_data = np.array([x[0] for x in data]).astype("float32")
                y_data = np.array([x[1] for x in data]).astype("int64")
                y_data = y_data.reshape((y_data.shape[0], 1))

                outs = exe.run(prog,
                               feed={'x': x_data,
                                     'y': y_data},
                               fetch_list=[avg_loss])
                print(outs[0])
                if outs[0] < 1.0:
                    return
        self.assertFalse(True)
Example #23
def fluid_sequence_delay(input, OOV):
    """
    args:
        input: 1-level LoDTensor
    return:
        
    """
    seq_len = fluid_sequence_get_seq_len(input)
    zeros = layers.fill_constant_batch_size_like(seq_len,
                                                 shape=[-1, 1],
                                                 value=0,
                                                 dtype='int64')
    ones = layers.fill_constant_batch_size_like(seq_len,
                                                shape=[-1, 1],
                                                value=1,
                                                dtype='int64')
    oov = layers.sequence_slice(input, zeros, ones) * 0 + OOV
    oov.stop_gradient = True
    input_padded = layers.sequence_concat([input, oov])
    output = layers.sequence_slice(input_padded, ones, seq_len)
    return output
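
A plain-Python check of my reading of fluid_sequence_delay (OOV = 0): each sequence is shifted left by one position and its last slot becomes OOV.

def delay(seq, oov):
    return seq[1:] + [oov]

print(delay([1, 2, 3], 0) + delay([4, 5], 0))  # [2, 3, 0, 5, 0]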
Example #24
    def __call__(self, src, src_length, trg=None, trg_length=None):
        # encoder
        encoder_output, encoder_final_state = self.encoder(
            self.src_embeder(src), src_length)

        decoder_initial_states = [
            encoder_final_state,
            self.decoder.decoder_cell.get_initial_states(
                batch_ref=encoder_output, shape=[encoder_output.shape[-1]])
        ]
        src_mask = layers.sequence_mask(src_length,
                                        maxlen=layers.shape(src)[1],
                                        dtype="float32")
        encoder_padding_mask = (src_mask - 1.0) * 1e9
        encoder_padding_mask = layers.unsqueeze(encoder_padding_mask, [1])

        # decoder
        decoder_kwargs = {
            "inputs": self.trg_embeder(trg),
            "sequence_length": trg_length,
        } if self.decoder.decoding_strategy == "train_greedy" else (
            {
                "embedding_fn": self.trg_embeder,
                "beam_size": self.beam_size,
                "start_token": self.start_token,
                "end_token": self.end_token
            } if self.decoder.decoding_strategy == "beam_search" else {
                "embedding_fn":
                self.trg_embeder,
                "start_tokens":
                layers.fill_constant_batch_size_like(input=encoder_output,
                                                     shape=[-1],
                                                     dtype=src.dtype,
                                                     value=self.start_token),
                "end_token":
                self.end_token
            })
        decoder_kwargs["output_layer"] = self.output_layer

        (decoder_output, decoder_final_state,
         dec_seq_lengths) = self.decoder(decoder_initial_states,
                                         encoder_output, encoder_padding_mask,
                                         **decoder_kwargs)
        if self.decoder.decoding_strategy == "beam_search":  # for inference
            return decoder_output
        logits, samples, sample_length = (decoder_output.cell_outputs,
                                          decoder_output.sample_ids,
                                          dec_seq_lengths)
        probs = layers.softmax(logits)
        return probs, samples, sample_length
Example #25
    def spatio_conv_layer(self, x, Ks, c_in, c_out, name):
        """Spatio convolution layer"""
        _, T, n, _ = x.shape
        if c_in > c_out:
            x_input = fl.conv2d(input=x,
                                num_filters=c_out,
                                filter_size=[1, 1],
                                stride=[1, 1],
                                padding="SAME",
                                data_format="NHWC",
                                param_attr=fluid.ParamAttr(name="%s_conv2d_1" %
                                                           name))
        elif c_in < c_out:
            # if the number of input channels is less than the output,
            # pad x with zeros up to the output channel size.
            pad = fl.fill_constant_batch_size_like(
                input=x,
                shape=[-1, T, n, c_out - c_in],
                dtype="float32",
                value=0.0)
            x_input = fl.concat([x, pad], axis=3)
        else:
            x_input = x

        for i in range(Ks):
            # x_input shape: [B,T, num_nodes, c_out]
            x_input = fl.reshape(x_input, [-1, c_out])

            x_input = self.message_passing(self.gw,
                                           x_input,
                                           name="%s_mp_%d" % (name, i),
                                           norm=self.gw.node_feat["norm"])

            x_input = fl.fc(x_input,
                            size=c_out,
                            bias_attr=False,
                            param_attr=fluid.ParamAttr(name="%s_gcn_fc_%d" %
                                                       (name, i)))

            bias = fluid.layers.create_parameter(shape=[c_out],
                                                 dtype='float32',
                                                 is_bias=True,
                                                 name='%s_gcn_bias_%d' %
                                                 (name, i))
            x_input = fluid.layers.elementwise_add(x_input, bias, act="relu")

            x_input = fl.reshape(x_input, [-1, T, n, c_out])

        return x_input
Example #26
 def gen_cache(self, key, value=None, type=Cache):
     """
     Generates cache for `forward` usage in inference according to the arguments.
     The generated cache is an instance of `MultiHeadAttention.Cache` or an
     instance of `MultiHeadAttention.StaticCache`.
     """
     if type == MultiHeadAttention.StaticCache:  # static_kv
         k, v = self.compute_kv(key, value)
         return self.StaticCache(k, v)
     elif value is None:  # incremental_state
         k = layers.fill_constant_batch_size_like(
             input=key,
             shape=[-1, self.num_heads, 0, self.head_dim],
             dtype=key.dtype,
             value=0)
         v = layers.fill_constant_batch_size_like(
             input=key,
             shape=[-1, self.num_heads, 0, self.head_dim],
             dtype=key.dtype,
             value=0)
         return self.Cache(k, v)
     else:
         # incremental_state with initial value, mainly for usage like UniLM
         return self.Cache(key, value)
Example #27
    def gru_step(self, input, hidden, mask=None):
        """ gru step """
        hidden_array = []
        for i in range(self.num_layers):
            hidden_temp = layers.slice(hidden,
                                       axes=[0],
                                       starts=[i],
                                       ends=[i + 1])
            hidden_temp = layers.reshape(hidden_temp,
                                         shape=[-1, self.hidden_size])
            hidden_array.append(hidden_temp)

        last_hidden_array = []
        for k in range(self.num_layers):
            trans_input = layers.matmul(input, self.weight_input_array[k])
            trans_input += self.bias_input_array[k]
            trans_hidden = layers.matmul(hidden_array[k],
                                         self.weight_hidden_array[k])
            trans_hidden += self.bias_hidden_array[k]

            input_array = layers.split(trans_input, num_or_sections=3, dim=-1)
            trans_array = layers.split(trans_hidden, num_or_sections=3, dim=-1)

            reset_gate = layers.sigmoid(input_array[0] + trans_array[0])
            input_gate = layers.sigmoid(input_array[1] + trans_array[1])
            new_gate = layers.tanh(input_array[2] +
                                   reset_gate * trans_array[2])

            new_hidden = new_gate + input_gate * (hidden_array[k] - new_gate)

            if mask:
                neg_mask = layers.fill_constant_batch_size_like(
                    input=mask, shape=[1], value=1.0, dtype='float32') - mask
                new_hidden = new_hidden * mask + hidden_array[k] * neg_mask

            last_hidden_array.append(new_hidden)
            input = new_hidden

            if self.dropout and self.dropout > 0.0:
                input = layers.dropout(input, dropout_prob=self.dropout)

        last_hidden = layers.concat(last_hidden_array, 0)
        last_hidden = layers.reshape(
            last_hidden, shape=[self.num_layers, -1, self.hidden_size])

        return input, last_hidden
Example #28
def fluid_sequence_delay2(input, seq_len, OOV):
    """
    args:
        input: 1-level LoDTensor
        seq_len: 1-
    return:
        
    """
    oov = layers.cast(seq_len * 0 + OOV, input.dtype)
    oov.stop_gradient = True
    input_padded = layers.sequence_concat([input, oov])
    offset = layers.fill_constant_batch_size_like(seq_len,
                                                  shape=[-1, 1],
                                                  value=1,
                                                  dtype='int64')
    output = layers.sequence_slice(input_padded, offset,
                                   layers.cast(seq_len, 'int64'))
    return output
Example #29
        def ernie_send(src_feat, dst_feat, edge_feat):
            """doc"""
            cls = L.fill_constant_batch_size_like(src_feat["term_ids"],
                                                  [-1, 1, 1], "int64", 1)
            src_ids = L.concat([cls, src_feat["term_ids"]], 1)
            dst_ids = dst_feat["term_ids"]

            sent_ids = L.concat([L.zeros_like(src_ids),
                                 L.ones_like(dst_ids)], 1)
            term_ids = L.concat([src_ids, dst_ids], 1)

            term_ids.stop_gradient = True
            sent_ids.stop_gradient = True
            ernie = ErnieModel(term_ids,
                               sent_ids,
                               config=self.config.ernie_config)
            feature = ernie.get_pooled_output()
            return feature
Example #30
    def ernie_send_aggregate(self, gw, feature, act, name):
        def ernie_send(src_feat, dst_feat, edge_feat):
            def build_position_ids(term_ids):
                input_mask = L.cast(term_ids > 0, "int64")
                position_ids = L.cumsum(input_mask, axis=1) - 1
                return position_ids

            """doc"""
            # input_ids
            cls = L.fill_constant_batch_size_like(src_feat["term_ids"],
                                                  [-1, 1], "int64",
                                                  self.config.cls_id)
            src_ids = L.concat([cls, src_feat["term_ids"]], 1)
            dst_ids = dst_feat["term_ids"]

            # sent_ids
            sent_ids = L.concat([L.zeros_like(src_ids),
                                 L.ones_like(dst_ids)], 1)
            term_ids = L.concat([src_ids, dst_ids], 1)

            # position_ids
            position_ids = build_position_ids(term_ids)
            ernie_model = ErnieModel(self.config.ernie_config, "")
            feature, _ = ernie_model(term_ids, sent_ids, position_ids)
            return feature

        term_ids = feature
        msg = gw.send(ernie_send, nfeat_list=[("term_ids", term_ids)])
        neigh_feature = gw.recv(
            msg, lambda feat: F.layers.sequence_pool(feat, pool_type="sum"))

        cls = L.fill_constant_batch_size_like(term_ids, [-1, 1], "int64",
                                              self.config.cls_id)
        term_ids = L.concat([cls, term_ids], 1)
        ernie_model = ErnieModel(self.config.ernie_config, "")
        self_feature, _ = ernie_model(term_ids)

        hidden_size = self.config.hidden_size
        self_feature = linear(self_feature, hidden_size, name + "_l", act)
        neigh_feature = linear(neigh_feature, hidden_size, name + "_r", act)
        output = L.concat([self_feature, neigh_feature], axis=1)
        output = L.l2_normalize(output, axis=1)
        return output
Example #31
 def topp_sampling(self, probs):
     sorted_probs, sorted_idx = layers.argsort(probs, descending=True)
     cum_sorted_probs = layers.cumsum(sorted_probs, axis=1, exclusive=True)
     lt_cond = paddle.cast(
         paddle.less_than(
             cum_sorted_probs,
             layers.fill_constant_batch_size_like(cum_sorted_probs,
                                                  cum_sorted_probs.shape,
                                                  cum_sorted_probs.dtype,
                                                  self.topp)), "float32")
     old_probs = probs
     candidate_probs = sorted_probs * lt_cond
     probs = candidate_probs / paddle.sum(
         candidate_probs, axis=-1, keep_dim=True)
     sampling_ids = layers.sampling_id(probs, dtype="int")
     sampling_ids = paddle.index_sample(sorted_idx,
                                        paddle.unsqueeze(sampling_ids, [1]))
     sampling_ids = paddle.squeeze(sampling_ids, [1])
     probs = old_probs
     return probs, sampling_ids
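
A numpy sketch of the same top-p (nucleus) filtering (the threshold value is illustrative): keep the tokens whose exclusive cumulative probability is below topp, renormalize, and sample among them.

import numpy as np

def topp_filter(probs, topp=0.9):
    order = np.argsort(-probs)                 # descending
    sorted_p = probs[order]
    cum = np.cumsum(sorted_p) - sorted_p       # exclusive cumsum
    keep = (cum < topp).astype("float64")
    candidate = sorted_p * keep
    return order, candidate / candidate.sum()

probs = np.array([0.5, 0.3, 0.15, 0.05])
order, filtered = topp_filter(probs)
sampled_id = np.random.choice(order, p=filtered)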
Example #32
    def test_raw_api(self):
        prog = Program()
        startup_prog = Program()
        with program_guard(prog, startup_prog):
            image = layers.data(name='x', shape=[784], dtype='float32')

            label = layers.data(name='y', shape=[1], dtype='int64')

            limit = layers.fill_constant_batch_size_like(
                input=label, dtype='int64', shape=[1], value=5.0)
            cond = layers.less_than(x=label, y=limit)
            true_image, false_image = layers.split_lod_tensor(
                input=image, mask=cond)

            true_out = layers.create_tensor(dtype='float32')
            true_cond = layers.ConditionalBlock([true_image])

            with true_cond.block():
                hidden = layers.fc(input=true_image, size=100, act='tanh')
                prob = layers.fc(input=hidden, size=10, act='softmax')
                layers.assign(input=prob, output=true_out)

            false_out = layers.create_tensor(dtype='float32')
            false_cond = layers.ConditionalBlock([false_image])

            with false_cond.block():
                hidden = layers.fc(input=false_image, size=200, act='tanh')
                prob = layers.fc(input=hidden, size=10, act='softmax')
                layers.assign(input=prob, output=false_out)

            prob = layers.merge_lod_tensor(
                in_true=true_out, in_false=false_out, mask=cond, x=image)
            loss = layers.cross_entropy(input=prob, label=label)
            avg_loss = layers.mean(loss)

            optimizer = MomentumOptimizer(learning_rate=0.001, momentum=0.9)
            optimizer.minimize(avg_loss, startup_prog)

        train_reader = paddle.batch(
            paddle.reader.shuffle(
                paddle.dataset.mnist.train(), buf_size=8192),
            batch_size=200)

        place = core.CPUPlace()
        exe = Executor(place)

        exe.run(startup_prog)
        PASS_NUM = 100
        for pass_id in range(PASS_NUM):
            for data in train_reader():
                x_data = np.array([x[0] for x in data]).astype("float32")
                y_data = np.array([x[1] for x in data]).astype("int64")
                y_data = np.expand_dims(y_data, axis=1)

                outs = exe.run(prog,
                               feed={'x': x_data,
                                     'y': y_data},
                               fetch_list=[avg_loss])
                print(outs[0])
                if outs[0] < 1.0:
                    return
        self.assertFalse(True)