Example #1
 def __call__(self, x_t, hidden_t_prev, cell_t_prev):
     return layers.lstm_unit(x_t=x_t,
                             hidden_t_prev=hidden_t_prev,
                             cell_t_prev=cell_t_prev,
                             forget_bias=forget_bias,  # presumably bound in the enclosing scope
                             param_attr=self.attr_holder.param_attr,
                             bias_attr=self.attr_holder.bias_attr)
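For orientation, `layers.lstm_unit` consumes the step input `x_t` of shape [batch, input_dim] together with the previous states `hidden_t_prev` and `cell_t_prev` of shape [batch, hidden_dim], and returns the new (hidden, cell) pair. A minimal sketch, assuming fluid 1.x (all variable names below are illustrative, not taken from the example above):

    import paddle.fluid as fluid
    import paddle.fluid.layers as layers

    # step input [batch, 16], previous states [batch, 32]
    x_t = fluid.data(name='x_t', shape=[None, 16], dtype='float32')
    h_prev = fluid.data(name='h_prev', shape=[None, 32], dtype='float32')
    c_prev = fluid.data(name='c_prev', shape=[None, 32], dtype='float32')
    # returns the new hidden and cell states, both [batch, 32]
    hidden_t, cell_t = layers.lstm_unit(x_t=x_t,
                                        hidden_t_prev=h_prev,
                                        cell_t_prev=c_prev,
                                        forget_bias=0.0)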
Example #2
 def test_lstm_unit(self):
     program = Program()
     with program_guard(program):
         x_t_data = layers.data(
             name='x_t_data', shape=[10, 10], dtype='float32')
         x_t = layers.fc(input=x_t_data, size=10)
         prev_hidden_data = layers.data(
             name='prev_hidden_data', shape=[10, 30], dtype='float32')
         prev_hidden = layers.fc(input=prev_hidden_data, size=30)
         prev_cell_data = layers.data(
             name='prev_cell', shape=[10, 30], dtype='float32')
         prev_cell = layers.fc(input=prev_cell_data, size=30)
         self.assertIsNotNone(
             layers.lstm_unit(
                 x_t=x_t, hidden_t_prev=prev_hidden, cell_t_prev=prev_cell))
     print(str(program))
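The test above only builds and prints the program. To actually execute it you would feed the three declared inputs through an executor, roughly as follows (a sketch, assuming fluid 1.x; note that `layers.data` prepends a batch dimension, so the feeds are rank 3):

    import numpy as np
    import paddle.fluid as fluid

    exe = fluid.Executor(fluid.CPUPlace())
    exe.run(fluid.default_startup_program())  # initialize the fc / lstm_unit parameters
    feed = {
        'x_t_data': np.random.rand(4, 10, 10).astype('float32'),
        'prev_hidden_data': np.random.rand(4, 10, 30).astype('float32'),
        'prev_cell': np.random.rand(4, 10, 30).astype('float32'),
    }
    exe.run(program, feed=feed)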
Example #3
    def call(self, global_img_feat, p_img_feat, embedding_fn, words=None):
        # image features
        img_feat = layers.fc(p_img_feat, self.hid_size, num_flatten_dims=2, act='tanh')  # [batch, k, hid]
        img_feat_emb = layers.fc(p_img_feat, self.hid_size, num_flatten_dims=2)

        if self.mode == 'eval':
            word = layers.fill_constant_batch_size_like(global_img_feat, [-1],
                                                        dtype='int64',
                                                        value=config.data['start_idx'])
        else:
            words = layers.transpose(words, [1, 0])  # [seq, batch]
            words.stop_gradient = True
        # LSTM state initialization
        hid, cell = create_zero_state(global_img_feat), create_zero_state(global_img_feat)

        # while-loop setup
        mx = decoder_config['sentence_length'] - 1 if self.mode == 'train' else decoder_config['infer_max_length']
        if self.mode == 'eval':
            while_op_output = layers.create_array('int64')
        else:
            while_op_output = layers.create_array('float32')
        max_step = layers.fill_constant(shape=[1], dtype='int64', value=mx)
        step = layers.fill_constant(shape=[1], dtype='int64', value=0)
        cond = layers.less_than(step, max_step)
        while_op = layers.While(cond)

        with while_op.block():
            if self.mode == 'train':
                st = layers.cast(step, 'int32')
                word = layers.slice(words, axes=[0], starts=st, ends=st + 1)
                word = layers.squeeze(word, [0])
                word.stop_gradient = True

            word_emb = embedding_fn(word)
            # maybe using + here works better?
            xt = layers.concat([word_emb, global_img_feat], axis=-1)  # [batch, feat]
            h, c = layers.lstm_unit(xt, hid, cell, param_attr=fluid.ParamAttr('lstm_w'),
                                    bias_attr=fluid.ParamAttr('lstm_b'))
            p_word_emb = layers.fc(xt, size=self.hid_size)
            p_hidden = layers.fc(hid, size=self.hid_size)
            sentinel_gate = layers.sigmoid(p_word_emb + p_hidden)  # [batch, hidden]
            sentinel = layers.elementwise_mul(sentinel_gate, layers.tanh(c))  # [batch, hidden]

            layers.assign(h, hid)
            layers.assign(c, cell)

            k = layers.shape(p_img_feat)[1]

            p_hid = layers.fc(h, self.hid_size, act='tanh')
            # attention
            #     alpha
            hid_emb = layers.fc(p_hid, self.hid_size)  # [batch, hidden]
            exp_hid_emb = layers.expand(layers.unsqueeze(hid_emb, 1), [1, k + 1, 1])  # [batch, k+1, hidden]
            sentinel_emb = layers.unsqueeze(layers.fc(sentinel, self.hid_size), axes=1)  # [batch, 1, hidden]
            feat_emb = layers.concat([img_feat_emb, sentinel_emb], axis=1)  # [batch, k+1, hidden]
            z = layers.tanh(feat_emb + exp_hid_emb)  # [batch, k+1, hidden]
            # act='softmax' inside fc would normalize over the size-1 last dim
            # (always 1.0), so apply softmax over the k+1 positions explicitly
            alpha = layers.fc(z, size=1, num_flatten_dims=2)  # [batch, k+1, 1]
            alpha = layers.softmax(alpha, axis=1)  # attention weights over the k+1 positions

            #     context vector

            context = layers.concat([img_feat, layers.unsqueeze(sentinel, axes=1)], axis=1)  # [batch, k+1, hidden]
            context = layers.elementwise_mul(context, alpha, axis=0)
            context = layers.reduce_mean(context, dim=1)  # [batch, hidden]

            out = layers.fc(context + p_hid, self.hid_size, act='tanh')

            word_pred = weight_tying_fc(out)  # [batch, vocab]

            if self.mode == 'eval':
                next_word = layers.argmax(word_pred, axis=-1)
                layers.assign(next_word, word)
                next_word = layers.cast(next_word, 'float32')
                layers.array_write(next_word, step, array=while_op_output)
            else:
                layers.array_write(word_pred, step, array=while_op_output)
            layers.increment(step)
            layers.less_than(step, max_step, cond=cond)
        if self.mode == 'train':
            output_time_major, _ = layers.tensor_array_to_tensor(while_op_output, axis=0, use_stack=True)
            output = layers.transpose(output_time_major, [1, 0, 2])
        else:
            output_time_major = layers.tensor_array_to_tensor(while_op_output, axis=0, use_stack=True)[0]
            output = layers.transpose(output_time_major, [1, 0])

        return output
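The loop body above appears to be one step of adaptive attention with a visual sentinel (in the style of Lu et al.'s "Knowing When to Look"): the sentinel s = sigmoid(W_x x_t + W_h h_{t-1}) * tanh(c_t) competes with the k image regions for attention mass, letting the decoder fall back on the language model when no region is informative. A plain-numpy sketch of that mixing step (shapes and names are illustrative only):

    import numpy as np

    batch, k, hid = 2, 5, 8
    rng = np.random.default_rng(0)
    img_feat = rng.normal(size=(batch, k, hid))   # projected region features
    sentinel = rng.normal(size=(batch, hid))      # sentinel from the gate above
    hid_emb = rng.normal(size=(batch, hid))       # projection of the hidden state

    # score the k regions plus the sentinel against the hidden state
    feats = np.concatenate([img_feat, sentinel[:, None, :]], axis=1)  # [batch, k+1, hid]
    z = np.tanh(feats + hid_emb[:, None, :])                          # [batch, k+1, hid]
    scores = z.sum(axis=-1, keepdims=True)  # stand-in for the fc(size=1) projection
    alpha = np.exp(scores) / np.exp(scores).sum(axis=1, keepdims=True)  # softmax over k+1
    context = (feats * alpha).sum(axis=1)   # [batch, hid] attended context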
Example #4
 def test_pre_cell_type():
     error_pre_cell = fluid.data(name='error_pre_cell',
                                 shape=[batch_size, hidden_dim],
                                 dtype='int32')
     lstm_unit(inputs, pre_hidden, error_pre_cell)
 def test_input_type():
     error_input = fluid.data(name='error_input',
                              shape=[batch_size, emb_dim],
                              dtype='int32')
     lstm_unit(error_input, pre_hidden, pre_cell)
 def test_pre_cell_Variable():
     lstm_unit(inputs, pre_hidden, np_pre_cell)
 def test_input_Variable():
     lstm_unit(np_input, pre_hidden, pre_cell)
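These four helpers reference `inputs`, `pre_hidden`, `pre_cell`, `np_input`, and `np_pre_cell` from their enclosing test case and are meant to fail; in PaddlePaddle's unit tests such checks are typically wrapped in `assertRaises`, roughly like this (a sketch, assuming the enclosing class is a `unittest.TestCase`):

    # numpy arrays are not Variables, and int32 is not an accepted input dtype
    self.assertRaises(TypeError, test_input_Variable)
    self.assertRaises(TypeError, test_pre_cell_Variable)
    self.assertRaises(TypeError, test_input_type)
    self.assertRaises(TypeError, test_pre_cell_type)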