Example #1
    def step(self, x, states):
        """
            LSTM的几个表达式都在这
        :param x:
        :param states: 上个时刻的输出和隐层状态st
        :return:
        """
        ytm, stm = states

        # repeat the hidden state along the timestep axis: (samples, timesteps, dim)
        _stm = K.repeat(stm, self.timesteps)

        # now multiply the weight matrix with the repeated hidden state
        _Wxstm = K.dot(_stm, self.W_a)

        # calculate the attention probabilities;
        # these measure how much each timestep contributes to this one.
        et = K.dot(activations.tanh(_Wxstm + self._uxpb),
                   K.expand_dims(self.V_a))
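        # additive (Bahdanau-style) score: e_t = V_a . tanh(W_a s_{t-1} + U_a x + b),
        # with U_a x + b precomputed once as self._uxpb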
        # softmax
        at = K.exp(et)
        at_sum = K.sum(at, axis=1)
        at_sum_repeated = K.repeat(at_sum, self.timesteps)
        at /= at_sum_repeated  # vector of size (batchsize, timesteps, 1)

        # calculate the context vector
        context = K.squeeze(K.batch_dot(at, self.x_seq, axes=1), axis=1)
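        # i.e. the attention-weighted sum of the encoder sequence x_seq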
        # ~~~> calculate new hidden state
        # first calculate the "r" gate:

        rt = activations.sigmoid(
            K.dot(ytm, self.W_r) + K.dot(stm, self.U_r) +
            K.dot(context, self.C_r) + self.b_r)

        # now calculate the "z" gate
        zt = activations.sigmoid(
            K.dot(ytm, self.W_z) + K.dot(stm, self.U_z) +
            K.dot(context, self.C_z) + self.b_z)

        # calculate the proposal hidden state:
        s_tp = activations.tanh(
            K.dot(ytm, self.W_p) + K.dot((rt * stm), self.U_p) +
            K.dot(context, self.C_p) + self.b_p)

        # new hidden state:
        st = (1 - zt) * stm + zt * s_tp
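        # GRU-style interpolation: z_t blends the old state with the proposal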

        yt = activations.softmax(
            K.dot(ytm, self.W_o) + K.dot(stm, self.U_o) +
            K.dot(context, self.C_o) + self.b_o)

        if self.return_probabilities:
            return at, [yt, st]
        else:
            return yt, [yt, st]
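
The manual normalization above (exponentiate, sum over the timestep axis,
repeat, divide) is simply a softmax over axis 1, written out so that it
broadcasts against the (batch, timesteps, 1) score tensor. A minimal NumPy
sketch with illustrative shapes (not part of the original code):

import numpy as np

batch, timesteps = 2, 5
et = np.random.randn(batch, timesteps, 1)   # attention scores, as in step()

at = np.exp(et)                             # unnormalized weights
at_sum = at.sum(axis=1, keepdims=True)      # plays the role of K.repeat(at_sum, ...)
at = at / at_sum                            # broadcasts over the timestep axis

assert np.allclose(at.sum(axis=1), 1.0)     # each slice now sums to one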
Example #2
    def step(self, x, states, training=None):
        h_tm1 = states[0]
        c_tm1 = states[1]
        x_seq = states[2]
        # repeat the hidden state to the length of the sequence
        _htm = K.repeat(h_tm1, self.time_step_e)  #(batch,time_step,units)
        # concatenate the repeated hidden state with the encoder sequence x_seq
        concatenate = K.concatenate(
            [_htm, x_seq], axis=-1)  #(batch,time_step,h_units+x_seq_units)
        # now multiply the weight matrix with the concatenated tensor

        # apply a dense layer over the time dimension of the sequence;
        # do it here because it doesn't depend on any previous steps,
        # therefore we can save computation time:
        dot_dense = time_distributed_dense(
            concatenate,
            self.W_cx,
            b=self.b_cx,
            input_dim=self.input_dim_e + self.units,
            timesteps=self.time_step_e,
            output_dim=self.atten_units)  #(samples,timestep,atten_units)
        # we need to supply the full sequence of inputs to step (as the attention_vector)

        # calculate the attention probabilities;
        # these measure how much each timestep contributes to this one.
        et = K.dot(
            K.relu(dot_dense),  #(batch,time_step,atten_units)
            K.expand_dims(self.C_cx))

        at = K.exp(et)  #(batch,time_step,1)
        at_sum = K.cast(K.sum(at, axis=1) + K.epsilon(),
                        K.floatx())  #(batch,1)
        at_sum_repeated = K.repeat(at_sum,
                                   self.time_step_e)  #(batch,time_step,1)
        at /= at_sum_repeated  # vector of size (batchsize, time_steps, 1)
        # calculate the context vector
        context = K.squeeze(K.batch_dot(at, x_seq, axes=1),
                            axis=1)  #(batchsize,input_dim)

        if 0 < self.dropout < 1 and self._dropout_mask is None:
            self._dropout_mask = _generate_dropout_mask(K.ones_like(context),
                                                        self.dropout,
                                                        training=training,
                                                        count=4)
        if (0 < self.recurrent_dropout < 1
                and self._recurrent_dropout_mask is None):
            self._recurrent_dropout_mask = _generate_dropout_mask(
                K.ones_like(states[0]),
                self.recurrent_dropout,
                training=training,
                count=4)

        # dropout matrices for input units
        B_W = self._dropout_mask
        # dropout matrices for recurrent units
        B_U = self._recurrent_dropout_mask
        # ~~~> calculate new hidden state

        yhat_i = K.dot(x, self.V_i)  #(batchsize,units)
        yhat_f = K.dot(x, self.V_f)  #(batchsize,units)
        yhat_c = K.dot(x, self.V_c)  #(batchsize,units)
        yhat_o = K.dot(x, self.V_o)  #(batchsize,units)

        if 0 < self.dropout < 1.:
            x_i = K.dot(context * B_W[0],
                        self.W_i) + self.b_i  #(batchsize,units)
            x_f = K.dot(context * B_W[1],
                        self.W_f) + self.b_f  #(batchsize,units)
            x_c = K.dot(context * B_W[2],
                        self.W_c) + self.b_c  #(batchsize,units)
            x_o = K.dot(context * B_W[3],
                        self.W_o) + self.b_o  #(batchsize,units)
        else:
            x_i = K.dot(context, self.W_i) + self.b_i  #(batchsize,units)
            x_f = K.dot(context, self.W_f) + self.b_f  #(batchsize,units)
            x_c = K.dot(context, self.W_c) + self.b_c  #(batchsize,units)
            x_o = K.dot(context, self.W_o) + self.b_o  #(batchsize,units)
        if 0 < self.recurrent_dropout < 1.:
            h_tm1_i = K.dot(h_tm1 * B_U[0], self.U_i)  #(batchsize,units)
            h_tm1_f = K.dot(h_tm1 * B_U[1], self.U_f)  #(batchsize,units)
            h_tm1_c = K.dot(h_tm1 * B_U[2], self.U_c)  #(batchsize,units)
            h_tm1_o = K.dot(h_tm1 * B_U[3], self.U_o)  #(batchsize,units)
        else:
            h_tm1_i = K.dot(h_tm1, self.U_i)  #(batchsize,units)
            h_tm1_f = K.dot(h_tm1, self.U_f)  #(batchsize,units)
            h_tm1_c = K.dot(h_tm1, self.U_c)  #(batchsize,units)
            h_tm1_o = K.dot(h_tm1, self.U_o)  #(batchsize,units)

        i = self.recurrent_activation(x_i + h_tm1_i +
                                      yhat_i)  #(batchsize,units)
        f = self.recurrent_activation(x_f + h_tm1_f +
                                      yhat_f)  #(batchsize,units)
        o = self.recurrent_activation(x_o + h_tm1_o +
                                      yhat_o)  #(batchsize,units)
        c_ = self.activation(x_c + h_tm1_c + yhat_c)  #(batchsize,units)
        c = f * c_tm1 + i * c_
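        # LSTM memory update: f gates the old cell, i gates the new candidate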

        h = o * self.activation(c)  #(batchsize,units)

        if 0 < self.dropout + self.recurrent_dropout:
            if training is None:
                h._uses_learning_phase = True
        # apply maxout layer with dropout
        maxout = self.max_out(inputs=K.dot(h, self.U_p), num_units=self.gmax)
        drop = Dropout(0.3)(maxout)
        # apply softmax
        _y_hat = activations.softmax(K.dot(drop, self.M_p) + self.b_p)

        if self.return_probabilities:
            return at, [h, c]
        else:
            return _y_hat, [h, c]
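
The max_out helper called above is not shown in this snippet. A common
definition (assumed here, in the style of tf.contrib.layers.maxout) splits
the feature axis into num_units groups and keeps the per-group maximum;
a NumPy sketch of the operation:

import numpy as np

def max_out(inputs, num_units):
    # hypothetical reconstruction: (batch, d) -> (batch, num_units)
    batch, d = inputs.shape
    assert d % num_units == 0, "feature dim must be divisible by num_units"
    return inputs.reshape(batch, num_units, d // num_units).max(axis=-1)

x = np.random.randn(2, 8)
print(max_out(x, num_units=4).shape)        # (2, 4)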
Example #3

    def call(self, inputs, states, constants):
        '''
        This `call` is invoked by the RNN wrapper, which rewrites it; at that
        point the `constants` argument is available.
        :param inputs: [wt; v_g], of dimension self.input_dim
        :param states: the previous step's ht and mt
        :param constants: cnn_encoder outputs
        :return: ht + ct and the new [ht, mt] states
        '''
        h_tm = states[0]  # last hidden state
        m_tm = states[1]  # last memory cell
        # shape [self.cnn_encoder_k, self.units], with self.units = cnn_encoder_d
        self.v_seq = constants[0]
        """
            f-gate
        """
        ft = activations.sigmoid(
            K.dot(h_tm, self.W_f) + K.dot(inputs, self.U_f) + self.b_f)
        """
            i-gate
        """
        it = activations.sigmoid(
            K.dot(h_tm, self.W_i) + K.dot(inputs, self.U_i) + self.b_i)
        """
            o-gate
        """
        ot = activations.sigmoid(
            K.dot(h_tm, self.W_o) + K.dot(inputs, self.U_o) + self.b_o)
        """
            g-gate (sentinel gate)
        """
        gt = activations.sigmoid(
            K.dot(h_tm, self.W_g) + K.dot(inputs, self.U_g) + self.b_g)
        """
            at-renew input
        """
        at = activations.tanh(
            K.dot(h_tm, self.W_a) + K.dot(inputs, self.U_a) + self.b_a)
        """
            mt-memory cell
        """
        mt = m_tm * ft + it * at
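        # ft gates the old memory, while the i-gate scales the renewed input at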
        """
            ht-hidden state
        """
        ht = ot * activations.tanh(mt)
        """
            st-visual sentinel
        """
        st = gt * activations.tanh(mt)
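        # the visual sentinel: a gated snapshot of the memory cell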
        """
            ct-visual context
        """
        st = K.expand_dims(st, axis=1)
        # fold st in so the attention weights are computed jointly over
        # [?, k+1, d], d=self.units; slightly different from the paper
        self.v_expand = K.concatenate([self.v_seq, st], axis=1)
        # one_matrix = K.ones((self.cnn_encoder_k + 1, 1))
        vtt = K.dot(self.v_expand, self.W_z)
        dtt = K.repeat(K.dot(ht, self.U_z),
                       self.cnn_encoder_k + 1)  # (?, k + 1, k + 1)
        tantt = K.tanh(vtt + dtt)

        zt = K.dot(tantt, self.W_h)

        # softmax over the k + 1 axis; the default axis=-1 would act on the
        # trailing size-1 dimension and return all ones
        alpha_t = activations.softmax(zt, axis=1)  # (?, k + 1, 1)
        # alpha_t = K.expand_dims(alpha_t)  # (?, k + 1, 1)
        # weighted sum directly over st, v1, ..., vk (slightly different from
        # the paper): (?, k + 1, units) -> output (?, units)
        ct = K.squeeze(K.batch_dot(alpha_t, self.v_expand, axes=1),
                       axis=1)  # batch_dot contracts over the k + 1 axis
        ht_plus_ct = ht + ct

        return ht_plus_ct, [ht, mt]
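
A cell whose call signature accepts constants can be wrapped in
keras.layers.RNN, with the CNN encoder outputs fed through the RNN call's
constants keyword. A hedged usage sketch; AdaptiveAttentionCell and all
shape values are hypothetical stand-ins for the class this method belongs to:

from tensorflow.keras.layers import Input, RNN

cell = AdaptiveAttentionCell(units=512)     # hypothetical constructor
words = Input(shape=(None, 812))            # [w_t; v_g] per timestep
v_seq = Input(shape=(49, 512))              # k CNN regions, d = 512
out = RNN(cell, return_sequences=True)(words, constants=v_seq)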
Example #4

    def call(self, inputs):
        return activations.softmax(inputs, axis=self.axis)
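
Only the call is shown above; a self-contained sketch of the thin wrapper
layer it plausibly belongs to (the class name and __init__ are assumptions),
useful e.g. for normalizing attention scores over the timestep axis:

from tensorflow.keras import activations, layers

class AxisSoftmax(layers.Layer):
    # hypothetical wrapper: softmax along a configurable axis
    def __init__(self, axis=-1, **kwargs):
        super().__init__(**kwargs)
        self.axis = axis

    def call(self, inputs):
        return activations.softmax(inputs, axis=self.axis)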