Example #1
def mean_absolute_tp_max_ratio_error_tanhmap_0_7(y_true, y_pred):
    # y in [0, 7] -> map in [0.2, 9.8]
    # y < 0       -> map in [0, 0.2]
    # y > 7       -> map in [9.8, 10]
    t_map = (K.tanh((y_true - 3.5) * 0.57) + 1.00001) * 5.0
    p_map = (K.tanh((y_pred - 3.5) * 0.57) + 1.00001) * 5.0
    return K.mean(math_ops.abs(t_map - p_map) * math_ops.maximum(t_map, p_map))
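In the snippet above, `K` is the Keras backend and `math_ops` comes from `tensorflow.python.ops`. A minimal usage sketch follows; the toy model and the assumed target range are illustrative additions, not part of the original example.

import tensorflow as tf
from tensorflow.keras import backend as K   # backend referenced as K in the loss above
from tensorflow.python.ops import math_ops  # source of math_ops in the loss above

# Hypothetical regressor whose targets lie roughly in [0, 7].
model = tf.keras.Sequential([tf.keras.layers.Dense(1, input_shape=(8,))])
model.compile(optimizer='adam',
              loss=mean_absolute_tp_max_ratio_error_tanhmap_0_7)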
Example #2
    def call(self, inputs):
        h = K.bias_add(K.dot(inputs, self.fc_kernel), self.fc_bias)
        relu_h = K.tanh(h)

        self.mu = K.bias_add(K.dot(relu_h, self.mu_kernel), self.mu_bias)
        self.logvar = K.bias_add(K.dot(relu_h, self.sigma_kernel),
                                 self.sigma_bias)

        h_z = self.sample_z(self.mu, self.logvar)

        z = K.bias_add(K.dot(h_z, self.trans_kernel), self.trans_bias)
        z = K.tanh(z)

        return z
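`sample_z` is not shown in this snippet; a minimal sketch of the reparameterization trick it presumably implements (an assumption, not the original code):

    def sample_z(self, mu, logvar):
        # Assumed implementation: z = mu + sigma * eps, with eps ~ N(0, I)
        eps = K.random_normal(shape=K.shape(mu))
        return mu + K.exp(0.5 * logvar) * eps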
Example #3
    def call(self, inputs, states, constants):
        if not isinstance(constants, (list, tuple)):
            keys = values = constants
        elif len(constants) == 1:
            keys = values = constants[0]
        elif len(constants) == 2:
            keys, values = constants
        else:
            raise ValueError(
                'constants can either be a list with keys and values or just attention vectors'
            )

        if not isinstance(states, (list, tuple)):
            query = states
        else:
            query = states[0]

        query = self._query_transformation(query)
        repeated_query = K.repeat(query, K.shape(keys)[1])

        logits = self._attention_logits_dense(K.tanh(repeated_query + keys))
        attention_weights = keras.activations.softmax(logits, axis=1)
        attention_context = K.sum(attention_weights * values,
                                  axis=1,
                                  keepdims=False)
        inputs = inputs + attention_context
        return self._cell.call(inputs, states)
Example #4
    def call(self, x):
        print(x)
        features_dim = x.shape[-1].value
        step_dim = x.shape[-2].value
        print(K.reshape(self.kernel, (-1, features_dim)))  # n, d
        print(K.reshape(self.W, (features_dim, 1)))  # w= dx1
        print(K.dot(K.reshape(self.kernel, (-1, features_dim)), K.reshape(self.W, (features_dim, 1))))  # nx1

        eij = K.reshape(K.dot(K.reshape(self.kernel, (-1, features_dim)), K.reshape(self.W, (features_dim, 1))),
                        (-1, step_dim))  # batch,step
        print(eij)

        eij += self.b

        eij = K.tanh(eij)

        a = K.exp(eij)


        a /= K.cast(K.sum(a, axis=1, keepdims=True) + K.epsilon(), K.floatx())
        a = tf.transpose(a,(1,0))
        print(a)

        print("x:")
        print(self.kernel)
        weighted_input = self.kernel * a  # broadcast to the same shape and multiply element-wise: N T K
        print(weighted_input.shape)
        temp = K.sum(weighted_input, axis=0)  # N K: sum the weighted values
        temp = K.tile(K.expand_dims(temp, 0), [step_dim, 1])
        temp = keras.layers.concatenate([self.kernel, temp])
        temp = K.dot(temp, self.W2) + self.b2
        return x + temp
Example #5
    def call(self, x):
        eij1 = K.reshape(
            K.dot(K.reshape(x[:, :, 0:768], (-1, self.features_dim)), K.reshape(self.W, (self.features_dim, 1))),
            (-1, self.step_dim))
        eij1 += self.b
        eij1 = K.expand_dims(eij1)

        eij2 = K.reshape(
            K.dot(K.reshape(x[:, :, 768:768*2], (-1, self.features_dim)), K.reshape(self.W, (self.features_dim, 1))),
            (-1, self.step_dim))
        eij2 += self.b
        eij2 = K.expand_dims(eij2)

        eij3 = K.reshape(
            K.dot(K.reshape(x[:, :, 768*2:768*3], (-1, self.features_dim)), K.reshape(self.W, (self.features_dim, 1))),
            (-1, self.step_dim))
        eij3 += self.b
        eij3 = K.expand_dims(eij3)


        eij = keras.layers.concatenate([eij1, eij2, eij3], axis=2)
        print(eij)
        eij = K.tanh(eij)
        a = K.exp(eij)
        a /= K.cast(K.sum(a, axis=2, keepdims=True) + K.epsilon(), K.floatx())
        print(a)
        temp = a[:,:,0:1] * x[:, :, 0:768] + a[:,:,1:2] * x[:, :, 768:768*2] + a[:,:,2:3] * x[:, :, 768*2:768*3]
        print(temp)

        return temp
Example #6
def gelu(x):
    """
    GELU activation, described in paper "Gaussian Error Linear Units (GELUs)"
    https://arxiv.org/pdf/1606.08415.pdf
    """
    c = math.sqrt(2 / math.pi)
    return 0.5 * x * (1 + K.tanh(c * (x + 0.044715 * K.pow(x, 3))))
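The function relies on `import math` and the Keras backend `K`. A minimal usage sketch; the layer and its width are illustrative assumptions:

import math
import tensorflow as tf
from tensorflow.keras import backend as K

# A custom activation can be passed directly to a layer, assuming the
# gelu function defined above is in scope.
hidden = tf.keras.layers.Dense(128, activation=gelu)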
Example #7
        def energy_step(inputs, states):
            """ Step function for computing energy for a single decoder state
            inputs: (batchsize * 1 * de_in_dim)
            states: (batchsize * 1 * de_latent_dim)
            """
            """ Some parameters required for shaping tensors"""
            en_seq_len, en_hidden = encoder_out_seq.shape[
                1], encoder_out_seq.shape[2]
            de_hidden = inputs.shape[-1]
            """ Computing S.Wa where S=[s0, s1, ..., si]"""
            # <= batch size * en_seq_len * latent_dim
            W_a_dot_s = K.dot(encoder_out_seq, self.W_a)
            """ Computing hj.Ua """
            U_a_dot_h = K.expand_dims(K.dot(inputs, self.U_a),
                                      1)  # <= batch_size, 1, latent_dim
            """ tanh(S.Wa + hj.Ua) """
            # <= batch_size*en_seq_len, latent_dim
            Ws_plus_Uh = K.tanh(W_a_dot_s + U_a_dot_h)
            """ softmax(va.tanh(S.Wa + hj.Ua)) """
            # <= batch_size, en_seq_len
            e_i = K.squeeze(K.dot(Ws_plus_Uh, self.V_a), axis=-1)
            # <= batch_size, en_seq_len
            e_i = K.softmax(e_i)

            return e_i, [e_i]
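Following the comments in this snippet, the energy step computes an additive (Bahdanau-style) attention energy. In LaTeX, with s_j the encoder outputs and h the current decoder state:

e_j = \operatorname{softmax}_j\!\bigl(\tanh(s_j W_a + h\, U_a)\, V_a\bigr)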
Example #8
        def energy_step(inputs, states):
            """ Step function for computing energy for a single decoder state """

            # input: (batch_size, latent_dim)
            assert_msg = "States must be a list. However states {} is of type {}".format(
                states, type(states))
            assert isinstance(states, list) or isinstance(states,
                                                          tuple), assert_msg
            """ Computing sj.Ua """
            # (batch_size, 1, d3)
            U_a_dot_s = K.expand_dims(K.dot(inputs, self.U_a), 1)
            if verbose:
                print('Ua.h>', K.int_shape(U_a_dot_s))
            """ tanh(h.Wa + s.Ua) """
            # (batch_size, h1*h2*...*hn, d3) = (batch_size, h1*h2*...*hn, d3) + (batch_size, 1, d3)
            Wh_plus_Us = K.tanh(W_hi + U_a_dot_s)
            # (batch_size, d3, h1*h2*...*hn)
            Wh_plus_Us = K.permute_dimensions(Wh_plus_Us, (0, 2, 1))
            if verbose:
                print('Wh+Us>', K.int_shape(Wh_plus_Us))
            """ softmax(va.tanh(S.Wa + hj.Ua)) """
            # (1, batch_size, h1*h2*...*hn) = (1, d3) . (batch_size, d3, h1*h2*...*hn)
            Wh_plus_Us_dot_Va = K.dot(self.V_a, Wh_plus_Us)
            # (batch_size, h1*h2*...*hn)
            e_i = K.squeeze(Wh_plus_Us_dot_Va, 0)
            e_i = K.softmax(e_i)

            if verbose:
                print('ei>', K.int_shape(e_i))

            # (batch_size, h1*h2*...*hn)
            return e_i, states
Example #9
    def call(self, x, mask=None):
        eij = dot_product(x, self.W)

        if self.bias:
            eij += self.b

        eij = K.tanh(eij)

        a = K.exp(eij)

        # apply mask after the exp. will be re-normalized next
        if mask is not None:
            # Cast the mask to floatX to avoid float64 upcasting in theano
            a *= K.cast(mask, K.floatx())

        # in some cases especially in the early stages of training the sum may be almost zero
        # and this results in NaN's. A workaround is to add a very small positive number ε to the sum.
        # a /= K.cast(K.sum(a, axis=1, keepdims=True), K.floatx())
        a /= K.cast(K.sum(a, axis=1, keepdims=True) + K.epsilon(), K.floatx())

        weighted_input = x * K.expand_dims(a)

        result = K.sum(weighted_input, axis=1)

        if self.return_attention:
            return [result, a]
        return result
Example #10
        def energy_step(decode_outs, states):  # decode_outs(batch,dim)

            # decoder_seq [N,30,512]; 30 is the sequence (string) length
            en_seq_len, en_hidden = encoder_out_seq.shape[
                1], encoder_out_seq.shape[2]  # 30, 512
            de_hidden = decode_outs.shape[-1]
            #  W * h_j
            reshaped_enc_outputs = K.reshape(
                encoder_out_seq, (-1, en_hidden))  #[b,64,512]=> [b*64,512]

            # W_a[512x512],reshaped_enc_outputs[b*64,512] => [b*64,512] => [b,64,512]
            W_a_dot_s = K.reshape(K.dot(reshaped_enc_outputs, self.W_a),
                                  (-1, en_seq_len, en_hidden))

            # U * S_t-1: decode_outs[b,512], U_a[512,512] => [b,512] => [b,1,512]
            U_a_dot_h = K.expand_dims(K.dot(decode_outs, self.U_a),
                                      axis=1)  # <= batch_size, 1, latent_dim

            # Tricky detail: broadcasting effectively repeats the decoder output across the 64 time steps and adds it to the encoder outputs [64,512]

            # tanh ( W * h_j + U * S_t-1 + b ),[b,64,512] = [b*64,512]
            reshaped_Ws_plus_Uh = K.tanh(
                K.reshape(W_a_dot_s + U_a_dot_h, (-1, en_hidden)))

            # V * tanh ( W * h_j + U * S_t-1 + b ), [b*64,512]*[512,1] => [b*64,1] => [b,64]
            e_i = K.reshape(K.dot(reshaped_Ws_plus_Uh, self.V_a),
                            (-1, en_seq_len))

            e_i = K.softmax(e_i)

            return e_i, [e_i]
Example #11
        def energy_step(inputs, states):

            assert_msg = "States must be a list. However states {} is of type {}".format(
                states, type(states))

            assert isinstance(states, list) or isinstance(states,
                                                          tuple), assert_msg

            en_seq_len, en_hidden = encoder_out_seq.shape[
                1], encoder_out_seq.shape[2]
            de_hidden = inputs.shape[-1]

            reshaped_enc_outputs = K.reshape(encoder_out_seq, (-1, en_hidden))
            W_a_dot_s = K.reshape(K.dot(reshaped_enc_outputs, self.W_a),
                                  (-1, en_seq_len, en_hidden))
            if verbose:
                print('wa.s > ', W_a_dot_s.shape)

            U_a_dot_h = K.expand_dims(K.dot(inputs, self.U_a),
                                      1)  # (batch_size, 1, latent_dim)
            if verbose:
                print('Ua.h > ', U_a_dot_h.shape)

            reshaped_Ws_plus_Uh = K.tanh(
                K.reshape(W_a_dot_s + U_a_dot_h, (-1, en_hidden)))
            if verbose:
                print('Ws+Uh > ', reshaped_Ws_plus_Uh.shape)

            e_i = K.reshape(K.dot(reshaped_Ws_plus_Uh, self.V_a),
                            (-1, en_seq_len))
            e_i = K.softmax(e_i)
            if verbose:
                print('ei > ', e_i.shape)

            return e_i, [e_i]
Example #12
    def call(self, x, mask=None):
        embedding_dim = self.embedding_dim
        sequence_length = self.sequence_length

        eij = K.reshape(
            K.dot(K.reshape(x, (-1, embedding_dim)),
                  K.reshape(self.W, (embedding_dim, 1))),
            (-1, sequence_length))

        if self.bias:
            eij += self.b

        eij = K.tanh(eij)

        a = K.exp(eij)

        if mask is not None:
            a *= K.cast(mask, K.floatx())

        a /= K.cast(K.sum(a, axis=1, keepdims=True) + K.epsilon(), K.floatx())

        weighted_input = x * K.expand_dims(a)
        output = K.sum(weighted_input, axis=1)
        if self.return_attentions:
            return output, a
        else:
            return output
Example #13
    def call(self, inputs, mask=None):
        # output = softmax(score)
        k, q = inputs
        if len(q.shape) == 2:
            q = K.expand_dims(q, axis=1)
        # k: (?, K_LEN, EMBED_DIM,)
        # q: (?, Q_LEN, EMBED_DIM,)
        # score: (?, Q_LEN, K_LEN,)
        if self.score_function == 'scaled_dot_product':
            kt = K.permute_dimensions(k, (0, 2, 1))
            qkt = K.batch_dot(q, kt)
            score = qkt / self.EMBED_DIM
        elif self.score_function == 'mlp':
            kq = K.concatenate([k, q], axis=1)
            kqw2 = K.tanh(K.dot(kq, self.W2))
            score = K.permute_dimensions(K.dot(self.W1, kqw2), (1, 0, 2))
        elif self.score_function == 'bi_linear':
            qw = K.dot(q, self.W)
            kt = K.permute_dimensions(k, (0, 2, 1))
            score = K.batch_dot(qw, kt)
        else:
            raise RuntimeError('invalid score_function')
        score = K.softmax(score)
        # if mask is not None:
        #     score *= K.cast(mask[0], K.floatx())
        # output: (?, Q_LEN, EMBED_DIM,)
        output = K.batch_dot(score, k)

        return output
Example #14
        def energy_step(inputs, states):
            """ Step function for computing energy for a single decoder state """

            assert_msg = "States must be a list. However states {} is of type {}".format(
                states, type(states))
            assert isinstance(states, list) or isinstance(states,
                                                          tuple), assert_msg
            """ Some parameters required for shaping tensors"""
            en_seq_len, en_hidden = encoder_out_seq.shape[
                1], encoder_out_seq.shape[2]
            de_hidden = inputs.shape[-1]
            """ Computing S.Wa where S=[s0, s1, ..., si]"""
            # <= batch_size*en_seq_len, latent_dim
            reshaped_enc_outputs = K.reshape(encoder_out_seq, (-1, en_hidden))
            # <= batch_size*en_seq_len, latent_dim
            W_a_dot_s = K.reshape(K.dot(reshaped_enc_outputs, self.W_a),
                                  (-1, en_seq_len, en_hidden))
            if verbose:
                print('wa.s>', W_a_dot_s.shape)
            """ Computing hj.Ua """
            U_a_dot_h = K.expand_dims(K.dot(inputs, self.U_a),
                                      1)  # <= batch_size, 1, latent_dim
            if verbose:
                print('Ua.h >', U_a_dot_h.shape)
                print('U_a >', self.U_a.shape)
                print('inputs.shape >', inputs.shape)
            """ tanh(S.Wa + hj.Ua) """
            # <= batch_size*en_seq_len, latent_dim
            reshaped_Ws_plus_Uh = K.tanh(
                K.reshape(W_a_dot_s + U_a_dot_h, (-1, en_hidden)))
            if verbose:
                print('Ws+Uh>', reshaped_Ws_plus_Uh.shape)
            """ softmax(va.tanh(S.Wa + hj.Ua)) """
            # <= batch_size, en_seq_len
            e_i = K.reshape(K.dot(reshaped_Ws_plus_Uh, K.tanh(self.V_a)),
                            (-1, en_seq_len))
            # <= batch_size, en_seq_len
            e_i = K.softmax(e_i)

            if verbose:
                print('ei>', e_i.shape)
                K.print_tensor(reshaped_Ws_plus_Uh,
                               message='reshaped_Ws_plus_Uh')
                K.print_tensor(self.V_a, message='V_a')
                K.print_tensor(e_i, message='e_i')

            return e_i, [e_i]
Example #15
    def call(self, inputs, mask=None):
        '''
        :param inputs: a list of at most two tensors, or a memory tensor of size BxTxD1.
        If a list, the first entry is the memory and the second, if present, is a query tensor of size BxD2
        :param mask: masked entries are discarded directly
        :return: a tensor of size BxD1, the weighted sum along the sequence dimension
        '''
        if isinstance(inputs, list) and len(inputs) == 2:
            memory, query = inputs
            if self.method is None:
                return memory[:, -1, :]
            elif self.method == 'cba':
                hidden = K.dot(memory, self.Wh) + K.expand_dims(K.dot(query, self.Wq), 1)
                hidden = K.tanh(hidden)
                s = K.squeeze(K.dot(hidden, self.v), -1)
            elif self.method == 'ga':
                s = K.sum(K.expand_dims(K.dot(query, self.Wq), 1) * memory, axis=-1)
            else:
                s = K.squeeze(K.dot(memory, self.v), -1)
            if mask is not None:
                mask = mask[0]
        else:
            if isinstance(inputs, list):
                if len(inputs) != 1:
                    raise ValueError('inputs length should not be larger than 2')
                memory = inputs[0]
            else:
                memory = inputs
            if self.method is None:
                return memory[:, -1, :]
            elif self.method == 'cba':
                hidden = K.dot(memory, self.Wh)
                hidden = K.tanh(hidden)
                s = K.squeeze(K.dot(hidden, self.v), -1)
            elif self.method == 'ga':
                raise ValueError('general attention needs the second input')
            else:
                s = K.squeeze(K.dot(memory, self.v), -1)

        s = K.softmax(s)
        if mask is not None:
            s *= K.cast(mask, dtype='float32')
            sum_by_time = K.sum(s, axis=-1, keepdims=True)
            s = s / (sum_by_time + K.epsilon())
        return K.sum(memory * K.expand_dims(s), axis=1)
Example #16
 def call(self, inputs, mask=None):
    
     x = K.permute_dimensions(inputs, (0, 2, 1))
    
     a = K.softmax(K.tanh(K.dot(x, self.W)))
     a = K.permute_dimensions(a, (0, 2, 1))
     outputs = a * inputs
     outputs = K.sum(outputs, axis=1)
     return outputs
Example #17
 def call(self, inputs, mask=None):
     # inputs.shape = (batch_size, time_steps, seq_len)
     x = K.permute_dimensions(inputs, (0, 2, 1))
     # x.shape = (batch_size, seq_len, time_steps)
     # general
     a = K.softmax(K.tanh(K.dot(x, self.W)))
     a = K.permute_dimensions(a, (0, 2, 1))
     outputs = a * inputs
     outputs = K.sum(outputs, axis=1)
     return outputs
Example #18
    def call(self, inputs, **kwargs):
        W = K.tanh(self.W_hat) * K.sigmoid(self.M_hat)
        a = K.dot(inputs, W)

        if self.nac_only:
            outputs = a
        else:
            m = K.exp(K.dot(K.log(K.abs(inputs) + self.epsilon), W))
            g = K.sigmoid(K.dot(inputs, self.G))
            outputs = g * a + (1. - g) * m

        return outputs
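The weight construction here matches the Neural Arithmetic Logic Unit (NALU) formulation: a constrained weight matrix drives an additive path, a log-space path handles multiplication, and a learned gate mixes the two. In LaTeX, mirroring the code:

W = \tanh(\hat{W}) \odot \sigma(\hat{M}), \quad a = xW, \quad m = \exp\bigl(\log(|x| + \epsilon)\, W\bigr), \quad g = \sigma(xG), \quad y = g \odot a + (1 - g) \odot m

With `nac_only` set, the output is just the additive path a.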
Example #19
 def attention(self, x, dw, pw):
   z = K.separable_conv2d(
       K.tanh(x),
       dw,
       pw,
       strides=self.strides,
       padding=self.padding,
       data_format=self.data_format,
       dilation_rate=self.dilation_rate)
   att = math_ops.exp(z)/math_ops.reduce_sum(math_ops.exp(z), [1, 2], keep_dims=True)
   att = att/math_ops.reduce_max(att, [1, 2], keep_dims=True)
   return att 
Example #20
    def call(self, inputs, **kwargs):
        query, values, keys = inputs

        hidden_with_time_axis = K.expand_dims(query, 1)
        score = self.attention_variable(
            K.tanh(keys + self.query_layer(hidden_with_time_axis))
        )  # TODO Mask option for score with infinity
        alignment = K.softmax(score, axis=1)
        attention = alignment * values
        alignment = K.squeeze(alignment, axis=2)
        attention = K.sum(attention, axis=1)

        return attention, alignment
Example #21
    def call(self, h, mask=None):
        h_shape = K.shape(h)
        d_w, T = h_shape[0], h_shape[1]

        logits = K.dot(h, self.w)  # w^T h
        logits = K.reshape(logits, (d_w, T))
        alpha = K.exp(logits - K.max(logits, axis=-1, keepdims=True))  # exp

        # masked timesteps have zero weight
        if mask is not None:
            mask = K.cast(mask, K.floatx())
            alpha = alpha * mask
        alpha = alpha / K.sum(alpha, axis=1, keepdims=True)  # softmax
        r = K.sum(h * K.expand_dims(alpha), axis=1)  # r = h*alpha^T
        h_star = K.tanh(r)  # h^* = tanh(r)
        if self.return_attention:
            return [h_star, alpha]
        return h_star
Example #22
    def call(self, x):
        print(x)
        features_dim = x.shape[-1].value
        step_dim = x.shape[-2].value
        # print(K.reshape(self.kernel, (-1, features_dim)))  # n, d
        # print(K.reshape(self.W, (features_dim, 1)))  # w= dx1
        # print(K.dot(K.reshape(self.kernel, (-1, features_dim)), K.reshape(self.W, (features_dim, 1))))  # nx1

        eij = K.reshape(
            K.dot(K.reshape(self.kernel, (-1, features_dim)),
                  K.reshape(self.W, (features_dim, 1))),
            (-1, step_dim + self.windows))
        print(eij)

        eij += self.b

        eij = K.tanh(eij)

        a = K.exp(eij)
        a = K.reshape(a, (step_dim + self.windows, 1))
        print(a)

        temp = a[0:self.windows, ]
        print(temp)
        temp /= K.cast(
            K.sum(temp, axis=0, keepdims=True) + K.epsilon(), K.floatx())

        weighted_input = self.kernel[0:self.windows, ] * temp
        alltemp = K.sum(weighted_input, axis=0, keepdims=True)

        for i in range(self.windows // 2 + 1, step_dim + self.windows // 2):
            temp = a[i - self.windows // 2:i + self.windows // 2, ]
            temp /= K.cast(
                K.sum(temp, axis=0, keepdims=True) + K.epsilon(), K.floatx())
            weighted_input = self.kernel[i - self.windows // 2:i +
                                         self.windows // 2, ] * temp
            temp = K.sum(weighted_input, axis=0, keepdims=True)
            alltemp = keras.layers.concatenate([alltemp, temp], 0)

        print(alltemp)

        alltemp = keras.activations.tanh(alltemp)
        return x + alltemp
Example #23
    def call(self, x, mask=None):
        features_dim = self.features_dim
        step_dim = self.step_dim
        eij = K.reshape(
            K.dot(K.reshape(x, (-1, features_dim)),
                  K.reshape(self.W, (features_dim, 1))), (-1, step_dim))
        if self.bias:
            eij += self.b
        eij = K.tanh(eij)
        a = K.exp(eij)
        # apply mask after the exp. will be re-normalized next
        if mask is not None:
            # Cast the mask to floatX to avoid float64 upcasting in theano
            a *= K.cast(mask, K.floatx())

        # in some cases especially in the early stages of training the sum may be almost zero
        a /= K.cast(K.sum(a, axis=1, keepdims=True) + K.epsilon(), K.floatx())

        a = K.expand_dims(a)
        weighted_input = x * a
        return K.sum(weighted_input, axis=1)
Example #24
 def call(self, inputs, mask=None):
     x, u = inputs
     if u is None:
         u = self.add_weight(name="u_{:s}".format(self.name),
                             shape=(self.ATTENTION_SIZE, ),
                             initializer="glorot_normal",
                             trainable=True)
     # u: (?, ATTENTION_SIZE,)
     # x: (?, MAX_TIMESTEPS, EMBED_SIZE)
     # ut: (?, MAX_TIMESTEPS, ATTENTION_SIZE)
     ut = K.tanh(K.dot(x, self.W) + self.b)
     # at: (?, MAX_TIMESTEPS,)
     at = K.batch_dot(ut, u)
     at = K.softmax(at)
     if mask is not None:
         at *= K.cast(mask, K.floatx())
     # ot: (?, MAX_TIMESTEPS, EMBED_SIZE,)
     atx = K.expand_dims(at, axis=-1)
     ot = atx * x
     # output: (?, EMBED_SIZE,)
     output = K.sum(ot, axis=1)
     return output
Example #25
        def energy_step(inputs, states):
            """ Step function for computing energy for a single decoder state
            inputs: (batchsize * 1 * de_in_dim)
            states: (batchsize * 1 * de_latent_dim)
            """

            assert_msg = "States must be an iterable. Got {} of type {}".format(states, type(states))
            assert isinstance(states, list) or isinstance(states, tuple), assert_msg

            """ Some parameters required for shaping tensors"""
            en_seq_len, en_hidden = encoder_out_seq.shape[1], encoder_out_seq.shape[2]
            de_hidden = inputs.shape[-1]

            """ Computing S.Wa where S=[s0, s1, ..., si]"""
            # <= batch size * en_seq_len * latent_dim
            W_a_dot_s = K.dot(encoder_out_seq, self.W_a)

            """ Computing hj.Ua """
            U_a_dot_h = K.expand_dims(K.dot(inputs, self.U_a), 1)  # <= batch_size, 1, latent_dim
            if verbose:
                print('Ua.h>', U_a_dot_h.shape)

            """ tanh(S.Wa + hj.Ua) """
            # <= batch_size*en_seq_len, latent_dim
            Ws_plus_Uh = K.tanh(W_a_dot_s + U_a_dot_h)
            if verbose:
                print('Ws+Uh>', Ws_plus_Uh.shape)

            """ softmax(va.tanh(S.Wa + hj.Ua)) """
            # <= batch_size, en_seq_len
            e_i = K.squeeze(K.dot(Ws_plus_Uh, self.V_a), axis=-1)
            # <= batch_size, en_seq_len
            e_i = K.softmax(e_i)

            if verbose:
                print('ei>', e_i.shape)

            return e_i, [e_i]
Example #26
    def call(self, x, mask=None):
        features_dim = self.features_dim
        step_dim = self.step_dim

        eij = K.reshape(
            K.dot(K.reshape(x, (-1, features_dim)),
                  K.reshape(self.W, (features_dim, 1))), (-1, step_dim))

        if self.bias:
            eij += self.b

        eij = K.tanh(eij)

        a = K.exp(eij)

        if mask is not None:
            a *= K.cast(mask, K.floatx())

        a /= K.cast(K.sum(a, axis=1, keepdims=True) + K.epsilon(), K.floatx())

        a = K.expand_dims(a)
        weighted_input = x * a
        return K.sum(weighted_input, axis=1)
Example #27
        def energy_step(inputs, states):
            """ Step function for computing energy for a single decoder state
            inputs: (batchsize * 1 * de_in_dim)
            states: (batchsize * 1 * de_latent_dim)
            """

            logger.debug("Running energy computation step")

            if not isinstance(states, (list, tuple)):
                raise TypeError(
                    f"States must be an iterable. Got {states} of type {type(states)}"
                )

            encoder_full_seq = states[-1]
            """ Computing S.Wa where S=[s0, s1, ..., si]"""
            # <= batch size * en_seq_len * latent_dim
            W_a_dot_s = K.dot(encoder_full_seq, self.W_a)
            """ Computing hj.Ua """
            U_a_dot_h = K.expand_dims(K.dot(inputs, self.U_a),
                                      1)  # <= batch_size, 1, latent_dim

            logger.debug(f"U_a_dot_h.shape = {U_a_dot_h.shape}")
            """ tanh(S.Wa + hj.Ua) """
            # <= batch_size*en_seq_len, latent_dim
            Ws_plus_Uh = K.tanh(W_a_dot_s + U_a_dot_h)

            logger.debug(f"Ws_plus_Uh.shape = {Ws_plus_Uh.shape}")
            """ softmax(va.tanh(S.Wa + hj.Ua)) """
            # <= batch_size, en_seq_len
            e_i = K.squeeze(K.dot(Ws_plus_Uh, self.V_a), axis=-1)
            # <= batch_size, en_seq_len
            e_i = K.softmax(e_i)

            logger.debug(f"ei.shape = {e_i.shape}")

            return e_i, [e_i]
Example #28
def energy_step(S_t_1):  # inputs (batch, dim)
    inputs = _p(S_t_1, "energy_step: S_t_1, computing the energy function...")  # S_t_1: [1,20]


    en_seq_len, en_hidden = encoder_out_seq.shape[1], encoder_out_seq.shape[2]
    de_hidden = S_t_1.shape[-1]

    #  W * h_j
    reshaped_enc_outputs = K.reshape(encoder_out_seq, (-1, en_hidden))
    W_a_dot_s = K.reshape(K.dot(reshaped_enc_outputs, self.W_a), (-1, en_seq_len, en_hidden))

    # U * S_t - 1
    U_a_dot_h = K.expand_dims(K.dot(inputs, self.U_a), 1)  # <= batch_size, 1, latent_dim

    # tanh ( W * h_j + U * S_t-1 + b )
    reshaped_Ws_plus_Uh = K.tanh(K.reshape(W_a_dot_s + U_a_dot_h, (-1, en_hidden)))

    # V * tanh ( W * h_j + U * S_t-1 + b )
    e_i = K.reshape(K.dot(reshaped_Ws_plus_Uh, self.V_a), (-1, en_seq_len))

    # softmax(e_tj)
    e_i = K.softmax(e_i)
    e_i = _p(e_i, "energy_step: e_i")
    return e_i, [e_i]
Example #29
 def call(self, x):
     return x * (K.tanh(K.softplus(x)))
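This is the Mish activation, f(x) = x * tanh(softplus(x)). A minimal standalone sketch; the function name and the wrapping layer are illustrative assumptions:

import tensorflow as tf
from tensorflow.keras import backend as K

def mish(x):
    # f(x) = x * tanh(softplus(x)): a smooth, non-monotonic activation
    return x * K.tanh(K.softplus(x))

dense = tf.keras.layers.Dense(64, activation=mish)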
Example #30
    def call(self, inputs, states, training=None):
        h_tm1 = states[0]
        c_tm1 = states[1]

        # dropout matrices for input units
        dp_mask = self.get_dropout_mask_for_cell(inputs, training, count=4)
        # dropout matrices for recurrent units
        rec_dp_mask = self.get_recurrent_dropout_mask_for_cell(h_tm1,
                                                               training,
                                                               count=4)

        if 0 < self.dropout < 1.:
            inputs_i = inputs * dp_mask[0]
            inputs_f = inputs * dp_mask[1]
            inputs_c = inputs * dp_mask[2]
            inputs_o = inputs * dp_mask[3]
        else:
            inputs_i = inputs
            inputs_f = inputs
            inputs_c = inputs
            inputs_o = inputs

        if 0 < self.recurrent_dropout < 1.:
            h_tm1_i = h_tm1 * rec_dp_mask[0]
            h_tm1_f = h_tm1 * rec_dp_mask[1]
            h_tm1_c = h_tm1 * rec_dp_mask[2]
            h_tm1_o = h_tm1 * rec_dp_mask[3]
        else:
            h_tm1_i = h_tm1
            h_tm1_f = h_tm1
            h_tm1_c = h_tm1
            h_tm1_o = h_tm1

        (kernel_i, kernel_f, kernel_c,
         kernel_o) = array_ops.split(self.kernel, 4,
                                     axis=3)  # (3, 3, input_dim, filters)
        (recurrent_kernel_i, recurrent_kernel_f, recurrent_kernel_c,
         recurrent_kernel_o) = array_ops.split(self.recurrent_kernel,
                                               4,
                                               axis=3)

        if self.use_bias:
            bias_i, bias_f, bias_c, bias_o = array_ops.split(self.bias, 4)
        else:
            bias_i, bias_f, bias_c, bias_o = None, None, None, None

        # input_i: batch
        x_i = self.input_conv(inputs_i, kernel_i, bias_i, padding=self.padding)
        x_f = self.input_conv(inputs_f, kernel_f, bias_f, padding=self.padding)
        x_c = self.input_conv(inputs_c, kernel_c, bias_c, padding=self.padding)
        x_o = self.input_conv(inputs_o, kernel_o, bias_o, padding=self.padding)
        h_i = self.recurrent_conv(h_tm1_i, recurrent_kernel_i)
        h_f = self.recurrent_conv(h_tm1_f, recurrent_kernel_f)
        h_c = self.recurrent_conv(h_tm1_c, recurrent_kernel_c)
        h_o = self.recurrent_conv(h_tm1_o, recurrent_kernel_o)

        i = self.recurrent_activation(x_i + h_i)
        f = self.recurrent_activation(x_f + h_f)
        c = f * c_tm1 + i * self.activation(x_c + h_c)
        o = self.recurrent_activation(x_o + h_o)
        h = o * self.activation(c)

        # sa computation
        m_t_minus_one = states[2]  # h, w, filters
        h_t, c_t = h, c

        (kernel_hv, kernel_hk, kernel_hq, kernel_mk,
         kernel_mv) = array_ops.split(
             self.sa_kernel, 5,
             axis=3)  # kernel_size, filters, 1, turn to one layer

        if self.use_bias:
            bias_i, bias_g, bias_o = array_ops.split(self.sa_bias, 3)
        else:
            bias_i, bias_g, bias_o = None, None, None

        v_h = self.sa_conv(h_t, kernel_hv)
        k_h = self.sa_conv(h_t, kernel_hk)
        q_h = self.sa_conv(h_t, kernel_hq)
        k_m = self.sa_conv(m_t_minus_one, kernel_mk)
        v_m = self.sa_conv(m_t_minus_one, kernel_mv)  # h, w, 1

        q_h = K.squeeze(q_h, 3)
        k_m = K.squeeze(k_m, 3)
        k_h = K.squeeze(k_h, 3)

        e_m = tf.matmul(q_h, k_m)
        alpha_m = K.softmax(e_m)
        e_h = tf.matmul(q_h, k_h)
        alpha_h = K.softmax(e_h)

        v_m = K.squeeze(v_m, 3)
        v_h = K.squeeze(v_h, 3)
        z_m = tf.matmul(alpha_m, v_m)
        z_h = tf.matmul(alpha_h, v_h)

        z_m = K.expand_dims(z_m, 3)
        z_h = K.expand_dims(z_h, 3)
        zi = self.sa_conv(K.concatenate((z_h, z_m), 3), self.kernel_z)

        (kernel_m_zi, kernel_m_hi, kernel_m_zg, kernel_m_hg, kernel_m_zo,
         kernel_m_ho) = array_ops.split(self.depth_wise_kernel, 6, axis=3)  #

        i = K.sigmoid(
            K.depthwise_conv2d(zi, kernel_m_zi, padding='same') +
            K.depthwise_conv2d(h_t, kernel_m_hi, padding='same') + bias_i)
        g = K.tanh(
            K.depthwise_conv2d(zi, kernel_m_zg, padding='same') +
            K.depthwise_conv2d(h_t, kernel_m_hg, padding='same') + bias_g)
        o = K.sigmoid(
            K.depthwise_conv2d(zi, kernel_m_zo, padding='same') +
            K.depthwise_conv2d(h_t, kernel_m_ho, padding='same') + bias_o)

        m_t = (1 - i) * m_t_minus_one + i * g
        h_hat_t = m_t * o
        # sa computation end
        return h_hat_t, [c_t, h_hat_t, m_t]
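The block marked "sa computation" updates a self-attention memory: attention over the previous memory M_{t-1} and the current hidden state produces a fused feature z, which then gates the memory. Roughly, with * denoting the depthwise convolution used above:

i_t = \sigma(W_{z;i} * z + W_{h;i} * H_t + b_i), \quad g_t = \tanh(W_{z;g} * z + W_{h;g} * H_t + b_g), \quad o_t = \sigma(W_{z;o} * z + W_{h;o} * H_t + b_o)

M_t = (1 - i_t) \odot M_{t-1} + i_t \odot g_t, \qquad \hat{H}_t = o_t \odot M_t

This mirrors the depthwise_conv2d gates and the final m_t / h_hat_t update in the code, and resembles the self-attention memory (SAM) module described for SA-ConvLSTM.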