Example No. 1
 def __call__(self, hidden_states):
     """
     SelfAttention was originally proposed by Cheng et al., 2016 https://arxiv.org/pdf/1601.06733.pdf
     This uses the implementation by Philipperemy from
     https://github.com/philipperemy/keras-attention-mechanism/blob/master/attention/attention.py with the modification
     that the `attn_units` and `attn_activation` attributes can be changed. The default values of these attributes are
     the same as those used by the author. However, there is another implementation of SelfAttention at
     https://github.com/CyberZHG/keras-self-attention/blob/master/keras_self_attention/seq_self_attention.py
     but that author cites a different paper, i.e. Zheng et al., 2018 https://arxiv.org/pdf/1806.01264.pdf, and
     calls it additive attention.
     A useful discussion about the implementation used in this class can be found at
     https://github.com/philipperemy/keras-attention-mechanism/issues/14
     Many-to-one attention mechanism for Keras.
     @param hidden_states: 3D tensor with shape (batch_size, time_steps, input_dim).
     @return: 2D tensor with shape (batch_size, attn_units)
     @author: felixhao28.
     The original code, which has been modified here, was released under the Apache License 2.0.
     """
     hidden_size = int(hidden_states.shape[2])
     # Inside dense layer
     #              hidden_states            dot               W            =>           score_first_part
     # (batch_size, time_steps, hidden_size) dot (hidden_size, hidden_size) => (batch_size, time_steps, hidden_size)
     # W is the trainable weight matrix of attention Luong's multiplicative style score
     score_first_part = Dense(hidden_size, use_bias=False, name='attention_score_vec' + self.context)(hidden_states)
     #            score_first_part           dot        last_hidden_state     => attention_weights
     # (batch_size, time_steps, hidden_size) dot   (batch_size, hidden_size)  => (batch_size, time_steps)
     h_t = Lambda(lambda x: x[:, -1, :], output_shape=(hidden_size,), name='last_hidden_state' + self.context)(hidden_states)
     score = dot([score_first_part, h_t], [2, 1], name='attention_score' + self.context)
     attention_weights = Activation('softmax', name='attention_weight' + self.context)(score)
     # (batch_size, time_steps, hidden_size) dot (batch_size, time_steps) => (batch_size, hidden_size)
     context_vector = dot([hidden_states, attention_weights], [1, 1], name='context_vector' + self.context)
     pre_activation = concatenate([context_vector, h_t], name='attention_output' + self.context)
     attention_vector = Dense(self.attn_units, use_bias=False, activation=self.attn_activation, name='attention_vector' + self.context)(pre_activation)
     return attention_vector
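The docstring above contrasts this Luong-style (multiplicative) score with the additive formulation used in keras-self-attention. Below is a minimal side-by-side sketch of the two scoring rules, written with plain TensorFlow tensors; the function and weight names are illustrative assumptions, not part of the original class.

import tensorflow as tf

def multiplicative_score(hidden_states, h_t, w):
    # Luong-style: score_t = (hidden_states W) . h_t, matching the Dense + dot pair used above
    # hidden_states: (batch, T, H); h_t: (batch, H); w: (H, H) -> scores: (batch, T)
    return tf.einsum('bth,hk,bk->bt', hidden_states, w, h_t)

def additive_score(hidden_states, h_t, w1, w2, v):
    # Bahdanau-style (additive): score_t = v . tanh(W1 h_s + W2 h_t)
    # w1, w2: (H, U); v: (U,) -> scores: (batch, T)
    a = tf.einsum('bth,hu->btu', hidden_states, w1)
    b = tf.einsum('bh,hu->bu', h_t, w2)[:, None, :]
    return tf.einsum('btu,u->bt', tf.tanh(a + b), v)

hs = tf.random.normal([2, 5, 8])
ht = hs[:, -1, :]
print(multiplicative_score(hs, ht, tf.random.normal([8, 8])).shape)           # (2, 5)
print(additive_score(hs, ht, tf.random.normal([8, 4]),
                     tf.random.normal([8, 4]), tf.random.normal([4])).shape)  # (2, 5)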
Example No. 2
def finetuning_siamese_cnn(mymodel_tmp, num_frame, num_neg_singers,
                           num_pos_tracks):
    anchor = Input(shape=(num_frame, config.n_mels))
    pos_items = [
        Input(shape=(num_frame, config.n_mels)) for i in range(num_pos_tracks)
    ]
    neg_items = [
        Input(shape=(num_frame, config.n_mels)) for i in range(num_neg_singers)
    ]

    dense = Dense(256)
    ap = GlobalAvgPool1D()

    anchor_out = mymodel_tmp(anchor)
    pos_outs = [mymodel_tmp(pos_item) for pos_item in pos_items]
    neg_outs = [mymodel_tmp(neg_item) for neg_item in neg_items]

    ### cosine
    pos_dists = [
        dot([anchor_out, pos_out], axes=1, normalize=True)
        for pos_out in pos_outs
    ]
    neg_dists = [
        dot([anchor_out, neg_out], axes=1, normalize=True)
        for neg_out in neg_outs
    ]

    all_dists = concatenate(pos_dists + neg_dists)

    outputs = Activation('linear')(all_dists)

    model = Model(inputs=[anchor] + pos_items + neg_items, outputs=outputs)

    return model
Example No. 3
def getModelInstance(parameters):

    encoderInput = Input(shape=(None, parameters["enc_vocab_size"],))
    encoder = Bidirectional(LSTM(128, return_sequences=True, return_state=True),
                            merge_mode='concat')
    encoder_outputs, forward_h, forward_c, backward_h, backward_c = encoder(encoderInput)

    encoderH = concatenate([forward_h, backward_h])
    encoderC = concatenate([forward_c, backward_c])

    decoderInput = Input(shape=(None, parameters["dec_vocab_size"],))
    decoderLstm = LSTM(256, return_sequences=True)
    decoderOutput = decoderLstm(decoderInput, initial_state=[encoderH, encoderC])

    attention = dot([decoderOutput, encoder_outputs], axes=(2, 2))
    attention = Activation('softmax', name='attention')(attention)
    context = dot([attention, encoder_outputs], axes=(2, 1))
    decoderCombined = concatenate([context, decoderOutput])

    output = TimeDistributed(Dense(128, activation="relu"))(decoderCombined)
    output = TimeDistributed(Dense(parameters["dec_vocab_size"], activation="softmax"))(output)

    model = Model([encoderInput, decoderInput], [output])
    model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

    return model
Example No. 4
    def __call__(self, hidden_states): # what is passed in here is the LSTM outputs, not hidden_states

        # Since this is not time-series data, no decoder is defined.
        """
        Many-to-one attention mechanism for Keras.
        @param hidden_states: 3D tensor with shape (batch_size, time_steps, input_dim).
        @return: 2D tensor with shape (batch_size, 128)
        @author: felixhao28.
        """
        hidden_size = int(hidden_states.shape[2])

        # 1) Compute the attention score.
        # Inside dense layer
        #              hidden_states            dot               W            =>           score_first_part
        # (batch_size, time_steps, hidden_size) dot (hidden_size, hidden_size) => (batch_size, time_steps, hidden_size); the 2-D matrix multiplication is applied batch_size times
        # W is the trainable weight matrix of attention Luong's multiplicative style score
        score_first_part = Dense(hidden_size, use_bias=False, name='attention_score_vec')(hidden_states)
        #            score_first_part           dot        last_hidden_state     => attention_weights
        # (batch_size, time_steps, hidden_size) dot   (batch_size, hidden_size)  => (batch_size, time_steps)
        h_t = Lambda(lambda x: x[:, -1, :], output_shape=(hidden_size,), name='last_hidden_state')(hidden_states) # take only the last hidden state: x[:, -1, :]
        score = dot([score_first_part, h_t], [2, 1], name='attention_score') # matrix product over axes [2, 1]: contracts the hidden_size axes, leaving (batch_size, time_steps)

        # 2) Apply softmax to obtain the attention distribution.
        attention_weights = Activation('softmax', name='attention_weight')(score)
        # (batch_size, time_steps, hidden_size) dot (batch_size, time_steps) => (batch_size, hidden_size)

        # 3) Compute the attention value as the weighted sum of the hidden states with the attention weights.
        context_vector = dot([hidden_states, attention_weights], [1, 1], name='context_vector')

        # 4) Concatenate the attention value with the hidden state at time step t.
        pre_activation = concatenate([context_vector, h_t], name='attention_output')

        # 5) Compute s~t, which becomes the input to the output-layer computation.
        attention_vector = Dense(128, use_bias=False, activation='tanh', name='attention_vector')(pre_activation)
        return attention_vector
Example No. 5
        def ls(yt, yp):
            # Margin ranking loss on cosine similarities: encourages dot(y2, x2)
            # to exceed dot(g2, x2) by a margin of 0.3. yt and yp are unused.
            f = dot([self.y2, self.x2], axes=-1, normalize=True)
            fg = dot([self.g2, self.x2], axes=-1, normalize=True)

            r = maximum(0.0, 0.3 + subtract([fg, f]))
            r = sum(r, axis=-1)
            return mean(r)  # average over the batch
Example No. 6
 def attention_3d_block(self, hidden_states):
   """Attention mechanism.
   
   Reference - https://github.com/philipperemy/keras-attention-mechanism
   
   Args:
     - hidden_states: RNN hidden states (3d array)
     
   Return:
     - attention_vector: output states after attention mechanism.
   """
   # hidden_states.shape = (batch_size, time_steps, hidden_size)
   hidden_size = int(hidden_states.shape[2])
   # Inside dense layer
   #              hidden_states            dot               W            =>           score_first_part
   # (batch_size, time_steps, hidden_size) dot (hidden_size, hidden_size) => (batch_size, time_steps, hidden_size)
   # W is the trainable weight matrix of attention Luong's multiplicative style score
   score_first_part = Dense(hidden_size, use_bias=False, name='attention_score_vec')(hidden_states)
   #            score_first_part           dot        last_hidden_state     => attention_weights
   # (batch_size, time_steps, hidden_size) dot   (batch_size, hidden_size)  => (batch_size, time_steps)
   h_t = Lambda(lambda x: x[:, -1, :], output_shape=(hidden_size,), name='last_hidden_state')(hidden_states)
   score = dot([score_first_part, h_t], [2, 1], name='attention_score')
   attention_weights = Activation('softmax', name='attention_weight')(score)
   # (batch_size, time_steps, hidden_size) dot (batch_size, time_steps) => (batch_size, hidden_size)
   context_vector = dot([hidden_states, attention_weights], [1, 1], name='context_vector')
   pre_activation = concatenate([context_vector, h_t], name='attention_output')
   attention_vector = Dense(self.h_dim, use_bias=False, activation='tanh', name='attention_vector')(pre_activation)
   return attention_vector
Example No. 7
def attention_3d_block(hidden_states, dense_activation='tanh'):
    """
    Many-to-one attention mechanism for Keras.
    @param hidden_states: 3D tensor with shape (batch_size, time_steps, input_dim).
    @return: 2D tensor with shape (batch_size, 128)
    @author: felixhao28.
    """
    hidden_size = int(hidden_states.shape[2])
    # Inside dense layer
    #              hidden_states            dot               W            =>           score_first_part
    # (batch_size, time_steps, hidden_size) dot (hidden_size, hidden_size) => (batch_size, time_steps, hidden_size)
    # W is the trainable weight matrix of attention Luong's multiplicative style score
    score_first_part = Dense(hidden_size,
                             use_bias=False,
                             name='attention_score_vec')(hidden_states)
    #            score_first_part           dot        last_hidden_state     => attention_weights
    # (batch_size, time_steps, hidden_size) dot   (batch_size, hidden_size)  => (batch_size, time_steps)
    h_t = Lambda(lambda x: x[:, -1, :],
                 output_shape=(hidden_size, ),
                 name='last_hidden_state')(hidden_states)
    score = dot([score_first_part, h_t], [2, 1], name='attention_score')
    attention_weights = Activation('softmax', name='attention_weight')(score)
    # (batch_size, time_steps, hidden_size) dot (batch_size, time_steps) => (batch_size, hidden_size)
    context_vector = dot([hidden_states, attention_weights], [1, 1],
                         name='context_vector')
    pre_activation = concatenate([context_vector, h_t],
                                 name='attention_output')
    attention_vector = Dense(128,
                             use_bias=False,
                             activation=dense_activation,
                             name='attention_vector')(pre_activation)
    return attention_vector
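A minimal usage sketch for the attention_3d_block above, wired after an LSTM that returns the full sequence; the imports, input shape, and layer sizes are assumptions for illustration only.

from tensorflow.keras.layers import Input, LSTM, Dense, Lambda, Activation, dot, concatenate
from tensorflow.keras.models import Model

inputs = Input(shape=(20, 8))                      # (batch, time_steps, features)
hidden = LSTM(64, return_sequences=True)(inputs)   # (batch, 20, 64)
attn = attention_3d_block(hidden)                  # (batch, 128)
outputs = Dense(1, activation='sigmoid')(attn)
model = Model(inputs, outputs)
model.compile(optimizer='adam', loss='binary_crossentropy')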
Example No. 8
    def read(self, keys, scale=None):
        """Read from memory.

        Read the memory for the given keys. For each key in keys we will get one
        result as `r = sum_i M[i] a[i]` where `M[i]` is the memory content
        at location i and `a[i]` is the attention weight for key at location i.
        `a` is calculated as softmax of a scaled similarity between key and
        each memory content: `a[i] = exp(scale*sim[i]) / sum_j exp(scale*sim[j])`

        Args:
            keys (Tensor): shape[-1] is dim.
              For single key read, the shape is (batch_size, dim).
              For multiple key read, the shape is (batch_size, k, dim), where
              k is the number of keys.
            scale (None|float|Tensor): shape is () or keys.shape[:-1]. The
              cosine similarities are multiplied with `scale` before softmax
              is applied. If None, use the scale provided at constructor.
        Returns:
            result Tensor: shape is the same as keys. result[..., i] is the read
              result for the corresponding key.

        """
        if not self._built:
            self.build(keys.shape[0])
        assert 2 <= len(keys.shape) <= 3
        assert keys.shape[0] == self._batch_size
        assert keys.shape[-1] == self.dim

        if scale is None:
            scale = self._scale
        else:
            if isinstance(scale, (int, float)):
                pass
            else:  # assuming it's Tensor
                scale = expand_dims_as(scale, keys)
        sim = layers.dot([keys, self._memory],
                         axes=-1,
                         normalize=self._normalize)
        sim = sim * scale

        attention = activations.softmax(sim)
        result = layers.dot([attention, self._memory], axes=(-1, 1))

        if len(sim.shape) > 2:  # multiple read keys
            usage = tf.reduce_sum(attention,
                                  axis=tf.range(1,
                                                len(sim.shape) - 1))
        else:
            usage = attention

        if self._snapshot_only:
            self._usage.assign_add(usage)
        else:
            self._usage = self._usage + usage

        return result
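The read rule described in the docstring (softmax over scaled cosine similarities, then a weighted sum of memory slots) can be reproduced in a few lines of NumPy. This standalone sketch only illustrates the math for a single key; it is not the TF implementation above.

import numpy as np

def read_once(memory, key, scale=1.0):
    # memory: (n_slots, dim); key: (dim,); returns r = sum_i a[i] * M[i]
    m_norm = memory / np.linalg.norm(memory, axis=1, keepdims=True)
    k_norm = key / np.linalg.norm(key)
    sim = m_norm @ k_norm                 # cosine similarity per slot
    a = np.exp(scale * sim)
    a /= a.sum()                          # softmax attention weights
    return a @ memory                     # weighted read result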
Example No. 9
def attention_block(hidden_states):
    print(hidden_states.shape)
    hidden_size = int(hidden_states.shape[2])
    score_first_part = Dense(hidden_size, use_bias=False, name='attention_score_vec')(hidden_states)
    h_t = Lambda(lambda x: x[:, -1, :], output_shape=(hidden_size,), name='last_hidden_state')(hidden_states)
    score = dot([score_first_part, h_t], [2, 1], name='attention_score')
    attention_weights = Activation('softmax', name='attention_weight')(score)
    context_vector = dot([hidden_states, attention_weights], [1, 1], name='context_vector')
    pre_activation = concatenate([context_vector, h_t], name='attention_output')
    attention_vector = Dense(128, use_bias=False, activation='tanh', name='attention_vector')(pre_activation)
    return attention_vector
Example No. 10
def attention_3d_block(hidden_states):
    """
    Many-to-one attention mechanism for Keras.
    @param hidden_states: 3D tensor with shape (batch_size, time_steps, input_dim).
    @return: 2D tensor with shape (batch_size, 128)
    @author: felixhao28.
    """
    if False:  # original implementation kept for reference; this branch is never executed
        hidden_size = int(hidden_states.shape[2])
        # Inside dense layer
        #              hidden_states            dot               W            =>           score_first_part
        # (batch_size, time_steps, hidden_size) dot (hidden_size, hidden_size) => (batch_size, time_steps, hidden_size)
        # W is the trainable weight matrix of attention Luong's multiplicative style score
        score_first_part = Dense(hidden_size,
                                 use_bias=False,
                                 name='attention_score_vec')(hidden_states)
        #            score_first_part           dot        last_hidden_state     => attention_weights
        # (batch_size, time_steps, hidden_size) dot   (batch_size, hidden_size)  => (batch_size, time_steps)
        h_t = Lambda(lambda x: x[:, -1, :],
                     output_shape=(hidden_size, ),
                     name='last_hidden_state')(hidden_states)
        score = dot([score_first_part, h_t], [2, 1], name='attention_score')
        attention_weights = Activation('softmax',
                                       name='attention_weight')(score)
        # (batch_size, time_steps, hidden_size) dot (batch_size, time_steps) => (batch_size, hidden_size)
        context_vector = dot([hidden_states, attention_weights], [1, 1],
                             name='context_vector')
        pre_activation = concatenate([context_vector, h_t],
                                     name='attention_output')
        attention_vector = Dense(128,
                                 use_bias=False,
                                 activation='tanh',
                                 name='attention_vector')(pre_activation)
        return attention_vector
    """
    Many-to-one attention mechanism for Keras. (modified version)
    @author: ysmoon 
    """
    hidden_size = int(hidden_states.shape[2])
    query = Dense(hidden_size, use_bias=False, name="query")(hidden_states)
    key = Dense(hidden_size, use_bias=False, name="key")(hidden_states)
    score = dot([query, key], [2, 2])  # [batch, seq, seq]
    attention_weights = Activation('softmax', name='attention_weight')(score)
    value = Dense(hidden_size, use_bias=False, name="value")(hidden_states)
    context_vector = dot([attention_weights, value], [2, 1])
    context = tf.keras.backend.max(context_vector, axis=2)
    attention_vector = Dense(128,
                             use_bias=False,
                             activation='tanh',
                             name='attention_vector')(context)
    return attention_vector
Example No. 11
    def get_model(self):
        # encoder_inputs shape == (batch_size, encoder_seq_length)
        self.encoder_inputs = Input(shape=(None, ))
        # encoder_emb shape == (batch_size, encoder_seq_length, embedding_dim)
        encoder_emb = Embedding(self.num_encoder_tokens + 1,
                                self.embedding_dim,
                                mask_zero=True)(self.encoder_inputs)
        # encoder shape == (batch_size, encoder_seq_length, num_encoder_units)
        self.encoder_outputs = Bidirectional(
            LSTM(self.num_encoder_units, return_sequences=True,
                 unroll=False))(encoder_emb)
        self.encoder_outputs = Dense(self.num_decoder_units)(
            self.encoder_outputs)
        # encoder_last shape == (batch_size, num_decoder_units)
        self.encoder_last = self.encoder_outputs[:, -1, :]
        self.encoder_last.set_shape([None, self.num_decoder_units])

        # decoder_inputs shape == (batch_size, decoder_seq_length)
        self.decoder_inputs = Input(shape=(None, ))
        # decoder_emb shape == (batch_size, decoder_seq_length, embedding_dim)
        decoder_emb = Embedding(self.num_decoder_tokens + 1,
                                self.embedding_dim,
                                mask_zero=True)(self.decoder_inputs)
        # decoder_outputs shape == (batch_size, decoder_seq_length, num_decoder_units)
        decoder_outputs = LSTM(
            self.num_decoder_units, return_sequences=True,
            unroll=False)(decoder_emb,
                          initial_state=[self.encoder_last, self.encoder_last])

        # attention shape == (batch_size, decoder_seq_length, max_encoder_seq_length)
        attention = dot([decoder_outputs, self.encoder_outputs], axes=[2, 2])
        attention = Activation("softmax", name="attention")(attention)

        # context shape == (batch_size, decoder_seq_length, latent_dim)
        context = dot([attention, self.encoder_outputs], axes=[2, 1])

        # decoder_combined_context shape == (batch_size, decoder_seq_length, latent_dim)
        decoder_combined_context = concatenate([context, decoder_outputs])

        # decoder_outputs shape == (batch_size, decoder_seq_length, num_decoder_units)
        decoder_outputs = TimeDistributed(
            Dense(self.num_decoder_units,
                  activation="tanh"))(decoder_combined_context)
        # decoder_outputs shape == (batch_size, decoder_seq_length, num_decoder_tokens)
        decoder_outputs = TimeDistributed(
            Dense(self.num_decoder_tokens,
                  activation="softmax"))(decoder_outputs)

        return Model([self.encoder_inputs, self.decoder_inputs],
                     decoder_outputs)
Example No. 12
def dssm(index2vec, max_reviews=5, dim=32, J=4):
    # user embedding
    user_input = Input(shape=(max_reviews, ), name='user_input')
    # embedding of the item the user clicked
    pos_input = Input(shape=(1, ), name='pos_input')
    # embeddings of the items the user did not click
    neg_inputs = [Input(shape=(1, )) for _ in range(J)]
    # embeddings of the items in the user's viewing history
    user_embedding = Embedding(len(index2vec),
                               dim,
                               weights=[index2vec],
                               input_length=max_reviews,
                               trainable=False)(user_input)
    # take the average of all viewed item embeddings
    user_average = GlobalAveragePooling1D()(user_embedding)
    user_fc = Dense(32, activation='relu', name='ufc')(user_average)

    pos_embedding = Embedding(len(index2vec),
                              dim,
                              weights=[index2vec],
                              trainable=False)(pos_input)
    neg_embeddings = [
        Embedding(len(index2vec), dim, weights=[index2vec],
                  trainable=False)(neg_input) for neg_input in neg_inputs
    ]

    pos_flatten = Flatten()(pos_embedding)
    neg_flattens = [
        Flatten()(neg_embedding) for neg_embedding in neg_embeddings
    ]

    item_fc = Dense(32, activation='relu', name='ifc')

    pos_fc = item_fc(pos_flatten)
    neg_fcs = [item_fc(neg_flatten) for neg_flatten in neg_flattens]

    user_product_pos = dot([user_fc, pos_fc], axes=1, normalize=True)
    user_product_negs = [
        dot([user_fc, neg_fc], axes=1, normalize=True) for neg_fc in neg_fcs
    ]

    concat = concatenate([user_product_pos] + user_product_negs)

    ctr = Activation("softmax")(concat)

    model = Model(inputs=[user_input, pos_input] + neg_inputs, outputs=ctr)
    model.compile(optimizer="adam",
                  loss='categorical_crossentropy',
                  metrics=['acc'])
    return model
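Since the model ends in a softmax over one positive and J negative candidates trained with categorical cross-entropy, the target for each sample is typically a one-hot vector with all mass on the positive slot. A small sketch of how such labels might be built (an assumption about the training setup, not code from the source):

import numpy as np

batch_size, J = 64, 4
# one positive followed by J negatives -> target probability 1 on index 0
y_true = np.zeros((batch_size, 1 + J), dtype='float32')
y_true[:, 0] = 1.0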
Example No. 13
def get_qpair_model():
    embedding_size = 128

    inp1 = layers.Input(shape=(100, ))
    inp2 = layers.Input(shape=(100, ))

    x1 = layers.Embedding(6000, embedding_size)(inp1)
    x2 = layers.Embedding(6000, embedding_size)(inp2)

    x3 = layers.Bidirectional(layers.LSTM(32, return_sequences=True))(x1)
    x4 = layers.Bidirectional(layers.LSTM(32, return_sequences=True))(x2)

    x5 = layers.GlobalMaxPool1D()(x3)
    x6 = layers.GlobalMaxPool1D()(x4)

    x7 = layers.dot([x5, x6], axes=1)

    x8 = layers.Dense(40, activation='relu')(x7)
    x9 = layers.Dropout(0.05)(x8)
    x10 = layers.Dense(10, activation='relu')(x9)
    output = layers.Dense(2, activation="softmax")(x10)

    model = models.Model(inputs=[inp1, inp2], outputs=output)
    model.compile(loss='CategoricalCrossentropy',
                  optimizer='adam',
                  metrics=['accuracy'])
    # batch_size = 100
    # epochs = 3
    return model
Example No. 14
    def build(self, vector_dim=5, learn_rate=0.1):
        self.embedding_size = vector_dim

        if os.path.exists(self.trained_weights_path):
            self.model = load_model(self.trained_weights_path)
        else:
            stddev = 1.0 / vector_dim
            initializer = tf.random_normal_initializer(mean=0.0, stddev=stddev, seed=None)

            business_input = Input(shape=(1,), name="business_input")
            business_embedding = Embedding(input_dim=self.business_size,
                                           output_dim=vector_dim,
                                           input_length=1,
                                           name="input_embedding",
                                           embeddings_initializer=initializer)(business_input)

            target_input = Input(shape=(1,), name="business_target")
            target_embedding = Embedding(input_dim=self.business_size,
                                         output_dim=vector_dim,
                                         input_length=1,
                                         name="target_embedding", embeddings_initializer=initializer)(target_input)

            merged = dot([business_embedding, target_embedding], axes=2, normalize=False, name="dot")
            merged = Flatten()(merged)
            output = Dense(1, activation='sigmoid', name="output")(merged)

            model = Model(inputs=[business_input, target_input], outputs=output)
            model.compile(loss="binary_crossentropy", optimizer=Adam(learn_rate), metrics=['accuracy'])

            self.model = model

        self.model.summary(print_fn=logging.info)
Example No. 15
def monotonic_alignment(args):
    h_enc, h_dec, T_x, T_y, Y, hidden_dim = args
    struc_zeros = K.expand_dims(
        K.cast(np.triu(np.ones([T_x, T_x])), dtype='float32'), 0)
    alignment_probs = K.softmax(
        dot([Dense(hidden_dim)(h_enc), h_dec], axes=-1, normalize=False), -2)
    h_enc_rep = K.tile(K.expand_dims(h_enc, -2), [1, 1, T_y, 1])
    h_dec_rep = K.tile(K.expand_dims(h_dec, -3), [1, T_x, 1, 1])
    h_rep = K.concatenate([h_enc_rep, h_dec_rep], -1)
    alignment_probs_ = []
    for i in range(T_y):
        if i == 0:
            align_prev_curr = tf.gather(alignment_probs, i, axis=-1)
        if i > 0:
            align_prev_curr = tf.einsum('nx,ny->nxy',
                                        tf.gather(alignment_probs, i, axis=-1),
                                        alignment_probs_[i - 1])
            align_prev_curr *= struc_zeros
            align_prev_curr = K.sum(align_prev_curr, 1) + 1e-6
            align_prev_curr /= K.sum(align_prev_curr, -1, keepdims=True)
        alignment_probs_.append(align_prev_curr)
    alignment_probs_ = K.stack(alignment_probs_, -1)
    emission_probs = Dense(hidden_dim * 3, activation='tanh')(h_rep)
    emission_probs = Dense(Y, activation='softmax')(emission_probs)
    #alphas = tf.expand_dims(alignment_probs_,-1)*emission_probs
    #return(tf.reduce_sum(alphas,-3))
    return (alignment_probs_, emission_probs)
Example No. 16
def build_model(embedding_layer,embedding_layer_entity,max_len):
    sequence_input = Input(shape=(max_len,))
    entity_input = Input(shape=(2,),)
    embedded_sequences = embedding_layer(sequence_input)
    embedded_entity = embedding_layer_entity(entity_input)
    #print(entity_input.shape)
    x = Conv1D(128, 3, activation='relu',padding='same')(embedded_sequences)
    x1 = Conv1D(128, 2, activation='relu')(embedded_entity)
    ###aspect based attention block
    con = Concatenate(axis = 1)([x,x1])
    x2 = Dense(1,activation= 'tanh')(con)
    x2 = Flatten()(x2)
    x2 = Activation('softmax')(x2)
    x2 = RepeatVector(64)(x2)
    x2 = dot([x,x2],axes = 1)
    x2 = Permute([2, 1])(x2)
    ###attention end
    x = MaxPooling1D(3)(x2)
    x = Conv1D(128, 3, activation='relu')(x)
    x = MaxPooling1D(3)(x)
    x = Conv1D(128, 3, activation='relu')(x)
    x = MaxPooling1D(3)(x)
    x = Flatten()(x)
    x = Dense(128, activation='relu')(x)
    #x = concatenate([x,d])
    preds = Dense(1, activation='sigmoid')(x)

    model = Model([sequence_input,entity_input], preds)
    model.compile(optimizer='adam', loss='binary_crossentropy', 
                  metrics=['acc',f1_m,precision_m, recall_m])
    return model
Example No. 17
    def make_model(self):
        
        with k.name_scope("SVM_features"):

            svm_features = Input(shape = (self.svm_dims,), name = "svm_features")
            svm_input = Dense(128, activation = "tanh", name = "svm_dense")(svm_features)
            # svm_input = LeakyReLU()(svm_input) 
        
        with k.name_scope("LSTM_features"):
            
            lstm_features = Input(shape = (None, self.input_shape), name = "lstm_features")
            lstm_mask = Masking(mask_value = Config.MASKING_VALUE, input_shape = (self.time_steps, self.input_shape))(lstm_features)
            lstm_output, state_h, state_c = LSTM(Config.LSTM_UNITS, return_sequences = True, return_state = True, name = "lstm_sequence")(lstm_mask)
            # lstm_output_last = LSTM(Config.LSTM_UNITS, return_sequences = False, name = "lstm_last_output")(lstm_mask)
        
        with k.name_scope("AttentionLayer_1"):
            
            __, lstm_output_ex_last = Lambda(lambda t: [t, t[:, :-1, :]], name = "lstm_T1_Tn-1")(lstm_output)
            lstm_output_last = state_h 
            attention_weights1 = dot([lstm_output_last, lstm_output_ex_last], name = "attention_weights1", axes = -1) # [B, 1, M]
            attention_weights2 = Activation("softmax", name = "attention_weights2")(attention_weights1)
            lstm_attention = dot([attention_weights2, lstm_output_ex_last], name = "lstm_attention", axes = 1)
            # final_attention = concatenate([lstm_attention, lstm_output_last])
            print(lstm_attention)
        
        """
        with k.name_scope("AttentionLayer_2"):
            # Attention layer 2 - attention params
            input_attention = Input(shape = (Config.ATTENTION_UNITS, ), name = "attention_params")
            u = Dense(Config.ATTENTION_UNITS, activation = "softmax", name = "attention_u")(input_attention)
            alpha = dot([u, lstm_output], axes = -1)
            alpha = Activation("softmax", name = "attention_weights")(alpha)
            # weighted pool
            lstm_attention = dot([alpha, lstm_output], name = "attention_output", axes = 1)
        """
        with k.name_scope("Concatenate"):
            x = concatenate([lstm_attention, svm_input])
            x_dense = Dense(128, activation = "tanh")(x)
            # x_dense = LeakyReLU()(x_dense)
            dense_2 = Dense(128, activation = "tanh")(x_dense)
            batchnorm2 = BatchNormalization()(dense_2)
            dropout = Dropout(rate = 0.3, name = "dropout")(batchnorm2) 
            
        pred = Dense(self.num_classes, activation = "softmax", name = "output")(dropout)
        self.model = Model(inputs = [svm_features, lstm_features], outputs = [pred])
        
        return self.model
Example No. 18
def attention_3d_block(hidden_states):
    # @author: felixhao28.
    # hidden_states.shape = (batch_size, time_steps, hidden_size)
    hidden_size = int(hidden_states.shape[2])
    # Inside dense layer
    #              hidden_states            dot               W            =>           score_first_part
    # (batch_size, time_steps, hidden_size) dot (hidden_size, hidden_size) => (batch_size, time_steps, hidden_size)
    # W is the trainable weight matrix of attention Luong's multiplicative style score
    score_first_part = Dense(hidden_size,
                             use_bias=False,
                             name='attention_score_vec')(hidden_states)
    #            score_first_part           dot        last_hidden_state     => attention_weights
    # (batch_size, time_steps, hidden_size) dot   (batch_size, hidden_size)  => (batch_size, time_steps)
    h_t = Lambda(lambda x: x[:, -1, :],
                 output_shape=(hidden_size, ),
                 name='last_hidden_state')(hidden_states)
    score = dot([score_first_part, h_t], [2, 1], name='attention_score')
    attention_weights = Activation('softmax', name='attention_weight')(score)
    # (batch_size, time_steps, hidden_size) dot (batch_size, time_steps) => (batch_size, hidden_size)
    context_vector = dot([hidden_states, attention_weights], [1, 1],
                         name='context_vector')
    pre_activation = concatenate([context_vector, h_t],
                                 name='attention_output')
    attention_vector = Dense(256,
                             use_bias=False,
                             activation='tanh',
                             name='attention_vector')(pre_activation)
    return attention_vector
Example No. 19
def attn(hidden_states,name='Attention_layer'):
    hidden_size = int(hidden_states.shape[2])
    # Inside dense layer
    #              hidden_states            dot               W            =>           score_first_part
    # (batch_size, time_steps, hidden_size) dot (hidden_size, hidden_size) => (batch_size, time_steps, hidden_size)
    # W is the trainable weight matrix of attention Luong's multiplicative style score
    score_first_part = Dense(hidden_size, use_bias=False)(hidden_states)
    #            score_first_part           dot        last_hidden_state     => attention_weights
    # (batch_size, time_steps, hidden_size) dot   (batch_size, hidden_size)  => (batch_size, time_steps)
    h_t = Lambda(lambda x: x[:, -1, :], output_shape=(hidden_size,))(hidden_states)
    score = dot([score_first_part, h_t], [2, 1])
    attention_weights = Activation('softmax')(score)
    # (batch_size, time_steps, hidden_size) dot (batch_size, time_steps) => (batch_size, hidden_size)
    context_vector = dot([hidden_states, attention_weights], [1, 1])
    pre_activation = concatenate([context_vector, h_t])
    attention_vector = Dense(128, use_bias=False, activation='tanh')(pre_activation)
    return attention_vector
Example No. 20
def get_siamese_model(input_shape):
    """
        Model architecture
    """

    # Define the tensors for the two input images
    left_input = Input(input_shape)
    right_input = Input(input_shape)

    # Convolutional Neural Network
    model = Sequential()
    model.add(
        Conv2D(64, (10, 10),
               activation='relu',
               input_shape=input_shape,
               kernel_initializer="random_uniform",
               kernel_regularizer=l2(2e-4)))
    model.add(MaxPooling2D())
    model.add(
        Conv2D(128, (7, 7),
               activation='relu',
               kernel_initializer="random_uniform",
               bias_initializer="zeros",
               kernel_regularizer=l2(2e-4)))
    model.add(MaxPooling2D())
    model.add(
        Conv2D(128, (4, 4),
               activation='relu',
               kernel_initializer="random_uniform",
               bias_initializer="zeros",
               kernel_regularizer=l2(2e-4)))
    model.add(MaxPooling2D())
    model.add(
        Conv2D(256, (4, 4),
               activation='relu',
               kernel_initializer="random_uniform",
               bias_initializer="zeros",
               kernel_regularizer=l2(2e-4)))
    model.add(Flatten())
    model.add(
        Dense(4096,
              activation='sigmoid',
              kernel_regularizer=l2(1e-3),
              kernel_initializer="random_uniform",
              bias_initializer="zeros"))

    # Generate the encodings (feature vectors) for the two images
    encoded_l = model(left_input)
    encoded_r = model(right_input)

    similarity = dot([encoded_l, encoded_r], axes=-1, normalize=True)

    # Connect the inputs with the outputs
    siamese_net = Model(inputs=[left_input, right_input], outputs=similarity)

    # return the model
    return siamese_net
Example No. 21
    def create_model(self):
        output_len = self.max_seq_length

        inputt = Input(shape=(self.max_seq_length,), dtype='int32')
        emb = self.one_hot_layer()
        emb.trainable = True
        embedded = emb(inputt)

        conv2 = Conv1D(
            self.latent_dim,
            kernel_size=2,
            activation='tanh',
            padding='same'  #,dilation_rate=2
        )(embedded)

        lstm_input = concatenate([embedded, conv2])

        encoder_output = Bidirectional(
            LSTM(self.latent_dim, return_sequences=True),
            input_shape=(output_len, self.token_count),
        )(lstm_input)
        # Due to `return_sequences` the encoder outputs are of shape
        # (X, sequence_length, 2 x LSTM hidden dim).
        # we only need the last timestep for our decoder input
        encoder_last = encoder_output[:, -1, :]

        repeated = RepeatVector(output_len)(encoder_last)

        decoder_output = Bidirectional(
            LSTM(self.latent_dim, return_sequences=True))(repeated)

        # custom attention
        attention = dot([decoder_output, encoder_output], axes=[2, 2])
        attention = Activation('softmax', name='attention')(attention)
        context = dot([attention, encoder_output], axes=[2, 1])
        decoder_combined_context = concatenate([context, decoder_output])

        td_dense = TimeDistributed(Dense(self.latent_dim, activation='tanh'))
        output_1 = td_dense(decoder_combined_context)
        output = self.output_layer()(output_1)

        self.model = Model(inputs=inputt, outputs=output)
        self.compile_model()
Example No. 22
def self_attention(x):
    ''' 
    .  stands for dot product 
    *  stands for elemwise multiplication
        
    m = x . transpose(x)
    n = softmax(m)
    o = n . x  
    a = o * x           
       
    return a
        
    '''

    m = dot([x, x], axes=[2, 2])
    n = Activation('softmax')(m)
    o = dot([n, x], axes=[2, 1])
    a = multiply([o, x])

    return a
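A minimal sketch of how self_attention might be dropped into a model; the input shape, recurrent layer, and pooling/readout are illustrative assumptions, not from the source.

from tensorflow.keras.layers import (Input, LSTM, Dense, GlobalAveragePooling1D,
                                     dot, multiply, Activation)
from tensorflow.keras.models import Model

x_in = Input(shape=(50, 300))                 # (batch, time_steps, features)
h = LSTM(64, return_sequences=True)(x_in)     # keep the full sequence
a = self_attention(h)                         # (batch, 50, 64)
pooled = GlobalAveragePooling1D()(a)
out = Dense(1, activation='sigmoid')(pooled)
model = Model(x_in, out)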
Example No. 23
def skeleton_cnn(num_frame, weights):
    x_input = Input(shape=(num_frame, 128))
    
    # audio model 
    conv1 = Conv1D(128, kernel_size=3, padding='same', use_bias=True, kernel_regularizer=l2(1e-5), kernel_initializer='he_normal')
    bn1 = BatchNormalization()
    activ1 = LeakyReLU(0.2)
    # activ1 = Activation('relu')
    mp1 = MaxPool1D(pool_size=3)

    conv2 = Conv1D(128, kernel_size=3, padding='same', use_bias=True, kernel_regularizer=l2(1e-5), kernel_initializer='he_normal')
    bn2 = BatchNormalization()
    activ2 = LeakyReLU(0.2)
    # activ2 = Activation('relu')
    mp2 = MaxPool1D(pool_size=3)
    
    conv3 = Conv1D(128, kernel_size=3, padding='same', use_bias=True, kernel_regularizer=l2(1e-5), kernel_initializer='he_normal')
    bn3 = BatchNormalization()
    activ3 = LeakyReLU(0.2)
    # activ3 = Activation('relu')
    mp3 = MaxPool1D(pool_size=3)
    do3 = Dropout(0.5)
    
    conv4 = Conv1D(128, kernel_size=3, padding='same', use_bias=True, kernel_regularizer=l2(1e-5), kernel_initializer='he_normal')
    bn4 = BatchNormalization()
    activ4 = LeakyReLU(0.2)
    # activ4 = Activation('relu')
    mp4 = MaxPool1D(pool_size=3)

    conv5 = Conv1D(256, kernel_size=1, padding='same', use_bias=True, kernel_regularizer=l2(1e-5), kernel_initializer='he_normal')
    bn5 = BatchNormalization()
    activ5 = LeakyReLU(0.2)
    # activ5 = Activation('relu')
    do5 = Dropout(0.5)

    ap = GlobalAvgPool1D()
    
    # Anchor 
    out = mp1(activ1(bn1(conv1(x_input))))
    out = mp2(activ2(bn2(conv2(out))))
    out = mp3(activ3(bn3(conv3(out))))
    out = do3(out)
    out = mp4(activ4(bn4(conv4(out))))
    out = activ5(bn5(conv5(out)))
    out = do5(out)
    out = ap(out)
   
    # out = Dense(num_artist, activation='softmax')(out)
    out = dot([out, out], axes=1, normalize=True)
    out = Activation('linear')(out)
    model = Model(inputs=x_input, outputs = out)

    model.load_weights(weights)
    return model
Example No. 24
 def __init__(self, dictionarySize=2500, sentenceLength=30):
     # settings
     self.dictionarySize = dictionarySize
     self.sentenceLength = sentenceLength
     # keras overall model
     embedding = Embedding(dictionarySize,
                           128,
                           mask_zero=True,
                           input_length=None)
     encoder = LSTM(256, return_sequences=True, return_state=True)
     decoder = LSTM(256, return_sequences=True, return_state=True)
     classifierLayer1 = TimeDistributed(Dense(256, activation='tanh'))
     classifierLayer2 = TimeDistributed(
         Dense(dictionarySize, activation='softmax'))
     questions = Input(shape=(None, ), dtype='int32')
     answers = Input(shape=(None, ), dtype='int32')
     embeddedQuestions = embedding(questions)
     embeddedAnswers = embedding(answers)
     encoded, h, c = encoder(embeddedQuestions)
     decoded, _, _ = decoder(embeddedAnswers, initial_state=[h, c])
     attention = Activation('softmax')(dot([encoded, decoded], axes=[2, 2]))
     context = dot([attention, encoded], axes=[2, 1])
     features = concatenate([decoded, context])
     distributions = classifierLayer2(classifierLayer1(features))
     self.kerasOverallModel = Model([questions, answers], distributions)
     self.kerasOverallModel.compile(optimizer='rmsprop',
                                    loss='categorical_crossentropy',
                                    sample_weight_mode='temporal')
     # keras model interfaces
     self.kerasEncoderModel = Model(questions, [encoded, h, c])
     encoded = Input(shape=(None, 256))
     hMemCells = Input(shape=(256, ))
     cMemCells = Input(shape=(256, ))
     decoded, h, c = decoder(embeddedAnswers,
                             initial_state=[hMemCells, cMemCells])
     attention = Activation('softmax')(dot([encoded, decoded], axes=[2, 2]))
     context = dot([attention, encoded], axes=[2, 1])
     features = concatenate([decoded, context])
     distributions = classifierLayer2(classifierLayer1(features))
     self.kerasDecoderModel = Model(
         [answers, encoded, hMemCells, cMemCells], [distributions, h, c])
Example No. 25
def cross_modal_attention(x, y):
    ''' 
    .  stands for dot product 
    *  stands for elemwise multiplication
    {} stands for concatenation
        
    m1 = x . transpose(y) ||  m2 = y . transpose(x) 
    n1 = softmax(m1)      ||  n2 = softmax(m2)
    o1 = n1 . y           ||  o2 = m2 . x
    a1 = o1 * x           ||  a2 = o2 * y
       
    return {a1, a2}
        
    '''

    m1 = dot([x, y], axes=[2, 2])
    n1 = Activation('softmax')(m1)
    o1 = dot([n1, y], axes=[2, 1])
    a1 = multiply([o1, x])

    return a1
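An illustrative wiring of cross_modal_attention for two modality streams with the same number of time steps; every shape and layer choice here is an assumption.

from tensorflow.keras.layers import (Input, GRU, Dense, GlobalAveragePooling1D,
                                     dot, multiply, Activation, concatenate)
from tensorflow.keras.models import Model

text_in = Input(shape=(30, 100))              # e.g. text features per time step
audio_in = Input(shape=(30, 74))              # e.g. audio features per time step
t = GRU(64, return_sequences=True)(text_in)
a = GRU(64, return_sequences=True)(audio_in)
t_att = cross_modal_attention(t, a)           # text attended by audio
a_att = cross_modal_attention(a, t)           # audio attended by text
merged = concatenate([GlobalAveragePooling1D()(t_att),
                      GlobalAveragePooling1D()(a_att)])
out = Dense(1, activation='sigmoid')(merged)
model = Model([text_in, audio_in], out)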
Example No. 26
    def _get_model2(self):
        x1 = Input(shape=(10, ))
        x2 = Input(shape=(10, ))
        y = dot([Dense(10)(x1), Dense(10)(x2)], axes=1)
        model = Model(inputs=[x1, x2], outputs=y)
        model.compile(loss="mse", optimizer="adam")

        wrapped = OracleWrapper(model, BiasedReweightingPolicy(), score="loss")

        x = [np.random.rand(16, 10), np.random.rand(16, 10)]
        y = np.random.rand(16, 1)

        return model, wrapped, x, y
Example No. 27
def finetuning_mono2mix(vocal_model, mix_model, num_frame,num_neg_artist, num_pos_track):

    anchor = Input(shape=(num_frame,config.n_mels))
    pos_items = [Input(shape=(num_frame, config.n_mels)) for i in range(num_pos_track)]
    neg_items = [Input(shape=(num_frame, config.n_mels)) for i in range(num_neg_artist)]

    anchor_out = vocal_model(anchor)
    # anchor_out = mix_model(anchor)
    pos_outs = [mix_model(pos_item) for pos_item in pos_items]
    neg_outs = [mix_model(neg_item) for neg_item in neg_items]


    ### cosine 
    pos_dists = [dot([anchor_out, pos_out], axes=1, normalize=True) for pos_out in pos_outs]
    neg_dists = [dot([anchor_out, neg_out], axes=1, normalize=True) for neg_out in neg_outs]
    
    all_dists = concatenate(pos_dists + neg_dists)

    outputs = Activation('linear', name='siamese')(all_dists)
    '''
    # euc distance 
    norm = Lambda(lambda x: K.l2_normalize(x, axis=1), name='l2_norm')
    anchor_out = norm(anchor_out)
    pos_outs = [norm(pos_out) for pos_out in pos_outs]
    neg_outs = [norm(neg_out) for neg_out in neg_outs]
    distance = Lambda(euclidean_dist, output_shape=euclidean_dist_output_shape, name='euclidean')
    pos_dists = [distance([anchor_out, pos_out]) for pos_out in pos_outs]
    neg_dists = [distance([anchor_out, neg_out]) for neg_out in neg_outs]
    outputs = concatenate(pos_dists + neg_dists)
    '''

    '''
    distance  = Lambda(euclidean_dist, output_shape=euclidean_dist_output_shape, name='euclidean')
    pos_dist = distance([anchor_out, pos_outs[0]]) 
    model = Model(inputs=[anchor]+ pos_items + neg_items, outputs=[outputs, pos_dist])
    '''
    model = Model(inputs=[anchor]+ pos_items + neg_items, outputs=outputs)

    return model 
Example No. 28
 def make_seq2seq_models(self,x_train,y_train):
     input=Input(shape=(1, x_train.shape[1]))
     output=Input(shape=(1, y_train.shape[1]))
     n_hidden = 50
     encoder_stack_h, encoder_last_h, encoder_last_c = LSTM(
     n_hidden, activation='elu', dropout=0.2,
     return_sequences=True, return_state=True)(input)
     encoder_last_h = BatchNormalization(momentum=0.1)(encoder_last_h)
     encoder_last_c = BatchNormalization(momentum=0.1)(encoder_last_c)
     decoder = RepeatVector(self.len_pred)(encoder_last_h)
     decoder_stack_h, decoder_last_h, decoder_last_c = LSTM(n_hidden, activation='elu', dropout=0.2,return_state=True, return_sequences=True)(decoder, initial_state=[encoder_last_h, encoder_last_c])
     attention = dot([decoder_stack_h, encoder_stack_h], axes=[2, 2])
     attention = Activation('softmax')(attention)
     context = dot([attention, encoder_stack_h], axes=[2,1])
     context = BatchNormalization(momentum=0.6)(context)
     decoder_combined_context = concatenate([context, decoder_stack_h])
     out = TimeDistributed(Dense(1))(decoder_combined_context)
     model = Model(inputs=input, outputs=out)
     opt = Adam(lr=0.001)
     model.compile(loss='mae', optimizer=opt, metrics=['mse'])
     print(model.summary())
     return model
Example No. 29
def Build_Attention_layer(Parametre_layer, encoder, decoder):

    if Parametre_layer["type_attention"] == "Luong":
        # the luong's attention
        attention = L.dot([decoder[0], encoder], axes=[2, 2])
        attention = L.Activation('softmax')(attention)
        context = L.dot([attention, encoder], axes=[2, 1])
        decoder_combined_context = K.concatenate([context, decoder[0]])
    elif Parametre_layer["type_attention"] == "Luong_keras":
        # Luong's attention via the built-in Keras Attention layer
        context_vector = L.Attention(
            use_scale=Parametre_layer["use_scale"],
            causal=Parametre_layer["use_self_attention"],
            dropout=Parametre_layer["dropout"])([decoder[0], encoder])
        decoder_combined_context = L.concatenate([context_vector, decoder[0]])
    elif Parametre_layer["type_attention"] == "Bah_keras":
        # use Keras's AdditiveAttention layer (Bahdanau-style attention)
        context_vector = L.AdditiveAttention(
            use_scale=Parametre_layer["use_scale"],
            causal=Parametre_layer["use_self_attention"],
            dropout=Parametre_layer["dropout"])([decoder[0], encoder])
        decoder_combined_context = L.concatenate([context_vector, decoder[0]])

    return decoder_combined_context
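A hypothetical call sketch for Build_Attention_layer, assuming L is tensorflow.keras.layers; the encoder/decoder tensors, shapes, and parameter values are illustrative only.

import tensorflow.keras.layers as L
from tensorflow.keras import Input, Model

enc_in = Input(shape=(12, 32))
dec_in = Input(shape=(7, 32))
encoder = L.LSTM(64, return_sequences=True)(enc_in)
decoder = L.LSTM(64, return_sequences=True)(dec_in)
params = {"type_attention": "Luong", "use_scale": False,
          "use_self_attention": False, "dropout": 0.0}
combined = Build_Attention_layer(params, encoder, [decoder])   # (batch, 7, 128)
model = Model([enc_in, dec_in], combined)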
Example No. 30
    def __init__(self, *args, **kwargs):
        self.model = Sequential([
            Dense(10, activation="relu", input_shape=(2, )),
            Dense(10, activation="relu"),
            Dense(2)
        ])
        self.model.compile("sgd", "mse", metrics=["mae"])

        x1 = Input(shape=(10, ))
        x2 = Input(shape=(10, ))
        y = dot([Dense(10)(x1), Dense(10)(x2)], axes=1)
        self.model2 = Model(inputs=[x1, x2], outputs=y)
        self.model2.compile(loss="mse", optimizer="adam")

        super(TestTraining, self).__init__(*args, **kwargs)