Example #1
# Keras 1.x API throughout this example (merge, output_dim, init, etc. were
# removed or renamed in Keras 2).
from keras.layers import Input, Embedding, Masking, LSTM, Bidirectional, merge
from keras.models import Model


def triplet_lstm(input_length,
                 input_dim,
                 lstm_core_length,
                 activation_function='tanh',
                 inner_activation_function='hard_sigmoid',
                 distance_function='cos',
                 initializer='glorot_uniform',
                 inner_initializer='orthogonal',
                 regularizer=None,
                 optimizer="sgd",
                 dropout=0.3,
                 embedding_dimension=-1):
    # Token-id inputs when an embedding is requested; otherwise the inputs
    # are sequences of dense feature vectors.
    if embedding_dimension > 0:
        input_target = Input(shape=(input_length, ))
        input_pos = Input(shape=(input_length, ))
        input_neg = Input(shape=(input_length, ))
    else:
        input_target = Input(shape=(input_length, input_dim))
        input_pos = Input(shape=(input_length, input_dim))
        input_neg = Input(shape=(input_length, input_dim))

    if embedding_dimension > 0:
        # The positive and negative branches share one embedding; the target
        # (anchor) branch gets its own, separately trained embedding.
        embedded_class = Embedding(input_dim + 1,
                                   embedding_dimension,
                                   input_length=input_length,
                                   mask_zero=True)
        embedded_target = Embedding(input_dim + 1,
                                    embedding_dimension,
                                    input_length=input_length,
                                    mask_zero=True)(input_target)

        embedded_pos = embedded_class(input_pos)
        embedded_neg = embedded_class(input_neg)

        # mask_zero=True already propagates a mask, so these Masking layers
        # are redundant but harmless.
        masking_target = Masking(mask_value=0)(embedded_target)
        masking_pos = Masking(mask_value=0)(embedded_pos)
        masking_neg = Masking(mask_value=0)(embedded_neg)

    else:
        masking_target = Masking(mask_value=0)(input_target)
        masking_pos = Masking(mask_value=0)(input_pos)
        masking_neg = Masking(mask_value=0)(input_neg)

    # The target (anchor) branch gets its own BiLSTM; the positive and
    # negative branches share the BiLSTM built next.
    lstm_target = Bidirectional(
        LSTM(output_dim=lstm_core_length,
             init=initializer,
             inner_init=inner_initializer,
             activation=activation_function,
             inner_activation=inner_activation_function,
             W_regularizer=regularizer,
             U_regularizer=regularizer,
             b_regularizer=regularizer,
             dropout_W=dropout,
             dropout_U=dropout,
             return_sequences=False))(masking_target)

    lstm_class = Bidirectional(
        LSTM(output_dim=lstm_core_length,
             init=initializer,
             inner_init=inner_initializer,
             activation=activation_function,
             inner_activation=inner_activation_function,
             W_regularizer=regularizer,
             U_regularizer=regularizer,
             b_regularizer=regularizer,
             dropout_W=dropout,
             dropout_U=dropout,
             return_sequences=False))
    lstm_pos = lstm_class(masking_pos)
    # mode='cos' makes merge() compute the cosine similarity of the two
    # encodings.
    sim_pos = merge([lstm_target, lstm_pos], mode=distance_function)

    lstm_neg = lstm_class(masking_neg)
    sim_neg = merge([lstm_target, lstm_neg], mode=distance_function)

    model = Model([input_target, input_pos, input_neg], [sim_pos, sim_neg])
    # hinge_triplet_loss is a custom loss this snippet never defines; a
    # hypothetical sketch follows the function.
    model.compile(optimizer=optimizer, loss=hinge_triplet_loss)

    # The first call on the shared BiLSTM corresponds to output node 0.
    assert lstm_class.get_output_at(0) == lstm_pos
    return model
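
# --- Hypothetical sketch, not in the original: the function above compiles
# --- with a custom hinge_triplet_loss that the snippet never defines. Keras
# --- applies a loss to each output separately, so one plausible formulation
# --- scores each cosine similarity against a +1/-1 target with a hinge
# --- margin (the margin value 0.5 is an assumption).
from keras import backend as K


def hinge_triplet_loss(y_true, y_pred):
    # y_true: +1 for the (target, positive) output, -1 for (target, negative).
    # y_pred: the cosine similarity produced by the 'cos' merge.
    margin = 0.5
    return K.mean(K.maximum(0., margin - y_true * y_pred), axis=-1)
# Training targets would then be all ones for sim_pos and all minus ones
# for sim_neg.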
Example #2
import tensorflow as tf
from keras.backend.tensorflow_backend import set_session
from keras.layers import Input, Embedding, GRU, Bidirectional

# TF1-style session setup: let GPU memory grow on demand instead of being
# fully pre-allocated at startup.
config = tf.ConfigProto()
config.gpu_options.allow_growth = True
set_session(tf.Session(config=config))
# LSTM_Dim, num_words, Embedding_Dim, and MAX_SEQ_LEN are hyperparameters
# assumed to be defined earlier in the original script.
bi_GRU = Bidirectional(GRU(LSTM_Dim, unroll=False, return_sequences=True))
linerLayer = LinerLayer()  # custom layer defined elsewhere in the project
embedding_layer = Embedding(
    num_words + 1,
    Embedding_Dim,
    # weights=[embedding_matrix],  # optional pre-trained weights
    input_length=MAX_SEQ_LEN,
    mask_zero=True,
    trainable=True)

input_layer_1 = Input(shape=(MAX_SEQ_LEN, ))
embedding_layer_1 = embedding_layer(input_layer_1)
bi_GRU_1 = bi_GRU(embedding_layer_1)
# First call on the shared wrapper -> output node 0.
assert bi_GRU.get_output_at(0) == bi_GRU_1
# max_pooling_1 = MaxPooling1D()(bi_GRU_1)

input_layer_2 = Input(shape=(MAX_SEQ_LEN, ))
embedding_layer_2 = embedding_layer(input_layer_2)
bi_GRU_2 = bi_GRU(embedding_layer_2)
# Second call on the shared wrapper -> output node 1.
assert bi_GRU.get_output_at(1) == bi_GRU_2
# max_pooling_2 = MaxPooling1D()(bi_GRU_2)
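
# Illustration only, not in the original: every call to a shared layer
# appends an inbound node, so a third call would expose output node 2.
input_layer_3 = Input(shape=(MAX_SEQ_LEN, ))
bi_GRU_3 = bi_GRU(embedding_layer(input_layer_3))
assert bi_GRU.get_output_at(2) == bi_GRU_3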

# # ---------------- compute attention weights --------------------------------
# repeat_GRU_2=RepeatVector(MAX_SEQ_LEN)(bi_GRU_2)
# weight_layer_1=concatenate([bi_GRU_1,repeat_GRU_2],axis=-1)
# weight_layer_1=Activation('tanh')(weight_layer_1)
# weight_layer_1=TimeDistributed(Dense(1))(weight_layer_1)
# weight_layer_1=Lambda(lambda x: K.sum(x, axis=-1))(weight_layer_1)
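
# Hedged sketch of the commented-out weighting above. As written it would
# fail: RepeatVector needs a 2D input, but bi_GRU_2 is a full sequence
# (return_sequences=True). One fix, assumed here, is to max-pool bi_GRU_2
# over time first. This also assumes the embedding was built with
# mask_zero=False, since GlobalMaxPooling1D and Lambda do not accept masks.
from keras.layers import (Activation, Dense, GlobalMaxPooling1D, Lambda,
                          RepeatVector, TimeDistributed, concatenate)
from keras import backend as K

pooled_2 = GlobalMaxPooling1D()(bi_GRU_2)           # (batch, 2 * LSTM_Dim)
repeat_GRU_2 = RepeatVector(MAX_SEQ_LEN)(pooled_2)  # (batch, T, 2 * LSTM_Dim)
weight_layer_1 = concatenate([bi_GRU_1, repeat_GRU_2], axis=-1)
weight_layer_1 = Activation('tanh')(weight_layer_1)
weight_layer_1 = TimeDistributed(Dense(1))(weight_layer_1)            # (batch, T, 1)
weight_layer_1 = Lambda(lambda x: K.sum(x, axis=-1))(weight_layer_1)  # (batch, T)
# A softmax over the time axis would typically follow to turn these scores
# into attention weights.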