Example #1
 def create_critic_network(self, S, G=None):
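     # Four goal-conditioned critic variants, selected by the '--network' flag:
     # '1' re-injects the goal G after the first hidden layer,
     # '2' feeds the state-goal difference alongside the raw state,
     # '3' embeds S and G with a shared layer and subtracts the embeddings,
     # otherwise S and G are simply concatenated.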
     if self.args['--network'] == '1':
         l1 = concatenate([S, G])
         l2 = Dense(400, activation="relu")(l1)
         l3 = concatenate([l2, G])
         l4 = Dense(300, activation="relu")(l3)
         Q_values = Dense(self.num_actions)(l4)
     elif self.args['--network'] == '2':
         l1 = subtract([S, G])
         l2 = concatenate([l1, S])
         l3 = Dense(400, activation="relu")(l2)
         l4 = Dense(300, activation="relu")(l3)
         Q_values = Dense(self.num_actions)(l4)
     elif self.args['--network'] == '3':
         shared_l = Dense(200, activation='relu')
         l1 = shared_l(S)
         l2 = shared_l(G)
         l3 = subtract([l1, l2])
         l4 = Dense(200, activation="relu")(l3)
         l5 = Dense(300, activation="relu")(l4)
         Q_values = Dense(self.num_actions)(l5)
     else:
         l1 = concatenate([S, G])
         l2 = Dense(400, activation="relu")(l1)
         l3 = Dense(300, activation="relu")(l2)
         Q_values = Dense(self.num_actions)(l3)
     return Q_values
Example #2
    def create_critic_network(self, S, G=None, M=None):
        if self.network == '0':
            L1 = concatenate([multiply([subtract([S, G]), M]), S])
            L2 = Dense(400,
                       activation="relu",
                       kernel_initializer=lecun_uniform(),
                       kernel_regularizer=l2(0.01))(L1)
            L3 = Dense(300,
                       activation="relu",
                       kernel_initializer=lecun_uniform(),
                       kernel_regularizer=l2(0.01))(L2)
            Q_values = Dense(self.env.action_dim,
                             activation='linear',
                             kernel_initializer=RandomUniform(minval=-3e-4,
                                                              maxval=3e-4),
                             kernel_regularizer=l2(0.01),
                             bias_initializer=RandomUniform(minval=-3e-4,
                                                            maxval=3e-4))(L3)
        else:
            L1 = Dense(200,
                       activation="relu",
                       kernel_initializer=lecun_uniform(),
                       kernel_regularizer=l2(0.01))
            L2 = Dense(300,
                       activation="relu",
                       kernel_initializer=lecun_uniform(),
                       kernel_regularizer=l2(0.01))
            i1 = multiply([subtract([S, G]), M])
            i2 = S
            h1 = L1(i1)
            h2 = L1(i2)
            h3 = concatenate([h1, h2])
            h4 = L2(h3)

            Q_values = Dense(self.env.action_dim,
                             activation='linear',
                             kernel_initializer=RandomUniform(minval=-3e-4,
                                                              maxval=3e-4),
                             kernel_regularizer=l2(0.01),
                             bias_initializer=RandomUniform(minval=-3e-4,
                                                            maxval=3e-4))(h4)

        return Q_values
 def create_actor_network(self, S, G=None, M=None):
     input = concatenate([multiply([subtract([S, G]), M]), S])
     h0 = Dense(400, activation="relu",
                kernel_initializer=lecun_uniform())(input)
     h1 = Dense(300, activation="relu",
                kernel_initializer=lecun_uniform())(h0)
     V = Dense(self.a_dim[0],
               activation="tanh",
               kernel_initializer=RandomUniform(minval=-3e-3, maxval=3e-3),
               bias_initializer=RandomUniform(minval=-3e-3, maxval=3e-3))(h1)
     return V
    def initModels(self):

        S_c = Input(shape=self.s_dim)
        A_c = Input(shape=self.a_dim)
        G_c = Input(shape=self.g_dim)
        M_c = Input(shape=self.g_dim)
        TARGETS = Input(shape=(1,))

        layers, qval = self.create_critic_network(S_c, A_c, G_c, M_c)
        self.qvalModel = Model([S_c, A_c, G_c, M_c], qval)
        loss_dqn = K.mean(K.square(qval - TARGETS), axis=0)
        inputs = [S_c, A_c, G_c, M_c, TARGETS]
        outputs = [loss_dqn, qval]
        self.updatesQval = Adam(lr=0.001).get_updates(params=self.qvalModel.trainable_weights, loss=loss_dqn)
        self.trainQval = K.function(inputs=inputs, outputs=outputs, updates=self.updatesQval)

        S_a = Input(shape=self.s_dim)
        G_a = Input(shape=self.g_dim)
        M_a = Input(shape=self.g_dim)
        action = self.create_actor_network(S_a, G_a, M_a)
        self.actionModel = Model([S_a, G_a, M_a], action)
        self.action = K.function(inputs=[S_a, G_a, M_a], outputs=[action], updates=None)

        L1, L2, L3 = layers
        qvalTrain = L1(concatenate([multiply([subtract([S_a, G_a]), M_a]), S_a]))
        qvalTrain = concatenate([qvalTrain, action])
        qvalTrain = L2(qvalTrain)
        qvalTrain = L3(qvalTrain)
        self.criticActionGrads = K.gradients(qvalTrain, action)[0]

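        # invert/scale the critic's action gradients so updates keep actions inside
        # their bounds: positive gradients shrink with the remaining headroom to the
        # upper bound, negative gradients with the distance to the lower bound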
        low = tf.convert_to_tensor(self.env.action_space.low)
        high = tf.convert_to_tensor(self.env.action_space.high)
        width = high - low
        pos = K.cast(K.greater_equal(self.criticActionGrads, 0), dtype='float32')
        pos *= high - action
        neg = K.cast(K.less(self.criticActionGrads, 0), dtype='float32')
        neg *= action - low
        inversion = (pos + neg) / width
        self.invertedCriticActionGrads = self.criticActionGrads * inversion

        if self.inv_grads == '0':
            self.actorGrads = tf.gradients(action, self.actionModel.trainable_weights, grad_ys=-self.criticActionGrads)
        else:
            self.actorGrads = tf.gradients(action, self.actionModel.trainable_weights, grad_ys=-self.invertedCriticActionGrads)

        self.updatesActor = DDPGAdam(lr=0.0001).get_updates(params=self.actionModel.trainable_weights,
                                                        loss=None,
                                                        grads=self.actorGrads)
        inputs = [S_a, G_a, M_a]
        outputs = []
        self.trainActor = K.function(inputs=inputs, outputs=outputs, updates=self.updatesActor)
Example #5
    def initModels(self):

        S_c = Input(shape=self.s_dim)
        A_c = Input(shape=(1, ), dtype='uint8')
        G_c = Input(shape=self.g_dim)
        M_c = Input(shape=self.g_dim)
        TARGETS = Input(shape=(1, ))

        layers, qvals = self.create_critic_network(S_c, G_c, M_c)
        self.qvalsModel = Model([S_c, G_c, M_c], qvals)
        self.qvals = K.function(inputs=[S_c, G_c, M_c],
                                outputs=[qvals],
                                updates=None)

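        # a one-hot mask over the actions picks out Q(s, a) for the action actually taken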
        actionFilter = K.squeeze(K.one_hot(A_c, self.a_dim), axis=1)
        qval = K.sum(actionFilter * qvals, axis=1, keepdims=True)
        self.qval = K.function(inputs=[S_c, G_c, M_c, A_c],
                               outputs=[qval],
                               updates=None)

        loss_dqn = K.mean(K.square(qval - TARGETS), axis=0)
        inputs = [S_c, A_c, G_c, M_c, TARGETS]
        outputs = [loss_dqn, qval]
        self.updatesQval = Adam(lr=0.001).get_updates(
            params=self.qvalsModel.trainable_weights, loss=loss_dqn)
        self.trainCritic = K.function(inputs=inputs,
                                      outputs=outputs,
                                      updates=self.updatesQval)

        S_a = Input(shape=self.s_dim)
        G_a = Input(shape=self.g_dim)
        M_a = Input(shape=self.g_dim)
        probs = self.create_actor_network(S_a, G_a, M_a)
        self.probsModel = Model([S_a, G_a, M_a], probs)
        self.probs = K.function(inputs=[S_a, G_a, M_a],
                                outputs=[probs],
                                updates=None)

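        # re-apply the critic layers to the actor's inputs; the actor is then trained
        # to maximise the expected Q-value under its action probabilities (the -val loss)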
        L1, L2, L3 = layers
        input = concatenate([multiply([subtract([S_a, G_a]), M_a]), S_a])
        qvalTrain = L1(input)
        qvalTrain = L2(qvalTrain)
        qvalTrain = L3(qvalTrain)
        val = K.sum(qvalTrain * probs, axis=1, keepdims=True)
        inputs = [S_a, G_a, M_a]
        outputs = [probs, qvalTrain, val]
        self.updatesActor = Adam(lr=0.001).get_updates(
            params=self.probsModel.trainable_weights, loss=-val)
        self.trainActor = K.function(inputs=inputs,
                                     outputs=outputs,
                                     updates=self.updatesActor)
 def create_critic_network(self, S, A, G=None, M=None):
     input = concatenate([multiply([subtract([S, G]), M]), S])
     L1 = Dense(400, activation="relu",
                kernel_initializer=lecun_uniform(),
                kernel_regularizer=l2(0.01))
     L1out = L1(input)
     L1out = concatenate([L1out, A])
     L2 = Dense(300, activation="relu",
                kernel_initializer=lecun_uniform(),
                kernel_regularizer=l2(0.01))
     L2out = L2(L1out)
     L3 = Dense(1, activation='linear',
                kernel_initializer=RandomUniform(minval=-3e-4, maxval=3e-4),
                kernel_regularizer=l2(0.01),
                bias_initializer=RandomUniform(minval=-3e-4, maxval=3e-4))
     qval = L3(L2out)
     return [L1, L2, L3], qval
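The layer objects are returned alongside the Q-value tensor so that callers such as the initModels example above can re-apply the same critic weights to the actor's action (the L1/L2/L3 reuse when computing the critic-to-action gradients).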
Example #7
def Model_sent2tag_MLP_1(sentvocabsize, tagvocabsize, sent_W, tag_W, s2v_k,
                         tag2v_k):

    input_sent = Input(shape=(1, ), dtype='int32')
    sent_embedding = Embedding(input_dim=sentvocabsize,
                               output_dim=s2v_k,
                               input_length=1,
                               mask_zero=False,
                               trainable=False,
                               weights=[sent_W])(input_sent)

    input_tag = Input(shape=(1, ), dtype='int32')
    tag_embedding = Embedding(input_dim=tagvocabsize,
                              output_dim=tag2v_k,
                              input_length=1,
                              mask_zero=False,
                              trainable=False,
                              weights=[tag_W])(input_tag)

    x1_1 = Flatten()(sent_embedding)
    x2_0 = Flatten()(tag_embedding)

    # x1_1 = Dense(100, activation='tanh')(x1_0)

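    # pairwise interaction features between the tag and sentence embeddings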
    sub = subtract([x2_0, x1_1])
    mul = multiply([x2_0, x1_1])
    max = maximum([x2_0, x1_1])
    avg = average([x2_0, x1_1])
    class_input = concatenate([x2_0, x1_1, sub, mul, max, avg], axis=-1)
    # class_input = Flatten()(class_input)
    class_mlp1 = Dense(200, activation='tanh')(class_input)
    class_mlp1 = Dropout(0.5)(class_mlp1)
    class_mlp2 = Dense(2)(class_mlp1)
    class_output = Activation('softmax', name='CLASS')(class_mlp2)

    # distance = Lambda(euclidean_distance, output_shape=eucl_dist_output_shape)([mlp_x1_2, x2_0])
    # distance = dot([x1_0, x2_0], axes=-1, normalize=True)

    mymodel = Model([input_sent, input_tag], class_output)

    mymodel.compile(loss='categorical_crossentropy',
                    optimizer=optimizers.Adam(lr=0.001),
                    metrics=['acc'])

    return mymodel
def load_model(number, nb_words, n_handcrafted_features):

    embedding_matrix = np.zeros((nb_words, GloVe_embedding_dim))

    embedding_layer = Embedding(nb_words,
                                GloVe_embedding_dim,
                                weights=[embedding_matrix],
                                input_length=max_sentence_length,
                                trainable=False)
    lstm_layer = Bidirectional(LSTM(100,
                                    recurrent_dropout=0.4,
                                    return_sequences=False),
                               merge_mode='mul')

    # sentence 1
    sequence_1_input = Input(shape=(max_sentence_length, ), dtype="int32")
    embedded_sequences_1 = embedding_layer(sequence_1_input)
    s1 = lstm_layer(embedded_sequences_1)
    s1 = BatchNormalization()(s1)

    # sentence 2
    sequence_2_input = Input(shape=(max_sentence_length, ), dtype="int32")
    embedded_sequences_2 = embedding_layer(sequence_2_input)
    s2 = lstm_layer(embedded_sequences_2)
    s2 = BatchNormalization()(s2)

    # handcrafted features
    nlp_input = Input(shape=(n_handcrafted_features, ), dtype="float32")
    features_dense = BatchNormalization()(nlp_input)
    features_dense = Dense(100, activation="relu")(features_dense)
    features_dense = BatchNormalization()(features_dense)

    # computing cosine similarity
    csd = dot([s1, s2], axes=-1, normalize=True)
    # computing multiplication between the 2 vectors
    mul_v = multiply([s1, s2])
    # compute the absolute difference
    x_y = subtract([s1, s2])
    merged = Lambda(lambda x: abs(x))(x_y)

    # merge the features
    merged = concatenate([merged, mul_v])
    merged = Dropout(0.3)(merged)

    merged = concatenate([merged, features_dense, csd])
    merged = BatchNormalization()(merged)

    merged = Dense(200, activation="relu")(merged)
    merged = Dropout(0.2)(merged)
    merged = BatchNormalization()(merged)

    out = Dense(2, activation="softmax")(merged)
    model = Model(inputs=[sequence_1_input, sequence_2_input, nlp_input],
                  outputs=out)
    model.compile(loss="binary_crossentropy",
                  optimizer="nadam",
                  metrics=['acc'])
    best_model_path = "Kfold/best_model_" + str(number)
    model.load_weights(best_model_path)

    return model
Example #9
    def _build_network(self,
                       vocab_size,
                       maxlen,
                       emb_weights=[],
                       c_emb_weights=[],
                       hidden_units=256,
                       trainable=True,
                       batch_size=1):

        print('Building model...')

        context_input = Input(name='context', batch_shape=(batch_size, maxlen))

        if (len(c_emb_weights) == 0):
            c_emb = Embedding(vocab_size,
                              256,
                              input_length=maxlen,
                              embeddings_initializer='glorot_normal',
                              trainable=trainable)(context_input)
        else:
            c_emb = Embedding(vocab_size,
                              c_emb_weights.shape[1],
                              input_length=maxlen,
                              weights=[c_emb_weights],
                              trainable=trainable)(context_input)

        c_lstm1 = LSTM(hidden_units,
                       kernel_initializer='he_normal',
                       recurrent_initializer='orthogonal',
                       bias_initializer='he_normal',
                       activation='sigmoid',
                       recurrent_activation='sigmoid',
                       kernel_regularizer=regularizers.l2(0.01),
                       activity_regularizer=regularizers.l2(0.01),
                       recurrent_regularizer=regularizers.l2(0.01),
                       dropout=0.25,
                       recurrent_dropout=.0,
                       unit_forget_bias=False,
                       return_sequences=False)(c_emb)

        c_lstm2 = LSTM(hidden_units,
                       kernel_initializer='he_normal',
                       recurrent_initializer='orthogonal',
                       bias_initializer='he_normal',
                       activation='sigmoid',
                       recurrent_activation='sigmoid',
                       kernel_regularizer=regularizers.l2(0.01),
                       activity_regularizer=regularizers.l2(0.01),
                       recurrent_regularizer=regularizers.l2(0.01),
                       dropout=0.25,
                       recurrent_dropout=.0,
                       unit_forget_bias=False,
                       return_sequences=False,
                       go_backwards=True)(c_emb)

        c_merged = add([c_lstm1, c_lstm2])
        c_merged = Dropout(0.25)(c_merged)

        text_input = Input(name='text', batch_shape=(batch_size, maxlen))

        if (len(emb_weights) == 0):
            emb = Embedding(vocab_size,
                            256,
                            input_length=maxlen,
                            embeddings_initializer='glorot_normal',
                            trainable=trainable)(text_input)
        else:
            emb = Embedding(vocab_size,
                            emb_weights.shape[1],
                            input_length=maxlen,
                            weights=[emb_weights],
                            trainable=trainable)(text_input)

        t_lstm1 = LSTM(hidden_units,
                       kernel_initializer='he_normal',
                       recurrent_initializer='he_normal',
                       bias_initializer='he_normal',
                       activation='sigmoid',
                       recurrent_activation='sigmoid',
                       kernel_regularizer=regularizers.l2(0.01),
                       activity_regularizer=regularizers.l2(0.01),
                       recurrent_regularizer=regularizers.l2(0.01),
                       dropout=0.25,
                       recurrent_dropout=0.25,
                       unit_forget_bias=False,
                       return_sequences=False)(emb)

        t_lstm2 = LSTM(hidden_units,
                       kernel_initializer='he_normal',
                       recurrent_initializer='he_normal',
                       bias_initializer='he_normal',
                       activation='sigmoid',
                       recurrent_activation='sigmoid',
                       kernel_regularizer=regularizers.l2(0.01),
                       activity_regularizer=regularizers.l2(0.01),
                       recurrent_regularizer=regularizers.l2(0.01),
                       dropout=0.25,
                       recurrent_dropout=0.25,
                       unit_forget_bias=False,
                       return_sequences=False,
                       go_backwards=True)(emb)

        t_merged = add([t_lstm1, t_lstm2])
        t_merged = Dropout(0.25)(t_merged)

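        # the classifier operates on the difference between the context and text encodings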
        merged = subtract([c_merged, t_merged])

        dnn_1 = Dense(hidden_units,
                      kernel_initializer="he_normal",
                      activation='sigmoid')(merged)
        dnn_1 = Dropout(0.25)(dnn_1)
        dnn_2 = Dense(2, activation='sigmoid')(dnn_1)

        softmax = Activation('softmax')(dnn_2)

        model = Model(inputs=[context_input, text_input], outputs=softmax)

        model.compile(loss='binary_crossentropy',
                      optimizer='adam',
                      metrics=['accuracy'])
        print('Number of parameters:', model.count_params())

        print(model.summary())
        return model
Example #10
    def __init__(self,
                 unit=64,
                 dropout=0.2,
                 max_len=39,
                 update_num=3,
                 regularization=0.1,
                 embedding_matrix=None,
                 use_cudnn=False,
                 use_share=False,
                 use_one_cell=False):
        self.unit = unit
        self.dropout = dropout
        self.use_share = use_share
        self.use_one_cell = use_one_cell
        self.regularization = l2(regularization)

        Q1_input = Input(shape=(max_len, ), dtype='int32', name='Q1')  # (?, L)
        Q2_input = Input(shape=(max_len, ), dtype='int32', name='Q2')  # (?, L)
        # Q1_m = Input(shape=(max_len,), dtype='int32', name='mask1')
        # Q2_m = Input(shape=(max_len,), dtype='int32', name='mask2')
        # magic = Input(shape=(4,), dtype='float32', name='magic')

        embedding = Embedding(input_dim=embedding_matrix.shape[0],
                              output_dim=embedding_matrix.shape[1],
                              mask_zero=True,
                              weights=[embedding_matrix],
                              trainable=False)
        # bn = BatchNormalization()
        Q1 = embedding(Q1_input)
        Q2 = embedding(Q2_input)

        GRULayer = CuDNNGRU if use_cudnn else GRU
        for i in range(update_num):
            Q1, Q2 = self.update_module(Q1, Q2, GRULayer)
        Q1, Q2 = self.attention(Q1, Q2, GRULayer, implementation=3)
        # bn1 = BatchNormalization()
        # bnm = BatchNormalization()
        # bns = BatchNormalization()
        # regression = Bilinear(implementation=0, activation='tanh')([Q1, Q2])
        att = SelfAttention(1, activation='tanh')
        Q1 = att(Q1)
        Q2 = att(Q2)
        vector = concatenate([  # Q1, Q2,
            merge.multiply([Q1, Q2]),
            # merge.subtract([Q1, Q2], use_abs=True),
            merge.subtract([Q1, Q2]),
            merge.average([Q1, Q2])
        ])
        # vector = merge.subtract([Q1, Q2])
        # vector = merge.add([Q1, Q2])
        # vector = Dropout(self.dropout)(vector)
        # vector = Dense(units=512, activation='tanh')(vector)
        # magic_new = Dense(units=64, activation='tanh')(magic)

        # vector = concatenate([vector, magic_new])
        vector = Dropout(self.dropout)(vector)

        vector = Dense(units=256, activation='tanh')(vector)

        vector = Dropout(self.dropout)(vector)

        regression = Dense(units=1, activation='sigmoid')(vector)
        super(IAM, self).__init__(inputs=[Q1_input, Q2_input],
                                  outputs=regression)
Example #11
def train_wgan_with_grad_penalty(prior_gen,
                                 generator,
                                 data_gen,
                                 critic,
                                 batch_size,
                                 epochs,
                                 batches_per_epoch=100,
                                 optimizer=Adam(lr=1e-4, beta_1=0, beta_2=0.9),
                                 grad_pen_coef=10.,
                                 critic_gen_train_ratio=2,
                                 callbacks=None):
    # build model to train the critic
    data_shape = critic.input_shape[1:]
    real_critic_input = Input(shape=data_shape, name='real_in')
    fake_critic_input = Input(shape=data_shape, name='fake_in')
    interp_critic_input = Input(shape=data_shape, name='interp_in')

    real_critic_score = critic(real_critic_input)
    fake_critic_score = critic(fake_critic_input)
    interp_critic_score = critic(interp_critic_input)

    critic_loss = subtract([fake_critic_score, real_critic_score])
    gradient_penalty = GradPenLayer()(
        [interp_critic_input, interp_critic_score])

    critic_train_mdl = Model(
        [real_critic_input, fake_critic_input, interp_critic_input],
        [critic_loss, gradient_penalty])

    critic_train_mdl.compile(optimizer=optimizer,
                             loss=lambda y_true, y_pred: y_pred,
                             loss_weights=[1., grad_pen_coef])

    # build model to train generator
    prior_input = Input(shape=generator.input_shape[1:], name='prior_in')
    critic.trainable = False
    critic_on_generator_score = critic(generator(prior_input))
    generator_train_mdl = Model(prior_input, critic_on_generator_score)
    generator_train_mdl.compile(optimizer=optimizer,
                                loss=lambda y_true, y_pred: -y_pred)

    # init callbacks
    callbacks = callbacks or []
    callbacks = CallbackList(callbacks)
    callbacks.set_model({'generator': generator, 'critic': critic})
    callbacks.set_params({
        'batch_size': batch_size,
        'epochs': epochs,
        'steps': batches_per_epoch,
        'samples': batches_per_epoch * batch_size,
        'prior_gen': prior_gen,
        'data_gen': data_gen,
    })

    # train
    print('Training on {} samples for {} epochs'.format(
        batches_per_epoch * batch_size, epochs))
    callbacks.on_train_begin()
    for e in range(epochs):
        print('Epoch {}/{}'.format(e + 1, epochs))
        callbacks.on_epoch_begin(e)
        progbar = Progbar(target=batches_per_epoch * batch_size)
        dummy_y = np.array([None] * batch_size)
        for b in range(batches_per_epoch):
            callbacks.on_batch_begin(b)
            batch_losses = np.zeros(shape=3)
            for critic_upd in range(critic_gen_train_ratio):
                real_batch = data_gen(batch_size)
                fake_batch = generator.predict(prior_gen(batch_size))
                weights = np.random.uniform(size=batch_size)
                weights = weights.reshape((-1, ) + (1, ) *
                                          (len(real_batch.shape) - 1))
                interp_batch = weights * real_batch + (1. -
                                                       weights) * fake_batch

                x_batch = {
                    'real_in': real_batch,
                    'fake_in': fake_batch,
                    'interp_in': interp_batch
                }
                cur_losses = np.array(
                    critic_train_mdl.train_on_batch(x=x_batch,
                                                    y=[dummy_y, dummy_y]))
                batch_losses += cur_losses

            generator_train_mdl.train_on_batch(x=prior_gen(batch_size),
                                               y=dummy_y)

            losses_names = ('total_loss', 'critic_loss', 'gradient_pen')
            progbar.add(batch_size, zip(losses_names, batch_losses))
            callbacks.on_batch_end(b)

        progbar.update(batches_per_epoch * batch_size)
        callbacks.on_epoch_end(e)

    callbacks.on_train_end()
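A minimal, hypothetical invocation sketch (toy generator/critic and synthetic data generators of my own naming; it assumes GradPenLayer, CallbackList and the other imports used inside the function above are available in the same module):

import numpy as np
from keras.layers import Dense
from keras.models import Sequential

latent_dim, data_dim = 8, 2

# toy generator: latent vector -> 2-D sample
toy_generator = Sequential([
    Dense(32, activation='relu', input_shape=(latent_dim, )),
    Dense(data_dim)
])
# toy critic: 2-D sample -> unbounded score
toy_critic = Sequential([
    Dense(32, activation='relu', input_shape=(data_dim, )),
    Dense(1)
])

train_wgan_with_grad_penalty(
    prior_gen=lambda n: np.random.normal(size=(n, latent_dim)),
    generator=toy_generator,
    data_gen=lambda n: np.random.normal(loc=3., size=(n, data_dim)),
    critic=toy_critic,
    batch_size=64,
    epochs=2,
    batches_per_epoch=10)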
def deep_neural_net_gru(train_data_1, train_data_2, train_data_3, train_labels,
                        test_data_1, test_data_2, test_data_3, test_labels,
                        max_len, len_chars, bidirectional, hidden_units, n):
    early_stop = EarlyStopping(monitor='loss', patience=2, verbose=1)
    checkpointer = ModelCheckpoint(
        filepath="/home/amarinho/data-amarinho/checkpoint" + str(n) + ".hdf5",
        verbose=1,
        save_best_only=True)
    lstm1 = GRU(hidden_units,
                implementation=2,
                return_sequences=True,
                name='lstm1')
    lstm2 = GRU(hidden_units,
                implementation=2,
                return_sequences=True,
                name='lstm2')
    lstm3 = GRU(hidden_units,
                implementation=2,
                return_sequences=True,
                name='lstm3')
    lstm1 = Bidirectional(lstm1, name='bilstm1')
    lstm2 = Bidirectional(lstm2, name='bilstm2')
    lstm3 = Bidirectional(lstm3, name='bilstm3')

    input_word1 = Input(shape=(max_len, len_chars))
    input_word2 = Input(shape=(max_len, len_chars))
    input_feature = Input(shape=(max_len, ))

    mask = Masking(mask_value=0, input_shape=(max_len, len_chars))(input_word1)
    l1 = lstm1(mask)
    l1 = Dropout(0.01)(l1)
    l1 = MaxPooling1DMasked(pool_size=1, name='maxpooling')(l1)

    input_concat = concatenate([l1, mask])
    l2 = lstm2(input_concat)
    l2 = Dropout(0.01)(l2)
    l2 = MaxPooling1DMasked(pool_size=1, name='maxpooling2')(l2)

    input_concat = concatenate([mask, l2])
    l3 = lstm3(input_concat)
    l3 = Dropout(0.01)(l3)
    l3 = MaxPooling1DMasked(pool_size=1, name='maxpooling3')(l3)

    final_input_concat = concatenate([l1, l2, l3], axis=1)
    final_input_concat = Flatten()(final_input_concat)
    SentenceEncoder = Model(input_word1, final_input_concat)

    word1_representation = SentenceEncoder(input_word1)
    word2_representation = SentenceEncoder(input_word2)

    concat = concatenate([word1_representation, word2_representation])
    mul = multiply([word1_representation, word2_representation])
    sub = subtract([word1_representation, word2_representation])

    final_merge = concatenate([concat, mul, sub, input_feature])
    dropout3 = Dropout(0.01)(final_merge)
    dense1 = Dense(hidden_units * 2, activation='relu',
                   name='dense1')(dropout3)
    dropout4 = Dropout(0.01)(dense1)
    #flatten = Flatten()(dense1)
    #dropout5 = Dropout(0.01)(flatten)
    dense2 = Dense(1, activation='sigmoid', name='dense2')(dropout4)
    final_model = Model([input_word1, input_word2, input_feature], dense2)
    print(final_model.summary())

    print('Compiling...')
    final_model.compile(optimizer='adam',
                        loss='binary_crossentropy',
                        metrics=['accuracy'])

    print('Fitting...')
    final_model.fit([train_data_1, train_data_2, train_data_3],
                    train_labels,
                    verbose=0,
                    validation_data=([test_data_1, test_data_2,
                                      test_data_3], test_labels),
                    callbacks=[checkpointer, early_stop],
                    epochs=20)

    start_time = time.time()
    aux1 = final_model.predict([test_data_1, test_data_2, test_data_3],
                               verbose=0)
    aux = (aux1 > 0.5).astype('int32').ravel()
    return aux, (time.time() - start_time)
Example #13
    s2 = BatchNormalization()(s2)

    # define an input for handcrafted features
    nlp_input = Input(shape=(train_nlp_features.shape[1],), dtype="float32")
    features_dense = BatchNormalization()(nlp_input)
    features_dense = Dense(100, activation="relu")(features_dense)
    features_dense = BatchNormalization()(features_dense)

    # computing cosine similarity
    csd = dot([s1, s2], axes=-1, normalize=True)

    # computing multiplication between the 2 vectors
    mul_v = multiply([s1, s2])

    # compute the absolute difference
    x_y = subtract([s1, s2])
    merged = Lambda(lambda x: abs(x))(x_y)

    # merge the features
    merged = concatenate([merged, mul_v])
    merged = Dropout(0.3)(merged)

    # final features for each pair of sentences
    merged = concatenate([merged, features_dense, csd])
    merged = BatchNormalization()(merged)

    merged = Dense(200, activation="relu")(merged)
    merged = Dropout(0.2)(merged)
    merged = BatchNormalization()(merged)

    # using a softmax classifier
def deep_neural_net_gru(train_data_1, train_data_2, train_labels, test_data_1,
                        test_data_2, test_labels, max_len, len_chars,
                        bidirectional, hidden_units, selfattention, maxpooling,
                        alignment, shortcut, multiplerlu, onlyconcat, n):
    early_stop = EarlyStopping(monitor='loss', patience=2, verbose=1)
    checkpointer = ModelCheckpoint(
        filepath="/home/amarinho/data-amarinho/checkpoint" + str(n) + ".hdf5",
        verbose=1,
        save_best_only=True)
    gru1 = GRU(hidden_units,
               implementation=2,
               return_sequences=True,
               name='gru1')
    gru2 = GRU(hidden_units,
               implementation=2,
               return_sequences=(alignment or selfattention or maxpooling),
               name='gru2')
    gru1 = Bidirectional(gru1, name='bigru1')
    gru2 = Bidirectional(gru2, name='bigru2')

    input_word1 = Input(shape=(max_len, len_chars))
    input_word2 = Input(shape=(max_len, len_chars))

    mask = Masking(mask_value=0, input_shape=(max_len, len_chars))(input_word1)
    g1 = gru1(mask)
    g1 = Dropout(0.01)(g1)
    if shortcut:  # shortcut connections
        shortcut_con = concatenate([g1, mask])
        g2 = gru2(shortcut_con)
    else:
        g2 = gru2(g1)
    g2 = Dropout(0.01)(g2)
    if selfattention:  # selfattention
        g2 = Attention()(g2)
    elif maxpooling:  # maxpooling
        g2 = GlobalMaxPooling1DMasked(name='maxpooling')(g2)
    SentenceEncoder = Model(input_word1, g2)
    print(SentenceEncoder.summary())

    word1_representation = SentenceEncoder(input_word1)
    word2_representation = SentenceEncoder(input_word2)

    if alignment:
        att1 = AlignmentAttentionLayer()(
            [word1_representation, word2_representation])
        att2 = AlignmentAttentionLayer()(
            [word2_representation, word1_representation])

        concat = concatenate([att1, att2])
        mul = multiply([att1, att2])
        sub = subtract([att1, att2])
    else:
        concat = concatenate([word1_representation, word2_representation])
        mul = multiply([word1_representation, word2_representation])
        sub = subtract([word1_representation, word2_representation])

    final_merge = concatenate([concat, mul, sub])
    dropout3 = Dropout(0.01)(final_merge)
    dense1 = Dense(hidden_units, activation='relu', name='dense1')(dropout3)
    dropout4 = Dropout(0.01)(dense1)
    #dropout4 = Reshape((2400,))(dropout4)
    dense2 = Dense(1, activation='sigmoid', name='dense2')(dropout4)
    final_model = Model([input_word1, input_word2], dense2)
    final_model.summary()
    print('Compiling...')
    final_model.compile(optimizer='adam',
                        loss='binary_crossentropy',
                        metrics=['accuracy'])

    print('Fitting...')
    final_model.fit([train_data_1, train_data_2],
                    train_labels,
                    verbose=0,
                    validation_data=([test_data_1, test_data_2], test_labels),
                    callbacks=[checkpointer, early_stop],
                    epochs=20)

    start_time = time.time()
    aux1 = final_model.predict([test_data_1, test_data_2], verbose=0)
    aux = (aux1 > 0.5).astype('int32').ravel()
    return aux, (time.time() - start_time)
Example #15
def Model3_LSTM_BiLSTM_LSTM(wordvocabsize,
                            targetvocabsize,
                            charvobsize,
                            word_W,
                            char_W,
                            input_fragment_lenth,
                            input_leftcontext_lenth,
                            input_rightcontext_lenth,
                            input_maxword_length,
                            w2v_k,
                            c2v_k,
                            hidden_dim=200,
                            batch_size=32,
                            optimizer='rmsprop'):
    hidden_dim = 100

    word_input_fragment = Input(shape=(input_fragment_lenth, ), dtype='int32')
    word_embedding_fragment = Embedding(input_dim=wordvocabsize + 1,
                                        output_dim=w2v_k,
                                        input_length=input_fragment_lenth,
                                        mask_zero=False,
                                        trainable=True,
                                        weights=[word_W])(word_input_fragment)
    word_embedding_fragment = Dropout(0.5)(word_embedding_fragment)

    char_input_fragment = Input(shape=(
        input_fragment_lenth,
        input_maxword_length,
    ),
                                dtype='int32')
    char_embedding_fragment = TimeDistributed(
        Embedding(input_dim=charvobsize,
                  output_dim=c2v_k,
                  batch_input_shape=(batch_size, input_fragment_lenth,
                                     input_maxword_length),
                  mask_zero=False,
                  trainable=True,
                  weights=[char_W]))(char_input_fragment)

    char_cnn_fragment = TimeDistributed(
        Conv1D(50, 3, activation='relu', padding='valid'))
    char_embedding_fragment = char_cnn_fragment(char_embedding_fragment)
    char_embedding_fragment = TimeDistributed(
        GlobalMaxPooling1D())(char_embedding_fragment)
    char_embedding_fragment = Dropout(0.25)(char_embedding_fragment)

    word_input_leftcontext = Input(shape=(input_leftcontext_lenth, ),
                                   dtype='int32')
    word_embedding_leftcontext = Embedding(
        input_dim=wordvocabsize + 1,
        output_dim=w2v_k,
        input_length=input_leftcontext_lenth,
        mask_zero=True,
        trainable=True,
        weights=[word_W])(word_input_leftcontext)
    word_embedding_leftcontext = Dropout(0.5)(word_embedding_leftcontext)

    char_input_leftcontext = Input(shape=(
        input_leftcontext_lenth,
        input_maxword_length,
    ),
                                   dtype='int32')
    char_input_rightcontext = Input(shape=(
        input_rightcontext_lenth,
        input_maxword_length,
    ),
                                    dtype='int32')

    word_input_rightcontext = Input(shape=(input_rightcontext_lenth, ),
                                    dtype='int32')
    word_embedding_rightcontext = Embedding(
        input_dim=wordvocabsize + 1,
        output_dim=w2v_k,
        input_length=input_rightcontext_lenth,
        mask_zero=True,
        trainable=True,
        weights=[word_W])(word_input_rightcontext)
    word_embedding_rightcontext = Dropout(0.5)(word_embedding_rightcontext)

    embedding_fragment = concatenate(
        [word_embedding_fragment, char_embedding_fragment], axis=-1)
    embedding_leftcontext = word_embedding_leftcontext
    embedding_rightcontext = word_embedding_rightcontext

    LSTM_leftcontext = LSTM(hidden_dim, go_backwards=False,
                            activation='tanh')(embedding_leftcontext)
    Rep_LSTM_leftcontext = RepeatVector(input_fragment_lenth)(LSTM_leftcontext)
    LSTM_rightcontext = LSTM(hidden_dim, go_backwards=True,
                             activation='tanh')(embedding_rightcontext)
    Rep_LSTM_rightcontext = RepeatVector(input_fragment_lenth)(
        LSTM_rightcontext)

    BiLSTM_fragment = Bidirectional(LSTM(hidden_dim // 2,
                                         activation='tanh',
                                         return_sequences=True),
                                    merge_mode='concat')(embedding_fragment)
    # align the single context vectors with the fragment sequence (via the
    # RepeatVector outputs above) before merging with BiLSTM_fragment
    context_ADD = add(
        [Rep_LSTM_leftcontext, BiLSTM_fragment, Rep_LSTM_rightcontext])
    context_subtract_l = subtract([BiLSTM_fragment, Rep_LSTM_leftcontext])
    context_subtract_r = subtract([BiLSTM_fragment, Rep_LSTM_rightcontext])
    context_average = average(
        [Rep_LSTM_leftcontext, BiLSTM_fragment, Rep_LSTM_rightcontext])
    context_maximum = maximum(
        [Rep_LSTM_leftcontext, BiLSTM_fragment, Rep_LSTM_rightcontext])

    embedding_mix = concatenate([
        embedding_fragment, BiLSTM_fragment, context_ADD, context_subtract_l,
        context_subtract_r, context_average, context_maximum
    ],
                                axis=-1)

    # BiLSTM_fragment = Bidirectional(LSTM(hidden_dim // 2, activation='tanh'), merge_mode='concat')(embedding_fragment)

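    # multi-width 1-D convolutions (kernel sizes 1-4) over the mixed representation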
    decoderlayer1 = Conv1D(50, 1, activation='relu', strides=1,
                           padding='same')(embedding_mix)
    decoderlayer2 = Conv1D(50, 2, activation='relu', strides=1,
                           padding='same')(embedding_mix)
    decoderlayer3 = Conv1D(50, 3, activation='relu', strides=1,
                           padding='same')(embedding_mix)
    decoderlayer4 = Conv1D(50, 4, activation='relu', strides=1,
                           padding='same')(embedding_mix)

    CNNs_fragment = concatenate(
        [decoderlayer1, decoderlayer2, decoderlayer3, decoderlayer4], axis=-1)
    CNNs_fragment = Dropout(0.5)(CNNs_fragment)
    CNNs_fragment = GlobalMaxPooling1D()(CNNs_fragment)

    concat = Dropout(0.3)(CNNs_fragment)

    output = Dense(targetvocabsize, activation='softmax')(concat)

    Models = Model([
        word_input_fragment, word_input_leftcontext, word_input_rightcontext,
        char_input_fragment, char_input_leftcontext, char_input_rightcontext
    ], output)

    Models.compile(loss='categorical_crossentropy',
                   optimizer=optimizers.RMSprop(lr=0.001),
                   metrics=['acc'])

    return Models