Example #1
model.add(BatchNormalization())
model.add(LeakyReLU())

model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Flatten())

model.add(Dense(128))
model.add(BatchNormalization())
model.add(LeakyReLU())

model.add(Dense(num_classes))
model.add(BatchNormalization())
model.add(Activation('softmax'))

model.compile(loss='categorical_crossentropy',
              optimizer=optimizers.Nadam(),
              metrics=['accuracy'])

history = model.fit(training_data,
                    training_target,
                    batch_size=batch_size,
                    epochs=epochs,
                    verbose=2,
                    validation_data=(testing_data, testing_target))
score = model.evaluate(testing_data, testing_target, verbose=0)
print('Test loss: ', score[0])
print('Test accuracy: ', score[1])

model.save('model_1')

print(history.history.keys())
Example #2
def own_model(train_forward_data, train_backward_data, train_sense_embedding, test_f, test_b, test_i,
              val_forward_data=None, val_backward_data=None, val_sense_embedding=None,
              n_units=100, dense_units=256, is_training=True, EMBEDDING_DIM=100, epochs=100, batch_size=2048,
              init_word_vecs=None, ):
    model = get_model(n_units=n_units, dense_unints=dense_units, is_training=is_training, emb_dim=EMBEDDING_DIM,
                      init_word_vecs=init_word_vecs, max_sequence_length=40, word_to_id=word_to_id)

    # Switchable optimizers
    opti = optimizers.Nadam(clipnorm=1.)  # , clipvalue=0.5
    # opti = optimizers.SGD(lr=0.00001, momentum=0.1)
    # opti = optimizers.Adam(lr=0.00001)
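    # clipnorm=1. rescales each gradient tensor so its L2 norm is at most 1;
    # the commented-out clipvalue alternative would instead clip every gradient
    # component into [-0.5, 0.5].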

    model.compile(loss='mse', optimizer=opti, metrics=[cos_distance, get_f1])

    model.summary()

    early_stopping = EarlyStopping(monitor='val_loss', patience=10)
    bst_model_path = "weights.best.hdf5"
    model_checkpoint = ModelCheckpoint(bst_model_path, save_best_only=True, save_weights_only=True, verbose=1)

    hist = model.fit([train_forward_data, train_backward_data], train_sense_embedding,
                     validation_data=([val_forward_data, val_backward_data], val_sense_embedding),
                     epochs=epochs, batch_size=batch_size, shuffle=True,
                     callbacks=[early_stopping, model_checkpoint])

    model.save('1_project_2_TT.h5')

    def get_embedded_sense(goal_key):
        for elem in train_data_:
            key = elem['target_sense']
            if key in goal_key:
                return elem['id']
        return -1

    ''' Modified testing Code '''
    # Uses the testing target sense id to get the actual embedding from target_sense_to_context_embedding
    # That actual embedding is then used to calculate the cosine distance between it and the predicted vector
    pred_a = model.predict([test_f, test_b])
    cos_sim_total = 0
    counter = 0
    not_testable = 0
    test_answers = get_test_ansers(23)
    for i in range(len(pred_a)):
        pred = pred_a[i]
        goal_id = test_i[i]
        idx = test_answers.index[test_answers['Targets'] == goal_id]
        # This is for the entries where the sense was either just 'U' or 'P' or both
        if len(idx) == 0:
            continue
        goal_key = test_answers.iloc[idx]['Senses'].to_numpy()[0]
        train_id_key = get_embedded_sense(goal_key)
        # This is in case the testing target sense is not in the training corpus
        if train_id_key == -1:
            not_testable += 1
            continue
        goal_embedding = target_sense_to_context_embedding.get(train_id_key)
        cos_sim = (1 - spatial.distance.cosine(goal_embedding, pred))
        cos_sim_total += cos_sim
        counter += 1

    print("Average Testing Cos Sim:", (cos_sim_total / counter))
    print("Number of untestable due to the lack of a comparable embedding:", not_testable)
Example #3
        
        
        return loss*1000
    
    return focal_loss 
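# The focal-loss factory above is only partially shown by this snippet. Below is a
# minimal, self-contained sketch of what a binary_focal_loss(gamma, alpha) factory
# typically looks like (an illustrative reconstruction, not the author's exact code,
# which additionally scales the loss by 1000):
from keras import backend as K

def binary_focal_loss(gamma=2., alpha=0.25):
    """Binary focal loss: mean of -alpha_t * (1 - p_t)**gamma * log(p_t)."""
    def focal_loss(y_true, y_pred):
        eps = K.epsilon()
        y_pred = K.clip(y_pred, eps, 1. - eps)
        # p_t is the predicted probability assigned to the true class
        p_t = y_true * y_pred + (1. - y_true) * (1. - y_pred)
        alpha_t = y_true * alpha + (1. - y_true) * (1. - alpha)
        return K.mean(-alpha_t * K.pow(1. - p_t, gamma) * K.log(p_t))
    return focal_loss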

input_dim = xtrain.shape[1]
#nb_classes = y_train.shape[1]

model = Sequential()
model.add(Dense(input_dim=input_dim, units=1))
model.add(Activation('sigmoid'))
from keras import optimizers
opt = optimizers.Nadam(lr=0.002, beta_1=0.9, beta_2=0.999, epsilon=1e-08, schedule_decay=0.004)
model.compile(loss=binary_focal_loss(gamma=2,alpha=0.9),
              optimizer=opt,metrics=[f1])

history = model.fit(xtrain, ytrain, epochs=15, batch_size=500, verbose=1) 


# Training performance
print("------------------Training performance--------------------------------------")
y_predprob = model.predict(xtrain)              # predicted probabilities
prediction = (y_predprob > 0.5).astype(int)     # thresholded class labels
print("AUC Score (train): %f" % roc_auc_score(ytrain, y_predprob))
print("Precision is", precision_score(ytrain, prediction, average='binary'))
print("Recall is", recall_score(ytrain, prediction, average='binary'))
print("F1 score is", f1_score(ytrain, prediction, average='binary'))
print("Accuracy is", accuracy_score(ytrain, prediction))  # Keras models have no .score(); sklearn's accuracy_score is used instead
print(confusion_matrix(ytrain, prediction))
Example #4
model.add(LSTM(units=256, return_sequences=True))
model.add(Dropout(rate=0.5))
model.add(TimeDistributed(Dense(1)))


# set optimizers & callbacks
adam = optimizers.Adam(lr=0.001,
    beta_1=0.9,
    beta_2=0.999,
    epsilon=None,
    decay=1e-6,
    amsgrad=False)

nadam = optimizers.Nadam(lr=0.0001, 
    beta_1=0.9, 
    beta_2=0.999, 
    epsilon=None, 
    schedule_decay=0.001)

model.compile(
    loss = "mean_squared_logarithmic_error",
    optimizer = nadam,
    metrics = ["mean_squared_logarithmic_error"])

checkpoint = ModelCheckpoint(
    filepath = model_saved,
    monitor = "val_loss",
    verbose = 0,
    save_best_only = True,
    mode = "min")
Example #5
    def create_network(self):
        state_input = Input(shape=(self.state_size, ), name='state_input')

        #define the network
        h = state_input
        for _ in range(self.params['num_layers']):
            h = Dense(self.params['layer_size'], activation='relu')(h)

        q_output = Dense(self.params['num_points'], name='latent_q')(h)
        action_input = Input(shape=(self.action_size, ), name='action_input')
        temp_li = []
        a_li = []
        for a_index in range(self.params['num_points']):
            h = state_input

            temp = Dense(self.action_size,
                         activation='tanh',
                         kernel_initializer=RandomUniform(minval=-.1,
                                                          maxval=+.1,
                                                          seed=None),
                         bias_initializer=RandomUniform(minval=-1,
                                                        maxval=+1,
                                                        seed=None))(h)
            temp = Lambda(lambda x: x * self.env.action_space.high[0],
                          name="action" + str(a_index))(temp)
            a_li.append(temp)
            layer = Lambda(func_L2)
            temp = layer([temp, action_input])
            temp_li.append(temp)
        merged = Concatenate(axis=-1)(temp_li)

        merged = Lambda(lambda x: x * self.params['temperature'])(merged)
        softmax = Activation('softmax')(merged)
        final_q = dot([q_output, softmax], axes=1, normalize=False)
        model = Model(inputs=[state_input, action_input], outputs=final_q)
        if self.params['opt'] == 'adam':
            opt = optimizers.Adam(lr=self.params['learning_rate'])
        elif self.params['opt'] == 'nadam':
            opt = optimizers.Nadam(lr=self.params['learning_rate'])
        elif self.params['opt'] == 'rmsprop':
            opt = optimizers.RMSprop(lr=self.params['learning_rate'])
        model.compile(loss='mse', optimizer=opt)

        qRef_li = []
        for j in range(self.params['num_points']):
            each_qRef = []
            for i in range(self.params['num_points']):
                layer = Lambda(func_L2)
                each_qRef.append(layer([a_li[i], a_li[j]]))
            each_qRef = Concatenate(axis=-1)(each_qRef)
            each_qRef = Lambda(lambda x: x * self.params['temperature'])(
                each_qRef)
            each_qRef = Activation('softmax')(each_qRef)
            test_final_q = dot([q_output, each_qRef], axes=1, normalize=False)
            qRef_li.append(test_final_q)
        qRef_li = Model(
            inputs=state_input,
            outputs=[Concatenate(axis=1)(a_li),
                     Concatenate(axis=-1)(qRef_li)])

        return model, qRef_li
metrics = ['mean_absolute_error', 'mean_absolute_percentage_error']

lr = args.lrearning_rate
epsilon = args.epsilon
optimizer_selection = {
    'Adadelta': optimizers.Adadelta(lr=lr, rho=0.95, epsilon=epsilon, decay=0.0),
    'Adagrad': optimizers.Adagrad(lr=lr, epsilon=epsilon, decay=0.0),
    'Adam': optimizers.Adam(lr=lr, beta_1=0.9, beta_2=0.999,
                            epsilon=epsilon, decay=0.0, amsgrad=False),
    'Adamax': optimizers.Adamax(lr=lr, beta_1=0.9, beta_2=0.999,
                                epsilon=epsilon, decay=0.0),
    'Nadam': optimizers.Nadam(lr=lr, beta_1=0.9, beta_2=0.999,
                              epsilon=epsilon, schedule_decay=0.004),
    'RMSprop': optimizers.RMSprop(lr=lr, rho=0.9, epsilon=epsilon, decay=0.0),
    'SGD': optimizers.SGD(lr=lr, momentum=0.0, decay=0.0, nesterov=False)
}

optimizer = optimizer_selection[args.optimizer]

model.compile(optimizer=optimizer, loss=loss_function, metrics=metrics)

#%%
# Save trained models for every epoch
def cnn_dropout_mnist(args):
    """
    Main function
    """
    # %%
    # IMPORTS

    # code repository sub-package imports
    from artificial_neural_networks.utils.download_mnist import download_mnist
    from artificial_neural_networks.utils.generic_utils import save_classif_model
    from artificial_neural_networks.utils.vis_utils import plot_confusion_matrix, epoch_plot

    # %%

    if args.verbose > 0:
        print(args)

    # For reproducibility
    if args.reproducible:
        os.environ['PYTHONHASHSEED'] = '0'
        np.random.seed(args.seed)
        rn.seed(args.seed)
        tf.set_random_seed(args.seed)
        sess = tf.Session(graph=tf.get_default_graph())
        K.set_session(sess)
        # print(hash("keras"))

    # %%
    # Load the MNIST dataset

    mnist_path = download_mnist()
    mnist = np.load(mnist_path)
    train_x = mnist['x_train'].astype(np.float32)
    train_y = mnist['y_train'].astype(np.int32)
    test_x = mnist['x_test'].astype(np.float32)
    test_y = mnist['y_test'].astype(np.int32)
    mnist.close()

    # %%
    # PREPROCESSING STEP

    scaling_factor = args.scaling_factor
    translation = args.translation

    img_width = train_x.shape[1]
    img_height = train_x.shape[2]

    n_train = train_x.shape[0]  # number of training examples/samples
    n_test = test_x.shape[0]  # number of test examples/samples

    n_in = img_width * img_height  # number of features / dimensions
    n_out = np.unique(train_y).shape[0]  # number of classes/labels

    # Reshape training and test sets
    train_x = train_x.reshape(n_train, img_width, img_height, 1)
    test_x = test_x.reshape(n_test, img_width, img_height, 1)

    # Apply preprocessing
    train_x = scaling_factor * (train_x - translation)
    test_x = scaling_factor * (test_x - translation)

    one_hot = False  # It works exactly the same for both True and False

    # Convert class vectors to binary class matrices (i.e. One hot encoding)
    if one_hot:
        train_y = to_categorical(train_y, n_out)
        test_y = to_categorical(test_y, n_out)

    # %%
    # Model hyperparameters and ANN Architecture

    N = []
    N.append(n_in)  # input layer
    if args.same_size:
        n_layers = args.n_layers
        for i in range(n_layers):
            N.append(args.layer_size)  # hidden layer i
    else:
        n_layers = len(args.explicit_layer_sizes)
        for i in range(n_layers):
            N.append(args.explicit_layer_sizes[i])  # hidden layer i
    N.append(n_out)  # output layer

    # ANN Architecture
    L = len(N) - 1

    x = Input(shape=(img_width, img_height, 1))  # input layer
    h = Dropout(rate=args.dropout_rate_input)(x)

    h = Conv2D(filters=32, kernel_size=(3, 3), activation='relu')(h)
    h = MaxPooling2D(pool_size=(2, 2))(h)
    h = Dropout(rate=args.dropout_rate_conv)(h)

    h = Conv2D(filters=32, kernel_size=(3, 3), activation='relu')(h)
    h = MaxPooling2D(pool_size=(2, 2))(h)
    h = Dropout(rate=args.dropout_rate_conv)(h)

    h = Flatten()(h)

    for i in range(1, L):
        h = Dense(units=N[i], activation='relu')(h)  # hidden layer i
        h = Dropout(rate=args.dropout_rate_hidden)(h)

    out = Dense(units=n_out, activation='softmax')(h)  # output layer

    model = Model(inputs=x, outputs=out)

    if args.verbose > 0:
        model.summary()

    if one_hot:
        loss_function = 'categorical_crossentropy'
    else:
        loss_function = 'sparse_categorical_crossentropy'
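    # With one_hot=True the targets are one-hot matrices and 'categorical_crossentropy'
    # is used; with one_hot=False they remain integer class ids and
    # 'sparse_categorical_crossentropy' computes the same quantity, which is why the
    # one_hot flag above behaves identically either way.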

    metrics = ['accuracy']

    lr = args.lrearning_rate
    epsilon = args.epsilon
    optimizer_selection = {
        'Adadelta':
        optimizers.Adadelta(lr=lr, rho=0.95, epsilon=epsilon, decay=0.0),
        'Adagrad':
        optimizers.Adagrad(lr=lr, epsilon=epsilon, decay=0.0),
        'Adam':
        optimizers.Adam(lr=lr,
                        beta_1=0.9,
                        beta_2=0.999,
                        epsilon=epsilon,
                        decay=0.0,
                        amsgrad=False),
        'Adamax':
        optimizers.Adamax(lr=lr,
                          beta_1=0.9,
                          beta_2=0.999,
                          epsilon=epsilon,
                          decay=0.0),
        'Nadam':
        optimizers.Nadam(lr=lr,
                         beta_1=0.9,
                         beta_2=0.999,
                         epsilon=epsilon,
                         schedule_decay=0.004),
        'RMSprop':
        optimizers.RMSprop(lr=lr, rho=0.9, epsilon=epsilon, decay=0.0),
        'SGD':
        optimizers.SGD(lr=lr, momentum=0.0, decay=0.0, nesterov=False)
    }

    optimizer = optimizer_selection[args.optimizer]

    model.compile(optimizer=optimizer, loss=loss_function, metrics=metrics)

    # %%
    # Save trained models for every epoch

    models_path = r'artificial_neural_networks/trained_models/'
    model_name = 'mnist_cnn_dropout'
    weights_path = models_path + model_name + '_weights'
    model_path = models_path + model_name + '_model'
    file_suffix = '_{epoch:04d}_{val_acc:.4f}_{val_loss:.4f}'

    if args.save_weights_only:
        file_path = weights_path
    else:
        file_path = model_path

    file_path += file_suffix

    # monitor = 'val_loss'
    monitor = 'val_acc'

    if args.save_models:
        checkpoint = ModelCheckpoint(file_path + '.h5',
                                     monitor=monitor,
                                     verbose=args.verbose,
                                     save_best_only=args.save_best_only,
                                     mode='auto',
                                     save_weights_only=args.save_weights_only)
        callbacks = [checkpoint]
    else:
        callbacks = []

    # %%
    # TRAINING PHASE

    if args.time_training:
        start = timer()

    model_history = model.fit(x=train_x,
                              y=train_y,
                              validation_data=(test_x, test_y),
                              batch_size=args.batch_size,
                              epochs=args.n_epochs,
                              verbose=args.verbose,
                              callbacks=callbacks)

    if args.time_training:
        end = timer()
        duration = end - start
        print('Total time for training (in seconds):')
        print(duration)

    # %%
    # TESTING PHASE

    train_y_pred = np.argmax(model.predict(train_x), axis=1)
    test_y_pred = np.argmax(model.predict(test_x), axis=1)

    train_score = model.evaluate(x=train_x, y=train_y, verbose=args.verbose)
    train_dict = {'loss': train_score[0], 'acc': train_score[1]}

    test_score = model.evaluate(x=test_x, y=test_y, verbose=args.verbose)
    test_dict = {'val_loss': test_score[0], 'val_acc': test_score[1]}

    if args.verbose > 0:
        print('Train loss:', train_dict['loss'])
        print('Train accuracy:', train_dict['acc'])

        print('Test loss:', test_dict['val_loss'])
        print('Test accuracy:', test_dict['val_acc'])

    # %%
    # Data Visualization

    if args.plot:

        # Confusion matrices

        classes = list(range(n_out))

        train_cm = confusion_matrix(train_y, train_y_pred)
        plot_confusion_matrix(train_cm,
                              classes=classes,
                              title='Confusion matrix for training set')

        test_cm = confusion_matrix(test_y, test_y_pred)
        plot_confusion_matrix(test_cm,
                              classes=classes,
                              title='Confusion matrix for test set')

        # Loss vs epoch

        epoch_axis = range(1, args.n_epochs + 1)

        train_loss = model_history.history['loss']
        test_loss = model_history.history['val_loss']
        epoch_plot(epoch_axis, train_loss, test_loss, 'Loss')

        # Accuracy vs epoch

        train_acc = model_history.history['acc']
        test_acc = model_history.history['val_acc']
        epoch_plot(epoch_axis, train_acc, test_acc, 'Accuracy')

    # %%
    # Save the architecture and the lastly trained model

    save_classif_model(model, models_path, model_name, weights_path,
                       model_path, file_suffix, test_dict, args)

    # %%

    return model
decoder_gru_output, _ = decoder_gru(dec_bn, initial_state=seq2seq_encoder_out)
x = BatchNormalization(name='Decoder-Batchnorm-2')(decoder_gru_output)

# Dense layer for prediction
decoder_dense = Dense(num_decoder_tokens,
                      activation='softmax',
                      name='Final-Output-Dense')

decoder_outputs = decoder_dense(x)

########################
#### Seq2Seq Model ####

#seq2seq_decoder_out = decoder_model([decoder_inputs, seq2seq_encoder_out])
seq2seq_Model = Model([encoder_inputs, decoder_inputs], decoder_outputs)
seq2seq_Model.compile(optimizer=optimizers.Nadam(lr=0.001),
                      loss='sparse_categorical_crossentropy')

seq2seq_Model.summary()

########################
#### Train the Model ####

from keras.callbacks import CSVLogger, ModelCheckpoint

#setup callbacks for model logging
script_name_base = 'keras_seq2seq'
csv_logger = CSVLogger('{:}.log'.format(script_name_base))
model_checkpoint = ModelCheckpoint(
    '{:}.epoch{{epoch:02d}}-val{{val_loss:.5f}}.hdf5'.format(script_name_base),
    save_best_only=True)
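
# Note: the doubled braces survive str.format(), so the checkpoint filename template
# still contains {epoch:02d} and {val_loss:.5f} placeholders for ModelCheckpoint to
# fill in at the end of each epoch.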
Example #9
model.add(Dense(units=512, activation="relu"))
model.add(Dense(units=2, activation="softmax"))

#compile with the Nadam optimizer
#and cross-entropy loss

from keras.optimizers import Adam

#visualize the model
model.summary()

model_final = model

#COMPILE
kwargs = {'decay', 'lr'}
opt = optimizers.Nadam(learning_rate=0.00020, beta_1=0.9, beta_2=0.899)

model_final.compile(optimizer=opt,
                    loss=keras.losses.categorical_crossentropy,
                    metrics=['accuracy'])

model_final.summary()

from keras.callbacks import ModelCheckpoint, EarlyStopping

#save model checkpoints, monitoring validation accuracy
checkpoint = ModelCheckpoint("weights.h5",
                             monitor='val_accuracy',
                             verbose=1,
                             save_best_only=False,
                             save_weights_only=False,
Example #10
                                        input_profile=input_profile)
        answer = TimeDistributed(Dense(dim_wordvec))(answer)

    # The original paper uses a matrix multiplication for this reduction step;
    # we choose to use an RNN instead.
    # TODO: provide options for this prediction step [lstm, cnn, dense]
    answer = LSTM(dim_lstm)(answer)  # (samples, 32)

    # one regularization layer -- more would probably be needed.
    answer = Dropout(dropout)(answer)
    answer = Dense(dim_output)(answer)  # (samples, vocab_size)
    # we output a probability distribution over the vocabulary
    answer = Activation('softmax')(answer)

    # build the final model
    nadam = optimizers.Nadam(lr=lr)
    if profile:
        model = Model([input_sequence, question, input_profile], answer)
    else:
        model = Model([input_sequence, question], answer)
    model.compile(optimizer=nadam,
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])
    # model.summary()
    logger.debug('...Compile done.')
    return model

    # # train
    # model.fit([inputs_train, queries_train], answers_train,
    # batch_size=32,
    # epochs=120,
Example #11
X, Y, ds = getnewdata(iids[2], config)
Xt, Yt, ds = getnewdata(
    iids[randint(config.getTestIndex()[0],
                 config.getTestIndex()[1])], config)

model = Sequential()
s2s = SimpleSeq2Seq(batch_input_shape=(1, X.shape[1], X.shape[2]),
                    hidden_dim=1,
                    output_length=config.getWindows()[1],
                    output_dim=1)
model.add(s2s)
model.add(Dense(40, activation='relu'))
model.add(Dropout(0.2))
model.add(Dense(2999, activation='softmax'))
opt = optimizers.Nadam()
model.compile(loss='sparse_categorical_crossentropy',
              optimizer=opt,
              metrics=['accuracy'])

for rounds in range(0, config.getEpochs()):
    for index in range(config.getTrainIndex()[0], config.getTrainIndex()[1]):
        print "Index ", index, " Round ", rounds
        X, Y, ds = getnewdata(iids[index], config)
        r = randint(config.getTestIndex()[0], config.getTestIndex()[1])
        print "random", r
        Xt, Yt, ds = getnewdata(iids[r], config)
        model.fit(X,
                  Y,
                  initial_epoch=rounds,
                  epochs=rounds + 1,
Example #12
# create a Keras model for a multilayer perceptron with two hidden layers (multiclass classification, so the last layer uses softmax activation)
model = Sequential()
model.add(
    Dense(hidden_nodes,
          activation='relu',
          input_shape=(input_nodes, ),
          use_bias=False))
model.add(
    Dense(hidden_nodes,
          activation='relu',
          input_shape=(hidden_nodes, ),
          use_bias=False))
model.add(Dense(output_nodes, activation='softmax', use_bias=False))

model.summary()
opt = optimizers.Nadam(lr=learning_rate)
model.compile(loss='categorical_crossentropy',
              optimizer=opt,
              metrics=['accuracy'])

# setup callbacks
callbacks = [
    EarlyStopping(monitor='val_loss', patience=patience, verbose=1),
    ModelCheckpoint(model_name,
                    monitor='val_loss',
                    save_best_only=True,
                    verbose=1),
    ReduceLROnPlateau(monitor='val_loss',
                      factor=lr_update_factor,
                      patience=lr_patience,
                      verbose=1,
Example #13
#### Encoder Model ####
encoder_inputs = Input(shape=(doc_length, ), name='Encoder-Input')
enc_out = encoder_model(encoder_inputs)

# first dense layer with batch norm
x = Dense(500, activation='relu')(enc_out)
x = BatchNormalization(name='bn-1')(x)
out = Dense(500)(x)
code2emb_model = Model([encoder_inputs], out)
code2emb_model.summary()

print("Starting the training")
from keras.callbacks import CSVLogger, ModelCheckpoint
from keras import optimizers

code2emb_model.compile(optimizer=optimizers.Nadam(lr=0.002),
                       loss='cosine_proximity')
script_name_base = 'code2emb_model_'
csv_logger = CSVLogger('{:}.log'.format(script_name_base))
model_checkpoint = ModelCheckpoint(
    '{:}.epoch{{epoch:02d}}-val{{val_loss:.5f}}.hdf5'.format(script_name_base),
    save_best_only=True)

batch_size = 20000
epochs = 15
history = code2emb_model.fit([encoder_input_data],
                             fastailm_emb,
                             batch_size=batch_size,
                             epochs=epochs,
                             validation_split=0.12,
                             callbacks=[csv_logger, model_checkpoint])
    return model


try:
    train_bow = np.array(train_bow.toarray())
    train_bow = train_bow.reshape(train_bow.shape[0], train_bow.shape[1], 1)
    test_bow = np.array(test_bow.toarray())
    test_bow = test_bow.reshape(test_bow.shape[0], test_bow.shape[1], 1)
except:
    pass

train_bow.shape

adm = optimizers.Adam(lr=1e-3, decay=1e-4)
sgd = optimizers.SGD(lr=1e-3, nesterov=True, momentum=0.7, decay=1e-4)
Nadam = optimizers.Nadam(lr=1e-3, beta_1=0.9, beta_2=0.999, epsilon=1e-08)
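# Note: three optimizers are set up as alternatives; only adm (Adam) is actually
# passed to baseline_cnn_model below.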
model = baseline_cnn_model(train_bow, 5, 'cla', adm)

y_train_final = to_categorical(Y_train)
y_test_final = to_categorical(Y_test)

num_epochs = 25
for epoch in range(num_epochs):
    print(epoch + 1, '/', num_epochs)
    model1 = model.fit(train_bow,
                       Y_train,
                       batch_size=128,
                       epochs=1,
                       verbose=1,
                       validation_split=0.3)
Example #15
File: trainer.py  Project: rela0426/scite
    def slm(self, data):
        """
        Returns Sequence Labeling Model.
        """
        seq = Input(shape=(None, ), name='INPUT')
        emb = Embedding(VOCAB_SIZE,
                        EXTVEC_DIM,
                        weights=[data.embedding],
                        mask_zero=True,
                        trainable=False,
                        name='WE')(seq)
        input_node = [seq]

        if args.use_flair:
            flair = Input(shape=(None, FLAIR_DIM), name='FLAIR')
            emb = concatenate([emb, flair], axis=-1, name='EMB_FLAIR')
            input_node.append(flair)

        if args.char_emb is not None:
            char_embedding = []
            for _ in range(CHAR_SIZE):
                scale = math.sqrt(3.0 / CHAR_DIM)
                char_embedding.append(
                    np.random.uniform(-scale, scale, CHAR_DIM))
            char_embedding[0] = np.zeros(CHAR_DIM)
            char_embedding = np.asarray(char_embedding)

            char_seq = Input(shape=(None, None), name='CHAR_INPUT')
            char_emb = TimeDistributed(Embedding(CHAR_SIZE,
                                                 CHAR_DIM,
                                                 weights=[char_embedding],
                                                 mask_zero=True,
                                                 trainable=True),
                                       name='CHAR_EMB')(char_seq)

            if args.char_emb == 'lstm':
                char_emb = TimeDistributed(Bidirectional(
                    LSTM(CHAR_LSTM_SIZE,
                         kernel_initializer=self.kernel_initializer,
                         recurrent_initializer=self.recurrent_initializer,
                         implementation=2,
                         return_sequences=False)),
                                           name="CHAR_BiLSTM")(char_emb)

            if args.char_emb == 'cnn':
                char_emb = TimeDistributed(MaskConv1D(
                    filters=NUM_CHAR_CNN_FILTER,
                    kernel_size=CHAR_CNN_KERNEL_SIZE,
                    padding='same',
                    kernel_initializer=self.kernel_initializer),
                                           name="CHAR_CNN")(char_emb)
                char_emb = TimeDistributed(Lambda(lambda x: K.max(x, axis=1)),
                                           name="MAX_POOLING")(char_emb)

            input_node.append(char_seq)
            emb = concatenate([emb, char_emb], axis=-1, name='EMB_CHAR')

        if args.backbone == 'lstm':

            dec = Bidirectional(LSTM(
                args.lstm_size,
                kernel_initializer=self.kernel_initializer,
                recurrent_initializer=self.recurrent_initializer,
                dropout=args.dropout_rate,
                recurrent_dropout=args.dropout_rate,
                implementation=2,
                return_sequences=True),
                                merge_mode='concat',
                                name='BiLSTM-1')(emb)
            '''
            enc_bilstm = Bidirectional(LSTM(args.lstm_size,
                                            kernel_initializer=self.kernel_initializer,
                                            recurrent_initializer=self.recurrent_initializer,
                                            dropout=args.dropout_rate,
                                            recurrent_dropout=args.dropout_rate,
                                            implementation=2,
                                            return_sequences=True),
                                       merge_mode='concat', name='BiLSTM-1')(emb)
            dec = Bidirectional(LSTM(args.lstm_size,
                                     kernel_initializer=self.kernel_initializer,
                                     recurrent_initializer=self.recurrent_initializer,
                                     dropout=args.dropout_rate,
                                     recurrent_dropout=args.dropout_rate,
                                     implementation=2,
                                     return_sequences=True),
                                merge_mode='concat', name='BiLSTM-2')(enc_bilstm)
            '''
            if args.use_att:
                mhsa = MultiHeadSelfAttention(
                    head_num=args.nb_head,
                    size_per_head=args.size_per_head,
                    kernel_initializer=self.kernel_initializer,
                    name='MHSA')(dec)
                dec = concatenate([dec, mhsa], axis=-1, name='CONTEXT')

        if args.backbone == 'cnn':
            conv_1 = self.conv_block(emb,
                                     dilation_rate=DILATION_RATE[0],
                                     name='1')
            conv_2 = self.conv_block(conv_1,
                                     dilation_rate=DILATION_RATE[1],
                                     name='2')
            conv_3 = self.conv_block(conv_2,
                                     dilation_rate=DILATION_RATE[2],
                                     name='3')
            dec = self.conv_block(conv_3,
                                  dilation_rate=DILATION_RATE[-1],
                                  use_dropout=False,
                                  name='4')

        if args.classifier == 'softmax':
            output = TimeDistributed(Dense(
                NUM_CLASS,
                activation='softmax',
                kernel_initializer=self.kernel_initializer),
                                     name='DENSE')(dec)
            loss_func = 'sparse_categorical_crossentropy'

        if args.classifier == 'crf':
            dense = TimeDistributed(Dense(
                NUM_CLASS,
                activation=None,
                kernel_initializer=self.kernel_initializer),
                                    name='DENSE')(dec)
            crf = ChainCRF(init=self.kernel_initializer, name='CRF')
            output = crf(dense)
            loss_func = crf.sparse_loss

        optimizer = optimizers.Nadam(lr=self.lr, clipnorm=args.clip_norm)
        model = Model(inputs=input_node, outputs=output)
        model.compile(loss=loss_func, optimizer=optimizer)
        return model
merge_train_data = merge_train_data.values
merge_val_data = merge_val_data.values



#Model Definition


model=models.Sequential()


model.add(layers.Dense(64, kernel_regularizer=regularizers.l2(0.001), activation='relu', input_shape=(14,)))
model.add(layers.Dropout(0.5))
model.add(layers.Dense(64, kernel_regularizer=regularizers.l2(0.001), activation='relu'))
model.add(layers.Dropout(0.5))
model.add(layers.Dense(1, activation='sigmoid'))


#Compiling the model and configuring optimizer,loss and metrics


model.compile(optimizer=optimizers.Nadam(), loss='binary_crossentropy', metrics=[metrics.binary_accuracy])


#Training the model


history = model.fit(merge_train_data, merge_train_labels, epochs=30, batch_size=512,
                    validation_data=(merge_val_data, merge_val_labels))