Example #1
def lstm_classification(train,
                        valid,
                        labels_train,
                        labels_valid,
                        save_path,
                        num_classes,
                        num_epochs=10):

    train_lab = labels_for_NN(labels_train)
    EMBEDDING_DIM = 300
    MAX_SEQUENCE_LENGTH = 750
    batch_size = 32  # assumed value; batch_size is not defined elsewhere in this snippet
    embedding_matrix, vocab, train_we, test_we = create_embedding(train, valid)
    VOCAB_SIZE = len(vocab)

    model = Sequential()
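    # architecture: pre-trained word embeddings -> LSTM(512) -> Dense(100, sigmoid) -> softmax over num_classes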
    model.add(
        Embedding(VOCAB_SIZE,
                  EMBEDDING_DIM,
                  input_length=MAX_SEQUENCE_LENGTH,
                  weights=[embedding_matrix]))
    model.add(LSTM(512))
    model.add(Dense(100, activation='sigmoid'))
    model.add(Dense(num_classes, activation='softmax'))
    model.compile(loss='categorical_crossentropy',
                  optimizer='adam',
                  metrics=['accuracy'])

    ## Fit train data
    history = model.fit(train_we,
                        np.array(train_lab),
                        validation_split=0.2,
                        epochs=num_epochs,
                        batch_size=batch_size)
    utils.plot_history(history)

    # If the TF-IDF matrix or the bag-of-words features do not work well, use this instead
    # tokenizer = Tokenizer(num_words=VOCAB_SIZE)
    # sequences = tokenizer.texts_to_sequences(valid)
    # data_test = pad_sequences(sequences, maxlen=MAX_SEQUENCE_LENGTH)
    # list_prediction_proba = model.predict(data_test)

    list_prediction_proba = model.predict(test_we)

    # take the index of the highest-probability class for each sample (first index on ties)
    predizione = [
        np.where(probabilities == probabilities.max())[0].min()
        for probabilities in list_prediction_proba
    ]

    utils.report_and_confmat(labels_train, labels_valid, predizione, save_path,
                             "TINY_lstm_" + str(EMBEDDING_DIM))
Example #2
def conv_classification(train,
                        valid,
                        labels_train,
                        labels_valid,
                        save_path,
                        num_classes,
                        num_epochs=10):

    train_lab = labels_for_NN(labels_train)
    EMBEDDING_DIM = 300
    MAX_SEQUENCE_LENGTH = 750
    batch_size = 32  # assumed; not defined in the original snippet
    embedding_matrix, vocab, train_we, test_we = create_embedding(train, valid)
    VOCAB_SIZE = len(vocab)

    model = Sequential()
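    # architecture: pre-trained word embeddings -> dropout -> Conv1D(512 filters, width 7) -> global max pooling -> Dense(100) -> softmax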
    model.add(
        Embedding(VOCAB_SIZE,
                  EMBEDDING_DIM,
                  input_length=MAX_SEQUENCE_LENGTH,
                  weights=[embedding_matrix]))
    model.add(Dropout(0.2))
    model.add(Conv1D(512, 7, activation='relu'))
    model.add(GlobalMaxPooling1D())
    model.add(Dense(100, activation='relu'))
    model.add(Dense(num_classes, activation='softmax'))

    # NB: binary classification --> binary_crossentropy; multi-class classification --> categorical_crossentropy
    model.compile(loss='categorical_crossentropy',
                  optimizer='adam',
                  metrics=['accuracy'])
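    # For a two-class problem, the equivalent head would look like this, for example:
    #   model.add(Dense(1, activation='sigmoid'))
    #   model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])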
    history = model.fit(train_we,
                        np.array(train_lab),
                        validation_split=0.2,
                        epochs=num_epochs,
                        batch_size=batch_size)
    utils.plot_history(history)

    list_prediction_proba = model.predict(test_we)

    predizione = [
        np.where(probabilities == probabilities.max())[0].min()
        for probabilities in list_prediction_proba
    ]

    utils.report_and_confmat(labels_train, labels_valid, predizione, save_path,
                             "TINY_conv_1_layer" + str(EMBEDDING_DIM))
Example #3
def bi_lstm_classification(train,
                           valid,
                           labels_train,
                           labels_valid,
                           save_path,
                           num_classes,
                           num_epochs=10):

    train_lab = labels_for_NN(labels_train)

    EMBEDDING_DIM = 300
    MAX_SEQUENCE_LENGTH = 750
    batch_size = 32  # assumed; not defined in the original snippet
    embedding_matrix, vocab, train_we, test_we = create_embedding(train, valid)
    VOCAB_SIZE = len(vocab)

    model = Sequential()
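    # architecture: pre-trained word embeddings -> bidirectional LSTM(512) -> dropout -> Dense(100, sigmoid) -> softmax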
    model.add(
        Embedding(VOCAB_SIZE,
                  EMBEDDING_DIM,
                  input_length=MAX_SEQUENCE_LENGTH,
                  weights=[embedding_matrix]))
    model.add(Bidirectional(LSTM(512, return_sequences=False)))
    model.add(Dropout(0.2))
    model.add(Dense(100, activation='sigmoid'))
    model.add(Dense(num_classes, activation='softmax'))
    model.compile(loss='categorical_crossentropy',
                  optimizer='adam',
                  metrics=['accuracy'])
    ## Fit train data
    history = model.fit(train_we,
                        np.array(train_lab),
                        validation_split=0.2,
                        epochs=num_epochs,
                        batch_size=batch_size)
    utils.plot_history(history)

    list_prediction_proba = model.predict(test_we)

    predizione = [
        np.where(probabilities == probabilities.max())[0].min()
        for probabilities in list_prediction_proba
    ]

    utils.report_and_confmat(labels_train, labels_valid, predizione, save_path,
                             "TINY_bilstm" + str(EMBEDDING_DIM))
Example #4
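        # per hidden layer: L2 bias regularization, LeakyReLU in place of the linear activation, batch norm, and dropout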
        layer.bias_regularizer = keras.regularizers.l2(L2_BP)
        layer.activation = keras.activations.linear
        my_model.add(layer)
        my_model.add(keras.layers.LeakyReLU())
        my_model.add(keras.layers.BatchNormalization())
        my_model.add(keras.layers.Dropout(0.65))

    # Final layer
    my_model.add(keras.layers.Dense(1, activation=keras.activations.sigmoid))

    my_model.summary()
    my_model.compile(optimizer=OPTIMIZER,
                     loss='binary_crossentropy',
                     metrics=['accuracy', 'binary_crossentropy'])

model_hist = None

try:
    model_hist = get_history_of(my_model)
    model_acc = 'Acc: {} - Max is {}'.format(
        model_hist.history['val_acc'][-1], max(model_hist.history['val_acc']))
    print(model_acc)
except Exception:
    # if training fails or is interrupted, fall through and use the saved checkpoint
    pass
finally:
    # reload the best checkpoint saved during training before predicting
    my_model = load_model('my-model.hdf5')

    utils.make_predictions(my_model, test_data, test_labels)
if model_hist:
    utils.plot_history(model_hist)
Example #5
    # network
    net = SiameseNet(dim_embedding=config["dim_embedding"],
                     is_rgb=config["is_rgb"])
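    # resume from the best checkpoint if one already exists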
    if os.path.isfile("./output/best_model.pth"):
        net.load_state_dict(torch.load("./output/best_model.pth"))
    net.to(device)

    # loss
    criterion = ContrastLoss()
    optimizer = optim.Adam(net.parameters(), lr=1e-3)

    # Decay LR by a factor of 0.5 every 2 epochs
    exp_lr_scheduler = optim.lr_scheduler.StepLR(optimizer,
                                                 step_size=2,
                                                 gamma=0.5)

    net, history = train_model(
        model=net,
        data_loaders=data_loaders,
        criterion=criterion,
        optimizer=optimizer,
        scheduler=exp_lr_scheduler,
        num_epochs=config["train_epochs"],
        early_stopping_patience=config["early_stopping_patience"],
        reduce_lr_on_plateau=config["reduce_lr_on_plateau"])

    with open("./output/history.pickle", "wb") as fw:
        pickle.dump(history, fw)

    plot_history(history, "./output/history.png")
Example #6
def training(Epochs, train_filename, validation_filename, model_dir):
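    # training hyper-parameters; B1 and B2 are forwarded to model_loss below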
    B1 = 1.0
    B2 = 0.01
    Filters = 32
    Kernel_size = 3
    Batch_size = 2
    Number_of_RES_blocks = 9
    Activation = 'selu'
    Input_image_shape = (128, 128, 1)
    delete_previous = True
    if not os.path.exists(model_dir):
        os.mkdir(model_dir)
    elif delete_previous:
        shutil.rmtree(model_dir)
        os.mkdir(model_dir)
    # best checkpoints are written to the same freshly created directory
    bestmodel_dir = model_dir
    src_train, tar_train = data_gen(train_filename)
    src_valid, tar_valid = data_gen(validation_filename)
    print(len(src_train), len(src_valid))
    train_dataset = tf.data.Dataset.from_tensor_slices((src_train, tar_train))
    train_dataset = train_dataset.repeat(-1)
    valid_dataset = tf.data.Dataset.from_tensor_slices((src_valid, tar_valid))
    valid_dataset = valid_dataset.repeat(-1)
    train_dataset = train_dataset.batch(Batch_size)
    valid_dataset = valid_dataset.batch(Batch_size)
    opt = tf.keras.optimizers.Adam(learning_rate=0.001,
                                   beta_1=0.9,
                                   beta_2=0.999,
                                   epsilon=1e-07,
                                   amsgrad=False,
                                   name='Adam')
    unet_model = getModel(input_shape=Input_image_shape,
                          filters=Filters,
                          no_of_resnetblocks=Number_of_RES_blocks,
                          kernel_size=Kernel_size,
                          activation=Activation)
    unet_model.compile(optimizer=opt,
                       loss=model_loss(B1, B2),
                       metrics=[
                           'mean_absolute_error', 'mean_squared_error',
                           KLDivergence, SavingMetric, PSNR, SSIM
                       ])
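    # checkpoint the best model so far, judged each epoch by val_SavingMetric (lower is better)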
    bestmodel_callbacks = ModelCheckpoint(filepath=os.path.join(
        bestmodel_dir,
        'saved_model.epoch_{epoch:02d}-SSIM_{val_SSIM:.5f}-PSNR_{val_PSNR:.5f}-metric_{val_SavingMetric:.5f}.h5'
    ),
                                          monitor='val_SavingMetric',
                                          verbose=0,
                                          save_best_only=True,
                                          save_weights_only=False,
                                          mode='min',
                                          save_freq='epoch')
    # halve the learning rate when val_SavingMetric stops improving for 10 epochs
    reduce_lr_loss = ReduceLROnPlateau(
        monitor='val_SavingMetric',
        factor=0.5,
        patience=10,
        verbose=1,
        mode='min',
        min_lr=0.000001,
        min_delta=1e-04,  # 'min_delta' is the current name for the deprecated 'epsilon' argument
    )
    history = unet_model.fit(
        train_dataset,
        steps_per_epoch=int(np.ceil(len(src_train) / Batch_size)),
        epochs=Epochs,
        callbacks=[bestmodel_callbacks, reduce_lr_loss],
        validation_data=valid_dataset,
        validation_steps=int(np.ceil(len(src_valid) / Batch_size)),
        max_queue_size=256,
        shuffle=True,
        verbose=2)
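    # one plot per tracked metric, training vs. validation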
    plot_history(history, 'loss', 'val_loss')
    plot_history(history, 'mean_absolute_error', 'val_mean_absolute_error')
    plot_history(history, 'mean_squared_error', 'val_mean_squared_error')
    plot_history(history, 'KLDivergence', 'val_KLDivergence')
    plot_history(history, 'PSNR', 'val_PSNR')
    plot_history(history, 'SSIM', 'val_SSIM')
    plot_history(history, 'SavingMetric', 'val_SavingMetric')
Example #7
        # delete variables that are no longer needed
        del heads_emb, bodies_emb, stances, output

    pbar.close()

    t_n = time.time() - t_0
    print(f'Execution time: {t_n}')

    acc = 100. * n_correct / test_size
    print(f"Accuracy : {acc}%")
    print(f"Score : {score}")

    # specify that a new epoch must begin
    test_dataset.is_epoch = False

    print("Saving plots:")

    print("  * Train and val score history")
    plot_history(os.path.join(SAVE_PATH, f'{chpt}_score.png'),
                 main_checkpoint['train_score_history'],
                 main_checkpoint['val_score_history'],
                 f'Score history from {chpt} checkpoint', 'Epoch', 'Score')

    print("  * Train and val accuracy history")
    plot_history(os.path.join(SAVE_PATH, f'{chpt}_acc.png'),
                 main_checkpoint['train_acc_history'],
                 main_checkpoint['val_acc_history'],
                 f'Accuracy history from {chpt} checkpoint', 'Epoch',
                 'Accuracy')