Пример #1
0
def aishufan_train(folder, batch_size, epoch_size, model_name):
    """
    Train network with the parameters specified.

    :param folder: image folder for training
    :param batch_size: training batch size
    :param epoch_size: training epoch size
    :param model_name: IR2, InceptionResNetV2; NL, NASNetLarge; NM, NASNetLarge
    :return: None
    """
    image_wh = system_config['image_wh']

    image_size = (image_wh, image_wh)
    image_shape= (image_wh, image_wh, 3)

    train_list, valid_list = create_tv_list(folder)
    print(f'Train size: {len(train_list)}, valid size: {len(valid_list)}')

    train_df = pd.DataFrame(train_list, columns=['fname', 'class'])
    valid_df = pd.DataFrame(valid_list, columns=['fname', 'class'])

    model = None
    if 'NM' in model_name:
        model_name = 'NM'
        model = NASNetMobile(include_top=True,
                             weights=None,
                             input_tensor=None,
                             input_shape=image_shape,
                             pooling='max',
                             classes=2)

    elif 'XC' in model_name:
        model_name = 'XC'
        model = Xception(include_top=True,
                         weights=None,
                         input_tensor=None,
                         input_shape=image_shape,
                         pooling='max',
                         classes=2)
    elif 'D21' in model_name:
        model_name = 'D21'
        model = DenseNet201(include_top=True,
                            weights=None,
                            input_tensor=None,
                            input_shape=image_shape,
                            pooling='max',
                            classes=2)
    elif 'IV3' in model_name:
        model_name = 'IV3'
        model = InceptionV3(include_top=True,
                            weights=None,
                            input_tensor=None,
                            input_shape=image_shape,
                            pooling='max',
                            classes=2)

    else:
        model_name = 'IR2'
        model = InceptionResNetV2(include_top=True,
                                  weights=None,
                                  input_tensor=None,
                                  input_shape=image_shape,
                                  pooling='max',
                                  classes=2)

    model.compile(loss='categorical_crossentropy',
                  optimizer=Adam(lr=lr_schedule(0)),
                  metrics=['accuracy'])
    model.summary()

    # Image generator does data augmentation:
    datagen = data_generator()

    train_gen = datagen.flow_from_dataframe(
        dataframe=train_df,
        directory=folder,
        x_col="fname",
        y_col="class",
        class_mode="categorical",
        target_size=image_size,
        color_mode='rgb',
        batch_size=batch_size,
        shuffle=False)

    valid_gen = datagen.flow_from_dataframe(
        dataframe=valid_df,
        directory=folder,
        x_col="fname",
        y_col="class",
        class_mode="categorical",
        target_size=image_size,
        color_mode='rgb',
        batch_size=batch_size,
        shuffle=False)

    # Save class indices
    system_config['class_indices'] = train_gen.class_indices
    save_config()

    # Prepare model model saving directory.
    save_dir = Path(os.path.dirname(os.path.realpath(__file__))).joinpath('models')
    if not save_dir.is_dir():
        save_dir.mkdir(exist_ok=True)
    filepath = f'{str(save_dir)}/{MODEL_NAMES[model_name]}'
    print(f'{filepath}\n')

    # Prepare callbacks for model saving and for learning rate adjustment.
    checkpoint = ModelCheckpoint(filepath=filepath,
                                 monitor='val_acc',
                                 verbose=1,
                                 save_best_only=True)

    lr_scheduler = LearningRateScheduler(lr_schedule)

    lr_reducer = ReduceLROnPlateau(factor=np.sqrt(0.1),
                                   cooldown=0,
                                   patience=5,
                                   min_lr=0.5e-6)

    callbacks = [checkpoint, lr_reducer, lr_scheduler]

    # Fit the model on the batches generated by datagen.flow().
    steps_per_epoch = int(len(train_list)/batch_size)
    history = model.fit_generator(
        generator=train_gen,
        steps_per_epoch=steps_per_epoch,
        validation_data=valid_gen,
        validation_steps=steps_per_epoch,
        epochs=epoch_size,
        use_multiprocessing=False,
        verbose=1,
        workers=4,
        callbacks=callbacks)

    # Score trained model.
    scores = model.evaluate_generator(generator=valid_gen, steps=steps_per_epoch, verbose=1)
    print('Test loss:', scores[0])
    print('Test accuracy:', scores[1])

    # Save score in configuration file
    system_config[f'{model_name}_Accuracy'] = scores[1]
    save_config()

    return history
def autoencoder_train(folder, batch_size, epoch_size, model_name):
    """
    Autoencoding, inherently UNET, is a data compression algorithm where the compression and decompression functions are:
    - data specific, ie, only compress data similar to what they have been trained on
    - lossy, ie, decompressed output will be degraded
    - learned automatically from examples.

    Two practical applications of autoencoders are data removal and dimensionality reduction

    There is an implementation from scikit-learn:
    http://scikit-learn.org/stable/modules/generated/sklearn.manifold.TSNE.html

    :param folder: image folder for training
    :param batch_size: training batch size
    :param epoch_size: training epoch size
    :param model_name: IR2, InceptionResNetV2; NL, NASNetLarge; NM, NASNetLarge
    :return: None
    """
    image_wh = system_config['image_wh']

    image_size = (image_wh, image_wh)
    image_shape = (image_wh, image_wh, 1)

    train_list, valid_list = create_tv_list(folder)
    print(f'Train size: {len(train_list)}, valid size: {len(valid_list)}')

    train_df = pd.DataFrame(train_list, columns=['fname', 'class'])
    valid_df = pd.DataFrame(valid_list, columns=['fname', 'class'])

    model = None
    if 'NM' in model_name:
        model_name = 'NM'
        model = NASNetMobile(include_top=True,
                             weights=None,
                             input_tensor=None,
                             input_shape=image_shape,
                             pooling='max',
                             classes=6)
    elif 'NL' in model_name:
        model_name = 'NL'
        model = NASNetLarge(include_top=True,
                            weights=None,
                            input_tensor=None,
                            input_shape=image_shape,
                            pooling='max',
                            classes=6)
    elif 'XC' in model_name:
        model_name = 'XC'
        model = Xception(include_top=True,
                         weights=None,
                         input_tensor=None,
                         input_shape=image_shape,
                         pooling='max',
                         classes=6)
    elif 'D21' in model_name:
        model_name = 'D21'
        model = DenseNet201(include_top=True,
                            weights=None,
                            input_tensor=None,
                            input_shape=image_shape,
                            pooling='max',
                            classes=6)
    elif 'IV3' in model_name:
        model_name = 'IV3'
        model = InceptionV3(include_top=True,
                            weights=None,
                            input_tensor=None,
                            input_shape=image_shape,
                            pooling='max',
                            classes=6)
    elif 'SC' in model_name:
        model_name = 'SC'
        model = simple_cnn(input_shape=image_shape, classes=6)
    else:
        model_name = 'IR2'
        model = InceptionResNetV2(include_top=True,
                                  weights=None,
                                  input_tensor=None,
                                  input_shape=image_shape,
                                  pooling='max',
                                  classes=6)

    model.compile(loss='categorical_crossentropy',
                  optimizer=Adam(lr=lr_schedule(0)),
                  metrics=['accuracy'])
    model.summary()

    # Image generator does data augmentation:
    datagen = data_generator()

    train_gen = datagen.flow_from_dataframe(dataframe=train_df,
                                            directory=folder,
                                            x_col="fname",
                                            y_col="class",
                                            class_mode="categorical",
                                            target_size=image_size,
                                            color_mode='grayscale',
                                            batch_size=batch_size,
                                            shuffle=False)

    valid_gen = datagen.flow_from_dataframe(dataframe=valid_df,
                                            directory=folder,
                                            x_col="fname",
                                            y_col="class",
                                            class_mode="categorical",
                                            target_size=image_size,
                                            color_mode='grayscale',
                                            batch_size=batch_size,
                                            shuffle=False)

    # Prepare model model saving directory.
    save_dir = Path(os.path.dirname(
        os.path.realpath(__file__))).joinpath('models')
    if not save_dir.is_dir():
        save_dir.mkdir(exist_ok=True)
    filepath = f'{str(save_dir)}/{MODEL_NAMES[model_name]}'
    print(f'{filepath}\n')

    # Prepare callbacks for model saving and for learning rate adjustment.
    checkpoint = ModelCheckpoint(filepath=filepath,
                                 monitor='val_acc',
                                 verbose=1,
                                 save_best_only=True)

    lr_scheduler = LearningRateScheduler(lr_schedule)

    lr_reducer = ReduceLROnPlateau(factor=np.sqrt(0.1),
                                   cooldown=0,
                                   patience=5,
                                   min_lr=0.5e-6)

    callbacks = [checkpoint, lr_reducer, lr_scheduler]

    # Fit the model on the batches generated by datagen.flow().
    steps_per_epoch = int(len(train_list) / batch_size)
    history = model.fit_generator(generator=train_gen,
                                  steps_per_epoch=steps_per_epoch,
                                  validation_data=valid_gen,
                                  validation_steps=steps_per_epoch,
                                  epochs=epoch_size,
                                  use_multiprocessing=False,
                                  verbose=1,
                                  workers=4,
                                  callbacks=callbacks)

    # Score trained model.
    scores = model.evaluate_generator(generator=valid_gen,
                                      steps=steps_per_epoch,
                                      verbose=1)
    print('Test loss:', scores[0])
    print('Test accuracy:', scores[1])

    # Save score in configuration file
    system_config[f'{model_name}_Accuracy'] = scores[1]
    save_config()

    return history
    callbacksList.append(earlyStop)
if reduceLRFlag:
    callbacksList.append(reduce_lr)
if modelCheckpointFlag:
    callbacksList.append(modelCheckpoint)

print('passing class weights: {}'.format(
    getClassWeights(trainGenerator.classes)))
history = model.fit_generator(
    trainGenerator,
    steps_per_epoch=trainSamplesNumber // batchSize * foldAugment,
    epochs=epochs,
    verbose=1,
    callbacks=callbacksList,
    validation_data=validationGenerator,
    class_weight=getClassWeights(trainGenerator.classes),
    shuffle=True,
    validation_steps=validateSamplesNumber // batchSize)

score = model.evaluate_generator(testGenerator, testSamplesNumber)

print('Test loss:', score[0])
print('Test accuracy:', score[1])

# serialize model to JSON
model_json = model.to_json()
with open(modelFile, "w") as json_file:
    json_file.write(model_json)
# serialize weights to HDF5
model.save_weights(weightsFile)
print("Saved model to disk")