Example #1
                                samplefile=samplepath,
                                after_n_batches=100,
                                batchsize=100000,
                                on_epoch_end=False,
                                use_event=ev))

model, history = train.trainModel(
    nepochs=1,
    run_eagerly=True,
    batchsize=nbatch,
    batchsize_use_sum_of_squares=False,
    checkperiod=1,  # saves a checkpoint model every N epochs
    verbose=verbosity,
    backup_after_batches=100,
    additional_callbacks=callbacks + [
        CyclicLR(
            base_lr=learningrate / 5., max_lr=learningrate * 5., step_size=20)
    ])

loss_config.energy_loss_weight = 0.01
loss_config.position_loss_weight = 0.01
learningrate = 3e-5

model, history = train.trainModel(
    nepochs=1 + 3,
    run_eagerly=True,
    batchsize=nbatch,
    batchsize_use_sum_of_squares=False,
    checkperiod=1,  # saves a checkpoint model every N epochs
    verbose=verbosity,
    backup_after_batches=100,
    additional_callbacks=callbacks + [
Example #2
    def train_flow(self,
                   dir_path,
                   model_type,
                   num_class,
                   epoch,
                   batch_size=128,
                   lr=0.01,
                   es=True,
                   decay_lr=False,
                   clr=False,
                   tl=True):
        """
        A function to train the data on fly
        :param dir_path: the directory of where the images store
        :param model_type: the type of the model architecture, available options are 'vgg16', 'vgg19', 'resnet50' and 'xception'
        :param num_class: number of classes
        :param epoch: number of epochs to train the model
        :param batch_size: batch size for mini-batch training
        :param lr: learning rate
                - if cyclical learning rate is used, it's the minimum learning rate
                - if step decay learning rate is used, it's the initial learning rate
        :param es: whether to use early stopping or not
        :param decay_lr: whether to use learning rate decay
        :param clr: whether to use cyclical learning rate
        :param tl: whether to use transfer learning (fine-tuning all layers) or not
        :return: the trained model
        """
        # if transfer learning is enabled, initialise the base model with
        # ImageNet weights; otherwise train from randomly initialised weights
        if tl:
            load_weights = 'imagenet'
        else:
            load_weights = None

        if model_type.lower() == 'vgg16':
            self.img_shape = (224, 224)
            base_model = VGG16(include_top=False,
                               weights=load_weights,
                               input_shape=(224, 224, 3))
        elif model_type.lower() == 'vgg19':
            self.img_shape = (224, 224)
            base_model = VGG19(include_top=False,
                               weights=load_weights,
                               input_shape=(224, 224, 3))
        elif model_type.lower() == 'resnet50':
            self.img_shape = (224, 224)
            base_model = ResNet50(include_top=False,
                                  weights=load_weights,
                                  input_shape=(224, 224, 3))
        elif model_type.lower() == 'xception':
            self.img_shape = (299, 299)
            base_model = Xception(include_top=False,
                                  weights=load_weights,
                                  input_shape=(299, 299, 3))
        else:
            raise ValueError("Error: model name not valid!")

        self.batch_size = batch_size

        x = base_model.output
        # flatten the output
        x = Flatten()(x)

        # the number of units in the dense layer is 1024
        x = Dense(1024, activation="relu")(x)
        x = Dropout(0.5)(x)

        predictions = Dense(num_class,
                            activation="softmax",
                            name='new_dense_layer')(x)
        model = Model(inputs=base_model.input, outputs=predictions)
        optimizer = optimizers.SGD(lr=lr, momentum=0.9)
        model.compile(loss="categorical_crossentropy",
                      metrics=["accuracy"],
                      optimizer=optimizer)

        # data augmentation
        # create generator for augmenting training data
        train_datagen = ImageDataGenerator(featurewise_center=True,
                                           zoom_range=0.1,
                                           shear_range=0.6,
                                           rescale=1. / 255,
                                           rotation_range=6)
        # fit the train_datagen (compute statistics for pre-processing) with some sample training data
        sample_train = self.sample_train(dir_path, 100)
        train_datagen.fit(sample_train)

        test_datagen = ImageDataGenerator(rescale=1. / 255)

        train_generator = train_datagen.flow_from_directory(
            dir_path + 'train',
            target_size=self.img_shape,
            batch_size=batch_size)
        valid_generator = test_datagen.flow_from_directory(
            dir_path + 'validation',
            target_size=self.img_shape,
            batch_size=batch_size)

        # Check point: save the model with the best accuracy
        model_path = self.name + '_model.h5'
        check_point = ModelCheckpoint(model_path,
                                      monitor='val_acc',
                                      save_best_only=True,
                                      mode='max')

        callback_list = [check_point]

        # if decay learning rate scheduler is used
        if decay_lr:
            lrate = LearningRateScheduler(self.step_decay)
            callback_list.append(lrate)

        # if clr is True, use a cyclical learning rate
        if clr:
            clr_stepsize = 2 * math.ceil(37882 / batch_size)
            clr_triangular = CyclicLR(mode='triangular',
                                      base_lr=lr,
                                      max_lr=6 * lr,
                                      step_size=clr_stepsize)
            callback_list.append(clr_triangular)

        # if es is True, use early stopping
        if es:
            early_stop = EarlyStopping(monitor='val_acc',
                                       patience=10,
                                       mode='max')
            callback_list.append(early_stop)

        model.fit_generator(train_generator,
                            validation_data=valid_generator,
                            epochs=epoch,
                            callbacks=callback_list)

        # get a map from real label to prediction
        label_map = (train_generator.class_indices)
        # swap value and key, map from prediction to real label
        label_map = dict((v, k) for k, v in label_map.items())
        # store the label map
        with open('label_map.json', 'w') as fp:
            json.dump(label_map, fp)

        return model
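A minimal usage sketch for the method above (the wrapper class name and the data directory are assumptions; the keyword arguments follow the docstring):

clf = ImageClassifier()                      # hypothetical class exposing train_flow
model = clf.train_flow(dir_path='data/',     # expects data/train and data/validation subfolders
                       model_type='resnet50',
                       num_class=5,
                       epoch=30,
                       batch_size=64,
                       lr=1e-3,
                       es=True,
                       clr=True,             # cyclical LR sweeping between lr and 6 * lr
                       tl=True)              # start from ImageNet weights and fine-tune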
Example #3
    for i in range(itercount):
        train_indices, val_indices = train_test_split(
            np.arange(len(meta_parameters_dictionary['train_labels'])),
            test_size=0.1)
        if early:
            early_test_indices = []
            for j in range(len(meta_parameters_dictionary['test_labels'])):
                if meta_parameters_dictionary['test_labels_stage'][j,0]==1 or meta_parameters_dictionary['test_labels_stage'][j,1]==1:
                    early_test_indices.append(j)
        test_indices = range(len(meta_parameters_dictionary['test_labels']))
        meta_parameters_dictionary['train_indices'] = train_indices
        meta_parameters_dictionary['val_indices'] = val_indices
        meta_parameters_dictionary['test_indices'] = np.array(test_indices)
        training_generator = data(meta_parameters_dictionary, batch_size, True, False)
        val_generator = data(meta_parameters_dictionary, batch_size, False, True)
        test_generator = data(meta_parameters_dictionary, batch_size, False, False)
        csv_logger = CSVLogger(
            os.path.join(LOGDIR, 'training_{}.log'.format(test_cohort)))
        lrate = CyclicLR(base_lr=0.001, max_lr=0.01, step_size=100, mode='triangular2')
        checkpointer = ExponentialMovingAverage(
            filepath=checkpoint_dir + 'cyclic_{}_{}.h5'.format(test_cohort, i),
            save_best_only=True, save_weights_only=True,
            custom_objects={'cox_regression_loss': cox_regression_loss}, verbose=1)
        # log the learning rate each epoch (the original tf.eval does not exist;
        # tf.keras.backend.eval assumes a TensorFlow backend)
        lr_monitor = LambdaCallback(on_epoch_begin=lambda epoch, logs: print(
            tf.keras.backend.eval(model.optimizer.lr)))
        lr_callback = ReduceLROnPlateau(
            monitor='val_loss', factor=0.5, patience=2, min_lr=0.00001)
        model = get_model(cube_size, clinical_features_size, kernel_size=(3, 3, 3))
        history = model.fit_generator(
            training_generator, verbose=2, epochs=steps,
            callbacks=[lr_callback, lr_monitor, lrate, csv_logger, checkpointer],
            validation_data=val_generator, workers=8,
            use_multiprocessing=True, shuffle=True)
        print(i)
        try:
            model.load_weights(
                checkpoint_dir + 'cyclic_{}_{}.h5'.format(test_cohort, i))
        except OSError:
            print('Could not find checkpoint: ' + checkpoint_dir +
                  'cyclic_{}_{}.h5'.format(test_cohort, i))
            continue

        #tensorboard_callback = TensorBoard(log_dir=LOGDIR, histogram_freq=0, write_graph=True)
        #early_stopping_monitor = EarlyStopping(monitor='val_loss', patience=10)
        #weights = model.get_weights()
Example #4
    metrics=None,
    #clipnorm=0.01
)

model, history = train.trainModel(
    nepochs=1,
    run_eagerly=True,
    batchsize=nbatch,
    extend_truth_list_by=len(train.keras_model.outputs) -
    2,  #just adapt truth list to avoid keras error (no effect on model)
    batchsize_use_sum_of_squares=False,
    checkperiod=1,  # saves a checkpoint model every N epochs
    verbose=verbosity,
    backup_after_batches=100,
    additional_callbacks=[
        CyclicLR(base_lr=learningrate / 3., max_lr=learningrate, step_size=50)
    ] + cb)

print("freeze BN")
for l in train.keras_model.layers:
    if 'FullOCLoss' in l.name:
        l.use_average_cc_pos = False
        l.q_min = 0.5
        l.beta_loss_scale = 3.

#also stop GravNetLLLocalClusterLoss* from being evaluated
learningrate /= 10.
nbatch = 180000

train.compileModel(learningrate=learningrate, loss=None, metrics=None)
Example #5
            xtrain_fold = np.vstack((xtrain_fold, xtrain_pseudo, xtrain_flip_pseudo))
            ytrain_fold = np.vstack((ytrain_fold, ytrain_pseudo, ytrain_pseudo))
            xtrain_fold, ytrain_fold = shuffle(xtrain_fold, ytrain_fold)

            xvalid_fold = xtrain[vidxs, :]
            yvalid_fold = ytrain[vidxs, :]

            train_size = ytrain_fold.shape[0]
            valid_size = yvalid_fold.shape[0]
            train_steps = np.ceil(float(train_size) / float(BATCH_SIZE))
            valid_steps = np.ceil(float(valid_size) / float(BATCH_SIZE))
            print('TRAIN SIZE: %d VALID SIZE: %d' % (train_size, valid_size))

            WEIGHTS_BEST = 'weights/best_weight_pseudo_part%d_fold%d.hdf5' % (part, fold)

            clr = CyclicLR(base_lr=1e-7, max_lr=2e-4, step_size=4 * train_steps,
                           mode='exp_range', gamma=0.99994)
            early_stopping = EarlyStopping(monitor='val_acc', patience=20,
                                           verbose=1, mode='max')
            save_checkpoint = ModelCheckpoint(WEIGHTS_BEST, monitor='val_acc',
                                              verbose=1, save_weights_only=True,
                                              save_best_only=True, mode='max')
            callbacks = [save_checkpoint, early_stopping, clr]

            model = Model()
            model.summary()
            model.compile(loss='categorical_crossentropy', optimizer=Adam(lr=2e-4),
                          metrics=['accuracy'])

            model.fit(xtrain_fold, ytrain_fold, batch_size=BATCH_SIZE, epochs=EPOCHS,
                      verbose=1, validation_data=(xvalid_fold, yvalid_fold),
                      callbacks=callbacks, shuffle=True)

            model.load_weights(WEIGHTS_BEST)

            ptest += model.predict(xtest, batch_size=BATCH_SIZE, verbose=1)
            ptest += model.predict(xtest_flip, batch_size=BATCH_SIZE, verbose=1)
Example #6
        early_stopping = EarlyStopping(monitor='val_acc', patience=8, verbose=1)
        save_checkpoint = ModelCheckpoint(WEIGHTS_BEST,
                                          monitor='val_acc',
                                          verbose=1,
                                          save_best_only=True,
                                          save_weights_only=True,
                                          mode='max')
        reduce_lr = ReduceLROnPlateau(monitor='val_acc',
                                      factor=0.2,
                                      patience=4,
                                      min_lr=1e-8,
                                      verbose=1)
        csv_logger = CSVLogger(TRAINING_LOG, append=True)
        clr = CyclicLR(base_lr=1e-8,
                       max_lr=4e-5,
                       step_size=2000.,
                       mode='exp_range',
                       gamma=0.99994)

        callbacks_warmup = [save_checkpoint, csv_logger]
        callbacks_clr = [early_stopping, save_checkpoint, clr, csv_logger]
        callbacks = [early_stopping, save_checkpoint, reduce_lr, csv_logger]

        model = InceptionV3_Model()

        # warm up
        for layer in model.layers[0:-3]:
            layer.trainable = False
        model.compile(loss='categorical_crossentropy',
                      optimizer=Adam(lr=8e-5),
                      metrics=['accuracy'])
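The snippet ends at the warm-up compile; a hedged sketch of how the prepared callback lists might be used afterwards (the data arrays, batch size and epoch counts are assumptions; the callback lists and learning rates come from the snippet above):

        # warm-up: train only the unfrozen head with a fixed learning rate
        model.fit(x_train, y_train,                    # assumed training arrays
                  batch_size=32, epochs=2,
                  validation_data=(x_valid, y_valid),  # assumed validation arrays
                  callbacks=callbacks_warmup)

        # unfreeze all layers and continue with the cyclical learning rate
        for layer in model.layers:
            layer.trainable = True
        model.compile(loss='categorical_crossentropy',
                      optimizer=Adam(lr=4e-5),
                      metrics=['accuracy'])
        model.fit(x_train, y_train,
                  batch_size=32, epochs=30,
                  validation_data=(x_valid, y_valid),
                  callbacks=callbacks_clr)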
Example #7
# )
#
#     )
#

# unfix
# train.keras_model = fixLayersContaining(train.keras_model, "batch_normalization")
# train.keras_model = fixLayersContaining(train.keras_model, "bn_")
train.compileModel(learningrate=1e-4, loss=None)
# print('frozen:')
# for l in train.keras_model.layers:
#     if not l.trainable:
#         print(l.name)

# 0/0

# train.saveModel('jan.h5')
#
# 0/0

model, history = train.trainModel(
    nepochs=10,
    run_eagerly=True,
    batchsize=nbatch,
    batchsize_use_sum_of_squares=False,
    checkperiod=1,  # saves a checkpoint model every N epochs
    verbose=verbosity,
    backup_after_batches=100,
    additional_callbacks=callbacks +
    [CyclicLR(base_lr=learningrate, max_lr=learningrate * 2., step_size=100)])
Example #8
File: UnBFace.py  Project: lwyqq/UnBFace
# configure some hyper parameters
INIT_LR = 5e-3
EPOCHS = 100
BATCH_SIZE = 96
#STEPS_PER_EPOCH = 320,
VALIDATION_STEPS = 64

# add cyclical learning rate callback
MIN_LR = 1e-7
MAX_LR = 1e-2
CLR_METHOD = "triangular"
STEP_SIZE = 4

clr = CyclicLR(mode=CLR_METHOD,
               base_lr=MIN_LR,
               max_lr=MAX_LR,
               step_size=(STEP_SIZE * (np.shape(face_train)[0] // BATCH_SIZE)))

# add checkpoint to save the network and stop if training doesn't improve
filepath = "../best_weights_" + timestamp + ".hdf5"
checkpoint = ModelCheckpoint(filepath,
                             monitor='val_loss',
                             verbose=1,
                             save_best_only=True,
                             mode='min')
earlystop = EarlyStopping(monitor='val_loss', patience=50)
callbacks_list = [checkpoint, earlystop, clr]

# compile complete model with optimizer and print summary on screen
optim = SGD(lr=INIT_LR, momentum=0.9)
model.compile(optimizer=optim, loss='mean_squared_error', metrics=['mae'])
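The snippet stops at compile; a hedged sketch of the training call that would follow (the landmark target array and the validation split are assumptions; model, the hyperparameters and callbacks_list come from the snippet):

history = model.fit(face_train, face_landmarks,   # face_landmarks is an assumed target array
                    validation_split=0.2,         # assumed; the snippet does not show the split
                    batch_size=BATCH_SIZE,
                    epochs=EPOCHS,
                    callbacks=callbacks_list)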
Example #9
    def train(self,
              train,
              valid,
              checkPath,
              epochs=200,
              factor=0.8,
              batch_size=32,
              tensorboardPath=None,
              lim_lr=0.0009,
              scheduler_mode=None,
              iteration=None):
        try:
            if tensorboardPath is None:

                t, h = os.path.split(checkPath)
                tensorboardPath = os.path.normpath(t + "//tensorboard//" +
                                                   str(h))

            patience_stop = 100
            patience_reduce = 3

            self.factor = factor

            earlystop = keras.callbacks.EarlyStopping(monitor='val_loss',
                                                      patience=patience_stop)
            checkpointer = keras.callbacks.ModelCheckpoint(filepath=checkPath,
                                                           verbose=1,
                                                           save_best_only=True)
            tboard = LRTensorBoard(tensorboardPath)

            if scheduler_mode is None:
                reduce_lr = keras.callbacks.ReduceLROnPlateau(
                    monitor='val_loss',
                    factor=float(factor),
                    patience=patience_reduce,
                    min_delta=0.2,
                    min_lr=lim_lr)
            else:
                if iteration is None:
                    raise Exception("Number of iterations is unknown")

                str_info = ' scheduler_mode: ' + str(scheduler_mode) + ','
                str_info += ' base_lr: ' + str(self.start_lr) + ','
                str_info += ' max_lr: ' + str(
                    self.start_lr * float(factor)) + ','
                str_info += ' step_size: ' + str(int(2 * iteration))
                print(termcolor.colored(str_info, "yellow"))

                reduce_lr = CyclicLR(base_lr=self.start_lr,
                                     max_lr=self.start_lr * float(factor),
                                     mode=scheduler_mode,
                                     step_size=2 * iteration)

            callback_list = [earlystop, checkpointer, tboard, reduce_lr]

            self.model.fit_generator(
                generator=train,
                validation_data=valid,
                epochs=epochs,
                use_multiprocessing=True,
                callbacks=callback_list,
            )

            del self.model
        except Exception as exc:
            raise Exception("training failed") from exc
Example #10
def main():
    start = time.time()

    ap = argparse.ArgumentParser()
    ap.add_argument("-e",
                    "--epochs",
                    required=True,
                    type=int,
                    help="Number of epochs",
                    default=25)
    ap.add_argument("-m",
                    "--model_name",
                    required=True,
                    type=str,
                    help="Imagenet model to train",
                    default="xception")
    ap.add_argument("-b",
                    "--batch_size",
                    required=True,
                    type=int,
                    help="Batch size",
                    default=8)
    ap.add_argument("-im_size",
                    "--image_size",
                    required=True,
                    type=int,
                    help="Batch size",
                    default=224)
    args = ap.parse_args()

    # Training dataset loading
    train_data = np.load("train_data.npy")
    train_label = np.load("train_label.npy")
    encoder = LabelEncoder()
    encoder.fit(train_label)
    encoded_y = encoder.transform(train_label)
    Y = utils.to_categorical(encoded_y)

    print("Dataset Loaded...")

    # Train and validation split
    trainX, valX, trainY, valY = train_test_split(train_data,
                                                  Y,
                                                  test_size=0.1,
                                                  shuffle=True,
                                                  random_state=42,
                                                  stratify=Y)
    print(trainX.shape, valX.shape, trainY.shape, valY.shape)

    # Train and validation image data generator
    trainAug = ImageDataGenerator(
        rescale=1.0 / 255.0,
        preprocessing_function=get_random_eraser(p=0.5,
                                                 s_l=0.02,
                                                 s_h=0.4,
                                                 r_1=0.3,
                                                 r_2=1 / 0.3,
                                                 v_l=0,
                                                 v_h=255,
                                                 pixel_level=False),
        rotation_range=30,
        zoom_range=0.15,
        width_shift_range=0.2,
        height_shift_range=0.2,
        shear_range=0.15,
        horizontal_flip=True,
        fill_mode="nearest",
    )

    valAug = ImageDataGenerator(rescale=1.0 / 255.0)

    model = cnn_model(args.model_name, img_size=args.image_size)

    # Number of trainable and non-trainable parameters
    trainable_count = int(
        np.sum([K.count_params(p) for p in set(model.trainable_weights)]))
    non_trainable_count = int(
        np.sum([K.count_params(p) for p in set(model.non_trainable_weights)]))

    print("Total params: {:,}".format(trainable_count + non_trainable_count))
    print("Trainable params: {:,}".format(trainable_count))
    print("Non-trainable params: {:,}".format(non_trainable_count))

    if not exists("./trained_wts"):
        makedirs("./trained_wts")
    if not exists("./training_logs"):
        makedirs("./training_logs")
    if not exists("./plots"):
        makedirs("./plots")

    # Keras callbacks
    model_checkpoint = ModelCheckpoint(
        "trained_wts/" + args.model_name + ".hdf5",
        monitor="val_loss",
        verbose=1,
        save_best_only=True,
        save_weights_only=True,
    )

    stopping = EarlyStopping(monitor="val_loss", patience=10, verbose=0)

    clr = CyclicLR(mode=CLR_METHOD,
                   base_lr=MIN_LR,
                   max_lr=MAX_LR,
                   step_size=STEP_SIZE * (trainX.shape[0] // args.batch_size))
    print("Training is going to start in 3... 2... 1... ")

    # Model Training
    H = model.fit_generator(
        trainAug.flow(trainX, trainY, batch_size=args.batch_size),
        steps_per_epoch=len(trainX) // args.batch_size,
        validation_data=valAug.flow(valX, valY),
        validation_steps=len(valX) // args.batch_size,
        epochs=args.epochs,
        callbacks=[model_checkpoint, clr],  # clr must run as a callback for the LR plot below
    )

    # plot the training loss and accuracy
    plt.style.use("ggplot")
    plt.figure()
    N = args.epochs
    plt.plot(np.arange(0, N), H.history["loss"], label="train_loss")
    plt.plot(np.arange(0, N), H.history["val_loss"], label="val_loss")
    plt.plot(np.arange(0, N), H.history["accuracy"], label="train_acc")
    plt.plot(np.arange(0, N), H.history["val_accuracy"], label="val_acc")
    plt.title("Training Loss and Accuracy")
    plt.xlabel("Epoch #")
    plt.ylabel("Loss/Accuracy")
    plt.legend(loc="lower left")
    plt.savefig("plots/training_plot.png")

    N = np.arange(0, len(clr.history["lr"]))
    plt.figure()
    plt.plot(N, clr.history["lr"])
    plt.title("Cyclical Learning Rate (CLR)")
    plt.xlabel("Training Iterations")
    plt.ylabel("Learning Rate")
    plt.savefig("plots/cyclic_lr.png")

    end = time.time()
    dur = end - start

    if dur < 60:
        print("Execution Time:", dur, "seconds")
    elif dur < 3600:
        dur = dur / 60
        print("Execution Time:", dur, "minutes")
    else:
        dur = dur / (60 * 60)
        print("Execution Time:", dur, "hours")
Example #11
def _main():
    # argument parsing
    parser = argparse.ArgumentParser(
        description='Trains an image similarity detector.')
    parser.add_argument('--training-images-dir',
                        type=str,
                        help='The directory containing the training images '
                        'input files (JSON).')
    parser.add_argument(
        '--validation-images-dir',
        type=str,
        default=None,
        help='The directory containing the validation images input files. '
        'If not specified, then no validation is performed (default behavior).'
    )
    parser.add_argument('--images-dir',
                        type=str,
                        help='The root of the images directory.')
    parser.add_argument(
        '--output-dir',
        type=str,
        help='The output directory where the checkpoints will be stored.')
    parser.add_argument('--restart-checkpoint',
                        type=str,
                        default=None,
                        help='The checkpoint from which to restart.')
    parser.add_argument(
        '--image-size',
        type=int,
        default=224,
        help='The image size in pixels, default is 224 (meaning 224x224).')
    parser.add_argument(
        '--preload-images',
        type=int,
        default=0,
        help=
        'Preload (cache) images before starting training, 0 if not needed, else: number of bytes '
        'to load in cache.')
    parser.add_argument('--greyscale',
                        type=int,
                        default=0,
                        help='If set to 1, converts images to greyscale.')
    parser.add_argument('--batch-size',
                        type=int,
                        default=24,
                        help='The training minibatch size.')
    parser.add_argument('--loss-batch',
                        type=int,
                        default=4,
                        help='The loss minibatch size.')
    parser.add_argument(
        '--backbone',
        type=str,
        default='mobilenetv2',
        help='The network backbone: mobilenetv2 (default), densenet121')
    parser.add_argument('--freeze-backbone',
                        type=int,
                        default=0,
                        help='If set to 1, freeze the backbone.')
    parser.add_argument(
        '--feature-len',
        type=int,
        default=128,
        help=
        'If larger than 0, a 1x1 convolution is added that converts the backbone output features '
        'to a layer with depth equal to --feature-len.')
    parser.add_argument(
        '--margin',
        type=float,
        default=0.4,
        help='The margin for the triple loss (default is 0.4).')
    parser.add_argument(
        '--soft',
        type=int,
        default=0,
        help='If set to 1, use soft margins when computing loss.')
    parser.add_argument(
        '--metric',
        type=str,
        default='euclidian',
        help=
        'The distance metric: Euclidian (euclidian) or binary cross-entropy (binaryce). By '
        'default it is Euclidian.')
    parser.add_argument(
        '--max-lr',
        type=float,
        default=1e-4,
        help='The maximum (and also initial) learning rate (1e-4 by default).')
    parser.add_argument('--min-lr',
                        type=float,
                        default=1e-5,
                        help='The minimum learning rate (1e-5 by default).')
    parser.add_argument(
        '--lr-schedule',
        type=str,
        default='cosine',
        help='The learning rate schedule: cosine (default), cyclic.')
    parser.add_argument(
        '--lr-schedule-cycle',
        type=int,
        default=100000,
        help='The learning rate cycle length (number of images).')
    parser.add_argument('--images-per-epoch',
                        type=int,
                        default=10000,
                        help='The number of images per epoch.')
    parser.add_argument('--start-epoch',
                        type=int,
                        default=1,
                        help='The starting epoch (1 by default).')
    parser.add_argument('--end-epoch',
                        type=int,
                        default=5000,
                        help='The ending epoch (5000 by default).')
    parser.add_argument('--checkpoint-name',
                        type=str,
                        default='chkpt',
                        help='The root of the checkpoint names.')
    parser.add_argument(
        '--checkpoint-freq',
        type=int,
        default=100,
        help='The frequency of checkpoints in epochs. Default is 100.')
    parser.add_argument(
        '--early-stopping-patience',
        type=int,
        default=-1,
        help=
        'The number of epoch to wait before stopping if the validation loss does not decrease. '
        'Set to -1 to disable (default)')
    parser.add_argument(
        '--no-aug-prob',
        type=float,
        default=0.2,
        help='The probability that an image is not augmented at all.')
    parser.add_argument('--crop-prob',
                        type=float,
                        default=0.0,
                        help='The crop probability (0.0 by default).')
    parser.add_argument(
        '--crop-frac',
        type=float,
        default=0.09,
        help='The maximum fraction of area cropped-out (0.09 by default).')
    parser.add_argument('--fill-letterbox',
                        type=int,
                        default=0,
                        help='Fill the letterbox (for small images).')
    parser.add_argument('--jitter-prob',
                        type=float,
                        default=0.2,
                        help='The jitter probability (0.2 by default).')
    parser.add_argument('--jitter',
                        type=float,
                        default=0.1,
                        help='The jitter size (0.1 by default).')
    parser.add_argument('--rotation-prob',
                        type=float,
                        default=0.0,
                        help='The rotation probability.')
    parser.add_argument('--rotation-angle',
                        type=float,
                        default=0.0,
                        help='The maximum rotation angle.')
    parser.add_argument(
        '--rotation-expand-prob',
        type=float,
        default=0,
        help=
        'Probability to expand the image when rotating to not lose anything.')
    parser.add_argument('--scale-prob',
                        type=float,
                        default=0.1,
                        help='The rescaling probability.')
    parser.add_argument('--scale-min',
                        type=float,
                        default=1.0,
                        help='The minimum image rescaling factor.')
    parser.add_argument('--scale-max',
                        type=float,
                        default=1.0,
                        help='The maximum image rescaling factor.')
    parser.add_argument(
        '--hflip',
        type=float,
        default=0.0,
        help='The horizontal flip probability (0.0 by default).')
    parser.add_argument('--no-colour-transforms',
                        type=int,
                        default=0,
                        help='Do not transform colors.')
    parser.add_argument('--vflip',
                        type=float,
                        default=0.0,
                        help='The vertical flip probability (0.0 by default).')
    parser.add_argument(
        '--hue',
        type=float,
        default=0.05,
        help='The hue variation (ignored for siamese backbone).')
    parser.add_argument(
        '--sat',
        type=float,
        default=0.2,
        help='The saturation variation (ignored for siamese backbone).')
    parser.add_argument(
        '--val',
        type=float,
        default=0.2,
        help='The value variation (ignored for siamese backbone).')
    parser.add_argument(
        '--mlflow',
        type=int,
        default=0,
        help='Set to 1 if using MLflow. Metrics and artifacts will be logged.')

    args = parser.parse_args()

    # start the mlflow autologging
    if args.mlflow:
        import mlflow.keras
        mlflow.keras.autolog()

    # create the training image list
    train_data = load_data(args.training_images_dir, verbose=False)
    train_imgs, train_cache = preload_images(train_data, 4, args.images_dir,
                                             args.preload_images)
    train_parents = list(train_imgs.keys())
    np.random.shuffle(train_parents)

    train_lens = {}
    for k, v in train_imgs.items():
        cur_len = len(v)
        if cur_len in train_lens:
            train_lens[cur_len] += 1
        else:
            train_lens[cur_len] = 1
    train_lens = pd.DataFrame(train_lens, index=[0])
    print("Training length distribution:")
    print(train_lens)

    if args.validation_images_dir:
        do_valid = True
        val_data = load_data(args.validation_images_dir, verbose=False)
        val_imgs, val_cache = preload_images(val_data, 4, args.images_dir,
                                             args.preload_images)
        val_parents = list(val_imgs.keys())
        np.random.shuffle(val_parents)

    else:
        do_valid = False

    print('There are {} training images.'.format(len(train_imgs)))
    if do_valid:
        print('There are {} validation images.'.format(len(val_imgs)))

    # create the output directory if necessary
    if not os.path.exists(args.output_dir):
        os.makedirs(args.output_dir)

    # scale the learning rate to the batch size
    max_lr = args.max_lr
    min_lr = args.min_lr

    # create the model
    num_channels = 1 if args.backbone == 'siamese' else 3
    encoder = create_model((args.image_size, args.image_size, num_channels),
                           restart_checkpoint=args.restart_checkpoint,
                           backbone=args.backbone,
                           feature_len=args.feature_len,
                           freeze=args.freeze_backbone == 1)

    # compile the model with the initial learning rate
    bh_loss = Lambda(batch_hard_loss,
                     output_shape=(1, ),
                     name='batch_hard',
                     arguments={
                         'loss_batch': args.loss_batch,
                         'loss_margin': args.margin,
                         'soft': args.soft == 1,
                         'metric': args.metric
                     })(encoder.output)
    model = Model(encoder.input, bh_loss)
    model.compile(loss={
        'batch_hard': lambda y_true, y_pred: y_pred
    },
                  optimizer=Adam(lr=max_lr))
    print(model.summary())

    print('Loss metric: {}'.format(args.metric))
    if args.soft == 1:
        print('Using soft margins.')

    # prepare the callbacks
    info_lr = lr_info(model, args.mlflow == 1)

    # learning rate
    true_batch_size = args.batch_size // args.loss_batch

    print(
        'Scaling the learning rate minimum to {} and maximum (initial) to {}'.
        format(min_lr, max_lr))
    if args.lr_schedule == 'cosine':
        print('Using the cosine annealing learning rate scheduler.')
        lr_callback = CosineAnnealingScheduler(
            max_lr,
            true_batch_size,
            args.lr_schedule_cycle,
            min_lr=min_lr,
            verbose=True,
            initial_counter=(args.start_epoch - 1) * args.images_per_epoch)
    else:
        lr_callback = CyclicLR(mode='triangular',
                               max_lr=max_lr,
                               base_lr=min_lr,
                               step_size=args.lr_schedule_cycle //
                               true_batch_size)

    # checkpoints
    checkpoint = MyModelCheckpoint(
        filepath=os.path.join(args.output_dir,
                              args.checkpoint_name + '_' + '{epoch:04d}'),
        snapshot_path=os.path.join(args.output_dir,
                                   args.checkpoint_name + '-snapshot'),
        model_body=None,
        encoder=encoder,
        save_best_only=do_valid,
        period=args.checkpoint_freq,
        verbose=1,
        mlflow=args.mlflow == 1)

    callbacks = [info_lr, lr_callback, checkpoint]

    if do_valid and args.early_stopping_patience != -1:
        callbacks.append(
            EarlyStopping(monitor='val_loss',
                          patience=args.early_stopping_patience))

    # train
    print('Batch configuration:')
    print('Loss batch: {}'.format(args.loss_batch))
    print('Positives + anchors: {}'.format(args.loss_batch // 4))
    print('Negatives: {}'.format(args.loss_batch - args.loss_batch // 4))
    print('Effective minibatch: {}'.format(true_batch_size))
    print('Encoder minibatch: {}'.format(args.batch_size))
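    # worked example with the defaults (--batch-size 24, --loss-batch 4):
    # loss batch = 4, positives + anchors = 4 // 4 = 1, negatives = 4 - 1 = 3,
    # effective minibatch = 24 // 4 = 6, encoder minibatch = 24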

    augment = {
        'scale_prob': args.scale_prob,
        'scale_min': args.scale_min,
        'scale_max': args.scale_max,
        'crop_prob': args.crop_prob,
        'crop_frac': args.crop_frac,
        'jitter_prob': args.jitter_prob,
        'jitter': args.jitter,
        'rotate_prob': args.rotation_prob,
        'rotate_angle': args.rotation_angle,
        'rotate_expand_prob': args.rotation_expand_prob,
        'hflip_prob': args.hflip,
        'vflip_prob': args.vflip
    }
    if args.no_colour_transforms == 0:
        augment['hue'] = args.hue
        augment['saturation'] = args.sat
        augment['value'] = args.val

    train_generator = data_generator(
        train_imgs,
        train_parents,
        args.batch_size,
        args.loss_batch, (args.image_size, args.image_size, num_channels),
        args.no_aug_prob,
        augment=augment,
        greyscale=args.greyscale == 1,
        fill_letterbox=args.fill_letterbox == 1,
        cache=train_cache)

    if do_valid:
        val_generator = data_generator(
            val_imgs,
            val_parents,
            args.batch_size,
            args.loss_batch, (args.image_size, args.image_size, num_channels),
            args.no_aug_prob,
            augment=augment,
            greyscale=args.greyscale == 1,
            fill_letterbox=args.fill_letterbox == 1,
            cache=val_cache)
    else:
        val_generator = None

    model.fit_generator(
        train_generator,
        steps_per_epoch=max(1, args.images_per_epoch // true_batch_size),
        validation_data=val_generator,
        validation_steps=max(1, args.images_per_epoch // true_batch_size),
        epochs=args.end_epoch,
        initial_epoch=args.start_epoch - 1,
        callbacks=callbacks)
Example #12
def get_training_param_img(
    hypa: ty.Dict[str, str],
    use_validation: bool,
    model_path: ty.Optional[Path],
    num_samples: int,
) -> ty.Dict[str, ty.Any]:
    """MAKEDOC: what is get_training_param_img doing?"""
    logg = logging.getLogger(f"c.{__name__}.get_training_param_img")
    # logg.setLevel("INFO")
    # logg.debug("Start get_training_param_img")

    training_param: ty.Dict[str, ty.Any] = {}

    training_param["batch_size"] = int(hypa["batch_size_type"])
    training_param["epochs"] = int(hypa["epoch_num_type"])

    # translate from short key to long name
    learning_rate_types = {
        "01": "fixed01",
        "02": "fixed02",
        "03": "exp_decay_step_01",
        "04": "exp_decay_smooth_01",
        "05": "clr_triangular2_01",
        "06": "clr_triangular2_02",
    }
    learning_rate_type = hypa["learning_rate_type"]
    lr_name = learning_rate_types[learning_rate_type]
    training_param["lr_name"] = lr_name

    if lr_name.startswith("fixed"):
        if lr_name == "fixed01":
            lr = 1e-3
        elif lr_name == "fixed02":
            lr = 1e-4
    else:
        lr = 1e-3

    optimizer_types = {
        "a1": Adam(learning_rate=lr),
        "r1": RMSprop(learning_rate=lr)
    }
    training_param["opt"] = optimizer_types[hypa["optimizer_type"]]

    callbacks = []

    if lr_name.startswith("exp_decay"):
        if lr_name == "exp_decay_step_01":
            exp_decay_part = partial(exp_decay_step, epochs_drop=5)
        elif lr_name == "exp_decay_smooth_01":
            exp_decay_part = partial(exp_decay_smooth, epochs_drop=5)
        lrate = LearningRateScheduler(exp_decay_part)
        callbacks.append(lrate)

    # setup cyclic learning rate
    elif lr_name.startswith("clr_triangular2"):

        # target_cycles = the number of cycles we want in those epochs
        # it_per_epoch = num_samples // batch_size
        # total_iterations = it_per_epoch * epoch_num
        # step_size = total_iterations // (target_cycles * 2)
        # (a full CLR cycle spans two step_sizes: one ramp up, one ramp down)
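        # worked example (illustrative numbers only): num_samples = 6400,
        # batch_size = 32 and epochs = 10 give it_per_epoch = 200 and
        # total_iterations = 2000; target_cycles = 2 then yields
        # step_size = 2000 // 4 = 500, i.e. the LR ramps up for 500 batches
        # and back down for 500, twice over the run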

        if lr_name == "clr_triangular2_01":
            target_cycles = 2
            it_per_epoch = num_samples // training_param["batch_size"]
            total_iterations = it_per_epoch * training_param["epochs"]
            step_size = total_iterations // (target_cycles * 2)
            base_lr = 1e-5
            max_lr = 1e-3
        elif lr_name == "clr_triangular2_02":
            target_cycles = 8
            it_per_epoch = num_samples // training_param["batch_size"]
            total_iterations = it_per_epoch * training_param["epochs"]
            step_size = total_iterations // (target_cycles * 2)
            base_lr = 1e-6
            max_lr = 1e-3

        logg.debug(f"target_cycles: {target_cycles}")
        logg.debug(f"it_per_epoch: {it_per_epoch}")
        logg.debug(f"total_iterations: {total_iterations}")
        logg.debug(f"num_samples: {num_samples}")
        logg.debug(f"CLR is using step_size: {step_size}")

        mode = "triangular2"
        cyclic_lr = CyclicLR(base_lr, max_lr, step_size, mode)
        callbacks.append(cyclic_lr)

    # which metric to monitor for early_stop and model_checkpoint
    metric_to_monitor = "val_loss" if use_validation else "loss"

    if lr_name.startswith("fixed") or lr_name.startswith("exp_decay"):
        early_stop = EarlyStopping(
            monitor=metric_to_monitor,
            patience=4,
            restore_best_weights=True,
            verbose=1,
        )
        callbacks.append(early_stop)

    # checkpointing is skipped when model_path is None
    if model_path is not None:
        model_checkpoint = ModelCheckpoint(str(model_path),
                                           monitor=metric_to_monitor,
                                           verbose=1,
                                           save_best_only=True)
        callbacks.append(model_checkpoint)

    training_param["callbacks"] = callbacks

    return training_param
Example #13
    def init_callbacks(self, n_epochs, n_batches, **kwargs):
        from keras.callbacks import TerminateOnNaN, EarlyStopping, \
            ReduceLROnPlateau, CSVLogger, TensorBoard
        from validation_checkpoint import ValidationCheckpoint
        from clr_callback import CyclicLR

        print('Initializing model callbacks')
        use_tensorboard = kwargs.pop('use_tensorboard', False)
        val_monitor = kwargs.pop('monitor', 'val_loss')

        callbacks = kwargs.pop('callbacks', [])

        # strides to test/save model during training
        test_period = kwargs.pop('test_period', 1)
        save_period = kwargs.pop('save_period', 0)  # 0 = disable
        warmup_epoch = kwargs.pop('warmup_epoch', 3)
        random_state = kwargs.pop('random_state', 42)
        verbose = kwargs.pop('verbose', 1)

        test_ids = kwargs.pop('test_ids', [])

        step_lr = kwargs.pop('step_lr', None)
        clr_mult = kwargs.pop('clr_mult',
                              4)  # epochs per CLR half-cycle, before the LR direction reverses

        # exit early if the last [stop_early] test scores are all worse than the best
        early_stop = int(n_epochs * 0.2)
        stop_early = kwargs.pop('stop_early', None) or early_stop

        stop_delta = optparams['stop_delta']

        save_preds = kwargs.pop('save_preds', True)
        save_model = kwargs.pop('save_model', True)

        model_dir = self.model_dir
        initial_monitor = self.start_monitor
        initial_epoch = self.start_epoch

        # configure callbacks
        val_mode = 'auto'
        ctimestr = epoch2str(gettime())
        train_logf = pathjoin(
            model_dir, 'training_log_%s_pid%d.csv' % (ctimestr, self.pid))
        # if pathexists(train_logf) and pathsize(train_logf) != 0:
        #     #ctimestr = epoch2str(gettime())
        #     #ctimestr = epoch2str(pathctime(train_logf))
        #     ctimestr = '1'
        #     logf_base,logf_ext = splitext(train_logf)
        #     old_logf = logf_base+'_'+ctimestr+logf_ext
        #     print('Backing up existing log file "%s" to "%s"'%(train_logf,old_logf))
        #     os.rename(train_logf,old_logf)

        self.val_monitor = val_monitor
        self.save_preds = save_preds
        self.save_model = save_model
        self.save_period = save_period
        self.test_period = test_period
        self.stop_early = stop_early
        self.stop_delta = stop_delta
        self.test_ids = test_ids

        self.val_cb = ValidationCheckpoint(val_monitor=val_monitor,
                                           save_best_preds=save_preds,
                                           save_best_model=save_model,
                                           model_dir=model_dir,
                                           mode=val_mode,
                                           pid=self.pid,
                                           initial_monitor=initial_monitor,
                                           initial_epoch=initial_epoch,
                                           warmup_epoch=warmup_epoch,
                                           save_period=save_period,
                                           test_period=test_period,
                                           test_ids=test_ids,
                                           verbose=verbose)
        #self.val_cb = ModelCheckpoint(model_iterf,monitor=val_monitor,mode=val_mode, period=save_epoch,
        #                        save_best_only=True, save_weights_only=False,
        #                        verbose=False)
        step_lr = step_lr or int(n_batches * clr_mult)
        self.lr_cb = CyclicLR(base_lr=optparams['lr_min'],
                              max_lr=optparams['lr_max'],
                              step_size=step_lr)
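        # e.g. with n_batches = 500 and the default clr_mult = 4 (illustrative
        # values), step_lr = 2000 iterations, so the learning rate climbs from
        # lr_min to lr_max over 4 epochs and decays back over the next 4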
        # else:
        #     step_lr = step_lr or min(100,int(n_epochs*0.01))
        #     self.lr_cb = ReduceLROnPlateau(monitor=val_monitor,
        #                                    mode=val_mode,
        #                                    patience=step_lr,
        #                                    min_lr=optparams['lr_min'],
        #                                    factor=optparams['reduce_lr'],
        #                                    epsilon=optparams['tol'],
        #                                    verbose=verbose)
        self.es_cb = EarlyStopping(monitor=val_monitor,
                                   mode=val_mode,
                                   patience=stop_early,
                                   min_delta=stop_delta,
                                   verbose=verbose)
        self.tn_cb = TerminateOnNaN()

        self.cv_cb = CSVLogger(filename=train_logf, append=True)
        self.callbacks = callbacks + [
            self.val_cb, self.lr_cb, self.es_cb, self.tn_cb, self.cv_cb
        ]

        if self.backend == 'tensorflow' and use_tensorboard:
            tb_batch_size = 32
            tb_histogram_freq = 1
            tb_embeddings_freq = 0
            tb_log_dir = pathjoin(model_dir, 'tb_logs_pid%d' % self.pid)
            if not pathexists(tb_log_dir):
                os.makedirs(tb_log_dir)

            self.tb_cb = TensorBoard(log_dir=tb_log_dir,
                                     histogram_freq=tb_histogram_freq,
                                     batch_size=tb_batch_size,
                                     write_graph=True,
                                     write_grads=True,
                                     write_images=True,
                                     embeddings_freq=tb_embeddings_freq,
                                     embeddings_layer_names=None,
                                     embeddings_metadata=None)
            self.callbacks.append(self.tb_cb)
        elif self.backend != 'tensorflow' and use_tensorboard:
            print('Cannot use tensorboard with backend "%s"' % self.backend)
            use_tensorboard = False

        print('Initialized %d callbacks:' % len(self.callbacks),
              str(self.callbacks))
Example #14
###############################################################################
# Train the model
###############################################################################
early_stopper = EarlyStopping(monitor='val_loss',
                              verbose=1,
                              patience=args.patience)
model_checkpoint = ModelCheckpoint(args.model_path,
                                   monitor='val_loss',
                                   mode='min',
                                   save_best_only=True,
                                   verbose=1)
callbacks = [early_stopper, model_checkpoint]
if args.cyclical_learning_rate:
    callbacks.append(
        CyclicLR(base_lr=0.0005,
                 max_lr=0.006,
                 step_size=4 * STEPS_PER_EPOCH,
                 mode='triangular2'))

VAL_SUBSPLITS = 5
VALIDATION_STEPS = info.splits[
    'test'].num_examples // args.batch_size // VAL_SUBSPLITS
model_history = model.fit(tfds.as_numpy(augmentedDataset()),
                          epochs=args.max_epochs,
                          steps_per_epoch=STEPS_PER_EPOCH,
                          validation_steps=VALIDATION_STEPS,
                          validation_data=tfds.as_numpy(validate_dataset),
                          callbacks=callbacks)

###############################################################################
# Load the best model snapshot and evaluate the quality
###############################################################################
Example #15
def main(data_module,
         model_module,
         optimizer_module,
         filename,
         config,
         use_val=False):
    """Patch everything together."""
    batch_size = config['train']['batch_size']
    nb_epoch = config['train']['epochs']

    today = datetime.datetime.now()
    datestring = today.strftime('%Y%m%d-%H%M-%S')

    # The data, shuffled and split between train and test sets:
    data = data_module.load_data(config)
    print("Data loaded.")

    X_train, y_train = data['x_train'], data['y_train']
    X_train = data_module.preprocess(X_train)

    # Get use_val value
    if 'use_val' in config['train']:
        use_val = config['train']['use_val']
    else:
        use_val = True

    # Get training / validation sets
    if use_val:
        X_test, y_test = data['x_val'], data['y_val']
    else:
        X_test, y_test = data['x_test'], data['y_test']
        X_val = data_module.preprocess(data['x_val'])
        X_train = np.append(X_train, X_val, axis=0)
        y_train = np.append(y_train, data['y_val'], axis=0)
    X_test = data_module.preprocess(X_test)

    # load hierarchy, if present
    if 'hierarchy_path' in config['dataset']:
        ret = handle_hierarchies(config, data_module, X_train, y_train, X_test,
                                 y_test)
        # hierarchy = ret['hierarchy']
        X_train = ret['X_train']
        y_train = ret['y_train']
        X_test = ret['X_test']
        y_test = ret['y_test']

    nb_classes = data_module.n_classes
    logging.info("# classes = {}".format(data_module.n_classes))
    img_rows = data_module.img_rows
    img_cols = data_module.img_cols
    img_channels = data_module.img_channels
    da = config['train']['data_augmentation']

    # Convert class vectors to binary class matrices.
    Y_train = np_utils.to_categorical(y_train, nb_classes)
    Y_test = np_utils.to_categorical(y_test, nb_classes)
    # Y_train = Y_train.reshape((-1, 1, 1, nb_classes))  # For fcn
    # Y_test = Y_test.reshape((-1, 1, 1, nb_classes))

    if 'smooth_train' in config['dataset']:
        Y_train = np.load(config['dataset']['smooth_train'])

    if 'smooth_test_path' in config['dataset']:
        Y_test = np.load(config['dataset']['smooth_test_path'])

    # Input shape depends on the backend
    if K.image_dim_ordering() == "th":
        input_shape = (img_channels, img_rows, img_cols)
    else:
        input_shape = (img_rows, img_cols, img_channels)

    model = model_module.create_model(nb_classes, input_shape, config)
    print("Model created")

    if 'initializing_model_path' in config['model']:
        init_model_path = config['model']['initializing_model_path']
        if not os.path.isfile(init_model_path):
            logging.error(
                "initializing_model={} not found".format(init_model_path))
            sys.exit(-1)
        init_model = load_model(init_model_path)
        layer_dict_init = dict([(layer.name, layer)
                                for layer in init_model.layers])
        layer_dict_model = dict([(layer.name, layer)
                                 for layer in model.layers])
        for layer_name in layer_dict_model.keys():
            if layer_name in layer_dict_init:
                print("\tLoad layer weights '{}'".format(layer_name))
                weights = layer_dict_init[layer_name].get_weights()
                try:
                    layer_dict_model[layer_name].set_weights(weights)
                except ValueError:
                    print("\t\twrong shape - skip")
        logging.info("Done initializing")

    model.summary()
    optimizer = optimizer_module.get_optimizer(config)
    model.compile(loss='categorical_crossentropy',
                  optimizer=optimizer,
                  metrics=["accuracy"])
    print("Finished compiling")
    print("Building model...")

    es = EarlyStopping(monitor='val_acc',
                       min_delta=0,
                       patience=10,
                       verbose=1,
                       mode='auto')
    history_cb = History()
    callbacks = [es, history_cb]  # remote,
    if 'checkpoint' in config['train'] and config['train']['checkpoint']:
        checkpoint_fname = os.path.basename(config['train']['artifacts_path'])
        if 'saveall' in config['train'] and config['train']['saveall']:
            checkpoint_fname = ("{}_{}.chk.{{epoch:02d}}.h5".format(
                checkpoint_fname, datestring))
            save_best_only = False
        else:
            checkpoint_fname = "{}_{}.chk.h5".format(checkpoint_fname,
                                                     datestring)
            save_best_only = True
        model_chk_path = os.path.join(config['train']['artifacts_path'],
                                      checkpoint_fname)
        model_chk_path = get_nonexistant_path(model_chk_path)
        checkpoint = ModelCheckpoint(model_chk_path,
                                     monitor="val_acc",
                                     save_best_only=save_best_only,
                                     save_weights_only=False)
        callbacks.append(checkpoint)
    if 'tensorboard' in config['train'] and config['train']['tensorboard']:
        tensorboard = TensorBoard(log_dir='./logs',
                                  histogram_freq=0,
                                  write_graph=True,
                                  write_images=True)
        callbacks.append(tensorboard)
    if 'remote' in config['train'] and config['train']['remote']:
        remote = RemoteMonitor(root='http://localhost:9000')
        callbacks.append(remote)
    if 'lr_reducer' in config['train'] and config['train']['lr_reducer']:
        lr_reducer = ReduceLROnPlateau(monitor='val_acc',
                                       factor=0.3,
                                       cooldown=0,
                                       patience=3,
                                       min_lr=0.5e-6,
                                       verbose=1)
        callbacks.append(lr_reducer)
    if 'clr' in config['train']:
        clr = CyclicLR(base_lr=config['train']['clr']['base_lr'],
                       max_lr=config['train']['clr']['max_lr'],
                       step_size=(config['train']['clr']['step_size'] *
                                  (X_train.shape[0] // batch_size)),
                       mode=config['train']['clr']['mode'])
        callbacks.append(clr)
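        # a hypothetical 'clr' block in the training config (illustrative values
        # only; the real schema lives with the config files):
        #   train:
        #     clr:
        #       base_lr: 1.0e-5
        #       max_lr: 1.0e-3
        #       step_size: 4          # in epochs; scaled by batches per epoch above
        #       mode: triangular2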

    if not da:
        print('Not using data augmentation.')
        if 'checkpoint' in config['train'] and config['train']['checkpoint']:
            model.save(
                model_chk_path.format(epoch=0).replace('.00.', '.00.a.'))
        t0 = time.time()
        model.fit(X_train,
                  Y_train,
                  batch_size=batch_size,
                  epochs=nb_epoch,
                  validation_data=(X_test, Y_test),
                  shuffle=True,
                  callbacks=callbacks)
        t1 = time.time()
        t2 = t1
        epochs_augmented_training = 0
    else:
        print('Using real-time data augmentation.')

        if 'hue_shift' in da:
            hsv_augmentation = (da['hue_shift'], da['saturation_scale'],
                                da['saturation_shift'], da['value_scale'],
                                da['value_shift'])
        else:
            hsv_augmentation = None

        # This will do preprocessing and realtime data augmentation:
        datagen = ImageDataGenerator(
            # set input mean to 0 over the dataset
            featurewise_center=da['featurewise_center'],
            # set each sample mean to 0
            samplewise_center=da['samplewise_center'],
            # divide inputs by std of the dataset
            featurewise_std_normalization=False,
            # divide each input by its std
            samplewise_std_normalization=da['samplewise_std_normalization'],
            zca_whitening=da['zca_whitening'],
            # randomly rotate images in the range (degrees, 0 to 180)
            rotation_range=da['rotation_range'],
            # randomly shift images horizontally (fraction of total width)
            width_shift_range=da['width_shift_range'],
            # randomly shift images vertically (fraction of total height)
            height_shift_range=da['height_shift_range'],
            horizontal_flip=da['horizontal_flip'],
            vertical_flip=da['vertical_flip'],
            hsv_augmentation=hsv_augmentation,
            zoom_range=da['zoom_range'],
            shear_range=da['shear_range'],
            channel_shift_range=da['channel_shift_range'])

        # Compute quantities required for featurewise normalization
        # (std, mean, and principal components if ZCA whitening is applied).
        datagen.fit(X_train, seed=0)

        # Apply normalization to test data
        for i in range(len(X_test)):
            X_test[i] = datagen.standardize(X_test[i])

        # Fit the model on the batches generated by datagen.flow().
        steps_per_epoch = X_train.shape[0] // batch_size
        if 'checkpoint' in config['train'] and config['train']['checkpoint']:
            model.save(
                model_chk_path.format(epoch=0).replace('.00.', '.00.a.'))
        t0 = time.time()
        model.fit_generator(datagen.flow(X_train,
                                         Y_train,
                                         batch_size=batch_size),
                            steps_per_epoch=steps_per_epoch,
                            epochs=nb_epoch,
                            validation_data=(X_test, Y_test),
                            callbacks=callbacks)
        t1 = time.time()
        # Train one epoch without augmentation to make sure the data
        # distribution is fitted well
        loss_history = history_cb.history["loss"]
        epochs_augmented_training = len(loss_history)
        model.fit(X_train,
                  Y_train,
                  batch_size=batch_size,
                  epochs=nb_epoch,
                  validation_data=(X_test, Y_test),
                  shuffle=True,
                  callbacks=callbacks,
                  initial_epoch=len(loss_history))
        t2 = time.time()
    loss_history = history_cb.history["loss"]
    acc_history = history_cb.history["acc"]
    val_acc_history = history_cb.history["val_acc"]
    np_loss_history = np.array(loss_history)
    np_acc_history = np.array(acc_history)
    np_val_acc_history = np.array(val_acc_history)
    history_data = zip(list(range(1,
                                  len(np_loss_history) + 1)), np_loss_history,
                       np_acc_history, np_val_acc_history)
    history_data = [(el[0], "%0.4f" % el[1], "%0.4f" % el[2], "%0.4f" % el[3])
                    for el in history_data]
    history_fname = os.path.basename(config['train']['artifacts_path'])
    history_fname = "{}_{}_history.csv".format(history_fname, datestring)
    csv_path = os.path.join(config['train']['artifacts_path'], history_fname)
    csv_path = get_nonexistant_path(csv_path)
    with open(csv_path, 'w') as fp:
        writer = csv.writer(fp, delimiter=',')
        writer.writerows([("epoch", "loss", "acc", "val_acc")])
        writer.writerows(history_data)
    training_time = t1 - t0
    readjustment_time = t2 - t1
    print("wall-clock training time: {}s".format(training_time))
    model_fn = os.path.basename(config['train']['artifacts_path'])
    model_fn = "{}_{}.h5".format(model_fn, datestring)
    model_fn = os.path.join(config['train']['artifacts_path'], model_fn)
    model_fn = get_nonexistant_path(model_fn)
    model.save(model_fn)
    # Store training meta data
    data = {
        'training_time': training_time,
        'readjustment_time': readjustment_time,
        'HOST': platform.node(),
        'epochs': len(history_data),
        'epochs_augmented_training': epochs_augmented_training,
        'config': config
    }
    meta_train_fname = os.path.join(config['train']['artifacts_path'],
                                    "train-meta_{}.json".format(datestring))
    meta_train_fname = get_nonexistant_path(meta_train_fname)
    with open(meta_train_fname, 'w') as outfile:
        str_ = json.dumps(data,
                          indent=4,
                          sort_keys=True,
                          separators=(',', ': '),
                          ensure_ascii=False)
        outfile.write(str_)
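
# Hedged sketch (not from the original source): the training function above
# assumes a `config['train']` section roughly shaped like the dict below; all
# concrete values are illustrative only.
example_train_config = {
    'train': {
        'artifacts_path': 'artifacts/my_model',  # hypothetical output directory
        'checkpoint': True,
        'saveall': False,
        'tensorboard': False,
        'remote': False,
        'lr_reducer': True,
        'clr': {
            'base_lr': 1e-4,
            'max_lr': 1e-2,
            'step_size': 4,  # in epochs; scaled by iterations per epoch above
            'mode': 'triangular2',
        },
    },
}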
#model.add(Dense(24,activation='relu'))
model.add(Dense(12, activation='relu'))
#model.add(Dropout(0.05))
#model.add(Flatten())
#model.add(Dropout(0.05))
model.add(Dense(2, activation='softmax'))

model.compile(loss='binary_crossentropy',
              optimizer='adam',
              metrics=['accuracy', 'binary_crossentropy'])

# initialize the cyclical learning rate callback
print("[INFO] using '{}' method".format(config.CLR_METHOD))
clr = CyclicLR(mode=config.CLR_METHOD,
               base_lr=config.MIN_LR,
               max_lr=config.MAX_LR,
               step_size=config.STEP_SIZE *
               (X_train.shape[0] // config.BATCH_SIZE))

#fitting the model
baseline_history = model.fit(
    X_train,
    y_train,
    epochs=config.NUM_EPOCHS,
    callbacks=[clr],
    batch_size=config.BATCH_SIZE,
    #steps_per_epoch=X_train.shape[0] // config.BATCH_SIZE,
    validation_data=(X_test, y_test),
    verbose=1)
model.summary()
pred_train = model.predict(X_train)
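
# Hedged sketch (not from the original source): the `config` module used above
# is assumed to expose constants along these lines; values are illustrative only.
#
#   # config.py (hypothetical)
#   CLR_METHOD = "triangular"
#   MIN_LR = 1e-7
#   MAX_LR = 1e-2
#   STEP_SIZE = 8        # half-cycle length in epochs
#   BATCH_SIZE = 64
#   NUM_EPOCHS = 48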
def train_attention(hypa: ty.Dict[str, str], force_retrain: bool,
                    use_validation: bool) -> None:
    """MAKEDOC: what is train_attention doing?"""
    logg = logging.getLogger(f"c.{__name__}.train_attention")
    # logg.setLevel("INFO")
    logg.debug("Start train_attention")

    # build the model name
    model_name = build_attention_name(hypa, use_validation)
    logg.debug(f"model_name: {model_name}")

    # save the trained model here
    model_folder = Path("trained_models") / "attention"
    if not model_folder.exists():
        model_folder.mkdir(parents=True, exist_ok=True)
    model_path = model_folder / f"{model_name}.h5"
    placeholder_path = model_folder / f"{model_name}.txt"

    # check if this model has already been trained
    if placeholder_path.exists():
        if force_retrain:
            logg.warn("\nRETRAINING MODEL!!\n")
        else:
            logg.debug("Already trained")
            return

    # save info regarding the model training in this folder
    info_folder = Path("info") / "attention" / model_name
    if not info_folder.exists():
        info_folder.mkdir(parents=True, exist_ok=True)

    # get the word list
    words = words_types[hypa["words_type"]]
    num_labels = len(words)

    # load data
    processed_folder = Path("data_proc")
    processed_path = processed_folder / f"{hypa['dataset_name']}"
    data, labels = load_processed(processed_path, words)

    # concatenate train and val for final train
    val_data = None
    if use_validation:
        x = data["training"]
        y = labels["training"]
        val_data = (data["validation"], labels["validation"])
        logg.debug("Using validation data")
    else:
        x = np.concatenate((data["training"], data["validation"]))
        y = np.concatenate((labels["training"], labels["validation"]))
        logg.debug("NOT using validation data")

    # the shape of each sample
    input_shape = data["training"][0].shape

    # from hypa extract model param
    model_param = get_model_param_attention(hypa, num_labels, input_shape)

    batch_size_types = {"01": 32, "02": 16}
    batch_size = batch_size_types[hypa["batch_size_type"]]

    epoch_num_types = {"01": 15, "02": 30, "03": 2, "04": 4}
    epoch_num = epoch_num_types[hypa["epoch_num_type"]]

    # magic to fix the GPUs
    setup_gpus()

    model = AttentionModel(**model_param)
    # model.summary()

    metrics = [
        tf.keras.metrics.CategoricalAccuracy(),
        tf.keras.metrics.Precision(),
        tf.keras.metrics.Recall(),
    ]

    learning_rate_types = {
        "01": "fixed01",
        "02": "fixed02",
        "03": "exp_decay_step_01",
        "04": "exp_decay_smooth_01",
        "05": "clr_triangular2_01",
        "06": "clr_triangular2_02",
        "07": "clr_triangular2_03",
        "08": "clr_triangular2_04",
        "09": "clr_triangular2_05",
        "10": "exp_decay_smooth_02",
    }
    learning_rate_type = hypa["learning_rate_type"]
    lr_value = learning_rate_types[learning_rate_type]

    # setup opt fixed lr values
    if lr_value.startswith("fixed"):
        if lr_value == "fixed01":
            lr = 1e-3
        elif lr_value == "fixed02":
            lr = 1e-4
    else:
        lr = 1e-3

    optimizer_types = {
        "a1": Adam(learning_rate=lr),
        "r1": RMSprop(learning_rate=lr)
    }
    opt = optimizer_types[hypa["optimizer_type"]]

    model.compile(
        optimizer=opt,
        loss=tf.keras.losses.CategoricalCrossentropy(),
        metrics=metrics,
    )

    # setup callbacks
    callbacks = []

    # setup exp decay step / smooth
    if lr_value.startswith("exp_decay"):
        if lr_value == "exp_decay_step_01":
            exp_decay_part = partial(exp_decay_step, epochs_drop=5)
        elif lr_value == "exp_decay_smooth_01":
            exp_decay_part = partial(exp_decay_smooth, epochs_drop=5)
        elif lr_value == "exp_decay_smooth_02":
            exp_decay_part = partial(exp_decay_smooth,
                                     epochs_drop=5,
                                     initial_lrate=1e-2)
        lrate = LearningRateScheduler(exp_decay_part)
        callbacks.append(lrate)

    # setup cyclic learning rate
    if lr_value.startswith("clr_triangular2"):
        base_lr = 1e-5
        max_lr = 1e-3

        # training iteration per epoch = num samples // batch size
        # step size suggested = 2~8 * iterations
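        # e.g. with (hypothetical) 3200 samples and batch_size 32 there are
        # 100 iterations per epoch, so step_factor 8 -> step_size = 800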
        if lr_value == "clr_triangular2_01":
            step_factor = 8
            step_size = step_factor * x.shape[0] // batch_size

        elif lr_value == "clr_triangular2_02":
            step_factor = 2
            step_size = step_factor * x.shape[0] // batch_size

        # target_cycles = the number of cycles we want in those epochs
        # it_per_epoch = num_samples // batch_size
        # total_iterations = it_per_epoch * epoch_num
        # step_size = total_iterations // target_cycles
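        # e.g. with (hypothetical) 3200 samples, batch_size 32 and 15 epochs:
        # 100 it/epoch * 15 = 1500 iterations; 4 cycles -> step_size = 1500 // 8 = 187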
        elif lr_value == "clr_triangular2_03":
            # the number of cycles we want in those epochs
            target_cycles = 4
            it_per_epoch = x.shape[0] // batch_size
            total_iterations = it_per_epoch * epoch_num
            step_size = total_iterations // (target_cycles * 2)

        elif lr_value == "clr_triangular2_04":
            # the number of cycles we want in those epochs
            target_cycles = 2
            it_per_epoch = x.shape[0] // batch_size
            total_iterations = it_per_epoch * epoch_num
            step_size = total_iterations // (target_cycles * 2)

        elif lr_value == "clr_triangular2_05":
            # the number of cycles we want in those epochs
            target_cycles = 2
            it_per_epoch = x.shape[0] // batch_size
            total_iterations = it_per_epoch * epoch_num
            step_size = total_iterations // (target_cycles * 2)
            # set bigger starting value
            max_lr = 1e-2

        logg.debug(f"x.shape[0]: {x.shape[0]}")
        logg.debug(f"CLR is using step_size: {step_size}")

        mode = "triangular2"
        cyclic_lr = CyclicLR(base_lr, max_lr, step_size, mode)
        callbacks.append(cyclic_lr)

    # setup early stopping
    if learning_rate_type in ["01", "02", "03", "04"]:
        metric_to_monitor = "val_loss" if use_validation else "loss"
        early_stop = EarlyStopping(
            monitor=metric_to_monitor,
            patience=4,
            restore_best_weights=True,
            verbose=1,
        )
        callbacks.append(early_stop)

    # model_checkpoint = ModelCheckpoint(
    #     model_name,
    #     monitor="val_loss",
    #     save_best_only=True,
    # )

    # a dict to recreate this training
    # FIXME this should be right before fit and have epoch_num/batch_size/lr info
    recap: ty.Dict[str, ty.Any] = {}
    recap["words"] = words
    recap["hypa"] = hypa
    recap["model_param"] = model_param
    recap["use_validation"] = use_validation
    recap["model_name"] = model_name
    recap["version"] = "001"
    # logg.debug(f"recap: {recap}")
    recap_path = info_folder / "recap.json"
    recap_path.write_text(json.dumps(recap, indent=4))

    results = model.fit(
        x,
        y,
        validation_data=val_data,
        epochs=epoch_num,
        batch_size=batch_size,
        callbacks=callbacks,
    )

    results_recap: ty.Dict[str, ty.Any] = {}
    results_recap["model_name"] = model_name
    results_recap["results_recap_version"] = "002"

    # eval performance on the various metrics
    eval_testing = model.evaluate(data["testing"], labels["testing"])
    for metrics_name, value in zip(model.metrics_names, eval_testing):
        logg.debug(f"{metrics_name}: {value}")
        results_recap[metrics_name] = value

    # compute the confusion matrix
    y_pred = model.predict(data["testing"])
    cm = pred_hot_2_cm(labels["testing"], y_pred, words)
    # logg.debug(f"cm: {cm}")
    results_recap["cm"] = cm.tolist()

    # compute the fscore
    fscore = analyze_confusion(cm, words)
    logg.debug(f"fscore: {fscore}")
    results_recap["fscore"] = fscore

    # save the histories
    results_recap["history_train"] = {
        mn: results.history[mn]
        for mn in model.metrics_names
    }
    if use_validation:
        results_recap["history_val"] = {
            f"val_{mn}": results.history[f"val_{mn}"]
            for mn in model.metrics_names
        }

    # plot the cm
    fig, ax = plt.subplots(figsize=(12, 12))
    plot_confusion_matrix(cm, ax, model_name, words, fscore)
    plot_cm_path = info_folder / "test_confusion_matrix.png"
    fig.savefig(plot_cm_path)
    plt.close(fig)

    # save the results
    res_recap_path = info_folder / "results_recap.json"
    res_recap_path.write_text(json.dumps(results_recap, indent=4))

    # if cyclic_lr was used save the history
    if lr_value.startswith("clr_triangular2"):
        logg.debug(f"cyclic_lr.history.keys(): {cyclic_lr.history.keys()}")
        clr_recap = {}
        for metric_name, values in cyclic_lr.history.items():
            clr_recap[metric_name] = list(float(v) for v in values)
        clr_recap_path = info_folder / "clr_recap.json"
        clr_recap_path.write_text(json.dumps(clr_recap, indent=4))

    # save the trained model
    model.save(model_path)

    placeholder_path.write_text(f"Trained. F-score: {fscore}")
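
# Hedged usage sketch (not from the original source): a hypa dict with the keys
# consumed above; the "words_type" and "dataset_name" values are placeholders.
if __name__ == "__main__":
    example_hypa = {
        "words_type": "f1",  # hypothetical key into words_types
        "dataset_name": "mel01",  # hypothetical folder under data_proc/
        "batch_size_type": "01",  # -> batch_size 32
        "epoch_num_type": "01",  # -> 15 epochs
        "learning_rate_type": "05",  # -> clr_triangular2_01
        "optimizer_type": "a1",  # -> Adam
    }
    train_attention(example_hypa, force_retrain=False, use_validation=True)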
Example #18
def train_test(model_type, data, labels, feature_names, percentile=[20, 40, 60, 80]):
    """ Train/test model using nested k-folds paradigm.

    Parameters
    ----------
    model_type : string
        'SVM' for support vector machine or 'DNN' for neural network.
    data : np.array
        features to use for training/testing of shape (n_samples, n_features)
    labels : np.array or list
        labels used for binary classification, ex: np.array of 1 = cannabis
        or 0 = control
    feature_names : list
        list of feature names
    percentile : list
        list of percentiles to use in feature selection during grid search;
        allows accommodation of a varying number of features

    Returns
    -------
    df_performance : pd.DataFrame
        data frame of hyperparameters and performance across folds
    all_tp : list
        list of the number of true positives in each fold
    all_tn : list
        list of the number of true negatives in each fold
    all_fp : list
        list of the number of false positives in each fold
    all_fn : list
        list of the number of false negatives in each fold
    top_features : list
        list of selected features following grid search of percentile
    y_test_plot : list
        list of test labels for each fold (for plotting performance)
    probas_plot : list
        list of prediction probabilities (for plotting performance)
    """
    warnings.filterwarnings("ignore")

    # Log performance
    if model_type == 'SVM':
        col_header = ['Kernel', 'Gamma', 'Cost', 'Percentile', 'Sensitivity', 'Specificity', 'PPV', 'NPV']
    elif model_type == 'DNN':
        col_header = ['Optimizer', 'Initializer', 'Decay', 'Batch Size', 'Activation 1', 'Activation 2',
                      'Percentile', 'Sensitivity', 'Specificity', 'PPV', 'NPV']

    df_performance = pd.DataFrame(columns=col_header)
    all_tp = []
    all_tn = []
    all_fp = []
    all_fn = []

    # Plotting
    y_test_plot = []
    probas_plot = []

    # Feature importance
    top_features = []

    # Define grid hyper-parameters
    if model_type == 'SVM':
        tuned_parameters = dict(
            anova__percentile = percentile,
            svc__kernel = ['rbf', 'sigmoid', 'poly'],
            svc__gamma = [2**g for g in range(-15, 4)],
            svc__C = [2**C for C in range(-5, 16)]
        )
    elif model_type == "DNN":
        tuned_parameters = dict(
            anova__percentile = percentile,
            nn__optimizer = ['SGD', 'AdamW'],
            nn__init = ['glorot_normal', 'glorot_uniform'],
            nn__activation_1 = ['relu', 'sigmoid', 'tanh'],
            nn__activation_2 = ['relu', 'sigmoid', 'tanh'],
            nn__batch_size = [32, 64, 128, 256],
            nn__decay = [10.0**i for i in range(-10,-1) if i%2 == 1]
        )

    # Cross-validation
    kfold = StratifiedKFold(n_splits=10, shuffle=True, random_state=2)
    inner_kfold = StratifiedKFold(n_splits=5, shuffle=True, random_state=2)
    loop = 1
    folds = []
    for train_indices, test_indices in kfold.split(data, labels):
        print(f'Fold {loop}')

        # Callbacks for neural net
        clr = CyclicLR(mode='triangular', base_lr=0.175, max_lr=0.9175, step_size=12)
        es = EarlyStopping(monitor='val_loss', min_delta=0, patience=10, verbose=0, mode='auto',
                           baseline=None, restore_best_weights=True)

        # Inner performance lists
        TP = []
        TN = []
        FP = []
        FN = []

        # Split data
        X_train = [data[idx] for idx in train_indices]
        y_train = [labels[idx] for idx in train_indices]
        X_test = [data[idx] for idx in test_indices]
        y_test = [labels[idx] for idx in test_indices]

        # Apply mean and variance centering
        scaler = StandardScaler().fit(X_train)
        X_train = scaler.transform(X_train)
        X_test = scaler.transform(X_test)

        # Pipe feature selection and classifier together
        if model_type == 'SVM':
            anova = SelectPercentile(f_classif)
            svc = SVC(class_weight='balanced',  probability=True)
            clf = Pipeline([('anova', anova), ('svc', svc)])
        elif model_type == 'DNN':
            anova = ANOVASelection()  # Modified SelectPercentile class
            nn = KerasClassifier(build_fn=create_model, epochs=1000, verbose=0)
            clf = Pipeline([('anova', anova), ('nn', nn)])

        # Train model
        clf = GridSearchCV(clf, tuned_parameters, scoring='balanced_accuracy',
                           n_jobs=-1, cv=inner_kfold)

        # A random grid search can speed up computation if an analysis hangs:
        # clf = RandomizedSearchCV(clf, tuned_parameters, n_iter=30, scoring='balanced_accuracy',
        #                          n_jobs=-1, cv=inner_kfold)

        if model_type == 'SVM':
            clf.fit(X_train, y_train)
        elif model_type == 'DNN':
            clf.fit(X_train, y_train, nn__callbacks=[clr, es])

        # Determine top features from feature selection
        selection = SelectPercentile(f_classif, percentile=clf.best_estimator_[0].percentile).fit(X_train, y_train)
        top_indices = selection.get_support(indices=True)
        selected_features = [feature_names[idx] for idx in top_indices]
        top_features.append(selected_features)

        # Test model
        y_true, y_pred = y_test, clf.predict(X_test)

        # Evaluate performance
        for idx, y in enumerate(y_true):
            if y == 1.0 and y == y_pred[idx]:
                TP.append(1)
            elif y == 1.0 and y != y_pred[idx]:
                FN.append(1)
            elif y == 0.0 and y == y_pred[idx]:
                TN.append(1)
            elif y == 0.0 and y != y_pred[idx]:
                FP.append(1)

        if len(FP) != 0 and len(FN) != 0:
            # This is most likely
            sensitivity = len(TP)/(len(TP)+len(FN))
            specificity = len(TN)/(len(TN)+len(FP))
            NPV = len(TN)/(len(TN)+len(FN))
            PPV = len(TP)/(len(TP)+len(FP))
        elif len(FP) != 0 and len(FN) == 0:
            # Likely overfitting
            sensitivity = 1
            specificity = len(TN)/(len(TN)+len(FP))
            NPV = 1
            PPV = len(TP)/(len(TP)+len(FP))
        elif len(FP) == 0 and len(FN) != 0:
            # Likely overfitting
            sensitivity = len(TP)/(len(TP)+len(FN))
            specificity = 1
            PPV = 1
            NPV = len(TN)/(len(TN)+len(FN))
        elif len(FP) == 0 and len(FN) == 0:
            # Perfect classification - yeah right...
            sensitivity = 1
            specificity = 1
            NPV = 1
            PPV = 1

        all_tp.append(len(TP))
        all_tn.append(len(TN))
        all_fp.append(len(FP))
        all_fn.append(len(FN))

        # Append to performance df
        df_row_to_add = []

        if model_type == 'SVM':
            params = ['svc__kernel', 'svc__gamma', 'svc__C', 'anova__percentile']
        elif model_type == 'DNN':
            params = ['nn__optimizer', 'nn__init', 'nn__decay', 'nn__batch_size',
                      'nn__activation_1', 'nn__activation_2', 'anova__percentile']

        df_row_to_add = [clf.best_params_[param] for param in params]
        df_row_to_add.append(sensitivity)
        df_row_to_add.append(specificity)
        df_row_to_add.append(PPV)
        df_row_to_add.append(NPV)
        folds.append('Fold ' + str(loop))
        df_performance = df_performance.append(pd.Series(df_row_to_add,
                                                         index=df_performance.columns),
                                               ignore_index=True)
        df_performance.index = folds

        # For plotting
        y_test_plot.append(y_test)
        probas_ = clf.predict_proba(X_test)
        probas_plot.append(probas_)
        loop += 1

    return df_performance, all_tp, all_tn, all_fp, all_fn, top_features, y_test_plot, probas_plot
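
# Hedged usage sketch (not from the original source): calling train_test with an
# SVM on a small hypothetical feature matrix; data, labels and feature_names are
# placeholders with the shapes described in the docstring.
import numpy as np

example_data = np.random.rand(60, 100)  # (n_samples, n_features)
example_labels = np.array([1] * 30 + [0] * 30)  # binary labels
example_feature_names = ["feat_{}".format(i) for i in range(100)]
(df_performance, all_tp, all_tn, all_fp, all_fn,
 top_features, y_test_plot, probas_plot) = train_test(
     'SVM', example_data, example_labels, example_feature_names)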
Example #19
    def train(self):
        """ train VAE model
        """

        train_datagen = ImageDataGenerator(rescale=1. /
                                           (2**self.image_res - 1),
                                           horizontal_flip=True,
                                           vertical_flip=True)

        # colormode needs to be set depending on num_channels
        if self.nchannel == 1:
            train_generator = train_datagen.flow_from_directory(
                self.data_dir,
                target_size=(self.image_size, self.image_size),
                batch_size=self.batch_size,
                color_mode='grayscale',
                class_mode='input')

        elif self.nchannel == 3:
            print('using three channel generator!')
            train_generator = train_datagen.flow_from_directory(
                self.data_dir,
                target_size=(self.image_size, self.image_size),
                batch_size=self.batch_size,
                color_mode='rgb',
                class_mode='input')
        else:
            # expecting data saved as numpy array
            train_generator = NumpyDataGenerator(self.data_dir,
                                                 batch_size=self.batch_size,
                                                 image_size=self.image_size,
                                                 nchannel=self.nchannel,
                                                 image_res=self.image_res,
                                                 shuffle=True)

        # instantiate callbacks
        callbacks = []

        term_nan = TerminateOnNaN()
        callbacks.append(term_nan)

        csv_logger = CSVLogger(os.path.join(self.save_dir, 'training.log'),
                               separator='\t')
        callbacks.append(csv_logger)

        checkpointer = ModelCheckpoint(os.path.join(
            self.save_dir, 'checkpoints/vae_weights.hdf5'),
                                       verbose=1,
                                       save_best_only=True,
                                       save_weights_only=True)
        callbacks.append(checkpointer)

        if self.earlystop:
            earlystop = EarlyStopping(monitor='loss', min_delta=0, patience=8)
            callbacks.append(earlystop)

        if self.use_clr:
            clr = CyclicLR(base_lr=self.learn_rate,
                           max_lr=0.0001,
                           step_size=0.25 * self.steps_per_epoch,
                           mode='triangular')
            callbacks.append(clr)
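            # Note: a step_size of 0.25 * steps_per_epoch means each half-cycle
            # lasts a quarter of an epoch, i.e. the learning rate completes two
            # full cycles per epoch.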

        if self.use_vaecb:
            vaecb = VAEcallback(self)
            callbacks.append(vaecb)

        self.history = self.vae.fit_generator(
            train_generator,
            epochs=self.epochs,
            callbacks=callbacks,
            steps_per_epoch=self.steps_per_epoch,
            verbose=self.verbose)

        print('saving model weights to', self.model_dir)
        self.vae.save_weights(os.path.join(self.model_dir, 'weights_vae.hdf5'))
        self.encoder.save_weights(
            os.path.join(self.model_dir, 'weights_encoder.hdf5'))
        self.decoder.save_weights(
            os.path.join(self.model_dir, 'weights_decoder.hdf5'))

        self.encode()

        print('done!')
Example #20
def _main():

    # argument parsing
    parser = argparse.ArgumentParser(
        description='Trains an image similarity detector.')
    parser.add_argument('--training-images-dir',
                        type=str,
                        help='The training images directory.')
    parser.add_argument(
        '--validation-images-dir',
        type=str,
        default=None,
        help=
        'The validation images directory. If not specified, then no validation is performed (default behavior).'
    )
    parser.add_argument(
        '--output-dir',
        type=str,
        help='The output directory where the checkpoints will be stored.')
    parser.add_argument('--restart-checkpoint',
                        type=str,
                        default=None,
                        help='The checkpoint from which to restart.')
    parser.add_argument(
        '--image-size',
        type=int,
        default=224,
        help='The image size in pixels, default is 224 (meaning 224x224).')
    parser.add_argument('--batch-size',
                        type=int,
                        default=8,
                        help='The training minibatch size.')
    parser.add_argument(
        '--feature-vector-len',
        type=int,
        default=1024,
        help='The length of the feature vector (1024 by default).')
    parser.add_argument('--use-l2',
                        type=int,
                        default=0,
                        help='If set to 1, use L2 instead of L1 difference.')
    parser.add_argument(
        '--backbone',
        type=str,
        default='siamese',
        help='The network backbone: siamese(default), mobilenetv2, resnet50')
    parser.add_argument('--freeze-backbone',
                        type=int,
                        default=0,
                        help='Set to 1 to freeze the backbone (0 by default).')
    parser.add_argument(
        '--max-lr',
        type=float,
        default=1e-4,
        help='The maximum (and also initial) learning rate (1e-4 by default).')
    parser.add_argument('--min-lr',
                        type=float,
                        default=1e-5,
                        help='The minimum learning rate (1e-5 by default).')
    parser.add_argument(
        '--lr-schedule',
        type=str,
        default='cosine',
        help='The learning rate schedule: cosine (default), cyclic.')
    parser.add_argument(
        '--lr-schedule-cycle',
        type=int,
        default=100000,
        help='The learning rate cycle length (number of images).')
    parser.add_argument('--images-per-epoch',
                        type=int,
                        default=10000,
                        help='The number of images per epoch.')
    parser.add_argument('--start-epoch',
                        type=int,
                        default=1,
                        help='The starting epoch (1 by default).')
    parser.add_argument('--end-epoch',
                        type=int,
                        default=5000,
                        help='The ending epoch (5000 by default).')
    parser.add_argument('--checkpoint-name',
                        type=str,
                        default='chkpt',
                        help='The root of the checkpoint names.')
    parser.add_argument(
        '--checkpoint-freq',
        type=int,
        default=100,
        help='The frequency of checkpoints in epochs. Default is 100.')
    parser.add_argument(
        '--early-stopping-patience',
        type=int,
        default=-1,
        help=
        'The number of epochs to wait before stopping if the validation loss does not decrease. Set to -1 to disable (default).'
    )
    parser.add_argument(
        '--same-prob',
        type=float,
        default=0.5,
        help='The probability of comparing to the same image (0.5 by default).'
    )
    parser.add_argument(
        '--no-aug-prob',
        type=float,
        default=0.2,
        help='The probability that an image is not augmented at all.')
    parser.add_argument('--crop-prob',
                        type=float,
                        default=0.05,
                        help='The crop probability (0.05 by default).')
    parser.add_argument(
        '--crop-frac',
        type=float,
        default=0.09,
        help='The maximum fraction of area cropped out (0.09 by default).')
    parser.add_argument('--jitter-prob',
                        type=float,
                        default=0.2,
                        help='The jitter probability (0.2 by default).')
    parser.add_argument('--jitter',
                        type=float,
                        default=0.1,
                        help='The jitter size (0.1 by default).')
    parser.add_argument('--rot',
                        type=float,
                        default=0.0,
                        help='The rotation probability (0.0 by default).')
    parser.add_argument(
        '--hflip',
        type=float,
        default=0.0,
        help='The horizontal flip probability (0.0 by default).')
    parser.add_argument('--vflip',
                        type=float,
                        default=0.3,
                        help='The vertical flip probability (0.3 by default).')
    parser.add_argument(
        '--hue',
        type=float,
        default=0.05,
        help='The hue variation (ignored for siamese backbone).')
    parser.add_argument(
        '--sat',
        type=float,
        default=0.2,
        help='The saturation variation (ignored for siamese backbone).')
    parser.add_argument(
        '--val',
        type=float,
        default=0.2,
        help='The value variation (ignored for siamese backbone).')
    parser.add_argument(
        '--mlflow',
        type=int,
        default=0,
        help='Set to 1 if using MLflow. Metrics and artifacts will be logged.')

    args = parser.parse_args()

    # start the mlflow autologging
    if args.mlflow:
        import mlflow.keras
        mlflow.keras.autolog()

    # create the image lists
    exts = ('.jpg', '.JPG', '.jpeg', '.JPEG', '.png', '.PNG', '.gif', '.GIF',
            '.tiff', '.TIFF', '.TIF', '.bmp', '.BMP')
    train_imgs = []
    train_dir_files = os.listdir(args.training_images_dir)

    for f in train_dir_files:
        if f.endswith(exts):
            train_imgs.append(os.path.join(args.training_images_dir, f))

    np.random.shuffle(train_imgs)

    if args.validation_images_dir:
        do_valid = True
        val_imgs = []
        val_dir_files = os.listdir(args.validation_images_dir)

        for f in val_dir_files:
            if f.endswith(exts):
                val_imgs.append(os.path.join(args.validation_images_dir, f))

        np.random.shuffle(val_imgs)
    else:
        do_valid = False

    print('There are {} training images.'.format(len(train_imgs)))
    if do_valid:
        print('There are {} validation images.'.format(len(val_imgs)))

    # create the output directory if necessary
    if not os.path.exists(args.output_dir):
        os.makedirs(args.output_dir)

    # create the model
    from model import create_model
    num_channels = 1 if args.backbone == 'siamese' else 3
    model, model_body, encoder = create_model(
        (args.image_size, args.image_size, num_channels),
        args.feature_vector_len,
        restart_checkpoint=args.restart_checkpoint,
        backbone=args.backbone,
        freeze=args.freeze_backbone == 1,
        l2=args.use_l2 == 1)

    print('\nThe model:')
    print(model.summary())

    # prepare the callbacks
    from lr_info import lr_info
    info_lr = lr_info(model, args.mlflow == 1)

    # learning rate

    # scale the learning rate to the batch size
    max_lr = args.max_lr * np.sqrt(args.batch_size)
    min_lr = args.min_lr * np.sqrt(args.batch_size)

    print(
        'Scaling the learning rate minimum to {} and maximum (initial) to {}'.
        format(min_lr, max_lr))
    print(
        'The original values are {} and {}, and are multiplied by the root of the batch size {}.'
        .format(args.min_lr, args.max_lr, args.batch_size))

    if args.lr_schedule == 'cosine':
        print('Using the cosine annealing learning rate scheduler.')
        from cos_callback import CosineAnnealingScheduler
        lr_callback = CosineAnnealingScheduler(
            max_lr,
            args.batch_size,
            args.lr_schedule_cycle,
            min_lr=min_lr,
            verbose=True,
            initial_counter=(args.start_epoch - 1) * args.images_per_epoch //
            args.batch_size)
    else:
        from clr_callback import CyclicLR
        lr_callback = CyclicLR(mode='triangular',
                               max_lr=max_lr,
                               base_lr=min_lr,
                               step_size=args.lr_schedule_cycle //
                               args.batch_size)

    # checkpoints
    from checkpoint import MyModelCheckpoint
    checkpoint = MyModelCheckpoint(
        filepath=os.path.join(args.output_dir,
                              args.checkpoint_name + '_' + '{epoch:04d}'),
        snapshot_path=os.path.join(args.output_dir,
                                   args.checkpoint_name + '-snapshot'),
        model_body=model_body,
        encoder=encoder,
        save_best_only=do_valid,
        period=args.checkpoint_freq,
        verbose=1,
        mlflow=args.mlflow == 1)

    callbacks = [info_lr, lr_callback, checkpoint]

    if do_valid and args.early_stopping_patience != -1:
        from keras.callbacks import EarlyStopping

        callbacks.append(
            EarlyStopping(monitor='val_loss',
                          patience=args.early_stopping_patience))

    # compile the model with the initial learning rate
    from keras.optimizers import Adam
    model.compile(loss='binary_crossentropy', optimizer=Adam(lr=max_lr))

    # train
    augment = {
        'crop_prob': args.crop_prob,
        'crop_frac': args.crop_frac,
        'jitter_prob': args.jitter_prob,
        'jitter': args.jitter,
        'rot': args.rot,
        'hflip': args.hflip,
        'vflip': args.vflip,
        'hue': args.hue,
        'sat': args.sat,
        'val': args.val
    }

    train_generator = data_generator(
        train_imgs,
        args.batch_size, (args.image_size, args.image_size, num_channels),
        args.same_prob,
        args.no_aug_prob,
        no_augment=False,
        augment=augment)

    if do_valid:
        val_generator = data_generator(
            val_imgs,
            args.batch_size, (args.image_size, args.image_size, num_channels),
            args.same_prob,
            args.no_aug_prob,
            no_augment=False,
            augment=augment)
    else:
        val_generator = None

    model.fit_generator(
        train_generator,
        steps_per_epoch=max(1, args.images_per_epoch // args.batch_size),
        validation_data=val_generator,
        validation_steps=max(1, args.images_per_epoch // args.batch_size),
        epochs=args.end_epoch,
        initial_epoch=args.start_epoch - 1,
        callbacks=callbacks)
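
    # Hedged worked example (not from the original source): when --lr-schedule is
    # set to cyclic, the CyclicLR step_size above is lr_schedule_cycle // batch_size.
    # With the defaults (--lr-schedule-cycle 100000, --batch-size 8,
    # --images-per-epoch 10000) that is 100000 // 8 = 12500 optimizer steps per
    # half-cycle, while one epoch is 10000 // 8 = 1250 steps, so a half-cycle
    # spans 10 epochs.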
Example #21
# if schedule is not None:
#     callbacks = [LearningRateScheduler(schedule)]
# decay = 0.0
# if args["schedule"] == "standard":
#     print("[INFO] using 'keras standard' learning rate decay...")
#     decay = 1e-1 / epochs
# elif schedule is None:
#     print("[INFO] no learning rate schedule being used")
#
stepSize = config.STEP_SIZE * (train_images.shape[0] // config.BATCH_SIZE)
file_path = "xception-hepatocyte.h5"
es, msave, reduce_lr, tb_log, log_cv = get_callbacks(file_path,
                                                     top_model,
                                                     patience=10)
clr = CyclicLR(mode=config.CLR_METHOD,
               base_lr=config.MIN_LR,
               max_lr=config.MAX_LR,
               step_size=stepSize)
print("[INFO] training network...")
H = top_model.fit_generator(
    train_datagen.flow(train_images, Y_train, batch_size=config.BATCH_SIZE),
    validation_data=valid_gen,
    steps_per_epoch=train_images.shape[0] // batch_size_for_generators,
    validation_steps=valid_images.shape[0] // batch_size_for_generators,
    epochs=config.NUM_EPOCHS,
    callbacks=[clr, msave, log_cv],
    verbose=1)
print("[INFO] evaluating network...")
predictions = top_model.predict(valid_images, batch_size=config.BATCH_SIZE)
print(
    classification_report(Y_valid.argmax(axis=1),
                          predictions.argmax(axis=1)))

                                batchsize=100000,
                                on_epoch_end=False,
                                publish=publishpath + "_event_" + str(ev),
                                use_event=ev))

loss_config.use_average_cc_pos = True
model, history = train.trainModel(
    nepochs=1,
    run_eagerly=True,
    batchsize=nbatch,
    batchsize_use_sum_of_squares=False,
    checkperiod=1,  # saves a checkpoint model every N epochs
    verbose=verbosity,
    backup_after_batches=100,
    additional_callbacks=callbacks +
    [CyclicLR(base_lr=learningrate, max_lr=learningrate * 10., step_size=10)])

loss_config.use_average_cc_pos = False

loss_config.energy_loss_weight = 1e-1
loss_config.position_loss_weight = 1e-2
loss_config.timing_loss_weight = 1e-6
learningrate = 1e-5
loss_config.beta_loss_scale = 10.

model, history = train.trainModel(
    nepochs=1 + 3,
    run_eagerly=True,
    batchsize=nbatch,
    batchsize_use_sum_of_squares=False,
    checkperiod=1,  # saves a checkpoint model every N epochs
        PRETRAINED_WEIGHTS = 'weights/pretrained_weights_fold%d_%s.hdf5' % (
            fold, ftype)

        kf = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)

        for sub_fold, (train_index,
                       valid_index) in enumerate(kf.split(x_valid, y_valid1)):
            x_train_fold, x_valid_fold = x_valid[train_index], x_valid[
                valid_index]
            y_train_fold, y_valid_fold = y_valid[train_index], y_valid[
                valid_index]

            WEIGHTS_BEST = 'weights/best_weights_fold%d_subfold%d_%s.hdf5' % (
                fold, sub_fold, ftype)

            clr = CyclicLR(base_lr=1e-8, max_lr=8e-5)
            early_stoping = EarlyStopping(monitor='val_acc',
                                          patience=20,
                                          verbose=1)
            save_checkpoint = ModelCheckpoint(WEIGHTS_BEST,
                                              monitor='val_acc',
                                              verbose=1,
                                              save_best_only=True,
                                              save_weights_only=True,
                                              mode='max')
            callbacks = [early_stoping, save_checkpoint, clr]

            model = Stacking_Model()
            model.compile(loss='categorical_crossentropy',
                          optimizer=Adam(lr=8e-5),
                          metrics=['accuracy'])
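
            # Hedged sketch (not from the original source): the example is
            # truncated after compile; a fit call using the callbacks defined
            # above would typically look something like:
            # model.fit(x_train_fold, y_train_fold,
            #           validation_data=(x_valid_fold, y_valid_fold),
            #           epochs=100, batch_size=32,  # illustrative values
            #           callbacks=callbacks)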
Example #24
# Generators
training_data = DataGenerator(subset='training', **params)
#validation_generator = DataGenerator(subset='validation', **params)

# define model
model = model_from_yaml(open('models/' + params["model"] + '.yaml'))
model.load_weights('models/' + params["model"] + '.h5', by_name=True)
VARS = vars()
model.compile('SGD', loss={k: VARS[v] for k, v in params['vars_loss'].items()})

# fit model
model.fit(
    training_data,
    epochs=10000,
    #validation_data=validation_generator,
    #use_multiprocessing=True,
    #workers=6,
    callbacks=[
        CyclicLR(mode='triangular',
                 base_lr=0.000001,
                 max_lr=0.01,
                 step_size=params['steps']),
        keras.callbacks.ModelCheckpoint(filepath='models/' + params['model'] +
                                        '.h5',
                                        save_weights_only=True,
                                        monitor='loss',
                                        mode='min',
                                        save_best_only=True)
    ])
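
# Hedged sketch (not from the original source): the snippet above assumes a
# `params` dict along these lines; only the keys visible in the code (model,
# steps, vars_loss) are shown, and any DataGenerator-specific keys are omitted.
# params = {
#     'model': 'my_model',                 # hypothetical YAML/weights basename
#     'steps': 2000,                       # CyclicLR step_size in iterations
#     'vars_loss': {'output': 'my_loss'},  # maps output name -> loss fn name in vars()
#     # ...plus whatever keyword arguments DataGenerator expects
# }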
Example #25
def get_lr_schedule(schedule, num_samples, batch_size, schedule_args={}):
    """ Creates a learning rate schedule.

    # Arguments:

    - schedule: Name of the schedule. Possible values:
                - 'sgd': Stochastic Gradient Descent with ReduceLROnPlateau or LearningRateSchedule callback.
                - 'sgdr': Stochastic Gradient Descent with Cosine Annealing and Warm Restarts.
                - 'clr': Cyclical Learning Rates.
                - 'resnet-schedule': Hand-crafted schedule used by He et al. for training ResNet.
    
    - num_samples: Number of training samples.

    - batch_size: Number of samples per batch.

    - schedule_args: Further arguments for the specific learning rate schedule.
                     'sgd' supports:
                        - 'sgd_patience': Number of epochs without improvement before reducing the LR. Default: 10.
                        - 'sgd_min_lr': Minimum learning rate. Default : 1e-4
                        - 'sgd_schedule': Comma-separated list of `epoch:lr` pairs, defining a learning rate schedule.
                                          The total number of epochs can be appended to this list, separated by a comma as well.
                                          If this is specified, the learning rate will not be reduced on plateaus automatically
                                          and `sgd_patience` and `sgd_min_lr` will be ignored.
                                          The following example would mean to train for 50 epochs, starting with a learning rate
                                          of 0.1 and reducing it by a factor of 10 after 30 and 40 epochs: "1:0.1,31:0.01,41:0.001,50".
                     'sgdr' supports:
                        - 'sgdr_base_len': Length of the first cycle. Default: 12.
                        - 'sgdr_mul': Factor multiplied with the length of the cycle after the end of each one. Default: 2.
                        - 'sgdr_max_lr': Initial learning rate at the beginning of each cycle. Default: 0.1.
                     'clr' supports:
                        - 'clr_step_len': Number of training epochs per half-cycle. Default: 12.
                        - 'clr_min_lr': Minimum learning rate. Default: 1e-5.
                        - 'clr_max_lr': Maximum learning rate: Default: 0.1.
    
    # Returns:
        - a list of callbacks for being passed to the fit function,
        - a suggested number of training epochs.
    """

    if schedule.lower() == 'sgd':

        if ('sgd_schedule' in schedule_args) and (
                schedule_args['sgd_schedule']
                is not None) and (schedule_args['sgd_schedule'] != ''):

            def lr_scheduler(schedule, epoch, cur_lr):
                if schedule[0][0] > epoch:
                    return cur_lr
                for i in range(1, len(schedule)):
                    if schedule[i][0] > epoch:
                        return schedule[i - 1][1] if schedule[
                            i - 1][1] is not None else cur_lr
                return schedule[-1][1] if schedule[-1][
                    1] is not None else cur_lr

            schedule = [
                (int(point[0]) - 1,
                 float(point[1]) if len(point) > 1 else None)
                for sched_tuple in schedule_args['sgd_schedule'].split(',')
                for point in [sched_tuple.split(':')]
            ]
            schedule.sort()
            return [
                keras.callbacks.LearningRateScheduler(
                    lambda ep, cur_lr: lr_scheduler(schedule, ep, cur_lr))
            ], schedule[-1][0] + 1

        else:

            if 'sgd_patience' not in schedule_args:
                schedule_args['sgd_patience'] = 10
            if 'sgd_min_lr' not in schedule_args:
                schedule_args['sgd_min_lr'] = 1e-4

            return [
                keras.callbacks.ReduceLROnPlateau(
                    'val_loss',
                    patience=schedule_args['sgd_patience'],
                    epsilon=1e-4,
                    min_lr=schedule_args['sgd_min_lr'],
                    verbose=True)
            ], 200

    elif schedule.lower() == 'sgdr':

        if 'sgdr_base_len' not in schedule_args:
            schedule_args['sgdr_base_len'] = 12
        if 'sgdr_mul' not in schedule_args:
            schedule_args['sgdr_mul'] = 2
        if 'sgdr_max_lr' not in schedule_args:
            schedule_args['sgdr_max_lr'] = 0.1
        return ([
            SGDR(1e-6, schedule_args['sgdr_max_lr'],
                 schedule_args['sgdr_base_len'], schedule_args['sgdr_mul'])
        ],
                sum(schedule_args['sgdr_base_len'] *
                    (schedule_args['sgdr_mul']**i) for i in range(5)))

    elif schedule.lower() == 'clr':

        if 'clr_step_len' not in schedule_args:
            schedule_args['clr_step_len'] = 12
        if 'clr_min_lr' not in schedule_args:
            schedule_args['clr_min_lr'] = 1e-5
        if 'clr_max_lr' not in schedule_args:
            schedule_args['clr_max_lr'] = 0.1
        return ([
            CyclicLR(schedule_args['clr_min_lr'],
                     schedule_args['clr_max_lr'],
                     schedule_args['clr_step_len'] *
                     (num_samples // batch_size),
                     mode='triangular')
        ], schedule_args['clr_step_len'] * 20)

    elif schedule.lower() == 'resnet-schedule':

        def resnet_scheduler(epoch):
            if epoch >= 120:
                return 0.001
            elif epoch >= 80:
                return 0.01
            elif epoch >= 1:
                return 0.1
            else:
                return 0.01

        return [keras.callbacks.LearningRateScheduler(resnet_scheduler)], 164

    else:

        raise ValueError('Unknown learning rate schedule: {}'.format(schedule))
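
# Hedged usage sketch (not from the original source): requesting a cyclical
# learning rate schedule for a hypothetical dataset of 50,000 samples with
# batch size 128; the argument values are illustrative only.
clr_callbacks, suggested_epochs = get_lr_schedule(
    'clr',
    num_samples=50000,
    batch_size=128,
    schedule_args={'clr_min_lr': 1e-5, 'clr_max_lr': 0.1, 'clr_step_len': 12})
# CyclicLR step_size = 12 * (50000 // 128) = 4680 iterations,
# suggested number of epochs = 12 * 20 = 240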
train.compileModel(learningrate=learningrate, loss=None, metrics=None)

# print(train.keras_model.)

model, history = train.trainModel(
    nepochs=4,
    run_eagerly=True,
    batchsize=nbatch,
    extend_truth_list_by=len(train.keras_model.outputs) -
    2,  #just adapt truth list to avoid keras error (no effect on model)
    batchsize_use_sum_of_squares=False,
    checkperiod=1,  # saves a checkpoint model every N epochs
    verbose=verbosity,
    backup_after_batches=100,
    additional_callbacks=[
        CyclicLR(base_lr=learningrate / 3., max_lr=learningrate, step_size=20)
    ] + cb,
)

#print("freeze BN")
#for l in train.keras_model.layers:
#    if isinstance(l, BatchNormalization):
#        l.trainable=False
#    if 'GravNetLLLocalClusterLoss' in l.name:
#        l.active=False

#also stop GravNetLLLocalClusterLoss* from being evaluated

learningrate = 1e-4
train.compileModel(learningrate=learningrate, loss=None, metrics=None)