Example #1
def train(inputs, data):
    model = unet(inputs)
    model.summary()
    train_idx, mask_count_df, train_df, val_idx = data
    config = Config()
    train_generator = DataGenerator(train_idx,
                                    df=mask_count_df,
                                    target_df=train_df,
                                    batch_size=config.batch_size,
                                    reshape=(config.height, config.width),
                                    augment=True,
                                    graystyle=False,
                                    shuffle=True,
                                    n_channels=config.channels,
                                    n_classes=config.n_classes)
    train_eval_generator = DataGenerator(train_idx,
                                         df=mask_count_df,
                                         target_df=train_df,
                                         batch_size=config.batch_size,
                                         reshape=(config.height, config.width),
                                         augment=False,
                                         graystyle=False,
                                         shuffle=False,
                                         n_channels=config.channels,
                                         n_classes=config.n_classes)
    val_generator = DataGenerator(val_idx,
                                  df=mask_count_df,
                                  target_df=train_df,
                                  batch_size=config.batch_size,
                                  reshape=(config.height, config.width),
                                  augment=False,
                                  graystyle=False,
                                  shuffle=False,
                                  n_channels=config.channels,
                                  n_classes=config.n_classes)
    earlystopping = EarlyStopping(monitor='loss', patience=config.es_patience)
    reduce_lr = ReduceLROnPlateau(monitor='loss',
                                  patience=config.rlrop_patience,
                                  factor=config.decay_drop,
                                  min_lr=1e-6)
    checkpoint = ModelCheckpoint(filepath='weights-{epoch:03d}-{loss:.2f}.h5',
                                 monitor='loss',
                                 save_best_only=False,
                                 save_weights_only=True)
    metric_list = [dice_coef]
    callback_list = [earlystopping, reduce_lr, checkpoint]
    optimizer = Adam(lr=config.learning_rate)
    model.compile(optimizer=optimizer, loss=bce_dice_loss, metrics=metric_list)
    checkpoint.set_model(model)
    model.fit_generator(train_generator,
                        validation_data=val_generator,
                        callbacks=callback_list,
                        epochs=100,
                        initial_epoch=0)
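Note: dice_coef and bce_dice_loss referenced above are assumed to be defined elsewhere in the example's codebase. A minimal sketch of one common formulation with the Keras backend follows; the smooth term and the plain BCE + Dice sum are illustrative, not necessarily the author's exact definitions.

from keras import backend as K
from keras.losses import binary_crossentropy

def dice_coef(y_true, y_pred, smooth=1.0):
    # Soft Dice coefficient over the flattened masks.
    y_true_f = K.flatten(y_true)
    y_pred_f = K.flatten(y_pred)
    intersection = K.sum(y_true_f * y_pred_f)
    return (2.0 * intersection + smooth) / (K.sum(y_true_f) + K.sum(y_pred_f) + smooth)

def bce_dice_loss(y_true, y_pred):
    # Binary cross-entropy plus Dice loss.
    return binary_crossentropy(y_true, y_pred) + (1.0 - dice_coef(y_true, y_pred))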
Example #2
def initLogging(params, modelD, modelG):

    # Generate actual output folder with timestamp
    timestring = datetime.datetime.now().strftime('%Y-%m-%d_%H-%M-%S')
    outfolder = os.path.join(os.getcwd(), params['logdir'].split('./')[1], timestring)
    os.makedirs(outfolder, exist_ok = True)

    # Initialize TensorBoard
    tb = TensorBoard(log_dir = outfolder, batch_size = params['batchsize'], write_graph = True, write_grads = True,
                     write_images = True)
    tb.set_model(modelG)

    # Initialize checkpointing
    #mcD = ModelCheckpoint(filepath=os.path.join(outfolder, 'discriminator.epoch{epoch:04d}-epe{epe:9.5f}.h5'),
    mcD = ModelCheckpoint(filepath=os.path.join(outfolder, 'discriminator.h5'),
                          save_best_only = True, monitor='epe', mode='min')
    mcD.set_model(modelD)
    #mcG = ModelCheckpoint(filepath=os.path.join(outfolder, 'generator.epoch{epoch:04d}-epe{epe:9.5f}.h5'),
    mcG = ModelCheckpoint(filepath=os.path.join(outfolder, 'generator.h5'),
                          save_best_only = True, monitor='epe', mode='min')
    mcG.set_model(modelG)

    # Save JSON representation of models
    with open(os.path.join(outfolder, "discriminator.json"), "w") as json_fileD:
        json_fileD.write(modelD.to_json())
    with open(os.path.join(outfolder, "generator.json"), "w") as json_fileG:
        json_fileG.write(modelG.to_json())

    # Save params to file
    with open(os.path.join(outfolder, "params_train.log"), "w") as paramfile:
        paramfile.write(repr(params))

    # Clone stdout to file
    sys.stdout = Logger(sys.stdout, os.path.join(outfolder, "console_train.log"))

    return [tb, mcD, mcG, outfolder]
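Because initLogging only binds these callbacks with set_model instead of passing them to fit, the surrounding training loop is expected to step them by hand, as the later examples also do. A minimal sketch of that pattern; the epoch-count key and epe_value are placeholders, not the original training code.

tb, mcD, mcG, outfolder = initLogging(params, modelD, modelG)
for epoch in range(params['epochs']):       # hypothetical epoch-count key
    # ... one epoch of adversarial training producing an end-point error ...
    logs = {'epe': epe_value}               # epe_value is a placeholder metric
    tb.on_epoch_end(epoch, logs)
    mcD.on_epoch_end(epoch, logs)           # writes discriminator.h5 when epe improves
    mcG.on_epoch_end(epoch, logs)           # writes generator.h5 when epe improves
tb.on_train_end(None)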
Example #3
    def train_rtvsrgan(self,
                       epochs=None,
                       batch_size=None,
                       modelname=None,
                       datapath_train=None,
                       datapath_validation=None,
                       steps_per_validation=None,
                       datapath_test=None,
                       workers=None,
                       max_queue_size=None,
                       first_epoch=None,
                       print_frequency=None,
                       crops_per_image=None,
                       log_weight_frequency=None,
                       log_weight_path=None,
                       log_tensorboard_path=None,
                       log_tensorboard_update_freq=None,
                       log_test_frequency=None,
                       log_test_path=None,
                       media_type='i'):
        """Train the ESRGAN network
        :param int epochs: how many epochs to train the network for
        :param str modelname: name to use for storing model weights etc.
        :param str datapath_train: path for the image files to use for training
        :param str datapath_test: path for the image files to use for testing / plotting
        :param int print_frequency: how often (in epochs) to print progress to terminal. Warning: will run validation inference!
        :param int log_weight_frequency: how often (in epochs) should network weights be saved. None for never
        :param str log_weight_path: where should network weights be saved
        :param int log_test_frequency: how often (in epochs) should testing & validation be performed
        :param str log_test_path: where should test results be saved
        :param str log_tensorboard_path: where should tensorflow logs be sent
        """

        # Create data loaders
        train_loader = DataLoader(datapath_train, batch_size, self.height_hr,
                                  self.width_hr, self.upscaling_factor,
                                  crops_per_image, media_type, self.channels,
                                  self.colorspace)

        # Validation data loader
        validation_loader = None
        if datapath_validation is not None:
            validation_loader = DataLoader(datapath_validation, batch_size,
                                           self.height_hr, self.width_hr,
                                           self.upscaling_factor,
                                           crops_per_image, media_type,
                                           self.channels, self.colorspace)

        test_loader = None
        if datapath_test is not None:
            test_loader = DataLoader(datapath_test, 1, self.height_hr,
                                     self.width_hr, self.upscaling_factor, 1,
                                     media_type, self.channels,
                                     self.colorspace)

        # Use several workers on CPU for preparing batches
        enqueuer = OrderedEnqueuer(train_loader,
                                   use_multiprocessing=True,
                                   shuffle=True)
        enqueuer.start(workers=workers, max_queue_size=max_queue_size)
        output_generator = enqueuer.get()

        # Callback: save weights after each epoch
        modelcheckpoint = ModelCheckpoint(os.path.join(
            log_weight_path,
            modelname + '2_{}X.h5'.format(self.upscaling_factor)),
                                          monitor='Perceptual_loss',
                                          save_best_only=True,
                                          save_weights_only=True,
                                          mode='min',
                                          verbose=1)
        modelcheckpoint.set_model(self.generator)

        # Callback: tensorboard
        if log_tensorboard_path:
            tensorboard = TensorBoard(log_dir=os.path.join(
                log_tensorboard_path, modelname),
                                      histogram_freq=0,
                                      batch_size=batch_size,
                                      write_graph=True,
                                      write_grads=True,
                                      update_freq=log_tensorboard_update_freq)
            tensorboard.set_model(self.rtvsrgan)
        else:
            print(
                ">> Not logging to tensorboard since no log_tensorboard_path is set"
            )

        # Learning rate scheduler
        def lr_scheduler(epoch, lr):
            factor = 0.5
            decay_step = [500, 1000, 1500, 2000]
            if epoch in decay_step and epoch:
                return lr * factor
            return lr

        lr_scheduler_gan = LearningRateScheduler(lr_scheduler, verbose=1)
        lr_scheduler_gan.set_model(self.rtvsrgan)
        lr_scheduler_gen = LearningRateScheduler(lr_scheduler, verbose=0)
        lr_scheduler_gen.set_model(self.generator)
        lr_scheduler_dis = LearningRateScheduler(lr_scheduler, verbose=0)
        lr_scheduler_dis.set_model(self.discriminator)
        lr_scheduler_ra = LearningRateScheduler(lr_scheduler, verbose=0)
        lr_scheduler_ra.set_model(self.ra_discriminator)

        # Callback: format input value
        def named_logs(model, logs):
            """Transform train_on_batch return value to dict expected by on_batch_end callback"""
            result = {}
            for l in zip(model.metrics_names, logs):
                result[l[0]] = l[1]
            return result

        # Shape of output from discriminator
        discriminator_output_shape = list(self.ra_discriminator.output_shape)
        discriminator_output_shape[0] = batch_size
        discriminator_output_shape = tuple(discriminator_output_shape)

        # VALID / FAKE targets for discriminator
        real = np.ones(discriminator_output_shape)
        fake = np.zeros(discriminator_output_shape)

        # Each epoch == "update iteration" as defined in the paper
        print_losses = {"GAN": [], "D": []}
        start_epoch = datetime.datetime.now()

        # Random images to go through
        #idxs = np.random.randint(0, len(train_loader), epochs)
        # Loop through epochs / iterations
        for epoch in range(first_epoch, int(epochs) + first_epoch):
            lr_scheduler_gan.on_epoch_begin(epoch)
            lr_scheduler_ra.on_epoch_begin(epoch)
            lr_scheduler_dis.on_epoch_begin(epoch)
            lr_scheduler_gen.on_epoch_begin(epoch)

            # Start epoch time
            if epoch % print_frequency == 0:
                print("\nEpoch {}/{}:".format(epoch + 1, epochs + first_epoch))
                start_epoch = datetime.datetime.now()

            # Train discriminator
            self.discriminator.trainable = True
            self.ra_discriminator.trainable = True

            imgs_lr, imgs_hr = next(output_generator)
            generated_hr = self.generator.predict(imgs_lr)

            real_loss = self.ra_discriminator.train_on_batch(
                [imgs_hr, generated_hr], real)
            #print("Real: ",real_loss)
            fake_loss = self.ra_discriminator.train_on_batch(
                [generated_hr, imgs_hr], fake)
            #print("Fake: ",fake_loss)
            discriminator_loss = 0.5 * np.add(real_loss, fake_loss)

            # Train generator
            self.discriminator.trainable = False
            self.ra_discriminator.trainable = False

            for _ in tqdm(range(10), ncols=60, desc=">> Training generator"):
                imgs_lr, imgs_hr = next(output_generator)
                gan_loss = self.rtvsrgan.train_on_batch(
                    [imgs_lr, imgs_hr], [imgs_hr, real, imgs_hr])

            # Callbacks
            logs = named_logs(self.rtvsrgan, gan_loss)
            if log_tensorboard_path:
                tensorboard.on_epoch_end(epoch, logs)

            # Callbacks
            if datapath_validation:
                validation_losses = self.generator.evaluate_generator(
                    validation_loader,
                    steps=steps_per_validation,
                    use_multiprocessing=False,  #workers>1,
                    workers=1)
                #logs = named_logs(self.generator, validation_losses)
                modelcheckpoint.on_epoch_end(epoch, logs)

            # Save losses
            print_losses['GAN'].append(gan_loss)
            print_losses['D'].append(discriminator_loss)

            # Show the progress
            if epoch % print_frequency == 0:
                g_avg_loss = np.array(print_losses['GAN']).mean(axis=0)
                d_avg_loss = np.array(print_losses['D']).mean(axis=0)
                print(">> Time: {}s\n>> GAN: {}\n>> Discriminator: {}".format(
                    (datetime.datetime.now() - start_epoch).seconds,
                    ", ".join([
                        "{}={:.4f}".format(k, v) for k, v in zip(
                            self.rtvsrgan.metrics_names, g_avg_loss)
                    ]), ", ".join([
                        "{}={:.4f}".format(k, v) for k, v in zip(
                            self.discriminator.metrics_names, d_avg_loss)
                    ])))
                print_losses = {"GAN": [], "D": []}

                # Run validation inference if specified
                if datapath_validation:
                    print(">> Validation Losses: {}".format(", ".join([
                        "{}={:.4f}".format(k, v) for k, v in zip(
                            self.generator.metrics_names, validation_losses)
                    ])))

            # If test images are supplied, run model on them and save to log_test_path
            if datapath_test and epoch % log_test_frequency == 0:
                plot_test_images(self.generator,
                                 test_loader,
                                 datapath_test,
                                 log_test_path,
                                 epoch,
                                 modelname,
                                 channels=self.channels,
                                 colorspace=self.colorspace)

            # Check if we should save the network weights
            if log_weight_frequency and epoch % log_weight_frequency == 0:
                # Save the network weights
                self.save_weights(os.path.join(log_weight_path, modelname))
Example #4
    test_loader = gen('../test/' + tag + '/tmp_labels.txt',
                      '../test/' + tag + '/',
                      batchsize=batch_size,
                      maxlabellength=maxlabellength,
                      imagesize=(img_h, img_w))
    #train_loader = gen('../all/train.txt', '../all/', batchsize=batch_size, maxlabellength=maxlabellength, imagesize=(img_h, img_w))
    #test_loader = gen('../all/test.txt', '../all/', batchsize=batch_size, maxlabellength=maxlabellength, imagesize=(img_h, img_w))
    #train_loader = gen('../all/train_13_100.txt', '../all/', batchsize=batch_size, maxlabellength=maxlabellength, imagesize=(img_h, img_w))
    #test_loader = gen('../all/test_13_100.txt', '../all/', batchsize=batch_size, maxlabellength=maxlabellength, imagesize=(img_h, img_w))
    checkpoint = ModelCheckpoint(filepath='./models/' + tag + '/weights_' +
                                 tag +
                                 '_shufflenet-{epoch:02d}-{val_loss:.2f}.h5',
                                 monitor='val_loss',
                                 save_best_only=False,
                                 save_weights_only=True)
    checkpoint.set_model(save_model)
    #lr_schedule = lambda epoch: 0.0005 * 0.4**epoch
    #lr_schedule = lambda epoch: 0.005 * 20 * 0.4 / (epoch + 1)
    #lr_schedule = lambda epoch: 0.00135 * 2 * 0.33**epoch
    lr_schedule = lambda epoch: 0.0005 * 1 * 0.55**epoch

    learning_rate = np.array([lr_schedule(i) for i in range(30)])
    changelr = LearningRateScheduler(lambda epoch: float(learning_rate[epoch]))
    earlystop = EarlyStopping(monitor='val_loss', patience=2, verbose=1)
    tensorboard = TensorBoard(log_dir='./models/logs', write_graph=True)
    print('-----------Start training-----------')
    model.fit_generator(
        train_loader,
        steps_per_epoch=train_size // batch_size,
        epochs=30,
        initial_epoch=0,
Example #5
#     ???

# model = primary_net()
loss = 'mse'
metrics = ['mae', 'mse', 'mape', 'msle', 'logcosh', 'cosine']
model.compile(loss=loss,
              loss_weights=[0, 0, 1, 1, 1e4, 1],
              optimizer=Adam(lr=lr, beta_1=beta_1),
              metrics=metrics)
model.summary()
# model = load_model('model28log.h5', custom_objects=custom_objects)
# model_name = input('Enter model name: ')
model_name = 'model_final_16x16'
print('model name: ' + model_name)
checkpoint = ModelCheckpoint(model_name + '_{epoch:d}.h5', period=1)
checkpoint.set_model(model)
checkpoint_weight = ModelCheckpoint(model_name + '_weights_{epoch:d}.h5',
                                    period=1,
                                    save_weights_only=True)
checkpoint_weight.set_model(model)
# early_stop = EarlyStopping(monitor='val_1_loss', min_delta=0.1, patience=30, restore_best_weights=True, verbose=1)
callbacks = [TimeHistory(), checkpoint, checkpoint_weight]
# callbacks = [TimeHistory()]
# callbacks = [TimeHistory(), EarlyStopping(monitor='loss', min_delta=0.1, patience=100,
#                                           restore_best_weights=True, verbose=1)]
# early_stop = callbacks.EarlyStopping(patience=200, verbose=1)
# with open('model_hist_data28log.json', 'r') as f:
#     hist_data = json.load(f)
dataset = datasets[key]
# data_size = dataset.data_size
# val_size = dataset.val_size
Example #6
    try:
        pre_trained_weights = 'resnet50_weights_tf_dim_ordering_tf_kernels_notop.h5'
        model.load_weights(pre_trained_weights, by_name=True)
    except Exception as e:
        print('load pre-trained weights error {}'.format(e))
    for cls, idx in train_generator.class_indices.items():
        print('Class #{} = {}'.format(idx, cls))

    checkpoint = ModelCheckpoint(
        filepath='weights/weights-{epoch:03d}-{loss:.2f}.h5',
        monitor='loss',
        save_best_only=False,
        save_weights_only=True)

    checkpoint.set_model(model)

    model.compile(optimizer=Adam(lr=1e-5),
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])

    lr_reducer = ReduceLROnPlateau(monitor='loss',
                                   factor=np.sqrt(0.1),
                                   cooldown=0,
                                   patience=2,
                                   min_lr=0.5e-6)

    earlystopping = EarlyStopping(monitor='loss', patience=5, verbose=1)

    tensorboard = TensorBoard(log_dir='weights/logs', write_graph=True)
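The snippet ends before these callbacks are actually used; in a typical Keras call they would all be passed to fit together, roughly as sketched below (the epoch count is illustrative, and train_generator is the generator whose class_indices are printed above).

model.fit_generator(train_generator,
                    steps_per_epoch=len(train_generator),
                    epochs=50,  # hypothetical epoch count
                    callbacks=[checkpoint, lr_reducer, earlystopping, tensorboard])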
Example #7
    else:
        start_epoch_num = 0
        spatial_stream = spatial.basic()
        print('set network')

    print('complete')
    sgd = SGD(lr=0.001, decay=1e-6, momentum=0.9, nesterov=True)
    spatial_stream.compile(optimizer=sgd,
                           loss='categorical_crossentropy',
                           metrics=['accuracy'])
    print('complete network setting')

    tmp_numiter = len(train_loader.get_train_data_list())/batch_size
    num_iter = int(tmp_numiter)+1 if tmp_numiter - int(tmp_numiter) > 0 else int(tmp_numiter)
    tbCallBack.set_model(spatial_stream)
    mcCallBack.set_model(spatial_stream)

    sess = tf.Session()
    best_val_acc = 0
    for epoch in range(start_epoch_num, start_epoch_num + num_epoch):
        print('Epoch', epoch)

        train_acc, train_loss = train_1epoch(spatial_stream, train_loader, num_iter)
        print("train_loss:", train_loss, "train_acc:", train_acc)

        tr_val_acc, tr_val_loss = validation_1epoch(spatial_stream, train_val_loader, sess)
        print("tr_val_loss:", tr_val_loss, "tr_val_acc:", tr_val_acc)

        val_acc, val_loss = validation_1epoch(spatial_stream, test_loader, sess)
        print("val_loss:", val_loss, "val_acc:", val_acc)
Example #8
def run_hcomb_final(h, ID, hcm, model_dir, INTERMEDIATE_PLOTS, GLOBAL_GRADIENT_NORM_PLOT):
    ################################################# FINAL EXPERIMENT
    start = timer()

    ALL_FOLDS = list(range(1, 7))

    print(5 * '\n' + 'Starting Final Experiment, training {} epochs...\n'.format(h.MAX_EPOCHS))

    model_save_dir = os.path.join(model_dir, 'final')
    os.makedirs(model_save_dir, exist_ok=True)

    ################################################# MODEL DEFINITION

    reg = None
    adam_kwargs = {'clipnorm': 1.0}
    kernel_initializer_dense = 'glorot_uniform'
    if 'changbin' in model_dir:
        from keras import regularizers
        reg = regularizers.l2(0.0000459)

    subsample_time_steps = False
    if h.TIME_STEPS >= 2000:
        subsample_time_steps = True

    if h.TIME_STEPS >= 2000:
        from keras import regularizers
        reg = regularizers.l2(0.001)
        adam_kwargs['clipvalue'] = 0.3
        adam_kwargs['clipnorm'] = 0.7
        kernel_initializer_dense = 'he_uniform'  # should prevent exploding grads for ReLU

    print('\nBuild model...\n')

    time_steps = h.TIME_STEPS if not subsample_time_steps else h.TIME_STEPS // 2
    x = Input(batch_shape=(h.BATCH_SIZE, time_steps, h.N_FEATURES), name='Input', dtype='float32')
    y = x

    # Input dropout
    y = Dropout(h.INPUT_DROPOUT, noise_shape=(h.BATCH_SIZE, 1, h.N_FEATURES))(y)
    for units in h.UNITS_PER_LAYER_LSTM:
        y = CuDNNLSTM(units, return_sequences=True, stateful=True, kernel_regularizer=reg, recurrent_regularizer=reg)(y)

        # LSTM Output dropout
        y = Dropout(h.LSTM_OUTPUT_DROPOUT, noise_shape=(h.BATCH_SIZE, 1, units))(y)
    for units in h.UNITS_PER_LAYER_MLP:
        if units != h.N_CLASSES:
            y = Dense(units, activation='relu', kernel_regularizer=reg, kernel_initializer=kernel_initializer_dense)(y)
        else:
            y = Dense(units, activation='linear', kernel_regularizer=reg)(y)

        # MLP Output dropout but not last layer
        if units != h.N_CLASSES:
            y = Dropout(h.MLP_OUTPUT_DROPOUT, noise_shape=(h.BATCH_SIZE, 1, units))(y)
    model = Model(x, y)

    model.summary()
    print(5 * '\n')

    my_loss = tensorflow_utils.my_loss_builder(h.MASK_VAL,
                                               tensorflow_utils.get_loss_weights(ALL_FOLDS, h.TRAIN_SCENES,
                                                                                  h.LABEL_MODE))

    ################################################# LOAD CHECKPOINTED MODEL

    model_is_resumed = False
    epochs_finished_old = None

    # use val_fold = 1 as a dummy index for tracking finished epochs
    val_fold = 1
    val_fold_str = 'final_experiment: {} ({} / {})'.format(val_fold, 1, 1)

    latest_weights_path, epochs_finished, val_acc, best_epoch_, best_val_acc_, epochs_without_improvement_ \
        = tensorflow_utils.latest_training_state(model_save_dir)
    if latest_weights_path is not None:
        model.load_weights(latest_weights_path)

        model_is_resumed = True


        if h.epochs_finished[val_fold - 1] != epochs_finished:
            epochs_finished_old = h.epochs_finished[val_fold - 1]
            print(
                'MISMATCH: Latest state in hyperparameter combination list is different to checkpointed state.')
            h.epochs_finished[val_fold - 1] = epochs_finished
            h.val_acc[val_fold - 1] = val_acc
            hcm.replace_at_id(ID, h)

    ################################################# COMPILE MODEL

    adam = Adam(lr=h.LEARNING_RATE, **adam_kwargs)
    model.compile(optimizer=adam, loss=my_loss, metrics=None, sample_weight_mode='temporal')

    print('\nModel compiled.\n')

    ################################################# DATA LOADER
    use_multithreading = True
    BUFFER = utils.get_buffer_size_wrt_time_steps(h.TIME_STEPS)

    train_loader = tr_utils.create_train_dataloader(h.LABEL_MODE, ALL_FOLDS, -1, h.BATCH_SIZE, h.TIME_STEPS,
                                                    h.MAX_EPOCHS, 160, 13,
                                                    BUFFER=BUFFER, use_multithreading=use_multithreading)

    ################################################# CALLBACKS
    model_ckp_last = ModelCheckpoint(os.path.join(model_save_dir,
                                                  'model_ckp_epoch_{epoch:02d}-val_acc_{val_final_acc:.3f}.hdf5'),
                                     verbose=1, monitor='val_final_acc')
    model_ckp_last.set_model(model)

    args = [h.OUTPUT_THRESHOLD, h.MASK_VAL, h.MAX_EPOCHS, val_fold_str, GLOBAL_GRADIENT_NORM_PLOT,
            h.RECURRENT_DROPOUT, h.METRIC]

    # training phase
    train_phase = tr_utils.Phase('train', model, train_loader, BUFFER, *args,
                                 no_new_weighting=True if 'nnw' in model_save_dir else False,
                                 changbin_recurrent_dropout=True if 'changbin' in model_dir else False,
                                 subsample_time_steps=subsample_time_steps)

    if model_is_resumed:
        try:
            old_metrics = utils.load_metrics(model_save_dir, name="metrics_train")

            # merge metrics
            h.METRIC = old_metrics['metric']
            train_phase.metric = h.METRIC

            train_iterations_done = old_metrics['train_losses'].shape[0]
            epochs_done = old_metrics['train_accs'].shape[0]
            if epochs_finished_old is not None:
                epochs_done_old = epochs_done
                epochs_done = epochs_done if epochs_finished > epochs_done else epochs_finished
                train_iterations_done = int(train_iterations_done / epochs_done_old) * epochs_done

            train_phase.losses = old_metrics['train_losses'].tolist()[:train_iterations_done]
            train_phase.accs = old_metrics['train_accs'].tolist()[:epochs_done]
            train_phase.sens_spec_class_scene = old_metrics['train_sens_spec_class_scene'].tolist()[:epochs_done]

            if 'global_gradient_norm' in old_metrics:
                train_phase.global_gradient_norms = old_metrics['global_gradient_norm'].tolist()[
                                                    :train_iterations_done]
        except:
            pass

        train_phase.resume_from_epoch(h.epochs_finished[val_fold - 1] + 1)

    for e in range(h.epochs_finished[val_fold - 1], h.MAX_EPOCHS):

        train_loss_is_nan, _ = train_phase.run()

        if train_loss_is_nan:
            print('\n\n\n---------------------------------------\n\n\n')
            print("ERROR: Training loss is NaN.")
            print('\n\n\n---------------------------------------\n\n\n')
            break

        tr_utils.update_latest_model_ckp(model_ckp_last, model_save_dir, e, 0.0)

        metrics = {
            'metric': h.METRIC,
            'train_losses': np.array(train_phase.losses),
            'train_accs': np.array(train_phase.accs),
            'train_sens_spec_class_scene': np.array(train_phase.sens_spec_class_scene),
        }

        if GLOBAL_GRADIENT_NORM_PLOT:
            metrics['global_gradient_norm'] = np.array(train_phase.global_gradient_norms)

        utils.pickle_metrics(metrics, model_save_dir, name="metrics_train")

        hcm.finish_epoch(ID, h, 0.0, 0.0, 0.0, 0.0, val_fold - 1, e + 1, e + 1, (timer() - start) / 60)

        if INTERMEDIATE_PLOTS:
            plot.plot_metrics(metrics, model_save_dir)

        if GLOBAL_GRADIENT_NORM_PLOT:
            plot.plot_global_gradient_norm(np.array(train_phase.global_gradient_norms), model_save_dir,
                                           epochs_done=e + 1)

    del model
    K.clear_session()

    hcm.finish_hcomb(ID, h)

    ################################################## TESTING

    test_loader = tr_utils.create_test_dataloader(h.LABEL_MODE)

    ################################################# MODEL DEFINITION

    print('\nBuild model for testing...\n')

    x = Input(batch_shape=(1, None, h.N_FEATURES), name='Input', dtype='float32')
    y = x

    # Input dropout
    y = Dropout(h.INPUT_DROPOUT, noise_shape=(1, 1, h.N_FEATURES))(y)
    for units in h.UNITS_PER_LAYER_LSTM:
        y = CuDNNLSTM(units, return_sequences=True, stateful=True, kernel_regularizer=reg, recurrent_regularizer=reg)(y)

        # LSTM Output dropout
        y = Dropout(h.LSTM_OUTPUT_DROPOUT, noise_shape=(1, 1, units))(y)
    for units in h.UNITS_PER_LAYER_MLP:
        if units != h.N_CLASSES:
            y = Dense(units, activation='relu', kernel_regularizer=reg, kernel_initializer=kernel_initializer_dense)(y)
        else:
            y = Dense(units, activation='linear', kernel_regularizer=reg)(y)

        # MLP Output dropout but not last layer
        if units != h.N_CLASSES:
            y = Dropout(h.MLP_OUTPUT_DROPOUT, noise_shape=(1, 1, units))(y)
    model = Model(x, y)

    model.summary()

    latest_weights_path, _, _, _, _, _ = tensorflow_utils.latest_training_state(model_save_dir)

    if latest_weights_path is not None:

        model.load_weights(latest_weights_path)

        model.compile(optimizer=adam, loss=my_loss, metrics=None)

        print('\nModel compiled.\n')

        test_phase = tr_utils.TestPhase(model, test_loader, h.OUTPUT_THRESHOLD, h.MASK_VAL, 1, val_fold_str, model_save_dir,
                                        metric=('BAC', 'BAC2'), ret=('final', 'per_class', 'per_class_scene', 'per_scene'))

        test_loss_is_nan, _ = test_phase.run()

        metrics_test = {
            'metric': h.METRIC,
            'test_accs': np.array(test_phase.accs),
            'test_accs_bac2': np.array(test_phase.accs_bac2),
            'test_class_accs': np.array(test_phase.class_accs),
            'test_class_accs_bac2': np.array(test_phase.class_accs_bac2),
            'test_class_scene_accs': np.array(test_phase.class_scene_accs),
            'test_class_scene_accs_bac2': np.array(test_phase.class_scene_accs_bac2),
            'test_scene_accs': np.array(test_phase.scene_accs),
            'test_scene_accs_bac2': np.array(test_phase.scene_accs_bac2),
            'test_sens_spec_class_scene': np.array(test_phase.sens_spec_class_scene),
            'test_sens_spec_class': np.array(test_phase.sens_spec_class)
        }

        utils.pickle_metrics(metrics_test, model_save_dir)

    else:
        print('\n\n\n---------------------------------------\n\n\n')
        print("ERROR: No testing possible, because no trained model saved.")
        print('\n\n\n---------------------------------------\n\n\n')

    go_to_next_stage = False
    return go_to_next_stage
Example #9
def run_hcomb_cv(h, ID, hcm, model_dir, INTERMEDIATE_PLOTS, GLOBAL_GRADIENT_NORM_PLOT):
    ################################################# CROSS VALIDATION
    start = timer()

    NUMBER_OF_CLASSES = 13
    # METRICS 

    ALL_FOLDS = h.ALL_FOLDS if h.ALL_FOLDS != -1 else list(range(1, 7))

    best_val_class_accuracies_over_folds = [[0] * NUMBER_OF_CLASSES] * len(ALL_FOLDS)
    best_val_acc_over_folds = [0] * len(ALL_FOLDS)

    best_val_class_accuracies_over_folds_bac2 = [[0] * NUMBER_OF_CLASSES] * len(ALL_FOLDS)
    best_val_acc_over_folds_bac2 = [0] * len(ALL_FOLDS)

    go_to_next_stage = False

    subsample_time_steps = False
    if h.TIME_STEPS >= 2000:
        subsample_time_steps = True

    print(5 * '\n' + 'Starting Cross Validation STAGE {}...\n'.format(h.STAGE))

    for i_val_fold, val_fold in enumerate(h.VAL_FOLDS):
        model_save_dir = os.path.join(model_dir, 'val_fold{}'.format(val_fold))
        os.makedirs(model_save_dir, exist_ok=True)

        TRAIN_FOLDS = list(set(ALL_FOLDS).difference({val_fold}))

        val_fold_str = 'val_fold: {} ({} / {})'.format(val_fold, i_val_fold + 1, len(h.VAL_FOLDS))

        ################################################# MODEL DEFINITION

        print('\nBuild model...\n')

        time_steps = h.TIME_STEPS if not subsample_time_steps else h.TIME_STEPS // 2
        x = Input(batch_shape=(h.BATCH_SIZE, time_steps, h.N_FEATURES), name='Input', dtype='float32')
        y = x

        # Input dropout
        y = Dropout(h.INPUT_DROPOUT, noise_shape=(h.BATCH_SIZE, 1, h.N_FEATURES))(y)
        for units in h.UNITS_PER_LAYER_LSTM:
            y = CuDNNLSTM(units, return_sequences=True, stateful=True)(y)

            # LSTM Output dropout
            y = Dropout(h.LSTM_OUTPUT_DROPOUT, noise_shape=(h.BATCH_SIZE, 1, units))(y)
        for units in h.UNITS_PER_LAYER_MLP:
            if units != h.N_CLASSES:
                y = Dense(units, activation='relu')(y)
            else:
                y = Dense(units, activation='linear')(y)

            # MLP Output dropout but not last layer
            if units != h.N_CLASSES:
                y = Dropout(h.MLP_OUTPUT_DROPOUT, noise_shape=(h.BATCH_SIZE, 1, units))(y)
        model = Model(x, y)

        model.summary()
        print(5 * '\n')

        my_loss = tensorflow_utils.my_loss_builder(h.MASK_VAL,
                                                   tensorflow_utils.get_loss_weights(TRAIN_FOLDS, h.TRAIN_SCENES,
                                                                                     h.LABEL_MODE))

        ################################################# LOAD CHECKPOINTED MODEL

        model_is_resumed = False
        epochs_finished_old = None

        latest_weights_path, epochs_finished, val_acc, best_epoch_, best_val_acc_, epochs_without_improvement_ \
            = tensorflow_utils.latest_training_state(model_save_dir)
        if latest_weights_path is not None:
            model.load_weights(latest_weights_path)

            model_is_resumed = True

            if h.epochs_finished[val_fold - 1] != epochs_finished:
                epochs_finished_old = h.epochs_finished[val_fold - 1]
                print(
                    'MISMATCH: Latest state in hyperparameter combination list is different to checkpointed state.')
                h.epochs_finished[val_fold - 1] = epochs_finished
                h.val_acc[val_fold - 1] = val_acc
                hcm.replace_at_id(ID, h)

        ################################################# COMPILE MODEL

        adam = Adam(lr=h.LEARNING_RATE, clipnorm=1.)
        model.compile(optimizer=adam, loss=my_loss, metrics=None, sample_weight_mode='temporal')

        print('\nModel compiled.\n')

        ################################################# DATA LOADER
        use_multithreading = True
        BUFFER = utils.get_buffer_size_wrt_time_steps(h.TIME_STEPS)

        train_loader, val_loader = tr_utils.create_dataloaders(h.LABEL_MODE, TRAIN_FOLDS, h.TRAIN_SCENES,
                                                               h.BATCH_SIZE,
                                                               h.TIME_STEPS, h.MAX_EPOCHS, h.N_FEATURES,
                                                               h.N_CLASSES,
                                                               [val_fold], h.VAL_STATEFUL,
                                                               BUFFER=BUFFER, use_multithreading=use_multithreading,
                                                               subsample_time_steps=subsample_time_steps)

        ################################################# CALLBACKS
        model_ckp_last = ModelCheckpoint(os.path.join(model_save_dir,
                                                      'model_ckp_epoch_{epoch:02d}-val_acc_{val_final_acc:.3f}.hdf5'),
                                         verbose=1, monitor='val_final_acc')
        model_ckp_last.set_model(model)
        model_ckp_best = ModelCheckpoint(os.path.join(model_save_dir,
                                                      'best_model_ckp_epoch_{epoch:02d}-val_acc_{val_final_acc:.3f}.hdf5'),
                                         verbose=1, monitor='val_final_acc', save_best_only=True)
        model_ckp_best.set_model(model)

        args = [h.OUTPUT_THRESHOLD, h.MASK_VAL, h.MAX_EPOCHS, val_fold_str, GLOBAL_GRADIENT_NORM_PLOT,
                h.RECURRENT_DROPOUT, h.METRIC]

        # training phase
        train_phase = tr_utils.Phase('train', model, train_loader, BUFFER, *args,
                                     no_new_weighting=True if 'nnw' in model_save_dir else False,
                                     subsample_time_steps=subsample_time_steps)

        # validation phase
        val_phase = tr_utils.Phase('val', model, val_loader, BUFFER, *args,
                                   no_new_weighting=True if 'nnw' in model_save_dir else False)

        # needed for early stopping
        best_val_acc = -1 if not model_is_resumed else best_val_acc_
        best_val_acc_bac2 = -1
        best_epoch = 0 if not model_is_resumed else best_epoch_
        epochs_without_improvement = 0 if not model_is_resumed else epochs_without_improvement_

        if model_is_resumed:
            old_metrics = utils.load_metrics(model_save_dir)

            # merge metrics
            h.METRIC = old_metrics['metric']
            train_phase.metric = h.METRIC
            val_phase.metric = h.METRIC

            train_iterations_done = old_metrics['train_losses'].shape[0]
            val_iterations_done = old_metrics['val_losses'].shape[0]
            epochs_done = old_metrics['val_accs'].shape[0]
            if epochs_finished_old is not None:
                epochs_done_old = epochs_done
                epochs_done = epochs_done if epochs_finished > epochs_done else epochs_finished
                train_iterations_done = int(train_iterations_done / epochs_done_old) * epochs_done
                val_iterations_done = int(val_iterations_done / epochs_done_old) * epochs_done

            train_phase.losses = old_metrics['train_losses'].tolist()[:train_iterations_done]
            train_phase.accs = old_metrics['train_accs'].tolist()[:epochs_done]
            val_phase.losses = old_metrics['val_losses'].tolist()[:val_iterations_done]
            val_phase.accs = old_metrics['val_accs'].tolist()[:epochs_done]
            val_phase.accs_bac2 = old_metrics['val_accs_bac2'].tolist()[:epochs_done]
            val_phase.class_accs = old_metrics['val_class_accs'].tolist()[:epochs_done]
            val_phase.class_accs_bac2 = old_metrics['val_class_accs_bac2'].tolist()[:epochs_done]
            val_phase.class_scene_accs = old_metrics['val_class_scene_accs'].tolist()[:epochs_done]
            val_phase.class_scene_accs_bac2 = old_metrics['val_class_scene_accs_bac2'].tolist()[:epochs_done]
            val_phase.scene_accs = old_metrics['val_scene_accs'].tolist()[:epochs_done]
            val_phase.scene_accs_bac2 = old_metrics['val_scene_accs_bac2'].tolist()[:epochs_done]
            train_phase.sens_spec_class_scene = old_metrics['train_sens_spec_class_scene'].tolist()[:epochs_done]
            val_phase.sens_spec_class_scene = old_metrics['val_sens_spec_class_scene'].tolist()[:epochs_done]
            val_phase.sens_spec_class = old_metrics['val_sens_spec_class'].tolist()[:epochs_done]

            if 'global_gradient_norm' in old_metrics:
                train_phase.global_gradient_norms = old_metrics['global_gradient_norm'].tolist()[
                                                    :train_iterations_done]

            best_val_acc = np.max(val_phase.accs)
            best_val_acc_bac2 = old_metrics['val_accs_bac2'][np.argmax(val_phase.accs)]

            # set the dataloaders to correct epoch
            train_phase.resume_from_epoch(h.epochs_finished[val_fold - 1] + 1)
            val_phase.resume_from_epoch(h.epochs_finished[val_fold - 1] + 1)

        stage_was_finished = True

        loss_is_nan = False

        for e in range(h.epochs_finished[val_fold - 1], h.MAX_EPOCHS):

            # early stopping
            if epochs_without_improvement >= h.PATIENCE_IN_EPOCHS and h.PATIENCE_IN_EPOCHS > 0:
                break
            else:
                stage_was_finished = False

            train_loss_is_nan, _ = train_phase.run()
            val_loss_is_nan, _ = val_phase.run()

            if train_loss_is_nan or val_loss_is_nan:
                loss_is_nan = True
                print('\n\n\n---------------------------------------\n\n\n')
                print("ERROR: Training loss is NaN.")
                print('\n\n\n---------------------------------------\n\n\n')
                break

            tr_utils.update_latest_model_ckp(model_ckp_last, model_save_dir, e, val_phase.accs[-1])
            tr_utils.update_best_model_ckp(model_ckp_best, model_save_dir, e, val_phase.accs[-1])

            metrics = {
                'metric': h.METRIC,
                'train_losses': np.array(train_phase.losses),
                'train_accs': np.array(train_phase.accs),
                'val_losses': np.array(val_phase.losses),
                'val_accs': np.array(val_phase.accs),
                'val_accs_bac2': np.array(val_phase.accs_bac2),
                'val_class_accs': np.array(val_phase.class_accs),
                'val_class_accs_bac2': np.array(val_phase.class_accs_bac2),
                'val_class_scene_accs': np.array(val_phase.class_scene_accs),
                'val_class_scene_accs_bac2': np.array(val_phase.class_scene_accs_bac2),
                'val_scene_accs': np.array(val_phase.scene_accs),
                'val_scene_accs_bac2': np.array(val_phase.scene_accs_bac2),
                'train_sens_spec_class_scene': np.array(train_phase.sens_spec_class_scene),
                'val_sens_spec_class_scene': np.array(val_phase.sens_spec_class_scene),
                'val_sens_spec_class': np.array(val_phase.sens_spec_class)
            }

            if GLOBAL_GRADIENT_NORM_PLOT:
                metrics['global_gradient_norm'] = np.array(train_phase.global_gradient_norms)

            utils.pickle_metrics(metrics, model_save_dir)

            if val_phase.accs[-1] > best_val_acc:
                best_val_acc = val_phase.accs[-1]
                best_val_acc_bac2 = val_phase.accs_bac2[-1]
                epochs_without_improvement = 0
                best_epoch = e + 1
            else:
                epochs_without_improvement += 1

            hcm.finish_epoch(ID, h, val_phase.accs[-1], best_val_acc, val_phase.accs_bac2[-1], best_val_acc_bac2,
                             val_fold - 1, e + 1, best_epoch, (timer() - start) / 60)

            if INTERMEDIATE_PLOTS:
                plot.plot_metrics(metrics, model_save_dir)

            if GLOBAL_GRADIENT_NORM_PLOT:
                plot.plot_global_gradient_norm(np.array(train_phase.global_gradient_norms), model_save_dir,
                                               epochs_done=e + 1)

            del metrics

        if not loss_is_nan:

            if not stage_was_finished:

                best_val_class_accuracies_over_folds[val_fold - 1] = val_phase.class_accs[best_epoch - 1]
                best_val_acc_over_folds[val_fold - 1] = val_phase.accs[best_epoch - 1]

                best_val_class_accuracies_over_folds_bac2[val_fold - 1] = val_phase.class_accs_bac2[best_epoch - 1]
                best_val_acc_over_folds_bac2[val_fold - 1] = val_phase.accs_bac2[best_epoch - 1]

                ################################################# CROSS VALIDATION: MEAN AND VARIANCE
                best_val_class_accs_over_folds = np.array(best_val_class_accuracies_over_folds)
                best_val_accs_over_folds = np.array(best_val_acc_over_folds)

                best_val_class_accs_over_folds_bac2 = np.array(best_val_class_accuracies_over_folds_bac2)
                best_val_accs_over_folds_bac2 = np.array(best_val_acc_over_folds_bac2)

                metrics_over_folds = utils.create_metrics_over_folds_dict(best_val_class_accs_over_folds,
                                                                          best_val_accs_over_folds,
                                                                          best_val_class_accs_over_folds_bac2,
                                                                          best_val_accs_over_folds_bac2)

                if h.STAGE > 1:
                    metrics_over_folds_old = utils.load_metrics(model_dir)

                    best_val_class_accs_over_folds += metrics_over_folds_old['best_val_class_accs_over_folds']
                    best_val_accs_over_folds += metrics_over_folds_old['best_val_acc_over_folds']

                    best_val_class_accs_over_folds_bac2 += metrics_over_folds_old['best_val_class_accs_over_folds_bac2']
                    best_val_accs_over_folds_bac2 += metrics_over_folds_old['best_val_acc_over_folds_bac2']

                    metrics_over_folds = utils.create_metrics_over_folds_dict(best_val_class_accs_over_folds,
                                                                              best_val_accs_over_folds,
                                                                              best_val_class_accs_over_folds_bac2,
                                                                              best_val_accs_over_folds_bac2)

                utils.pickle_metrics(metrics_over_folds, model_dir)

                if INTERMEDIATE_PLOTS:
                    plot.plot_metrics(metrics_over_folds, model_dir)

                hcm.finish_stage(ID, h,
                                 metrics_over_folds['best_val_acc_mean_over_folds'],
                                 metrics_over_folds['best_val_acc_std_over_folds'],
                                 metrics_over_folds['best_val_acc_mean_over_folds_bac2'],
                                 metrics_over_folds['best_val_acc_std_over_folds_bac2'],
                                 timer() - start)

            else:
                metrics_over_folds = utils.load_metrics(model_dir)

            # STAGE thresholds
            stage_thresholds = {1: 0.81, 2: 0.81, 3: np.inf}  # 3 is the last stage

            if metrics_over_folds['best_val_acc_mean_over_folds'] >= stage_thresholds[h.STAGE]:
                go_to_next_stage = True

            if go_to_next_stage:
                hcm.next_stage(ID, h)

            else:
                if h.STAGE == 3 or stage_thresholds[h.STAGE] != np.inf:
                    hcm.finish_hcomb(ID, h)

            return go_to_next_stage

        else:
            hcm.finish_hcomb(ID, h)
            return False
Example #10
    # log['args'] = args
    # log['style_names'] = styles[:args.nb_classes]
    # log['style_image_sizes'] = style_sizes
    # log['total_loss'] = []
    # log['style_loss'] = {k: [] for k in args.style_layers}
    # log['content_loss'] = {k: [] for k in args.content_layers}
    # log['tv_loss'] = []

    # save paths
    chkpt_path = args.checkpoint_path
    log_dir = args.log_dir
    weights_path = os.path.splitext(args.checkpoint_path)[0] + "_weights.h5"

    # checkpoints
    model_checkpoint = ModelCheckpoint(chkpt_path, monitor='total_loss')
    model_checkpoint.set_model(pastiche_net)

    # tensorboard
    if log_dir:
        if not os.path.exists(log_dir):
            os.makedirs(log_dir)
        tensorboard = TensorBoard(log_dir=log_dir)
        tensorboard.set_model(pastiche_net)

    start_time = time.time()
    for it in range(args.num_iterations):
        if batch_idx >= batches_per_epoch:
            print('Epoch done. Going back to the beginning...')
            batch_idx = 0

        # Get the batch
Example #11
def train_model(model, data, config, include_tensorboard):
	model_history = History()
	model_history.on_train_begin()
	saver = ModelCheckpoint(full_path(config.model_file()), verbose=1, save_best_only=True, period=1)
	saver.set_model(model)
	early_stopping = EarlyStopping(min_delta=config.min_delta, patience=config.patience, verbose=1)
	early_stopping.set_model(model)
	early_stopping.on_train_begin()
	csv_logger = CSVLogger(full_path(config.csv_log_file()))
	csv_logger.on_train_begin()
	if include_tensorboard:
		tensorboard = TensorBoard(histogram_freq=10, write_images=True)
		tensorboard.set_model(model)
	else:
		tensorboard = Callback()

	epoch = 0
	stop = False
	while epoch <= config.max_epochs and not stop:
		epoch_history = History()
		epoch_history.on_train_begin()
		valid_sizes = []
		train_sizes = []
		print("Epoch:", epoch)
		for dataset in data.datasets:
			print("dataset:", dataset.name)
			model.reset_states()
			dataset.reset_generators()

			valid_sizes.append(dataset.valid_generators[0].size())
			train_sizes.append(dataset.train_generators[0].size())
			fit_history = model.fit_generator(dataset.train_generators[0],
				dataset.train_generators[0].size(), 
				nb_epoch=1, 
				verbose=0, 
				validation_data=dataset.valid_generators[0], 
				nb_val_samples=dataset.valid_generators[0].size())

			epoch_history.on_epoch_end(epoch, last_logs(fit_history))

			train_sizes.append(dataset.train_generators[1].size())
			fit_history = model.fit_generator(dataset.train_generators[1],
				dataset.train_generators[1].size(),
				nb_epoch=1, 
				verbose=0)

			epoch_history.on_epoch_end(epoch, last_logs(fit_history))

		epoch_logs = average_logs(epoch_history, train_sizes, valid_sizes)
		model_history.on_epoch_end(epoch, logs=epoch_logs)
		saver.on_epoch_end(epoch, logs=epoch_logs)
		early_stopping.on_epoch_end(epoch, epoch_logs)
		csv_logger.on_epoch_end(epoch, epoch_logs)
		tensorboard.on_epoch_end(epoch, epoch_logs)
		epoch += 1

		if early_stopping.stopped_epoch > 0:
			stop = True

	early_stopping.on_train_end()
	csv_logger.on_train_end()
	tensorboard.on_train_end({})
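last_logs and average_logs are helpers not shown in this snippet. A plausible sketch of last_logs, which pulls the most recent value of every metric out of the History returned by fit_generator (illustrative only; the original implementation may differ):

def last_logs(history):
    # Take the final recorded value of each metric from a Keras History object.
    return {key: values[-1] for key, values in history.history.items()}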