Example #1
    def testOptimizerWithCallbacks(self):
        np.random.seed(1331)
        input_np = np.random.random((10, 3))
        output_np = np.random.random((10, 4))
        a = input_layer.Input(shape=(3, ), name='input_a')
        model = sequential.Sequential()
        model.add(core.Dense(4, name='dense'))
        model.add(core.Dropout(0.5, name='dropout'))
        model(a)
        optimizer = gradient_descent.SGD(learning_rate=0.1)
        model.compile(optimizer, loss='mse', metrics=['mae'])
        # This does not reduce the LR after the first epoch (due to low delta).
        cbks = [
            callbacks.ReduceLROnPlateau(monitor='val_loss',
                                        factor=0.1,
                                        min_delta=0,
                                        patience=1,
                                        cooldown=5)
        ]
        model.fit(input_np,
                  output_np,
                  batch_size=10,
                  validation_data=(input_np, output_np),
                  callbacks=cbks,
                  epochs=2,
                  verbose=0)
        self.assertAllClose(float(backend.get_value(model.optimizer.lr)),
                            0.1,
                            atol=1e-4)

        # This should reduce the LR after the first epoch (due to high delta).
        cbks = [
            callbacks.ReduceLROnPlateau(monitor='val_loss',
                                        factor=0.1,
                                        min_delta=10,
                                        patience=1,
                                        cooldown=5)
        ]
        model.fit(input_np,
                  output_np,
                  batch_size=10,
                  validation_data=(input_np, output_np),
                  callbacks=cbks,
                  epochs=2,
                  verbose=2)
        self.assertAllClose(float(backend.get_value(model.optimizer.lr)),
                            0.01,
                            atol=1e-4)
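
The second assertion follows from the reduction rule: with `min_delta=10` no epoch counts as an improvement, so once `patience` is exceeded the learning rate is multiplied by `factor`. A minimal sketch of that arithmetic, using the SGD learning rate of 0.1 set above:

initial_lr = 0.1   # from gradient_descent.SGD(learning_rate=0.1) above
factor = 0.1
reduced_lr = initial_lr * factor  # 0.01, the value asserted with atol=1e-4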
Example #2
  def test_validate_callbacks_predefined_callbacks(self):
    supported_predefined_callbacks = [
        callbacks.TensorBoard(),
        callbacks.CSVLogger(filename='./log.csv'),
        callbacks.EarlyStopping(),
        callbacks.ModelCheckpoint(filepath='./checkpoint'),
        callbacks.TerminateOnNaN(),
        callbacks.ProgbarLogger(),
        callbacks.History(),
        callbacks.RemoteMonitor()
    ]

    distributed_training_utils.validate_callbacks(
        supported_predefined_callbacks, adam.Adam())

    unsupported_predefined_callbacks = [
        callbacks.ReduceLROnPlateau(),
        callbacks.LearningRateScheduler(schedule=lambda epoch: 0.001)
    ]

    for callback in unsupported_predefined_callbacks:
      with self.assertRaisesRegex(ValueError,
                                  'You must specify a Keras Optimizer V2'):
        distributed_training_utils.validate_callbacks([callback],
                                                      v1_adam.AdamOptimizer())
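
For contrast, the same metric-based callbacks pass validation when paired with a Keras Optimizer V2. A minimal sketch of the accepted combination, assuming the same imports as the test above:

# ReduceLROnPlateau together with a V2 optimizer does not raise.
distributed_training_utils.validate_callbacks(
    [callbacks.ReduceLROnPlateau()], adam.Adam())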
Example #3
def create_callbacks(early_stopping, model_checkpoint, reduce_lr_on_plateau,
                     tensor_board):
    '''
    Create the list of callbacks

    :param early_stopping: stop training if the 'monitor' quantity does not change for 'patience' epochs
    :param model_checkpoint: save the network weights with the best value of the 'monitor' quantity
    :param reduce_lr_on_plateau: reduce the learning rate during training
    :param tensor_board: enable TensorBoard logging
    :return: list of configured callbacks
    '''
    callbacks_list = []

    # if early_stopping == True:
    #     callbacks_list.append(callbacks.EarlyStopping(monitor='val_acc', patience=7))

    if model_checkpoint:
        callbacks_list.append(
            callbacks.ModelCheckpoint(
                filepath=
                'weight_checkpoints/weights.{epoch:02d}-{val_loss:.2f}.hdf5',
                monitor='val_loss',
                save_best_only=True))

    if reduce_lr_on_plateau:
        callbacks_list.append(
            callbacks.ReduceLROnPlateau(monitor='val_loss',
                                        factor=0.1,
                                        patience=10))

    # if tensor_board == True:
    #     callbacks_list.append(callbacks.TensorBoard(log_dir='log_dir', histogram_freq=1))

    return callbacks_list
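
A hypothetical call site for this factory; `model`, `x_train`, and `y_train` are assumed to be defined elsewhere:

# Sketch only: build the callbacks and hand them to fit().
cbs = create_callbacks(early_stopping=False, model_checkpoint=True,
                       reduce_lr_on_plateau=True, tensor_board=False)
model.fit(x_train, y_train, validation_split=0.2, epochs=50, callbacks=cbs)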
Example #4
def get_callbacks(use_early_stopping=True, use_reduce_lr=True):

    callback_list = []

    if use_early_stopping:

        callback_list.append(
            callbacks.EarlyStopping(monitor='val_loss',
                                    min_delta=0,
                                    patience=10,
                                    verbose=keras_verbosity,
                                    mode='auto'))

    if use_reduce_lr:

        callback_list.append(
            callbacks.ReduceLROnPlateau(monitor='val_loss',
                                        factor=0.1,
                                        patience=5,
                                        verbose=keras_verbosity,
                                        mode='auto',
                                        epsilon=0.0001,
                                        cooldown=0,
                                        min_lr=0))

    return callback_list
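
Note that `epsilon` is the older name of the improvement threshold; recent Keras / tf.keras releases call it `min_delta`. A roughly equivalent construction under that newer API (an assumption about the installed version) would be:

from tensorflow.keras import callbacks as tf_callbacks

# Same settings as above, with epsilon expressed as min_delta.
reduce_lr = tf_callbacks.ReduceLROnPlateau(monitor='val_loss', factor=0.1,
                                           patience=5, verbose=1, mode='auto',
                                           min_delta=0.0001, cooldown=0,
                                           min_lr=0)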
Example #5
def dense_train(space):
    ''' train dense neural network on training / validation set -> give predictions of Y '''

    params = space.copy()

    input_shape = (X_train.shape[-1], )  # input shape depends on x_fields used
    input_img = Input(shape=input_shape)

    init_nodes = params['init_nodes']  # first dense layer - number of nodes
    nodes_mult = params['nodes_mult']  # nodes growth rate
    mult_freq = params['mult_freq']  # grow every X layer
    mult_start = params['mult_start']  # grow from X layer
    end_nodes = params['end_nodes']  # maximum number of nodes

    if params['num_Dense_layer'] < 4:
        params['init_nodes'] = init_nodes = 16

    d_1 = Dense(init_nodes, activation=params['activation'])(
        input_img)  # remove kernel_regularizer=regularizers.l1(params['l1'])
    d_1 = Dropout(params['dropout'])(d_1)

    for i in range(1, params['num_Dense_layer']):
        temp_nodes = int(
            min(
                init_nodes * (2**(nodes_mult * max(
                    (i - mult_start + 3) // mult_freq, 0))), end_nodes))
        d_1 = Dense(temp_nodes, activation=params['activation'])(d_1)

        if i != params[
                'num_Dense_layer'] - 1:  # last dense layer has no dropout
            d_1 = Dropout(params['dropout'])(d_1)

    f_x = Dense(1)(d_1)

    callbacks_list = [
        callbacks.ReduceLROnPlateau(monitor='val_loss',
                                    factor=0.1,
                                    patience=10),
        callbacks.EarlyStopping(monitor='val_loss', patience=10, mode='auto')
    ]  # add callbacks
    lr_val = 10**-int(params['learning_rate'])

    adam = optimizers.Adam(lr=lr_val)
    model = Model(input_img, f_x)
    model.compile(adam, loss='mae')
    model.summary()

    history = model.fit(X_train,
                        Y_train,
                        epochs=50,
                        batch_size=params['batch_size'],
                        validation_data=(X_valid, Y_valid),
                        callbacks=callbacks_list,
                        verbose=1)

    Y_test_pred = model.predict(X_test)
    Y_train_pred = model.predict(X_train)
    Y_valid_pred = model.predict(X_valid)

    return Y_test_pred, Y_train_pred, Y_valid_pred, history
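
A hypothetical `space` dict showing the keys this function reads; the values are illustrative only:

space = {'init_nodes': 32, 'nodes_mult': 1, 'mult_freq': 2, 'mult_start': 2,
         'end_nodes': 128, 'num_Dense_layer': 4, 'activation': 'relu',
         'dropout': 0.2, 'learning_rate': 3,  # 10**-3 -> Adam lr of 0.001
         'batch_size': 64}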
Example #6
    def callableForTestReduceLROnPlateau(model, test_obj, train_ds, num_epoch,
                                         steps, strategy, saving_filepath,
                                         **kwargs):

        cbks = [
            callbacks.ReduceLROnPlateau(monitor='loss',
                                        factor=0.1,
                                        min_delta=1,
                                        patience=1,
                                        cooldown=5,
                                        verbose=1)
        ]

        # It is expected that the learning rate would drop by `factor` within
        # 3 epochs with `min_delta=1`.
        model.fit(x=train_ds, epochs=3, steps_per_epoch=steps, callbacks=cbks)
        test_obj.assertAllClose(float(K.get_value(model.optimizer.lr)),
                                0.0001,
                                atol=1e-8)

        # It is expected that the learning rate would drop by another `factor`
        # within 3 epochs with `min_delta=1`.
        model.fit(x=train_ds, epochs=3, steps_per_epoch=steps, callbacks=cbks)
        test_obj.assertAllClose(float(K.get_value(model.optimizer.lr)),
                                0.00001,
                                atol=1e-8)
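
The two asserted values are consistent with an optimizer that starts from a learning rate of 0.001 (an assumption, since the model is built outside this snippet); each plateau multiplies it by `factor=0.1`:

lr0 = 0.001                               # assumed starting rate
after_first_fit = lr0 * 0.1               # 0.0001
after_second_fit = after_first_fit * 0.1  # 0.00001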
Example #7
def create_learning_rate_reducer(cfg_solver: dict) -> callbacks.ReduceLROnPlateau:
    """Create a ReduceLROnPlateau callback.

    Args:
        cfg_solver: dict, solver subsection of config.

    Returns:
        ReduceLROnPlateau, ReduceLROnPlateau callback.
    """
    params = cfg_solver["learning_rate_reducer"]
    params["verbose"] = 1

    return callbacks.ReduceLROnPlateau(**params)
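
A hypothetical `cfg_solver` fragment this factory could consume; everything under "learning_rate_reducer" is forwarded verbatim to ReduceLROnPlateau:

cfg_solver = {
    "learning_rate_reducer": {
        "monitor": "val_loss",
        "factor": 0.1,
        "patience": 5,
        "min_lr": 1e-6,
    }
}
reduce_lr = create_learning_rate_reducer(cfg_solver)  # verbose is forced to 1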
Example #8
def get_callbacks(args):
    """Define callbacks for distributed training."""
    callbacks = [
        # This is necessary to ensure consistent initialization of all workers
        hvd.callbacks.BroadcastGlobalVariablesCallback(0),
        # Note: must be in the list before the ReduceLROnPlateau or other metrics-based callbacks.
        hvd.callbacks.MetricAverageCallback(),
        # Adjust Learning Rate
        hvd.callbacks.LearningRateWarmupCallback(
            warmup_epochs=args.warmup_epochs)
    ]
    if args.train_only:
        # Reduce learning rate on a schedule
        onethirds_point = int(math.floor(args.epochs / 3))
        twothirds_point = int(math.floor(args.epochs / 3 * 2))
        callbacks.append(
            hvd.callbacks.LearningRateScheduleCallback(
                start_epoch=args.warmup_epochs,
                end_epoch=onethirds_point,
                multiplier=1.))
        callbacks.append(
            hvd.callbacks.LearningRateScheduleCallback(
                start_epoch=onethirds_point,
                end_epoch=twothirds_point,
                multiplier=1e-1))
        callbacks.append(
            hvd.callbacks.LearningRateScheduleCallback(
                start_epoch=twothirds_point,
                end_epoch=args.epochs + 1,
                multiplier=1e-2))
    else:
        # Reduce learning rate on validation loss plateau
        callbacks.append(
            cb.ReduceLROnPlateau(monitor='val_loss',
                                 factor=0.1,
                                 patience=5,
                                 min_lr=0.001,
                                 verbose=1 if hvd.rank() == 0 else 0))
    if args.early_stopping:
        callbacks.append(
            cb.EarlyStopping(monitor='loss',
                             patience=7,
                             restore_best_weights=True))
    print('Callbacks created on rank ' + str(hvd.rank()))
    return callbacks
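
A hypothetical call site; `args`, `model`, and `train_gen` are assumed to come from the surrounding Horovod training script:

model.fit(train_gen,
          epochs=args.epochs,
          callbacks=get_callbacks(args),
          verbose=1 if hvd.rank() == 0 else 0)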
Example #9
def callb(path_checkpoint):
    callback_checkpoint = tf_cb.ModelCheckpoint(
        filepath=path_checkpoint, monitor = 'loss', verbose=1,
        save_weights_only=True, save_best_only=True)

    callback_earlystopping = tf_cb.EarlyStopping(monitor='loss',
                                                 patience=20, verbose=1)
    callback_reduce_lr = tf_cb.ReduceLROnPlateau(monitor='loss',
                                                 factor=0.98,
                                                 min_lr=0.3e-4,
                                                 patience=0,
                                                 verbose=1)
    callBacks = [
        callback_checkpoint,
        callback_earlystopping,
        callback_reduce_lr
    ]
    return callBacks
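
A hypothetical usage of this helper; `model`, `x`, and `y` are assumed to exist elsewhere:

model.fit(x, y, epochs=100,
          callbacks=callb(path_checkpoint='best_weights.h5'))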
Example #10
    def test_TensorBoard_with_ReduceLROnPlateau(self):
        with self.cached_session():
            temp_dir = self.get_temp_dir()
            self.addCleanup(shutil.rmtree, temp_dir, ignore_errors=True)

            (x_train, y_train), (x_test, y_test) = testing_utils.get_test_data(
                train_samples=TRAIN_SAMPLES,
                test_samples=TEST_SAMPLES,
                input_shape=(INPUT_DIM, ),
                num_classes=NUM_CLASSES)
            y_test = np_utils.to_categorical(y_test)
            y_train = np_utils.to_categorical(y_train)

            model = testing_utils.get_small_sequential_mlp(
                num_hidden=NUM_HIDDEN,
                num_classes=NUM_CLASSES,
                input_dim=INPUT_DIM)
            model.compile(loss='binary_crossentropy',
                          optimizer='sgd',
                          metrics=['accuracy'])

            cbks = [
                callbacks.ReduceLROnPlateau(monitor='val_loss',
                                            factor=0.5,
                                            patience=4,
                                            verbose=1),
                callbacks_v1.TensorBoard(log_dir=temp_dir)
            ]

            model.fit(x_train,
                      y_train,
                      batch_size=BATCH_SIZE,
                      validation_data=(x_test, y_test),
                      callbacks=cbks,
                      epochs=2,
                      verbose=0)

            assert os.path.exists(temp_dir)
Example #11
def create_learning_rate_reducer(
        cfg_solver: dict,
        metrics_names: List[str]) -> callbacks.ReduceLROnPlateau:
    """Create a ReduceLROnPlateau callback.

    Args:
        cfg_solver: dict, solver subsection of config.
        metrics_names: list[str], 'metrics' names.

    Returns:
        ReduceLROnPlateau, ReduceLROnPlateau callback.

    Raises:
        ValueError, monitor not in 'metrics' names.
    """
    monitor = cfg_solver["learning_rate_reducer"]["monitor"]
    val_metrics_names = [f"val_{mm}" for mm in metrics_names]
    if (monitor not in metrics_names) and (monitor not in val_metrics_names):
        raise ValueError(
            f"monitor: {monitor} not found in model metrics names: "
            f"{metrics_names + val_metrics_names}")
    params = cfg_solver["learning_rate_reducer"]
    params["verbose"] = 1
    return callbacks.ReduceLROnPlateau(**params)
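
A hypothetical call illustrating both paths of the check; the metric names are illustrative:

cfg_solver = {"learning_rate_reducer": {"monitor": "val_accuracy",
                                        "factor": 0.5, "patience": 3}}
reduce_lr = create_learning_rate_reducer(cfg_solver, metrics_names=["accuracy"])
# A monitor such as "f1" would raise ValueError here, since neither "f1" nor
# "val_f1" appears in the model's metric names.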
Example #12
def train_model_retinanet(model,
                          dataset,
                          backbone,
                          expt='',
                          test_size=.1,
                          n_epoch=10,
                          batch_size=1,
                          num_gpus=None,
                          include_masks=False,
                          panoptic=False,
                          panoptic_weight=1,
                          anchor_params=None,
                          pyramid_levels=['P3', 'P4', 'P5', 'P6', 'P7'],
                          mask_size=(28, 28),
                          optimizer=SGD(lr=0.01,
                                        decay=1e-6,
                                        momentum=0.9,
                                        nesterov=True),
                          log_dir='/data/tensorboard_logs',
                          model_dir='/data/models',
                          model_name=None,
                          sigma=3.0,
                          alpha=0.25,
                          gamma=2.0,
                          score_threshold=0.01,
                          iou_threshold=0.5,
                          max_detections=100,
                          weighted_average=True,
                          lr_sched=rate_scheduler(lr=0.01, decay=0.95),
                          rotation_range=0,
                          flip=True,
                          shear=0,
                          zoom_range=0,
                          seed=None,
                          **kwargs):
    """Train a RetinaNet model from the given backbone

    Adapted from:
        https://github.com/fizyr/keras-retinanet &
        https://github.com/fizyr/keras-maskrcnn
    """

    is_channels_first = K.image_data_format() == 'channels_first'

    if model_name is None:
        todays_date = datetime.datetime.now().strftime('%Y-%m-%d')
        data_name = os.path.splitext(os.path.basename(dataset))[0]
        model_name = '{}_{}_{}'.format(todays_date, data_name, expt)

    model_path = os.path.join(model_dir, '{}.h5'.format(model_name))
    loss_path = os.path.join(model_dir, '{}.npz'.format(model_name))

    train_dict, test_dict = get_data(dataset, seed=seed, test_size=test_size)

    channel_axis = 1 if is_channels_first else -1
    n_classes = model.layers[-1].output_shape[channel_axis]

    if panoptic:
        n_semantic_classes = model.get_layer(
            name='semantic').output_shape[channel_axis]

    # the data, shuffled and split between train and test sets
    print('X_train shape:', train_dict['X'].shape)
    print('y_train shape:', train_dict['y'].shape)
    print('X_test shape:', test_dict['X'].shape)
    print('y_test shape:', test_dict['y'].shape)
    print('Output Shape:', model.layers[-1].output_shape)
    print('Number of Classes:', n_classes)

    if num_gpus is None:
        num_gpus = train_utils.count_gpus()

    if num_gpus >= 1e6:
        batch_size = batch_size * num_gpus
        model = train_utils.MultiGpuModel(model, num_gpus)

    print('Training on {} GPUs'.format(num_gpus))

    # evaluation of model is done on `retinanet_bbox`
    if include_masks:
        prediction_model = model
    else:
        prediction_model = retinanet_bbox(model,
                                          nms=True,
                                          anchor_params=anchor_params,
                                          panoptic=panoptic,
                                          class_specific_filter=False)

    retinanet_losses = losses.RetinaNetLosses(sigma=sigma,
                                              alpha=alpha,
                                              gamma=gamma,
                                              iou_threshold=iou_threshold,
                                              mask_size=mask_size)

    def semantic_loss(y_pred, y_true):
        return panoptic_weight * losses.weighted_categorical_crossentropy(
            y_pred, y_true, n_classes=n_semantic_classes)

    loss = {
        'regression': retinanet_losses.regress_loss,
        'classification': retinanet_losses.classification_loss
    }

    if include_masks:
        loss['masks'] = retinanet_losses.mask_loss

    if panoptic:
        loss['semantic'] = semantic_loss

    model.compile(loss=loss, optimizer=optimizer)

    if num_gpus >= 2:
        # Each GPU must have at least one validation example
        if test_dict['y'].shape[0] < num_gpus:
            raise ValueError('Not enough validation data for {} GPUs. '
                             'Received {} validation sample.'.format(
                                 test_dict['y'].shape[0], num_gpus))

        # When using multiple GPUs and skip_connections,
        # the training data must be evenly distributed across all GPUs
        num_train = train_dict['y'].shape[0]
        nb_samples = num_train - num_train % batch_size
        if nb_samples:
            train_dict['y'] = train_dict['y'][:nb_samples]
            train_dict['X'] = train_dict['X'][:nb_samples]

    # this will do preprocessing and realtime data augmentation
    datagen = image_generators.RetinaNetGenerator(
        # fill_mode='constant',  # for rotations
        rotation_range=rotation_range,
        shear_range=shear,
        zoom_range=zoom_range,
        horizontal_flip=flip,
        vertical_flip=flip)

    datagen_val = image_generators.RetinaNetGenerator(
        # fill_mode='constant',  # for rotations
        rotation_range=0,
        shear_range=0,
        zoom_range=0,
        horizontal_flip=0,
        vertical_flip=0)

    if 'vgg' in backbone or 'densenet' in backbone:
        compute_shapes = make_shapes_callback(model)
    else:
        compute_shapes = guess_shapes

    train_data = datagen.flow(train_dict,
                              seed=seed,
                              include_masks=include_masks,
                              panoptic=panoptic,
                              pyramid_levels=pyramid_levels,
                              anchor_params=anchor_params,
                              compute_shapes=compute_shapes,
                              batch_size=batch_size)

    val_data = datagen_val.flow(test_dict,
                                seed=seed,
                                include_masks=include_masks,
                                panoptic=panoptic,
                                pyramid_levels=pyramid_levels,
                                anchor_params=anchor_params,
                                compute_shapes=compute_shapes,
                                batch_size=batch_size)

    tensorboard_callback = callbacks.TensorBoard(
        log_dir=os.path.join(log_dir, model_name))

    # fit the model on the batches generated by datagen.flow()
    loss_history = model.fit_generator(
        train_data,
        steps_per_epoch=train_data.y.shape[0] // batch_size,
        epochs=n_epoch,
        validation_data=val_data,
        validation_steps=val_data.y.shape[0] // batch_size,
        callbacks=[
            callbacks.LearningRateScheduler(lr_sched),
            callbacks.ModelCheckpoint(model_path,
                                      monitor='val_loss',
                                      verbose=1,
                                      save_best_only=True,
                                      save_weights_only=num_gpus >= 2),
            tensorboard_callback,
            callbacks.ReduceLROnPlateau(monitor='loss',
                                        factor=0.1,
                                        patience=10,
                                        verbose=1,
                                        mode='auto',
                                        min_delta=0.0001,
                                        cooldown=0,
                                        min_lr=0),
            RedirectModel(
                Evaluate(val_data,
                         iou_threshold=iou_threshold,
                         score_threshold=score_threshold,
                         max_detections=max_detections,
                         tensorboard=tensorboard_callback,
                         weighted_average=weighted_average), prediction_model),
        ])

    model.save_weights(model_path)
    np.savez(loss_path, loss_history=loss_history.history)

    average_precisions = evaluate(
        val_data,
        prediction_model,
        iou_threshold=iou_threshold,
        score_threshold=score_threshold,
        max_detections=max_detections,
    )

    # print evaluation
    total_instances = []
    precisions = []
    for label, (average_precision,
                num_annotations) in average_precisions.items():
        print('{:.0f} instances of class'.format(num_annotations), label,
              'with average precision: {:.4f}'.format(average_precision))
        total_instances.append(num_annotations)
        precisions.append(average_precision)

    if sum(total_instances) == 0:
        print('No test instances found.')
    else:
        print(
            'mAP using the weighted average of precisions among classes: {:.4f}'
            .format(
                sum([a * b for a, b in zip(total_instances, precisions)]) /
                sum(total_instances)))
        print('mAP: {:.4f}'.format(
            sum(precisions) / sum(x > 0 for x in total_instances)))

    return model
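
A hypothetical invocation; `retinanet_model`, the dataset path, and the backbone name are assumptions about the surrounding code:

trained_model = train_model_retinanet(model=retinanet_model,
                                      dataset='/data/training_data.npz',
                                      backbone='resnet50',
                                      n_epoch=16,
                                      batch_size=1,
                                      include_masks=False)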
Example #13
print(model.summary())

# '''
file_path = os.path.join(save_dir, model_name)
checkpoint = callbacks.ModelCheckpoint(
    file_path,
    monitor='val_predictions_categorical_accuracy',
    verbose=1,
    save_best_only=True,
    mode='auto',
    save_weights_only=True,
    period=1)
reduce_lr = callbacks.ReduceLROnPlateau(monitor='val_predictions_loss',
                                        factor=0.25,
                                        patience=10,
                                        verbose=1,
                                        mode='auto',
                                        min_delta=1e-6,
                                        cooldown=0,
                                        min_lr=0)
csv_logger = callbacks.CSVLogger(os.path.join(save_dir, 'Log_V1.log'),
                                 separator=',',
                                 append=False)
train_data_generator = Train_data_generator(batch_size)
valid_data_generator = Valid_data_generator(batch_size)

model.fit_generator(generator=train_data_generator,
                    steps_per_epoch=int(210030 / batch_size),
                    epochs=epochs,
                    verbose=1,
                    callbacks=[checkpoint, reduce_lr, csv_logger],
                    validation_data=valid_data_generator,
Example #14
def rnn_train(space):  #functional
    ''' train multi-output GRU model on training / validation set -> give predictions of Y '''
    params = space.copy()

    lookback = 20  # lookback = 5Y * 4Q = 20Q
    x_fields = 10  # lgbm top15 features -> 10 features in rnn

    inputs_loss_weight = 0.1  # loss weights for individual outputs from each rnn model
    dense_loss_weight = 2  # loss weights for final output
    loss_weights = [inputs_loss_weight] * x_fields + [
        dense_loss_weight
    ]  # loss weights for training

    loss = [args.objective] * (
        x_fields + 1)  # use MAE loss function for all inputs and final
    metrics = [args.objective] * (x_fields + 1)

    input_img = Input(shape=(lookback, x_fields))
    outputs = []
    states = []

    for col in range(10):  # build model for each feature

        g_1 = K.expand_dims(
            input_img[:, :, col], axis=2
        )  # add dimension to certain feature: shape = (samples, 20, 1)

        for i in range(params['num_gru_layer']):
            temp_nodes = int(
                min(params['gru_nodes'] * (2**(params['gru_nodes_mult'] * i)),
                    8))
            extra = dict(return_sequences=True)

            if not args.bi:
                if i == params['num_gru_layer'] - 1:
                    extra = dict(return_sequences=False)
                    g_state = GRU(temp_nodes, **extra)(g_1)  # forecast state
                elif i == 0:
                    g_1 = GRU(temp_nodes, **extra)(g_1)
                else:
                    g_1 = GRU(temp_nodes,
                              dropout=params['gru_dropout'],
                              **extra)(g_1)

            else:  # try bidirectional one
                if i == params['num_gru_layer'] - 1:
                    extra = dict(return_sequences=False)
                    g_state = GRU(temp_nodes, **extra)(g_1)  # forecast state
                elif i == 0:
                    g_1 = Bidirectional(GRU(temp_nodes, **extra))(g_1)
                else:
                    g_1 = Bidirectional(
                        GRU(temp_nodes, dropout=params['gru_dropout'],
                            **extra))(g_1)

        g_output = Dense(1)(g_state)

        states.append(g_state)
        outputs.append(g_output)

    f_x = Concatenate(axis=1)(states)
    for i in range(
            params['num_dense_layer']):  # for second or third dense layers
        f_x = Dense(10)(f_x)

    f_x = Dense(1, name='final_dense')(f_x)

    outputs.append(f_x)
    model = Model(
        inputs=input_img,
        outputs=outputs)  # outputs = 10 forecast states + final forecast

    callbacks_list = [
        callbacks.ReduceLROnPlateau(monitor='val_loss',
                                    factor=0.1,
                                    patience=10),
        callbacks.EarlyStopping(monitor='val_loss', patience=10, mode='auto')
    ]  # add callbacks
    lr_val = 10**-int(params['learning_rate'])
    adam = optimizers.Adam(lr=lr_val)
    model.compile(adam, loss=loss, metrics=metrics, loss_weights=loss_weights)
    model.summary()

    history = model.fit(X_train, [Y_train] * (x_fields + 1),
                        epochs=50,
                        batch_size=params['batch_size'],
                        validation_data=(X_valid, [Y_valid] * (x_fields + 1)),
                        verbose=1,
                        callbacks=callbacks_list)

    Y_test_pred = model.predict(X_test)[-1]  # final dense predictions
    Y_train_pred = model.predict(X_train)[-1]
    Y_valid_pred = model.predict(X_valid)[-1]

    return Y_test_pred, Y_train_pred, Y_valid_pred, history
Example #15
def train_model_retinanet(model,
                          dataset,
                          backbone,
                          expt='',
                          test_size=.1,
                          n_epoch=10,
                          batch_size=1,
                          num_gpus=None,
                          include_masks=False,
                          mask_size=(28, 28),
                          optimizer=SGD(lr=0.01,
                                        decay=1e-6,
                                        momentum=0.9,
                                        nesterov=True),
                          log_dir='/data/tensorboard_logs',
                          model_dir='/data/models',
                          model_name=None,
                          sigma=3.0,
                          alpha=0.25,
                          gamma=2.0,
                          score_threshold=0.01,
                          iou_threshold=0.5,
                          max_detections=100,
                          weighted_average=True,
                          lr_sched=rate_scheduler(lr=0.01, decay=0.95),
                          rotation_range=0,
                          flip=True,
                          shear=0,
                          zoom_range=0,
                          **kwargs):
    """Train a RetinaNet model from the given backbone

    Adapted from:
        https://github.com/fizyr/keras-retinanet &
        https://github.com/fizyr/keras-maskrcnn
    """
    is_channels_first = K.image_data_format() == 'channels_first'

    if model_name is None:
        todays_date = datetime.datetime.now().strftime('%Y-%m-%d')
        data_name = os.path.splitext(os.path.basename(dataset))[0]
        model_name = '{}_{}_{}'.format(todays_date, data_name, expt)
    model_path = os.path.join(model_dir, '{}.h5'.format(model_name))
    loss_path = os.path.join(model_dir, '{}.npz'.format(model_name))

    train_dict, test_dict = get_data(dataset, mode='conv', test_size=test_size)

    n_classes = model.layers[-1].output_shape[1 if is_channels_first else -1]
    # the data, shuffled and split between train and test sets
    print('X_train shape:', train_dict['X'].shape)
    print('y_train shape:', train_dict['y'].shape)
    print('X_test shape:', test_dict['X'].shape)
    print('y_test shape:', test_dict['y'].shape)
    print('Output Shape:', model.layers[-1].output_shape)
    print('Number of Classes:', n_classes)

    if num_gpus is None:
        num_gpus = train_utils.count_gpus()

    if num_gpus >= 1e6:
        batch_size = batch_size * num_gpus
        model = train_utils.MultiGpuModel(model, num_gpus)

    print('Training on {} GPUs'.format(num_gpus))

    def regress_loss(y_true, y_pred):
        # separate target and state
        regression = y_pred
        regression_target = y_true[..., :-1]
        anchor_state = y_true[..., -1]

        # filter out "ignore" anchors
        indices = tf.where(K.equal(anchor_state, 1))
        regression = tf.gather_nd(regression, indices)
        regression_target = tf.gather_nd(regression_target, indices)

        # compute the loss
        loss = losses.smooth_l1(regression_target, regression, sigma=sigma)

        # compute the normalizer: the number of positive anchors
        normalizer = K.maximum(1, K.shape(indices)[0])
        normalizer = K.cast(normalizer, dtype=K.floatx())

        return K.sum(loss) / normalizer

    def classification_loss(y_true, y_pred):
        # TODO: try weighted_categorical_crossentropy
        labels = y_true[..., :-1]
        # -1 for ignore, 0 for background, 1 for object
        anchor_state = y_true[..., -1]

        classification = y_pred
        # filter out "ignore" anchors
        indices = tf.where(K.not_equal(anchor_state, -1))
        labels = tf.gather_nd(labels, indices)
        classification = tf.gather_nd(classification, indices)

        # compute the loss
        loss = losses.focal(labels, classification, alpha=alpha, gamma=gamma)

        # compute the normalizer: the number of positive anchors
        normalizer = tf.where(K.equal(anchor_state, 1))
        normalizer = K.cast(K.shape(normalizer)[0], K.floatx())
        normalizer = K.maximum(K.cast_to_floatx(1.0), normalizer)

        return K.sum(loss) / normalizer

    def mask_loss(y_true, y_pred):
        def _mask(y_true, y_pred, iou_threshold=0.5, mask_size=(28, 28)):
            # split up the different predicted blobs
            boxes = y_pred[:, :, :4]
            masks = y_pred[:, :, 4:]

            # split up the different blobs
            annotations = y_true[:, :, :5]
            width = K.cast(y_true[0, 0, 5], dtype='int32')
            height = K.cast(y_true[0, 0, 6], dtype='int32')
            masks_target = y_true[:, :, 7:]

            # reshape the masks back to their original size
            masks_target = K.reshape(masks_target,
                                     (K.shape(masks_target)[0] *
                                      K.shape(masks_target)[1], height, width))
            masks = K.reshape(masks, (K.shape(masks)[0] * K.shape(masks)[1],
                                      mask_size[0], mask_size[1], -1))

            # batch size > 1 fix
            boxes = K.reshape(boxes, (-1, K.shape(boxes)[2]))
            annotations = K.reshape(annotations, (-1, K.shape(annotations)[2]))

            # compute overlap of boxes with annotations
            iou = overlap(boxes, annotations)
            argmax_overlaps_inds = K.argmax(iou, axis=1)
            max_iou = K.max(iou, axis=1)

            # filter those with IoU > 0.5
            indices = tf.where(K.greater_equal(max_iou, iou_threshold))
            boxes = tf.gather_nd(boxes, indices)
            masks = tf.gather_nd(masks, indices)
            argmax_overlaps_inds = tf.gather_nd(argmax_overlaps_inds, indices)
            argmax_overlaps_inds = K.cast(argmax_overlaps_inds, 'int32')
            labels = K.gather(annotations[:, 4], argmax_overlaps_inds)
            labels = K.cast(labels, 'int32')

            # make normalized boxes
            x1 = boxes[:, 0]
            y1 = boxes[:, 1]
            x2 = boxes[:, 2]
            y2 = boxes[:, 3]
            boxes = K.stack([
                y1 / (K.cast(height, dtype=K.floatx()) - 1),
                x1 / (K.cast(width, dtype=K.floatx()) - 1),
                (y2 - 1) / (K.cast(height, dtype=K.floatx()) - 1),
                (x2 - 1) / (K.cast(width, dtype=K.floatx()) - 1),
            ],
                            axis=1)

            # crop and resize masks_target
            # append a fake channel dimension
            masks_target = K.expand_dims(masks_target, axis=3)
            masks_target = tf.image.crop_and_resize(masks_target, boxes,
                                                    argmax_overlaps_inds,
                                                    mask_size)

            # remove fake channel dimension
            masks_target = masks_target[:, :, :, 0]

            # gather the predicted masks using the annotation label
            masks = tf.transpose(masks, (0, 3, 1, 2))
            label_indices = K.stack([tf.range(K.shape(labels)[0]), labels],
                                    axis=1)
            masks = tf.gather_nd(masks, label_indices)

            # compute mask loss
            mask_loss = K.binary_crossentropy(masks_target, masks)
            normalizer = K.shape(masks)[0] * K.shape(masks)[1] * K.shape(
                masks)[2]
            normalizer = K.maximum(K.cast(normalizer, K.floatx()), 1)
            mask_loss = K.sum(mask_loss) / normalizer

            return mask_loss

        # if there are no masks annotations, return 0; else, compute the masks loss
        return tf.cond(
            K.any(K.equal(K.shape(y_true), 0)), lambda: K.cast_to_floatx(0.0),
            lambda: _mask(y_true,
                          y_pred,
                          iou_threshold=iou_threshold,
                          mask_size=mask_size))

    # evaluation of model is done on `retinanet_bbox`
    if include_masks:
        prediction_model = model
    else:
        prediction_model = retinanet_bbox(model,
                                          nms=True,
                                          class_specific_filter=False)

    loss = {'regression': regress_loss, 'classification': classification_loss}

    if include_masks:
        loss['masks'] = mask_loss

    model.compile(loss=loss, optimizer=optimizer)

    if num_gpus >= 2:
        # Each GPU must have at least one validation example
        if test_dict['y'].shape[0] < num_gpus:
            raise ValueError('Not enough validation data for {} GPUs. '
                             'Received {} validation sample.'.format(
                                 test_dict['y'].shape[0], num_gpus))

        # When using multiple GPUs and skip_connections,
        # the training data must be evenly distributed across all GPUs
        num_train = train_dict['y'].shape[0]
        nb_samples = num_train - num_train % batch_size
        if nb_samples:
            train_dict['y'] = train_dict['y'][:nb_samples]
            train_dict['X'] = train_dict['X'][:nb_samples]

    # this will do preprocessing and realtime data augmentation
    datagen = image_generators.RetinaNetGenerator(
        # fill_mode='constant',  # for rotations
        rotation_range=rotation_range,
        shear_range=shear,
        zoom_range=zoom_range,
        horizontal_flip=flip,
        vertical_flip=flip)

    datagen_val = image_generators.RetinaNetGenerator(
        # fill_mode='constant',  # for rotations
        rotation_range=0,
        shear_range=0,
        zoom_range=0,
        horizontal_flip=0,
        vertical_flip=0)

    if 'vgg' in backbone or 'densenet' in backbone:
        compute_shapes = make_shapes_callback(model)
    else:
        compute_shapes = guess_shapes

    train_data = datagen.flow(train_dict,
                              include_masks=include_masks,
                              compute_shapes=compute_shapes,
                              batch_size=batch_size)

    val_data = datagen_val.flow(test_dict,
                                include_masks=include_masks,
                                compute_shapes=compute_shapes,
                                batch_size=batch_size)

    tensorboard_callback = callbacks.TensorBoard(
        log_dir=os.path.join(log_dir, model_name))

    # fit the model on the batches generated by datagen.flow()
    loss_history = model.fit_generator(
        train_data,
        steps_per_epoch=train_data.y.shape[0] // batch_size,
        epochs=n_epoch,
        validation_data=val_data,
        validation_steps=val_data.y.shape[0] // batch_size,
        callbacks=[
            callbacks.LearningRateScheduler(lr_sched),
            callbacks.ModelCheckpoint(model_path,
                                      monitor='val_loss',
                                      verbose=1,
                                      save_best_only=True,
                                      save_weights_only=num_gpus >= 2),
            tensorboard_callback,
            callbacks.ReduceLROnPlateau(monitor='loss',
                                        factor=0.1,
                                        patience=10,
                                        verbose=1,
                                        mode='auto',
                                        min_delta=0.0001,
                                        cooldown=0,
                                        min_lr=0),
            RedirectModel(
                Evaluate(val_data,
                         iou_threshold=iou_threshold,
                         score_threshold=score_threshold,
                         max_detections=max_detections,
                         tensorboard=tensorboard_callback,
                         weighted_average=weighted_average), prediction_model),
        ])

    model.save_weights(model_path)
    np.savez(loss_path, loss_history=loss_history.history)

    average_precisions = evaluate(
        val_data,
        prediction_model,
        iou_threshold=iou_threshold,
        score_threshold=score_threshold,
        max_detections=max_detections,
    )

    # print evaluation
    total_instances = []
    precisions = []
    for label, (average_precision,
                num_annotations) in average_precisions.items():
        print('{:.0f} instances of class'.format(num_annotations), label,
              'with average precision: {:.4f}'.format(average_precision))
        total_instances.append(num_annotations)
        precisions.append(average_precision)

    if sum(total_instances) == 0:
        print('No test instances found.')
    else:
        print(
            'mAP using the weighted average of precisions among classes: {:.4f}'
            .format(
                sum([a * b for a, b in zip(total_instances, precisions)]) /
                sum(total_instances)))
        print('mAP: {:.4f}'.format(
            sum(precisions) / sum(x > 0 for x in total_instances)))

    return model
Example #16
                                                      image_width,
                                                      image_depth),
                                    pooling=None,
                                    size_final_dense=256,
                                    num_classes=num_classes,
                                    trainable=True,
                                    weights=None)
    #model = multi_gpu_model(model, gpus=2)

    # Now train it
    opt_RMSprop = RMSprop(lr=0.0002)
    model.compile(optimizer=opt_RMSprop,
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])
    callback_lr_plateau = callbacks.ReduceLROnPlateau(monitor='val_loss',
                                                      factor=0.2,
                                                      patience=5)

    train_start = time.time()
    os.makedirs(os.path.dirname(dir_tensorboard_logs),
                exist_ok=True)  # Make tensorboard log directory
    model.fit(dataset_train,
              epochs=num_epochs,
              steps_per_epoch=num_steps_per_epoch,
              validation_data=dataset_valid,
              validation_steps=num_steps_per_epoch_valid,
              callbacks=[callback_tensorboard, callback_lr_plateau])
    print("Training time: %s seconds" % (time.time() - train_start))
    print(model.summary())

    # Save the model
Example #17
def main(params):
    '''create save dir if does not exist'''
    try:
        os.stat(params["save_path"])
    except OSError:
        os.makedirs(params["save_path"])
    '''load data files'''
    img_dir = os.path.join(params["data_dir"], "train_images")
    label_dir = os.path.join(params["data_dir"], "train_labels")
    img_val_dir = os.path.join(params["data_dir"], "test_images")
    label_val_dir = os.path.join(params["data_dir"], "test_labels")
    ids_train = [i for i in os.listdir(img_dir)]
    ids_val = [i for i in os.listdir(img_val_dir)]
    num_training_examples = len(ids_train)

    # im_batch, labels_batch, im_displayed \
    #     = lnf.get_clinic_train_data(im_dir=img_dir, seg_dir=label_dir, img_shape=params["img_shape"],
    #                                 batch_size=params["batch_size"])
    #
    # # Running next element in our graph will produce a batch of images
    # plt.figure(figsize=(10, 10))
    #
    # plt.subplot(2, 2, 1)
    # plt.imshow(im_batch[0,:,:,:])
    #
    # plt.subplot(2, 2, 2)
    # plt.imshow(labels_batch[0, :, :, 0])
    #
    # plt.subplot(2, 2, 3)
    # plt.imshow(im_batch[1,:,:,:])
    #
    # plt.subplot(2, 2, 4)
    # plt.imshow(labels_batch[1, :, :, 0])
    #
    # plt.show()
    '''get model'''
    inputs, outputs = model_fn(params["img_shape"])
    model = models.Model(inputs=[inputs], outputs=[outputs])
    '''Compile model'''
    adam = optimizers.Adam(lr=params["learning_rate"],
                           beta_1=0.9,
                           beta_2=0.999,
                           epsilon=None,
                           decay=0.0,
                           amsgrad=False)
    if params["loss_function"] == "dice_loss":
        model.compile(optimizer=adam, loss=dice_loss, metrics=[dice_loss])
    if params["loss_function"] == "bce_dice_loss":
        model.compile(optimizer=adam, loss=bce_dice_loss, metrics=[dice_loss])
    model.summary()
    '''train and save model'''
    save_model_path = os.path.join(params["save_path"], "weights.hdf5")
    cp = tf.keras.callbacks.ModelCheckpoint(filepath=save_model_path,
                                            monitor='val_dice_loss',
                                            save_best_only=True,
                                            verbose=1,
                                            save_weights_only=True)

    learning_rate_reduction = callbacks.ReduceLROnPlateau(
        monitor='val_dice_loss',
        patience=5,
        verbose=0,
        factor=0.5,
        min_lr=0.0001)

    if params["continuing_training"] == True:
        '''Load models trained weights'''
        model = models.load_model(save_model_path,
                                  custom_objects={
                                      'bce_dice_loss': bce_dice_loss,
                                      'dice_loss': dice_loss
                                  })

    for i in range(0, params["epochs"] * len(ids_train)):
        im_batch_val, labels_batch_val, im_displayed_val \
            = lnf.get_clinic_train_data(im_dir=img_val_dir, seg_dir=label_val_dir, img_shape=params["img_shape"],
                                        batch_size=params["batch_size"])
        im_batch, labels_batch, im_displayed \
            = lnf.get_clinic_train_data(im_dir=img_dir, seg_dir=label_dir, img_shape=params["img_shape"],
                                        batch_size=params["batch_size"])
        history = model.fit(x=im_batch,
                            y=labels_batch,
                            steps_per_epoch=2,
                            validation_data=(im_batch_val, labels_batch_val),
                            validation_steps=1,
                            callbacks=[cp, learning_rate_reduction])
    '''Visualize the training process'''
    dice = history.history['dice_loss']
    val_dice = history.history['val_dice_loss']

    loss = history.history['loss']
    val_loss = history.history['val_loss']

    np.save(os.path.join(params["save_path"], "train_loss"), np.array(loss))
    np.save(os.path.join(params["save_path"], "validation_loss"),
            np.array(val_loss))
    np.save(os.path.join(params["save_path"], "train_dice"), np.array(dice))
    np.save(os.path.join(params["save_path"], "validation_dice"),
            np.array(val_dice))
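
A hypothetical `params` dict covering the keys `main` reads; the values are illustrative only:

params = {
    "save_path": "./runs/unet_v1",
    "data_dir": "./data",
    "img_shape": (256, 256, 3),
    "batch_size": 8,
    "learning_rate": 1e-4,
    "loss_function": "bce_dice_loss",
    "continuing_training": False,
    "epochs": 10,
}
main(params)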