Exemplo n.º 1
0
    def get_input_encoder(self):
        """Build the SSD label encoder and store it on the instance.

        The output shapes of six ``*_mbox_conf`` layers are read from
        ``self.model`` and passed, together with the anchor-box settings held
        in ``self.config``, to ``SSDInputEncoder``. The resulting encoder is
        assigned to ``self.ssd_input_encoder``; the method itself returns
        nothing and prints a short confirmation message.
        """
        cfg = self.config

        # Confidence predictor layers (labelled "SSD 300 layers" in the
        # original code); order must match the per-layer config lists.
        conf_layer_names = (
            'conv4_3_norm_mbox_conf',
            'fc7_mbox_conf',
            'conv6_2_mbox_conf',
            'conv7_2_mbox_conf',
            'conv8_2_mbox_conf',
            'conv9_2_mbox_conf',
        )
        # Elements 1:3 of each output shape — presumably (height, width) of
        # the feature map; confirm against the model's data format.
        predictor_sizes = [
            self.model.get_layer(layer_name).output_shape[1:3]
            for layer_name in conf_layer_names
        ]

        encoder_kwargs = dict(
            img_height=cfg.img_height,
            img_width=cfg.img_width,
            n_classes=self.n_classes,
            predictor_sizes=predictor_sizes,
            scales=cfg.scales,
            aspect_ratios_per_layer=cfg.aspect_ratios,
            two_boxes_for_ar1=cfg.two_boxes_for_ar1,
            steps=cfg.steps,
            offsets=cfg.offsets,
            clip_boxes=cfg.clip_boxes,
            variances=cfg.variances,
            matching_type='multi',
            pos_iou_threshold=0.5,
            neg_iou_limit=0.5,
            normalize_coords=cfg.normalize_coords,
        )
        self.ssd_input_encoder = SSDInputEncoder(**encoder_kwargs)
        print(f'created encoder with {self.n_classes} classes')
def main():
    """Build or load an SSD512 model, optionally train it, then plot predictions.

    Relies on names defined outside this function: ``args``, ``Config``,
    ``TF_LOG_PATH``, ``lr_schedule`` and the SSD/Keras helper classes.

    Steps:
      1. If ``args.model_name == 'default'`` a new ``ssd_512`` model is built
         and compiled with Adam and the SSD loss; otherwise a saved model is
         loaded from ``weights/<args.model_name>.h5``.
      2. The training and validation HDF5 datasets under
         ``<cwd>/data/<args.dataset>/`` are opened and wrapped in generators.
      3. When ``args.predict_mode == 'train'`` the model is fitted.
      4. For ``val_dataset_size`` validation samples the predictions are
         decoded, mapped back to original-image coordinates, drawn on the
         original image and saved as ``<cwd>/val_ssd512val_<n>.png``.
    """
    create_new_model = args.model_name == 'default'

    if create_new_model:
        K.clear_session()  # Clear previous models from memory.
        model = ssd_512(image_size=(Config.img_height, Config.img_width,
                                    Config.img_channels),
                        n_classes=Config.n_classes,
                        mode='training',
                        l2_regularization=Config.l2_regularization,
                        scales=Config.scales,
                        aspect_ratios_per_layer=Config.aspect_ratios,
                        two_boxes_for_ar1=Config.two_boxes_for_ar1,
                        steps=Config.steps,
                        offsets=Config.offsets,
                        clip_boxes=Config.clip_boxes,
                        variances=Config.variances,
                        normalize_coords=Config.normalize_coords,
                        subtract_mean=Config.mean_color,
                        swap_channels=Config.swap_channels)

        adam = Adam(lr=0.001,
                    beta_1=0.9,
                    beta_2=0.999,
                    epsilon=1e-08,
                    decay=0.0)
        ssd_loss = SSDLoss(neg_pos_ratio=3, alpha=1.0)
        model.compile(optimizer=adam, loss=ssd_loss.compute_loss)
    else:
        model_path = "weights/" + args.model_name + ".h5"
        # An SSDLoss object is needed so the model loader can resolve the
        # custom `compute_loss` function stored in the saved model.
        ssd_loss = SSDLoss(neg_pos_ratio=3, alpha=1.0)

        K.clear_session()  # Clear previous models from memory.

        model = load_model(model_path,
                           custom_objects={
                               'AnchorBoxes': AnchorBoxes,
                               'L2Normalization': L2Normalization,
                               'compute_loss': ssd_loss.compute_loss
                           })

    # Load the data
    train_dataset = DataGenerator(load_images_into_memory=True,
                                  hdf5_dataset_path=os.getcwd() + "/data/" +
                                  args.dataset + '/polyp_train.h5')
    val_dataset = DataGenerator(load_images_into_memory=True,
                                hdf5_dataset_path=os.getcwd() + "/data/" +
                                args.dataset + '/polyp_val.h5')
    train_dataset_size = train_dataset.get_dataset_size()
    val_dataset_size = val_dataset.get_dataset_size()
    print("Number of images in the training dataset:\t{:>6}".format(
        train_dataset_size))
    print("Number of images in the validation dataset:\t{:>6}".format(
        val_dataset_size))

    batch_size = args.batch_size

    # For the training generator:
    ssd_data_augmentation = SSDDataAugmentation(img_height=Config.img_height,
                                                img_width=Config.img_width,
                                                background=Config.mean_color)

    # For the validation generator:
    convert_to_3_channels = ConvertTo3Channels()
    resize = Resize(height=Config.img_height, width=Config.img_width)

    # Instantiate an encoder that turns ground truth labels into the format
    # needed by the SSD loss function. The encoder constructor is given the
    # output shapes of the model's predictor layers to create the anchor boxes.
    predictor_layer_names = (
        'conv4_3_norm_mbox_conf',
        'fc7_mbox_conf',
        'conv6_2_mbox_conf',
        'conv7_2_mbox_conf',
        'conv8_2_mbox_conf',
        'conv9_2_mbox_conf',
        'conv10_2_mbox_conf',
    )
    predictor_sizes = [
        model.get_layer(layer_name).output_shape[1:3]
        for layer_name in predictor_layer_names
    ]

    ssd_input_encoder = SSDInputEncoder(
        img_height=Config.img_height,
        img_width=Config.img_width,
        n_classes=Config.n_classes,
        predictor_sizes=predictor_sizes,
        scales=Config.scales,
        aspect_ratios_per_layer=Config.aspect_ratios,
        two_boxes_for_ar1=Config.two_boxes_for_ar1,
        steps=Config.steps,
        offsets=Config.offsets,
        clip_boxes=Config.clip_boxes,
        variances=Config.variances,
        matching_type='multi',
        pos_iou_threshold=0.5,
        neg_iou_limit=0.5,
        normalize_coords=Config.normalize_coords)

    # Create the generator handles that are passed to `fit_generator()`.
    train_generator = train_dataset.generate(
        batch_size=batch_size,
        shuffle=True,
        transformations=[ssd_data_augmentation],
        label_encoder=ssd_input_encoder,
        returns={'processed_images', 'encoded_labels'},
        keep_images_without_gt=False)

    val_generator = val_dataset.generate(
        batch_size=batch_size,
        shuffle=False,
        transformations=[convert_to_3_channels, resize],
        label_encoder=ssd_input_encoder,
        returns={'processed_images', 'encoded_labels'},
        keep_images_without_gt=False)

    model_checkpoint = ModelCheckpoint(
        filepath=os.getcwd() +
        '/weights/ssd512_epoch-{epoch:02d}_loss-{loss:.4f}_val_loss-{val_loss:.4f}.h5',
        monitor='val_loss',
        verbose=1,
        save_best_only=True,
        save_weights_only=False,
        mode='auto',
        period=30)

    csv_logger = CSVLogger(filename='ssd512_training_log.csv',
                           separator=',',
                           append=True)
    learning_rate_scheduler = LearningRateScheduler(schedule=lr_schedule)
    terminate_on_nan = TerminateOnNaN()

    tf_log = keras.callbacks.TensorBoard(log_dir=TF_LOG_PATH + args.tf_logs,
                                         histogram_freq=0,
                                         batch_size=batch_size,
                                         write_graph=True,
                                         write_grads=False,
                                         write_images=False)

    callbacks = [
        model_checkpoint, csv_logger, learning_rate_scheduler,
        terminate_on_nan, tf_log
    ]

    # If you're resuming a previous training, set `initial_epoch` and
    # `final_epoch` accordingly.
    initial_epoch = 0
    final_epoch = args.final_epoch
    steps_per_epoch = 500

    # Train/Fit the model
    if args.predict_mode == 'train':
        model.fit_generator(generator=train_generator,
                            steps_per_epoch=steps_per_epoch,
                            epochs=final_epoch,
                            callbacks=callbacks,
                            validation_data=val_generator,
                            validation_steps=ceil(val_dataset_size /
                                                  batch_size),
                            initial_epoch=initial_epoch)

    # Prediction Output
    predict_generator = val_dataset.generate(
        batch_size=1,
        shuffle=True,
        transformations=[convert_to_3_channels, resize],
        label_encoder=None,
        returns={
            'processed_images', 'filenames', 'inverse_transform',
            'original_images', 'original_labels'
        },
        keep_images_without_gt=False)

    def draw_box(axis, xmin, ymin, xmax, ymax, color, label):
        """Draw one labelled, unfilled rectangle on `axis`."""
        axis.add_patch(
            plt.Rectangle((xmin, ymin),
                          xmax - xmin,
                          ymax - ymin,
                          color=color,
                          fill=False,
                          linewidth=2))
        axis.text(xmin,
                  ymin,
                  label,
                  size='x-large',
                  color='white',
                  bbox={
                      'facecolor': color,
                      'alpha': 1.0
                  })

    # Loop-invariant plotting/printing setup, hoisted out of the loop.
    np.set_printoptions(precision=2, suppress=True, linewidth=90)
    # One colour per class index for the predicted bounding boxes.
    colors = plt.cm.hsv(np.linspace(0, 1, Config.n_classes + 1)).tolist()
    # Class names printed onto the image instead of numeric IDs.
    classes = ['background', 'polyps']

    i = 0  # The predict generator uses batch_size=1: one sample per batch.
    for val in range(val_dataset_size):
        (batch_images, batch_filenames, batch_inverse_transforms,
         batch_original_images, batch_original_labels) = next(
             predict_generator)

        y_pred = model.predict(batch_images)

        y_pred_decoded = decode_detections(
            y_pred,
            confidence_thresh=0.5,
            iou_threshold=0.4,
            top_k=200,
            normalize_coords=Config.normalize_coords,
            img_height=Config.img_height,
            img_width=Config.img_width)

        # Convert the predictions back to original-image coordinates.
        y_pred_decoded_inv = apply_inverse_transforms(
            y_pred_decoded, batch_inverse_transforms)

        print("Predicted boxes:\n")
        print('   class   conf xmin   ymin   xmax   ymax')
        print(y_pred_decoded_inv[i])

        figure = plt.figure(figsize=(20, 12))
        # Both the original labels and the inverse-transformed predictions
        # are in original-image coordinates, so the original (un-resized)
        # image must be shown for the boxes to line up.
        plt.imshow(batch_original_images[i])

        current_axis = plt.gca()

        # Ground truth boxes: (class, xmin, ymin, xmax, ymax), drawn in green.
        for box in batch_original_labels[i]:
            label = '{}'.format(classes[int(box[0])])
            draw_box(current_axis, box[1], box[2], box[3], box[4], 'green',
                     label)

        # Predicted boxes: (class, conf, xmin, ymin, xmax, ymax), coloured
        # per class.
        for box in y_pred_decoded_inv[i]:
            color = colors[int(box[0])]
            label = '{}: {:.2f}'.format(classes[int(box[0])], box[1])
            draw_box(current_axis, box[2], box[3], box[4], box[5], color,
                     label)

        plt.draw()
        figure.savefig(os.getcwd() + "/val_ssd512val_" + str(val) + ".png",
                       dpi=100)
        # Release the figure; otherwise one 20x12 figure per validation
        # sample stays alive for the whole run.
        plt.close(figure)
Exemplo n.º 3
0
def get_dataset(
    args: argparse.Namespace, model: Model
) -> Tuple[Iterable[List[np.array]], Iterable[List[np.array]], int]:
    """Create Pascal VOC 07+12 training and VOC 07 test generators.

    The VOC XML annotations under ``args.data_dir`` are parsed, both datasets
    are written to HDF5 files in the current working directory, and two
    generators that yield ``processed_images`` / ``encoded_labels`` batches
    are created.

    Args:
        args: Namespace providing ``data_dir``, ``img_height``, ``img_width``,
            ``mean_color``, ``n_classes``, ``scales``, ``aspect_ratios``,
            ``two_boxes_for_ar1``, ``steps``, ``offsets``, ``clip_boxes``,
            ``variances``, ``normalize_coords`` and ``batch_size``.
        model: Model whose ``*_mbox_conf`` layers' output shapes are used to
            configure the label encoder.

    Returns:
        A tuple ``(train_generator, val_generator, val_dataset_size)``.
    """
    train_dataset = DataGenerator(load_images_into_memory=False,
                                  hdf5_dataset_path=None)
    val_dataset = DataGenerator(load_images_into_memory=False,
                                hdf5_dataset_path=None)

    # NOTE: the relative parts must NOT start with '/'. os.path.join discards
    # every preceding component when it meets an absolute one, which would
    # silently drop `args.data_dir` and point at the filesystem root.
    data_dir = args.data_dir
    VOC_2007_images_dir = os.path.join(data_dir, 'VOC2007/JPEGImages/')
    VOC_2012_images_dir = os.path.join(data_dir, 'VOC2012/JPEGImages/')

    VOC_2007_annotations_dir = os.path.join(data_dir, 'VOC2007/Annotations/')
    VOC_2012_annotations_dir = os.path.join(data_dir, 'VOC2012/Annotations/')

    VOC_2007_trainval_image_set_filename = os.path.join(
        data_dir, 'VOC2007/ImageSets/Main/trainval.txt')
    VOC_2012_trainval_image_set_filename = os.path.join(
        data_dir, 'VOC2012/ImageSets/Main/trainval.txt')
    VOC_2007_test_image_set_filename = os.path.join(
        data_dir, 'VOC2007/ImageSets/Main/test.txt')

    classes = [
        'background', 'aeroplane', 'bicycle', 'bird', 'boat', 'bottle', 'bus',
        'car', 'cat', 'chair', 'cow', 'diningtable', 'dog', 'horse',
        'motorbike', 'person', 'pottedplant', 'sheep', 'sofa', 'train',
        'tvmonitor'
    ]

    # Training set: VOC 2007 trainval + VOC 2012 trainval.
    train_dataset.parse_xml(
        images_dirs=[VOC_2007_images_dir, VOC_2012_images_dir],
        image_set_filenames=[
            VOC_2007_trainval_image_set_filename,
            VOC_2012_trainval_image_set_filename
        ],
        annotations_dirs=[VOC_2007_annotations_dir, VOC_2012_annotations_dir],
        classes=classes,
        include_classes='all',
        exclude_truncated=False,
        exclude_difficult=False,
        ret=False)

    # Validation set: VOC 2007 test, with "difficult" objects excluded.
    val_dataset.parse_xml(
        images_dirs=[VOC_2007_images_dir],
        image_set_filenames=[VOC_2007_test_image_set_filename],
        annotations_dirs=[VOC_2007_annotations_dir],
        classes=classes,
        include_classes='all',
        exclude_truncated=False,
        exclude_difficult=True,
        ret=False)

    train_dataset.create_hdf5_dataset(
        file_path='dataset_pascal_voc_07+12_trainval.h5',
        resize=False,
        variable_image_size=True,
        verbose=True)

    val_dataset.create_hdf5_dataset(file_path='dataset_pascal_voc_07_test.h5',
                                    resize=False,
                                    variable_image_size=True,
                                    verbose=True)

    # For the training generator:
    ssd_data_augmentation = SSDDataAugmentation(img_height=args.img_height,
                                                img_width=args.img_width,
                                                background=args.mean_color)

    # For the validation generator:
    convert_to_3_channels = ConvertTo3Channels()
    resize = Resize(height=args.img_height, width=args.img_width)

    # Instantiate an encoder that turns ground truth labels into the format
    # needed by the SSD loss function. The encoder constructor is given the
    # output shapes of the model's predictor layers to create the anchor boxes.
    predictor_layer_names = (
        'conv4_3_norm_mbox_conf',
        'fc7_mbox_conf',
        'conv6_2_mbox_conf',
        'conv7_2_mbox_conf',
        'conv8_2_mbox_conf',
        'conv9_2_mbox_conf',
    )
    predictor_sizes = [
        model.get_layer(layer_name).output_shape[1:3]
        for layer_name in predictor_layer_names
    ]

    ssd_input_encoder = SSDInputEncoder(
        img_height=args.img_height,
        img_width=args.img_width,
        n_classes=args.n_classes,
        predictor_sizes=predictor_sizes,
        scales=args.scales,
        aspect_ratios_per_layer=args.aspect_ratios,
        two_boxes_for_ar1=args.two_boxes_for_ar1,
        steps=args.steps,
        offsets=args.offsets,
        clip_boxes=args.clip_boxes,
        variances=args.variances,
        matching_type='multi',
        pos_iou_threshold=0.5,
        neg_iou_limit=0.5,
        normalize_coords=args.normalize_coords)

    train_generator = train_dataset.generate(
        batch_size=args.batch_size,
        shuffle=True,
        transformations=[ssd_data_augmentation],
        label_encoder=ssd_input_encoder,
        returns={'processed_images', 'encoded_labels'},
        keep_images_without_gt=False)

    val_generator = val_dataset.generate(
        batch_size=args.batch_size,
        shuffle=False,
        transformations=[convert_to_3_channels, resize],
        label_encoder=ssd_input_encoder,
        returns={'processed_images', 'encoded_labels'},
        keep_images_without_gt=False)
    return train_generator, val_generator, val_dataset.get_dataset_size()
def main():
    """Build or load an SSD7 model, optionally train it, then plot predictions.

    Relies on names defined outside this function: ``args``, ``Config``,
    ``TF_LOG_PATH``, ``build_model`` and the SSD/Keras helper classes.

    Steps:
      1. If ``args.model_name == 'default'`` a new model is built with
         ``build_model`` and compiled with Adam and the SSD loss; otherwise a
         saved model is loaded from ``weights/<args.model_name>.h5``.
      2. The training and validation HDF5 datasets under
         ``<cwd>/data/<args.dataset>/`` are opened and wrapped in generators.
      3. When ``args.predict_mode == 'train'`` the model is fitted.
      4. For ``val_dataset_size`` validation samples the predictions are
         decoded, drawn together with the ground truth and saved as
         ``<cwd>/val_predictions/val_<n>.png``.
    """
    create_new_model = args.model_name == 'default'

    if create_new_model:
        K.clear_session()  # Clear previous models from memory.
        model = build_model(image_size=(Config.img_height, Config.img_width,
                                        Config.img_channels),
                            n_classes=Config.n_classes,
                            mode='training',
                            l2_regularization=Config.l2_regularization,
                            scales=Config.scales,
                            aspect_ratios_global=Config.aspect_ratios,
                            aspect_ratios_per_layer=None,
                            two_boxes_for_ar1=Config.two_boxes_for_ar1,
                            steps=Config.steps,
                            offsets=Config.offsets,
                            clip_boxes=Config.clip_boxes,
                            variances=Config.variances,
                            normalize_coords=Config.normalize_coords,
                            subtract_mean=Config.intensity_mean,
                            divide_by_stddev=Config.intensity_range)

        adam = Adam(lr=args.learning_rate,
                    beta_1=0.9,
                    beta_2=0.999,
                    epsilon=1e-08,
                    decay=0.0)
        ssd_loss = SSDLoss(neg_pos_ratio=3, alpha=1.0)
        model.compile(optimizer=adam, loss=ssd_loss.compute_loss)
    else:
        model_path = "weights/" + args.model_name + ".h5"
        # An SSDLoss object is needed so the model loader can resolve the
        # custom `compute_loss` function stored in the saved model.
        ssd_loss = SSDLoss(neg_pos_ratio=3, alpha=1.0)
        K.clear_session()  # Clear previous models from memory.
        model = load_model(model_path,
                           custom_objects={
                               'AnchorBoxes': AnchorBoxes,
                               'compute_loss': ssd_loss.compute_loss
                           })

    # Load the data
    train_dataset = DataGenerator(load_images_into_memory=True,
                                  hdf5_dataset_path=os.getcwd() + "/data/" +
                                  args.dataset + '/polyp_train.h5')
    val_dataset = DataGenerator(load_images_into_memory=True,
                                hdf5_dataset_path=os.getcwd() + "/data/" +
                                args.dataset + '/polyp_val.h5')
    train_dataset_size = train_dataset.get_dataset_size()
    val_dataset_size = val_dataset.get_dataset_size()
    print("Number of images in the training dataset:\t{:>6}".format(
        train_dataset_size))
    print("Number of images in the validation dataset:\t{:>6}".format(
        val_dataset_size))

    batch_size = args.batch_size

    # Define the image processing chain used for training.
    data_augmentation_chain = DataAugmentationConstantInputSize(
        random_brightness=(-48, 48, 0.5),
        random_contrast=(0.5, 1.8, 0.5),
        random_saturation=(0.5, 1.8, 0.5),
        random_hue=(18, 0.5),
        random_flip=0.5,
        random_translate=((0.03, 0.5), (0.03, 0.5), 0.5),
        random_scale=(0.5, 2.0, 0.5),
        n_trials_max=3,
        clip_boxes=True,
        overlap_criterion='area',
        bounds_box_filter=(0.3, 1.0),
        bounds_validator=(0.5, 1.0),
        n_boxes_min=1,
        background=(0, 0, 0))

    # Instantiate an encoder that turns ground truth labels into the format
    # needed by the SSD loss function. The encoder constructor is given the
    # output shapes of the model's predictor layers to create the anchor boxes.
    predictor_layer_names = ('classes4', 'classes5', 'classes6', 'classes7')
    predictor_sizes = [
        model.get_layer(layer_name).output_shape[1:3]
        for layer_name in predictor_layer_names
    ]

    ssd_input_encoder = SSDInputEncoder(
        img_height=Config.img_height,
        img_width=Config.img_width,
        n_classes=Config.n_classes,
        predictor_sizes=predictor_sizes,
        scales=Config.scales,
        aspect_ratios_global=Config.aspect_ratios,
        two_boxes_for_ar1=Config.two_boxes_for_ar1,
        steps=Config.steps,
        offsets=Config.offsets,
        clip_boxes=Config.clip_boxes,
        variances=Config.variances,
        matching_type='multi',
        pos_iou_threshold=0.5,
        neg_iou_limit=0.3,
        normalize_coords=Config.normalize_coords)

    # Create the generator handles that are passed to `fit_generator()`.
    train_generator = train_dataset.generate(
        batch_size=batch_size,
        shuffle=True,
        transformations=[data_augmentation_chain],
        label_encoder=ssd_input_encoder,
        returns={'processed_images', 'encoded_labels'},
        keep_images_without_gt=False)

    val_generator = val_dataset.generate(
        batch_size=batch_size,
        shuffle=False,
        transformations=[],
        label_encoder=ssd_input_encoder,
        returns={'processed_images', 'encoded_labels'},
        keep_images_without_gt=False)
    model_checkpoint = ModelCheckpoint(
        filepath=os.getcwd() +
        '/weights/ssd7_epoch-{epoch:02d}_loss-{loss:.4f}_val_loss-{val_loss:.4f}.h5',
        monitor='val_loss',
        verbose=1,
        save_best_only=True,
        save_weights_only=False,
        mode='auto',
        period=1)

    csv_logger = CSVLogger(filename='ssd7_training_log.csv',
                           separator=',',
                           append=True)
    # NOTE(review): this callback is constructed but not included in
    # `callbacks` below, so early stopping is currently inactive. Confirm
    # whether that is intentional before adding or deleting it.
    early_stopping = EarlyStopping(monitor='val_loss',
                                   min_delta=0.0,
                                   patience=10,
                                   verbose=1)
    reduce_learning_rate = ReduceLROnPlateau(monitor='val_loss',
                                             factor=0.2,
                                             patience=8,
                                             verbose=1,
                                             epsilon=0.001,
                                             cooldown=0,
                                             min_lr=0.00001)

    tf_log = keras.callbacks.TensorBoard(log_dir=TF_LOG_PATH + args.tf_logs,
                                         histogram_freq=0,
                                         batch_size=batch_size,
                                         write_graph=True,
                                         write_grads=False,
                                         write_images=False)

    callbacks = [model_checkpoint, csv_logger, reduce_learning_rate, tf_log]

    # If you're resuming a previous training, set `initial_epoch` and
    # `final_epoch` accordingly.
    initial_epoch = 0
    final_epoch = args.final_epoch
    steps_per_epoch = 1000

    # Train/Fit the model
    if args.predict_mode == 'train':
        model.fit_generator(generator=train_generator,
                            steps_per_epoch=steps_per_epoch,
                            epochs=final_epoch,
                            callbacks=callbacks,
                            validation_data=val_generator,
                            validation_steps=ceil(val_dataset_size /
                                                  batch_size),
                            initial_epoch=initial_epoch)

    # Prediction Output
    predict_generator = val_dataset.generate(
        batch_size=1,
        shuffle=False,
        transformations=[],
        label_encoder=ssd_input_encoder,
        returns={'processed_images', 'processed_labels', 'filenames'},
        keep_images_without_gt=False)

    def draw_box(axis, xmin, ymin, xmax, ymax, color, label):
        """Draw one labelled, unfilled rectangle on `axis`."""
        axis.add_patch(
            plt.Rectangle((xmin, ymin),
                          xmax - xmin,
                          ymax - ymin,
                          color=color,
                          fill=False,
                          linewidth=2))
        axis.text(xmin,
                  ymin,
                  label,
                  size='x-large',
                  color='white',
                  bbox={
                      'facecolor': color,
                      'alpha': 1.0
                  })

    # Make sure the output directory exists; savefig does not create it.
    os.makedirs(os.getcwd() + "/val_predictions", exist_ok=True)

    # Loop-invariant plotting/printing setup, hoisted out of the loop.
    np.set_printoptions(precision=2, suppress=True, linewidth=90)
    # One colour per class index for the predicted bounding boxes.
    colors = plt.cm.hsv(np.linspace(0, 1, Config.n_classes + 1)).tolist()
    # Class names printed onto the image instead of numeric IDs.
    classes = ['background', 'polyps']

    i = 0  # The predict generator uses batch_size=1: one sample per batch.
    for val in range(val_dataset_size):
        batch_images, batch_labels, batch_filenames = next(predict_generator)
        y_pred = model.predict(batch_images)

        y_pred_decoded = decode_detections(
            y_pred,
            confidence_thresh=0.5,
            iou_threshold=0.5,
            top_k=200,
            normalize_coords=Config.normalize_coords,
            img_height=Config.img_height,
            img_width=Config.img_width)

        print("Predicted boxes:\n")
        print('   class   conf xmin   ymin   xmax   ymax')
        print(y_pred_decoded[i])

        figure = plt.figure(figsize=(20, 12))
        plt.imshow(batch_images[i])

        current_axis = plt.gca()

        # Draw the ground truth boxes, with their class name, in green.
        # Each box is (class, xmin, ymin, xmax, ymax).
        for box in batch_labels[i]:
            label = '{}'.format(classes[int(box[0])])
            draw_box(current_axis, box[1], box[2], box[3], box[4], 'green',
                     label)

        # Draw the predicted boxes, coloured by predicted class. The last
        # four entries of each box are xmin, ymin, xmax, ymax.
        for box in y_pred_decoded[i]:
            color = colors[int(box[0])]
            label = '{}: {:.2f}'.format(classes[int(box[0])], box[1])
            draw_box(current_axis, box[-4], box[-3], box[-2], box[-1], color,
                     label)

        plt.draw()
        figure.savefig(os.getcwd() + "/val_predictions/val_" + str(val) +
                       ".png",
                       dpi=100)
        # Release the figure; otherwise one 20x12 figure per validation
        # sample stays alive for the whole run.
        plt.close(figure)
Exemplo n.º 5
0
def _main_(args):
    """Train an SSD300 detector on Pascal VOC 2007, then plot one prediction.

    The steps run strictly in sequence: build and compile the network, parse
    the VOC 2007 train/val image sets, dump both datasets to HDF5, fit the
    model with ``fit_generator`` and finally decode and draw the detections
    for a single validation image next to its ground-truth boxes.

    Args:
        args: Object exposing a string attribute ``desc``; it is only echoed
            in the greeting line.
    """
    print(f'Hello World! This is {args.desc:s}')

    # ------------------------------------------------------------------
    # Model hyper-parameters
    # ------------------------------------------------------------------
    input_h, input_w, input_c = 300, 300, 3  # input height, width, colour channels
    # Per-channel mean of the dataset images. Leave untouched when using any
    # of the pre-trained weights.
    channel_mean = [123, 117, 104]
    # The original SSD works on BGR, so the model reverses the channel order.
    channel_order = [2, 1, 0]
    num_classes = 20  # positive classes only: 20 for Pascal VOC, 80 for MS COCO
    normalize_coords = True

    # Aspect ratios of the original SSD300; the order of the layers matters.
    three_ratios = [1.0, 2.0, 0.5]
    five_ratios = [1.0, 2.0, 0.5, 3.0, 1.0 / 3.0]

    # Anchor-box settings shared by the network and the label encoder.
    anchor_kwargs = dict(
        # Pascal VOC scaling factors of the original SSD300. The MS COCO
        # equivalent is [0.07, 0.15, 0.33, 0.51, 0.69, 0.87, 1.05].
        scales=[0.1, 0.2, 0.37, 0.54, 0.71, 0.88, 1.05],
        aspect_ratios_per_layer=[list(three_ratios),
                                 list(five_ratios),
                                 list(five_ratios),
                                 list(five_ratios),
                                 list(three_ratios),
                                 list(three_ratios)],
        two_boxes_for_ar1=True,
        # Distance between adjacent anchor centres, per predictor layer.
        steps=[8, 16, 32, 64, 100, 300],
        # Offset of the first anchor centre from the top/left image border,
        # as a fraction of the step size, per predictor layer.
        offsets=[0.5, 0.5, 0.5, 0.5, 0.5, 0.5],
        clip_boxes=False,  # do not clip anchors to the image boundaries
        # Divisors applied to the encoded target coordinates.
        variances=[0.1, 0.1, 0.2, 0.2],
        normalize_coords=normalize_coords,
    )

    # ------------------------------------------------------------------
    # Network, optimizer and loss
    # ------------------------------------------------------------------
    model = ssd_300(image_size=(input_h, input_w, input_c),
                    n_classes=num_classes,
                    mode='training',
                    l2_regularization=0.0005,
                    subtract_mean=channel_mean,
                    swap_channels=channel_order,
                    **anchor_kwargs)
    # No pre-trained weights are loaded here; training starts from scratch.

    optimizer = Adam(lr=0.001,
                     beta_1=0.9,
                     beta_2=0.999,
                     epsilon=1e-08,
                     decay=0.0)
    loss_fn = SSDLoss(neg_pos_ratio=3, alpha=1.0)
    model.compile(optimizer=optimizer, loss=loss_fn.compute_loss)

    # ------------------------------------------------------------------
    # Datasets
    # ------------------------------------------------------------------
    train_dataset = DataGenerator(load_images_into_memory=False,
                                  hdf5_dataset_path=None)
    val_dataset = DataGenerator(load_images_into_memory=False,
                                hdf5_dataset_path=None)

    voc07_images = '/home/minhnc-lab/WORKSPACES/AI/data/VOC/VOCtrainval_06-Nov-2007/VOCdevkit/VOC2007/JPEGImages'
    voc07_annotations = '/home/minhnc-lab/WORKSPACES/AI/data/VOC/VOCtrainval_06-Nov-2007/VOCdevkit/VOC2007/Annotations'
    voc07_train_set = '/home/minhnc-lab/WORKSPACES/AI/data/VOC/VOCtrainval_06-Nov-2007/VOCdevkit/VOC2007/ImageSets/Main/train.txt'
    voc07_val_set = '/home/minhnc-lab/WORKSPACES/AI/data/VOC/VOCtrainval_06-Nov-2007/VOCdevkit/VOC2007/ImageSets/Main/val.txt'

    # Index 0 is the background class; the list is reused for plot captions.
    classes = ['background',
               'aeroplane', 'bicycle', 'bird', 'boat',
               'bottle', 'bus', 'car', 'cat',
               'chair', 'cow', 'diningtable', 'dog',
               'horse', 'motorbike', 'person', 'pottedplant',
               'sheep', 'sofa', 'train', 'tvmonitor']

    # Parsing can take a while. Only the validation split excludes
    # "difficult" objects.
    for dataset, image_set_file, drop_difficult in (
            (train_dataset, voc07_train_set, False),
            (val_dataset, voc07_val_set, True)):
        dataset.parse_xml(images_dirs=[voc07_images],
                          image_set_filenames=[image_set_file],
                          annotations_dirs=[voc07_annotations],
                          classes=classes,
                          include_classes='all',
                          exclude_truncated=False,
                          exclude_difficult=drop_difficult,
                          ret=False)

    for dataset, hdf5_path in (
            (train_dataset, 'dataset_pascal_voc_07+12_trainval.h5'),
            (val_dataset, 'dataset_pascal_voc_07_test.h5')):
        dataset.create_hdf5_dataset(file_path=hdf5_path,
                                    resize=False,
                                    variable_image_size=True,
                                    verbose=True)

    # Lower the batch size if GPU memory becomes a problem.
    batch_size = 8

    # Pre-processing / augmentation chains.
    train_augmentation = SSDDataAugmentation(img_height=input_h,
                                             img_width=input_w,
                                             background=channel_mean)
    to_three_channels = ConvertTo3Channels()
    to_input_size = Resize(height=input_h, width=input_w)

    # The encoder needs the spatial size of every predictor layer in order
    # to lay out the anchor boxes.
    predictor_layers = ('conv4_3_norm_mbox_conf',
                        'fc7_mbox_conf',
                        'conv6_2_mbox_conf',
                        'conv7_2_mbox_conf',
                        'conv8_2_mbox_conf',
                        'conv9_2_mbox_conf')
    predictor_sizes = [model.get_layer(layer_name).output_shape[1:3]
                       for layer_name in predictor_layers]

    label_encoder = SSDInputEncoder(img_height=input_h,
                                    img_width=input_w,
                                    n_classes=num_classes,
                                    predictor_sizes=predictor_sizes,
                                    matching_type='multi',
                                    pos_iou_threshold=0.5,
                                    neg_iou_limit=0.5,
                                    **anchor_kwargs)

    # Generator handles consumed by Keras' `fit_generator()`.
    train_generator = train_dataset.generate(
        batch_size=batch_size,
        shuffle=True,
        transformations=[train_augmentation],
        label_encoder=label_encoder,
        returns={'processed_images', 'encoded_labels'},
        keep_images_without_gt=False)
    val_generator = val_dataset.generate(
        batch_size=batch_size,
        shuffle=False,
        transformations=[to_three_channels, to_input_size],
        label_encoder=label_encoder,
        returns={'processed_images', 'encoded_labels'},
        keep_images_without_gt=False)

    n_train = train_dataset.get_dataset_size()
    n_val = val_dataset.get_dataset_size()
    print(f"Number of images in the training dataset:\t{n_train:>6}")
    print(f"Number of images in the validation dataset:\t{n_val:>6}")

    # ------------------------------------------------------------------
    # Training
    # ------------------------------------------------------------------
    callbacks = [
        ModelCheckpoint(
            filepath='ssd300_pascal_07+12_epoch-{epoch:02d}_loss-{loss:.4f}_val_loss-{val_loss:.4f}.h5',
            monitor='val_loss',
            verbose=1,
            save_best_only=True,
            save_weights_only=False,
            mode='auto',
            period=1),
        CSVLogger(filename='ssd300_pascal_07+12_training_log.csv',
                  separator=',',
                  append=True),
        LearningRateScheduler(schedule=lr_schedule, verbose=1),
        TerminateOnNaN(),
    ]

    first_epoch, last_epoch, steps_per_epoch = 0, 120, 1000
    model.fit_generator(generator=train_generator,
                        steps_per_epoch=steps_per_epoch,
                        epochs=last_epoch,
                        callbacks=callbacks,
                        validation_data=val_generator,
                        validation_steps=ceil(n_val / batch_size),
                        initial_epoch=first_epoch)

    # ------------------------------------------------------------------
    # Evaluation on a single validation sample
    # ------------------------------------------------------------------
    predict_generator = val_dataset.generate(
        batch_size=1,
        shuffle=True,
        transformations=[to_three_channels, to_input_size],
        label_encoder=None,
        returns={'processed_images',
                 'filenames',
                 'inverse_transform',
                 'original_images',
                 'original_labels'},
        keep_images_without_gt=False)

    (batch_images,
     batch_filenames,
     batch_inverse_transforms,
     batch_original_images,
     batch_original_labels) = next(predict_generator)

    sample = 0  # index of the batch item to inspect

    print("Image:", batch_filenames[sample])
    print()
    print("Ground truth boxes:\n")
    print(np.array(batch_original_labels[sample]))

    raw_predictions = model.predict(batch_images)

    decoded = decode_detections(raw_predictions,
                                confidence_thresh=0.5,
                                iou_threshold=0.4,
                                top_k=200,
                                normalize_coords=normalize_coords,
                                img_height=input_h,
                                img_width=input_w)

    # Map the boxes back onto the coordinates of the un-resized image.
    decoded_original = apply_inverse_transforms(decoded,
                                                batch_inverse_transforms)
    np.set_printoptions(precision=2, suppress=True, linewidth=90)
    print("Predicted boxes:\n")
    print('   class   conf xmin   ymin   xmax   ymax')
    print(decoded_original[sample])

    # ------------------------------------------------------------------
    # Plot ground truth (green) and predictions (one colour per class)
    # ------------------------------------------------------------------
    colors = plt.cm.hsv(np.linspace(0, 1, num_classes + 1)).tolist()

    plt.figure(figsize=(20, 12))
    plt.imshow(batch_original_images[sample])
    axis = plt.gca()

    def _annotate(x0, y0, x1, y1, edge, caption):
        # Outline one box and put its caption at the top-left corner.
        axis.add_patch(plt.Rectangle((x0, y0),
                                     x1 - x0,
                                     y1 - y0,
                                     color=edge,
                                     fill=False,
                                     linewidth=2))
        axis.text(x0,
                  y0,
                  caption,
                  size='x-large',
                  color='white',
                  bbox={'facecolor': edge, 'alpha': 1.0})

    # Ground-truth rows are read as (class, xmin, ymin, xmax, ymax).
    for box in batch_original_labels[sample]:
        _annotate(box[1], box[2], box[3], box[4],
                  'green',
                  f'{classes[int(box[0])]}')

    # Prediction rows are read as (class, conf, xmin, ymin, xmax, ymax).
    for box in decoded_original[sample]:
        _annotate(box[2], box[3], box[4], box[5],
                  colors[int(box[0])],
                  f'{classes[int(box[0])]}: {box[1]:.2f}')
Exemplo n.º 6
0
    def set_generator(self,
                      train_images_dir,
                      train_annotation_path,
                      batch_size,
                      val_images_dir=None,
                      val_annotation_path=None):
        """Create the training and, optionally, the validation batch generator.

        Results are stored on the instance: ``self.generator`` and
        ``self.steps_per_epoch`` for training, ``self.validation_data`` and
        ``self.validation_steps`` for validation (both ``None`` when no
        validation set is given).

        Args:
            train_images_dir: Directory holding the training images.
            train_annotation_path: Annotation file handed to
                ``DataGenerator.parse_json`` for the training set.
            batch_size: Number of samples per generated batch.
            val_images_dir: Directory holding the validation images, or
                ``None`` to skip validation.
            val_annotation_path: Annotation file for the validation set, or
                ``None`` to skip validation.

        Raises:
            ValueError: If ``self.model_name`` is not ``'ssd_7'``,
                ``'ssd_300'`` or ``'ssd_512'``.
        """
        # Names of the confidence predictor layers whose spatial sizes the
        # label encoder needs in order to generate the anchor boxes.
        vgg_predictor_layers = ('conv4_3_norm_mbox_conf',
                                'fc7_mbox_conf',
                                'conv6_2_mbox_conf',
                                'conv7_2_mbox_conf',
                                'conv8_2_mbox_conf',
                                'conv9_2_mbox_conf')
        predictor_layer_names = {
            'ssd_7': ('classes4', 'classes5', 'classes6', 'classes7'),
            'ssd_300': vgg_predictor_layers,
            'ssd_512': vgg_predictor_layers + ('conv10_2_mbox_conf',),
        }

        # Fail fast: an unknown model used to surface as an UnboundLocalError
        # only after the whole training set had been loaded.
        if self.model_name not in predictor_layer_names:
            raise ValueError(
                f"Unsupported model_name {self.model_name!r}; "
                f"expected one of {sorted(predictor_layer_names)}")

        train_dataset = DataGenerator(load_images_into_memory=True,
                                      hdf5_dataset_path=None)
        train_dataset.parse_json(images_dirs=[train_images_dir],
                                 annotations_filenames=[train_annotation_path],
                                 ground_truth_available=True,
                                 include_classes='all',
                                 ret=False,
                                 verbose=True)
        train_dataset_size = train_dataset.get_dataset_size()

        # Training-time augmentation chain.
        if self.model_name == 'ssd_7':
            ssd_data_augmentation = DataAugmentationConstantInputSize(
                random_brightness=(-48, 48, 0.5),
                random_contrast=(0.5, 1.8, 0.5),
                random_saturation=(0.5, 1.8, 0.5),
                random_hue=(18, 0.5),
                random_flip=0.5,
                random_translate=((0.03, 0.5), (0.03, 0.5), 0.5),
                random_scale=(0.5, 2.0, 0.5),
                n_trials_max=3,
                clip_boxes=True,
                overlap_criterion='area',
                bounds_box_filter=(0.3, 1.0),
                bounds_validator=(0.5, 1.0),
                n_boxes_min=1,
                background=(0, 0, 0))
        else:
            # 'ssd_300' and 'ssd_512' share the same augmentation setup.
            ssd_data_augmentation = SSDDataAugmentation(
                img_height=self.image_size[0],
                img_width=self.image_size[1],
                background=self.mean_color)

        predictor_sizes = [
            self.model.get_layer(layer_name).output_shape[1:3]
            for layer_name in predictor_layer_names[self.model_name]
        ]

        # Encodes ground-truth labels into the format the SSD loss expects.
        ssd_input_encoder = SSDInputEncoder(
            img_height=self.image_size[0],
            img_width=self.image_size[1],
            n_classes=self.n_classes,
            predictor_sizes=predictor_sizes,
            scales=self.scales,
            aspect_ratios_per_layer=self.aspect_ratios_per_layer,
            two_boxes_for_ar1=self.two_boxes_for_ar1,
            steps=self.steps,
            offsets=self.offsets,
            clip_boxes=self.clip_boxes,
            variances=self.variances,
            matching_type='multi',
            pos_iou_threshold=0.5,
            neg_iou_limit=0.5,
            normalize_coords=self.normalize_coords)

        self.generator = train_dataset.generate(
            batch_size=batch_size,
            shuffle=True,
            transformations=[ssd_data_augmentation],
            label_encoder=ssd_input_encoder,
            returns={'processed_images', 'encoded_labels'},
            keep_images_without_gt=False)
        self.steps_per_epoch = ceil(train_dataset_size / batch_size)

        # Validation is optional and needs both the images and annotations.
        if val_images_dir is None or val_annotation_path is None:
            self.validation_data = None
            self.validation_steps = None
            return

        val_dataset = DataGenerator(load_images_into_memory=True,
                                    hdf5_dataset_path=None)
        val_dataset.parse_json(images_dirs=[val_images_dir],
                               annotations_filenames=[val_annotation_path],
                               ground_truth_available=True,
                               include_classes='all',
                               ret=False,
                               verbose=True)
        val_dataset_size = val_dataset.get_dataset_size()

        if self.model_name == 'ssd_7':
            # The ssd_7 validation images are fed through unchanged.
            transformations = []
        else:
            # Validation images only get channel conversion and resizing,
            # no random augmentation.
            transformations = [
                ConvertTo3Channels(),
                Resize(height=self.image_size[0], width=self.image_size[1]),
            ]

        self.validation_data = val_dataset.generate(
            batch_size=batch_size,
            shuffle=False,
            transformations=transformations,
            label_encoder=ssd_input_encoder,
            returns={'processed_images', 'encoded_labels'},
            keep_images_without_gt=False)
        self.validation_steps = ceil(val_dataset_size / batch_size)
Exemplo n.º 7
0
def main(job_dir, **args):
    """Resume training of a saved SSD model on Pascal VOC 2007 and upload it.

    The routine copies the dataset from the ``deeplearningteam11`` bucket
    next to this script, downloads and loads the saved model, marks every
    layer trainable, trains from epoch 120 to epoch 200 at a constant
    learning rate of 1e-6 and finally saves the model and uploads it to the
    bucket.

    Args:
        job_dir: Job directory supplied by the launcher. It is accepted for
            interface compatibility but not used by this routine.
        **args: Further launcher options; ignored.
    """
    import subprocess  # local import: only needed for the dataset copy

    data_dir = "gs://deeplearningteam11/data"

    # Resolve the script directory to an absolute path. A bare
    # `os.path.dirname(__file__)` is '' when the script is started from its
    # own directory, which would turn every dataset path below into a path
    # under the filesystem root and leave gsutil without a destination.
    script_dir = os.path.dirname(os.path.abspath(__file__))

    print("Current Directory: " + script_dir)
    print("Lets copy the data to: " + script_dir)

    # Run gsutil with an argument list (no shell), so directories containing
    # spaces work. The copy stays best-effort: on failure we warn and carry
    # on with whatever data is already present locally.
    copy_cmd = ["gsutil", "-m", "cp", "-r", data_dir, script_dir]
    try:
        copy_status = subprocess.run(copy_cmd,
                                     stdout=subprocess.DEVNULL,
                                     stderr=subprocess.DEVNULL).returncode
    except OSError as err:
        print("WARNING: could not run gsutil ({}); "
              "continuing with local data.".format(err))
    else:
        if copy_status != 0:
            print("WARNING: gsutil exited with status {}; "
                  "continuing with local data.".format(copy_status))

    with tf.device('/device:GPU:0'):
        # 1: Build the Keras model.
        K.clear_session()  # Clear previous models from memory.
        # NOTE(review): this freshly built network is replaced by the
        # `load_model` result below and is never used - confirm whether the
        # build can be dropped.
        model = ssd_300(image_size=(img_height, img_width, img_channels),
                        n_classes=n_classes,
                        mode='training',
                        l2_regularization=0.0005,
                        scales=scales,
                        aspect_ratios_per_layer=aspect_ratios,
                        two_boxes_for_ar1=two_boxes_for_ar1,
                        steps=steps,
                        offsets=offsets,
                        clip_boxes=clip_boxes,
                        variances=variances,
                        normalize_coords=normalize_coords,
                        subtract_mean=mean_color,
                        swap_channels=swap_channels)

        # Download the saved model to the instance. Both handles are closed
        # even if the transfer fails half-way.
        model_path = 'model.h5'
        with file_io.FileIO('gs://deeplearningteam11/vgg19BNmodel.h5',
                            mode='rb') as remote_model, \
                open(model_path, 'wb') as local_model:
            local_model.write(remote_model.read())

        ssd_loss = SSDLoss(neg_pos_ratio=3, alpha=1.0)

        model = load_model(model_path,
                           custom_objects={
                               'AnchorBoxes': AnchorBoxes,
                               'L2Normalization': L2Normalization,
                               'DecodeDetections': DecodeDetections,
                               'compute_loss': ssd_loss.compute_loss
                           })

        # Fine-tune the whole network.
        for layer in model.layers:
            layer.trainable = True

        model.summary()

        # 1: Instantiate two `DataGenerator` objects: one for training, one
        # for validation.
        train_dataset = DataGenerator(load_images_into_memory=True,
                                      hdf5_dataset_path=None)
        val_dataset = DataGenerator(load_images_into_memory=True,
                                    hdf5_dataset_path=None)

        # 2: Locate the VOC 2007 data copied above. Directory paths keep
        # their trailing separator (the empty last component of `join`).
        voc_root = os.path.join(script_dir, 'data', 'data', 'VOC2007')

        train_images_dir = os.path.join(voc_root, 'train', 'JPEGImages', '')
        test_images_dir = os.path.join(voc_root, 'test', 'JPEGImages', '')

        train_anns_dir = os.path.join(voc_root, 'train', 'Annotations', '')
        test_anns_dir = os.path.join(voc_root, 'test', 'Annotations', '')

        trainval_image_set_filename = os.path.join(
            voc_root, 'train', 'ImageSets', 'Main', 'trainval.txt')
        test_image_set_filename = os.path.join(
            voc_root, 'test', 'ImageSets', 'Main', 'test.txt')

        # The XML parser needs to know which object class names to look for
        # and in which order to map them to integers.
        classes = [
            'background', 'aeroplane', 'bicycle', 'bird', 'boat', 'bottle',
            'bus', 'car', 'cat', 'chair', 'cow', 'diningtable', 'dog', 'horse',
            'motorbike', 'person', 'pottedplant', 'sheep', 'sofa', 'train',
            'tvmonitor'
        ]

        print("Parsing Training Data ...")
        train_dataset.parse_xml(
            images_dirs=[train_images_dir],
            image_set_filenames=[trainval_image_set_filename],
            annotations_dirs=[train_anns_dir],
            classes=classes,
            include_classes='all',
            exclude_truncated=False,
            exclude_difficult=False,
            ret=False,
            verbose=False)
        print("Done")
        print(
            "================================================================")

        print("Parsing Test Data ...")
        val_dataset.parse_xml(
            images_dirs=[test_images_dir],
            image_set_filenames=[test_image_set_filename],
            annotations_dirs=[test_anns_dir],
            classes=classes,
            include_classes='all',
            exclude_truncated=False,
            exclude_difficult=True,
            ret=False,
            verbose=False)
        print("Done")
        print(
            "================================================================")

        # 3: Set the batch size. Lower it if GPU memory becomes a problem.
        batch_size = 32

        # 4: Set the image transformations for pre-processing and data
        # augmentation.

        # For the training generator:
        ssd_data_augmentation = SSDDataAugmentation(img_height=img_height,
                                                    img_width=img_width,
                                                    background=mean_color)

        # For the validation generator:
        convert_to_3_channels = ConvertTo3Channels()
        resize = Resize(height=img_height, width=img_width)

        # 5: Instantiate an encoder that can encode ground truth labels into
        # the format needed by the SSD loss function. It needs the spatial
        # dimensions of the loaded model's predictor layers to create the
        # anchor boxes.
        predictor_layer_names = ('conv4_4_norm_mbox_conf',
                                 'fc7_mbox_conf',
                                 'conv8_2_mbox_conf',
                                 'conv9_2_mbox_conf',
                                 'conv10_2_mbox_conf',
                                 'conv11_2_mbox_conf')
        predictor_sizes = [
            model.get_layer(layer_name).output_shape[1:3]
            for layer_name in predictor_layer_names
        ]

        ssd_input_encoder = SSDInputEncoder(
            img_height=img_height,
            img_width=img_width,
            n_classes=n_classes,
            predictor_sizes=predictor_sizes,
            scales=scales,
            aspect_ratios_per_layer=aspect_ratios,
            two_boxes_for_ar1=two_boxes_for_ar1,
            steps=steps,
            offsets=offsets,
            clip_boxes=clip_boxes,
            variances=variances,
            matching_type='multi',
            pos_iou_threshold=0.5,
            neg_iou_limit=0.5,
            normalize_coords=normalize_coords)

        # 6: Create the generator handles that will be passed to Keras'
        # `fit_generator()` function.
        train_generator = train_dataset.generate(
            batch_size=batch_size,
            shuffle=True,
            transformations=[ssd_data_augmentation],
            label_encoder=ssd_input_encoder,
            returns={'processed_images', 'encoded_labels'},
            keep_images_without_gt=False)

        val_generator = val_dataset.generate(
            batch_size=batch_size,
            shuffle=False,
            transformations=[convert_to_3_channels, resize],
            label_encoder=ssd_input_encoder,
            returns={'processed_images', 'encoded_labels'},
            keep_images_without_gt=False)

        # Get the number of samples in the training and validation datasets.
        train_dataset_size = train_dataset.get_dataset_size()
        val_dataset_size = val_dataset.get_dataset_size()

        print("Number of images in the training dataset:\t{:>6}".format(
            train_dataset_size))
        print("Number of images in the validation dataset:\t{:>6}".format(
            val_dataset_size))

        def lr_schedule(epoch):
            """Return the constant fine-tuning learning rate for any epoch."""
            return 1e-6

        learning_rate_scheduler = LearningRateScheduler(schedule=lr_schedule,
                                                        verbose=1)

        terminate_on_nan = TerminateOnNaN()

        callbacks = [learning_rate_scheduler, terminate_on_nan]

        # This run resumes a previous training, hence the non-zero
        # `initial_epoch`; adjust both bounds when resuming from elsewhere.
        initial_epoch = 120
        final_epoch = 200
        steps_per_epoch = 500

        model.fit_generator(generator=train_generator,
                            steps_per_epoch=steps_per_epoch,
                            epochs=final_epoch,
                            callbacks=callbacks,
                            validation_data=val_generator,
                            validation_steps=ceil(val_dataset_size /
                                                  batch_size),
                            initial_epoch=initial_epoch)

        # Save locally, then copy the file into the bucket.
        model_name = "vgg19BNmodel_cont.h5"
        model.save(model_name)
        with file_io.FileIO(model_name, mode='rb') as input_f:
            with file_io.FileIO("gs://deeplearningteam11/" + model_name,
                                mode='w+') as output_f:
                output_f.write(input_f.read())
def train(optimizer, learning_rate, trial):
    """Train a freshly created SSD network once and return its loss history.

    Besides its arguments, this function reads a number of names from the
    enclosing module scope: the anchor-box settings (`img_height`,
    `img_width`, `n_classes`, `scales`, `aspect_ratios`,
    `two_boxes_for_ar1`, `steps`, `offsets`, `clip_boxes`, `variances`,
    `normalize_coords`), the data pipeline (`train_dataset`, `val_dataset`,
    `batch_size`, `ssd_data_augmentation`, `convert_to_3_channels`,
    `resize`) and the shared callbacks (`model_checkpoint`, `csv_logger`,
    `learning_rate_scheduler`, `terminate_on_nan`).

    Args:
        optimizer: Name of the optimizer to use, either "sgd" or "adam".
        learning_rate: Learning rate passed to Adam. The SGD branch uses a
            fixed learning rate of 0.001 and ignores this value.
        trial: Object forwarded unchanged to `OptunaCallback`.

    Returns:
        The `history` attribute of the `History` callback after fitting.

    Raises:
        ValueError: If `optimizer` is neither "sgd" nor "adam".
    """
    # Fail fast: without this check an unsupported name only surfaced as a
    # NameError on `opt` after the network and generators had been built.
    if optimizer not in ("sgd", "adam"):
        raise ValueError(
            "Unsupported optimizer {!r}; expected 'sgd' or 'adam'".format(
                optimizer))

    model = create_network()

    # 5: Instantiate an encoder that can encode ground truth labels into the
    # format needed by the SSD loss function. The encoder constructor needs the
    # spatial dimensions of the model's predictor layers to create the anchor
    # boxes.
    predictor_layer_names = (
        'conv4_3_norm_mbox_conf',
        'fc7_mbox_conf',
        'conv6_2_mbox_conf',
        'conv7_2_mbox_conf',
        'conv8_2_mbox_conf',
        'conv9_2_mbox_conf',
    )
    predictor_sizes = [
        model.get_layer(layer_name).output_shape[1:3]
        for layer_name in predictor_layer_names
    ]

    ssd_input_encoder = SSDInputEncoder(img_height=img_height,
                                        img_width=img_width,
                                        n_classes=n_classes,
                                        predictor_sizes=predictor_sizes,
                                        scales=scales,
                                        aspect_ratios_per_layer=aspect_ratios,
                                        two_boxes_for_ar1=two_boxes_for_ar1,
                                        steps=steps,
                                        offsets=offsets,
                                        clip_boxes=clip_boxes,
                                        variances=variances,
                                        matching_type='multi',
                                        pos_iou_threshold=0.5,
                                        neg_iou_limit=0.5,
                                        normalize_coords=normalize_coords)

    # 6: Create the generator handles that will be passed to Keras'
    # `fit_generator()` function.
    train_generator = train_dataset.generate(
        batch_size=batch_size,
        shuffle=True,
        transformations=[ssd_data_augmentation],
        label_encoder=ssd_input_encoder,
        returns={'processed_images', 'encoded_labels'},
        keep_images_without_gt=False)

    val_generator = val_dataset.generate(
        batch_size=batch_size,
        shuffle=False,
        transformations=[convert_to_3_channels, resize],
        label_encoder=ssd_input_encoder,
        returns={'processed_images', 'encoded_labels'},
        keep_images_without_gt=False)

    # Get the number of samples in the training and validation datasets.
    train_dataset_size = train_dataset.get_dataset_size()
    val_dataset_size = val_dataset.get_dataset_size()

    print("Number of images in the training dataset:\t{:>6}".format(
        train_dataset_size))
    print("Number of images in the validation dataset:\t{:>6}".format(
        val_dataset_size))

    # 3: Instantiate an optimizer and the SSD loss function and compile the
    # model.
    if optimizer == "sgd":
        # NOTE(review): `learning_rate` is not used here; SGD runs with the
        # preset 0.001. Confirm whether the trial's value should apply.
        opt = SGD(lr=0.001, momentum=0.9, decay=0.0, nesterov=False)
    else:  # "adam" (validated above)
        opt = Adam(lr=learning_rate,
                   beta_1=0.9,
                   beta_2=0.999,
                   epsilon=1e-08,
                   decay=0.0)

    ssd_loss = SSDLoss(neg_pos_ratio=3, alpha=1.0)
    model.compile(optimizer=opt, loss=ssd_loss.compute_loss)

    hist = History()
    truncate = OptunaCallback(trial)
    callbacks = [
        model_checkpoint,
        csv_logger,
        learning_rate_scheduler,
        hist,
        truncate,
        terminate_on_nan,
    ]

    # If you're resuming a previous training, set `initial_epoch` and
    # `final_epoch` accordingly.
    initial_epoch = 0
    final_epoch = 50
    steps_per_epoch = train_dataset_size // batch_size

    # The return value of `fit_generator` is not needed: the `History`
    # callback instance above is what gets returned to the caller.
    model.fit_generator(generator=train_generator,
                        steps_per_epoch=steps_per_epoch,
                        epochs=final_epoch,
                        callbacks=callbacks,
                        validation_data=val_generator,
                        validation_steps=ceil(val_dataset_size / batch_size),
                        initial_epoch=initial_epoch)

    return hist.history
Exemplo n.º 9
0
    def create_generator(self, model, train_dataset, val_dataset):
        """Build the batch generators for training and validation.

        Args:
            model: Model exposing the predictor layers `classes4` to
                `classes7`; their `output_shape[1:3]` is passed to the label
                encoder as `predictor_sizes`.
            train_dataset: Dataset object whose `generate()` yields training
                batches (shuffled, with the augmentation chain applied).
            val_dataset: Dataset object whose `generate()` yields validation
                batches (not shuffled, no transformations).

        Returns:
            A `(train_generator, val_generator)` tuple.
        """
        # Data augmentation chain; applied to the training generator only.
        augmentation_options = {
            'random_brightness': (-48, 48, 0.5),
            'random_contrast': (0.5, 1.8, 0.5),
            'random_saturation': (0.5, 1.8, 0.5),
            'random_hue': (18, 0.5),
            'random_flip': 0.5,
            'random_translate': ((0.03, 0.5), (0.03, 0.5), 0.5),
            'random_scale': (0.5, 2.0, 0.5),
            'n_trials_max': 3,
            'clip_boxes': True,
            'overlap_criterion': 'area',
            'bounds_box_filter': (0.3, 1.0),
            'bounds_validator': (0.5, 1.0),
            'n_boxes_min': 1,
            'background': (0, 0, 0),
        }
        augmenter = DataAugmentationConstantInputSize(**augmentation_options)

        # 5: Instantiate an encoder that turns ground truth labels into the
        # format needed by the SSD loss function. It needs the spatial
        # dimensions of the model's predictor layers to create the anchor
        # boxes.
        layer_names = ('classes4', 'classes5', 'classes6', 'classes7')
        feature_map_sizes = [
            model.get_layer(layer_name).output_shape[1:3]
            for layer_name in layer_names
        ]

        label_encoder = SSDInputEncoder(
            img_height=self.img_height,
            img_width=self.img_width,
            n_classes=self.n_classes,
            predictor_sizes=feature_map_sizes,
            scales=self.scales,
            aspect_ratios_global=self.aspect_ratios,
            variances=self.variances,
            matching_type='multi',
            pos_iou_threshold=0.5,
            neg_iou_limit=0.3,
            normalize_coords=self.normalize_coords)

        # 6: Create the generator handles that will be passed to Keras'
        # `fit_generator()` function.
        def batches_from(dataset, shuffle, transformations):
            # Both generators differ only in shuffling and transformations.
            return dataset.generate(
                batch_size=self.batch_size,
                shuffle=shuffle,
                transformations=transformations,
                label_encoder=label_encoder,
                returns={'processed_images', 'encoded_labels'},
                keep_images_without_gt=False)

        train_batches = batches_from(train_dataset, True, [augmenter])
        val_batches = batches_from(val_dataset, False, [])
        return train_batches, val_batches
def _main_(args):
    """Train an SSD detector as described by a JSON configuration file.

    The configuration is read from `args.conf`. Keys used:

    * `model.backend`: one of 'ssd300', 'ssd512' or 'ssd7'.
    * `model.labels`: list of object class names (background excluded).
    * `model.input`: used for both the input height and width.
    * `train.saved_weights_name`: path of the saved model; if it exists the
      model is loaded from it, and it is also the checkpoint target.
    * `train.learning_rate`, `train.batch_size`, `train.nb_epochs`,
      `train.debug`.
    * `train.train_image_folder`, `train.train_annot_folder`,
      `train.train_image_set_filename` and the matching `valid.valid_*` keys.

    The learning-rate schedule `lr_schedule` is expected to be defined at
    module level.

    Args:
        args: Object with a `conf` attribute holding the config file path.

    Raises:
        ValueError: If `model.backend` is not a supported backend.
    """
    with open(args.conf) as config_buffer:
        config = json.load(config_buffer)

    # Predictor layers whose spatial output sizes the label encoder needs,
    # per supported backend.
    predictor_layers = {
        'ssd300': ('conv4_3_norm_mbox_conf', 'fc7_mbox_conf',
                   'conv6_2_mbox_conf', 'conv7_2_mbox_conf',
                   'conv8_2_mbox_conf', 'conv9_2_mbox_conf'),
        'ssd512': ('conv4_3_norm_mbox_conf', 'fc7_mbox_conf',
                   'conv6_2_mbox_conf', 'conv7_2_mbox_conf',
                   'conv8_2_mbox_conf', 'conv9_2_mbox_conf',
                   'conv10_2_mbox_conf'),
        'ssd7': ('classes4', 'classes5', 'classes6', 'classes7'),
    }

    # Reject unknown backends up front; otherwise the failure only shows up
    # later as a NameError on `model` or `ssd_input_encoder`.
    backend = config['model']['backend']
    if backend not in predictor_layers:
        raise ValueError(
            "Wrong Backend: {!r}; expected one of {}".format(
                backend, sorted(predictor_layers)))

    ###############################
    #   Parse the annotations
    ###############################
    labels = config['model']['labels']
    # Category 0 is the background, so real classes start at 1.
    categories = {label: index for index, label in enumerate(labels, start=1)}
    print('\nTraining on: \t' + str(categories) + '\n')

    ####################################
    #   Parameters
    ###################################
    img_height = config['model']['input']  # Height of the model input images
    img_width = config['model']['input']  # Width of the model input images
    img_channels = 3  # Number of color channels of the model input images
    # Per-channel mean of the images in the dataset. Do not change this value
    # if you're using any of the pre-trained weights.
    mean_color = [123, 117, 104]
    # The color channel order in the original SSD is BGR, so the model
    # reverses the color channel order of the input images.
    swap_channels = [2, 1, 0]
    n_classes = len(labels)  # Number of positive classes
    two_boxes_for_ar1 = True
    # Whether or not to clip the anchor boxes to lie entirely within the image.
    clip_boxes = False
    # Variances by which the encoded target coordinates are divided.
    variances = [0.1, 0.1, 0.2, 0.2]
    normalize_coords = True

    # Anchor-box settings per backend; the order of the per-layer entries
    # matters.
    if backend == 'ssd512':
        scales = [0.07, 0.15, 0.3, 0.45, 0.6, 0.75, 0.9, 1.05]
        aspect_ratios = [[1.0, 2.0, 0.5],
                         [1.0, 2.0, 0.5, 3.0, 1.0 / 3.0],
                         [1.0, 2.0, 0.5, 3.0, 1.0 / 3.0],
                         [1.0, 2.0, 0.5, 3.0, 1.0 / 3.0],
                         [1.0, 2.0, 0.5, 3.0, 1.0 / 3.0],
                         [1.0, 2.0, 0.5],
                         [1.0, 2.0, 0.5]]
        # Space between two adjacent anchor box center points per layer.
        steps = [8, 16, 32, 64, 100, 200, 300]
        offsets = [0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5]
    elif backend == 'ssd7':
        # Explicit list of anchor box scaling factors.
        scales = [0.08, 0.16, 0.32, 0.64, 0.96]
        # A single, global list of aspect ratios for all predictor layers.
        aspect_ratios = [0.5, 1.0, 2.0]
        steps = None
        offsets = None
    else:  # 'ssd300'
        # Anchor box scaling factors of the original SSD300 for Pascal VOC.
        scales = [0.1, 0.2, 0.37, 0.54, 0.71, 0.88, 1.05]
        aspect_ratios = [[1.0, 2.0, 0.5],
                         [1.0, 2.0, 0.5, 3.0, 1.0 / 3.0],
                         [1.0, 2.0, 0.5, 3.0, 1.0 / 3.0],
                         [1.0, 2.0, 0.5, 3.0, 1.0 / 3.0],
                         [1.0, 2.0, 0.5],
                         [1.0, 2.0, 0.5]]
        steps = [8, 16, 32, 64, 100, 300]
        # Offsets of the first anchor box center points from the top and left
        # image borders, as a fraction of the step size, per predictor layer.
        offsets = [0.5, 0.5, 0.5, 0.5, 0.5, 0.5]

    K.clear_session()  # Clear previous models from memory.

    model_path = config['train']['saved_weights_name']

    if os.path.exists(model_path):
        print("\nLoading pretrained weights.\n")
        # An SSDLoss object is needed so it can be passed to the model loader.
        ssd_loss = SSDLoss(neg_pos_ratio=3, alpha=1.0)

        K.clear_session()  # Clear previous models from memory.
        model = load_model(model_path,
                           custom_objects={
                               'AnchorBoxes': AnchorBoxes,
                               'L2Normalization': L2Normalization,
                               'compute_loss': ssd_loss.compute_loss
                           })

    else:
        ####################################
        #   Build the Keras model.
        ###################################
        # Keyword arguments shared by all three model builders.
        common_kwargs = dict(
            image_size=(img_height, img_width, img_channels),
            n_classes=n_classes,
            mode='training',
            l2_regularization=0.0005,
            scales=scales,
            two_boxes_for_ar1=two_boxes_for_ar1,
            steps=steps,
            offsets=offsets,
            clip_boxes=clip_boxes,
            variances=variances,
            normalize_coords=normalize_coords)

        if backend == 'ssd300':
            from models.keras_ssd300 import ssd_300 as ssd

            # Call the imported alias `ssd`; the name `ssd_300` is not bound
            # inside this function.
            model = ssd(aspect_ratios_per_layer=aspect_ratios,
                        subtract_mean=mean_color,
                        swap_channels=swap_channels,
                        **common_kwargs)

        elif backend == 'ssd512':
            from models.keras_ssd512 import ssd_512 as ssd

            model = ssd(aspect_ratios_per_layer=aspect_ratios,
                        swap_channels=swap_channels,
                        **common_kwargs)

        else:  # 'ssd7'
            from models.keras_ssd7 import build_model as ssd

            model = ssd(aspect_ratios_global=aspect_ratios,
                        aspect_ratios_per_layer=None,
                        subtract_mean=None,
                        divide_by_stddev=None,
                        **common_kwargs)

        print('OK create model')

        # The VGG weights only apply to ssd300 and ssd512, so they are not
        # loaded (and the weights file is not required) for ssd7.
        if backend in ('ssd300', 'ssd512'):
            # TODO: Set the path to the weights you want to load.
            weights_path = 'VGG_ILSVRC_16_layers_fc_reduced.h5'
            print("\nLoading pretrained weights VGG.\n")
            model.load_weights(weights_path, by_name=True)

        # 3: Instantiate an optimizer and the SSD loss function and compile
        # the model.
        optimizer = Adam(lr=config['train']['learning_rate'],
                         beta_1=0.9,
                         beta_2=0.999,
                         epsilon=1e-08,
                         decay=0.0)
        ssd_loss = SSDLoss(neg_pos_ratio=3, alpha=1.0)
        model.compile(optimizer=optimizer, loss=ssd_loss.compute_loss)

        model.summary()

    #####################################################################
    #  Instantiate two `DataGenerator` objects: One for training, one for validation.
    ######################################################################
    train_dataset = DataGenerator(load_images_into_memory=False,
                                  hdf5_dataset_path=None)
    val_dataset = DataGenerator(load_images_into_memory=False,
                                hdf5_dataset_path=None)

    # 2: Parse the image and label lists for the training and validation
    # datasets. This can take a while.

    # The XML parser needs to know what object class names to look for and in
    # which order to map them to integers.
    classes = ['background'] + labels

    train_dataset.parse_xml(
        images_dirs=[config['train']['train_image_folder']],
        image_set_filenames=[config['train']['train_image_set_filename']],
        annotations_dirs=[config['train']['train_annot_folder']],
        classes=classes,
        include_classes='all',
        exclude_truncated=False,
        exclude_difficult=False,
        ret=False)

    val_dataset.parse_xml(
        images_dirs=[config['valid']['valid_image_folder']],
        image_set_filenames=[config['valid']['valid_image_set_filename']],
        annotations_dirs=[config['valid']['valid_annot_folder']],
        classes=classes,
        include_classes='all',
        exclude_truncated=False,
        exclude_difficult=False,
        ret=False)

    #########################
    # 3: Set the batch size.
    #########################
    batch_size = config['train']['batch_size']

    ##########################
    # 4: Set the image transformations for pre-processing and data augmentation options.
    ##########################
    # For the validation generator:
    convert_to_3_channels = ConvertTo3Channels()
    resize = Resize(height=img_height, width=img_width)

    #########################################
    # 5: Instantiate an encoder that can encode ground truth labels into the format needed by the SSD loss function.
    #########################################
    # The encoder constructor needs the spatial dimensions of the model's
    # predictor layers to create the anchor boxes.
    predictor_sizes = [
        model.get_layer(layer_name).output_shape[1:3]
        for layer_name in predictor_layers[backend]
    ]

    encoder_kwargs = dict(
        img_height=img_height,
        img_width=img_width,
        n_classes=n_classes,
        predictor_sizes=predictor_sizes,
        scales=scales,
        two_boxes_for_ar1=two_boxes_for_ar1,
        steps=steps,
        offsets=offsets,
        clip_boxes=clip_boxes,
        variances=variances,
        matching_type='multi',
        pos_iou_threshold=0.5,
        normalize_coords=normalize_coords)
    if backend == 'ssd7':
        # ssd7 uses one global aspect-ratio list and a lower negative limit.
        encoder_kwargs['aspect_ratios_global'] = aspect_ratios
        encoder_kwargs['neg_iou_limit'] = 0.3
    else:
        encoder_kwargs['aspect_ratios_per_layer'] = aspect_ratios
        encoder_kwargs['neg_iou_limit'] = 0.5
    ssd_input_encoder = SSDInputEncoder(**encoder_kwargs)

    #######################
    # 6: Create the generator handles that will be passed to Keras' `fit_generator()` function.
    #######################
    train_generator = train_dataset.generate(
        batch_size=batch_size,
        shuffle=True,
        transformations=[
            SSDDataAugmentation(img_height=img_height, img_width=img_width)
        ],
        label_encoder=ssd_input_encoder,
        returns={'processed_images', 'encoded_labels'},
        keep_images_without_gt=False)

    val_generator = val_dataset.generate(
        batch_size=batch_size,
        shuffle=False,
        transformations=[convert_to_3_channels, resize],
        label_encoder=ssd_input_encoder,
        returns={'processed_images', 'encoded_labels'},
        keep_images_without_gt=False)

    # Get the number of samples in the training and validation datasets.
    train_dataset_size = train_dataset.get_dataset_size()
    val_dataset_size = val_dataset.get_dataset_size()

    print("Number of images in the training dataset:\t{:>6}".format(
        train_dataset_size))
    print("Number of images in the validation dataset:\t{:>6}".format(
        val_dataset_size))

    ##########################
    # Define model callbacks.
    #########################
    # The best model (by validation loss) is written back to the configured
    # saved-weights path.
    model_checkpoint = ModelCheckpoint(
        filepath=config['train']['saved_weights_name'],
        monitor='val_loss',
        verbose=1,
        save_best_only=True,
        save_weights_only=False,
        mode='auto',
        period=1)

    csv_logger = CSVLogger(filename='log.csv', separator=',', append=True)

    learning_rate_scheduler = LearningRateScheduler(schedule=lr_schedule,
                                                    verbose=1)

    terminate_on_nan = TerminateOnNaN()

    callbacks = [
        model_checkpoint, csv_logger, learning_rate_scheduler, terminate_on_nan
    ]

    # NOTE(review): one training batch is drawn and discarded before fitting,
    # presumably as a smoke test of the data pipeline. It is kept so the
    # generator state at the start of training is unchanged.
    next(train_generator)

    initial_epoch = 0
    final_epoch = config['train']['nb_epochs']
    steps_per_epoch = 500

    model.fit_generator(generator=train_generator,
                        steps_per_epoch=steps_per_epoch,
                        epochs=final_epoch,
                        callbacks=callbacks,
                        validation_data=val_generator,
                        validation_steps=ceil(val_dataset_size / batch_size),
                        initial_epoch=initial_epoch,
                        verbose=1 if config['train']['debug'] else 2)
Exemplo n.º 11
0
def _draw_labeled_box(axis, xmin, ymin, xmax, ymax, label, color):
    """Draw an unfilled rectangle on `axis` with `label` at its (xmin, ymin) corner.

    `color` is used for both the rectangle outline and the label background.
    """
    axis.add_patch(
        plt.Rectangle((xmin, ymin),
                      xmax - xmin,
                      ymax - ymin,
                      color=color,
                      fill=False,
                      linewidth=2))
    axis.text(xmin,
              ymin,
              label,
              size='x-large',
              color='white',
              bbox={
                  'facecolor': color,
                  'alpha': 1.0
              })


def run(train_dir, valid_dir, set_dir, model_dir):
    """Train an SSD512 detector, save it, and plot predictions for one image.

    The function builds an ``ssd_512`` model for the classes
    ``car``/``bus``/``truck``, loads weights by layer name, trains it with
    ``fit_generator``, saves the result to ``trained.h5`` and finally shows a
    matplotlib figure with ground-truth and predicted boxes for one
    validation sample.

    Parameters
    ----------
    train_dir : str
        Training image directory. Annotations are read from
        ``train_dir + '/annot/'`` and the image set from
        ``train_dir + '/img_set.txt'``.
    valid_dir : str
        Validation image directory. Annotations are read from
        ``valid_dir + '/annot/'`` and the image set from
        ``valid_dir + '/valid_set.txt'``.
    set_dir
        Unused; kept so existing callers do not break.
    model_dir : str
        Path of the weights file that is loaded into the freshly built model.

    Notes
    -----
    ``lr_schedule`` is not defined in this function and is expected to be
    available from the enclosing module.
    """
    train_dataset_dir = train_dir
    train_annot_dir = train_dir + '/annot/'
    train_set = train_dir + '/img_set.txt'

    valid_dataset_dir = valid_dir
    valid_annot_dir = valid_dir + '/annot/'
    valid_set = valid_dir + '/valid_set.txt'

    batch_size = 16
    print('Using batch size of: {}'.format(batch_size))
    model_path = model_dir

    # The XML parser needs the class names and their order to map them to
    # integer ids.
    classes = ['background', 'car', 'bus', 'truck']

    # Parameters required to build the SSD512 model and its label encoder.
    img_height = 512
    img_width = 512
    img_channels = 3  # Colour image
    mean_color = [123, 117, 104]  # DO NOT CHANGE
    swap_channels = [2, 1, 0]  # Original SSD used BGR
    n_classes = 3  # 80 for COCO
    scales_coco = [0.04, 0.1, 0.26, 0.42, 0.58, 0.74, 0.9, 1.06]
    scales = scales_coco
    aspect_ratios = [[1.0, 2.0, 0.5], [1.0, 2.0, 0.5, 3.0, 1.0 / 3.0],
                     [1.0, 2.0, 0.5, 3.0, 1.0 / 3.0],
                     [1.0, 2.0, 0.5, 3.0, 1.0 / 3.0],
                     [1.0, 2.0, 0.5, 3.0, 1.0 / 3.0], [1.0, 2.0, 0.5],
                     [1.0, 2.0, 0.5]]
    two_boxes_for_ar1 = True
    steps = [8, 16, 32, 64, 128, 256, 512]
    offsets = [0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5]
    clip_boxes = False
    variances = [0.1, 0.1, 0.2, 0.2]
    normalize_coords = True

    K.clear_session()  # Clear previous models from memory.

    model = ssd_512(image_size=(img_height, img_width, img_channels),
                    n_classes=n_classes,
                    mode='training',
                    l2_regularization=0.0005,
                    scales=scales,
                    aspect_ratios_per_layer=aspect_ratios,
                    two_boxes_for_ar1=two_boxes_for_ar1,
                    steps=steps,
                    offsets=offsets,
                    clip_boxes=clip_boxes,
                    variances=variances,
                    normalize_coords=normalize_coords,
                    subtract_mean=mean_color,
                    swap_channels=swap_channels)
    model.load_weights(model_path, by_name=True)

    sgd = SGD(lr=0.001, momentum=0.9, decay=0.0, nesterov=False)
    ssd_loss = SSDLoss(neg_pos_ratio=3, alpha=1.0)

    model.compile(optimizer=sgd, loss=ssd_loss.compute_loss)

    # Create data generators for the training and validation sets.
    train_dataset = DataGenerator(load_images_into_memory=False,
                                  hdf5_dataset_path=None)
    valid_dataset = DataGenerator(load_images_into_memory=False,
                                  hdf5_dataset_path=None)
    train_dataset.parse_xml(images_dirs=[train_dataset_dir],
                            image_set_filenames=[train_set],
                            annotations_dirs=[train_annot_dir],
                            classes=classes,
                            include_classes='all',
                            exclude_truncated=False,
                            exclude_difficult=False,
                            ret=False)

    valid_dataset.parse_xml(images_dirs=[valid_dataset_dir],
                            image_set_filenames=[valid_set],
                            annotations_dirs=[valid_annot_dir],
                            classes=classes,
                            include_classes='all',
                            exclude_truncated=False,
                            exclude_difficult=False,
                            ret=False)

    # NOTE: the datasets are deliberately not converted to HDF5 here; doing so
    # (via `create_hdf5_dataset`) would speed up training but needs more memory.

    ssd_data_augmentation = SSDDataAugmentation(img_height=img_height,
                                                img_width=img_width,
                                                background=mean_color)

    convert_to_3_channels = ConvertTo3Channels()
    resize = Resize(height=img_height, width=img_width)

    # The encoder needs the spatial size of every predictor layer; SSD512 has
    # seven of them (one more than SSD300).
    predictor_sizes = [
        model.get_layer('conv4_3_norm_mbox_conf').output_shape[1:3],
        model.get_layer('fc7_mbox_conf').output_shape[1:3],
        model.get_layer('conv6_2_mbox_conf').output_shape[1:3],
        model.get_layer('conv7_2_mbox_conf').output_shape[1:3],
        model.get_layer('conv8_2_mbox_conf').output_shape[1:3],
        model.get_layer('conv9_2_mbox_conf').output_shape[1:3],
        model.get_layer('conv10_2_mbox_conf').output_shape[1:3]
    ]

    ssd_input_encoder = SSDInputEncoder(img_height=img_height,
                                        img_width=img_width,
                                        n_classes=n_classes,
                                        predictor_sizes=predictor_sizes,
                                        scales=scales,
                                        aspect_ratios_per_layer=aspect_ratios,
                                        two_boxes_for_ar1=two_boxes_for_ar1,
                                        steps=steps,
                                        offsets=offsets,
                                        clip_boxes=clip_boxes,
                                        variances=variances,
                                        matching_type='multi',
                                        pos_iou_threshold=0.5,
                                        neg_iou_limit=0.5,
                                        normalize_coords=normalize_coords)

    train_generator = train_dataset.generate(
        batch_size=batch_size,
        shuffle=True,
        transformations=[ssd_data_augmentation],
        label_encoder=ssd_input_encoder,
        returns={'processed_images', 'encoded_labels'},
        keep_images_without_gt=False)

    val_generator = valid_dataset.generate(
        batch_size=batch_size,
        shuffle=False,
        transformations=[convert_to_3_channels, resize],
        label_encoder=ssd_input_encoder,
        returns={'processed_images', 'encoded_labels'},
        keep_images_without_gt=False)

    # Get the number of samples in the training and validation datasets.
    train_dataset_size = train_dataset.get_dataset_size()
    valid_dataset_size = valid_dataset.get_dataset_size()

    print("Number of images in the training dataset:\t{:>6}".format(
        train_dataset_size))
    print("Number of images in the validation dataset:\t{:>6}".format(
        valid_dataset_size))

    model_checkpoint = ModelCheckpoint(
        filepath=
        'ssd_epoch-{epoch:02d}_loss-{loss:.4f}_val_loss-{val_loss:.4f}.h5',
        monitor='val_loss',
        verbose=1,
        save_best_only=True,
        save_weights_only=False,
        mode='auto',
        period=1)

    learning_rate_scheduler = LearningRateScheduler(schedule=lr_schedule,
                                                    verbose=1)

    terminate_on_nan = TerminateOnNaN()

    # CSV logging is disabled in this function, so no `csv_logger` exists;
    # listing it here used to raise NameError before training could start.
    callbacks = [model_checkpoint, learning_rate_scheduler, terminate_on_nan]

    initial_epoch = 0
    final_epoch = 150
    # One pass over the training set per epoch, derived from the parsed
    # dataset instead of a hard-coded sample count.
    steps_per_epoch = math.ceil(train_dataset_size / batch_size)

    # Training
    history = model.fit_generator(generator=train_generator,
                                  steps_per_epoch=steps_per_epoch,
                                  epochs=final_epoch,
                                  callbacks=callbacks,
                                  validation_data=val_generator,
                                  validation_steps=math.ceil(
                                      valid_dataset_size / batch_size),
                                  initial_epoch=initial_epoch)

    # Save final trained model
    model.save('trained.h5')

    # Make predictions on a single validation sample.
    predict_generator = valid_dataset.generate(
        batch_size=1,
        shuffle=True,
        transformations=[convert_to_3_channels, resize],
        label_encoder=None,
        returns={
            'processed_images', 'filenames', 'inverse_transform',
            'original_images', 'original_labels'
        },
        keep_images_without_gt=False)

    (batch_images, batch_filenames, batch_inverse_transforms,
     batch_original_images, batch_original_labels) = next(predict_generator)

    i = 0  # Which batch item to look at

    print("Image:", batch_filenames[i])
    print()
    print("Ground truth boxes:\n")
    print(np.array(batch_original_labels[i]))

    y_pred = model.predict(batch_images)
    y_pred_decoded = decode_detections(y_pred,
                                       confidence_thresh=0.2,
                                       iou_threshold=0.4,
                                       top_k=200,
                                       normalize_coords=normalize_coords,
                                       img_height=img_height,
                                       img_width=img_width)

    # Map the predicted boxes back onto the original (un-resized) image.
    y_pred_decoded_inv = apply_inverse_transforms(y_pred_decoded,
                                                  batch_inverse_transforms)

    np.set_printoptions(precision=2, suppress=True, linewidth=90)
    print("Predicted boxes:\n")
    print('   class   conf xmin   ymin   xmax   ymax')
    print(y_pred_decoded_inv[i])

    # One colour per class id (including background) for the predicted boxes.
    colors = plt.cm.hsv(np.linspace(0, 1, n_classes + 1)).tolist()

    plt.figure(figsize=(20, 12))
    plt.imshow(batch_original_images[i])

    current_axis = plt.gca()

    # Ground-truth rows are indexed as (class_id, xmin, ymin, xmax, ymax).
    for box in batch_original_labels[i]:
        _draw_labeled_box(current_axis, box[1], box[2], box[3], box[4],
                          '{}'.format(classes[int(box[0])]), 'green')

    # Predicted rows are indexed as (class_id, conf, xmin, ymin, xmax, ymax).
    for box in y_pred_decoded_inv[i]:
        class_id = int(box[0])
        _draw_labeled_box(current_axis, box[2], box[3], box[4], box[5],
                          '{}: {:.2f}'.format(classes[class_id], box[1]),
                          colors[class_id])

    plt.show()
def ssd_model(config: Dict, train_dataset, val_dataset, callbacks_list):
    """Build, compile and train an SSD300 model, then report the elapsed time.

    Parameters
    ----------
    config : Dict
        Parsed yaml/json configuration; every value used here is read from
        its ``'training'`` section.
    train_dataset
        Dataset object providing ``generate`` and ``get_dataset_size`` for
        the training split.
    val_dataset
        Same kind of object for the validation split.
    callbacks_list
        Callbacks handed unchanged to ``fit_generator``.

    Notes
    -----
    The anchor-box settings (``scales``, ``aspect_ratios``, ``steps``, ...)
    as well as ``mean_color`` and ``swap_channels`` are not defined in this
    function; they are resolved from the enclosing scope.
    """
    start_train = timer()

    training_cfg = config['training']
    img_height = training_cfg['img_height']  # input image height
    img_width = training_cfg['img_width']  # input image width
    img_channels = training_cfg['img_channels']  # colour channels
    # Number of positive classes, e.g. 20 for Pascal VOC, 80 for MS COCO.
    n_classes = training_cfg['n_classes']

    input_shape = (img_height, img_width, img_channels)
    model = ssd_300(
        image_size=input_shape,
        n_classes=n_classes,
        mode='training',
        l2_regularization=training_cfg['l2_regularization'],
        scales=scales,
        aspect_ratios_per_layer=aspect_ratios,
        two_boxes_for_ar1=two_boxes_for_ar1,
        steps=steps,
        offsets=offsets,
        clip_boxes=clip_boxes,
        variances=variances,
        normalize_coords=normalize_coords,
        subtract_mean=mean_color,
        swap_channels=swap_channels,
    )

    # Initialise from the reduced VGG-16 weights, matching layers by name.
    model.load_weights('./weights/VGG_ILSVRC_16_layers_fc_reduced.h5',
                       by_name=True)

    optimizer = Adam(
        lr=training_cfg['learning_rate'],
        beta_1=0.9,
        beta_2=0.999,
        epsilon=1e-08,
        decay=0.0,
    )
    loss_fn = SSDLoss(neg_pos_ratio=3, alpha=1.0)
    model.compile(optimizer=optimizer, loss=loss_fn.compute_loss)

    batch_size = training_cfg['batch_size']

    # Training images are augmented; validation images are only converted to
    # three channels and resized.
    augmentation = SSDDataAugmentation(img_height=img_height,
                                       img_width=img_width,
                                       background=mean_color)
    to_three_channels = ConvertTo3Channels()
    resizer = Resize(height=img_height, width=img_width)

    # Spatial size of each of the six predictor layers, in network order.
    predictor_sizes = [
        model.get_layer(layer_name).output_shape[1:3]
        for layer_name in (
            'conv4_3_norm_mbox_conf',
            'fc7_mbox_conf',
            'conv6_2_mbox_conf',
            'conv7_2_mbox_conf',
            'conv8_2_mbox_conf',
            'conv9_2_mbox_conf',
        )
    ]

    label_encoder = SSDInputEncoder(
        img_height=img_height,
        img_width=img_width,
        n_classes=n_classes,
        predictor_sizes=predictor_sizes,
        scales=scales,
        aspect_ratios_per_layer=aspect_ratios,
        two_boxes_for_ar1=two_boxes_for_ar1,
        steps=steps,
        offsets=offsets,
        clip_boxes=clip_boxes,
        variances=variances,
        matching_type='multi',
        pos_iou_threshold=0.5,
        neg_iou_limit=0.5,
        normalize_coords=normalize_coords,
    )

    train_batches = train_dataset.generate(
        batch_size=batch_size,
        shuffle=True,
        transformations=[augmentation],
        label_encoder=label_encoder,
        returns={'processed_images', 'encoded_labels'},
        keep_images_without_gt=False,
    )
    val_batches = val_dataset.generate(
        batch_size=batch_size,
        shuffle=False,
        transformations=[to_three_channels, resizer],
        label_encoder=label_encoder,
        returns={'processed_images', 'encoded_labels'},
        keep_images_without_gt=False,
    )

    # Report the sample counts of both splits before training starts.
    train_dataset_size = train_dataset.get_dataset_size()
    val_dataset_size = val_dataset.get_dataset_size()

    print(
        f"[INFO]...Number of images in the training dataset: {train_dataset_size}"
    )
    print(
        f"[INFO]...Number of images in the validation dataset: {val_dataset_size}"
    )
    print(
        f"[INFO]...Weights will be saved at {config['training']['weight_save_path']}"
    )

    training_history = model.fit_generator(
        generator=train_batches,
        steps_per_epoch=training_cfg['steps_per_epoch'],
        epochs=training_cfg['epochs'],
        callbacks=callbacks_list,
        validation_data=val_batches,
        validation_steps=ceil(val_dataset_size / batch_size),
    )

    end_train = timer()
    print(
        f"[INFO]...Total time taken by Training Job is {(end_train - start_train)/60:.2f} min(s)"
    )
Exemplo n.º 13
0
def train_VOC(config):
    '''
    Fine-tune an SSD300 model with the settings of the given configuration
    and save the resulting model.

    The configuration must be constructed according to the utility script
    found in utils/generateconfig.py.

    Arguments:
        config : the already loaded configuration; the attributes read here
            are DATA_DIR, ROOT_FOLDER, IMG_SHAPE, CLASSES, CHECKPOINT_NAME,
            MODEL_NAME, BATCH_SIZE and EPOCHS

    '''
    ###################################
    ### PATHS AND PARAMETERS
    ##################################
    data_root = config.DATA_DIR
    project_root = config.ROOT_FOLDER
    input_shape = config.IMG_SHAPE
    class_names = config.CLASSES
    checkpoint_path = os.path.join(project_root, 'models',
                                   config.CHECKPOINT_NAME)
    final_model_path = os.path.join(project_root, 'models', config.MODEL_NAME)

    # Model input geometry: height, width, number of colour channels.
    img_height = input_shape[0]
    img_width = input_shape[1]
    img_channels = input_shape[2]

    # Per-channel mean of the dataset images. Do not change this value when
    # using any of the pre-trained weights.
    mean_color = [123, 117, 104]
    # The original SSD works on BGR input, so the model reverses the colour
    # channel order of the input images.
    swap_channels = [2, 1, 0]
    # Number of positive classes, e.g. 20 for Pascal VOC, 80 for MS COCO.
    n_classes = 20
    # Anchor box scaling factors of the original SSD300 for Pascal VOC.
    scales = [0.1, 0.2, 0.37, 0.54, 0.71, 0.88, 1.05]
    # Anchor box aspect ratios of the original SSD300; the order matters.
    aspect_ratios = [
        [1.0, 2.0, 0.5],
        [1.0, 2.0, 0.5, 3.0, 1.0 / 3.0],
        [1.0, 2.0, 0.5, 3.0, 1.0 / 3.0],
        [1.0, 2.0, 0.5, 3.0, 1.0 / 3.0],
        [1.0, 2.0, 0.5],
        [1.0, 2.0, 0.5],
    ]
    two_boxes_for_ar1 = True
    # Distance between adjacent anchor box centres for each predictor layer.
    steps = [8, 16, 32, 64, 100, 300]
    # Offset of the first anchor box centre from the top/left image border,
    # as a fraction of the step size, for each predictor layer.
    offsets = [0.5, 0.5, 0.5, 0.5, 0.5, 0.5]
    # Whether to clip the anchor boxes to lie entirely inside the image.
    clip_boxes = False
    # Variances by which the encoded target coordinates are divided, as in
    # the original implementation.
    variances = [0.1, 0.1, 0.2, 0.2]
    normalize_coords = True
    # Lower the batch size in the configuration on GPU memory issues.
    batch_size = config.BATCH_SIZE

    ###################################
    ### BUILDING MODEL
    ##################################
    K.clear_session()  # Clear previous models from memory.

    model = ssd_300(
        image_size=(img_height, img_width, img_channels),
        n_classes=n_classes,
        mode='training',
        l2_regularization=0.0005,
        scales=scales,
        aspect_ratios_per_layer=aspect_ratios,
        two_boxes_for_ar1=two_boxes_for_ar1,
        steps=steps,
        offsets=offsets,
        clip_boxes=clip_boxes,
        variances=variances,
        normalize_coords=normalize_coords,
        subtract_mean=mean_color,
        swap_channels=swap_channels,
    )

    pretrained_weights = os.path.join(
        project_root, 'weights', 'VGG_VOC0712_SSD_300x300_iter_120000.h5')
    model.load_weights(pretrained_weights, by_name=True)

    optimizer = SGD(lr=0.001, momentum=0.9, decay=0.0, nesterov=False)
    loss_fn = SSDLoss(neg_pos_ratio=3, alpha=1.0)
    model.compile(optimizer=optimizer, loss=loss_fn.compute_loss)

    ###################################
    ### LOADING DATA
    ##################################
    train_dataset = DataGenerator(load_images_into_memory=False,
                                  hdf5_dataset_path=None)
    val_dataset = DataGenerator(load_images_into_memory=False,
                                hdf5_dataset_path=None)

    images_dir = os.path.join(data_root, 'Images')
    annotations_dir = os.path.join(data_root, 'Annotations')
    train_set_file = os.path.join(data_root, 'ImageSets', 'train.txt')
    val_set_file = os.path.join(data_root, 'ImageSets', 'val.txt')

    # The XML parser needs to know which object class names to look for and
    # in which order to map them to integers.
    train_dataset.parse_xml(
        images_dirs=[images_dir],
        image_set_filenames=[train_set_file],
        annotations_dirs=[annotations_dir],
        classes=class_names,
        include_classes='all',
        exclude_truncated=False,
        exclude_difficult=False,
        ret=False,
    )
    # Unlike the training split, objects marked "difficult" are excluded
    # from the validation split.
    val_dataset.parse_xml(
        images_dirs=[images_dir],
        image_set_filenames=[val_set_file],
        annotations_dirs=[annotations_dir],
        classes=class_names,
        include_classes='all',
        exclude_truncated=False,
        exclude_difficult=True,
        ret=False,
    )

    # Convert both splits into HDF5 datasets.
    train_dataset.create_hdf5_dataset(
        file_path='flowers_train.h5',
        resize=False,
        variable_image_size=True,
        verbose=True,
    )
    val_dataset.create_hdf5_dataset(
        file_path='flowers_val.h5',
        resize=False,
        variable_image_size=True,
        verbose=True,
    )

    augmentation = SSDDataAugmentation(img_height=img_height,
                                       img_width=img_width,
                                       background=mean_color)
    to_three_channels = ConvertTo3Channels()
    resizer = Resize(height=img_height, width=img_width)

    # The encoder constructor needs the spatial dimensions of the model's
    # predictor layers to create the anchor boxes.
    predictor_sizes = [
        model.get_layer(layer_name).output_shape[1:3]
        for layer_name in (
            'conv4_3_norm_mbox_conf',
            'fc7_mbox_conf',
            'conv6_2_mbox_conf',
            'conv7_2_mbox_conf',
            'conv8_2_mbox_conf',
            'conv9_2_mbox_conf',
        )
    ]

    label_encoder = SSDInputEncoder(
        img_height=img_height,
        img_width=img_width,
        n_classes=n_classes,
        predictor_sizes=predictor_sizes,
        scales=scales,
        aspect_ratios_per_layer=aspect_ratios,
        two_boxes_for_ar1=two_boxes_for_ar1,
        steps=steps,
        offsets=offsets,
        clip_boxes=clip_boxes,
        variances=variances,
        matching_type='multi',
        pos_iou_threshold=0.5,
        neg_iou_limit=0.5,
        normalize_coords=normalize_coords,
    )

    train_batches = train_dataset.generate(
        batch_size=batch_size,
        shuffle=True,
        transformations=[augmentation],
        label_encoder=label_encoder,
        returns={'processed_images', 'encoded_labels'},
        keep_images_without_gt=False,
    )
    val_batches = val_dataset.generate(
        batch_size=batch_size,
        shuffle=False,
        transformations=[to_three_channels, resizer],
        label_encoder=label_encoder,
        returns={'processed_images', 'encoded_labels'},
        keep_images_without_gt=False,
    )

    # Get the number of samples in the training and validation datasets.
    n_train = train_dataset.get_dataset_size()
    n_val = val_dataset.get_dataset_size()

    print("Number of images in the training dataset:\t{:>6}".format(n_train))
    print("Number of images in the validation dataset:\t{:>6}".format(n_val))

    ###################################
    ### PREPARE TRAINING
    ##################################

    def lr_schedule(epoch):
        # Step-wise decay: 1e-3 up to epoch 79, 1e-4 up to epoch 99,
        # 1e-5 afterwards.
        if epoch >= 100:
            return 0.00001
        if epoch >= 80:
            return 0.0001
        return 0.001

    # Only the model with the best validation loss so far is written out.
    checkpoint_cb = ModelCheckpoint(
        filepath=checkpoint_path,
        monitor='val_loss',
        verbose=1,
        save_best_only=True,
        save_weights_only=False,
        mode='auto',
        period=1,
    )
    early_stop_cb = EarlyStopping(
        monitor='val_loss',
        min_delta=0.0,
        patience=10,
        verbose=1,
    )
    lr_cb = LearningRateScheduler(schedule=lr_schedule, verbose=1)
    nan_cb = TerminateOnNaN()

    callbacks = [checkpoint_cb, lr_cb, nan_cb, early_stop_cb]

    ###################################
    ### TRAINING
    ##################################
    epochs = config.EPOCHS
    steps_per_epoch = ceil(n_train / batch_size)
    validation_steps = ceil(n_val / batch_size)
    model.summary()
    training_history = model.fit_generator(
        generator=train_batches,
        steps_per_epoch=steps_per_epoch,
        epochs=epochs,
        callbacks=callbacks,
        validation_data=val_batches,
        validation_steps=validation_steps,
    )

    model.save(final_model_path)
Exemplo n.º 14
0
# Spatial size of every predictor ("classes") layer, in network order; the
# encoder uses these to lay out the anchor boxes.
predictor_sizes = [
    model.get_layer(layer_name).output_shape[1:3]
    for layer_name in ('classes4', 'classes5', 'classes6', 'classes7')
]

# Encoder that turns ground-truth boxes into the training targets of the model.
ssd_input_encoder = SSDInputEncoder(img_height=img_height,
                                    img_width=img_width,
                                    n_classes=n_classes,
                                    predictor_sizes=predictor_sizes,
                                    scales=scales,
                                    aspect_ratios_global=aspect_ratios,
                                    two_boxes_for_ar1=two_boxes_for_ar1,
                                    steps=steps,
                                    offsets=offsets,
                                    # What gets clipped here are the anchor boxes.
                                    clip_boxes=clip_boxes,
                                    variances=variances,
                                    matching_type='multi',
                                    pos_iou_threshold=0.5,
                                    neg_iou_limit=0.3,
                                    border_pixels='half',
                                    coords='centroids',
                                    normalize_coords=normalize_coords)

# 6: Create the generator handles that will be passed to Keras' `fit_generator()` function.
train_generator = train_dataset.generate(
    batch_size=batch_size,
    shuffle=True,
    transformations=(data_augmentation_chain, ),
def train(data_path):
    """Train the two-output model on the dataset stored under ``data_path``.

    The dataset is read from ``<path2>/dataset/<data_path>/`` using the
    sub-folders ``original`` (images), ``csv/labels_train.csv`` (labels) and
    ``mask_richtig_4`` (ground-truth masks). The model returned by
    ``init_Model(trained_weight_path)`` is fitted on the images against two
    targets, the SSD-encoded labels and the masks, and the best checkpoint
    (by ``val_loss``) is written to
    ``<path2>/dataset/<data_path>/weight/ssd_segmentation_detection.h5``.

    Args:
        data_path: Name of the dataset folder below ``<path2>/dataset``.

    Returns:
        None. The trained model is only persisted through the checkpoint
        callback.
    """
    K.clear_session()  # Clear previous models from memory.
    model = init_Model(trained_weight_path)

    dataset_root = os.path.join(path2, 'dataset', data_path)
    data_input_class = Data_Input_Class(
        labels_filename=os.path.join(dataset_root, 'csv', 'labels_train.csv'),
        images_dir=os.path.join(dataset_root, 'original'),
        mask_groundTruth_Path=os.path.join(dataset_root, 'mask_richtig_4'))

    # parse_csv() yields four sequences; only the images are passed to fit(),
    # so the other three are not converted to arrays.
    images, _filenames, _labels, _image_ids = data_input_class.parse_csv(ret=True)
    images = np.asarray(images)

    predictor_sizes = [
        model.get_layer(layer_name).output_shape[1:3]
        for layer_name in ('classes3_3_add', 'classes4', 'classes5',
                           'classes6', 'classes7')
    ]

    ssd_input_encoder = SSDInputEncoder(img_height=img_height,
                                        img_width=img_width,
                                        n_classes=n_classes,
                                        predictor_sizes=predictor_sizes,
                                        scales=scales,
                                        aspect_ratios_global=aspect_ratios,
                                        two_boxes_for_ar1=two_boxes_for_ar1,
                                        steps=steps,
                                        offsets=offsets,
                                        clip_boxes=clip_boxes,
                                        variances=variances,
                                        matching_type='multi',
                                        pos_iou_threshold=0.5,
                                        neg_iou_limit=0.3,
                                        normalize_coords=normalize_coords)

    # The first element returned by get_encoded_boxlabel() is not used:
    # fit() below is fed `images` directly.
    # NOTE(review): batch_size=200 is presumably meant to cover the whole
    # dataset so the encoded labels line up with `images` -- confirm.
    _, batch_y_encoded = data_input_class.get_encoded_boxlabel(
        batch_size=200, label_encoder=ssd_input_encoder)

    mask, class_weight = data_input_class.get_mask_label()

    print('shape of mask:', mask.shape)
    print('shape of class_weight:', class_weight.shape)

    epoch = 50
    batch_size = 1

    weight_saved_path = os.path.join(dataset_root, 'weight',
                                     'ssd_segmentation_detection.h5')
    chk = keras.callbacks.ModelCheckpoint(weight_saved_path,
                                          monitor='val_loss',
                                          verbose=0,
                                          save_best_only=True,
                                          save_weights_only=False,
                                          mode='auto',
                                          period=1)

    # NOTE(review): only the checkpoint callback is registered; no
    # learning-rate schedule / ReduceLROnPlateau is applied during training.
    # class_weight is applied to the mask output only (None for the first
    # output).
    model.fit(images,
              [batch_y_encoded, mask],
              validation_split=0.2,
              epochs=epoch,
              batch_size=batch_size,
              callbacks=[chk],
              verbose=1,
              class_weight=[None, class_weight],
              shuffle=True)
        model.get_layer('conv4_3_norm_mbox_conf').output_shape[1:3],
        model.get_layer('fc7_mbox_conf').output_shape[1:3],
        model.get_layer('conv6_2_mbox_conf').output_shape[1:3],
        model.get_layer('conv7_2_mbox_conf').output_shape[1:3],
        model.get_layer('conv8_2_mbox_conf').output_shape[1:3],
        model.get_layer('conv9_2_mbox_conf').output_shape[1:3]
    ]

    ssd_input_encoder = SSDInputEncoder(img_height=img_height,
                                        img_width=img_width,
                                        n_classes=n_classes,
                                        predictor_sizes=predictor_sizes,
                                        scales=scales,
                                        aspect_ratios_per_layer=aspect_ratios,
                                        two_boxes_for_ar1=two_boxes_for_ar1,
                                        steps=steps,
                                        offsets=offsets,
                                        clip_boxes=clip_boxes,
                                        variances=variances,
                                        matching_type='multi',
                                        pos_iou_threshold=0.5,
                                        neg_iou_limit=0.5,
                                        normalize_coords=normalize_coords)

    # 6: Create the generator handles that will be passed to Keras' `fit_generator()` function.

    train_generator = train_dataset.generate(
        batch_size=batch_size,
        shuffle=True,
        transformations=[ssd_data_augmentation],
        label_encoder=ssd_input_encoder,
Exemplo n.º 17
0
	def load_VOC_IMG_generators(self,model):
		"""Build the train/val/test batch generators for a VOC-style dataset.

		Reads ``self.datas['DATA_PATH']`` (dataset root containing ``Images``,
		``Annotations`` and ``ImageSets/{train,val,test}.txt``),
		``self.datas['CLASSES']`` and ``self.datas['GENERATOR']`` (keys
		``TARGET_SIZE`` and ``BATCH_SIZE``).

		Args:
			model: Model exposing the six ``*_mbox_conf`` layers listed below;
				slice ``[1:3]`` of their output shapes is used as the
				encoder's predictor sizes.

		Returns:
			A 3-tuple ``([train_generator, train_size], [val_generator,
			val_size], [test_generator, test_size])`` where each size is the
			``get_dataset_size()`` of the matching dataset.
		"""
		print('Making VOC image generators')
		datadir = self.datas['DATA_PATH']
		generator_options = self.datas['GENERATOR']
		images_dir = os.path.join(datadir, 'Images')
		annotations_dir = os.path.join(datadir, 'Annotations')

		def load_split(split_name):
			# Parse ImageSets/<split_name>.txt into its own DataGenerator.
			dataset = DataGenerator(load_images_into_memory=False, hdf5_dataset_path=None)
			dataset.parse_xml(images_dirs=[images_dir],
				image_set_filenames=[os.path.join(datadir, 'ImageSets', split_name + '.txt')],
				annotations_dirs=[annotations_dir],
				classes=self.datas['CLASSES'],
				include_classes='all',
				exclude_truncated=False,
				exclude_difficult=False,
				ret=False)
			return dataset

		train_dataset = load_split('train')
		val_dataset = load_split('val')
		test_dataset = load_split('test')

		convert_to_3_channels = ConvertTo3Channels()
		target_size = generator_options['TARGET_SIZE']  # used as (height, width) below
		resize = Resize(height=target_size[0], width=target_size[1])

		predictor_layer_names = ('conv4_3_norm_mbox_conf',
			'fc7_mbox_conf',
			'conv6_2_mbox_conf',
			'conv7_2_mbox_conf',
			'conv8_2_mbox_conf',
			'conv9_2_mbox_conf')
		predictor_sizes = [model.get_layer(layer_name).output_shape[1:3]
			for layer_name in predictor_layer_names]

		# The anchor box scaling factors used in the original SSD300 for the Pascal VOC datasets
		scales = [0.1, 0.2, 0.37, 0.54, 0.71, 0.88, 1.05]
		# The anchor box aspect ratios used in the original SSD300; the order matters
		aspect_ratios = [[1.0, 2.0, 0.5],
			[1.0, 2.0, 0.5, 3.0, 1.0/3.0],
			[1.0, 2.0, 0.5, 3.0, 1.0/3.0],
			[1.0, 2.0, 0.5, 3.0, 1.0/3.0],
			[1.0, 2.0, 0.5],
			[1.0, 2.0, 0.5]]
		# The space between two adjacent anchor box center points for each predictor layer.
		steps = [8, 16, 32, 64, 100, 300]
		two_boxes_for_ar1 = True
		# TODO : the mean color [123,117,104] is not applied anywhere yet; add it as a parameter
		offsets = [0.5, 0.5, 0.5, 0.5, 0.5, 0.5]
		clip_boxes = False
		variances = [0.1, 0.1, 0.2, 0.2]
		normalize_coords = True

		ssd_input_encoder = SSDInputEncoder(img_height=target_size[0],
			img_width=target_size[1],
			n_classes=20, #TODO : handle subsampling
			predictor_sizes=predictor_sizes,
			scales=scales,
			aspect_ratios_per_layer=aspect_ratios,
			two_boxes_for_ar1=two_boxes_for_ar1,
			steps=steps,
			offsets=offsets,
			clip_boxes=clip_boxes,
			variances=variances,
			matching_type='multi',
			pos_iou_threshold=0.5,
			neg_iou_limit=0.5,
			normalize_coords=normalize_coords)

		def make_generator(dataset):
			# All three splits share the same transformations, encoder and batch size.
			return dataset.generate(batch_size=generator_options['BATCH_SIZE'],
				shuffle=True,
				transformations=[convert_to_3_channels, resize],
				label_encoder=ssd_input_encoder,
				returns={'processed_images', 'encoded_labels'},
				keep_images_without_gt=False)

		train_generator = make_generator(train_dataset)
		val_generator = make_generator(val_dataset)
		test_generator = make_generator(test_dataset)

		# Each generator is paired with the size of its OWN dataset (the test
		# entry previously reported the training set size).
		return ([train_generator, train_dataset.get_dataset_size()],
			[val_generator, val_dataset.get_dataset_size()],
			[test_generator, test_dataset.get_dataset_size()])