Example #1
    def load_loss_function(self, model=None):
        name = self.conf['NAME']
        if name in keras_losses:
            loss = name
        else:
            loss_options = self.conf
            if name == "RCNN_LOSS":
                model.keras_model._losses = []
                model.keras_model._per_input_losses = {}
                loss_names = [
                    "rpn_class_loss", "rpn_bbox_loss", "mrcnn_class_loss",
                    "mrcnn_bbox_loss", "mrcnn_mask_loss"
                ]
                for loss_name in loss_names:
                    layer = model.keras_model.get_layer(loss_name)
                    if layer.output in model.keras_model.losses:
                        continue
                    loss = (tf.reduce_mean(layer.output, keepdims=True) *
                            model.config.LOSS_WEIGHTS.get(loss_name, 1))
                    model.keras_model.add_loss(loss)
            elif name == "SSD_LOSS":
                loss = SSDLoss(neg_pos_ratio=loss_options['NPR'],
                               alpha=loss_options['ALPHA'])
                loss = loss.compute_loss

        # print(' VERBOSE LOSS : ',loss)
        return loss
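
    # Hypothetical usage (the key names `NAME`, `NPR` and `ALPHA` come from the code above;
    # everything else is illustrative):
    #
    #   self.conf = {'NAME': 'SSD_LOSS', 'NPR': 3, 'ALPHA': 1.0}
    #   loss = self.load_loss_function()
    #   some_keras_model.compile(optimizer='adam', loss=loss)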
Example #2
def get_model(dataset):
    """

    :return: keras_ssd300.ssd300 model
    """

    if dataset == "VOC":
        ssd300_nclasses = 20
        ssd300_scales = [0.1, 0.2, 0.37, 0.54, 0.71, 0.88, 1.05]
        weights_filename = VOC_WEIGHTS_FILENAME
    elif dataset == "COCO":
        ssd300_nclasses = 80
        ssd300_scales = [0.07, 0.15, 0.33, 0.51, 0.69, 0.87, 1.05]
        weights_filename = COCO_WEIGHTS_FILENAME
    else:
        raise ValueError("Unrecognised dataset: {}".format(dataset))

    K.clear_session()  # Clear previous models from memory.

    model = ssd_300(image_size=(IMAGE_HEIGHT, IMAGE_WIDTH, 3),
                    n_classes=ssd300_nclasses,
                    l2_regularization=0.0005,
                    scales=ssd300_scales,  # The scales for MS COCO are [0.07, 0.15, 0.33, 0.51, 0.69, 0.87, 1.05]
                    aspect_ratios_per_layer=[[1.0, 2.0, 0.5],
                                             [1.0, 2.0, 0.5, 3.0, 1.0 / 3.0],
                                             [1.0, 2.0, 0.5, 3.0, 1.0 / 3.0],
                                             [1.0, 2.0, 0.5, 3.0, 1.0 / 3.0],
                                             [1.0, 2.0, 0.5],
                                             [1.0, 2.0, 0.5]],
                    two_boxes_for_ar1=True,
                    steps=[8, 16, 32, 64, 100, 300],
                    offsets=[0.5, 0.5, 0.5, 0.5, 0.5, 0.5],
                    limit_boxes=False,
                    variances=[0.1, 0.1, 0.2, 0.2],
                    coords='centroids',
                    normalize_coords=True,
                    subtract_mean=[123, 117, 104],
                    swap_channels=True)

    # 2: Load the trained weights into the model.

    # TODO: Set the path of the trained weights.

    model.load_weights(weights_filename, by_name=True)

    # 3: Compile the model so that Keras won't complain the next time you load it.

    adam = Adam(lr=0.001, beta_1=0.9, beta_2=0.999, epsilon=1e-08, decay=5e-04)

    ssd_loss = SSDLoss(neg_pos_ratio=3, n_neg_min=0, alpha=1.0)

    model.compile(optimizer=adam, loss=ssd_loss.compute_loss)

    return model
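
# Hypothetical usage (assumes the weight files referenced by VOC_WEIGHTS_FILENAME /
# COCO_WEIGHTS_FILENAME exist on disk):
#
#   voc_model = get_model("VOC")    # 20 classes, SSD300 scales for Pascal VOC
#   coco_model = get_model("COCO")  # 80 classes, SSD300 scales for MS COCO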
Example #3
    def __load_model(self):
        # 2: Build the Keras model (and possibly load some trained weights)
        K.clear_session()  # Clear previous models from memory.
        # The output `predictor_sizes` is needed below to set up `SSDBoxEncoder`
        model, predictor_sizes = ssd_300(
            image_size=(self.img_train_height, self.img_train_width,
                        self.img_channels),
            n_classes=self.n_classes,
            min_scale=None,  # You could pass a min scale and max scale instead of the `scales` list, but we're not doing that here
            max_scale=None,
            scales=self.scales,
            aspect_ratios_global=None,
            aspect_ratios_per_layer=self.aspect_ratios,
            two_boxes_for_ar1=self.two_boxes_for_ar1,
            limit_boxes=self.limit_boxes,
            variances=self.variances,
            coords=self.coords,
            normalize_coords=self.normalize_coords)
        # Set the path to the VGG-16 weights below.
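        # For instance (hypothetical filename, mirroring the other snippets on this page):
        #
        #   vgg16_path = 'vgg-16_ssd-fcn_ILSVRC-CLS-LOC.h5'
        #   model.load_weights(vgg16_path, by_name=True)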

        ### Set up training

        # 3: Instantiate an Adam optimizer and the SSD loss function and compile the model
        adam = Adam(lr=0.001,
                    beta_1=0.9,
                    beta_2=0.999,
                    epsilon=1e-08,
                    decay=5e-04)
        ssd_loss = SSDLoss(neg_pos_ratio=3, n_neg_min=0, alpha=0.1)
        model.compile(optimizer=adam, loss=ssd_loss.compute_loss)

        # 4: Instantiate an encoder that can encode ground truth labels into the format needed by the SSD loss function

        ssd_box_encoder = SSDBoxEncoder(
            img_height=self.img_train_height,
            img_width=self.img_train_width,
            n_classes=self.n_classes,
            predictor_sizes=predictor_sizes,  # the sizes returned by ssd_300 above
            min_scale=None,
            max_scale=None,
            scales=self.scales,
            aspect_ratios_global=None,
            aspect_ratios_per_layer=self.aspect_ratios,
            two_boxes_for_ar1=self.two_boxes_for_ar1,
            limit_boxes=self.limit_boxes,
            variances=self.variances,
            pos_iou_threshold=0.5,
            neg_iou_threshold=0.2,
            coords=self.coords,
            normalize_coords=self.normalize_coords)
Example #4
#
# In order to train the model on your own data, just set the paths to the image files and labels in the batch generator and read the documentation so you know what label format the generator expects. Also, make sure that your images are whatever size you need, or use the resizing feature of the generator. The data augmentation features available in the generator are not identical to the techniques described in the paper, but they produce similar effects and work well nonetheless. If there is anything you don't understand, check out the documentation; a sketch for custom CSV data follows the caution note below.
#
# Caution: I would not recommend trying to train the model from scratch as it is now; it would likely learn nothing. You either need to load pre-trained weights for the modified VGG-16 base network as they did in the paper, or you need to modify the network to use dropout, batch normalization, decreased depth, and/or play around with weight initialization to train from scratch.
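#
# For custom data in CSV format, a minimal sketch might look like the following (the file paths are
# placeholders, and `input_format` must match the column order of your own labels file):
#
# train_dataset = BatchGenerator(box_output_format=['class_id', 'xmin', 'ymin', 'xmax', 'ymax'])
# train_dataset.parse_csv(images_dir='path/to/images/',
#                         labels_filename='path/to/labels.csv',
#                         input_format=['image_name', 'class_id', 'xmin', 'xmax', 'ymin', 'ymax'])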

# In[ ]:

### Set up training

batch_size = 32

# 3: Instantiate an Adam optimizer and the SSD loss function and compile the model

adam = Adam(lr=0.001, beta_1=0.9, beta_2=0.999, epsilon=1e-08, decay=5e-05)

ssd_loss = SSDLoss(neg_pos_ratio=3, n_neg_min=0, alpha=0.1)

model.compile(optimizer=adam, loss=ssd_loss.compute_loss)

# 4: Instantiate an encoder that can encode ground truth labels into the format needed by the SSD loss function

ssd_box_encoder = SSDBoxEncoder(img_height=img_height,
                                img_width=img_width,
                                n_classes=n_classes,
                                predictor_sizes=predictor_sizes,
                                min_scale=None,
                                max_scale=None,
                                scales=scales,
                                aspect_ratios_global=None,
                                aspect_ratios_per_layer=aspect_ratios,
                                two_boxes_for_ar1=two_boxes_for_ar1,
Example #5
def train_hs512(lr=1e-4,
                freeze_bn=False,
                optim=None,
                batch_size=8,
                weights_path=None,
                save_weights_only=True,
                epochs=25):
    if weights_path is None:
        weights_path = 'VGG_VOC0712Plus_SSD_512x512_ft_iter_160000.h5'

    img_height = 512  # 1088 // 2  # Height of the input images
    img_width = 512  # 2048 // 2  # Width of the input images
    img_channels = 3  # Number of color channels of the input images
    # subtract_mean = [104, 117, 123] # The per-channel mean of the images in the dataset
    subtract_mean = [138, 138, 138]  # The per-channel mean of the images in the dataset
    swap_channels = False
    n_classes = 20  # The number of positive classes, e.g. 20 for Pascal VOC, 80 for MS COCO.
    scales = [0.04, 0.1, 0.26, 0.42, 0.58, 0.74, 0.9, 1.06]  # MS COCO scales
    # scales = [0.07, 0.15, 0.3, 0.45, 0.6, 0.75, 0.9, 1.05]
    aspect_ratios = [
        [1.0, 2.0, 0.5],
        [1.0, 2.0, 0.5, 3.0, 1.0 / 3.0],
        [1.0, 2.0, 0.5, 3.0, 1.0 / 3.0],
        [1.0, 2.0, 0.5, 3.0, 1.0 / 3.0],
        [1.0, 2.0, 0.5, 3.0, 1.0 / 3.0],
        [1.0, 2.0, 0.5],
        [1.0, 2.0, 0.5]
    ]  # The anchor box aspect ratios used in the original SSD512; the order matters
    two_boxes_for_ar1 = True
    # The space between two adjacent anchor box center points for each predictor layer.
    steps = [8, 16, 32, 64, 128, 256, 512]
    offsets = [0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5]
    limit_boxes = False  # Whether or not you want to limit the anchor boxes to lie entirely within the image boundaries
    variances = [0.1, 0.1, 0.2, 0.2]
    coords = 'centroids'
    normalize_coords = True

    # 1: Build the Keras model
    K.clear_session()
    model, pred_sizes = ssd_512(image_size=(img_height, img_width,
                                            img_channels),
                                n_classes=n_classes,
                                l2_regularization=0.0005,
                                scales=scales,
                                aspect_ratios_per_layer=aspect_ratios,
                                two_boxes_for_ar1=two_boxes_for_ar1,
                                steps=steps,
                                offsets=offsets,
                                limit_boxes=limit_boxes,
                                variances=variances,
                                coords=coords,
                                normalize_coords=normalize_coords,
                                subtract_mean=None,
                                divide_by_stddev=None,
                                swap_channels=swap_channels,
                                return_predictor_sizes=True)

    # 2: Load the trained VGG-16 weights into the model.
    model.load_weights(weights_path, by_name=True)

    # 3: Instantiate the optimizer and the SSD loss function and compile the model

    if optim is None:
        optim = SGD(lr=lr, momentum=0.9)
    ssd_loss = SSDLoss(neg_pos_ratio=3, n_neg_min=0, alpha=1.0)

    # 4: freeze the base model if needed
    if freeze_bn:
        for l in model.layers[:38]:
            l.trainable = False

    # 5: compile model
    model.compile(optimizer=optim, loss=ssd_loss.compute_loss)

    ## Prepare data generation

    # 1: Instantiate two `BatchGenerator` objects: one for training, one for validation.

    train_dataset = BatchGenerator(
        box_output_format=['class_id', 'xmin', 'ymin', 'xmax', 'ymax'])
    val_dataset = BatchGenerator(
        box_output_format=['class_id', 'xmin', 'ymin', 'xmax', 'ymax'])

    # 2: Parse the image and label lists for the training and validation datasets. This can take a while.

    images_path_root = './Datasets/'
    train_combined_labels = './Datasets/train_combined_ssd_512.txt'
    val_labels = './Datasets/val_ssd_512.txt'

    train_dataset.parse_csv(images_dir=images_path_root,
                            labels_filename=train_combined_labels,
                            input_format=[
                                'image_name', 'class_id', 'xmin', 'xmax',
                                'ymin', 'ymax'
                            ])

    val_dataset.parse_csv(images_dir=images_path_root,
                          labels_filename=val_labels,
                          input_format=[
                              'image_name', 'class_id', 'xmin', 'xmax', 'ymin',
                              'ymax'
                          ])

    # 3: Instantiate an encoder that can encode ground truth labels into the format needed by the SSD loss function.

    ssd_box_encoder = SSDBoxEncoder(img_height=img_height,
                                    img_width=img_width,
                                    n_classes=n_classes,
                                    predictor_sizes=pred_sizes,
                                    min_scale=None,
                                    max_scale=None,
                                    scales=scales,
                                    aspect_ratios_global=None,
                                    aspect_ratios_per_layer=aspect_ratios,
                                    two_boxes_for_ar1=two_boxes_for_ar1,
                                    steps=steps,
                                    offsets=offsets,
                                    limit_boxes=limit_boxes,
                                    variances=variances,
                                    pos_iou_threshold=0.5,
                                    neg_iou_threshold=0.2,
                                    coords=coords,
                                    normalize_coords=normalize_coords)

    # 4: Set the image processing / data augmentation options and create generator handles.

    train_generator = train_dataset.generate(
        batch_size=batch_size,
        shuffle=True,
        train=True,
        ssd_box_encoder=ssd_box_encoder,
        equalize=False,
        brightness=(0.5, 2, 0.5),
        flip=0.5,
        translate=False,
        scale=False,
        max_crop_and_resize=(img_height, img_width, 1, 3),
        # This one is important because the Pascal VOC images vary in size
        random_pad_and_resize=(img_height, img_width, 1, 3, 0.5),
        # This one is important because the Pascal VOC images vary in size
        random_crop=False,
        crop=False,
        resize=False,
        gray=True,
        limit_boxes=True,
        # While the anchor boxes are not being clipped, the ground truth boxes should be
        include_thresh=0.4)

    val_generator = val_dataset.generate(
        batch_size=batch_size,
        shuffle=True,
        train=True,
        ssd_box_encoder=ssd_box_encoder,
        equalize=False,
        brightness=False,
        flip=False,
        translate=False,
        scale=False,
        max_crop_and_resize=(img_height, img_width, 1, 3),
        # This one is important because the Pascal VOC images vary in size
        random_pad_and_resize=(img_height, img_width, 1, 3, 0.5),
        # This one is important because the Pascal VOC images vary in size
        random_crop=False,
        crop=False,
        resize=False,
        gray=True,
        limit_boxes=True,
        include_thresh=0.4)

    # Get the number of samples in the training and validation datasets to compute the epoch lengths below.
    n_train_samples = train_dataset.get_n_samples()
    n_val_samples = val_dataset.get_n_samples()

    # ## 4. Run the training

    fingerprint = 'ssd{:%Y-%m-%d_%H-%M-%S}'.format(datetime.datetime.now())

    tbCallBack = TensorBoard(log_dir='./Graph/{}'.format(fingerprint),
                             histogram_freq=0,
                             batch_size=batch_size,
                             write_graph=True)

    checkpointer = ModelCheckpoint(
        './saved/{{val_loss:.2f}}__{}_best_weights.h5'.format(fingerprint),
        monitor='val_loss',
        verbose=1,
        save_best_only=save_weights_only,
        save_weights_only=True,
        mode='auto',
        period=1)

    learning_rate_reduction = ReduceLROnPlateau(monitor='val_loss',
                                                patience=5,
                                                verbose=1,
                                                factor=0.5,
                                                min_lr=1e-6)

    stopper = EarlyStopping(monitor='val_loss', min_delta=0.001, patience=10)

    history = model.fit_generator(
        generator=train_generator,
        steps_per_epoch=ceil(n_train_samples / batch_size),
        epochs=epochs,
        callbacks=[checkpointer, learning_rate_reduction, stopper, tbCallBack],
        validation_data=val_generator,
        validation_steps=ceil(n_val_samples / batch_size))
Example #6
def visualize_and_save_filters(model_path, name=None, layer_names=None):
    # load the trained SSD model (with its custom layers and the SSD loss)
    ssd_loss = SSDLoss(neg_pos_ratio=3, n_neg_min=0, alpha=1.0)

    K.clear_session()  # Clear previous models from memory.

    model = load_model(model_path, custom_objects={'AnchorBoxes': AnchorBoxes,
                                                   'L2Normalization': L2Normalization,
                                                   'compute_loss': ssd_loss.compute_loss})

    print('Model loaded.')

    model.summary()

    # this is the placeholder for the input images
    input_img = model.input

    # get the symbolic outputs of each "key" layer (we gave them unique names).
    layer_dict = dict([(layer.name, layer) for layer in model.layers[1:]])

    for layer_name in layer_names:
        directory = 'filters/{}/{}'.format(name, layer_name)
        if not os.path.exists(directory):
            os.makedirs(directory)
        kept_filters = []
        for filter_index in range(256):
            # we only scan through the first 256 filters,
            # even though some layers contain more
            print('Processing filter %d' % filter_index)
            start_time = time.time()

            # we build a loss function that maximizes the activation
            # of the nth filter of the layer considered
            layer_output = layer_dict[layer_name].output
            if K.image_data_format() == 'channels_first':
                loss = K.mean(layer_output[:, filter_index, :, :])
            else:
                loss = K.mean(layer_output[:, :, :, filter_index])

            # we compute the gradient of the input picture wrt this loss
            grads = K.gradients(loss, input_img)[0]

            # normalization trick: we normalize the gradient
            grads = normalize(grads)
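            # (Not defined in this snippet.) `normalize` is assumed to be the usual helper from the
            # Keras filter-visualization example, roughly:
            #   def normalize(x):
            #       return x / (K.sqrt(K.mean(K.square(x))) + 1e-5)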

            # this function returns the loss and grads given the input picture
            iterate = K.function([input_img], [loss, grads])

            # step size for gradient ascent
            step = 1.

            # we start from a gray image with some random noise
            if K.image_data_format() == 'channels_first':
                input_img_data = np.random.random((1, 3, img_width, img_height))
            else:
                input_img_data = np.random.random((1, img_width, img_height, 3))
            input_img_data = (input_img_data - 0.5) * 20 + 128

            # we run gradient ascent for up to 100 steps
            for i in range(100):
                loss_value, grads_value = iterate([input_img_data])
                input_img_data += grads_value * step

                print('Current loss value:', loss_value)
                if loss_value <= 0.:
                    # some filters get stuck at 0, so we can skip them
                    break

            # decode the resulting input image
            if loss_value > 0:
                img = deprocess_image(input_img_data[0])
                kept_filters.append((img, loss_value))
            end_time = time.time()
            print('Filter %d processed in %ds' % (filter_index, end_time - start_time))

        # we will stitch the best 64 filters on an 8 x 8 grid.
        n = 8

        # the filters that have the highest loss are assumed to be better-looking.
        # we will only keep the top 64 filters.
        kept_filters.sort(key=lambda x: x[1], reverse=True)
        kept_filters = kept_filters[:n * n]

        # build a black picture with enough space for
        # our 8 x 8 filters of size img_width x img_height, with a 5px margin in between
        margin = 5
        width = n * img_width + (n - 1) * margin
        height = n * img_height + (n - 1) * margin
        stitched_filters = np.zeros((width, height, 3))

        # fill the picture with our saved filters

        for i in range(n):
            for j in range(n):
                idx = i * n + j
                if idx >= len(kept_filters):
                    break

                # print(idx, len(kept_filters))
                img, loss = kept_filters[idx]
                imsave('{}/{}.png'.format(directory, str(idx)), img)
                stitched_filters[(img_width + margin) * i: (img_width + margin) * i + img_width,
                                 (img_height + margin) * j: (img_height + margin) * j + img_height, :] = img

        # save the result to disk
        imsave('filters/filters_{}_{}.png'.format(name, layer_name), stitched_filters)
Example #7
    root=root,
    data_folder=['VOC2007', 'VOC2012'],
    target_size=target_size,
    batch_size=batch_size,
    shuffle=False,
    priors=priors)

steps_per_epoch = len(train_generator)
validation_steps = len(val_generator)
#%*****************************************************************************
model = SSD300(input_shape=(300, 300, 3), num_classes=21)

model.load_weights('VGG_ILSVRC_16_layers_fc_reduced.h5', by_name=True)
#model.summary()

loss_fn = SSDLoss()

model.compile(optimizer=SGD(lr=initial_lr,
                            momentum=0.9,
                            nesterov=True,
                            decay=1e-5),
              loss=loss_fn.compute_loss)
#%%****************************************************************************
experiment_name = '{}_SSD300_VOC_BS-{}_EP-{}'.format(exp, batch_size,
                                                     num_epochs)

records = Path.cwd() / 'records'
checkpoint_path = Path.cwd() / 'checkpoints'

if not records.exists():
    records.mkdir()
    offsets=config.offsets,
    clip_boxes=config.clip_boxes,
    variances=config.variances,
    normalize_coords=config.normalize_coords,
    return_predictor_sizes=True)

parser = Tfrpaser(config=config,
                  predictor_sizes=predictor_sizes,
                  num_classes=num_classes,
                  batch_size=batch_size,
                  preprocess_input=preprocess_input)

dataset = parser.parse_tfrecords(
    filename=os.path.join(os.getcwd(), 'DATA', 'train*.tfrecord'))

ssd_loss = SSDLoss(neg_pos_ratio=3, alpha=1.0)

# sgd = SGD(lr=0.001, momentum=0.9, decay=0.0, nesterov=False)
# model.compile(optimizer=sgd, loss=ssd_loss.compute_loss)
model.compile(optimizer=Adam(lr=0.001, clipnorm=0.001),
              loss=ssd_loss.compute_loss)

history = model.fit(
    dataset,
    steps_per_epoch=steps_per_epoch,
    epochs=final_epoch,
    # callbacks=callbacks,
    # validation_data=val_generator,
    # validation_steps=ceil(val_dataset_size/batch_size),
    initial_epoch=initial_epoch)
                                 variances=variances,
                                 coords=coords,
                                 normalize_coords=normalize_coords)

# 2: Load the trained VGG-16 weights into the model.

# TODO: Set the path to the VGG-16 weights.
# vgg16_path = 'vgg-16_ssd-fcn_ILSVRC-CLS-LOC.h5'

# model.load_weights(vgg16_path, by_name=True)

# 3: Instantiate an Adam optimizer and the SSD loss function and compile the model

adam = Adam(lr=0.001, beta_1=0.9, beta_2=0.999, epsilon=1e-08, decay=5e-04)

ssd_loss = SSDLoss(neg_pos_ratio=10, n_neg_min=0, alpha=1.0)

model.compile(optimizer=adam, loss=ssd_loss.compute_loss)

# TODO: Set the path to the `.h5` file of the model to be loaded.
# model_path = 'ssd300.h5'
#
# # We need to create an SSDLoss object in order to pass that to the model loader.
# ssd_loss = SSDLoss(neg_pos_ratio=3, n_neg_min=0, alpha=1.0)
#
# K.clear_session() # Clear previous models from memory.
#
# model = load_model(model_path, custom_objects={'AnchorBoxes': AnchorBoxes,
#                                                'L2Normalization': L2Normalization,
#                                                'compute_loss': ssd_loss.compute_loss})
def train(args):
    model = ssd_300(mode='training',
                    image_size=(img_height, img_width, img_channels),
                    n_classes=n_classes,
                    l2_regularization=0.0005,
                    scales=scales,
                    aspect_ratios_per_layer=aspect_ratios,
                    two_boxes_for_ar1=two_boxes_for_ar1,
                    steps=steps,
                    offsets=offsets,
                    limit_boxes=limit_boxes,
                    variances=variances,
                    coords=coords,
                    normalize_coords=normalize_coords,
                    subtract_mean=subtract_mean,
                    divide_by_stddev=None,
                    swap_channels=swap_channels)

    model.load_weights(args.weight_file, by_name=True, skip_mismatch=True)

    predictor_sizes = [
        model.get_layer('conv11_mbox_conf').output_shape[1:3],
        model.get_layer('conv13_mbox_conf').output_shape[1:3],
        model.get_layer('conv14_2_mbox_conf').output_shape[1:3],
        model.get_layer('conv15_2_mbox_conf').output_shape[1:3],
        model.get_layer('conv16_2_mbox_conf').output_shape[1:3],
        model.get_layer('conv17_2_mbox_conf').output_shape[1:3]
    ]

    adam = Adam(lr=0.001, beta_1=0.9, beta_2=0.999, epsilon=1e-08, decay=5e-04)

    ssd_loss = SSDLoss(neg_pos_ratio=3, n_neg_min=0, alpha=1.0)

    model.compile(optimizer=adam,
                  loss=ssd_loss.compute_loss,
                  metrics=['accuracy'])

    train_dataset = BatchGenerator(
        box_output_format=['class_id', 'xmin', 'ymin', 'xmax', 'ymax'])
    val_dataset = BatchGenerator(
        box_output_format=['class_id', 'xmin', 'ymin', 'xmax', 'ymax'])

    # 2: Parse the image and label lists for the training and validation datasets. This can take a while.

    # TODO: Set the paths to the datasets here.

    COCO_format_val_images_dir = args.ms_coco_dir_path + '/val/'
    COCO_format_train_images_dir = args.ms_coco_dir_path + '/train/'
    COCO_format_train_annotation_dir = args.ms_coco_dir_path + '/annotations/train.json'
    COCO_format_val_annotation_dir = args.ms_coco_dir_path + '/annotations/val.json'

    VOC_2007_images_dir = args.voc_dir_path + '/VOC2007/JPEGImages/'
    VOC_2012_images_dir = args.voc_dir_path + '/VOC2012/JPEGImages/'

    # The directories that contain the annotations.
    VOC_2007_annotations_dir = args.voc_dir_path + '/VOC2007/Annotations/'
    VOC_2012_annotations_dir = args.voc_dir_path + '/VOC2012/Annotations/'

    # The paths to the image sets.
    VOC_2007_train_image_set_filename = args.voc_dir_path + '/VOC2007/ImageSets/Main/trainval.txt'
    VOC_2012_train_image_set_filename = args.voc_dir_path + '/VOC2012/ImageSets/Main/trainval.txt'

    VOC_2007_val_image_set_filename = args.voc_dir_path + '/VOC2007/ImageSets/Main/test.txt'

    # The parser needs to know what object class names to look for and in which order to map them to integers.

    classes = [
        'background', 'aeroplane', 'bicycle', 'bird', 'boat', 'bottle', 'bus',
        'car', 'cat', 'chair', 'cow', 'diningtable', 'dog', 'horse',
        'motorbike', 'person', 'pottedplant', 'sheep', 'sofa', 'train',
        'tvmonitor'
    ]
    '''
          This is a JSON parser for the MS COCO datasets. It might be applicable to other datasets with minor changes to
          the code, but in its current form it expects the JSON format of the MS COCO datasets.

          Arguments:
              images_dirs (list, optional): A list of strings, where each string is the path of a directory that
                  contains images that are to be part of the dataset. This allows you to aggregate multiple datasets
                  into one (e.g. one directory that contains the images for MS COCO Train 2014, another one for MS COCO
                  Val 2014, another one for MS COCO Train 2017 etc.).
              annotations_filenames (list): A list of strings, where each string is the path of the JSON file
                  that contains the annotations for the images in the respective image directories given, i.e. one
                  JSON file per image directory that contains the annotations for all images in that directory.
                  The content of the JSON files must be in MS COCO object detection format. Note that these annotations
                  files do not necessarily need to contain ground truth information. MS COCO also provides annotations
                  files without ground truth information for the test datasets, called `image_info_[...].json`.
              ground_truth_available (bool, optional): Set `True` if the annotations files contain ground truth information.
              include_classes (list, optional): Either 'all' or a list of integers containing the class IDs that
                  are to be included in the dataset. Defaults to 'all', in which case all boxes will be included
                  in the dataset.
              ret (bool, optional): Whether or not the image filenames and labels are to be returned.

          Returns:
              None by default, optionally the image filenames and labels.
          '''

    train_dataset.parse_json(
        images_dirs=[COCO_format_train_images_dir],
        annotations_filenames=[COCO_format_train_annotation_dir],
        ground_truth_available=True,
        include_classes='all',
        ret=False)

    val_dataset.parse_json(
        images_dirs=[COCO_format_val_images_dir],
        annotations_filenames=[COCO_format_val_annotation_dir],
        ground_truth_available=True,
        include_classes='all',
        ret=False)

    # 3: Instantiate an encoder that can encode ground truth labels into the format needed by the SSD loss function.

    ssd_box_encoder = SSDBoxEncoder(img_height=img_height,
                                    img_width=img_width,
                                    n_classes=n_classes,
                                    predictor_sizes=predictor_sizes,
                                    min_scale=None,
                                    max_scale=None,
                                    scales=scales,
                                    aspect_ratios_global=None,
                                    aspect_ratios_per_layer=aspect_ratios,
                                    two_boxes_for_ar1=two_boxes_for_ar1,
                                    steps=steps,
                                    offsets=offsets,
                                    limit_boxes=limit_boxes,
                                    variances=variances,
                                    pos_iou_threshold=0.5,
                                    neg_iou_threshold=0.2,
                                    coords=coords,
                                    normalize_coords=normalize_coords)

    batch_size = args.batch_size

    train_generator = train_dataset.generate(
        batch_size=batch_size,
        shuffle=True,
        train=True,
        ssd_box_encoder=ssd_box_encoder,
        convert_to_3_channels=True,
        equalize=False,
        brightness=(0.5, 2, 0.5),
        flip=0.5,
        translate=False,
        scale=False,
        max_crop_and_resize=(img_height, img_width, 1, 3),
        # This one is important because the Pascal VOC images vary in size
        random_pad_and_resize=(img_height, img_width, 1, 3, 0.5),
        # This one is important because the Pascal VOC images vary in size
        random_crop=False,
        crop=False,
        resize=False,
        gray=False,
        limit_boxes=True,
        # While the anchor boxes are not being clipped, the ground truth boxes should be
        include_thresh=0.4)

    val_generator = val_dataset.generate(
        batch_size=batch_size,
        shuffle=True,
        train=True,
        ssd_box_encoder=ssd_box_encoder,
        convert_to_3_channels=True,
        equalize=False,
        brightness=(0.5, 2, 0.5),
        flip=0.5,
        translate=False,
        scale=False,
        max_crop_and_resize=(img_height, img_width, 1, 3),
        # This one is important because the Pascal VOC images vary in size
        random_pad_and_resize=(img_height, img_width, 1, 3, 0.5),
        # This one is important because the Pascal VOC images vary in size
        random_crop=False,
        crop=False,
        resize=False,
        gray=False,
        limit_boxes=True,
        # While the anchor boxes are not being clipped, the ground truth boxes should be
        include_thresh=0.4)
    # Get the number of samples in the training and validation datasets to compute the epoch lengths below.
    n_train_samples = train_dataset.get_n_samples()
    n_val_samples = val_dataset.get_n_samples()

    def lr_schedule(epoch):
        if epoch <= 300:
            return 0.001
        else:
            return 0.0001

    learning_rate_scheduler = LearningRateScheduler(schedule=lr_schedule)

    checkpoint_path = args.checkpoint_path + "/ssd300_epoch-{epoch:02d}.h5"

    checkpoint = ModelCheckpoint(checkpoint_path)

    log_path = args.checkpoint_path + "/logs"

    tensorboard = TensorBoard(log_dir=log_path,
                              histogram_freq=1,
                              write_graph=True,
                              write_images=False)

    callbacks = [checkpoint, tensorboard, learning_rate_scheduler]

    # TODO: Set the number of epochs to train for.
    epochs = args.epochs
    initial_epoch = args.intial_epoch  # note: the argparse attribute itself is spelled 'intial_epoch'

    history = model.fit_generator(
        generator=train_generator,
        steps_per_epoch=ceil(n_train_samples / batch_size),
        verbose=1,
        initial_epoch=initial_epoch,
        epochs=epochs,
        validation_data=val_generator,
        validation_steps=ceil(n_val_samples / batch_size),
        callbacks=callbacks)
# Setting up training

# Batch size and number of epochs

batch_size = 16
num_epochs = 10

#Learning rate
base_lr = 0.002

# Optimizer
adam = Adam(lr=base_lr, beta_1=0.9, beta_2=0.999, epsilon=1e-6, decay = 0.0)

# Loss
ssd_loss = SSDLoss(neg_pos_ratio=2, n_neg_min=0, alpha=1.0, beta = 1.0)

# Compile
model.compile(optimizer=adam, loss=ssd_loss.compute_loss)

"""### Lets add early stopping and model checkpoint layers on validation loss with some patience values and using fit_generator function to train the model on data generated batch-by-batch by a Python generator, `train_generator` object as generator.


We are using checkpoint to save the best model based on validation accuracy.

#### Write code for early_stopping and model_checkpoint layers. Using model.fit_generator train the model and save the best weight file.
"""

early_stopping = EarlyStopping(monitor='val_loss', min_delta=0.001, patience=10)
model_checkpoint =  ModelCheckpoint(project_path + 'ssd_mobilenet_face_epoch.h5',
                                                           monitor='val_loss',
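
# A complete sketch (not from the original notebook) of how the requested callbacks and the
# fit_generator call could be wired together; `train_generator`, `val_generator`, `n_train_samples`
# and `n_val_samples` are assumed to be defined earlier, and `ceil` to come from the `math` module.
early_stopping = EarlyStopping(monitor='val_loss', min_delta=0.001, patience=10)
model_checkpoint = ModelCheckpoint(project_path + 'ssd_mobilenet_face_epoch.h5',
                                   monitor='val_loss',
                                   save_best_only=True,
                                   save_weights_only=True,
                                   verbose=1)

history = model.fit_generator(generator=train_generator,
                              steps_per_epoch=ceil(n_train_samples / batch_size),
                              epochs=num_epochs,
                              callbacks=[early_stopping, model_checkpoint],
                              validation_data=val_generator,
                              validation_steps=ceil(n_val_samples / batch_size))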
Example #12
    aspect_ratios_per_layer=None,
    two_boxes_for_ar1=two_boxes_for_ar1,
    steps=steps,
    offsets=offsets,
    limit_boxes=limit_boxes,
    variances=variances,
    coords=coords,
    normalize_coords=normalize_coords,
    subtract_mean=subtract_mean,
    divide_by_stddev=divide_by_stddev,
    swap_channels=False)

adam = Adam(lr=0.001, beta_1=0.9, beta_2=0.999, epsilon=1e-08,
            decay=5e-04)  # optimiser values

ssd_loss = SSDLoss(neg_pos_ratio=3, n_neg_min=0, alpha=1.0)  # loss function

model.compile(optimizer=adam, loss=ssd_loss.compute_loss)  # compile the model
"""# TODO: Set the path to the `.h5` file of the model to be loaded.
model_path = '/home/ssd_keras/ssd7.h5'

# We need to create an SSDLoss object in order to pass that to the model loader.
ssd_loss = SSDLoss(neg_pos_ratio=3, n_neg_min=0, alpha=1.0)

K.clear_session() # Clear previous models from memory.

model = load_model(model_path, custom_objects={'AnchorBoxes': AnchorBoxes,
                                               'compute_loss': ssd_loss.compute_loss})"""

train_dataset = BatchGenerator(
    box_output_format=['class_id', 'xmin', 'ymin', 'xmax', 'ymax'])