def __load_training_data(self):
        """Build the training batch generator and count the training samples.

        Reads ``self.path_to_train_directory``, ``self.cvsfile``,
        ``self.batch_size``, ``self.img_train_height`` and
        ``self.img_train_width``. ``ssd_box_encoder`` is taken from the
        enclosing scope.

        Returns:
            tuple: ``(train_generator, n_train_samples)`` -- the object
            returned by ``BatchGenerator.generate`` and the value returned by
            ``BatchGenerator.get_n_samples``.
        """
        train_dataset = BatchGenerator(
            box_output_format=['class_id', 'xmin', 'xmax', 'ymin', 'ymax'])

        # NOTE(review): if `join`/`walk` are `os.path.join`/`os.walk`, passing
        # the `walk(...)` generator to `join` raises TypeError, and iterating
        # a joined string would yield single characters. The intended value of
        # `images_path` cannot be determined from this block -- confirm with
        # the author before relying on this method.
        train_classes = [
            x[0] for x in join(self.path_to_train_directory,
                               walk(self.path_to_train_directory)) + '/'
        ]

        train_dataset.parse_csv(images_path=train_classes,
                                labels_path=self.cvsfile,
                                include_classes='all',
                                ret=False)

        train_generator = train_dataset.generate(
            # Use the instance attribute, as the validation loader does.
            batch_size=self.batch_size,
            train=True,
            ssd_box_encoder=ssd_box_encoder,
            equalize=False,
            translate=False,
            scale=False,
            # This one is important because the Pascal VOC images vary in size
            full_crop_and_resize=(self.img_train_height,
                                  self.img_train_width),
            random_crop=False,
            crop=False,
            resize=False,
            gray=True,
            # While the anchor boxes are not being clipped, the ground truth
            # boxes should be
            limit_boxes=True,
            include_thresh=0.4,
            diagnostics=True)

        # Number of samples in the training dataset, used by the caller to
        # compute the epoch length.
        n_train_samples = train_dataset.get_n_samples()
        return train_generator, n_train_samples
    def __load_validation_data(self):
        """Build the validation batch generator and count its samples.

        Reads ``self.path_to_validation_directory``, ``self.cvsfile_val``,
        ``self.batch_size``, ``self.img_train_height`` and
        ``self.img_train_width``. ``ssd_box_encoder`` is taken from the
        enclosing scope.

        Returns:
            tuple: ``(val_generator, n_val_samples)`` -- the object returned
            by ``BatchGenerator.generate`` and the value returned by
            ``BatchGenerator.get_n_samples``.
        """
        dataset = BatchGenerator(
            box_output_format=['class_id', 'xmin', 'xmax', 'ymin', 'ymax'])

        # NOTE(review): this expression is kept exactly as written; whether
        # `join(..., walk(...)) + '/'` yields what `parse_csv` expects for
        # `images_path` cannot be confirmed from this block.
        image_locations = [
            entry[0]
            for entry in join(self.path_to_validation_directory,
                              walk(self.path_to_validation_directory)) + '/'
        ]
        dataset.parse_csv(
            images_path=image_locations,
            labels_path=self.cvsfile_val,
            include_classes='all',
            ret=False,
        )

        # All augmentation switches are off for validation; the only
        # geometric step is the fixed crop-and-resize to the training
        # resolution (the Pascal VOC images vary in size).
        generator_options = dict(
            batch_size=self.batch_size,
            train=True,
            ssd_box_encoder=ssd_box_encoder,
            equalize=False,
            brightness=False,
            flip=False,
            translate=False,
            scale=False,
            full_crop_and_resize=(self.img_train_height,
                                  self.img_train_width),
            random_crop=False,
            crop=False,
            resize=False,
            gray=True,
            limit_boxes=True,
            include_thresh=0.4,
            diagnostics=False,
        )
        generator = dataset.generate(**generator_options)

        return generator, dataset.get_n_samples()
Exemplo n.º 3
0
def train_hs512(lr=1e-4,
                freeze_bn=False,
                optim=None,
                batch_size=8,
                weights_path=None,
                save_weights_only=True,
                epochs=25):
    """Build an SSD512 model, load weights into it and train it.

    The model is built with ``ssd_512``, weights are loaded by layer name
    from ``weights_path``, and training runs with ``model.fit_generator`` on
    the CSV-described datasets under ``./Datasets/``. TensorBoard logs go to
    ``./Graph/<fingerprint>`` and checkpoints to ``./saved/``.

    Args:
        lr: Learning rate of the default SGD optimizer. Ignored when
            ``optim`` is given.
        freeze_bn: When true, the first 38 layers of the model are marked
            non-trainable before compilation.
        optim: Optimizer passed to ``model.compile``. Defaults to
            ``SGD(lr=lr, momentum=0.9)``.
        batch_size: Batch size for both generators and for TensorBoard.
        weights_path: Weights file to load. Defaults to
            ``'VGG_VOC0712Plus_SSD_512x512_ft_iter_160000.h5'``.
        save_weights_only: Forwarded to ``ModelCheckpoint`` as
            ``save_weights_only``.
        epochs: Number of epochs passed to ``fit_generator``.

    Returns:
        The object returned by ``model.fit_generator``.
    """
    if weights_path is None:
        weights_path = 'VGG_VOC0712Plus_SSD_512x512_ft_iter_160000.h5'

    img_height = 512  # 1088 // 2  # Height of the input images
    img_width = 512  # 2048 // 2  # Width of the input images
    img_channels = 3  # Number of color channels of the input images
    # Per-channel mean subtraction is not applied: the model below is built
    # with `subtract_mean=None`. Earlier text of this function carried
    # [104, 117, 123] (commented out) and [138, 138, 138] (assigned but never
    # used) as candidate means.
    swap_channels = False
    n_classes = 20  # The number of positive classes, e.g. 20 for Pascal VOC, 80 for MS COCO.
    scales = [0.04, 0.1, 0.26, 0.42, 0.58, 0.74, 0.9, 1.06]  # MS COCO scales
    # scales = [0.07, 0.15, 0.3, 0.45, 0.6, 0.75, 0.9, 1.05]
    # One aspect-ratio list per entry of `steps`/`offsets`; the order matters.
    aspect_ratios = [
        [1.0, 2.0, 0.5],
        [1.0, 2.0, 0.5, 3.0, 1.0 / 3.0],
        [1.0, 2.0, 0.5, 3.0, 1.0 / 3.0],
        [1.0, 2.0, 0.5, 3.0, 1.0 / 3.0],
        [1.0, 2.0, 0.5, 3.0, 1.0 / 3.0],
        [1.0, 2.0, 0.5],
        [1.0, 2.0, 0.5],
    ]
    two_boxes_for_ar1 = True
    # The space between two adjacent anchor box center points for each predictor layer.
    steps = [8, 16, 32, 64, 128, 256, 512]
    offsets = [0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5]
    limit_boxes = False  # Whether or not you want to limit the anchor boxes to lie entirely within the image boundaries
    variances = [0.1, 0.1, 0.2, 0.2]
    coords = 'centroids'
    normalize_coords = True

    # 1: Build the Keras model
    K.clear_session()
    model, pred_sizes = ssd_512(image_size=(img_height, img_width,
                                            img_channels),
                                n_classes=n_classes,
                                l2_regularization=0.0005,
                                scales=scales,
                                aspect_ratios_per_layer=aspect_ratios,
                                two_boxes_for_ar1=two_boxes_for_ar1,
                                steps=steps,
                                offsets=offsets,
                                limit_boxes=limit_boxes,
                                variances=variances,
                                coords=coords,
                                normalize_coords=normalize_coords,
                                subtract_mean=None,
                                divide_by_stddev=None,
                                swap_channels=swap_channels,
                                return_predictor_sizes=True)

    # 2: Load the weights into the model, matching layers by name.
    model.load_weights(weights_path, by_name=True)

    # 3: Instantiate the optimizer and the SSD loss function
    if optim is None:
        optim = SGD(lr=lr, momentum=0.9)
    ssd_loss = SSDLoss(neg_pos_ratio=3, n_neg_min=0, alpha=1.0)

    # 4: Freeze the first 38 layers if requested
    if freeze_bn:
        for layer in model.layers[:38]:
            layer.trainable = False

    # 5: Compile the model
    model.compile(optimizer=optim, loss=ssd_loss.compute_loss)

    ## Prepare data generation

    # 1: Instantiate two `BatchGenerator` objects: one for training, one for validation.
    train_dataset = BatchGenerator(
        box_output_format=['class_id', 'xmin', 'ymin', 'xmax', 'ymax'])
    val_dataset = BatchGenerator(
        box_output_format=['class_id', 'xmin', 'ymin', 'xmax', 'ymax'])

    # 2: Parse the image and label lists for the training and validation datasets. This can take a while.
    images_path_root = './Datasets/'
    train_combined_labels = './Datasets/train_combined_ssd_512.txt'
    val_labels = './Datasets/val_ssd_512.txt'

    # Column order of the label files (differs from the output format above).
    csv_input_format = [
        'image_name', 'class_id', 'xmin', 'xmax', 'ymin', 'ymax'
    ]

    train_dataset.parse_csv(images_dir=images_path_root,
                            labels_filename=train_combined_labels,
                            input_format=list(csv_input_format))

    val_dataset.parse_csv(images_dir=images_path_root,
                          labels_filename=val_labels,
                          input_format=list(csv_input_format))

    # 3: Instantiate an encoder that can encode ground truth labels into the format needed by the SSD loss function.
    ssd_box_encoder = SSDBoxEncoder(img_height=img_height,
                                    img_width=img_width,
                                    n_classes=n_classes,
                                    predictor_sizes=pred_sizes,
                                    min_scale=None,
                                    max_scale=None,
                                    scales=scales,
                                    aspect_ratios_global=None,
                                    aspect_ratios_per_layer=aspect_ratios,
                                    two_boxes_for_ar1=two_boxes_for_ar1,
                                    steps=steps,
                                    offsets=offsets,
                                    limit_boxes=limit_boxes,
                                    variances=variances,
                                    pos_iou_threshold=0.5,
                                    neg_iou_threshold=0.2,
                                    coords=coords,
                                    normalize_coords=normalize_coords)

    # 4: Set the image processing / data augmentation options and create generator handles.
    train_generator = train_dataset.generate(
        batch_size=batch_size,
        shuffle=True,
        train=True,
        ssd_box_encoder=ssd_box_encoder,
        equalize=False,
        brightness=(0.5, 2, 0.5),
        flip=0.5,
        translate=False,
        scale=False,
        # This one is important because the Pascal VOC images vary in size
        max_crop_and_resize=(img_height, img_width, 1, 3),
        # This one is important because the Pascal VOC images vary in size
        random_pad_and_resize=(img_height, img_width, 1, 3, 0.5),
        random_crop=False,
        crop=False,
        resize=False,
        gray=True,
        # While the anchor boxes are not being clipped, the ground truth boxes should be
        limit_boxes=True,
        include_thresh=0.4)

    # NOTE(review): the validation generator keeps `shuffle=True` and the
    # same crop/pad settings as training; only brightness and flip are off.
    val_generator = val_dataset.generate(
        batch_size=batch_size,
        shuffle=True,
        train=True,
        ssd_box_encoder=ssd_box_encoder,
        equalize=False,
        brightness=False,
        flip=False,
        translate=False,
        scale=False,
        # This one is important because the Pascal VOC images vary in size
        max_crop_and_resize=(img_height, img_width, 1, 3),
        # This one is important because the Pascal VOC images vary in size
        random_pad_and_resize=(img_height, img_width, 1, 3, 0.5),
        random_crop=False,
        crop=False,
        resize=False,
        gray=True,
        limit_boxes=True,
        include_thresh=0.4)

    # Get the number of samples in the training and validation datasets to compute the epoch lengths below.
    n_train_samples = train_dataset.get_n_samples()
    n_val_samples = val_dataset.get_n_samples()

    # ## 4. Run the training

    # Timestamp-based identifier shared by the log directory and checkpoints.
    fingerprint = 'ssd{:%Y-%m-%d_%H-%M-%S}'.format(datetime.datetime.now())

    tbCallBack = TensorBoard(log_dir='./Graph/{}'.format(fingerprint),
                             histogram_freq=0,
                             batch_size=batch_size,
                             write_graph=True)

    # The doubled braces leave `{val_loss:.2f}` in the path for Keras to
    # fill in. Only the best model (by val_loss) is kept, as the file name
    # says; `save_weights_only` now reaches the argument of the same name
    # instead of being routed to `save_best_only`.
    checkpointer = ModelCheckpoint(
        './saved/{{val_loss:.2f}}__{}_best_weights.h5'.format(fingerprint),
        monitor='val_loss',
        verbose=1,
        save_best_only=True,
        save_weights_only=save_weights_only,
        mode='auto',
        period=1)

    learning_rate_reduction = ReduceLROnPlateau(monitor='val_loss',
                                                patience=5,
                                                verbose=1,
                                                factor=0.5,
                                                min_lr=1e-6)

    stopper = EarlyStopping(monitor='val_loss', min_delta=0.001, patience=10)

    # `epochs` comes from the caller; it is no longer overwritten with a
    # hard-coded 30.
    history = model.fit_generator(
        generator=train_generator,
        steps_per_epoch=ceil(n_train_samples / batch_size),
        epochs=epochs,
        callbacks=[checkpointer, learning_rate_reduction, stopper, tbCallBack],
        validation_data=val_generator,
        validation_steps=ceil(n_val_samples / batch_size))

    return history
Exemplo n.º 4
0
                                two_boxes_for_ar1=two_boxes_for_ar1,
                                limit_boxes=limit_boxes,
                                variances=variances,
                                pos_iou_threshold=0.5,
                                neg_iou_threshold=0.2,
                                coords=coords,
                                normalize_coords=normalize_coords)

#--------------------------------------------------------------------------------------------------------------
# 5: Create the training set batch generator

# Class names for this dataset (not referenced in the visible part of this snippet).
classes = ['background', 'nation']

# `box_output_format` is the format in which the generator is supposed to
# output the labels. At the moment it **must** be the format set here.
train_dataset = BatchGenerator(
    images_path=data_path,
    include_classes='all',
    box_output_format=['class_id', 'xmin', 'xmax', 'ymin', 'ymax']
)

# `input_format` is the order of the first six columns in the CSV file that
# contains the labels for your dataset. If your labels are in XML format,
# the XML parser may be helpful; check the documentation.
train_dataset.parse_csv(
    labels_path=data_path + '/train_labels.csv',
    input_format=['image_name', 'xmin', 'xmax', 'ymin', 'ymax', 'class_id']
)

# Change the online data augmentation settings as you like
train_generator = train_dataset.generate(
    batch_size=batch_size,
    train=True,
    ssd_box_encoder=ssd_box_encoder,
    equalize=False,
    brightness=(0.5, 2, 0.5),
# We need to create an SSDLoss object in order to pass its loss function to the model loader.
ssd_loss = SSDLoss(neg_pos_ratio=3, n_neg_min=0, alpha=1.0)

K.clear_session()  # Clear previous models from memory.

# Load the saved model; the custom layers and the loss function are supplied
# through `custom_objects` under the names listed here.
model = load_model(model_path,
                   custom_objects={
                       'AnchorBoxes': AnchorBoxes,
                       'L2Normalization': L2Normalization,
                       'compute_loss': ssd_loss.compute_loss
                   })

# 1: Instantiate two `BatchGenerator` objects: one for training, one for validation.

train_dataset = BatchGenerator(
    box_output_format=['class_id', 'xmin', 'xmax', 'ymin', 'ymax'])
val_dataset = BatchGenerator(
    box_output_format=['class_id', 'xmin', 'xmax', 'ymin', 'ymax'])

# 2: Parse the image and label lists for the training and validation datasets. This can take a while.

# TODO: Set the paths to the datasets here.

# The directory that contains the images.

road_test_images_path = 'datasets/LaneMarkings/1/'

# The directory that contains the annotations.
road_test_annotations_path = 'datasets/LaneMarkings/11/'

# The paths to the image sets.
Exemplo n.º 6
0
                    min_scale=min_scale,
                    max_scale=max_scale,
                    scales=scales,
                    aspect_ratios_global=aspect_ratios,
                    aspect_ratios_per_layer=None,
                    two_boxes_for_ar1=two_boxes_for_ar1,
                    limit_boxes=limit_boxes,
                    variances=variances,
                    coords=coords,
                    normalize_coords=normalize_coords)
# Compile the model with an Adam optimizer and the SSD loss function.
adam = Adam(lr=0.001, beta_1=0.9, beta_2=0.999, epsilon=1e-08, decay=5e-04)
ssd_loss = SSDLoss(neg_pos_ratio=3, n_neg_min=0, alpha=1.0)
model.compile(optimizer=adam, loss=ssd_loss.compute_loss)

# III. Set up the data generators for the training
# One `BatchGenerator` for training, one for validation.
train_dataset = BatchGenerator(
    box_output_format=['class_id', 'xmin', 'xmax', 'ymin', 'ymax'])
val_dataset = BatchGenerator(
    box_output_format=['class_id', 'xmin', 'xmax', 'ymin', 'ymax'])

# Previous dataset locations, kept for reference:
# train_images_path = './sample/data/udacity_driving_datasets/'
# train_labels_path = './sample/data/udacity_driving_datasets/train_labels.csv'
# val_images_path = './sample/data/udacity_driving_datasets/'
# val_labels_path = './sample/data/udacity_driving_datasets/val_labels.csv'
# NOTE(review): training and validation read images from the same directory
# ('./data/train'); only the label files differ -- confirm this is intended.
train_images_path = './data/train'
train_labels_path = './train_labels.csv'
val_images_path = './data/train'
val_labels_path = './val_labels.csv'

train_dataset.parse_csv(
    images_dir=train_images_path,
    labels_filename=train_labels_path,
Exemplo n.º 7
0
                                    scales=scales,
                                    aspect_ratios_global=aspect_ratios,
                                    aspect_ratios_per_layer=None,
                                    two_boxes_for_ar1=two_boxes_for_ar1,
                                    limit_boxes=limit_boxes,
                                    variances=variances,
                                    pos_iou_threshold=0.5,
                                    neg_iou_threshold=0.2,
                                    coords=coords,
                                    normalize_coords=normalize_coords)

    # 5: Create the training set batch generator

    train_dataset = BatchGenerator(
        images_path='./bosch/',
        include_classes='all',
        box_output_format=['class_id', 'xmin', 'xmax', 'ymin', 'ymax']
    )  # This is the format in which the generator is supposed to output the labels. At the moment it **must** be the format set here.

    i, l = train_dataset.parse_bosch_yaml(
        yaml_file='./bosch/combined_train.yaml', ret=True)

    #  XML parser will be helpful, check the documentation.

    # Change the online data augmentation settings as you like
    train_generator = train_dataset.generate(
        batch_size=batch_size,
        train=True,
        ssd_box_encoder=ssd_box_encoder,
        equalize=False,
        brightness=(0.5, 2, 0.5),
Exemplo n.º 8
0
                                aspect_ratios_global=aspect_ratios,
                                aspect_ratios_per_layer=None,
                                two_boxes_for_ar1=two_boxes_for_ar1,
                                limit_boxes=limit_boxes,
                                variances=variances,
                                pos_iou_threshold=0.5,
                                neg_iou_threshold=0.2,
                                coords=coords,
                                normalize_coords=normalize_coords)

#--------------------------------------------------------------------------------------------------------------
# 5: Create the training set batch generator

# `box_output_format` is the format in which the generator is supposed to
# output the labels. At the moment it **must** be the format set here.
train_dataset = BatchGenerator(
    images_path=data_path,
    include_classes='all',
    box_output_format=['class_id', 'xmin', 'xmax', 'ymin', 'ymax']
)

# `input_format` is the order of the first six columns in the CSV file that
# contains the labels for your dataset. If your labels are in XML format,
# the XML parser may be helpful; check the documentation.
train_dataset.parse_csv(
    labels_path=data_path + '/train_labels.csv',
    input_format=['image_name', 'xmin', 'xmax', 'ymin', 'ymax', 'class_id']
)

# Change the online data augmentation settings as you like
train_generator = train_dataset.generate(
    batch_size=batch_size,
    train=True,
    ssd_box_encoder=ssd_box_encoder,
    equalize=False,
    brightness=(
Exemplo n.º 9
0
# The SSDLoss object supplies the loss function handed to the model loader below.
ssd_loss = SSDLoss(neg_pos_ratio=3, n_neg_min=0, alpha=1.0)

K.clear_session()  # Clear previous models from memory.

# Load the saved model; the custom layers and the loss function are supplied
# through `custom_objects` under the names listed here.
model = load_model(model_path,
                   custom_objects={
                       'AnchorBoxes': AnchorBoxes,
                       'L2Normalization': L2Normalization,
                       'compute_loss': ssd_loss.compute_loss
                   })

# 3. Set up the data generators for the training

# 3.1: Instantiate two `BatchGenerator` objects: one for training, one for validation.

train_dataset = BatchGenerator(
    box_output_format=['class_id', 'xmin', 'ymin', 'xmax', 'ymax'])
val_dataset = BatchGenerator(
    box_output_format=['class_id', 'xmin', 'ymin', 'xmax', 'ymax'])

# 3.2: Parse the image and label lists for the training and validation datasets. This can take a while.

# TODO: Set the paths to the datasets here.

# The directories that contain the images.
VOC_2007_images_dir = '../../datasets/VOCdevkit/VOC2007/JPEGImages/'
VOC_2007_test_images_dir = '../../datasets/VOCdevkit/VOC2007_Test/JPEGImages/'
VOC_2012_images_dir = '../../datasets/VOCdevkit/VOC2012/JPEGImages/'

# The directories that contain the annotations.
VOC_2007_annotations_dir = '../../datasets/VOCdevkit/VOC2007/Annotations/'
VOC_2007_test_annotations_dir = '../../datasets/VOCdevkit/VOC2007_Test/Annotations/'
Exemplo n.º 10
0
    divide_by_stddev=divide_by_stddev,
    swap_channels=False)

# 2: Optional: Load some weights

#model.load_weights('./ssd7_weights.h5')

# 3: Instantiate an Adam optimizer and the SSD loss function and compile the model

adam = Adam(lr=0.001, beta_1=0.9, beta_2=0.999, epsilon=1e-08, decay=5e-04)

ssd_loss = SSDLoss(neg_pos_ratio=3, n_neg_min=0, alpha=1.0)

model.compile(optimizer=adam, loss=ssd_loss.compute_loss)

# One `BatchGenerator` for training, one for validation.
train_dataset = BatchGenerator(
    box_output_format=['class_id', 'xmin', 'ymin', 'xmax', 'ymax'])
val_dataset = BatchGenerator(
    box_output_format=['class_id', 'xmin', 'ymin', 'xmax', 'ymax'])

# NOTE(review): the paths below are absolute paths on one developer's
# Windows machine; they must be adapted before running elsewhere.

# The directory that contains the images.
PKLOT_images_dir = 'C:/Users/Damini/Documents/ECE477_ObjectDetection_Data_Storage/PKLot/PKLot'

# The directory that contains the annotations.
PKLOT_annotations_dir = 'C:/Users/Damini/Documents/ECE477_ObjectDetection_Data_Storage/PKLot2VOC_full_dataset/'

# Image set files for the training and validation splits.
PKLOT_train_image_set_filename = 'C:/Users/Damini/Documents/ECE477_ObjectDetection_Data_Storage/PKLot2VOC_full_dataset/ImageSets/Main/train.txt'
PKLOT_val_image_set_filename = 'C:/Users/Damini/Documents/ECE477_ObjectDetection_Data_Storage/PKLot2VOC_full_dataset/ImageSets/Main/valid.txt'

# The XML parser needs to know what object class names to look for and in which order to map them to integers.
# NOTE(review): 'vaccant' is spelled this way in the code; presumably it has
# to match the label text in the annotation files -- verify before changing it.
classes = ['occupied', 'vaccant']
Exemplo n.º 11
0
val_labels_path = '../../datasets/Udacity_Driving/driving_dataset_consolidated_small/val_labels.csv'

# Pascal VOC 2007 locations, all derived from `datadir` (a machine-specific
# mount point).
datadir = '/media/jeremy/9FBD-1B00/data/image_dbs'
VOC_2007_images_path = datadir + '/VOCdevkit/VOC2007/JPEGImages/'
VOC_2007_annotations_path = datadir + '/VOCdevkit/VOC2007/Annotations/'

# NOTE(review): the "test" image and annotation directories are identical to
# the ones above; only the image set files below differ -- confirm this is
# intended.
VOC_2007_test_images_path = datadir + '/VOCdevkit/VOC2007/JPEGImages/'
VOC_2007_test_annotations_path = datadir + '/VOCdevkit/VOC2007/Annotations/'

# Image set files selecting the samples of each split.
VOC_2007_trainval_image_set_path = datadir + '/VOCdevkit/VOC2007/ImageSets/Main/trainval_small.txt'
VOC_2007_train_image_set_path = datadir + '/VOCdevkit/VOC2007/ImageSets/Main/train_small.txt'
VOC_2007_test_image_set_path = datadir + '/VOCdevkit/VOC2007/ImageSets/Main/test_small.txt'

# `box_output_format` is the format in which the generator is supposed to
# output the labels. At the moment it **must** be the format set here.
train_dataset = BatchGenerator(
    box_output_format=['class_id', 'xmin', 'xmax', 'ymin', 'ymax']
)

# Disabled CSV-based loading, kept for reference:
# train_dataset.parse_csv(images_path=train_images_path,
#                         labels_path=train_labels_path,
#                         input_format=['image_name', 'xmin', 'xmax', 'ymin', 'ymax', 'class_id'], # This is the order of the first six columns in the CSV file that contains the labels for your dataset. If your labels are in XML format, maybe the XML parser will be helpful, check the documentation.
#                         include_classes='all')

# Class names; 'background' comes first, followed by twenty object classes.
classes = [
    'background', 'aeroplane', 'bicycle', 'bird', 'boat', 'bottle', 'bus',
    'car', 'cat', 'chair', 'cow', 'diningtable', 'dog', 'horse', 'motorbike',
    'person', 'pottedplant', 'sheep', 'sofa', 'train', 'tvmonitor'
]

train_dataset.parse_xml(images_paths=[VOC_2007_images_path],
                        annotations_paths=[VOC_2007_annotations_path],
# TODO: Set the path to the `.h5` file of the model to be loaded.
# (Loading a saved model is currently disabled; the code is kept for reference.)
# model_path = 'ssd300.h5'
#
# # We need to create an SSDLoss object in order to pass that to the model loader.
# ssd_loss = SSDLoss(neg_pos_ratio=3, n_neg_min=0, alpha=1.0)
#
# K.clear_session() # Clear previous models from memory.
#
# model = load_model(model_path, custom_objects={'AnchorBoxes': AnchorBoxes,
#                                                'L2Normalization': L2Normalization,
#                                                'compute_loss': ssd_loss.compute_loss})

# 1: Instantiate two `BatchGenerator` objects: one for training, one for validation.

train_dataset = BatchGenerator(box_output_format=['class_id', 'xmin', 'xmax', 'ymin', 'ymax'])
val_dataset = BatchGenerator(box_output_format=['class_id', 'xmin', 'xmax', 'ymin', 'ymax'])

# 2: Parse the image and label lists for the training and validation datasets. This can take a while.

# TODO: Set the paths to the datasets here.

# The directory that contains the images.
road_images_path = 'datasets/LaneMarkings/1'

# The directory that contains the annotations.
road_annotations_path = 'datasets/LaneMarkings/11'

# The paths to the image sets.
road_train_image_set_path = 'datasets/LaneMarkings/Main/train.txt'
road_val_image_set_path = 'datasets/LaneMarkings/Main/val.txt'
def train(args):
    """Build, compile and train an SSD300 model on an MS COCO-format dataset.

    The network hyper-parameters (`img_height`, `img_width`, `img_channels`,
    `n_classes`, `scales`, `aspect_ratios`, `two_boxes_for_ar1`, `steps`,
    `offsets`, `limit_boxes`, `variances`, `coords`, `normalize_coords`,
    `subtract_mean`, `swap_channels`) are read from module scope.

    Arguments:
        args: Namespace-like object. The attributes read here are
            `weight_file` (weights loaded into the model before training),
            `ms_coco_dir_path` (root directory holding `train/`, `val/`,
            `annotations/train.json` and `annotations/val.json`),
            `batch_size`, `checkpoint_path` (directory used for the `.h5`
            checkpoints and the `logs` sub-directory), `epochs` and
            `intial_epoch` (the attribute really is spelled that way).

    Returns:
        None. Checkpoints and TensorBoard logs are produced by the callbacks
        registered below.
    """
    # 1: Build the model and load the pre-trained weights.
    model = ssd_300(mode='training',
                    image_size=(img_height, img_width, img_channels),
                    n_classes=n_classes,
                    l2_regularization=0.0005,
                    scales=scales,
                    aspect_ratios_per_layer=aspect_ratios,
                    two_boxes_for_ar1=two_boxes_for_ar1,
                    steps=steps,
                    offsets=offsets,
                    limit_boxes=limit_boxes,
                    variances=variances,
                    coords=coords,
                    normalize_coords=normalize_coords,
                    subtract_mean=subtract_mean,
                    divide_by_stddev=None,
                    swap_channels=swap_channels)

    model.load_weights(args.weight_file, by_name=True, skip_mismatch=True)

    # Spatial size of every predictor layer; the box encoder needs these to
    # lay out its anchor boxes.
    predictor_layer_names = (
        'conv11_mbox_conf',
        'conv13_mbox_conf',
        'conv14_2_mbox_conf',
        'conv15_2_mbox_conf',
        'conv16_2_mbox_conf',
        'conv17_2_mbox_conf',
    )
    predictor_sizes = [
        model.get_layer(layer_name).output_shape[1:3]
        for layer_name in predictor_layer_names
    ]

    adam = Adam(lr=0.001, beta_1=0.9, beta_2=0.999, epsilon=1e-08, decay=5e-04)

    ssd_loss = SSDLoss(neg_pos_ratio=3, n_neg_min=0, alpha=1.0)

    model.compile(optimizer=adam,
                  loss=ssd_loss.compute_loss,
                  metrics=['accuracy'])

    # 2: Parse the image and label lists for the training and validation
    # datasets. This can take a while.
    train_dataset = BatchGenerator(
        box_output_format=['class_id', 'xmin', 'ymin', 'xmax', 'ymax'])
    val_dataset = BatchGenerator(
        box_output_format=['class_id', 'xmin', 'ymin', 'xmax', 'ymax'])

    coco_train_images_dir = args.ms_coco_dir_path + '/train/'
    coco_val_images_dir = args.ms_coco_dir_path + '/val/'
    coco_train_annotations_file = args.ms_coco_dir_path + '/annotations/train.json'
    coco_val_annotations_file = args.ms_coco_dir_path + '/annotations/val.json'

    # `parse_json` takes one annotations JSON file per image directory.
    train_dataset.parse_json(
        images_dirs=[coco_train_images_dir],
        annotations_filenames=[coco_train_annotations_file],
        ground_truth_available=True,
        include_classes='all',
        ret=False)

    val_dataset.parse_json(
        images_dirs=[coco_val_images_dir],
        annotations_filenames=[coco_val_annotations_file],
        ground_truth_available=True,
        include_classes='all',
        ret=False)

    # 3: Instantiate an encoder that can encode ground truth labels into the
    # format needed by the SSD loss function.
    ssd_box_encoder = SSDBoxEncoder(img_height=img_height,
                                    img_width=img_width,
                                    n_classes=n_classes,
                                    predictor_sizes=predictor_sizes,
                                    min_scale=None,
                                    max_scale=None,
                                    scales=scales,
                                    aspect_ratios_global=None,
                                    aspect_ratios_per_layer=aspect_ratios,
                                    two_boxes_for_ar1=two_boxes_for_ar1,
                                    steps=steps,
                                    offsets=offsets,
                                    limit_boxes=limit_boxes,
                                    variances=variances,
                                    pos_iou_threshold=0.5,
                                    neg_iou_threshold=0.2,
                                    coords=coords,
                                    normalize_coords=normalize_coords)

    batch_size = args.batch_size

    # 4: Create the batch generators. Both generators use exactly the same
    # configuration, so it is defined once and shared.
    # NOTE(review): this means the validation data is shuffled and augmented
    # (brightness, flip, random pad) just like the training data - confirm
    # that this is intended.
    generator_options = dict(
        batch_size=batch_size,
        shuffle=True,
        train=True,
        ssd_box_encoder=ssd_box_encoder,
        convert_to_3_channels=True,
        equalize=False,
        brightness=(0.5, 2, 0.5),
        flip=0.5,
        translate=False,
        scale=False,
        # The two resize options below matter because the input images vary
        # in size.
        max_crop_and_resize=(img_height, img_width, 1, 3),
        random_pad_and_resize=(img_height, img_width, 1, 3, 0.5),
        random_crop=False,
        crop=False,
        resize=False,
        gray=False,
        # While the anchor boxes are not being clipped, the ground truth
        # boxes should be.
        limit_boxes=True,
        include_thresh=0.4)

    train_generator = train_dataset.generate(**generator_options)
    val_generator = val_dataset.generate(**generator_options)

    # Number of samples in each dataset, used to compute the epoch lengths.
    n_train_samples = train_dataset.get_n_samples()
    n_val_samples = val_dataset.get_n_samples()

    # 5: Set up the callbacks.
    def lr_schedule(epoch):
        # Drop the learning rate by a factor of ten after epoch 300.
        if epoch <= 300:
            return 0.001
        return 0.0001

    learning_rate_scheduler = LearningRateScheduler(schedule=lr_schedule)

    checkpoint_path = args.checkpoint_path + "/ssd300_epoch-{epoch:02d}.h5"
    checkpoint = ModelCheckpoint(checkpoint_path)

    log_path = args.checkpoint_path + "/logs"
    # NOTE(review): some Keras 2.x releases reject `histogram_freq > 0` when
    # the validation data is a generator - verify against the installed
    # version.
    tensorboard = TensorBoard(log_dir=log_path,
                              histogram_freq=1,
                              write_graph=True,
                              write_images=False)

    callbacks = [checkpoint, tensorboard, learning_rate_scheduler]

    epochs = args.epochs
    initial_epoch = args.intial_epoch

    # 6: Train. The step counts are the number of batches needed to cover
    # each dataset once, so the rounding up must be applied to the quotient
    # (samples / batch size), not to the sample count.
    model.fit_generator(
        generator=train_generator,
        steps_per_epoch=int(ceil(n_train_samples / float(batch_size))),
        verbose=1,
        initial_epoch=initial_epoch,
        epochs=epochs,
        validation_data=val_generator,
        validation_steps=int(ceil(n_val_samples / float(batch_size))),
        callbacks=callbacks)
Exemplo n.º 14
0
                      fill=False,
                      linewidth=2))
    current_axis.text(xmin,
                      ymin,
                      label,
                      size='x-large',
                      color='white',
                      bbox={
                          'facecolor': color,
                          'alpha': 1.0
                      })

# 5. Make predictions on Pascal VOC 2007 Test
# Create a `BatchGenerator` instance and parse the Pascal VOC labels.

dataset = BatchGenerator(
    box_output_format=['class_id', 'xmin', 'ymin', 'xmax', 'ymax'])

# TODO: Set the paths to the datasets here.

# Image directory, annotation directory and image set file of the test split.
VOC_2007_test_images_dir = '../../datasets/VOCdevkit/VOC2007_Test/JPEGImages/'
VOC_2007_test_annotations_dir = '../../datasets/VOCdevkit/VOC2007_Test/Annotations/'
VOC_2007_test_image_set_filename = '../../datasets/VOCdevkit/VOC2007_Test/ImageSets/Main/test.txt'

# The XML parser needs to know what object class names to look for and in which order to map them to integers.
classes = [
    'background', 'aeroplane', 'bicycle', 'bird', 'boat', 'bottle', 'bus',
    'car', 'cat', 'chair', 'cow', 'diningtable', 'dog', 'horse', 'motorbike',
    'person', 'pottedplant', 'sheep', 'sofa', 'train', 'tvmonitor'
]

dataset.parse_xml(images_dirs=[VOC_2007_test_images_dir],
Exemplo n.º 15
0
# Load the weights that we've just created via sub-sampling.
weights_path = weights_destination_path

# Weights are matched to layers by name.
model.load_weights(weights_path, by_name=True)

print("Weights file loaded:", weights_path)

# 3: Instantiate an Adam optimizer and the SSD loss function and compile the model.

adam = Adam(lr=0.001, beta_1=0.9, beta_2=0.999, epsilon=1e-08, decay=5e-04)

ssd_loss = SSDLoss(neg_pos_ratio=3, n_neg_min=0, alpha=1.0)

model.compile(optimizer=adam, loss=ssd_loss.compute_loss)

dataset = BatchGenerator(box_output_format=['class_id', 'xmin', 'ymin', 'xmax', 'ymax'])

# TODO: Set the paths to your dataset here.
# The script reads three command-line arguments: the images directory, the
# labels CSV file and the batch size. No argument validation is done here.
images_path = sys.argv[1]  # e.g. '/home/pedro/QuartilImagesTest'
labels_path = sys.argv[2]  # e.g. '/home/pedro/priorGroundTrue.csv'
batch_size=int(sys.argv[3])

# `input_format` is the order of the first six columns in the CSV file that
# contains the labels for your dataset. If your labels are in XML format,
# the XML parser may be helpful instead; check the documentation.
dataset.parse_csv(images_dir=images_path,
                  labels_filename=labels_path,
                  input_format=['image_name', 'xmin', 'xmax', 'ymin', 'ymax', 'class_id'],
                  include_classes='all',
                  random_sample=False)

print("Number of images in the dataset:", dataset.get_n_samples())

Exemplo n.º 16
0
                                variances=variances,
                                pos_iou_threshold=0.5,
                                neg_iou_threshold=0.2,
                                coords=coords,
                                normalize_coords=normalize_coords)

# 5: Create the training set batch generator

# Object class names in index order; position 0 is the background class.
# The list is handed to `parse_xml` below.
classes = [
    'background', 'aeroplane', 'bicycle', 'bird', 'boat', 'bottle', 'bus',
    'car', 'cat', 'chair', 'cow', 'diningtable', 'dog', 'horse', 'motorbike',
    'person', 'pottedplant', 'sheep', 'sofa', 'train', 'tvmonitor'
]

# In this version of `BatchGenerator` the image directory and the class
# filter are passed to the constructor.
train_dataset = BatchGenerator(
    images_path='./Datasets/VOCdevkit/VOC2012/JPEGImages/',
    include_classes='all',
    box_output_format=['class_id', 'xmin', 'xmax', 'ymin', 'ymax'])

# Parse the XML annotations of the images listed in the `train.txt` image
# set; truncated and difficult objects are not excluded.
train_dataset.parse_xml(
    annotations_path='./Datasets/VOCdevkit/VOC2012/Annotations/',
    image_set_path='./Datasets/VOCdevkit/VOC2012/ImageSets/Main/',
    image_set='train.txt',
    classes=classes,
    exclude_truncated=False,
    exclude_difficult=False,
    ret=False)

train_generator = train_dataset.generate(
    batch_size=batch_size,
    train=True,
    ssd_box_encoder=ssd_box_encoder,
Exemplo n.º 17
0
from keras.models import load_model
from math import ceil
import numpy as np
from matplotlib import pyplot as plt

from keras_ssd7 import build_model
from keras_ssd_loss import SSDLoss
from ssd_box_encode_decode_utils import SSDBoxEncoder, decode_y, decode_y2
from ssd_batch_generator import BatchGenerator

### Make predictions

# 1: Set the generator

# Batch generator over the images in `./data/`; no class is filtered out.
val_dataset = BatchGenerator(
    images_path='./data/',
    include_classes='all',
    box_output_format=['class_id', 'xmin', 'xmax', 'ymin', 'ymax'])

predict_generator = val_dataset.generate(batch_size=1,
                                         train=False,
                                         equalize=False,
                                         brightness=False,
                                         flip=False,
                                         translate=False,
                                         scale=False,
                                         random_crop=False,
                                         crop=False,
                                         resize=False,
                                         gray=False,
                                         limit_boxes=True,
                                         include_thresh=0.4,
Exemplo n.º 18
0
ssd_loss = SSDLoss(neg_pos_ratio=3, n_neg_min=0, alpha=1.0)  # SSD loss function

model.compile(optimizer=adam, loss=ssd_loss.compute_loss)  # compile the model with the optimizer and the SSD loss
# The triple-quoted string below is disabled code (an alternative that loads
# a saved model from an `.h5` file); as a bare string it has no effect.
"""# TODO: Set the path to the `.h5` file of the model to be loaded.
model_path = '/home/ssd_keras/ssd7.h5'

# We need to create an SSDLoss object in order to pass that to the model loader.
ssd_loss = SSDLoss(neg_pos_ratio=3, n_neg_min=0, alpha=1.0)

K.clear_session() # Clear previous models from memory.

model = load_model(model_path, custom_objects={'AnchorBoxes': AnchorBoxes,
                                               'compute_loss': ssd_loss.compute_loss})"""

# One batch generator per split: training and validation.
train_dataset = BatchGenerator(
    box_output_format=['class_id', 'xmin', 'ymin', 'xmax', 'ymax'])
val_dataset = BatchGenerator(
    box_output_format=['class_id', 'xmin', 'ymin', 'xmax', 'ymax'])

# Both splits share the same image directory; only the label CSV files differ.
train_images_dir = '/home/udacity_driving_datasets'
train_labels_filename = '/home/udacity_driving_datasets/labels_train.csv'

val_images_dir = '/home/udacity_driving_datasets'
val_labels_filename = '/home/udacity_driving_datasets/labels_val.csv'

# `input_format` gives the column order of the labels CSV file.
train_dataset.parse_csv(
    images_dir=train_images_dir,
    labels_filename=train_labels_filename,
    input_format=['image_name', 'xmin', 'xmax', 'ymin', 'ymax', 'class_id'],
    include_classes='all')