Example #1
def get_model(weights_path):
    ssd_loss = SSDLoss(neg_pos_ratio=3, alpha=1.0)

    K.clear_session()
    model = ssd_300(image_size=(img_height, img_width, img_channels),
                    n_classes=n_classes,
                    mode='training',
                    l2_regularization=0.0005,
                    scales=scales,
                    aspect_ratios_per_layer=aspect_ratios,
                    two_boxes_for_ar1=two_boxes_for_ar1,
                    steps=steps,
                    offsets=offsets,
                    clip_boxes=clip_boxes,
                    variances=variances,
                    normalize_coords=normalize_coords,
                    subtract_mean=mean_color,
                    swap_channels=swap_channels)

    # Load the trained weights into the model.
    model.load_weights(weights_path, by_name=True)

    sgd = SGD(lr=0.001, momentum=0.9, decay=0.0, nesterov=False)

    model.compile(optimizer=sgd, loss=ssd_loss.compute_loss)

    return model
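
get_model relies on module-level configuration; a minimal sketch of those globals plus a call, with the values mirroring the Pascal VOC settings used throughout this collection (the weights filename is a placeholder):

img_height, img_width, img_channels = 300, 300, 3
n_classes = 20
scales = [0.1, 0.2, 0.37, 0.54, 0.71, 0.88, 1.05]
aspect_ratios = [[1.0, 2.0, 0.5],
                 [1.0, 2.0, 0.5, 3.0, 1.0 / 3.0],
                 [1.0, 2.0, 0.5, 3.0, 1.0 / 3.0],
                 [1.0, 2.0, 0.5, 3.0, 1.0 / 3.0],
                 [1.0, 2.0, 0.5],
                 [1.0, 2.0, 0.5]]
two_boxes_for_ar1 = True
steps = [8, 16, 32, 64, 100, 300]
offsets = [0.5] * 6
clip_boxes = False
variances = [0.1, 0.1, 0.2, 0.2]
normalize_coords = True
mean_color = [123, 117, 104]
swap_channels = [2, 1, 0]

model = get_model('VGG_VOC0712_SSD_300x300.h5')  # placeholder weights file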
Example #2
def create_model(model_type='ssd300', dataset='voc2007', dtype='float32'):
    if model_type == 'ssd300':
        model = ssd_300(image_size=(300, 300, 3),
                        n_classes=20 if dataset == 'voc2007' else 80,
                        mode='inference',
                        l2_regularization=0.0005,
                        scales=([0.1, 0.2, 0.37, 0.54, 0.71, 0.88, 1.05]
                                if dataset == 'voc2007' else
                                [0.07, 0.15, 0.33, 0.51, 0.69, 0.87, 1.05]),
                        aspect_ratios_per_layer=[[1.0, 2.0, 0.5],
                                                 [1.0, 2.0, 0.5, 3.0, 1.0 / 3.0],
                                                 [1.0, 2.0, 0.5, 3.0, 1.0 / 3.0],
                                                 [1.0, 2.0, 0.5, 3.0, 1.0 / 3.0],
                                                 [1.0, 2.0, 0.5],
                                                 [1.0, 2.0, 0.5]],
                        two_boxes_for_ar1=True,
                        steps=[8, 16, 32, 64, 100, 300],
                        offsets=[0.5, 0.5, 0.5, 0.5, 0.5, 0.5],
                        clip_boxes=False,
                        variances=[0.1, 0.1, 0.2, 0.2],
                        normalize_coords=True,
                        subtract_mean=[123, 117, 104],
                        swap_channels=[2, 1, 0],
                        confidence_thresh=0.01,
                        iou_threshold=0.45,
                        top_k=200,
                        nms_max_output_size=400,
                        dtype=dtype)
        if dataset == 'voc2007':
Example #3
def build_model(args: argparse.Namespace, weights_path: str) -> Model:
    K.clear_session()
    model = ssd_300(image_size=(args.img_height, args.img_width,
                                args.img_channels),
                    n_classes=args.n_classes,
                    mode='training',
                    l2_regularization=0.0005,
                    scales=args.scales,
                    aspect_ratios_per_layer=args.aspect_ratios,
                    two_boxes_for_ar1=args.two_boxes_for_ar1,
                    steps=args.steps,
                    offsets=args.offsets,
                    clip_boxes=args.clip_boxes,
                    variances=args.variances,
                    normalize_coords=args.normalize_coords,
                    subtract_mean=args.mean_color,
                    swap_channels=args.swap_channels)

    model.load_weights(weights_path, by_name=True)

    sgd = SGD(lr=0.001, momentum=0.9, decay=0.0, nesterov=False)
    ssd_loss = SSDLoss(neg_pos_ratio=3, alpha=1.0)
    model.compile(optimizer=sgd, loss=ssd_loss.compute_loss)

    return model
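
build_model expects the SSD configuration on an argparse Namespace; a hedged sketch of one way to assemble it, with attribute names taken from the fields read above and values mirroring the VOC settings used elsewhere in this collection (the weights filename is a placeholder):

import argparse

args = argparse.Namespace(
    img_height=300, img_width=300, img_channels=3, n_classes=20,
    scales=[0.1, 0.2, 0.37, 0.54, 0.71, 0.88, 1.05],
    aspect_ratios=[[1.0, 2.0, 0.5],
                   [1.0, 2.0, 0.5, 3.0, 1.0 / 3.0],
                   [1.0, 2.0, 0.5, 3.0, 1.0 / 3.0],
                   [1.0, 2.0, 0.5, 3.0, 1.0 / 3.0],
                   [1.0, 2.0, 0.5],
                   [1.0, 2.0, 0.5]],
    two_boxes_for_ar1=True,
    steps=[8, 16, 32, 64, 100, 300],
    offsets=[0.5] * 6,
    clip_boxes=False,
    variances=[0.1, 0.1, 0.2, 0.2],
    normalize_coords=True,
    mean_color=[123, 117, 104],
    swap_channels=[2, 1, 0])

model = build_model(args, 'VGG_ILSVRC_16_layers_fc_reduced.h5')  # placeholder weights file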
Example #4
    def get_model(self,
                  mode='inference',
                  weights_path='',
                  n_classes=None,
                  id2digit=None):
        # n_classes and id2digit are overrides used at inference time.
        config = self.config
        if n_classes:  # inference setting
            self.n_classes = n_classes

        if id2digit:
            self.id2digit = id2digit

        self.model = ssd_300(
            image_size=(config.img_height, config.img_width,
                        config.img_channels),
            n_classes=self.n_classes,
            mode=mode,
            l2_regularization=0.0005,
            scales=config.scales,
            aspect_ratios_per_layer=config.aspect_ratios,
            two_boxes_for_ar1=config.two_boxes_for_ar1,
            steps=config.steps,
            offsets=config.offsets,
            clip_boxes=config.clip_boxes,
            variances=config.variances,
            normalize_coords=config.normalize_coords,
            subtract_mean=config.subtract_mean,
            divide_by_stddev=None,
            swap_channels=config.swap_channels,
            confidence_thresh=0.5,
            iou_threshold=0.45,
            top_k=200,
            nms_max_output_size=400,
            return_predictor_sizes=False)

        if weights_path:
            print(f'Loading weights from {weights_path}')
            self.model.load_weights(weights_path, by_name=True)
            self.weights_path = weights_path

        #adam = Adam(lr=0.005, beta_1=0.9, beta_2=0.999, epsilon=1e-08, decay=0.0)
        #sgd = SGD(lr=0.001, momentum=0.9, decay=0.0, nesterov=False)
        adam = Adam(lr=0.001,
                    beta_1=0.9,
                    beta_2=0.999,
                    epsilon=1e-08,
                    decay=0.0)

        ssd_loss = SSDLoss(neg_pos_ratio=3, n_neg_min=0, alpha=1.0)

        self.model.compile(optimizer=adam, loss=ssd_loss.compute_loss)
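
A hedged sketch of how the id2digit mapping set above might be applied; `wrapper` stands for an instance of the (unnamed) class, and the mapping values are assumptions. In 'inference' mode the ssd_300 output rows are [class_id, confidence, xmin, ymin, xmax, ymax]:

wrapper.get_model(mode='inference',
                  weights_path='ssd300_digits.h5',                 # placeholder
                  n_classes=10,
                  id2digit={i: str(i - 1) for i in range(1, 11)})  # placeholder mapping
y_pred = wrapper.model.predict(batch)  # batch: (N, 300, 300, 3) images
picks = y_pred[0][y_pred[0, :, 1] > 0.5]
digits = [wrapper.id2digit[int(box[0])] for box in picks]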
Example #5
    def __init__(self, confidence_threshold=0.5):

        self.confidence_th = confidence_threshold

        # 0: Set the image size.
        img_height = 300
        img_width = 300

        # 1: Build the Keras model
        self.loaded_model = ssd_300(
            image_size=(img_height, img_width, 3),
            n_classes=20,
            mode='inference',
            l2_regularization=0.0005,
            scales=[
                0.1, 0.2, 0.37, 0.54, 0.71, 0.88, 1.05
            ],  # The scales for MS COCO are [0.07, 0.15, 0.33, 0.51, 0.69, 0.87, 1.05]
            aspect_ratios_per_layer=[[1.0, 2.0, 0.5],
                                     [1.0, 2.0, 0.5, 3.0, 1.0 / 3.0],
                                     [1.0, 2.0, 0.5, 3.0, 1.0 / 3.0],
                                     [1.0, 2.0, 0.5, 3.0, 1.0 / 3.0],
                                     [1.0, 2.0, 0.5], [1.0, 2.0, 0.5]],
            two_boxes_for_ar1=True,
            steps=[8, 16, 32, 64, 100, 300],
            offsets=[0.5, 0.5, 0.5, 0.5, 0.5, 0.5],
            clip_boxes=False,
            variances=[0.1, 0.1, 0.2, 0.2],
            normalize_coords=True,
            subtract_mean=[123, 117, 104],
            swap_channels=[2, 1, 0],
            confidence_thresh=0.5,
            iou_threshold=0.45,
            top_k=200,
            nms_max_output_size=400)

        # 2: Load the trained weights into the model.
        weights_path = 'models/VGG_VOC0712_SSD_300x300_iter_240000.h5'
        self.loaded_model.load_weights(weights_path, by_name=True)

        # 3: Compile the model so that Keras won't complain the next time you load it.
        adam = Adam(lr=0.001,
                    beta_1=0.9,
                    beta_2=0.999,
                    epsilon=1e-08,
                    decay=0.0)
        ssd_loss = SSDLoss(neg_pos_ratio=3, alpha=1.0)
        self.loaded_model.compile(optimizer=adam, loss=ssd_loss.compute_loss)

        # 4: Build the predict function up front so the model can be called from other threads.
        self.loaded_model._make_predict_function()
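
A single-image usage sketch for this wrapper, following the keras.preprocessing pattern used in the later examples; `detector` is an instance of the class above and the image path is a placeholder:

from keras.preprocessing import image
import numpy as np

img = image.load_img('street.jpg', target_size=(300, 300))  # placeholder image
x = np.expand_dims(image.img_to_array(img), axis=0)
y_pred = detector.loaded_model.predict(x)
boxes = y_pred[0][y_pred[0, :, 1] > detector.confidence_th]  # rows: class, conf, xmin, ymin, xmax, ymax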
Example #6
    def build_model_300(self):
        # 1: Build the Keras model

        K.clear_session()  # Clear previous models from memory.

        self.model = ssd_300(
            image_size=(self.img_height, self.img_width, 3),
            n_classes=20,
            mode='inference',
            l2_regularization=0.0005,
            scales=[0.1, 0.2, 0.37, 0.54, 0.71, 0.88, 1.05],
            # The scales for MS COCO are [0.07, 0.15, 0.33, 0.51, 0.69, 0.87, 1.05]
            aspect_ratios_per_layer=[[1.0, 2.0, 0.5],
                                     [1.0, 2.0, 0.5, 3.0, 1.0 / 3.0],
                                     [1.0, 2.0, 0.5, 3.0, 1.0 / 3.0],
                                     [1.0, 2.0, 0.5, 3.0, 1.0 / 3.0],
                                     [1.0, 2.0, 0.5], [1.0, 2.0, 0.5]],
            two_boxes_for_ar1=True,
            steps=[8, 16, 32, 64, 100, 300],
            offsets=[0.5, 0.5, 0.5, 0.5, 0.5, 0.5],
            clip_boxes=False,
            variances=[0.1, 0.1, 0.2, 0.2],
            normalize_coords=True,
            subtract_mean=[123, 117, 104],
            swap_channels=[2, 1, 0],
            confidence_thresh=0.5,
            iou_threshold=0.45,
            top_k=200,
            nms_max_output_size=400)

        # 2: Load the trained weights into the model.

        # TODO: Set the path of the trained weights.
        weights_path = self.weights_path

        self.model.load_weights(weights_path, by_name=True)

        # 3: Compile the model so that Keras won't complain the next time you load it.

        adam = Adam(lr=0.001,
                    beta_1=0.9,
                    beta_2=0.999,
                    epsilon=1e-08,
                    decay=0.0)

        ssd_loss = SSDLoss(neg_pos_ratio=3, alpha=1.0)

        self.model.compile(optimizer=adam, loss=ssd_loss.compute_loss)
Example #7
    def load_ssd300(self):
        print('loading SSD 300 ... ')
        img_shape = self.conf['IMG_SHAPE']
        classes = self.conf['CLASSES']
        swap_channels = [
            2, 1, 0
        ]  # The color channel order in the original SSD is BGR, so we'll have the model reverse the color channel order of the input images.
        n_classes = len(classes)
        scales_pascal = [
            0.1, 0.2, 0.37, 0.54, 0.71, 0.88, 1.05
        ]  # The anchor box scaling factors used in the original SSD300 for the Pascal VOC datasets
        scales = scales_pascal
        aspect_ratios = [
            [1.0, 2.0, 0.5], [1.0, 2.0, 0.5, 3.0, 1.0 / 3.0],
            [1.0, 2.0, 0.5, 3.0, 1.0 / 3.0], [1.0, 2.0, 0.5, 3.0, 1.0 / 3.0],
            [1.0, 2.0, 0.5], [1.0, 2.0, 0.5]
        ]  # The anchor box aspect ratios used in the original SSD300; the order matters
        steps = [
            8, 16, 32, 64, 100, 300
        ]  # The space between two adjacent anchor box center points for each predictor layer.
        two_boxes_for_ar1 = True
        mean_color = [123, 117, 104]  #TODO : add this as a parameter
        offsets = [0.5, 0.5, 0.5, 0.5, 0.5, 0.5]
        clip_boxes = False
        variances = [0.1, 0.1, 0.2, 0.2]
        normalize_coords = True

        model = ssd_300(image_size=tuple(img_shape),
                        n_classes=n_classes,
                        mode='training',
                        l2_regularization=0.0005,
                        scales=scales,
                        aspect_ratios_per_layer=aspect_ratios,
                        two_boxes_for_ar1=two_boxes_for_ar1,
                        steps=steps,
                        offsets=offsets,
                        clip_boxes=clip_boxes,
                        variances=variances,
                        normalize_coords=normalize_coords,
                        subtract_mean=mean_color,
                        swap_channels=swap_channels)
        self.load_weights(model)
        return model
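
load_ssd300 pulls its settings from self.conf; a minimal sketch of the keys it reads (values are placeholders; CLASSES should list the positive classes only, since n_classes is derived from its length):

conf = {
    'IMG_SHAPE': [300, 300, 3],    # passed to ssd_300 as image_size
    'CLASSES': ['person', 'car'],  # placeholder positive classes; n_classes = len(CLASSES)
    'BATCH_SIZE': 8,
}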
Example #8
def init_model(weights_path='./ssdweights/rovio_v2.h5'):
    img_height = 300
    img_width = 300

    assert os.path.exists(weights_path), '%s not found...' % weights_path

    K.clear_session()  # Clear previous models from memory.

    model = ssd_300(image_size=(img_height, img_width, 3),
                    n_classes=2,
                    mode='inference_fast',
                    l2_regularization=0.0005,
                    scales=[0.1, 0.2, 0.37, 0.54, 0.71, 0.88, 1.05],
                    aspect_ratios_per_layer=[[1.0, 2.0, 0.5],
                                             [1.0, 2.0, 0.5, 3.0, 1.0 / 3.0],
                                             [1.0, 2.0, 0.5, 3.0, 1.0 / 3.0],
                                             [1.0, 2.0, 0.5, 3.0, 1.0 / 3.0],
                                             [1.0, 2.0, 0.5],
                                             [1.0, 2.0, 0.5]],
                    two_boxes_for_ar1=True,
                    steps=[8, 16, 32, 64, 100, 300],
                    offsets=[0.5, 0.5, 0.5, 0.5, 0.5, 0.5],
                    limit_boxes=False,
                    variances=[0.1, 0.1, 0.2, 0.2],
                    coords='centroids',
                    normalize_coords=True,
                    subtract_mean=[123, 117, 104],
                    swap_channels=True,
                    confidence_thresh=0.5,
                    iou_threshold=0.45,
                    top_k=200,
                    nms_max_output_size=400)

    model.load_weights(weights_path, by_name=True)

    adam = Adam(lr=0.001, beta_1=0.9, beta_2=0.999, epsilon=1e-08, decay=5e-04)

    ssd_loss = SSDLoss(neg_pos_ratio=3, n_neg_min=0, alpha=1.0)

    model.compile(optimizer=adam, loss=ssd_loss.compute_loss)

    return model, ['background', 'rovio', 'rovio']
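
A hedged usage sketch; 'inference_fast' decodes inside the graph, so the output rows are already [class_id, confidence, xmin, ymin, xmax, ymax] and can be filtered directly (the input frame is a placeholder):

import cv2
import numpy as np

model, labels = init_model()
frame = cv2.imread('frame.jpg')  # placeholder input frame
x = cv2.resize(frame, (300, 300))[np.newaxis].astype(np.float32)
y_pred = model.predict(x)
for box in y_pred[0][y_pred[0, :, 1] > 0.5]:
    print(labels[int(box[0])], float(box[1]))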
Example #9
    def __init__(self,
                 required_class=[2, 6, 7, 14, 15],
                 weights_path='./VGG_VOC0712Plus_SSD_300x300_ft_iter_160000.h5',
                 img_height=300,
                 img_width=300):

        self.img_height, self.img_width = img_height, img_width

        K.clear_session() # Clear previous models from memory.

        self.model = ssd_300(image_size=(self.img_height, self.img_width, 3),
                        n_classes=20,
                        mode='inference',
                        l2_regularization=0.0005,
                        scales=[0.1, 0.2, 0.37, 0.54, 0.71, 0.88, 1.05], # The scales for MS COCO are [0.07, 0.15, 0.33, 0.51, 0.69, 0.87, 1.05]
                        aspect_ratios_per_layer=[[1.0, 2.0, 0.5],
                                                 [1.0, 2.0, 0.5, 3.0, 1.0/3.0],
                                                 [1.0, 2.0, 0.5, 3.0, 1.0/3.0],
                                                 [1.0, 2.0, 0.5, 3.0, 1.0/3.0],
                                                 [1.0, 2.0, 0.5],
                                                 [1.0, 2.0, 0.5]],
                        two_boxes_for_ar1=True,
                        steps=[8, 16, 32, 64, 100, 300],
                        offsets=[0.5, 0.5, 0.5, 0.5, 0.5, 0.5],
                        clip_boxes=False,
                        variances=[0.1, 0.1, 0.2, 0.2],
                        normalize_coords=True,
                        subtract_mean=[123, 117, 104],
                        swap_channels=[2, 1, 0],
                        confidence_thresh=0.5,
                        iou_threshold=0.45,
                        top_k=200,
                        nms_max_output_size=400)

        self.classes = ['background',
           'aeroplane', 'bicycle', 'bird', 'boat',
           'bottle', 'bus', 'car', 'cat',
           'chair', 'cow', 'diningtable', 'dog',
           'horse', 'motorbike', 'person', 'pottedplant',
           'sheep', 'sofa', 'train', 'tvmonitor']
        self.required_class = required_class

        self.model.load_weights(weights_path, by_name=True)
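
    # required_class defaults to the VOC indices for bicycle, bus, car,
    # motorbike and person in the class list above. A hedged sketch of a
    # filtering helper that could sit alongside __init__ (the method name
    # is an assumption, not part of the original class):
    def filter_required(self, y_pred, confidence_threshold=0.5):
        # Keep only sufficiently confident boxes whose class id is in required_class.
        picks = y_pred[0][y_pred[0, :, 1] > confidence_threshold]
        return [box for box in picks if int(box[0]) in self.required_class]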
Example #10
def create_network():
    # 1: Build the Keras model.

    K.clear_session()  # Clear previous models from memory.

    model = ssd_300(image_size=(img_height, img_width, img_channels),
                    n_classes=n_classes,
                    mode='training',
                    l2_regularization=0.0005,
                    scales=scales,
                    aspect_ratios_per_layer=aspect_ratios,
                    two_boxes_for_ar1=two_boxes_for_ar1,
                    steps=steps,
                    offsets=offsets,
                    clip_boxes=clip_boxes,
                    variances=variances,
                    normalize_coords=normalize_coords,
                    subtract_mean=mean_color,
                    swap_channels=swap_channels)

    # 2: Load some weights into the model.
    # TODO: Set the path to the weights you want to load.
    weights_path = 'D:/Develop/models/VOC0712/SSD_300x300/VGG_VOC0712_SSD_300x300_iter_120000.h5'
    model.load_weights(weights_path, by_name=True)

    freeze = [
        'input_1', 'conv1_1', 'conv1_2', 'pool1', 'conv2_1', 'conv2_2',
        'pool2', 'conv3_1', 'conv3_2', 'conv3_3', 'pool3'
    ]  #,
    #           'conv4_1', 'conv4_2', 'conv4_3', 'pool4']

    for layer in model.layers:
        if layer.name in freeze:
            layer.trainable = False

    return model
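
A quick sanity check one might run after create_network() to confirm the early VGG blocks are frozen; model.layers, layer.trainable, and model.trainable_weights are standard Keras attributes:

model = create_network()
frozen = [layer.name for layer in model.layers if not layer.trainable]
print('Frozen layers:', frozen)
print('Trainable weight tensors:', len(model.trainable_weights))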
Example #11
def perimeter_detection(weights_path, video_path, result_path, threshold,
                        perimeter_a, perimeter_b):
    img_height = 300
    img_width = 300
    K.clear_session()  # Clear previous models from memory.

    model = ssd_300(
        image_size=(img_height, img_width, 3),
        n_classes=20,
        mode='inference',
        l2_regularization=0.0005,
        scales=[0.1, 0.2, 0.37, 0.54, 0.71, 0.88, 1.05],
        # The scales for MS COCO are [0.07, 0.15, 0.33, 0.51, 0.69, 0.87, 1.05]
        aspect_ratios_per_layer=[[1.0, 2.0, 0.5],
                                 [1.0, 2.0, 0.5, 3.0, 1.0 / 3.0],
                                 [1.0, 2.0, 0.5, 3.0, 1.0 / 3.0],
                                 [1.0, 2.0, 0.5, 3.0, 1.0 / 3.0],
                                 [1.0, 2.0, 0.5], [1.0, 2.0, 0.5]],
        two_boxes_for_ar1=True,
        steps=[8, 16, 32, 64, 100, 300],
        offsets=[0.5, 0.5, 0.5, 0.5, 0.5, 0.5],
        clip_boxes=False,
        variances=[0.1, 0.1, 0.2, 0.2],
        normalize_coords=True,
        subtract_mean=[123, 117, 104],
        swap_channels=[2, 1, 0],
        confidence_thresh=0.1,
        iou_threshold=0.45,
        top_k=200,
        nms_max_output_size=400)
    model.load_weights(weights_path, by_name=True)
    adam = Adam(lr=0.001, beta_1=0.9, beta_2=0.999, epsilon=1e-08, decay=0.0)
    ssd_loss = SSDLoss(neg_pos_ratio=3, alpha=1.0)
    model.compile(optimizer=adam, loss=ssd_loss.compute_loss)

    original_images = []
    process_images = []
    cap = cv2.VideoCapture(video_path)

    num = 0
    while cap.isOpened():
        ret, frame = cap.read()
        if ret:
            transposed_frame = cv2.transpose(frame)
            transposed_frame = cv2.flip(transposed_frame, 1)
            original_images.append(transposed_frame)
            subtracted_image = cv2.subtract(transposed_frame,
                                            original_images[0])
            subtracted_image = subtracted_image[600:1000, 0:720]
            #subtracted_image = cv2.bitwise_not(subtracted_image)
            subtracted_image = enhance_image(subtracted_image)
            cv2.imwrite(
                'perimeter_detection/sub_images/sub_' + str(num) + '.jpg',
                subtracted_image)
            resize_image = cv2.resize(subtracted_image,
                                      (img_height, img_width))
            process_images.append(resize_image)
            num += 1
            k = cv2.waitKey(20)
            if k & 0xff == ord('q'):
                break
        else:
            break
    print(len(original_images))
    process_images = np.array(process_images)
    cap.release()

    # start_time = time.time()
    y_pred = model.predict(process_images, batch_size=8)
    # end_time = time.time()
    # print(end_time - start_time)
    confidence_threshold = 0.1

    y_pred_thresh = [
        y_pred[k][y_pred[k, :, 1] > confidence_threshold]
        for k in range(y_pred.shape[0])
    ]

    np.set_printoptions(precision=2, suppress=True, linewidth=90)
    print('   class   conf xmin   ymin   xmax   ymax')

    fourcc = cv2.VideoWriter_fourcc(*'MJPG')
    result_video = cv2.VideoWriter(
        'result.avi', fourcc, 25.0,
        (original_images[0].shape[0], original_images[0].shape[1]))

    for k in range(len(y_pred_thresh)):
        print(k)
        print(y_pred_thresh[k])
        #colors = plt.cm.hsv(np.linspace(0, 1, 21)).tolist()
        classes = [
            'background', 'aeroplane', 'bicycle', 'bird', 'boat', 'bottle',
            'bus', 'car', 'cat', 'chair', 'cow', 'diningtable', 'dog', 'horse',
            'motorbike', 'person', 'pottedplant', 'sheep', 'sofa', 'train',
            'tvmonitor'
        ]

        #plt.figure(figsize=(12, 8))
        #plt.imshow(original_images[k])
        #plt.xticks([])
        #plt.yticks([])
        #current_axis = plt.gca()

        for box in y_pred_thresh[k]:
            if box[0] != 15:
                continue
            # Transform the predicted bounding boxes for the 300x300 image to the original image dimensions.
            #xmin = box[2] * original_images[k].shape[1] / img_width
            #ymin = box[3] * original_images[k].shape[0] / img_height
            #xmax = box[4] * original_images[k].shape[1] / img_width
            #ymax = box[5] * original_images[k].shape[0] / img_height
            xmin = box[2] * 720 / 300
            ymin = box[3] * 400 / 300 + 600
            xmax = box[4] * 720 / 300
            ymax = box[5] * 400 / 300 + 600
            if xmin < 400:
                continue
            #color = colors[int(box[0])]
            label = '{}: {:.2f}'.format(classes[int(box[0])], box[1])
            #current_axis.add_patch(
            #    plt.Rectangle((xmin, ymin), xmax - xmin, ymax - ymin, color=color, fill=False, linewidth=2))
            #current_axis.text(xmin, ymin, label, size='x-large', color='white', bbox={'facecolor': color, 'alpha': 1.0})
            cv2.rectangle(original_images[k], (int(xmin), int(ymin)),
                          (int(xmax), int(ymax)), (0, 255, 0), 2)
        #plt.savefig(result_path + '/detection_' + str(k) + '.jpg', format='jpg')
        cv2.imwrite(result_path + '/detection_' + str(k) + '.jpg',
                    original_images[k])
        result_image = original_images[k]
        transposed_image = cv2.transpose(result_image)
        transposed_image = cv2.flip(transposed_image, 0)
        result_video.write(transposed_image)
        plt.close('all')
    result_video.release()
    cv2.destroyAllWindows()
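
enhance_image is called above but not defined in this snippet; one plausible stand-in, offered purely as a hedged sketch (min-max contrast stretching with OpenCV, so faint frame differences become more visible to the detector):

import cv2

def enhance_image(img):
    # Hypothetical implementation: stretch the intensity range to [0, 255].
    return cv2.normalize(img, None, alpha=0, beta=255, norm_type=cv2.NORM_MINMAX)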
Example #12
    def __init__(self):
        rospy.init_node('model_tester_keras')

        self.t_detect = 1.5  # minimum time between inferences
        self.t_last_detect = rospy.Time.now()

        self.img_pub = rospy.Publisher('image_detect', Image, queue_size=1)

        img_height = 300  # Height of the input images
        img_width = 300  # Width of the input images
        img_channels = 3  # Number of color channels of the input images
        subtract_mean = [123, 117, 104]  # The per-channel mean of the images in the dataset
        swap_channels = [2, 1, 0]  # The color channel order in the original SSD is BGR, so the model reverses the color channel order of the input images.
        n_classes = 8  # Number of positive classes, e.g. 20 for Pascal VOC, 80 for MS COCO
        scales = [
            0.07, 0.15, 0.33, 0.51, 0.69, 0.87, 1.05
        ]  # The anchor box scaling factors used in the original SSD300 for the MS COCO datasets.
        # scales = [0.1, 0.2, 0.37, 0.54, 0.71, 0.88, 1.05] # The anchor box scaling factors used in the original SSD300 for the Pascal VOC datasets.
        aspect_ratios = [
            [1.0, 2.0, 0.5], [1.0, 2.0, 0.5, 3.0, 1.0 / 3.0],
            [1.0, 2.0, 0.5, 3.0, 1.0 / 3.0], [1.0, 2.0, 0.5, 3.0, 1.0 / 3.0],
            [1.0, 2.0, 0.5], [1.0, 2.0, 0.5]
        ]  # The anchor box aspect ratios used in the original SSD300; the order matters
        two_boxes_for_ar1 = True
        steps = [
            8, 16, 32, 64, 100, 300
        ]  # The space between two adjacent anchor box center points for each predictor layer.
        offsets = [
            0.5, 0.5, 0.5, 0.5, 0.5, 0.5
        ]  # The offsets of the first anchor box center points from the top and left borders of the image as a fraction of the step size for each predictor layer.
        clip_boxes = False  # Whether or not you want to limit the anchor boxes to lie entirely within the image boundaries
        variances = [
            0.1, 0.1, 0.2, 0.2
        ]  # The variances by which the encoded target coordinates are scaled as in the original implementation
        normalize_coords = True

        keras.backend.clear_session()
        self.model = ssd_300(image_size=(img_height, img_width, img_channels),
                             n_classes=n_classes,
                             mode='inference',
                             l2_regularization=0.0005,
                             scales=scales,
                             aspect_ratios_per_layer=aspect_ratios,
                             two_boxes_for_ar1=two_boxes_for_ar1,
                             steps=steps,
                             offsets=offsets,
                             clip_boxes=clip_boxes,
                             variances=variances,
                             normalize_coords=normalize_coords,
                             subtract_mean=subtract_mean,
                             divide_by_stddev=None,
                             swap_channels=swap_channels,
                             confidence_thresh=0.5,
                             iou_threshold=0.45,
                             top_k=200,
                             nms_max_output_size=400,
                             return_predictor_sizes=False)

        self.model.load_weights(os.path.join(module_path, 'ssdx_wt.h5'),
                                by_name=True)
        adam = Adam(lr=0.001,
                    beta_1=0.9,
                    beta_2=0.999,
                    epsilon=1e-08,
                    decay=0.0)
        ssd_loss = SSDLoss(neg_pos_ratio=3, alpha=1.0)
        self.model.compile(optimizer=adam, loss=ssd_loss.compute_loss)

        # TF1/Keras workaround: store the default graph and build the predict
        # function now so the model can be called from the ROS callback thread.
        global graph
        graph = tf.get_default_graph()
        self.model._make_predict_function()

        img_sub = rospy.Subscriber('image_color',
                                   Image,
                                   self.img_cb,
                                   queue_size=1)
        self.bridge = CvBridge()

        rospy.loginfo("READY")

        rospy.spin()
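
    # img_cb is registered above but not shown. A hedged sketch of a plausible
    # callback body using the names defined in __init__; the resizing and
    # publishing details are assumptions, and cv2/numpy imports are assumed
    # as in the other examples:
    def img_cb(self, msg):
        # Rate-limit inference to one pass every t_detect seconds.
        if (rospy.Time.now() - self.t_last_detect).to_sec() < self.t_detect:
            return
        self.t_last_detect = rospy.Time.now()
        frame = self.bridge.imgmsg_to_cv2(msg, desired_encoding='rgb8')
        x = cv2.resize(frame, (300, 300))[np.newaxis].astype(np.float32)
        with graph.as_default():  # the graph stored in __init__
            y_pred = self.model.predict(x)
        # ... draw the boxes on `frame` and publish it via self.img_pub ...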
Example #13
def detect_from_video(config: Dict):
    """Run inference on a video and write out a video showing all predictions.

    Parameters
    ----------
    config : Dict
        Config yaml/json containing all parameters
    """
    video = config['inference']['video_input']['video_input_path']
    vp = VideoProcessing(video=video)
    vp.generate_frames(export_path=config['inference']['video_input']['video_to_frames_export_path'])
    if config['inference']['video_input']['video_to_frames_export_path'] == config['inference']['predicted_frames_export_path']:
        print("[Warning] The video-to-frames export path is the same as the prediction output path.\nPredicted output will overwrite the extracted frames.")
    img_height = config['inference']['img_height']
    img_width = config['inference']['img_width']
    model = ssd_300(image_size=(img_height, img_width, 3),
                n_classes=config['inference']['n_classes'],
                mode='inference',
                l2_regularization=0.0005,
                scales=[0.1, 0.2, 0.37, 0.54, 0.71, 0.88, 1.05], # The scales for MS COCO are [0.07, 0.15, 0.33, 0.51, 0.69, 0.87, 1.05]
                aspect_ratios_per_layer=[[1.0, 2.0, 0.5],
                                         [1.0, 2.0, 0.5, 3.0, 1.0/3.0],
                                         [1.0, 2.0, 0.5, 3.0, 1.0/3.0],
                                         [1.0, 2.0, 0.5, 3.0, 1.0/3.0],
                                         [1.0, 2.0, 0.5],
                                         [1.0, 2.0, 0.5]],
                two_boxes_for_ar1=True,
                steps=[8, 16, 32, 64, 100, 300],
                offsets=[0.5, 0.5, 0.5, 0.5, 0.5, 0.5],
                clip_boxes=False,
                variances=[0.1, 0.1, 0.2, 0.2],
                normalize_coords=True,
                subtract_mean=[123, 117, 104],
                swap_channels=[2, 1, 0],
                confidence_thresh=0.5,
                iou_threshold=0.45,
                top_k=200,
                nms_max_output_size=400)

    # Load the trained weights into the model.
    weights_path = config['inference']['weights_path']

    model.load_weights(weights_path, by_name=True)
    
    # Working with image
    all_images = glob.glob(f"{config['inference']['video_input']['video_to_frames_export_path']}/*/*")
    
    # Setting Up Prediction Threshold
    confidence_threshold = config['inference']['confidence_threshold']
    
    # Setting Up Classes (Note Should be in same order as in training)
    classes = config['inference']['classes']
    
    vp.existsFolder(f"{config['inference']['predicted_frames_export_path']}/{video.split('.')[0]}")
    # Working with image
    for current_img in tqdm(all_images):
        current_img_name = current_img.split('/')[-1]
        orig_image = cv2.imread(current_img)
        input_images = [] # Store resized versions of the images here
        img = image.load_img(current_img, target_size=(img_height, img_width))
        img = image.img_to_array(img) 
        input_images.append(img)
        input_images = np.array(input_images)
        
        # Prediction
        y_pred = model.predict(input_images)

        # Using threshold
        y_pred_thresh = [y_pred[k][y_pred[k,:,1] > confidence_threshold] for k in range(y_pred.shape[0])]
        
        # Drawing boxes
        for box in y_pred_thresh[0]:
            xmin = box[2] * orig_image.shape[1] / img_width
            ymin = box[3] * orig_image.shape[0] / img_height
            xmax = box[4] * orig_image.shape[1] / img_width
            ymax = box[5] * orig_image.shape[0] / img_height

            label = f"{classes[int(box[0])]}: {box[1]:.2f}"
            cv2.rectangle(orig_image, (int(xmin), int(ymin)), (int(xmax), int(ymax)), (255, 0, 0), 2)
            cv2.putText(orig_image, label, (int(xmin), int(ymin)), cv2.FONT_HERSHEY_SIMPLEX, 2, (255, 255, 255), 2, cv2.LINE_AA)

        # Write the annotated frame once per image, not once per box.
        cv2.imwrite(f"{config['inference']['predicted_frames_export_path']}/{video.split('.')[0]}/{current_img_name}", orig_image)
        
    # Creating video
    vp.generate_video(import_path=config['inference']['predicted_frames_export_path'],
                      export_path=config['inference']['video_input']['video_output_path'])
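
detect_from_video reads everything from a nested config dict; a minimal sketch covering exactly the keys accessed above (all paths and values are placeholders):

config = {
    'inference': {
        'img_height': 300,
        'img_width': 300,
        'n_classes': 20,
        'weights_path': 'ssd300_weights.h5',            # placeholder
        'confidence_threshold': 0.5,
        'classes': ['background', 'aeroplane', '...'],  # same order as in training
        'predicted_frames_export_path': 'output/frames',
        'video_input': {
            'video_input_path': 'input.mp4',
            'video_to_frames_export_path': 'output/raw_frames',
            'video_output_path': 'output/result.mp4',
        },
    },
}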
        
        
                    
        
Example #14
def perimeter_detection(weights_path, image_path, result_path, threshold,
                        perimeter_a, perimeter_b):
    img_height = 300
    img_width = 300
    K.clear_session()  # Clear previous models from memory.

    model = ssd_300(
        image_size=(img_height, img_width, 3),
        n_classes=20,
        mode='inference',
        l2_regularization=0.0005,
        scales=[0.1, 0.2, 0.37, 0.54, 0.71, 0.88, 1.05],
        # The scales for MS COCO are [0.07, 0.15, 0.33, 0.51, 0.69, 0.87, 1.05]
        aspect_ratios_per_layer=[[1.0, 2.0, 0.5],
                                 [1.0, 2.0, 0.5, 3.0, 1.0 / 3.0],
                                 [1.0, 2.0, 0.5, 3.0, 1.0 / 3.0],
                                 [1.0, 2.0, 0.5, 3.0, 1.0 / 3.0],
                                 [1.0, 2.0, 0.5], [1.0, 2.0, 0.5]],
        two_boxes_for_ar1=True,
        steps=[8, 16, 32, 64, 100, 300],
        offsets=[0.5, 0.5, 0.5, 0.5, 0.5, 0.5],
        clip_boxes=False,
        variances=[0.1, 0.1, 0.2, 0.2],
        normalize_coords=True,
        subtract_mean=[123, 117, 104],
        swap_channels=[2, 1, 0],
        confidence_thresh=0.5,
        iou_threshold=0.45,
        top_k=200,
        nms_max_output_size=400)
    model.load_weights(weights_path, by_name=True)
    adam = Adam(lr=0.001, beta_1=0.9, beta_2=0.999, epsilon=1e-08, decay=0.0)
    ssd_loss = SSDLoss(neg_pos_ratio=3, alpha=1.0)
    model.compile(optimizer=adam, loss=ssd_loss.compute_loss)

    original_images = []
    process_images = []
    file_names = []
    for root, dirs, files in os.walk(image_path):
        for file in files:
            file_names.append(file)
            img_path = os.path.join(root, file)
            original_images.append(imread(img_path))
            resize_image = image.load_img(img_path,
                                          target_size=(img_height, img_width))
            resize_image = image.img_to_array(resize_image)
            process_images.append(resize_image)
    process_images = np.array(process_images)

    #start_time = time.time()
    y_pred = model.predict(process_images, batch_size=8)
    #end_time = time.time()
    #print(end_time - start_time)
    confidence_threshold = 0.5

    y_pred_thresh = [
        y_pred[k][y_pred[k, :, 1] > confidence_threshold]
        for k in range(y_pred.shape[0])
    ]

    np.set_printoptions(precision=2, suppress=True, linewidth=90)
    print('   class   conf xmin   ymin   xmax   ymax')
    '''
    for k in range(len(y_pred_thresh)):
        print(file_names[k])
        print(y_pred_thresh[k])
        colors = plt.cm.hsv(np.linspace(0, 1, 21)).tolist()
        classes = ['background',
                   'aeroplane', 'bicycle', 'bird', 'boat',
                   'bottle', 'bus', 'car', 'cat',
                   'chair', 'cow', 'diningtable', 'dog',
                   'horse', 'motorbike', 'person', 'pottedplant',
                   'sheep', 'sofa', 'train', 'tvmonitor']

        plt.figure(figsize=(12, 8))
        plt.imshow(original_images[k])
        plt.xticks([])
        plt.yticks([])
        current_axis = plt.gca()

        for box in y_pred_thresh[k]:
            # Transform the predicted bounding boxes for the 300x300 image to the original image dimensions.
            xmin = box[2] * original_images[k].shape[1] / img_width
            ymin = box[3] * original_images[k].shape[0] / img_height
            xmax = box[4] * original_images[k].shape[1] / img_width
            ymax = box[5] * original_images[k].shape[0] / img_height
            color = colors[int(box[0])]
            label = '{}: {:.2f}'.format(classes[int(box[0])], box[1])
            current_axis.add_patch(
                plt.Rectangle((xmin, ymin), xmax - xmin, ymax - ymin, color=color, fill=False, linewidth=2))
            current_axis.text(xmin, ymin, label, size='x-large', color='white', bbox={'facecolor': color, 'alpha': 1.0})

        plt.savefig(result_path + '/detection_' + file_names[k], format='jpg')
        plt.close('all')
    '''
    #start_time = time.time()
    vector_a = np.array(
        [perimeter_a[0] - perimeter_b[0], perimeter_a[1] - perimeter_b[1]])
    distance_a = np.linalg.norm(vector_a)
    for k in range(len(y_pred_thresh)):
        print(file_names[k])
        print(y_pred_thresh[k])
        plt.figure(figsize=(12, 8))
        plt.imshow(original_images[k])
        plt.xticks([])
        plt.yticks([])
        current_axis = plt.gca()
        flag = 0
        for box in y_pred_thresh[k]:
            if box[0] != 15:
                continue
            # Transform the predicted bounding boxes for the 300x300 image to the original image dimensions.
            xmin = box[2] * original_images[k].shape[1] / img_width
            ymin = box[3] * original_images[k].shape[0] / img_height
            xmax = box[4] * original_images[k].shape[1] / img_width
            ymax = box[5] * original_images[k].shape[0] / img_height
            # A box crosses the perimeter if any of its four corners lies on
            # the far side of the line or within `threshold` of it.
            crossed = False
            for cx, cy in ((xmin, ymin), (xmin, ymax), (xmax, ymin), (xmax, ymax)):
                vector_b = np.array([cx - perimeter_a[0], cy - perimeter_a[1]])
                vector_cross = np.cross(vector_a, vector_b)
                distance = np.linalg.norm(vector_cross / distance_a)
                if vector_cross >= 0 or distance < threshold:
                    crossed = True
                    break
            if crossed:
                flag = 1
            current_axis.add_patch(
                plt.Rectangle((xmin, ymin),
                              xmax - xmin,
                              ymax - ymin,
                              color='#FF0000' if crossed else '#00FF00',
                              fill=False,
                              linewidth=2))
        print(flag)
        line = Line2D([perimeter_a[0], perimeter_b[0]],
                      [perimeter_a[1], perimeter_b[1]],
                      color='#000000')
        current_axis.add_line(line)
        #plt.plot([perimeter_a[0], perimeter_b[0]], [perimeter_a[1], perimeter_b[1]], 'k')
        plt.savefig(result_path + '/perimeter_' + file_names[k], format='jpg')
        plt.close('all')
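
The corner test above is a signed point-to-line distance: for the line through A and B and a point P, the 2-D cross product cross(A−B, P−A) gives (twice) the signed triangle area, and dividing its magnitude by |A−B| gives the distance, while its sign tells which side of the line P falls on. A standalone check using the same convention:

import numpy as np

def side_and_distance(a, b, p):
    """Signed side and distance of point p from the line through a and b."""
    vector_a = np.array([a[0] - b[0], a[1] - b[1]])
    vector_b = np.array([p[0] - a[0], p[1] - a[1]])
    cross = np.cross(vector_a, vector_b)  # scalar for 2-D inputs
    return np.sign(cross), abs(cross) / np.linalg.norm(vector_a)

print(side_and_distance((0, 0), (0, 10), (3, 5)))  # prints (1.0, 3.0): p is 3 px from the vertical line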
Example #15
def _main_(args):
    print('Hello World! This is {:s}'.format(args.desc))

    # config_path = args.conf
    # with open(config_path) as config_buffer:    
    #     config = json.loads(config_buffer.read())
    #############################################################
    #   Set model parameters
    #############################################################
    img_height          = 300  # Height of the model input images
    img_width           = 300  # Width of the model input images
    img_channels        = 3  # Number of color channels of the model input images
    mean_color          = [123, 117, 104]  # The per-channel mean of the images in the dataset. Do not change this value if you're using any of the pre-trained weights.
    swap_channels       = [2, 1, 0]  # The color channel order in the original SSD is BGR, so we'll have the model reverse the color channel order of the input images.
    n_classes           = 20  # Number of positive classes, e.g. 20 for Pascal VOC, 80 for MS COCO
    scales_pascal       = [0.1, 0.2, 0.37, 0.54, 0.71, 0.88, 1.05]  # The anchor box scaling factors used in the original SSD300 for the Pascal VOC datasets
    scales_coco         = [0.07, 0.15, 0.33, 0.51, 0.69, 0.87, 1.05]  # The anchor box scaling factors used in the original SSD300 for the MS COCO datasets
    scales              = scales_pascal
    aspect_ratios       = [[1.0, 2.0, 0.5],
                           [1.0, 2.0, 0.5, 3.0, 1.0 / 3.0],
                           [1.0, 2.0, 0.5, 3.0, 1.0 / 3.0],
                           [1.0, 2.0, 0.5, 3.0, 1.0 / 3.0],
                           [1.0, 2.0, 0.5],
                           [1.0, 2.0, 0.5]]  # The anchor box aspect ratios used in the original SSD300; the order matters
    two_boxes_for_ar1   = True
    steps               = [8, 16, 32, 64, 100, 300]  # The space between two adjacent anchor box center points for each predictor layer.
    offsets             = [0.5, 0.5, 0.5, 0.5, 0.5, 0.5]  # The offsets of the first anchor box center points from the top and left borders of the image as a fraction of the step size for each predictor layer.
    clip_boxes          = False  # Whether or not to clip the anchor boxes to lie entirely within the image boundaries
    variances           = [0.1, 0.1, 0.2, 0.2]  # The variances by which the encoded target coordinates are divided as in the original implementation
    normalize_coords    = True

    #############################################################
    #   Create the model
    #############################################################
    # 1: Build the Keras model.
    model = ssd_300(image_size=(img_height, img_width, img_channels),
                    n_classes=n_classes,
                    mode='training',
                    l2_regularization=0.0005,
                    scales=scales,
                    aspect_ratios_per_layer=aspect_ratios,
                    two_boxes_for_ar1=two_boxes_for_ar1,
                    steps=steps,
                    offsets=offsets,
                    clip_boxes=clip_boxes,
                    variances=variances,
                    normalize_coords=normalize_coords,
                    subtract_mean=mean_color,
                    swap_channels=swap_channels)
    # 2: Load some weights into the model (no pre-trained weights are loaded in this snippet).

    # 3: Instantiate an optimizer and the SSD loss function and compile the model.
    adam = Adam(lr=0.001, beta_1=0.9, beta_2=0.999, epsilon=1e-08, decay=0.0)
    ssd_loss = SSDLoss(neg_pos_ratio=3, alpha=1.0)
    model.compile(optimizer=adam, loss=ssd_loss.compute_loss)

    #############################################################
    #   Prepare the data
    #############################################################
    # 1: Instantiate two `DataGenerator` objects: One for training, one for validation.
    train_dataset = DataGenerator(load_images_into_memory=False, hdf5_dataset_path=None)
    val_dataset = DataGenerator(load_images_into_memory=False, hdf5_dataset_path=None)

    # 2: Parse the image and label lists for the training and validation datasets. This can take a while.
    VOC_2007_images_dir = '/home/minhnc-lab/WORKSPACES/AI/data/VOC/VOCtrainval_06-Nov-2007/VOCdevkit/VOC2007/JPEGImages'
    VOC_2007_annotations_dir = '/home/minhnc-lab/WORKSPACES/AI/data/VOC/VOCtrainval_06-Nov-2007/VOCdevkit/VOC2007/Annotations'
    VOC_2007_train_image_set_filename = '/home/minhnc-lab/WORKSPACES/AI/data/VOC/VOCtrainval_06-Nov-2007/VOCdevkit/VOC2007/ImageSets/Main/train.txt'
    VOC_2007_val_image_set_filename = '/home/minhnc-lab/WORKSPACES/AI/data/VOC/VOCtrainval_06-Nov-2007/VOCdevkit/VOC2007/ImageSets/Main/val.txt'
    # VOC_2007_trainval_image_set_filename = '/home/minhnc-lab/WORKSPACES/AI/data/VOC/VOCtrainval_06-Nov-2007/VOCdevkit/VOC2007/ImageSets/Main/trainval.txt'
    # VOC_2007_test_image_set_filename = '/home/minhnc-lab/WORKSPACES/AI/data/VOC/VOCtest_06-Nov-2007/VOCdevkit/VOC2007/ImageSets/Main/test.txt'

    classes = ['background',
               'aeroplane', 'bicycle', 'bird', 'boat',
               'bottle', 'bus', 'car', 'cat',
               'chair', 'cow', 'diningtable', 'dog',
               'horse', 'motorbike', 'person', 'pottedplant',
               'sheep', 'sofa', 'train', 'tvmonitor']

    train_dataset.parse_xml(images_dirs=[VOC_2007_images_dir],
                            image_set_filenames=[VOC_2007_train_image_set_filename],
                            annotations_dirs=[VOC_2007_annotations_dir],
                            classes=classes,
                            include_classes='all',
                            exclude_truncated=False,
                            exclude_difficult=False,
                            ret=False)
    val_dataset.parse_xml(images_dirs=[VOC_2007_images_dir],
                          image_set_filenames=[VOC_2007_val_image_set_filename],
                          annotations_dirs=[VOC_2007_annotations_dir],
                          classes=classes,
                          include_classes='all',
                          exclude_truncated=False,
                          exclude_difficult=True,
                          ret=False)

    train_dataset.create_hdf5_dataset(file_path='dataset_pascal_voc_07+12_trainval.h5',
                                      resize=False,
                                      variable_image_size=True,
                                      verbose=True)

    val_dataset.create_hdf5_dataset(file_path='dataset_pascal_voc_07_test.h5',
                                    resize=False,
                                    variable_image_size=True,
                                    verbose=True)
    # 3: Set the batch size.
    batch_size = 8  # Change the batch size if you like, or if you run into GPU memory issues.

    # 4: Set the image transformations for pre-processing and data augmentation options.
    ssd_data_augmentation = SSDDataAugmentation(img_height=img_height,
                                                img_width=img_width,
                                                background=mean_color)
    convert_to_3_channels = ConvertTo3Channels()
    resize = Resize(height=img_height, width=img_width)

    # 5: Instantiate an encoder that can encode ground truth labels into the format needed by the SSD loss function.
    predictor_sizes = [model.get_layer('conv4_3_norm_mbox_conf').output_shape[1:3],
                       model.get_layer('fc7_mbox_conf').output_shape[1:3],
                       model.get_layer('conv6_2_mbox_conf').output_shape[1:3],
                       model.get_layer('conv7_2_mbox_conf').output_shape[1:3],
                       model.get_layer('conv8_2_mbox_conf').output_shape[1:3],
                       model.get_layer('conv9_2_mbox_conf').output_shape[1:3]]

    ssd_input_encoder = SSDInputEncoder(img_height=img_height,
                                        img_width=img_width,
                                        n_classes=n_classes,
                                        predictor_sizes=predictor_sizes,
                                        scales=scales,
                                        aspect_ratios_per_layer=aspect_ratios,
                                        two_boxes_for_ar1=two_boxes_for_ar1,
                                        steps=steps,
                                        offsets=offsets,
                                        clip_boxes=clip_boxes,
                                        variances=variances,
                                        matching_type='multi',
                                        pos_iou_threshold=0.5,
                                        neg_iou_limit=0.5,
                                        normalize_coords=normalize_coords)

    # 6: Create the generator handles that will be passed to Keras' `fit_generator()` function.
    train_generator = train_dataset.generate(batch_size=batch_size,
                                             shuffle=True,
                                             transformations=[ssd_data_augmentation],
                                             label_encoder=ssd_input_encoder,
                                             returns={'processed_images',
                                                      'encoded_labels'},
                                             keep_images_without_gt=False)

    val_generator = val_dataset.generate(batch_size=batch_size,
                                         shuffle=False,
                                         transformations=[convert_to_3_channels,
                                                          resize],
                                         label_encoder=ssd_input_encoder,
                                         returns={'processed_images',
                                                  'encoded_labels'},
                                         keep_images_without_gt=False)

    # Get the number of samples in the training and validations datasets.
    train_dataset_size = train_dataset.get_dataset_size()
    val_dataset_size = val_dataset.get_dataset_size()

    print("Number of images in the training dataset:\t{:>6}".format(train_dataset_size))
    print("Number of images in the validation dataset:\t{:>6}".format(val_dataset_size))

    #############################################################
    #   Kick off the training
    #############################################################
    # Define model callbacks.
    model_checkpoint = ModelCheckpoint(
        filepath='ssd300_pascal_07+12_epoch-{epoch:02d}_loss-{loss:.4f}_val_loss-{val_loss:.4f}.h5',
        monitor='val_loss',
        verbose=1,
        save_best_only=True,
        save_weights_only=False,
        mode='auto',
        period=1)

    csv_logger = CSVLogger(filename='ssd300_pascal_07+12_training_log.csv',
                           separator=',',
                           append=True)

    learning_rate_scheduler = LearningRateScheduler(schedule=lr_schedule,
                                                    verbose=1)

    terminate_on_nan = TerminateOnNaN()

    callbacks = [model_checkpoint,
                 csv_logger,
                 learning_rate_scheduler,
                 terminate_on_nan]

    # Train
    initial_epoch = 0
    final_epoch = 120
    steps_per_epoch = 1000

    history = model.fit_generator(generator=train_generator,
                                  steps_per_epoch=steps_per_epoch,
                                  epochs=final_epoch,
                                  callbacks=callbacks,
                                  validation_data=val_generator,
                                  validation_steps=ceil(val_dataset_size / batch_size),
                                  initial_epoch=initial_epoch)

    #############################################################
    #   Run the evaluation
    #############################################################
    # 1: Set the generator for the predictions.
    predict_generator = val_dataset.generate(batch_size=1,
                                             shuffle=True,
                                             transformations=[convert_to_3_channels,
                                                              resize],
                                             label_encoder=None,
                                             returns={'processed_images',
                                                      'filenames',
                                                      'inverse_transform',
                                                      'original_images',
                                                      'original_labels'},
                                             keep_images_without_gt=False)

    # 2: Generate samples.
    batch_images, batch_filenames, batch_inverse_transforms, batch_original_images, batch_original_labels = next(
        predict_generator)

    i = 0  # Which batch item to look at

    print("Image:", batch_filenames[i])
    print()
    print("Ground truth boxes:\n")
    print(np.array(batch_original_labels[i]))

    # 3: Make predictions.
    y_pred = model.predict(batch_images)

    # 4: Decode the raw predictions in `y_pred`.
    y_pred_decoded = decode_detections(y_pred,
                                       confidence_thresh=0.5,
                                       iou_threshold=0.4,
                                       top_k=200,
                                       normalize_coords=normalize_coords,
                                       img_height=img_height,
                                       img_width=img_width)

    # 5: Convert the predictions for the original image.
    y_pred_decoded_inv = apply_inverse_transforms(y_pred_decoded, batch_inverse_transforms)
    np.set_printoptions(precision=2, suppress=True, linewidth=90)
    print("Predicted boxes:\n")
    print('   class   conf xmin   ymin   xmax   ymax')
    print(y_pred_decoded_inv[i])

    # 6: Draw the predicted boxes onto the image
    # Set the colors for the bounding boxes
    colors = plt.cm.hsv(np.linspace(0, 1, n_classes + 1)).tolist()
    classes = ['background',
               'aeroplane', 'bicycle', 'bird', 'boat',
               'bottle', 'bus', 'car', 'cat',
               'chair', 'cow', 'diningtable', 'dog',
               'horse', 'motorbike', 'person', 'pottedplant',
               'sheep', 'sofa', 'train', 'tvmonitor']

    plt.figure(figsize=(20, 12))
    plt.imshow(batch_original_images[i])

    current_axis = plt.gca()

    for box in batch_original_labels[i]:
        xmin = box[1]
        ymin = box[2]
        xmax = box[3]
        ymax = box[4]
        label = '{}'.format(classes[int(box[0])])
        current_axis.add_patch(
            plt.Rectangle((xmin, ymin), xmax - xmin, ymax - ymin, color='green', fill=False, linewidth=2))
        current_axis.text(xmin, ymin, label, size='x-large', color='white', bbox={'facecolor': 'green', 'alpha': 1.0})

    for box in y_pred_decoded_inv[i]:
        xmin = box[2]
        ymin = box[3]
        xmax = box[4]
        ymax = box[5]
        color = colors[int(box[0])]
        label = '{}: {:.2f}'.format(classes[int(box[0])], box[1])
        current_axis.add_patch(
            plt.Rectangle((xmin, ymin), xmax - xmin, ymax - ymin, color=color, fill=False, linewidth=2))
        current_axis.text(xmin, ymin, label, size='x-large', color='white', bbox={'facecolor': color, 'alpha': 1.0})
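
The LearningRateScheduler above is given a schedule function lr_schedule that the snippet never defines; a hedged sketch matching the classic SSD300 Pascal VOC step schedule (the exact breakpoints are an assumption):

def lr_schedule(epoch):
    # Hypothetical step schedule in the spirit of the original SSD300 training.
    if epoch < 80:
        return 0.001
    elif epoch < 100:
        return 0.0001
    else:
        return 0.00001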
Example #16
def main(job_dir, **args):
    # Set up the path for saving logs
    logs_dir = job_dir + 'logs/'
    data_dir = "gs://deeplearningteam11/data"

    print("Current Directory: " + os.path.dirname(__file__))
    print("Lets copy the data to: " + os.path.dirname(__file__))
    os.system("gsutil -m cp -r " + data_dir + "  " +
              os.path.dirname(__file__) + " > /dev/null 2>&1 ")
    #exit(0)

    with tf.device('/device:GPU:0'):
        # 1: Build the Keras model.
        K.clear_session()  # Clear previous models from memory.
        model = ssd_300(image_size=(img_height, img_width, img_channels),
                        n_classes=n_classes,
                        mode='training',
                        l2_regularization=0.0005,
                        scales=scales,
                        aspect_ratios_per_layer=aspect_ratios,
                        two_boxes_for_ar1=two_boxes_for_ar1,
                        steps=steps,
                        offsets=offsets,
                        clip_boxes=clip_boxes,
                        variances=variances,
                        normalize_coords=normalize_coords,
                        subtract_mean=mean_color,
                        swap_channels=swap_channels)

        # Set the path to the `.h5` file of the model to be loaded.
        model_file = file_io.FileIO('gs://deeplearningteam11/vgg19BNmodel.h5',
                                    mode='rb')

        # Store model locally on instance
        model_path = 'model.h5'
        with open(model_path, 'wb') as f:
            f.write(model_file.read())
        model_file.close()

        ssd_loss = SSDLoss(neg_pos_ratio=3, alpha=1.0)

        # Load the full pre-trained model; this replaces the model built above.
        model = load_model(model_path,
                           custom_objects={
                               'AnchorBoxes': AnchorBoxes,
                               'L2Normalization': L2Normalization,
                               'DecodeDetections': DecodeDetections,
                               'compute_loss': ssd_loss.compute_loss
                           })

        for layer in model.layers:
            layer.trainable = True

        model.summary()

        # 1: Instantiate two `DataGenerator` objects: One for training, one for validation.
        train_dataset = DataGenerator(load_images_into_memory=True,
                                      hdf5_dataset_path=None)
        val_dataset = DataGenerator(load_images_into_memory=True,
                                    hdf5_dataset_path=None)

        # 2: Parse the image and label lists for the training and validation datasets. This can take a while.
        #  VOC 2007
        #  The directories that contain the images.
        VOC_2007_train_images_dir = 'data/data/VOC2007/train/JPEGImages/'
        VOC_2007_test_images_dir = 'data/data/VOC2007/test/JPEGImages/'

        VOC_2007_train_anns_dir = 'data/data/VOC2007/train/Annotations/'
        VOC_2007_test_anns_dir = 'data/data/VOC2007/test/Annotations/'

        # The paths to the image sets.
        VOC_2007_trainval_image_set_dir = 'data/data/VOC2007/train/ImageSets/Main/'
        VOC_2007_test_image_set_dir = 'data/data/VOC2007/test/ImageSets/Main/'

        VOC_2007_train_images_dir = os.path.dirname(
            __file__) + "/" + VOC_2007_train_images_dir
        VOC_2007_test_images_dir = os.path.dirname(
            __file__) + "/" + VOC_2007_test_images_dir

        VOC_2007_train_anns_dir = os.path.dirname(
            __file__) + "/" + VOC_2007_train_anns_dir
        VOC_2007_test_anns_dir = os.path.dirname(
            __file__) + "/" + VOC_2007_test_anns_dir

        VOC_2007_trainval_image_set_dir = os.path.dirname(
            __file__) + "/" + VOC_2007_trainval_image_set_dir
        VOC_2007_test_image_set_dir = os.path.dirname(
            __file__) + "/" + VOC_2007_test_image_set_dir

        VOC_2007_trainval_image_set_filename = VOC_2007_trainval_image_set_dir + '/trainval.txt'
        VOC_2007_test_image_set_filename = VOC_2007_test_image_set_dir + '/test.txt'

        # The XML parser needs to know what object class names to look for and in which order to map them to integers.
        classes = [
            'background', 'aeroplane', 'bicycle', 'bird', 'boat', 'bottle',
            'bus', 'car', 'cat', 'chair', 'cow', 'diningtable', 'dog', 'horse',
            'motorbike', 'person', 'pottedplant', 'sheep', 'sofa', 'train',
            'tvmonitor'
        ]

        print("Parsing Training Data ...")
        train_dataset.parse_xml(
            images_dirs=[VOC_2007_train_images_dir],
            image_set_filenames=[VOC_2007_trainval_image_set_filename],
            annotations_dirs=[VOC_2007_train_anns_dir],
            classes=classes,
            include_classes='all',
            exclude_truncated=False,
            exclude_difficult=False,
            ret=False,
            verbose=False)
        print("Done")
        print(
            "================================================================")

        print("Parsing Test Data ...")
        val_dataset.parse_xml(
            images_dirs=[VOC_2007_test_images_dir],
            image_set_filenames=[VOC_2007_test_image_set_filename],
            annotations_dirs=[VOC_2007_test_anns_dir],
            classes=classes,
            include_classes='all',
            exclude_truncated=False,
            exclude_difficult=True,
            ret=False,
            verbose=False)
        print("Done")
        print(
            "================================================================")

        # 3: Set the batch size.
        batch_size = 32  # Change the batch size if you like, or if you run into GPU memory issues.

        #  4: Set the image transformations for pre-processing and data augmentation options.

        # For the training generator:
        ssd_data_augmentation = SSDDataAugmentation(img_height=img_height,
                                                    img_width=img_width,
                                                    background=mean_color)

        # For the validation generator:
        convert_to_3_channels = ConvertTo3Channels()
        resize = Resize(height=img_height, width=img_width)

        # 5: Instantiate an encoder that can encode ground truth labels into the format needed by the SSD loss function.

        # The encoder constructor needs the spatial dimensions of the model's predictor layers to create the anchor boxes.
        predictor_sizes = [
            model.get_layer('conv4_4_norm_mbox_conf').output_shape[1:3],
            model.get_layer('fc7_mbox_conf').output_shape[1:3],
            model.get_layer('conv8_2_mbox_conf').output_shape[1:3],
            model.get_layer('conv9_2_mbox_conf').output_shape[1:3],
            model.get_layer('conv10_2_mbox_conf').output_shape[1:3],
            model.get_layer('conv11_2_mbox_conf').output_shape[1:3]
        ]

        ssd_input_encoder = SSDInputEncoder(
            img_height=img_height,
            img_width=img_width,
            n_classes=n_classes,
            predictor_sizes=predictor_sizes,
            scales=scales,
            aspect_ratios_per_layer=aspect_ratios,
            two_boxes_for_ar1=two_boxes_for_ar1,
            steps=steps,
            offsets=offsets,
            clip_boxes=clip_boxes,
            variances=variances,
            matching_type='multi',
            pos_iou_threshold=0.5,
            neg_iou_limit=0.5,
            normalize_coords=normalize_coords)

        # 6: Create the generator handles that will be passed to Keras' `fit_generator()` function.

        train_generator = train_dataset.generate(
            batch_size=batch_size,
            shuffle=True,
            transformations=[ssd_data_augmentation],
            label_encoder=ssd_input_encoder,
            returns={'processed_images', 'encoded_labels'},
            keep_images_without_gt=False)

        val_generator = val_dataset.generate(
            batch_size=batch_size,
            shuffle=False,
            transformations=[convert_to_3_channels, resize],
            label_encoder=ssd_input_encoder,
            returns={'processed_images', 'encoded_labels'},
            keep_images_without_gt=False)

        # Get the number of samples in the training and validations datasets.
        train_dataset_size = train_dataset.get_dataset_size()
        val_dataset_size = val_dataset.get_dataset_size()

        print("Number of images in the training dataset:\t{:>6}".format(
            train_dataset_size))
        print("Number of images in the validation dataset:\t{:>6}".format(
            val_dataset_size))

        # Define a learning rate schedule.

        def lr_schedule(epoch):
            return 1e-6
            # if epoch < 80:
            #     return 0.001
            # elif epoch < 100:
            #     return 0.0001
            # else:
            #     return 0.00001

        learning_rate_scheduler = LearningRateScheduler(schedule=lr_schedule,
                                                        verbose=1)

        terminate_on_nan = TerminateOnNaN()

        callbacks = [learning_rate_scheduler, terminate_on_nan]

        # If you're resuming a previous training, set `initial_epoch` and `final_epoch` accordingly.
        initial_epoch = 120
        final_epoch = 200
        steps_per_epoch = 500

        history = model.fit_generator(generator=train_generator,
                                      steps_per_epoch=steps_per_epoch,
                                      epochs=final_epoch,
                                      callbacks=callbacks,
                                      validation_data=val_generator,
                                      validation_steps=ceil(val_dataset_size /
                                                            batch_size),
                                      initial_epoch=initial_epoch)

        model_name = "vgg19BNmodel_cont.h5"
        model.save(model_name)
        with file_io.FileIO(model_name, mode='rb') as input_f:
            with file_io.FileIO("gs://deeplearningteam11/" + model_name,
                                mode='w+') as output_f:
                output_f.write(input_f.read())
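
A trainer like the one above is normally launched through gcloud with a `--job-dir` flag; a minimal entry-point sketch (not shown in the original example):

if __name__ == '__main__':
    import argparse

    parser = argparse.ArgumentParser()
    parser.add_argument('--job-dir',
                        help='GCS or local path for logs and checkpoints',
                        required=True)
    arguments = vars(parser.parse_args())
    main(arguments.pop('job_dir'), **arguments)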
Example #17
# 1: Build the Keras model

K.clear_session() # Clear previous models from memory.

model = ssd_300(image_size=(img_height, img_width, img_channels),
                n_classes=n_classes,
                #mode='inference',
                mode='training',
                l2_regularization=0.0005,
                scales=scales,
                aspect_ratios_per_layer=aspect_ratios,
                two_boxes_for_ar1=two_boxes_for_ar1,
                steps=steps,
                offsets=offsets,
                clip_boxes=clip_boxes,
                variances=variances,
                normalize_coords=normalize_coords,
                subtract_mean=subtract_mean,
                divide_by_stddev=None,
                swap_channels=swap_channels,
                confidence_thresh=0.5,
                iou_threshold=0.45,
                top_k=200,
                nms_max_output_size=400,
                return_predictor_sizes=False)

print("Model built.")

# 2: Load the sub-sampled weights into the model.
Example #18

def main():
    # create dataset
    dataset = DataGenerator()
    dataset.parse_xml(images_dirs=[dataset_images_dir],
                      image_set_filenames=[test_image_set_filename],
                      annotations_dirs=[dataset_annotations_dir],
                      classes=classes,
                      include_classes='all',
                      exclude_truncated=False,
                      exclude_difficult=False,
                      ret=False)

    # create model
    model = ssd_300(
        image_size=(img_height, img_width, 3),
        n_classes=n_classes,
        mode=model_mode,
        l2_regularization=0.0005,
        scales=[
            0.1, 0.2, 0.37, 0.54, 0.71, 0.88, 1.05
        ],  # The scales for MS COCO are [0.07, 0.15, 0.33, 0.51, 0.69, 0.87, 1.05]
        aspect_ratios_per_layer=[[1.0, 2.0, 0.5],
                                 [1.0, 2.0, 0.5, 3.0, 1.0 / 3.0],
                                 [1.0, 2.0, 0.5, 3.0, 1.0 / 3.0],
                                 [1.0, 2.0, 0.5, 3.0, 1.0 / 3.0],
                                 [1.0, 2.0, 0.5], [1.0, 2.0, 0.5]],
        two_boxes_for_ar1=True,
        steps=None,
        offsets=[0.5, 0.5, 0.5, 0.5, 0.5, 0.5],
        clip_boxes=False,
        variances=[0.1, 0.1, 0.2, 0.2],
        normalize_coords=True,
        subtract_mean=[123, 117, 104],
        swap_channels=[2, 1, 0],
        confidence_thresh=1.0e-4,
        iou_threshold=0.45,
        top_k=200,
        nms_max_output_size=400)

    # load weights and compile it
    model.load_weights(weights_path, by_name=True)
    adam = Adam(lr=0.001, beta_1=0.9, beta_2=0.999, epsilon=1e-08, decay=0.0)
    ssd_loss = SSDLoss(neg_pos_ratio=3, alpha=1.0)
    model.compile(optimizer=adam, loss=ssd_loss.compute_loss)

    evaluator = Evaluator(model=model,
                          n_classes=n_classes,
                          data_generator=dataset,
                          model_mode=model_mode)

    results = evaluator(img_height=img_height,
                        img_width=img_width,
                        batch_size=8,
                        data_generator_mode='resize',
                        round_confidences=False,
                        matching_iou_threshold=0.5,
                        border_pixels='include',
                        sorting_algorithm='quicksort',
                        average_precision_mode='sample',
                        num_recall_points=11,
                        ignore_neutral_boxes=True,
                        return_precisions=True,
                        return_recalls=True,
                        return_average_precisions=True,
                        verbose=True)

    mean_average_precision, average_precisions, precisions, recalls = results

    for i in range(1, len(average_precisions)):
        print("{:<14}{:<6}{}".format(classes[i], 'AP',
                                     round(average_precisions[i], 3)))
    print()
    print("{:<14}{:<6}{}".format('', 'mAP', round(mean_average_precision, 3)))

    m = max((n_classes + 1) // 2, 2)
    n = 2

    fig, cells = plt.subplots(m, n, figsize=(n * 8, m * 8))
    for i in range(m):
        for j in range(n):
            if n * i + j + 1 > n_classes: break
            cells[i, j].plot(recalls[n * i + j + 1],
                             precisions[n * i + j + 1],
                             color='blue',
                             linewidth=1.0)
            cells[i, j].set_xlabel('recall', fontsize=14)
            cells[i, j].set_ylabel('precision', fontsize=14)
            cells[i, j].grid(True)
            cells[i, j].set_xticks(np.linspace(0, 1, 6))
            cells[i, j].set_yticks(np.linspace(0, 1, 6))
            cells[i, j].set_xlim(0.0, 1.0)
            cells[i, j].set_ylim(0.0, 1.0)
            cells[i, j].set_title("{}, AP: {:.3f}".format(
                classes[n * i + j + 1], average_precisions[n * i + j + 1]),
                                  fontsize=16)

    if not os.path.isdir("evaluate_result"):
        os.makedirs("evaluate_result")

    plt.savefig('evaluate_result/ssd300_face_detection.png')
Example #19
def main():
    model_mode = 'inference'
    K.clear_session()  # Clear previous models from memory.

    model = ssd_300(image_size=(Config.img_height, Config.img_width,
                                Config.img_channels),
                    n_classes=Config.n_classes,
                    mode=model_mode,
                    l2_regularization=Config.l2_regularization,
                    scales=Config.scales,
                    aspect_ratios_per_layer=Config.aspect_ratios,
                    two_boxes_for_ar1=True,
                    steps=Config.steps,
                    offsets=Config.offsets,
                    clip_boxes=False,
                    variances=Config.variances,
                    normalize_coords=Config.normalize_coords,
                    subtract_mean=Config.mean_color,
                    swap_channels=[2, 1, 0],
                    confidence_thresh=0.01,
                    iou_threshold=0.45,
                    top_k=200,
                    nms_max_output_size=400)

    # 2: Load the trained weights into the model.

    weights_path = os.getcwd() + '/weights/' + args.model_name + ".h5"
    model.load_weights(weights_path, by_name=True)
    adam = Adam(lr=0.001, beta_1=0.9, beta_2=0.999, epsilon=1e-08, decay=0.0)
    ssd_loss = SSDLoss(neg_pos_ratio=3, alpha=1.0)
    model.compile(optimizer=adam, loss=ssd_loss.compute_loss)

    test_dataset = DataGenerator(load_images_into_memory=True,
                                 hdf5_dataset_path=os.getcwd() + "/data/" +
                                 args.dataset + '/polyp_test.h5')

    test_dataset_size = test_dataset.get_dataset_size()
    print("Number of images in the test dataset:\t{:>6}".format(
        test_dataset_size))

    classes = ['background', 'polyp']

    generator = test_dataset.generate(batch_size=1,
                                      shuffle=True,
                                      transformations=[],
                                      returns={
                                          'processed_images', 'filenames',
                                          'inverse_transform',
                                          'original_images', 'original_labels'
                                      },
                                      keep_images_without_gt=False)

    # Generate a batch and make predictions.

    i = 0
    confidence_threshold = Config.confidence_threshold

    for val in range(test_dataset_size):
        batch_images, batch_filenames, batch_inverse_transforms, batch_original_images, batch_original_labels = next(
            generator)

        print("Ground truth boxes:\n")
        print(np.array(batch_original_labels[i]))

        y_pred = model.predict(batch_images)

        # Perform confidence thresholding.
        y_pred_thresh = [
            y_pred[k][y_pred[k, :, 1] > confidence_threshold]
            for k in range(y_pred.shape[0])
        ]

        # Convert the predictions for the original image.
        # y_pred_thresh_inv = apply_inverse_transforms(y_pred_thresh, batch_inverse_transforms)

        np.set_printoptions(precision=2, suppress=True, linewidth=90)
        print("Predicted boxes:\n")
        print('   class   conf xmin   ymin   xmax   ymax')
        print(y_pred_thresh[i])

        plt.figure(figsize=(20, 12))
        plt.imshow(batch_images[i])

        current_axis = plt.gca()

        # Set the colors for the bounding boxes.
        colors = plt.cm.hsv(np.linspace(0, 1, Config.n_classes + 1)).tolist()
        # Just so we can print class names onto the image instead of IDs.
        classes = ['background', 'polyps']

        for box in batch_original_labels[i]:
            xmin = box[1]
            ymin = box[2]
            xmax = box[3]
            ymax = box[4]
            label = '{}'.format(classes[int(box[0])])
            current_axis.add_patch(
                plt.Rectangle((xmin, ymin),
                              xmax - xmin,
                              ymax - ymin,
                              color='green',
                              fill=False,
                              linewidth=2))
            current_axis.text(xmin,
                              ymin,
                              label,
                              size='x-large',
                              color='white',
                              bbox={
                                  'facecolor': 'green',
                                  'alpha': 1.0
                              })

        for box in y_pred_thresh[i]:
            xmin = box[2]
            ymin = box[3]
            xmax = box[4]
            ymax = box[5]
            color = colors[int(box[0])]
            label = '{}: {:.2f}'.format(classes[int(box[0])], box[1])
            current_axis.add_patch(
                plt.Rectangle((xmin, ymin),
                              xmax - xmin,
                              ymax - ymin,
                              color=color,
                              fill=False,
                              linewidth=2))
            current_axis.text(xmin,
                              ymin,
                              label,
                              size='x-large',
                              color='white',
                              bbox={
                                  'facecolor': color,
                                  'alpha': 1.0
                              })
        image = plt.gcf()
        # plt.show()
        plt.draw()
        image.savefig(os.getcwd() + "/val_ssd300/val_" + str(val) + ".png",
                      dpi=100)

    evaluator = Evaluator(model=model,
                          n_classes=Config.n_classes,
                          data_generator=test_dataset,
                          model_mode=model_mode)

    results = evaluator(img_height=Config.img_height,
                        img_width=Config.img_width,
                        batch_size=args.batch_size,
                        data_generator_mode='resize',
                        round_confidences=False,
                        matching_iou_threshold=0.3,
                        border_pixels='include',
                        sorting_algorithm='quicksort',
                        average_precision_mode='sample',
                        num_recall_points=11,
                        ignore_neutral_boxes=True,
                        return_precisions=True,
                        return_recalls=True,
                        return_average_precisions=True,
                        verbose=True)

    mean_average_precision, average_precisions, precisions, recalls, tp_count, fp_count, fn_count, polyp_precision, polyp_recall = results

    print("TP : %d, FP : %d, FN : %d " % (tp_count, fp_count, fn_count))
    print("{:<14}{:<6}{}".format('polyp', 'Precision ',
                                 round(polyp_precision, 3)))
    print("{:<14}{:<6}{}".format('polyp', 'Recall ', round(polyp_recall, 3)))

    # for i in range(1, len(average_precisions)):
    #     print("{:<14}{:<6}{}".format(classes[i], 'AP', round(average_precisions[i], 3)))
    #
    # print("{:<14}{:<6}{}".format('', 'mAP', round(mean_average_precision, 3)))
    # print('Precisions', np.mean(precisions[1]))
    # print('Recalls', np.mean(recalls[1]))

    m = max((Config.n_classes + 1) // 2, 2)
    n = 2

    fig, cells = plt.subplots(m, n, figsize=(n * 8, m * 8))
    val = 0
    for i in range(m):
        for j in range(n):
            if n * i + j + 1 > Config.n_classes: break
            cells[i, j].plot(recalls[n * i + j + 1],
                             precisions[n * i + j + 1],
                             color='blue',
                             linewidth=1.0)
            cells[i, j].set_xlabel('recall', fontsize=14)
            cells[i, j].set_ylabel('precision', fontsize=14)
            cells[i, j].grid(True)
            cells[i, j].set_xticks(np.linspace(0, 1, 11))
            cells[i, j].set_yticks(np.linspace(0, 1, 11))
            cells[i, j].set_title("{}, AP: {:.3f}".format(
                classes[n * i + j + 1], average_precisions[n * i + j + 1]),
                                  fontsize=16)
            image = plt.gcf()
            # plt.show()
            plt.draw()
            image.savefig(os.getcwd() + "/test_out/test_" + str(val) + ".png",
                          dpi=100)
            val += 1
Example #20

def _main_(args):

    config_path = args.conf

    with open(config_path) as config_buffer:
        config = json.loads(config_buffer.read())

    ###############################
    #   Parse the annotations
    ###############################
    path_imgs_training = config['train']['train_image_folder']
    path_anns_training = config['train']['train_annot_folder']
    path_imgs_val = config['valid']['valid_image_folder']
    path_anns_val = config['valid']['valid_annot_folder']
    labels = config['model']['labels']
    categories = {}
    #categories = {"Razor": 1, "Gun": 2, "Knife": 3, "Shuriken": 4} #la categoría 0 es la background
    for i in range(len(labels)):
        categories[labels[i]] = i + 1
    print('\nTraining on: \t' + str(categories) + '\n')

    ####################################
    #   Parameters
    ###################################
    #%%
    img_height = config['model']['input']  # Height of the model input images
    img_width = config['model']['input']  # Width of the model input images
    img_channels = 3  # Number of color channels of the model input images
    mean_color = [
        123, 117, 104
    ]  # The per-channel mean of the images in the dataset. Do not change this value if you're using any of the pre-trained weights.
    swap_channels = [
        2, 1, 0
    ]  # The color channel order in the original SSD is BGR, so we'll have the model reverse the color channel order of the input images.
    n_classes = len(
        labels
    )  # Number of positive classes, e.g. 20 for Pascal VOC, 80 for MS COCO
    scales_pascal = [
        0.1, 0.2, 0.37, 0.54, 0.71, 0.88, 1.05
    ]  # The anchor box scaling factors used in the original SSD300 for the Pascal VOC datasets
    #scales_coco = [0.07, 0.15, 0.33, 0.51, 0.69, 0.87, 1.05] # The anchor box scaling factors used in the original SSD300 for the MS COCO datasets
    scales = scales_pascal
    aspect_ratios = [[1.0, 2.0, 0.5],
                     [1.0, 2.0, 0.5, 3.0, 1.0 / 3.0],
                     [1.0, 2.0, 0.5, 3.0, 1.0 / 3.0],
                     [1.0, 2.0, 0.5, 3.0, 1.0 / 3.0],
                     [1.0, 2.0, 0.5],
                     [1.0, 2.0, 0.5]
                     ]  # The anchor box aspect ratios used in the original SSD300; the order matters
    two_boxes_for_ar1 = True
    steps = [
        8, 16, 32, 64, 100, 300
    ]  # The space between two adjacent anchor box center points for each predictor layer.
    offsets = [
        0.5, 0.5, 0.5, 0.5, 0.5, 0.5
    ]  # The offsets of the first anchor box center points from the top and left borders of the image as a fraction of the step size for each predictor layer.
    clip_boxes = False  # Whether or not to clip the anchor boxes to lie entirely within the image boundaries
    variances = [
        0.1, 0.1, 0.2, 0.2
    ]  # The variances by which the encoded target coordinates are divided as in the original implementation
    normalize_coords = True

    K.clear_session()  # Clear previous models from memory.

    model_path = config['train']['saved_weights_name']

    if config['model']['backend'] == 'ssd512':
        aspect_ratios = [[1.0, 2.0, 0.5], [1.0, 2.0, 0.5, 3.0, 1.0 / 3.0],
                         [1.0, 2.0, 0.5, 3.0, 1.0 / 3.0],
                         [1.0, 2.0, 0.5, 3.0, 1.0 / 3.0],
                         [1.0, 2.0, 0.5, 3.0, 1.0 / 3.0], [1.0, 2.0, 0.5],
                         [1.0, 2.0, 0.5]]
        steps = [
            8, 16, 32, 64, 100, 200, 300
        ]  # The space between two adjacent anchor box center points for each predictor layer.
        offsets = [0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5]
        scales = [0.07, 0.15, 0.3, 0.45, 0.6, 0.75, 0.9, 1.05]

    elif config['model']['backend'] == 'ssd7':
        #weights_path = 'VGG_ILSVRC_16_layers_fc_reduced.h5'
        scales = [
            0.08, 0.16, 0.32, 0.64, 0.96
        ]  # An explicit list of anchor box scaling factors. If this is passed, it will override `min_scale` and `max_scale`.
        aspect_ratios = [0.5, 1.0,
                         2.0]  # The list of aspect ratios for the anchor boxes
        two_boxes_for_ar1 = True  # Whether or not you want to generate two anchor boxes for aspect ratio 1
        steps = None  # In case you'd like to set the step sizes for the anchor box grids manually; not recommended
        offsets = None

    if os.path.exists(model_path):
        print("\nLoading pretrained weights.\n")
        # We need to create an SSDLoss object in order to pass that to the model loader.
        ssd_loss = SSDLoss(neg_pos_ratio=3, alpha=1.0)

        K.clear_session()  # Clear previous models from memory.
        model = load_model(model_path,
                           custom_objects={
                               'AnchorBoxes': AnchorBoxes,
                               'L2Normalization': L2Normalization,
                               'compute_loss': ssd_loss.compute_loss
                           })

    else:
        ####################################
        #   Build the Keras model.
        ###################################

        if config['model']['backend'] == 'ssd300':
            #weights_path = 'VGG_VOC0712Plus_SSD_300x300_ft_iter_160000.h5'
            from models.keras_ssd300 import ssd_300 as ssd

            model = ssd_300(image_size=(img_height, img_width, img_channels),
                            n_classes=n_classes,
                            mode='training',
                            l2_regularization=0.0005,
                            scales=scales,
                            aspect_ratios_per_layer=aspect_ratios,
                            two_boxes_for_ar1=two_boxes_for_ar1,
                            steps=steps,
                            offsets=offsets,
                            clip_boxes=clip_boxes,
                            variances=variances,
                            normalize_coords=normalize_coords,
                            subtract_mean=mean_color,
                            swap_channels=swap_channels)

        elif config['model']['backend'] == 'ssd512':
            #weights_path = 'VGG_VOC0712Plus_SSD_512x512_ft_iter_160000.h5'
            from models.keras_ssd512 import ssd_512 as ssd

            # 2: Load some weights into the model.
            model = ssd(image_size=(img_height, img_width, img_channels),
                        n_classes=n_classes,
                        mode='training',
                        l2_regularization=0.0005,
                        scales=scales,
                        aspect_ratios_per_layer=aspect_ratios,
                        two_boxes_for_ar1=two_boxes_for_ar1,
                        steps=steps,
                        offsets=offsets,
                        clip_boxes=clip_boxes,
                        variances=variances,
                        normalize_coords=normalize_coords,
                        swap_channels=swap_channels)

        elif config['model']['backend'] == 'ssd7':
            #weights_path = 'VGG_ILSVRC_16_layers_fc_reduced.h5'
            from models.keras_ssd7 import build_model as ssd
            scales = [
                0.08, 0.16, 0.32, 0.64, 0.96
            ]  # An explicit list of anchor box scaling factors. If this is passed, it will override `min_scale` and `max_scale`.
            aspect_ratios = [
                0.5, 1.0, 2.0
            ]  # The list of aspect ratios for the anchor boxes
            two_boxes_for_ar1 = True  # Whether or not you want to generate two anchor boxes for aspect ratio 1
            steps = None  # In case you'd like to set the step sizes for the anchor box grids manually; not recommended
            offsets = None
            model = ssd(image_size=(img_height, img_width, img_channels),
                        n_classes=n_classes,
                        mode='training',
                        l2_regularization=0.0005,
                        scales=scales,
                        aspect_ratios_global=aspect_ratios,
                        aspect_ratios_per_layer=None,
                        two_boxes_for_ar1=two_boxes_for_ar1,
                        steps=steps,
                        offsets=offsets,
                        clip_boxes=clip_boxes,
                        variances=variances,
                        normalize_coords=normalize_coords,
                        subtract_mean=None,
                        divide_by_stddev=None)

        else:
            print('Unknown backend: ' + config['model']['backend'])

        print('Model created.')
        #sgd = SGD(lr=config['train']['learning_rate'], momentum=0.9, decay=0.0, nesterov=False)

        # TODO: Set the path to the weights you want to load. only for ssd300 or ssd512

        weights_path = 'VGG_ILSVRC_16_layers_fc_reduced.h5'
        print("\nLoading pretrained weights VGG.\n")
        model.load_weights(weights_path, by_name=True)

        # 3: Instantiate an optimizer and the SSD loss function and compile the model.
        #    If you want to follow the original Caffe implementation, use the preset SGD
        #    optimizer, otherwise I'd recommend the commented-out Adam optimizer.

        #adam = Adam(lr=0.001, beta_1=0.9, beta_2=0.999, epsilon=1e-08, decay=0.0)
        #sgd = SGD(lr=0.001, momentum=0.9, decay=0.0, nesterov=False)
        optimizer = Adam(lr=config['train']['learning_rate'],
                         beta_1=0.9,
                         beta_2=0.999,
                         epsilon=1e-08,
                         decay=0.0)
        ssd_loss = SSDLoss(neg_pos_ratio=3, alpha=1.0)
        model.compile(optimizer=optimizer, loss=ssd_loss.compute_loss)

        model.summary()

    #####################################################################
    #  Instantiate two `DataGenerator` objects: One for training, one for validation.
    ######################################################################
    # Optional: If you have enough memory, consider loading the images into memory for the reasons explained above.

    train_dataset = DataGenerator(load_images_into_memory=False,
                                  hdf5_dataset_path=None)
    val_dataset = DataGenerator(load_images_into_memory=False,
                                hdf5_dataset_path=None)

    # 2: Parse the image and label lists for the training and validation datasets. This can take a while.

    # The XML parser needs to know what object class names to look for and in which order to map them to integers.
    classes = ['background'] + labels

    train_dataset.parse_xml(
        images_dirs=[config['train']['train_image_folder']],
        image_set_filenames=[config['train']['train_image_set_filename']],
        annotations_dirs=[config['train']['train_annot_folder']],
        classes=classes,
        include_classes='all',
        exclude_truncated=False,
        exclude_difficult=False,
        ret=False)

    val_dataset.parse_xml(
        images_dirs=[config['valid']['valid_image_folder']],
        image_set_filenames=[config['valid']['valid_image_set_filename']],
        annotations_dirs=[config['valid']['valid_annot_folder']],
        classes=classes,
        include_classes='all',
        exclude_truncated=False,
        exclude_difficult=False,
        ret=False)

    #########################
    # 3: Set the batch size.
    #########################
    batch_size = config['train'][
        'batch_size']  # Change the batch size if you like, or if you run into GPU memory issues.

    ##########################
    # 4: Set the image transformations for pre-processing and data augmentation options.
    ##########################
    # For the training generator:

    # For the validation generator:
    convert_to_3_channels = ConvertTo3Channels()
    resize = Resize(height=img_height, width=img_width)

    ######################################
    # 5: Instantiate an encoder that can encode ground truth labels into the format needed by the SSD loss function.
    #########################################
    # The encoder constructor needs the spatial dimensions of the model's predictor layers to create the anchor boxes.
    if config['model']['backend'] == 'ssd512':
        predictor_sizes = [
            model.get_layer('conv4_3_norm_mbox_conf').output_shape[1:3],
            model.get_layer('fc7_mbox_conf').output_shape[1:3],
            model.get_layer('conv6_2_mbox_conf').output_shape[1:3],
            model.get_layer('conv7_2_mbox_conf').output_shape[1:3],
            model.get_layer('conv8_2_mbox_conf').output_shape[1:3],
            model.get_layer('conv9_2_mbox_conf').output_shape[1:3],
            model.get_layer('conv10_2_mbox_conf').output_shape[1:3]
        ]

        ssd_input_encoder = SSDInputEncoder(
            img_height=img_height,
            img_width=img_width,
            n_classes=n_classes,
            predictor_sizes=predictor_sizes,
            scales=scales,
            aspect_ratios_per_layer=aspect_ratios,
            two_boxes_for_ar1=two_boxes_for_ar1,
            steps=steps,
            offsets=offsets,
            clip_boxes=clip_boxes,
            variances=variances,
            matching_type='multi',
            pos_iou_threshold=0.5,
            neg_iou_limit=0.5,
            normalize_coords=normalize_coords)

    elif config['model']['backend'] == 'ssd300':
        predictor_sizes = [
            model.get_layer('conv4_3_norm_mbox_conf').output_shape[1:3],
            model.get_layer('fc7_mbox_conf').output_shape[1:3],
            model.get_layer('conv6_2_mbox_conf').output_shape[1:3],
            model.get_layer('conv7_2_mbox_conf').output_shape[1:3],
            model.get_layer('conv8_2_mbox_conf').output_shape[1:3],
            model.get_layer('conv9_2_mbox_conf').output_shape[1:3]
        ]
        ssd_input_encoder = SSDInputEncoder(
            img_height=img_height,
            img_width=img_width,
            n_classes=n_classes,
            predictor_sizes=predictor_sizes,
            scales=scales,
            aspect_ratios_per_layer=aspect_ratios,
            two_boxes_for_ar1=two_boxes_for_ar1,
            steps=steps,
            offsets=offsets,
            clip_boxes=clip_boxes,
            variances=variances,
            matching_type='multi',
            pos_iou_threshold=0.5,
            neg_iou_limit=0.5,
            normalize_coords=normalize_coords)

    elif config['model']['backend'] == 'ssd7':
        predictor_sizes = [
            model.get_layer('classes4').output_shape[1:3],
            model.get_layer('classes5').output_shape[1:3],
            model.get_layer('classes6').output_shape[1:3],
            model.get_layer('classes7').output_shape[1:3]
        ]
        ssd_input_encoder = SSDInputEncoder(
            img_height=img_height,
            img_width=img_width,
            n_classes=n_classes,
            predictor_sizes=predictor_sizes,
            scales=scales,
            aspect_ratios_global=aspect_ratios,
            two_boxes_for_ar1=two_boxes_for_ar1,
            steps=steps,
            offsets=offsets,
            clip_boxes=clip_boxes,
            variances=variances,
            matching_type='multi',
            pos_iou_threshold=0.5,
            neg_iou_limit=0.3,
            normalize_coords=normalize_coords)

    #######################
    # 6: Create the generator handles that will be passed to Keras' `fit_generator()` function.
    #######################

    train_generator = train_dataset.generate(
        batch_size=batch_size,
        shuffle=True,
        transformations=[
            SSDDataAugmentation(img_height=img_height, img_width=img_width)
        ],
        label_encoder=ssd_input_encoder,
        returns={'processed_images', 'encoded_labels'},
        keep_images_without_gt=False)

    val_generator = val_dataset.generate(
        batch_size=batch_size,
        shuffle=False,
        transformations=[convert_to_3_channels, resize],
        label_encoder=ssd_input_encoder,
        returns={'processed_images', 'encoded_labels'},
        keep_images_without_gt=False)

    # Get the number of samples in the training and validations datasets.
    train_dataset_size = train_dataset.get_dataset_size()
    val_dataset_size = val_dataset.get_dataset_size()

    print("Number of images in the training dataset:\t{:>6}".format(
        train_dataset_size))
    print("Number of images in the validation dataset:\t{:>6}".format(
        val_dataset_size))

    ##########################
    # Define model callbacks.
    #########################

    # TODO: Set the filepath under which you want to save the model.
    model_checkpoint = ModelCheckpoint(
        filepath=config['train']['saved_weights_name'],
        monitor='val_loss',
        verbose=1,
        save_best_only=True,
        save_weights_only=False,
        mode='auto',
        period=1)
    #model_checkpoint.best =

    csv_logger = CSVLogger(filename='log.csv', separator=',', append=True)
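
    # `lr_schedule` is not defined anywhere in this snippet; a minimal sketch,
    # assuming the step decay that appears (commented out) in Example #16:
    def lr_schedule(epoch):
        if epoch < 80:
            return 0.001
        elif epoch < 100:
            return 0.0001
        else:
            return 0.00001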

    learning_rate_scheduler = LearningRateScheduler(schedule=lr_schedule,
                                                    verbose=1)

    terminate_on_nan = TerminateOnNaN()

    callbacks = [
        model_checkpoint, csv_logger, learning_rate_scheduler, terminate_on_nan
    ]

    #print(model.summary())
    batch_images, batch_labels = next(train_generator)

    #    i = 0 # Which batch item to look at
    #
    #    print("Image:", batch_filenames[i])
    #    print()
    #    print("Ground truth boxes:\n")
    #    print(batch_labels[i])

    initial_epoch = 0
    final_epoch = config['train']['nb_epochs']
    #final_epoch     = 20
    steps_per_epoch = 500

    history = model.fit_generator(generator=train_generator,
                                  steps_per_epoch=steps_per_epoch,
                                  epochs=final_epoch,
                                  callbacks=callbacks,
                                  validation_data=val_generator,
                                  validation_steps=ceil(val_dataset_size /
                                                        batch_size),
                                  initial_epoch=initial_epoch,
                                  verbose=1 if config['train']['debug'] else 2)
Example #21

steps_per_epoch = 1000

# checkpoint_path = './checkpoints/final_ssd.h5'
checkpoint_path = './checkpoints/final_ssd'
os.makedirs(os.path.dirname(checkpoint_path), exist_ok=True)

config = SSD300Config(pos_iou_threshold=0.5, neg_iou_limit=0.3)

model, preprocess_input, predictor_sizes = ssd_300(
    weights='imagenet',
    image_size=config.input_shape,
    n_classes=num_classes,
    mode='training',
    l2_regularization=0.0005,
    scales=config.scales,
    aspect_ratios_per_layer=config.aspect_ratios,
    two_boxes_for_ar1=config.two_boxes_for_ar1,
    steps=config.strides,
    offsets=config.offsets,
    clip_boxes=config.clip_boxes,
    variances=config.variances,
    normalize_coords=config.normalize_coords,
    return_predictor_sizes=True)

parser = Tfrpaser(config=config,
                  predictor_sizes=predictor_sizes,
                  num_classes=num_classes,
                  batch_size=batch_size,
                  preprocess_input=preprocess_input)

dataset = parser.parse_tfrecords(
Example #22
confidence_threshold = 0.7

K.clear_session() # Clear previous models from memory.

model = ssd_300(image_size=(img_height, img_width, 3),
                n_classes=20,
                mode='inference',
                l2_regularization=0.0005,
                scales=[0.1, 0.2, 0.37, 0.54, 0.71, 0.88, 1.05], # The scales for MS COCO are [0.07, 0.15, 0.33, 0.51, 0.69, 0.87, 1.05]
                aspect_ratios_per_layer=[[1.0, 2.0, 0.5],
                                         [1.0, 2.0, 0.5, 3.0, 1.0/3.0],
                                         [1.0, 2.0, 0.5, 3.0, 1.0/3.0],
                                         [1.0, 2.0, 0.5, 3.0, 1.0/3.0],
                                         [1.0, 2.0, 0.5],
                                         [1.0, 2.0, 0.5]],
                two_boxes_for_ar1=True,
                steps=None,
                offsets=[0.5, 0.5, 0.5, 0.5, 0.5, 0.5],
                clip_boxes=False,
                variances=[0.1, 0.1, 0.2, 0.2],
                normalize_coords=True,
                subtract_mean=[123, 117, 104],
                swap_channels=[2, 1, 0],
                confidence_thresh=confidence_threshold,
                iou_threshold=0.45,
                top_k=200,
                nms_max_output_size=400)

# 2: Load the trained weights into the model.

# TODO: Set the path of the trained weights.
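
The example ends at the TODO. A minimal continuation sketch; the weights file name is borrowed from a later example and stands in for your own:

weights_path = 'VGG_VOC0712Plus_SSD_300x300_ft_iter_160000.h5'  # placeholder path
model.load_weights(weights_path, by_name=True)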
Example #23
def perimeter_detection(weights_path, video_path, result_path, threshold,
                        perimeter_a, perimeter_b):
    img_height = 300
    img_width = 300
    K.clear_session()  # Clear previous models from memory.

    model = ssd_300(
        image_size=(img_height, img_width, 3),
        n_classes=20,
        mode='inference',
        l2_regularization=0.0005,
        scales=[0.1, 0.2, 0.37, 0.54, 0.71, 0.88, 1.05],
        # The scales for MS COCO are [0.07, 0.15, 0.33, 0.51, 0.69, 0.87, 1.05]
        aspect_ratios_per_layer=[[1.0, 2.0, 0.5],
                                 [1.0, 2.0, 0.5, 3.0, 1.0 / 3.0],
                                 [1.0, 2.0, 0.5, 3.0, 1.0 / 3.0],
                                 [1.0, 2.0, 0.5, 3.0, 1.0 / 3.0],
                                 [1.0, 2.0, 0.5], [1.0, 2.0, 0.5]],
        two_boxes_for_ar1=True,
        steps=[8, 16, 32, 64, 100, 300],
        offsets=[0.5, 0.5, 0.5, 0.5, 0.5, 0.5],
        clip_boxes=False,
        variances=[0.1, 0.1, 0.2, 0.2],
        normalize_coords=True,
        subtract_mean=[123, 117, 104],
        swap_channels=[2, 1, 0],
        confidence_thresh=0.1,
        iou_threshold=0.45,
        top_k=200,
        nms_max_output_size=400)
    model.load_weights(weights_path, by_name=True)
    adam = Adam(lr=0.001, beta_1=0.9, beta_2=0.999, epsilon=1e-08, decay=0.0)
    ssd_loss = SSDLoss(neg_pos_ratio=3, alpha=1.0)
    model.compile(optimizer=adam, loss=ssd_loss.compute_loss)

    original_images = []
    process_images = []
    cap = cv2.VideoCapture(video_path)

    while cap.isOpened():
        ret, frame = cap.read()
        if ret:
            transposed_frame = cv2.transpose(frame)
            transposed_frame = cv2.flip(transposed_frame, 1)
            original_images.append(transposed_frame)
            k = cv2.waitKey(20)
            if k & 0xff == ord('q'):
                break
        else:
            break
    cap.release()
    cv2.destroyAllWindows()
    for k in range(8250):
        sub_image = cv2.imread('perimeter_detection/sub_images/sub_' + str(k) +
                               '.jpg')
        resize_image = cv2.resize(sub_image, (img_height, img_width))
        process_images.append(resize_image)
    print(len(original_images))
    process_images = np.array(process_images)
    # start_time = time.time()
    y_pred = model.predict(process_images, batch_size=8)
    # end_time = time.time()
    # print(end_time - start_time)
    confidence_threshold = 0.1

    y_pred_thresh = [
        y_pred[k][y_pred[k, :, 1] > confidence_threshold]
        for k in range(y_pred.shape[0])
    ]

    np.set_printoptions(precision=2, suppress=True, linewidth=90)
    print('   class   conf xmin   ymin   xmax   ymax')

    fourcc = cv2.VideoWriter_fourcc(*'MJPG')
    result_video = cv2.VideoWriter(
        'result1.avi', fourcc, 25.0,
        (original_images[0].shape[0], original_images[0].shape[1]))

    vector_a = np.array(
        [perimeter_a[0] - perimeter_b[0], perimeter_a[1] - perimeter_b[1]])
    distance_a = np.linalg.norm(vector_a)
    for k in range(len(y_pred_thresh)):
        print(k)
        print(y_pred_thresh[k])
        classes = [
            'background', 'aeroplane', 'bicycle', 'bird', 'boat', 'bottle',
            'bus', 'car', 'cat', 'chair', 'cow', 'diningtable', 'dog', 'horse',
            'motorbike', 'person', 'pottedplant', 'sheep', 'sofa', 'train',
            'tvmonitor'
        ]
        for box in y_pred_thresh[k]:
            if box[0] != 15:
                continue
            xmin = box[2] * 720 / 300
            ymin = box[3] * 400 / 300 + 600
            xmax = box[4] * 720 / 300
            ymax = box[5] * 400 / 300 + 600
            if xmin < 400:
                continue
            # Test each corner of the box against the perimeter line: the sign
            # of the 2D cross product says which side of the line the corner is
            # on, and its magnitude divided by |vector_a| is the corner's
            # distance to the line. Any offending corner marks the box red.
            intrusion = False
            for (px, py) in ((xmin, ymin), (xmin, ymax), (xmax, ymin), (xmax, ymax)):
                vector_b = np.array([px - perimeter_a[0], py - perimeter_a[1]])
                vector_cross = np.cross(vector_a, vector_b)
                distance = np.linalg.norm(vector_cross / distance_a)
                if vector_cross >= 0 or distance < threshold:
                    intrusion = True
                    break
            box_color = (0, 0, 255) if intrusion else (0, 255, 0)
            cv2.rectangle(original_images[k], (int(xmin), int(ymin)),
                          (int(xmax), int(ymax)), box_color, 2)
        cv2.line(original_images[k],
                 (int(perimeter_a[0]), int(perimeter_a[1])),
                 (int(perimeter_b[0]), int(perimeter_b[1])), (0, 255, 255), 2)
        cv2.imwrite(result_path + '/detection_' + str(k) + '.jpg',
                    original_images[k])
        result_image = original_images[k]
        transposed_image = cv2.transpose(result_image)
        transposed_image = cv2.flip(transposed_image, 0)
        result_video.write(transposed_image)
    result_video.release()
    cv2.destroyAllWindows()
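
The corner test above is plain 2D geometry: for a line through points A and B, the scalar cross product (B - A) x (P - A) encodes which side of the line a point P lies on (its sign) and, divided by |B - A|, P's perpendicular distance to the line. A small standalone check with illustrative values:

import numpy as np

a = np.array([0.0, 0.0])   # perimeter endpoint A (illustrative)
b = np.array([10.0, 0.0])  # perimeter endpoint B (illustrative)
p = np.array([5.0, -3.0])  # a box corner to test

v_ab = b - a
cross = np.cross(v_ab, p - a)                 # sign -> side of the line
distance = abs(cross) / np.linalg.norm(v_ab)  # perpendicular distance
print(cross, distance)                        # -30.0 3.0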
Example #24
def init_model(model_file=None):
    K.clear_session()
    global img_height
    global img_width

    if not model_file:
        img_height = 300
        img_width = 300
        model = ssd_300(
            image_size=(img_height, img_width, 3),
            n_classes=20,
            mode='inference',
            l2_regularization=0.0005,
            scales=[
                0.1, 0.2, 0.37, 0.54, 0.71, 0.88, 1.05
            ],  # The scales for MS COCO are [0.07, 0.15, 0.33, 0.51, 0.69, 0.87, 1.05]
            aspect_ratios_per_layer=[[1.0, 2.0, 0.5],
                                     [1.0, 2.0, 0.5, 3.0, 1.0 / 3.0],
                                     [1.0, 2.0, 0.5, 3.0, 1.0 / 3.0],
                                     [1.0, 2.0, 0.5, 3.0, 1.0 / 3.0],
                                     [1.0, 2.0, 0.5], [1.0, 2.0, 0.5]],
            two_boxes_for_ar1=True,
            steps=[8, 16, 32, 64, 100, 300],
            offsets=[0.5, 0.5, 0.5, 0.5, 0.5, 0.5],
            clip_boxes=False,
            variances=[0.1, 0.1, 0.2, 0.2],
            normalize_coords=True,
            subtract_mean=[123, 117, 104],
            swap_channels=[2, 1, 0],
            confidence_thresh=0.5,
            iou_threshold=0.45,
            top_k=200,
            nms_max_output_size=400)

        # TODO: Set the path of the trained weights.
        weights_path = './VGG_VOC0712Plus_SSD_300x300_ft_iter_160000.h5'
        model.load_weights(weights_path, by_name=True)

        adam = Adam(lr=0.001,
                    beta_1=0.9,
                    beta_2=0.999,
                    epsilon=1e-08,
                    decay=0.0)
        ssd_loss = SSDLoss(neg_pos_ratio=3, alpha=1.0)
        model.compile(optimizer=adam, loss=ssd_loss.compute_loss)
    else:

        img_height = 224
        img_width = 224
        ssd_loss = SSDLoss(neg_pos_ratio=3, alpha=1.0)
        model = keras.models.load_model(model_file,
                                        custom_objects={
                                            'AnchorBoxes': AnchorBoxes,
                                            'L2Normalization': L2Normalization,
                                            'DecodeDetections':
                                            DecodeDetections,
                                            'compute_loss':
                                            ssd_loss.compute_loss
                                        })

    model.summary()  # summary() prints directly and returns None

    return model
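
A brief usage sketch for `init_model`; the saved-model file name is hypothetical:

model = init_model()                   # SSD300 with the pre-trained VOC weights
# model = init_model('my_ssd_224.h5')  # or restore a full saved model (hypothetical file)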
Example #25
offsets = [
    0.5, 0.5, 0.5, 0.5, 0.5, 0.5
]  # The offsets of the first anchor box center points from the top and left borders of the image as a fraction of the step size for each predictor layer.
clip_boxes = False  # Whether or not to clip the anchor boxes to lie entirely within the image boundaries
variances = [
    0.1, 0.1, 0.2, 0.2
]  # The variances by which the encoded target coordinates are divided as in the original implementation
normalize_coords = True
K.clear_session()  # Clear previous models from memory.

model = ssd_300(image_size=(img_height, img_width, img_channels),
                n_classes=n_classes,
                mode='inference',
                l2_regularization=0.0005,
                scales=scales,
                aspect_ratios_per_layer=aspect_ratios,
                two_boxes_for_ar1=two_boxes_for_ar1,
                steps=steps,
                offsets=offsets,
                clip_boxes=clip_boxes,
                variances=variances,
                normalize_coords=normalize_coords,
                subtract_mean=mean_color,
                swap_channels=swap_channels)

weights_path = './trained_model/ssd300_epoch-1177_loss-5.6914_val_loss-5.5798.h5'
model.load_weights(weights_path, by_name=True)
adam = Adam(lr=0.001, beta_1=0.9, beta_2=0.999, epsilon=1e-08, decay=0.0)
ssd_loss = SSDLoss(neg_pos_ratio=3, alpha=1.0)
model.compile(optimizer=adam, loss=ssd_loss.compute_loss)

orig_images = []  # Store the images here.
input_images = []  # Store resized versions of the images here.
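
The snippet stops after initializing the two image lists; the usual ssd_keras inference flow continues roughly as below (a sketch; the image path is hypothetical):

import numpy as np
from keras.preprocessing import image

img_path = 'examples/test.jpg'  # hypothetical input image
orig_images.append(image.load_img(img_path))

img = image.load_img(img_path, target_size=(img_height, img_width))
input_images.append(image.img_to_array(img))
input_images = np.array(input_images)

y_pred = model.predict(input_images)
y_pred_thresh = [y_pred[k][y_pred[k, :, 1] > 0.5] for k in range(y_pred.shape[0])]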
Example #26
def main():
    create_new_model = args.model_name == 'default'

    if create_new_model:
        K.clear_session()  # Clear previous models from memory.
        model = ssd_300(image_size=(Config.img_height, Config.img_width,
                                    Config.img_channels),
                        n_classes=Config.n_classes,
                        mode='training',
                        l2_regularization=Config.l2_regularization,
                        scales=Config.scales,
                        aspect_ratios_per_layer=Config.aspect_ratios,
                        two_boxes_for_ar1=Config.two_boxes_for_ar1,
                        steps=Config.steps,
                        offsets=Config.offsets,
                        clip_boxes=Config.clip_boxes,
                        variances=Config.variances,
                        normalize_coords=Config.normalize_coords,
                        subtract_mean=Config.mean_color,
                        swap_channels=Config.swap_channels)

        adam = Adam(lr=0.001,
                    beta_1=0.9,
                    beta_2=0.999,
                    epsilon=1e-08,
                    decay=0.0)
        ssd_loss = SSDLoss(neg_pos_ratio=3, alpha=1.0)
        model.compile(optimizer=adam, loss=ssd_loss.compute_loss)
    else:

        model_path = "weights/" + args.model_name + ".h5"
        # We need to create an SSDLoss object in order to pass that to the model loader.
        ssd_loss = SSDLoss(neg_pos_ratio=3, alpha=1.0)

        K.clear_session()  # Clear previous models from memory.

        model = load_model(model_path,
                           custom_objects={
                               'AnchorBoxes': AnchorBoxes,
                               'L2Normalization': L2Normalization,
                               'compute_loss': ssd_loss.compute_loss
                           })

    # Load the data
    train_dataset = DataGenerator(load_images_into_memory=True,
                                  hdf5_dataset_path=os.getcwd() + "/data/" +
                                  args.dataset + '/polyp_train.h5')
    val_dataset = DataGenerator(load_images_into_memory=True,
                                hdf5_dataset_path=os.getcwd() + "/data/" +
                                args.dataset + '/polyp_val.h5')
    train_dataset_size = train_dataset.get_dataset_size()
    val_dataset_size = val_dataset.get_dataset_size()
    print("Number of images in the training dataset:\t{:>6}".format(
        train_dataset_size))
    print("Number of images in the validation dataset:\t{:>6}".format(
        val_dataset_size))

    batch_size = args.batch_size

    # For the training generator:
    ssd_data_augmentation = SSDDataAugmentation(img_height=Config.img_height,
                                                img_width=Config.img_width,
                                                background=Config.mean_color)

    # For the validation generator:
    convert_to_3_channels = ConvertTo3Channels()
    resize = Resize(height=Config.img_height, width=Config.img_width)

    # 5: Instantiate an encoder that can encode ground truth labels into the format needed by the SSD loss function.

    # The encoder constructor needs the spatial dimensions of the model's predictor layers to create the anchor boxes.
    predictor_sizes = [
        model.get_layer('conv4_3_norm_mbox_conf').output_shape[1:3],
        model.get_layer('fc7_mbox_conf').output_shape[1:3],
        model.get_layer('conv6_2_mbox_conf').output_shape[1:3],
        model.get_layer('conv7_2_mbox_conf').output_shape[1:3],
        model.get_layer('conv8_2_mbox_conf').output_shape[1:3],
        model.get_layer('conv9_2_mbox_conf').output_shape[1:3]
    ]

    ssd_input_encoder = SSDInputEncoder(
        img_height=Config.img_height,
        img_width=Config.img_width,
        n_classes=Config.n_classes,
        predictor_sizes=predictor_sizes,
        scales=Config.scales,
        aspect_ratios_per_layer=Config.aspect_ratios,
        two_boxes_for_ar1=Config.two_boxes_for_ar1,
        steps=Config.steps,
        offsets=Config.offsets,
        clip_boxes=Config.clip_boxes,
        variances=Config.variances,
        matching_type='multi',
        pos_iou_threshold=0.5,
        neg_iou_limit=0.5,
        normalize_coords=Config.normalize_coords)
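    # The encoder maps each image's ground truth boxes to a tensor of shape
    # `(n_total_anchors, n_classes + 12)`: a one-hot class vector plus 4 box
    # offsets, 4 anchor coordinates, and 4 variances per anchor.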

    # 6: Create the generator handles that will be passed to Keras' `fit_generator()` function.
    train_generator = train_dataset.generate(
        batch_size=batch_size,
        shuffle=True,
        transformations=[ssd_data_augmentation],
        label_encoder=ssd_input_encoder,
        returns={'processed_images', 'encoded_labels'},
        keep_images_without_gt=False)

    val_generator = val_dataset.generate(
        batch_size=batch_size,
        shuffle=False,
        transformations=[convert_to_3_channels, resize],
        label_encoder=ssd_input_encoder,
        returns={'processed_images', 'encoded_labels'},
        keep_images_without_gt=False)

    model_checkpoint = ModelCheckpoint(
        filepath=os.getcwd() +
        '/weights/ssd300_epoch-{epoch:02d}_loss-{loss:.4f}_val_loss-{val_loss:.4f}.h5',
        monitor='val_loss',
        verbose=1,
        save_best_only=True,
        save_weights_only=False,
        mode='auto',
        period=5)
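    # With `period=5` (pre-Keras-2.3 API) the checkpoint logic only runs every
    # fifth epoch; `save_best_only` then compares `val_loss` at those epochs.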

    csv_logger = CSVLogger(filename='ssd300_training_log.csv',
                           separator=',',
                           append=True)
    learning_rate_scheduler = LearningRateScheduler(schedule=lr_schedule)
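    # `lr_schedule` must be defined elsewhere in this script. A plausible
    # sketch, assuming the usual SSD300 step schedule (hypothetical values):
    #
    #     def lr_schedule(epoch):
    #         if epoch < 80:
    #             return 0.001
    #         elif epoch < 100:
    #             return 0.0001
    #         else:
    #             return 0.00001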
    terminate_on_nan = TerminateOnNaN()

    tf_log = keras.callbacks.TensorBoard(log_dir=TF_LOG_PATH + args.tf_logs,
                                         histogram_freq=0,
                                         batch_size=batch_size,
                                         write_graph=True,
                                         write_grads=False,
                                         write_images=False)

    callbacks = [
        model_checkpoint, csv_logger, learning_rate_scheduler,
        terminate_on_nan, tf_log
    ]

    # If you're resuming a previous training, set `initial_epoch` and `final_epoch` accordingly.
    initial_epoch = 0
    final_epoch = args.final_epoch
    steps_per_epoch = 500

    # Train/Fit the model
    if args.predict_mode == 'train':
        history = model.fit_generator(generator=train_generator,
                                      steps_per_epoch=steps_per_epoch,
                                      epochs=final_epoch,
                                      callbacks=callbacks,
                                      validation_data=val_generator,
                                      validation_steps=ceil(val_dataset_size /
                                                            batch_size),
                                      initial_epoch=initial_epoch)

    # Prediction Output
    predict_generator = val_dataset.generate(
        batch_size=1,
        shuffle=True,
        transformations=[convert_to_3_channels, resize],
        label_encoder=None,
        returns={
            'processed_images', 'filenames', 'inverse_transform',
            'original_images', 'original_labels'
        },
        keep_images_without_gt=False)

    i = 0
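    # `predict_generator` uses batch_size=1, so index 0 below always refers to
    # the single image in the current batch.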
    for val in range(val_dataset_size):
        batch_images, batch_filenames, batch_inverse_transforms, batch_original_images, batch_original_labels = next(
            predict_generator)

        y_pred = model.predict(batch_images)

        y_pred_decoded = decode_detections(
            y_pred,
            confidence_thresh=0.5,
            iou_threshold=0.4,
            top_k=200,
            normalize_coords=Config.normalize_coords,
            img_height=Config.img_height,
            img_width=Config.img_width)

        # 5: Convert the predictions for the original image.
        y_pred_decoded_inv = apply_inverse_transforms(
            y_pred_decoded, batch_inverse_transforms)
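        # Each decoded prediction is an array with one row per detection:
        # `[class_id, confidence, xmin, ymin, xmax, ymax]`, the exact layout
        # printed and drawn below.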

        np.set_printoptions(precision=2, suppress=True, linewidth=90)
        print("Predicted boxes:\n")
        print('   class   conf xmin   ymin   xmax   ymax')
        print(y_pred_decoded_inv[i])

        plt.figure(figsize=(20, 12))
        plt.imshow(batch_images[i])

        current_axis = plt.gca()

        # Set the colors for the bounding boxes.
        colors = plt.cm.hsv(np.linspace(0, 1, Config.n_classes + 1)).tolist()
        # Class names, so we can draw names on the image instead of class IDs.
        classes = ['background', 'polyps']

        for box in batch_original_labels[i]:
            xmin = box[1]
            ymin = box[2]
            xmax = box[3]
            ymax = box[4]
            label = '{}'.format(classes[int(box[0])])
            current_axis.add_patch(
                plt.Rectangle((xmin, ymin),
                              xmax - xmin,
                              ymax - ymin,
                              color='green',
                              fill=False,
                              linewidth=2))
            current_axis.text(xmin,
                              ymin,
                              label,
                              size='x-large',
                              color='white',
                              bbox={
                                  'facecolor': 'green',
                                  'alpha': 1.0
                              })

        for box in y_pred_decoded_inv[i]:
            xmin = box[2]
            ymin = box[3]
            xmax = box[4]
            ymax = box[5]
            color = colors[int(box[0])]
            label = '{}: {:.2f}'.format(classes[int(box[0])], box[1])
            current_axis.add_patch(
                plt.Rectangle((xmin, ymin),
                              xmax - xmin,
                              ymax - ymin,
                              color=color,
                              fill=False,
                              linewidth=2))
            current_axis.text(xmin,
                              ymin,
                              label,
                              size='x-large',
                              color='white',
                              bbox={
                                  'facecolor': color,
                                  'alpha': 1.0
                              })
        image = plt.gcf()
        plt.draw()
        image.savefig(os.getcwd() + "/val_ssd300/val_" + str(val) + ".png",
                      dpi=100)
        plt.close(image)  # Close the figure; a new one is opened each iteration.
Example #27
0
    def __init__(self, model_name, n_classes=1, mode='inference'):
        self.model_name = model_name
        self.n_classes = n_classes
        self.mode = mode

        if self.model_name == 'ssd_7':
            self.image_size = (300, 300, 3)
            self.intensity_mean = 127.5
            self.intensity_range = 127.5
            self.scales = [0.08, 0.16, 0.32, 0.64, 0.96]
            self.aspect_ratios_per_layer = None
            self.two_boxes_for_ar1 = True
            self.steps = None
            self.offsets = None
            self.clip_boxes = False
            self.variances = [1.0, 1.0, 1.0, 1.0]
            self.normalize_coords = True

            self.model = build_model(
                image_size=self.image_size,
                n_classes=self.n_classes,
                mode=self.mode,
                l2_regularization=0.0005,
                scales=self.scales,
                aspect_ratios_global=[0.5, 1.0, 2.0],
                aspect_ratios_per_layer=self.aspect_ratios_per_layer,
                two_boxes_for_ar1=self.two_boxes_for_ar1,
                steps=self.steps,
                offsets=self.offsets,
                clip_boxes=self.clip_boxes,
                variances=self.variances,
                normalize_coords=self.normalize_coords,
                subtract_mean=self.intensity_mean,
                divide_by_stddev=self.intensity_range)
        elif self.model_name == 'ssd_300':
            self.image_size = (300, 300, 3)
            self.mean_color = [123, 117, 104]
            self.swap_channels = [2, 1, 0]
            self.scales = [0.1, 0.2, 0.37, 0.54, 0.71, 0.88, 1.05]
            self.aspect_ratios_per_layer = [[1.0, 2.0, 0.5],
                                            [1.0, 2.0, 0.5, 3.0, 1.0 / 3.0],
                                            [1.0, 2.0, 0.5, 3.0, 1.0 / 3.0],
                                            [1.0, 2.0, 0.5, 3.0, 1.0 / 3.0],
                                            [1.0, 2.0, 0.5], [1.0, 2.0, 0.5]]
            self.two_boxes_for_ar1 = True
            self.steps = [8, 16, 32, 64, 100, 300]
            self.offsets = [0.5, 0.5, 0.5, 0.5, 0.5, 0.5]
            self.clip_boxes = False
            self.variances = [0.1, 0.1, 0.2, 0.2]
            self.normalize_coords = True

            self.model = ssd_300(
                image_size=self.image_size,
                n_classes=self.n_classes,
                mode=self.mode,
                l2_regularization=0.0005,
                scales=self.scales,
                aspect_ratios_per_layer=self.aspect_ratios_per_layer,
                two_boxes_for_ar1=self.two_boxes_for_ar1,
                steps=self.steps,
                offsets=self.offsets,
                clip_boxes=self.clip_boxes,
                variances=self.variances,
                normalize_coords=self.normalize_coords,
                subtract_mean=self.mean_color,
                swap_channels=self.swap_channels)
        elif self.model_name == 'ssd_512':
            self.image_size = (512, 512, 3)
            self.mean_color = [123, 117, 104]
            self.swap_channels = [2, 1, 0]
            self.scales = [0.07, 0.15, 0.3, 0.45, 0.6, 0.75, 0.9, 1.05]
            self.aspect_ratios_per_layer = [[1.0, 2.0, 0.5],
                                            [1.0, 2.0, 0.5, 3.0, 1.0 / 3.0],
                                            [1.0, 2.0, 0.5, 3.0, 1.0 / 3.0],
                                            [1.0, 2.0, 0.5, 3.0, 1.0 / 3.0],
                                            [1.0, 2.0, 0.5, 3.0, 1.0 / 3.0],
                                            [1.0, 2.0, 0.5], [1.0, 2.0, 0.5]]
            self.two_boxes_for_ar1 = True
            self.steps = [8, 16, 32, 64, 128, 256, 512]
            self.offsets = [0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5]
            self.clip_boxes = False
            self.variances = [0.1, 0.1, 0.2, 0.2]
            self.normalize_coords = True

            self.model = ssd_512(
                image_size=self.image_size,
                n_classes=self.n_classes,
                mode=self.mode,
                l2_regularization=0.0005,
                scales=self.scales,
                aspect_ratios_per_layer=self.aspect_ratios_per_layer,
                two_boxes_for_ar1=self.two_boxes_for_ar1,
                steps=self.steps,
                offsets=self.offsets,
                clip_boxes=self.clip_boxes,
                variances=self.variances,
                normalize_coords=self.normalize_coords,
                subtract_mean=self.mean_color,
                swap_channels=self.swap_channels)
        else:
            print("Unknown model name, falling back to 'ssd_7'.")
            self.model_name = 'ssd_7'
            self.image_size = (300, 300, 3)
            self.intensity_mean = 127.5
            self.intensity_range = 127.5
            self.scales = [0.08, 0.16, 0.32, 0.64, 0.96]
            self.aspect_ratios_per_layer = None
            self.two_boxes_for_ar1 = True
            self.steps = None
            self.offsets = None
            self.clip_boxes = False
            self.variances = [1.0, 1.0, 1.0, 1.0]
            self.normalize_coords = True

            self.model = build_model(
                image_size=self.image_size,
                n_classes=self.n_classes,
                mode=self.mode,
                l2_regularization=0.0005,
                scales=self.scales,
                aspect_ratios_global=[0.5, 1.0, 2.0],
                aspect_ratios_per_layer=self.aspect_ratios_per_layer,
                two_boxes_for_ar1=self.two_boxes_for_ar1,
                steps=self.steps,
                offsets=self.offsets,
                clip_boxes=self.clip_boxes,
                variances=self.variances,
                normalize_coords=self.normalize_coords,
                subtract_mean=self.intensity_mean,
                divide_by_stddev=self.intensity_range)
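        # A minimal usage sketch for this constructor (the enclosing class is
        # not shown in this snippet; `SSDWrapper` is a hypothetical name):
        #
        #     wrapper = SSDWrapper(model_name='ssd_300', n_classes=20,
        #                          mode='inference')
        #     wrapper.model.summary()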
Example #28
0
    def load_weight(self):

        self.weights_path = 'ssd300_pascal_07+12_epoch-08_loss-1.9471_val_loss-1.9156.h5'

        adam = Adam(lr=0.001,
                    beta_1=0.9,
                    beta_2=0.999,
                    epsilon=1e-08,
                    decay=0.0)
        ssd_loss = SSDLoss(neg_pos_ratio=3, alpha=1.0)

        K.clear_session()  # Clear previous models from memory.

        # Limit GPU memory: https://www.tensorflow.org/guide/gpu#limiting_gpu_memory_growth
        # The TF 2.x options below also work on TF 1.15.5.
        gpus = tf.config.experimental.list_physical_devices('GPU')
        if gpus:
            try:
                # option1: fixed size
                tf.config.experimental.set_virtual_device_configuration(
                    gpus[self.gpu_no], [
                        tf.config.experimental.VirtualDeviceConfiguration(
                            memory_limit=1024)
                    ])
                # option2: dynamic
                #for gpu in gpus:
                #    tf.config.experimental.set_memory_growth(gpu, True)

                # choose gpu device
                tf.config.experimental.set_visible_devices(
                    gpus[self.gpu_no], 'GPU')
            except RuntimeError as e:
                print(e)

        self.session = tf.Session()
        self.graph = tf.get_default_graph()

        with self.graph.as_default():
            with self.session.as_default():
                self.model = ssd_300(
                    image_size=(self.img_height, self.img_width, 3),
                    n_classes=2,
                    mode='inference',
                    l2_regularization=0.0005,
                    scales=[
                        0.1, 0.2, 0.37, 0.54, 0.71, 0.88, 1.05
                    ],  # The scales for MS COCO are [0.07, 0.15, 0.33, 0.51, 0.69, 0.87, 1.05]
                    aspect_ratios_per_layer=[[1.0, 2.0, 0.5],
                                             [1.0, 2.0, 0.5, 3.0, 1.0 / 3.0],
                                             [1.0, 2.0, 0.5, 3.0, 1.0 / 3.0],
                                             [1.0, 2.0, 0.5, 3.0, 1.0 / 3.0],
                                             [1.0, 2.0, 0.5], [1.0, 2.0, 0.5]],
                    two_boxes_for_ar1=True,
                    steps=[8, 16, 32, 64, 100, 300],
                    offsets=[0.5, 0.5, 0.5, 0.5, 0.5, 0.5],
                    clip_boxes=False,
                    variances=[0.1, 0.1, 0.2, 0.2],
                    normalize_coords=True,
                    subtract_mean=[123, 117, 104],
                    swap_channels=[2, 1, 0],
                    confidence_thresh=0.5,
                    iou_threshold=0.45,
                    top_k=200,
                    nms_max_output_size=400)

                self.model.load_weights(self.weights_path, by_name=True)
                self.model.compile(optimizer=adam, loss=ssd_loss.compute_loss)
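                # To run predictions from another thread later, re-enter the
                # same graph/session context, e.g.:
                #
                #     with self.graph.as_default(), self.session.as_default():
                #         y_pred = self.model.predict(batch)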
Example #29
0
    variances = [
        0.1, 0.1, 0.2, 0.2
    ]  # The variances by which the encoded target coordinates are divided as in the original implementation
    normalize_coords = True

    # 1: Build the Keras model.

    K.clear_session()  # Clear previous models from memory.

    model, predictor_sizes = ssd_300(image_size=(img_height, img_width,
                                                 img_channels),
                                     n_classes=n_classes,
                                     mode='training',
                                     l2_regularization=0.0005,
                                     scales=scales,
                                     aspect_ratios_per_layer=aspect_ratios,
                                     two_boxes_for_ar1=two_boxes_for_ar1,
                                     steps=steps,
                                     offsets=offsets,
                                     clip_boxes=clip_boxes,
                                     variances=variances,
                                     normalize_coords=normalize_coords,
                                     subtract_mean=mean_color,
                                     confidence_thresh=0.5,
                                     iou_threshold=0.45,
                                     return_predictor_sizes=True)  # Required for the two-value unpacking above.

    # 3: Instantiate an optimizer and the SSD loss function and compile the model.
    #    To follow the original Caffe implementation, use the preset SGD
    #    optimizer; otherwise Adam is the recommended choice.
    model.summary()  # `summary()` prints directly; wrapping it in `print()` would also print `None`.
    weights_path = 'VGG_ILSVRC_16_layers_fc_reduced.h5'

    model.load_weights(weights_path, by_name=True)
Example #30
0
def model_detection():
    img_height = 300  # Height of the input images
    img_width = 300  # Width of the input images
    img_channels = 3  # Number of color channels of the input images
    subtract_mean = [123, 117,
                     104]  # The per-channel mean of the images in the dataset
    swap_channels = [
        2, 1, 0
    ]  # The color channel order in the original SSD is BGR, so the channels should be swapped here; oddly, the results are often better without swapping.
    # TODO: Set the number of classes.
    n_classes = 8  # Number of positive classes, e.g. 20 for Pascal VOC, 80 for MS COCO
    scales = [
        0.07, 0.15, 0.33, 0.51, 0.69, 0.87, 1.05
    ]  # The anchor box scaling factors used in the original SSD300 for the MS COCO datasets.
    # scales = [0.1, 0.2, 0.37, 0.54, 0.71, 0.88, 1.05] # The anchor box scaling factors used in the original SSD300 for the Pascal VOC datasets.
    aspect_ratios = [
        [1.0, 2.0, 0.5],
        [1.0, 2.0, 0.5, 3.0, 1.0 / 3.0],
        [1.0, 2.0, 0.5, 3.0, 1.0 / 3.0],
        [1.0, 2.0, 0.5, 3.0, 1.0 / 3.0],
        [1.0, 2.0, 0.5],
        [1.0, 2.0, 0.5]
    ]  # The anchor box aspect ratios used in the original SSD300; the order matters
    two_boxes_for_ar1 = True
    steps = [
        8, 16, 32, 64, 100, 300
    ]  # The space between two adjacent anchor box center points for each predictor layer.
    offsets = [
        0.5, 0.5, 0.5, 0.5, 0.5, 0.5
    ]  # The offsets of the first anchor box center points from the top and left borders of the image as a fraction of the step size for each predictor layer.
    clip_boxes = False  # Whether or not you want to limit the anchor boxes to lie entirely within the image boundaries
    variances = [
        0.1, 0.1, 0.2, 0.2
    ]  # The variances by which the encoded target coordinates are scaled as in the original implementation
    normalize_coords = True
    weights_path = 'C:\\Users\\lamin\\Desktop\\PFE\\traffic_objetct_detection\\weights\\VGG_coco_SSD_300x300_iter_400000_subsampled_8_classes.h5'
    model = ssd_300(image_size=(img_height, img_width, img_channels),
                    n_classes=n_classes,
                    mode='inference',
                    l2_regularization=0.0005,
                    scales=scales,
                    aspect_ratios_per_layer=aspect_ratios,
                    two_boxes_for_ar1=two_boxes_for_ar1,
                    steps=steps,
                    offsets=offsets,
                    clip_boxes=clip_boxes,
                    variances=variances,
                    normalize_coords=normalize_coords,
                    subtract_mean=subtract_mean,
                    divide_by_stddev=None,
                    swap_channels=swap_channels,
                    confidence_thresh=0.5,
                    iou_threshold=0.45,
                    top_k=200,
                    nms_max_output_size=400,
                    return_predictor_sizes=False)

    print("Model built.")

    # 2: Load the sub-sampled weights into the model.

    # Load the weights that we've just created via sub-sampling.

    model.load_weights(weights_path, by_name=True)

    print("Weights file loaded:", weights_path)
    return model
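
# A minimal usage sketch (hypothetical caller; any pipeline that yields a
# `(batch, 300, 300, 3)` array of images works):
#
#     model = model_detection()
#     y_pred = model.predict(input_images)
#
# In 'inference' mode the output is already decoded: up to `top_k` rows per
# image, each `[class_id, confidence, xmin, ymin, xmax, ymax]`.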