Example #1
def num_detection(in_img):
    model_path = 'num_ssd.h5'
    # We need to create an SSDLoss object in order to pass that to the model loader.
    ssd_loss = SSDLoss(neg_pos_ratio=3, alpha=1.0)

    K.clear_session()  # Clear previous models from memory.

    model = load_model(model_path, custom_objects={'AnchorBoxes': AnchorBoxes,
                                                   'compute_loss': ssd_loss.compute_loss})

    img_height = 192
    img_width = 192

    test_img, test_single_channel = ssd2_resize(in_img, img_height, img_width)
    input_images = []
    input_images.append(test_img)
    input_images = np.array(input_images)

    y_pred = model.predict(input_images)
    # 4: Decode the raw predictions in `y_pred`.
    confidence_threshold = 0.5

    # y_pred_thresh = [y_pred[k][y_pred[k, :, 1] > confidence_threshold] for k in range(y_pred.shape[0])]

    y_pred_decoded = decode_detections(y_pred,
                                       confidence_thresh=confidence_threshold,
                                       iou_threshold=0.1,
                                       top_k=200,
                                       normalize_coords=True,
                                       img_height=img_height,
                                       img_width=img_width)

    np.set_printoptions(precision=2, suppress=True, linewidth=90)
    print("Predicted boxes:\n")
    print('   class   conf xmin   ymin   xmax   ymax')
    print(y_pred_decoded[0])


    new_list = []
    t_img = test_img.copy()
    for box in y_pred_decoded[0]:
        if float(box[1]) > 0.7:
            i_class = int(box[0]) - 1 if int(box[0]) != 11 else '-'
            xmin = int(box[2])
            ymin = int(box[3])
            xmax = int(box[4])
            ymax = int(box[5])
            cv2.rectangle(t_img, (xmin, ymin), (xmax, ymax), (0,0,255), 1)
            new_list.append([i_class,box[1],xmin,ymin,xmax,ymax])
            # mask_sets.append([xmin,ymin,xmax,ymax])
            # cv2.rectangle(input_images[0], (xmin, ymin), (xmax, ymax), 255, 2)

    input_images[0] = cv2.cvtColor(input_images[0], cv2.COLOR_RGB2BGR)

    # imwrite(dir_path + '/0_black_img.jpg', black_img)
    # mask = cv2.dilate(mask, kernel4)
    sort_arr = parse_box(new_list)
    imwrite(dir_path+'/0_ssd_detect.jpg', t_img)

    return sort_arr,test_img,test_single_channel
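This example leans on two helpers defined elsewhere in the repo, ssd2_resize and parse_box. A minimal sketch of what parse_box plausibly does, assuming digit detections need to be read left to right (the name comes from the call above; the behavior is an assumption):

def parse_box(box_list):
    # box_list rows: [class, conf, xmin, ymin, xmax, ymax]
    # Hypothetical: order detections left to right so digits read in sequence.
    return sorted(box_list, key=lambda box: box[2])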
Example #2
def draw_test(index, dataset, model):
    fig, ax = plt.subplots(1)
    image_index = dataset.dataset_indices[index]
    ax.imshow(dataset.images[image_index])

    for label, xmin, ymin, xmax, ymax in dataset.labels[index]:
        rect = plt.Rectangle((xmin, ymin),
                             xmax - xmin,
                             ymax - ymin,
                             linewidth=1,
                             edgecolor='r',
                             facecolor='none')
        ax.add_patch(rect)
    if model is not None:
        y_pred = model.predict(np.expand_dims(dataset.images[image_index], axis=0))
        y_pred_decoded = decode_detections(y_pred,
                                           confidence_thresh=0.5,
                                           iou_threshold=0.1,
                                           top_k=200,
                                           normalize_coords=normalize_coords,
                                           img_height=img_height,
                                           img_width=img_width)
        for label, confid, xmin, ymin, xmax, ymax in y_pred_decoded[0]:
            rect = plt.Rectangle((xmin, ymin),
                                 xmax - xmin,
                                 ymax - ymin,
                                 linewidth=1,
                                 edgecolor='b',
                                 facecolor='none')
            ax.add_patch(rect)
Example #3
def inference_single_image():
    im2 = cv2.imread('./1478899159823020309.jpg')

    #Converting it into batch dimensions
    resized = cv2.resize(im2, (480,300))
    #print(resized.shape)
    frame2 = np.array(np.expand_dims(resized, axis=0), dtype=np.float32)
    #Detections which returns a list
    detections = sess.run([label_name], {input_name: frame2})
    #List converted to the numpy array
    arr = np.asarray(detections)
    y = np.squeeze(arr, axis=0)

    y_pred_decoded = decode_detections(y,
                                       confidence_thresh=0.5,
                                       iou_threshold=0.45,
                                       top_k=200,
                                       normalize_coords=normalize_coords,
                                       img_height=img_height,
                                       img_width=img_width)

    for box in y_pred_decoded[0]:
        xmin = box[-4]
        ymin = box[-3]
        xmax = box[-2]
        ymax = box[-1]
        # print(xmin,ymin,xmax,ymax)
        label = '{}: {:.2f}'.format(classes[int(box[0])], box[1])
        # cv2.rectangle(im2, (xmin,ymin),(xmax,ymax), color=color, thickness=2 )
        cv2.rectangle(resized, (int(xmin), int(ymin)), (int(xmax), int(ymax)), color=(0, 255, 0), thickness=2)
        cv2.putText(resized, label, (int(xmin), int(ymin)), font, fontScale, color, thickness)
    cv2.imshow('detected', resized)
    cv2.waitKey(0)
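The sess, input_name and label_name globals suggest an ONNX Runtime session created at module level. A minimal setup sketch under that assumption (the model filename is hypothetical):

import onnxruntime as ort

sess = ort.InferenceSession('ssd7.onnx')  # hypothetical model file
input_name = sess.get_inputs()[0].name
label_name = sess.get_outputs()[0].name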
Example #4
    def get_classification(self, img):
        """Determines the color of the traffic light in the image

        Args:
            image (cv::Mat): image containing the traffic light

        Returns:
            int: ID of traffic light color (specified in styx_msgs/TrafficLight)

        """
        input_images = []
        b, g, r = cv2.split(img)
        rgb_img = cv2.merge([r, g, b])

        resized = cv2.resize(rgb_img, (self.img_width, self.img_height))
        # Note: ndarray.reshape returns a new array rather than modifying in
        # place; the original call here discarded its result. The batch
        # dimension is added below by np.array anyway.
        input_images.append(resized)
        input_images = np.array(input_images)

        with graph.as_default():
            y_pred = self.model.predict(input_images)

        y_pred_decoded = decode_detections(
            y_pred,
            confidence_thresh=0.2,
            iou_threshold=0.1,
            top_k=200,
            normalize_coords=self.normalize_coords,
            img_height=self.img_height,
            img_width=self.img_width)

        if len(y_pred_decoded[0]) == 0:
            return TrafficLight.UNKNOWN

        if len(y_pred_decoded[0][0]) == 0:
            return TrafficLight.UNKNOWN

        top3_green_avg = np.average(
            np.sort(y_pred_decoded[0][y_pred_decoded[0][:, 0] == 1, 1])[-3:])
        top3_red_avg = np.average(
            np.sort(y_pred_decoded[0][y_pred_decoded[0][:, 0] == 2, 1])[-3:])
        top3_yellow_avg = np.average(
            np.sort(y_pred_decoded[0][y_pred_decoded[0][:, 0] == 3, 1])[-3:])
        top3s = np.nan_to_num([top3_green_avg, top3_red_avg, top3_yellow_avg])
        best_avg = np.argmax(top3s) + 1

        if best_avg == 1:
            return TrafficLight.GREEN
        elif best_avg == 2:
            return TrafficLight.RED
        elif best_avg == 3:
            return TrafficLight.YELLOW
        return TrafficLight.UNKNOWN
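The top-3 averaging above is a small voting scheme over per-class confidences. A toy illustration of the same computation (made-up detections, not from the classifier):

import numpy as np

# Rows: [class, conf, xmin, ymin, xmax, ymax]
dets = np.array([[1, 0.9, 0, 0, 10, 10],
                 [1, 0.8, 0, 0, 10, 10],
                 [2, 0.7, 0, 0, 10, 10]])
top3_green = np.sort(dets[dets[:, 0] == 1, 1])[-3:]  # top-3 confidences for class 1
print(np.average(top3_green))  # 0.85, so green outvotes red's 0.7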
Example #5
 def predict(self):
     # Scale factors back to the original frame. cv2.resize takes dsize as
     # (width, height), so the resized frame is img_height wide and img_width
     # tall; rate_x / rate_y follow that (swapped-looking but consistent) layout.
     rate_x = self.image.shape[0] / img_width
     rate_y = self.image.shape[1] / img_height
     img = np.array(self.image)
     img = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)
     img = cv2.resize(img, (img_height, img_width))
     img = img.reshape(1, img_width, img_height, 3)
     y_pred = self.model.predict(img)
     y_pred_decoded = decode_detections(y_pred,
                                        confidence_thresh=0.6,
                                        iou_threshold=0.45,
                                        top_k=200,
                                        normalize_coords=normalize_coords,
                                        img_height=img_height,
                                        img_width=img_width)
     i = 0
     n = 0
     for box in y_pred_decoded[i]:
         print(box)
         xmin = max(0, int(box[-4] * rate_y))
         ymin = max(0, int(box[-3] * rate_x))
         xmax = max(0, int(box[-2] * rate_y))
         ymax = max(0, int(box[-1] * rate_x))
         image = self.image[ymin:ymax, xmin:xmax, :]
         self.object_detected.append(box)
         cv2.imwrite('images/person' + str(n) + '.jpg', image)
         n += 1
Example #6
def text_detect(orig_images, input_images, model):
    y_pred = model.predict(input_images)

    # 3: Decode the raw predictions in `y_pred`.

    y_pred_decoded = decode_detections(y_pred,
                                       confidence_thresh=0.17,
                                       iou_threshold=0.01,
                                       top_k=200,
                                       normalize_coords=normalize_coords,
                                       img_height=img_height,
                                       img_width=img_width)

    # 4: Convert the predictions for the original image + manual offset.
    # Note: list.copy() is shallow, so the arrays below are shared with
    # y_pred_decoded; each column is read once before being overwritten.
    y_pred_decoded_inv = y_pred_decoded.copy()
    y_pred_decoded_inv[0][:, 2] = y_pred_decoded[0][:, 2] * \
                                  orig_images[0].shape[1] / img_width - 3
    y_pred_decoded_inv[0][:, 3] = y_pred_decoded[0][:, 3] * \
                                  orig_images[0].shape[0] / img_height - 3
    y_pred_decoded_inv[0][:, 4] = y_pred_decoded[0][:, 4] * \
                                  orig_images[0].shape[1] / img_width + 28
    y_pred_decoded_inv[0][:, 5] = y_pred_decoded[0][:, 5] * \
                                  orig_images[0].shape[0] / img_height + 28
    text_box = []
    for y_pred_decode in y_pred_decoded_inv:
        text_box.append(y_pred_decode[:, 2:].astype(np.int32))

    return text_box
Example #7
def inference_video():
    # Open the input video

    cap = cv2.VideoCapture('/home/mohan/git/backups/drive_1_min_more_cars.mp4')
    #cap = cv2.VideoCapture('/home/mohan/git/backups/drive.mp4')
    prev_frame_time = 0
    new_frame_time = 0

    while cap.isOpened():
        new_frame_time = time.time()
        ret, frame = cap.read()
        if not ret:
            break
        resized = cv2.resize(frame, (480, 300))

        frame2 = np.array(np.expand_dims(resized, axis=0), dtype=np.float32)
        # Detections which returns a list
        detections = sess.run([label_name], {input_name: frame2})
        # List converted to the numpy array
        arr = np.asarray(detections)
        y = np.squeeze(arr, axis=0)

        # 4: Decode the raw prediction `y_pred`

        y_pred_decoded = decode_detections(y,
                                           confidence_thresh=0.5,
                                           iou_threshold=0.45,
                                           top_k=200,
                                           normalize_coords=normalize_coords,
                                           img_height=img_height,
                                           img_width=img_width)

        np.set_printoptions(precision=2, suppress=True, linewidth=90)

        fps = 1 / (new_frame_time - prev_frame_time)
        prev_frame_time = new_frame_time

        # Convert the fps to an integer string so it can be drawn with putText.
        fps = str(int(fps))

        ## Drawing a bounding box around the predictions
        for box in y_pred_decoded[0]:
            xmin = box[-4]
            ymin = box[-3]
            xmax = box[-2]
            ymax = box[-1]
            #print(xmin,ymin,xmax,ymax)
            label = '{}: {:.2f}'.format(classes[int(box[0])], box[1])
            #cv2.rectangle(im2, (xmin,ymin),(xmax,ymax), color=color, thickness=2 )
            cv2.rectangle(resized, (int(xmin),int(ymin)),(int(xmax),int(ymax)), color=(0,255,0), thickness=2 )
            cv2.putText(resized, label, (int(xmin), int(ymin)), font, fontScale, color, thickness)
        # Draw the fps counter once per frame rather than once per box.
        cv2.putText(resized, fps, (7, 70), font, 3, (100, 255, 0), 3, cv2.LINE_AA)
        print(fps)
        cv2.imshow('im', resized)
        if cv2.waitKey(1) & 0xFF == ord('q'):
            cv2.destroyAllWindows()
            break
Example #8
def handle_client(client_req):
    """
    处理客户端请求
    """
    # 获取客户端请求数据
    req_buff = client_req.makefile('rb')
    # 解析请求报文
    method, route, params, http_version = parse_first_line(req_buff)
    # print("method: " + method)
    # print("route: " + route)
    # print("params: " + str(params))
    # print("http_version: " + http_version)
    headers = parse_headers(req_buff)
    # print('headers: ' + str(headers))
    data = parse_body(req_buff, headers)
    # print('data: ' + data)
    body_content = json.loads(data)
    images_numpy = []
    response_body = ''
    if 'path' in body_content:
        image_path = body_content['path']
        if not os.path.isfile(image_path):
            print(f"'{image_path}' is not a file.")
            response_body += f"'{image_path}' is not a file."
            response_start_line = "HTTP/1.1 404 Bad Request\r\n"
            send_response(client_req, response_start_line, response_body)
            return
        print("load file from local: " + image_path)
        try:
            with Image.open(image_path) as image:
                images_numpy.append(np.array(image, dtype=np.uint8))
        except OSError:
            print(f"Input '{image_path}' can not be open as an image.")
            response_body += f"Input '{image_path}' can not be open as an image."
            response_start_line = "HTTP/1.1 400 Bad Request\r\n"
            send_response(client_req, response_start_line, response_body)
            return
    elif 'image' in body_content:
        image_content = body_content['image']
        print("load file from request body.")
        image_asc = image_content.encode('ascii')
        image_decode = base64.b64decode(image_asc)
        # b64decode yields encoded image bytes; np.frombuffer alone would give a
        # flat byte buffer, so decode them into an image array first (io is stdlib).
        images_numpy.append(np.array(Image.open(io.BytesIO(image_decode)), dtype=np.uint8))
    images_numpy = np.array(images_numpy)
    y_pred = model.predict(images_numpy)
    y_pred_decoded = decode_detections(y_pred,
                                       confidence_thresh=0.5,
                                       iou_threshold=0.005,
                                       top_k=200,
                                       normalize_coords=True,
                                       img_height=img_height,
                                       img_width=img_width)
    pred_labels = set(
        y_pred_decoded[0][..., 0].astype(np.int8).astype(str).tolist())
    # label_names = [classes[int(lb)] for lb in pred_labels]
    # Build the response payload
    response_body = ','.join(pred_labels)
    response_start_line = "HTTP/1.1 200 OK\r\n"
    send_response(client_req, response_start_line, response_body)
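For reference, a client for this server would send a JSON body carrying either a 'path' or a base64-encoded 'image'. A minimal sketch of building the latter payload (the image filename is hypothetical):

import base64
import json

with open('sample.jpg', 'rb') as f:  # hypothetical test image
    body = json.dumps({'image': base64.b64encode(f.read()).decode('ascii')})
# handle_client() parses this body and base64-decodes the 'image' field back
# into the original file bytes before building the numpy array.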
Example #9
def inference_single_image():
    #Reading a dummy image
    im2 = cv2.imread('./1478899365487445082.jpg')
    #im2 = image.img_to_array(im2)

    #Converting it into batch dimensions
    im3 = np.expand_dims(im2, axis=0)
    #print(im3.shape)

    # Make a prediction

    y_pred = model.predict(im3)

    #np.save('array_ssd7_pc.npy', y_pred)
    # 4: Decode the raw prediction `y_pred`

    y_pred_decoded = decode_detections(y_pred,
                                       confidence_thresh=0.5,
                                       iou_threshold=0.45,
                                       top_k=200,
                                       normalize_coords=normalize_coords,
                                       img_height=img_height,
                                       img_width=img_width)

    np.set_printoptions(precision=2, suppress=True, linewidth=90)
    print("Predicted boxes:\n")
    print('   class   conf xmin   ymin   xmax   ymax')
    #print(y_pred_decoded[0])
    #print(y_pred_decoded)
    #print(len(y_pred_decoded))

    ## Drawing a bounding box around the predictions

    for box in y_pred_decoded[0]:
        xmin = box[-4]
        ymin = box[-3]
        xmax = box[-2]
        ymax = box[-1]
        #print(xmin,ymin,xmax,ymax)
        label = '{}: {:.2f}'.format(classes[int(box[0])], box[1])
        #cv2.rectangle(im2, (xmin,ymin),(xmax,ymax), color=color, thickness=2 )
        cv2.rectangle(im2, (int(xmin), int(ymin)), (int(xmax), int(ymax)),
                      color=(0, 255, 0),
                      thickness=2)
        cv2.putText(im2, label, (int(xmin), int(ymin)), font, fontScale, color,
                    thickness)

    cv2.imshow('frame', im2)
    cv2.waitKey(0)
Example #10
    def get_ypred_decoded(self, r_img):
        y_pred = self.model.predict(r_img)
        #y_pred = model.predict(r_img)
        y_pred_decoded = decode_detections(y_pred,
                                           confidence_thresh=0.9,
                                           iou_threshold=0.001,
                                           top_k=200,
                                           normalize_coords=normalize_coords,
                                           img_height=img_height,
                                           img_width=img_width)

        np.set_printoptions(precision=2, suppress=True, linewidth=90)

        return y_pred_decoded
Example #11
    def training_plot(self, epoch, logs):
        # plots results on epoch end

        if self.val_generator:
            imgs, gt = next(self.val_generator)
            y_pred = self.model.predict(np.expand_dims(imgs[0], 0))
            y_pred_decoded = decode_detections(
                y_pred,
                confidence_thresh=0.5,
                iou_threshold=0.45,
                top_k=200,
                input_coords='centroids',
                normalize_coords=True,
                img_height=self.config.img_height,
                img_width=self.config.img_width)

            plt.figure(figsize=(6, 6))
            plt.imshow(imgs[0])

            current_axis = plt.gca()

            for box in y_pred_decoded[0]:
                class_id = int(box[0])  # cast so it can index id2digit
                confidence = box[1]
                xmin, ymin, xmax, ymax = box[2], box[3], box[4], box[5]

                label = '{}: {:.2f}'.format(self.id2digit[class_id],
                                            confidence)
                current_axis.add_patch(
                    plt.Rectangle((xmin, ymin),
                                  xmax - xmin,
                                  ymax - ymin,
                                  color='blue',
                                  fill=False,
                                  linewidth=2))
                current_axis.text(xmin,
                                  ymin,
                                  label,
                                  size='x-large',
                                  color='white',
                                  bbox={
                                      'facecolor': 'blue',
                                      'alpha': 1.0
                                  })
            plt.show()
        else:
            print('no val generator defined')
Example #12
 def postProccessing(self, y_pred):
     y_pred_decoded = decode_detections(
         y_pred,
         confidence_thresh=self.confidence_thresh,  #0.25,
         iou_threshold=self.iou_threshold,  #0.15, #0.45,
         top_k=self.top_k,  #200,
         normalize_coords=self.normalize_coords,
         img_height=self.realHeight,
         img_width=self.realWidth)
     y_pred_decoded = y_pred_decoded[0]
     if y_pred_decoded.shape != (0, ):
         y_pred_decoded[:, 1] *= 100
         y_pred_decoded = y_pred_decoded.astype(int)
         y_pred_fixed = self._fix_decoded(y_pred_decoded)
     else:
         y_pred_fixed = y_pred_decoded
     return y_pred_fixed
Example #13
 def predict(self,image):
     with self.sess.as_default():
         with self.graph.as_default():
             
             softmax_tensor = self.sess.graph.get_tensor_by_name('import/predictions/concat:0')
             
             pred = self.sess.run(softmax_tensor, {'import/input_1:0': np.array(image)})        
             
             y_pred_decoded = decode_detections(pred,
                                confidence_thresh=0.5,
                                iou_threshold=0.2,
                                top_k=200,
                                normalize_coords=True,
                                img_height=180,
                                img_width=240)
             
             return y_pred_decoded        
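The 'import/...' tensor names indicate a frozen graph loaded with tf.import_graph_def, whose name argument prefixes every tensor. A minimal TF 1.x setup sketch under that assumption (the .pb path is hypothetical):

import tensorflow as tf  # TF 1.x API

graph = tf.Graph()
with graph.as_default():
    graph_def = tf.GraphDef()
    with tf.gfile.GFile('frozen_ssd.pb', 'rb') as f:  # hypothetical path
        graph_def.ParseFromString(f.read())
    tf.import_graph_def(graph_def, name='import')  # yields 'import/...' tensor names
sess = tf.Session(graph=graph)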
Example #14
 def onlion_rec(self, img1):  # expects a 540×540 3-channel BGR image
     self.objtuple = []
     img1 = cv2.cvtColor(img1, cv2.COLOR_BGR2RGB)
     # Pass interpolation by keyword: the third positional argument of
     # cv2.resize is dst, not the interpolation flag.
     dst = cv2.resize(img1, (270, 270), interpolation=cv2.INTER_LINEAR)
     img1 = dst.reshape(1, 270, 270, 3)
     y_pred = self.model.predict(img1)
     y_pred_decoded = decode_detections(
         y_pred,
         confidence_thresh=0.92,
         iou_threshold=0.1,
         top_k=200,
         normalize_coords=self.normalize_coords,
         img_height=self.img_height,
         img_width=self.img_width)
     #print("Predicted boxes:\n")
     #print('   class   conf xmin   ymin   xmax   ymax')
     #print(y_pred_decoded[0])
     self.output = y_pred_decoded[0]
Example #15
 def get_ypred_decoded(self, r_img):
     '''
         Perform prediction on one image.

         Arguments:
             r_img: an RGB image reshaped to (1, h, w, c)
         Returns: decoded detections -> bboxes and labels
     '''
     y_pred = self.ssd_model.predict(r_img)
     #y_pred = model.predict(r_img)
     y_pred_decoded = decode_detections(y_pred,
                                        confidence_thresh=0.61,
                                        iou_threshold=0.1,
                                        top_k=200,
                                        normalize_coords=self.normalize_coords,
                                        img_height=240,
                                        img_width=320)
     
     return y_pred_decoded[0]
Example #16
def get_img_with_bbox(model, frame):

    xmin, ymin, xmax, ymax = [0 for i in range(4)]

    orig_images = []
    input_images = []

    print('\nFrame shape before feeding: ', frame.shape)
    orig_images.append(frame)

    img = Image.fromarray(frame)
    # PIL's resize takes (width, height); this assumes a square input size.
    img = img.resize((img_height, img_width))
    img = image.img_to_array(img)
    input_images.append(img)
    input_images = np.array(input_images)

    y_pred = model.predict(input_images)
    y_pred_decoded = decode_detections(y_pred,
                                       confidence_thresh=0.5,
                                       iou_threshold=0.4,
                                       top_k=200,
                                       normalize_coords=True,
                                       img_height=img_height,
                                       img_width=img_width)

    np.set_printoptions(precision=2, suppress=True, linewidth=90)
    print("Predicted boxes:\n")
    print('   class   conf xmin   ymin   xmax   ymax')
    print(y_pred_decoded[0])

    for box in y_pred_decoded[0]:
        # Transform the predicted bounding boxes to the original image size.
        xmin = box[2] * video_width / img_width
        ymin = box[3] * video_height / img_height
        xmax = box[4] * video_width / img_width
        ymax = box[5] * video_height / img_height
        print(xmin, ymin, xmax, ymax)

    return xmin, ymin, xmax, ymax
Example #17
def predict_all(dataset, model):
    predict_generator = dataset.generate(
        batch_size=1,
        shuffle=False,
        transformations=[],
        label_encoder=None,
        returns={'processed_images', 'processed_labels'},
        keep_images_without_gt=False)
    images = []
    labels = []
    for i in range(len(dataset.images)):
        x, l = next(predict_generator)
        images.append(x)
        labels.append(l[0])
        print(x.shape)
    images = np.concatenate(images, axis=0)
    y_pred = model.predict(images)
    y_pred_decoded = decode_detections(y_pred,
                                       confidence_thresh=0.2,
                                       iou_threshold=0.0,
                                       top_k=200,
                                       normalize_coords=normalize_coords,
                                       img_height=images.shape[1],
                                       img_width=images.shape[2])
    missed_ar = []
    best_iou = []

    for true_labels, pred_labels in zip(labels, y_pred_decoded):
        for tl in true_labels:
            # Pair each ground-truth box with its best-overlapping prediction
            # (the original comprehension reused a stale loop variable `i`).
            _sorted = [(iou(tl, pl), j) for j, pl in enumerate(pred_labels)]
            if len(_sorted) != 0:
                _sorted.sort()
                best_iou, best_match = _sorted[-1]
            else:
                best_iou = 0
                best_match = None
                missed_ar.append(aratio(tl))
            print(best_iou)
Example #18
for i in range(test_size):
    print("Image:", batch_filenames[i])
    print()
    print("Ground truth boxes:\n")
    print(np.array(batch_original_labels[i]))

    # 3: Make predictions.

    y_pred = model.predict(batch_images)

    # 4: Decode the raw predictions in `y_pred`.

    y_pred_decoded = decode_detections(y_pred,
                                       confidence_thresh=float(params["confidence_thresh"]),
                                       iou_threshold=float(params["iou_thresh"]),
                                       top_k=200,
                                       normalize_coords=normalize_coords,
                                       img_height=img_height,
                                       img_width=img_width)

    # 5: Convert the predictions for the original image.

    y_pred_decoded_inv = apply_inverse_transforms(y_pred_decoded, batch_inverse_transforms)

    np.set_printoptions(precision=2, suppress=True, linewidth=90)
    print("Predicted boxes:\n")
    print('   class   conf xmin   ymin   xmax   ymax')
    print(y_pred_decoded_inv[i])

    # 5: Draw the predicted boxes onto the image
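    # The example ends before this step; a plausible completion mirroring the
    # plotting code used elsewhere in this collection. `classes` and
    # `batch_original_images` are assumed to exist at this point.
    plt.figure(figsize=(15, 8))
    plt.imshow(batch_original_images[i])
    current_axis = plt.gca()
    for box in y_pred_decoded_inv[i]:
        xmin, ymin, xmax, ymax = box[2], box[3], box[4], box[5]
        label = '{}: {:.2f}'.format(classes[int(box[0])], box[1])
        current_axis.add_patch(plt.Rectangle((xmin, ymin), xmax - xmin, ymax - ymin,
                                             color='blue', fill=False, linewidth=2))
        current_axis.text(xmin, ymin, label, size='x-large', color='white',
                          bbox={'facecolor': 'blue', 'alpha': 1.0})
    plt.show()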
Example #19
def play():
    start_time_video = time.time()

    cap = cv2.VideoCapture(
        "/home/kara9147/ML/caltech-pedestrian-dataset-converter/data/plots/set00_V000.avi"
    )
    #cap = cv2.VideoCapture("/home/kara9147/ML/caltech-pedestrian-dataset-converter/data/plots/set03_V008.avi")
    #cap = cv2.VideoCapture("/home/kara9147/ML/caltech-pedestrian-dataset-converter/data/plots/set08_V004.avi")
    #cap = cv2.VideoCapture("/home/kara9147/ML/JAAD/clips/video_0006.mp4")

    # Time to read all frames, predict and put bounding boxes around them, and show them.
    i = 0
    total_time = 0.0

    # Capture frame-by-frame
    ret = True
    while ret:
        ret, origimg = cap.read()

        i = i + 1
        #print("Processing {} th frame".format(i))
        if ret:
            # Our operations on the frame come here
            img = cv2.resize(origimg, (img_width, img_height))
            # Open CV uses BGR color format
            frame = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
            #print(type(img))
            #print(img.shape)

            batch_img = np.expand_dims(frame, axis=0)
            #print(batch_img.shape )

            current = time.time()
            ##################################PREDICTION######################
            y_pred = model.predict(batch_img)
            end = time.time()
            diff = end - current
            total_time = total_time + diff
            print("Time spent for predicting: {0}".format(diff))

            # 4: Decode the raw prediction `y_pred`

            y_pred_decoded = decode_detections(
                y_pred,
                confidence_thresh=0.3,
                iou_threshold=0.45,
                top_k=200,
                normalize_coords=normalize_coords,
                img_height=img_height,
                img_width=img_width)

            np.set_printoptions(precision=2, suppress=True, linewidth=90)
            # print("Predicted boxes:\n")
            # print('   class   conf xmin   ymin   xmax   ymax')

            #print(y_pred_decoded)

            #print(time.time() - start_time)

            if y_pred_decoded and len(y_pred_decoded[0]):
                colors = plt.cm.hsv(np.linspace(
                    0, 1, n_classes +
                    1)).tolist()  # Set the colors for the bounding boxes
                classes = [
                    'background', 'person', 'people'
                ]  # Just so we can print class names onto the image instead of IDs

                # Draw the predicted boxes in blue
                for box in y_pred_decoded[0]:
                    xmin = int(box[-4])
                    ymin = int(box[-3])
                    xmax = int(box[-2])
                    ymax = int(box[-1])
                    color = colors[int(box[0])]
                    label = '{}: {:.2f}'.format(classes[int(box[0])], box[1])

                    #print((xmin, ymin))
                    #print((xmax, ymax))

                    cv2.rectangle(img, (xmin, ymin), (xmax, ymax), (255, 0, 0),
                                  1)

            # Display the resulting frame
            cv2.imshow('frame', img)

        # waitKey: 0, wait indefinitely
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

    end_time_video = time.time()
    print("No of frames: {}".format(i))
    print("Total Time: {}".format(total_time))
    print("fps: {}".format(i / (total_time)))

    # When everything done, release the capture
    cap.release()
    cv2.destroyAllWindows()
Example #20
cap = cv2.VideoCapture(0)


#cap.set(cv2.CAP_PROP_FPS, 10) ----- uncomment this line and set the fps of camera capture if required.

while True:
    ret, frame = cap.read()
    frame = cv2.resize(frame, (512, 512))
    frame2 = frame
    frame = frame[..., ::-1]  # convert from BGR to RGB
    frame = np.expand_dims(frame, axis=0)
    y_pred = model.predict(frame)
    y_pred_thresh = decode_detections(y_pred,
                                   confidence_thresh=0.3,
                                   iou_threshold=0.5,
                                   top_k=200,
                                   normalize_coords=normalize_coords,
                                   img_height=img_height,
                                   img_width=img_width)

    np.set_printoptions(precision=2, suppress=True, linewidth=90)

    classes = ['background', 'face']

    current_axis = plt.gca()

    for box in y_pred_thresh[0]:
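        # The loop body is cut off in this excerpt; a plausible completion based
        # on the other examples here (box layout: class, conf, xmin, ymin, xmax, ymax).
        xmin, ymin, xmax, ymax = (int(v) for v in box[2:6])
        label = '{}: {:.2f}'.format(classes[int(box[0])], box[1])
        cv2.rectangle(frame2, (xmin, ymin), (xmax, ymax), (0, 255, 0), 2)
        cv2.putText(frame2, label, (xmin, ymin), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 255, 0), 2)
    cv2.imshow('detections', frame2)
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break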
Example #21
def test_config(config):
    '''
    Test the given configuration; the configuration should already have been
    used for training (see ssd_train.py), otherwise this will raise an error.

    Arguments:
        config : the configuration of the model to use; should already be
            loaded.

    '''
    local_dir = config.ROOT_FOLDER
    data_dir = config.DATA_DIR
    img_shape = config.IMG_SHAPE
    img_height = img_shape[0]  # Height of the model input images
    img_width = img_shape[1]  # Width of the model input images
    img_channels = img_shape[2]  # Number of color channels of the model input images
    n_classes = 20  # Number of positive classes, e.g. 20 for Pascal VOC, 80 for MS COCO
    normalize_coords = True

    K.clear_session()  # Clear previous models from memory.
    print("[INFO] loading model...")
    model_path = os.path.join(local_dir, 'models', config.MODEL_NAME)

    # We need to create an SSDLoss object in order to pass that to the model loader.
    ssd_loss = SSDLoss(neg_pos_ratio=3, n_neg_min=0, alpha=1.0)
    model = load_model(model_path,
                       custom_objects={
                           'AnchorBoxes': AnchorBoxes,
                           'L2Normalization': L2Normalization,
                           'DecodeDetections': DecodeDetections,
                           'compute_loss': ssd_loss.compute_loss
                       })
    classes = config.CLASSES
    dataset = DataGenerator(load_images_into_memory=False,
                            hdf5_dataset_path=None)
    dataset_images_dir = os.path.join(data_dir, 'Images')
    dataset_annotations_dir = os.path.join(data_dir, 'Annotations/')
    dataset_test_image_set_filename = os.path.join(data_dir, 'ImageSets',
                                                   'test.txt')

    dataset.parse_xml(images_dirs=[dataset_images_dir],
                      image_set_filenames=[dataset_test_image_set_filename],
                      annotations_dirs=[dataset_annotations_dir],
                      classes=classes,
                      include_classes='all',
                      exclude_truncated=False,
                      exclude_difficult=True,
                      ret=False)
    dataset.create_hdf5_dataset(file_path=config.MODEL_NAME,
                                resize=False,
                                variable_image_size=True,
                                verbose=True)

    convert_to_3_channels = ConvertTo3Channels()
    resize = Resize(height=img_height, width=img_width)
    dataset_size = dataset.get_dataset_size()

    print("Number of images in the dataset:\t{:>6}".format(dataset_size))

    predict_generator = dataset.generate(
        batch_size=config.PREDICT_BATCH_SIZE,
        shuffle=True,
        transformations=[convert_to_3_channels, resize],
        label_encoder=None,
        returns={
            'processed_images', 'filenames', 'inverse_transform',
            'original_images', 'original_labels'
        },
        keep_images_without_gt=False)

    count = 0
    while count < dataset_size:
        batch_images, batch_filenames, batch_inverse_transforms, batch_original_images, batch_original_labels = next(
            predict_generator)
        i = 0
        print("Image:", batch_filenames[i])
        print()
        print("Ground truth boxes:\n")
        print(np.array(batch_original_labels[i]))

        y_pred = model.predict(batch_images)
        y_pred_decoded = decode_detections(y_pred,
                                           confidence_thresh=0.5,
                                           iou_threshold=0.4,
                                           top_k=200,
                                           normalize_coords=normalize_coords,
                                           img_height=img_height,
                                           img_width=img_width)
        y_pred_decoded_inv = apply_inverse_transforms(
            y_pred_decoded, batch_inverse_transforms)

        np.set_printoptions(precision=2, suppress=True, linewidth=90)
        print("Predicted boxes:\n")
        print('   class   conf xmin   ymin   xmax   ymax')
        print(y_pred_decoded_inv[i])
        # cv2.imshow('original image',batch_original_images[i])
        # cv2.waitKey(800)
        # cv2.destroyAllWindows()
        colors = plt.cm.hsv(np.linspace(0, 1, n_classes + 1)).tolist()
        plt.figure(figsize=(15, 8))
        plt.imshow(batch_original_images[i])

        current_axis = plt.gca()
        len_orig = 0
        for box in batch_original_labels[i]:
            len_orig += 1
            xmin = box[1]
            ymin = box[2]
            xmax = box[3]
            ymax = box[4]
            label = '{}'.format(classes[int(box[0])])
            current_axis.add_patch(
                plt.Rectangle((xmin, ymin),
                              xmax - xmin,
                              ymax - ymin,
                              color='green',
                              fill=False,
                              linewidth=2))
            current_axis.text(xmin,
                              ymin,
                              label,
                              size='x-large',
                              color='white',
                              bbox={
                                  'facecolor': 'green',
                                  'alpha': 1.0
                              })

        len_found = 0
        for box in y_pred_decoded_inv[i]:
            len_found += 1
            xmin = box[2]
            ymin = box[3]
            xmax = box[4]
            ymax = box[5]
            color = colors[int(box[0])]
            label = '{}: {:.2f}'.format(classes[int(box[0])], box[1])
            current_axis.add_patch(
                plt.Rectangle((xmin, ymin),
                              xmax - xmin,
                              ymax - ymin,
                              color=color,
                              fill=False,
                              linewidth=2))
            current_axis.text(xmin,
                              ymin,
                              label,
                              size='x-large',
                              color='white',
                              bbox={
                                  'facecolor': color,
                                  'alpha': 1.0
                              })

        print('Number of original boxes : {}'.format(len_orig))
        print('Number of found boxes : {}'.format(len_found))
        plt.show()
        count += 1
Example #22
def main():
    img_height = 300
    img_width = 480
    n_classes = 5
    ### Load model
    LOAD_MODEL = True

    if LOAD_MODEL:
        # TODO: Set the path to the `.h5` file of the model to be loaded.
        model_path = '../ConeData/SavedModels/training3/(ssd7_epoch-10_loss-0.3291_val_loss-0.2664.h5'

        # We need to create an SSDLoss object in order to pass that to the model loader.
        ssd_loss = SSDLoss(neg_pos_ratio=3, alpha=1.0)

        K.clear_session()  # Clear previous models from memory.

        model = load_model(model_path,
                           custom_objects={
                               'AnchorBoxes': AnchorBoxes,
                               'DecodeDetections': DecodeDetections,
                               'compute_loss': ssd_loss.compute_loss
                           })

    ### Read video
    # cap = cv2.VideoCapture('test_videos/Building Self Driving Car - Local Dataset - Day.mp4')
    #cap = cv2.VideoCapture('test_videos/original.m4v')
    cap = cv2.VideoCapture('test_videos/20180619_175221224.mp4')
    width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    fps = cap.get(cv2.CAP_PROP_FPS)
    total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
    count = 0

    detect = True

    while count < total_frames:
        #print(str(j)+'/'+str(total_frames))
        # Capture frame-by-frame
        ret, frame = cap.read()
        # if ret == True:
        #     cv2.imshow('original frame', frame)
        #     cv2.waitKey(10)

        if detect:
            frame = frame[..., ::-1]  # BGR -> RGB for the model input
            frame_resized = cv2.resize(frame, (480, 300))
            frame_tensor = np.expand_dims(frame_resized, axis=0)
            ### Make predictions
            y_pred = model.predict(frame_tensor)
            y_pred_decoded = decode_detections(y_pred,
                                               confidence_thresh=0.75,
                                               iou_threshold=0.45,
                                               top_k=200,
                                               normalize_coords=True,
                                               img_height=img_height,
                                               img_width=img_width)

            #plt.figure(figsize=(20,12))
            #plt.imshow(frame_resized)

            #current_axis = plt.gca()

            ### plot predictions
            colors = plt.cm.hsv(np.linspace(
                0, 1, n_classes +
                1)).tolist()  # Set the colors for the bounding boxes
            #classes = ['background', 'car', 'truck', 'pedestrian', 'bicyclist', 'light'] # Just so we can print class names onto the image instead of IDs
            classes = [
                'background', 'cone'
            ]  # Just so we can print class names onto the image instead of IDs

            # Draw the predicted boxes in blue
            for box in y_pred_decoded[0]:
                xmin = int(box[-4])
                ymin = int(box[-3])
                xmax = int(box[-2])
                ymax = int(box[-1])

                #convert to x,y,w,h format
                # x_bbox = int(xmin)
                # y_bbox = int(ymin)
                # w_bbox = abs(int(xmax - xmin))
                # h_bbox = abs(int(ymax - ymin))

                color = colors[int(box[0])]
                cv2.rectangle(frame_resized, (xmin, ymin), (xmax, ymax),
                              (0, 255, 0), 5)

                label = '{}: {:.2f}'.format(classes[int(box[0])], box[1])
                # current_axis.add_patch(plt.Rectangle((xmin, ymin), xmax-xmin, ymax-ymin, color=color, fill=False, linewidth=2))
                # current_axis.text(xmin, ymin, label, size='x-large', color='white', bbox={'facecolor':color, 'alpha':1.0})
                cv2.putText(frame_resized, label, (int(xmin), int(ymin) - 10),
                            cv2.FONT_HERSHEY_COMPLEX, 1, (0, 0, 0), 2,
                            cv2.LINE_AA)

            #plt.savefig('output_frames/video_frame'+str(j)+'.png')
            #plt.close('all')

            #if j % 10 == 0:
            #clear_output()
        cv2.imshow('ssd7_inference', frame_resized)
        cv2.waitKey(10)

        count = count + 1

        # Break the loop
        #else:
        #break

        #out.release()
    cap.release()
Example #23
def main():
    create_new_model = args.model_name == 'default'

    if create_new_model:
        K.clear_session()  # Clear previous models from memory.
        model = ssd_512(image_size=(Config.img_height, Config.img_width,
                                    Config.img_channels),
                        n_classes=Config.n_classes,
                        mode='training',
                        l2_regularization=Config.l2_regularization,
                        scales=Config.scales,
                        aspect_ratios_per_layer=Config.aspect_ratios,
                        two_boxes_for_ar1=Config.two_boxes_for_ar1,
                        steps=Config.steps,
                        offsets=Config.offsets,
                        clip_boxes=Config.clip_boxes,
                        variances=Config.variances,
                        normalize_coords=Config.normalize_coords,
                        subtract_mean=Config.mean_color,
                        swap_channels=Config.swap_channels)

        adam = Adam(lr=0.001,
                    beta_1=0.9,
                    beta_2=0.999,
                    epsilon=1e-08,
                    decay=0.0)
        ssd_loss = SSDLoss(neg_pos_ratio=3, alpha=1.0)
        model.compile(optimizer=adam, loss=ssd_loss.compute_loss)
    else:

        model_path = "weights/" + args.model_name + ".h5"
        # We need to create an SSDLoss object in order to pass that to the model loader.
        ssd_loss = SSDLoss(neg_pos_ratio=3, alpha=1.0)

        K.clear_session()  # Clear previous models from memory.

        model = load_model(model_path,
                           custom_objects={
                               'AnchorBoxes': AnchorBoxes,
                               'L2Normalization': L2Normalization,
                               'compute_loss': ssd_loss.compute_loss
                           })

    # Load the data
    train_dataset = DataGenerator(load_images_into_memory=True,
                                  hdf5_dataset_path=os.getcwd() + "/data/" +
                                  args.dataset + '/polyp_train.h5')
    val_dataset = DataGenerator(load_images_into_memory=True,
                                hdf5_dataset_path=os.getcwd() + "/data/" +
                                args.dataset + '/polyp_val.h5')
    train_dataset_size = train_dataset.get_dataset_size()
    val_dataset_size = val_dataset.get_dataset_size()
    print("Number of images in the training dataset:\t{:>6}".format(
        train_dataset_size))
    print("Number of images in the validation dataset:\t{:>6}".format(
        val_dataset_size))

    batch_size = args.batch_size

    # For the training generator:
    ssd_data_augmentation = SSDDataAugmentation(img_height=Config.img_height,
                                                img_width=Config.img_width,
                                                background=Config.mean_color)

    # For the validation generator:
    convert_to_3_channels = ConvertTo3Channels()
    resize = Resize(height=Config.img_height, width=Config.img_width)

    # 5: Instantiate an encoder that can encode ground truth labels into the format needed by the SSD loss function.

    # The encoder constructor needs the spatial dimensions of the model's predictor layers to create the anchor boxes.
    predictor_sizes = [
        model.get_layer('conv4_3_norm_mbox_conf').output_shape[1:3],
        model.get_layer('fc7_mbox_conf').output_shape[1:3],
        model.get_layer('conv6_2_mbox_conf').output_shape[1:3],
        model.get_layer('conv7_2_mbox_conf').output_shape[1:3],
        model.get_layer('conv8_2_mbox_conf').output_shape[1:3],
        model.get_layer('conv9_2_mbox_conf').output_shape[1:3],
        model.get_layer('conv10_2_mbox_conf').output_shape[1:3]
    ]

    ssd_input_encoder = SSDInputEncoder(
        img_height=Config.img_height,
        img_width=Config.img_width,
        n_classes=Config.n_classes,
        predictor_sizes=predictor_sizes,
        scales=Config.scales,
        aspect_ratios_per_layer=Config.aspect_ratios,
        two_boxes_for_ar1=Config.two_boxes_for_ar1,
        steps=Config.steps,
        offsets=Config.offsets,
        clip_boxes=Config.clip_boxes,
        variances=Config.variances,
        matching_type='multi',
        pos_iou_threshold=0.5,
        neg_iou_limit=0.5,
        normalize_coords=Config.normalize_coords)

    # 6: Create the generator handles that will be passed to Keras' `fit_generator()` function.
    train_generator = train_dataset.generate(
        batch_size=batch_size,
        shuffle=True,
        transformations=[ssd_data_augmentation],
        label_encoder=ssd_input_encoder,
        returns={'processed_images', 'encoded_labels'},
        keep_images_without_gt=False)

    val_generator = val_dataset.generate(
        batch_size=batch_size,
        shuffle=False,
        transformations=[convert_to_3_channels, resize],
        label_encoder=ssd_input_encoder,
        returns={'processed_images', 'encoded_labels'},
        keep_images_without_gt=False)

    model_checkpoint = ModelCheckpoint(
        filepath=os.getcwd() +
        '/weights/ssd512_epoch-{epoch:02d}_loss-{loss:.4f}_val_loss-{val_loss:.4f}.h5',
        monitor='val_loss',
        verbose=1,
        save_best_only=True,
        save_weights_only=False,
        mode='auto',
        period=30)

    csv_logger = CSVLogger(filename='ssd512_training_log.csv',
                           separator=',',
                           append=True)
    learning_rate_scheduler = LearningRateScheduler(schedule=lr_schedule)
    terminate_on_nan = TerminateOnNaN()

    tf_log = keras.callbacks.TensorBoard(log_dir=TF_LOG_PATH + args.tf_logs,
                                         histogram_freq=0,
                                         batch_size=batch_size,
                                         write_graph=True,
                                         write_grads=False,
                                         write_images=False)

    callbacks = [
        model_checkpoint, csv_logger, learning_rate_scheduler,
        terminate_on_nan, tf_log
    ]

    # If you're resuming a previous training, set `initial_epoch` and `final_epoch` accordingly.
    initial_epoch = 0
    final_epoch = args.final_epoch
    steps_per_epoch = 500

    # Train/Fit the model
    if args.predict_mode == 'train':
        history = model.fit_generator(generator=train_generator,
                                      steps_per_epoch=steps_per_epoch,
                                      epochs=final_epoch,
                                      callbacks=callbacks,
                                      validation_data=val_generator,
                                      validation_steps=ceil(val_dataset_size /
                                                            batch_size),
                                      initial_epoch=initial_epoch)

    # Prediction Output
    predict_generator = val_dataset.generate(
        batch_size=1,
        shuffle=True,
        transformations=[convert_to_3_channels, resize],
        label_encoder=None,
        returns={
            'processed_images', 'filenames', 'inverse_transform',
            'original_images', 'original_labels'
        },
        keep_images_without_gt=False)

    i = 0
    for val in range(val_dataset_size):
        batch_images, batch_filenames, batch_inverse_transforms, batch_original_images, batch_original_labels = next(
            predict_generator)

        y_pred = model.predict(batch_images)

        y_pred_decoded = decode_detections(
            y_pred,
            confidence_thresh=0.5,
            iou_threshold=0.4,
            top_k=200,
            normalize_coords=Config.normalize_coords,
            img_height=Config.img_height,
            img_width=Config.img_width)

        # 5: Convert the predictions for the original image.
        y_pred_decoded_inv = apply_inverse_transforms(
            y_pred_decoded, batch_inverse_transforms)

        np.set_printoptions(precision=2, suppress=True, linewidth=90)
        print("Predicted boxes:\n")
        print('   class   conf xmin   ymin   xmax   ymax')
        print(y_pred_decoded_inv[i])

        plt.figure(figsize=(20, 12))
        plt.imshow(batch_images[i])

        current_axis = plt.gca()

        colors = plt.cm.hsv(
            np.linspace(0, 1, Config.n_classes +
                        1)).tolist()  # Set the colors for the bounding boxes
        classes = [
            'background', 'polyps'
        ]  # Just so we can print class names onto the image instead of IDs

        for box in batch_original_labels[i]:
            xmin = box[1]
            ymin = box[2]
            xmax = box[3]
            ymax = box[4]
            label = '{}'.format(classes[int(box[0])])
            current_axis.add_patch(
                plt.Rectangle((xmin, ymin),
                              xmax - xmin,
                              ymax - ymin,
                              color='green',
                              fill=False,
                              linewidth=2))
            current_axis.text(xmin,
                              ymin,
                              label,
                              size='x-large',
                              color='white',
                              bbox={
                                  'facecolor': 'green',
                                  'alpha': 1.0
                              })

        for box in y_pred_decoded_inv[i]:
            xmin = box[2]
            ymin = box[3]
            xmax = box[4]
            ymax = box[5]
            color = colors[int(box[0])]
            label = '{}: {:.2f}'.format(classes[int(box[0])], box[1])
            current_axis.add_patch(
                plt.Rectangle((xmin, ymin),
                              xmax - xmin,
                              ymax - ymin,
                              color=color,
                              fill=False,
                              linewidth=2))
            current_axis.text(xmin,
                              ymin,
                              label,
                              size='x-large',
                              color='white',
                              bbox={
                                  'facecolor': color,
                                  'alpha': 1.0
                              })
        image = plt.gcf()
        plt.draw()
        image.savefig(os.getcwd() + "/val_ssd512val_" + str(val) + ".png",
                      dpi=100)
Example #24
def predict_num_area(src):
    '''
    :param src: image read with OpenCV
    :return: scale: scaling factor; mask: white mask of the digit region;
        all_blocks: rectangle coordinates
    '''

    model_path = '../ssd7_pascal_07_epoch-17_loss-0.8387_val_loss-0.8608.h5'
    # We need to create an SSDLoss object in order to pass that to the model loader.
    ssd_loss = SSDLoss(neg_pos_ratio=3, alpha=1.0)

    K.clear_session()  # Clear previous models from memory.

    model = load_model(model_path,
                       custom_objects={
                           'AnchorBoxes': AnchorBoxes,
                           'compute_loss': ssd_loss.compute_loss
                       })
    img_height = 341
    img_width = 256
    scale_y = src.shape[0] / img_height
    scale_x = src.shape[1] / img_width

    normalize_coords = True
    orig_images = []  # Store the images here.
    input_images = []  # Store resized versions of the images here.

    # We'll only load one image in this example.
    # filename = '../NumInstrument/img/im3.JPG'
    # filename='../ssd_trains/JPEGImages/image1024.JPG'

    # img = cv2.imread(filename)

    if scale_x > scale_y:
        scale = scale_x
        real_w = img_width
        real_h = int(src.shape[0] / scale)
        black_img = np.zeros((img_height, img_width, 3), np.uint8)
        img = cv2.resize(src, (real_w, real_h))

        t_ = int((img_height - real_h) / 2)
        black_img[t_:t_ + real_h, :] = img
    else:
        scale = scale_y
        real_w = int(src.shape[1] / scale)
        real_h = img_height
        black_img = np.zeros((img_height, img_width, 3), np.uint8)
        img = cv2.resize(src, (real_w, real_h))
        t_ = int((img_width - real_w) / 2)
        black_img[:, t_:t_ + real_w] = img

    orig_images.append(black_img)
    img = cv2.cvtColor(black_img, cv2.COLOR_BGR2RGB)
    input_images.append(img)
    input_images = np.array(input_images)

    y_pred = model.predict(input_images)
    # 4: Decode the raw predictions in `y_pred`.
    confidence_threshold = 0.8

    # Kept for reference only; decode_detections below applies its own threshold.
    y_pred_thresh = [
        y_pred[k][y_pred[k, :, 1] > confidence_threshold]
        for k in range(y_pred.shape[0])
    ]

    y_pred_decoded = decode_detections(y_pred,
                                       confidence_thresh=0.5,
                                       iou_threshold=0.45,
                                       top_k=200,
                                       normalize_coords=True,
                                       img_height=img_height,
                                       img_width=img_width)

    np.set_printoptions(precision=2, suppress=True, linewidth=90)
    # print("Predicted boxes:\n")
    # print('   class   conf xmin   ymin   xmax   ymax')
    # print(y_pred_decoded[0])
    mask = np.zeros((img_height, img_width), np.uint8)
    max_arr = sorted(y_pred_decoded[0], key=lambda xx: xx[1], reverse=True)
    for box in max_arr[:1]:
        xmin = int(box[2] - 2) if int(box[2] - 2) >= 0 else 0
        ymin = int(box[3])
        xmax = int(box[4] + 2)
        ymax = int(box[5] + 1)
        # cv2.rectangle(mask, (xmin, ymin), (xmax, ymax), 255, -1)
        cv2.rectangle(input_images[0], (xmin, ymin), (xmax, ymax), 255, 2)
        # cv2.rectangle(src, (int((xmin)*scale), int((ymin-t_)*scale)), (int((xmax)*scale), int((ymax-t_)*scale)), 255, 2)

        write_xml(img_name, [
            int((xmin) * scale / 4.21),
            int((ymin - t_) * scale / 4.21),
            int((xmax) * scale / 4.21),
            int((ymax - t_) * scale / 4.21)
        ])

    input_images[0] = cv2.cvtColor(input_images[0], cv2.COLOR_RGB2BGR)

    # imwrite(dir_path + '/0_black_img.jpg', black_img)
    # mask = cv2.dilate(mask, kernel4)

    imwrite(dir_path + img_name, input_images[0])
    # imwrite(dir_path+'1.jpg', src)

    # imwrite(dir_path+'/4_pre_res' + str(index) + '.jpg', input_images[0])
    #
    # all_blocks = cutBlocks(mask,input_images[0])
    # # print(all_blocks,scale)
    # if len(all_blocks)==0:
    #     print('err', 'digit region not found')
    #     # sys.exit(0)
    #
    # tmp=[]
    #
    # for abox in all_blocks:
    #     if scale_x>scale_y:
    #         aa=[int((abox[0])*scale),int((abox[1]-t_)*scale),int(abox[2]*scale),int((abox[3]-t_)*scale)]
    #     else:
    #         aa=[int((abox[0]-t_)*scale),int(abox[1]*scale),int((abox[2]-t_)*scale),int(abox[3]*scale)]
    #     tmp.append(aa)
    # all_blocks = tmp
    K.clear_session()
    return mask
Example no. 25
# Assumed imports for this snippet; `imageset_content`, `TARGET_IMAGES`,
# `TARGET_ANNOTATIONS`, `read_content`, `get_model`, and `display_image`
# are defined elsewhere in the notebook this was taken from.
import os
import random

import numpy
from PIL import Image

title = random.choice(imageset_content)

print(title)
# title = "B4_02_02_22_hor"

img = Image.open(os.path.join(TARGET_IMAGES, f"{title}.jpg"))
np_img = numpy.array(img)

xml_path = os.path.join(TARGET_ANNOTATIONS, f"{title}.xml")
boxes = read_content(xml_path)

model = get_model()

y_pred = model.predict(numpy.array([np_img]))

# 4: Decode the raw predictions in `y_pred`.

y_pred_decoded = decode_detections(y_pred,
                                   confidence_thresh=0.5,
                                   iou_threshold=0.4,
                                   top_k=200,
                                   normalize_coords=True,
                                   img_height=300,
                                   img_width=300)
y_pred_decoded = y_pred_decoded[0]

display_image(np_img, boxes)
display_image(np_img, y_pred_decoded)

print('test')
Example no. 26
def predict_num_area(src):
    '''
    :param src: image read by OpenCV (BGR)
    :return: mask: binary image that is white over the digit region,
             all_blocks: rectangle coordinates [xmin, ymin, xmax, ymax] scaled back to `src`
    '''

    model_path = 'ssd7_pascal_07_epoch-17_loss-0.8387_val_loss-0.8608.h5'
    # We need to create an SSDLoss object in order to pass that to the model loader.
    ssd_loss = SSDLoss(neg_pos_ratio=3, alpha=1.0)

    K.clear_session()  # Clear previous models from memory.

    model = load_model(model_path,
                       custom_objects={
                           'AnchorBoxes': AnchorBoxes,
                           'compute_loss': ssd_loss.compute_loss
                       })
    img_height = 341
    img_width = 256
    scale_y = src.shape[0] / img_height
    scale_x = src.shape[1] / img_width
    normalize_coords = True
    orig_images = []  # Store the images here.
    input_images = []  # Store resized versions of the images here.

    # The input image arrives as `src`; nothing is read from disk here.
    img = cv2.resize(src, (img_width, img_height))
    orig_images.append(img)
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    input_images.append(img)
    input_images = np.array(input_images)

    y_pred = model.predict(input_images)
    # 4: Decode the raw predictions in `y_pred`.
    confidence_threshold = 0.5

    # Note: this manually thresholded copy is never used below;
    # `decode_detections` applies its own confidence threshold.
    y_pred_thresh = [
        y_pred[k][y_pred[k, :, 1] > confidence_threshold]
        for k in range(y_pred.shape[0])
    ]

    y_pred_decoded = decode_detections(y_pred,
                                       confidence_thresh=0.5,
                                       iou_threshold=0.45,
                                       top_k=200,
                                       normalize_coords=True,
                                       img_height=img_height,
                                       img_width=img_width)

    np.set_printoptions(precision=2, suppress=True, linewidth=90)
    # print("Predicted boxes:\n")
    # print('   class   conf xmin   ymin   xmax   ymax')
    # print(y_pred_decoded[0])
    mask = np.zeros((img_height, img_width), np.uint8)
    for box in y_pred_decoded[0]:
        # Small empirical margins around the detected digit box.
        xmin = int(box[2])
        ymin = int(box[3])
        xmax = int(box[4] - 2)
        ymax = int(box[5] + 5)
        cv2.rectangle(mask, (xmin, ymin), (xmax, ymax), 255, -1)
        cv2.rectangle(input_images[0], (xmin, ymin), (xmax, ymax), 255, 2)

    input_images[0] = cv2.cvtColor(input_images[0], cv2.COLOR_RGB2BGR)

    mask = cv2.dilate(mask, kernel4)  # `kernel4` is assumed to be a module-level morphology kernel

    cv2.imwrite('./pre_model/4_pre_mask' + str(0) + '.jpg', mask)
    cv2.imwrite('./pre_model/4_pre_res' + str(0) + '.jpg', input_images[0])

    all_blocks = cutImage(mask, input_images[0], -1)
    # print(all_blocks,scale)
    tmp = []
    for abox in all_blocks:
        aa = [
            int(abox[0] * scale_x),
            int(abox[1] * scale_y),
            int(abox[2] * scale_x),
            int(abox[3] * scale_y)
        ]
        tmp.append(aa)
    all_blocks = tmp
    K.clear_session()
    return mask, all_blocks
Example no. 27
def main():
    # `args` and `Config` are assumed to be module-level objects
    # (parsed command-line arguments and a hyper-parameter container).
    create_new_model = args.model_name == 'default'

    if create_new_model:
        K.clear_session()  # Clear previous models from memory.
        model = build_model(image_size=(Config.img_height, Config.img_width,
                                        Config.img_channels),
                            n_classes=Config.n_classes,
                            mode='training',
                            l2_regularization=Config.l2_regularization,
                            scales=Config.scales,
                            aspect_ratios_global=Config.aspect_ratios,
                            aspect_ratios_per_layer=None,
                            two_boxes_for_ar1=Config.two_boxes_for_ar1,
                            steps=Config.steps,
                            offsets=Config.offsets,
                            clip_boxes=Config.clip_boxes,
                            variances=Config.variances,
                            normalize_coords=Config.normalize_coords,
                            subtract_mean=Config.intensity_mean,
                            divide_by_stddev=Config.intensity_range)

        # model.load_weights("./weights/"+ args.model_name + ".h5", by_name=True)
        adam = Adam(lr=args.learning_rate,
                    beta_1=0.9,
                    beta_2=0.999,
                    epsilon=1e-08,
                    decay=0.0)
        ssd_loss = SSDLoss(neg_pos_ratio=3, alpha=1.0)
        model.compile(optimizer=adam, loss=ssd_loss.compute_loss)
    else:

        model_path = "weights/" + args.model_name + ".h5"
        # We need to create an SSDLoss object in order to pass that to the model loader.
        ssd_loss = SSDLoss(neg_pos_ratio=3, alpha=1.0)
        K.clear_session()  # Clear previous models from memory.
        model = load_model(model_path,
                           custom_objects={
                               'AnchorBoxes': AnchorBoxes,
                               'compute_loss': ssd_loss.compute_loss
                           })

    # Load the data
    train_dataset = DataGenerator(load_images_into_memory=True,
                                  hdf5_dataset_path=os.getcwd() + "/data/" +
                                  args.dataset + '/polyp_train.h5')
    val_dataset = DataGenerator(load_images_into_memory=True,
                                hdf5_dataset_path=os.getcwd() + "/data/" +
                                args.dataset + '/polyp_val.h5')
    train_dataset_size = train_dataset.get_dataset_size()
    val_dataset_size = val_dataset.get_dataset_size()
    print("Number of images in the training dataset:\t{:>6}".format(
        train_dataset_size))
    print("Number of images in the validation dataset:\t{:>6}".format(
        val_dataset_size))

    batch_size = args.batch_size

    # 4: Define the image processing chain.
    data_augmentation_chain = DataAugmentationConstantInputSize(
        random_brightness=(-48, 48, 0.5),
        random_contrast=(0.5, 1.8, 0.5),
        random_saturation=(0.5, 1.8, 0.5),
        random_hue=(18, 0.5),
        random_flip=0.5,
        random_translate=((0.03, 0.5), (0.03, 0.5), 0.5),
        random_scale=(0.5, 2.0, 0.5),
        n_trials_max=3,
        clip_boxes=True,
        overlap_criterion='area',
        bounds_box_filter=(0.3, 1.0),
        bounds_validator=(0.5, 1.0),
        n_boxes_min=1,
        background=(0, 0, 0))

    # 5: Instantiate an encoder that can encode ground truth labels into the format needed by the SSD loss function.
    # The encoder constructor needs the spatial dimensions of the model's predictor layers to create the anchor boxes.
    predictor_sizes = [
        model.get_layer('classes4').output_shape[1:3],
        model.get_layer('classes5').output_shape[1:3],
        model.get_layer('classes6').output_shape[1:3],
        model.get_layer('classes7').output_shape[1:3]
    ]

    ssd_input_encoder = SSDInputEncoder(
        img_height=Config.img_height,
        img_width=Config.img_width,
        n_classes=Config.n_classes,
        predictor_sizes=predictor_sizes,
        scales=Config.scales,
        aspect_ratios_global=Config.aspect_ratios,
        two_boxes_for_ar1=Config.two_boxes_for_ar1,
        steps=Config.steps,
        offsets=Config.offsets,
        clip_boxes=Config.clip_boxes,
        variances=Config.variances,
        matching_type='multi',
        pos_iou_threshold=0.5,
        neg_iou_limit=0.3,
        normalize_coords=Config.normalize_coords)

    # 6: Create the generator handles that will be passed to Keras' `fit_generator()` function.
    train_generator = train_dataset.generate(
        batch_size=batch_size,
        shuffle=True,
        transformations=[data_augmentation_chain],
        label_encoder=ssd_input_encoder,
        returns={'processed_images', 'encoded_labels'},
        keep_images_without_gt=False)

    val_generator = val_dataset.generate(
        batch_size=batch_size,
        shuffle=False,
        transformations=[],
        label_encoder=ssd_input_encoder,
        returns={'processed_images', 'encoded_labels'},
        keep_images_without_gt=False)
    model_checkpoint = ModelCheckpoint(
        filepath=os.getcwd() +
        '/weights/ssd7_epoch-{epoch:02d}_loss-{loss:.4f}_val_loss-{val_loss:.4f}.h5',
        monitor='val_loss',
        verbose=1,
        save_best_only=True,
        save_weights_only=False,
        mode='auto',
        period=1)

    csv_logger = CSVLogger(filename='ssd7_training_log.csv',
                           separator=',',
                           append=True)
    early_stopping = EarlyStopping(monitor='val_loss',
                                   min_delta=0.0,
                                   patience=10,
                                   verbose=1)
    reduce_learning_rate = ReduceLROnPlateau(monitor='val_loss',
                                             factor=0.2,
                                             patience=8,
                                             verbose=1,
                                             epsilon=0.001,
                                             cooldown=0,
                                             min_lr=0.00001)

    # `TF_LOG_PATH` is assumed to be defined at module level.
    tf_log = keras.callbacks.TensorBoard(log_dir=TF_LOG_PATH + args.tf_logs,
                                         histogram_freq=0,
                                         batch_size=batch_size,
                                         write_graph=True,
                                         write_grads=False,
                                         write_images=False)

    callbacks = [model_checkpoint, csv_logger, reduce_learning_rate, tf_log]  # note: `early_stopping` is defined above but not registered

    # If you're resuming a previous training, set `initial_epoch` and `final_epoch` accordingly.
    initial_epoch = 0
    final_epoch = args.final_epoch
    steps_per_epoch = 1000

    # Train/Fit the model
    if args.predict_mode == 'train':
        history = model.fit_generator(generator=train_generator,
                                      steps_per_epoch=steps_per_epoch,
                                      epochs=final_epoch,
                                      callbacks=callbacks,
                                      validation_data=val_generator,
                                      validation_steps=ceil(val_dataset_size /
                                                            batch_size),
                                      initial_epoch=initial_epoch)

    # Prediction Output
    predict_generator = val_dataset.generate(
        batch_size=1,
        shuffle=False,
        transformations=[],
        label_encoder=ssd_input_encoder,
        returns={'processed_images', 'processed_labels', 'filenames'},
        keep_images_without_gt=False)

    i = 0  # batch item index; with batch_size=1 this is always the first item
    for val in range(val_dataset_size):
        batch_images, batch_labels, batch_filenames = next(predict_generator)
        y_pred = model.predict(batch_images)

        y_pred_decoded = decode_detections(
            y_pred,
            confidence_thresh=0.5,
            iou_threshold=0.5,
            top_k=200,
            normalize_coords=Config.normalize_coords,
            img_height=Config.img_height,
            img_width=Config.img_width)

        np.set_printoptions(precision=2, suppress=True, linewidth=90)
        print("Predicted boxes:\n")
        print('   class   conf xmin   ymin   xmax   ymax')
        print(y_pred_decoded[i])

        plt.figure(figsize=(20, 12))
        plt.imshow(batch_images[i])

        current_axis = plt.gca()

        colors = plt.cm.hsv(
            np.linspace(0, 1, Config.n_classes +
                        1)).tolist()  # Set the colors for the bounding boxes
        classes = [
            'background', 'polyps'
        ]  # Just so we can print class names onto the image instead of IDs

        # Draw the ground truth boxes in green (omit the label for more clarity)
        for box in batch_labels[i]:
            xmin = box[1]
            ymin = box[2]
            xmax = box[3]
            ymax = box[4]
            label = '{}'.format(classes[int(box[0])])
            current_axis.add_patch(
                plt.Rectangle((xmin, ymin),
                              xmax - xmin,
                              ymax - ymin,
                              color='green',
                              fill=False,
                              linewidth=2))
            current_axis.text(xmin,
                              ymin,
                              label,
                              size='x-large',
                              color='white',
                              bbox={
                                  'facecolor': 'green',
                                  'alpha': 1.0
                              })

        # Draw the predicted boxes in blue
        for box in y_pred_decoded[i]:
            xmin = box[-4]
            ymin = box[-3]
            xmax = box[-2]
            ymax = box[-1]
            color = colors[int(box[0])]
            label = '{}: {:.2f}'.format(classes[int(box[0])], box[1])
            current_axis.add_patch(
                plt.Rectangle((xmin, ymin),
                              xmax - xmin,
                              ymax - ymin,
                              color=color,
                              fill=False,
                              linewidth=2))
            current_axis.text(xmin,
                              ymin,
                              label,
                              size='x-large',
                              color='white',
                              bbox={
                                  'facecolor': color,
                                  'alpha': 1.0
                              })

        image = plt.gcf()
        # plt.show()
        plt.draw()
        image.savefig(os.getcwd() + "/val_predictions/val_" + str(val) +
                      ".png",
                      dpi=100)
Example no. 28
def predict_all_to_json(out_file,
                        model,
                        img_height,
                        img_width,
                        classes_to_cats,
                        data_generator,
                        batch_size,
                        data_generator_mode='resize',
                        model_mode='training',
                        confidence_thresh=0.01,
                        iou_threshold=0.45,
                        top_k=200,
                        pred_coords='centroids',
                        normalize_coords=True):
    '''
    Runs detection predictions over the whole dataset given a model and saves them in a JSON file
    in the MS COCO detection results format.

    Arguments:
        out_file (str): The file name (full path) under which to save the results JSON file.
        model (Keras model): A Keras SSD model object.
        img_height (int): The input image height for the model.
        img_width (int): The input image width for the model.
        classes_to_cats (dict): A dictionary that maps the consecutive class IDs predicted by the model
            to the non-consecutive original MS COCO category IDs.
        data_generator (DataGenerator): A `DataGenerator` object with the evaluation dataset.
        batch_size (int): The batch size for the evaluation.
        data_generator_mode (str, optional): Either of 'resize' or 'pad'. If 'resize', the input images will
            be resized (i.e. warped) to `(img_height, img_width)`. This mode does not preserve the aspect ratios of the images.
            If 'pad', the input images will be first padded so that they have the aspect ratio defined by `img_height`
            and `img_width` and then resized to `(img_height, img_width)`. This mode preserves the aspect ratios of the images.
        model_mode (str, optional): The mode in which the model was created, i.e. 'training', 'inference' or 'inference_fast'.
            This is needed in order to know whether the model output is already decoded or still needs to be decoded. Refer to
            the model documentation for the meaning of the individual modes.
        confidence_thresh (float, optional): A float in [0,1), the minimum classification confidence in a specific
            positive class in order to be considered for the non-maximum suppression stage for the respective class.
            A lower value will result in a larger part of the selection process being done by the non-maximum suppression
            stage, while a larger value will result in a larger part of the selection process happening in the confidence
            thresholding stage.
        iou_threshold (float, optional): A float in [0,1]. All boxes with a Jaccard similarity of greater than `iou_threshold`
            with a locally maximal box will be removed from the set of predictions for a given class, where 'maximal' refers
            to the box score.
        top_k (int, optional): The number of highest scoring predictions to be kept for each batch item after the
            non-maximum suppression stage. Defaults to 200, following the paper.
        pred_coords (str, optional): The box coordinate format that the model outputs. Can be either 'centroids'
            for the format `(cx, cy, w, h)` (box center coordinates, width, and height), 'minmax' for the format
            `(xmin, xmax, ymin, ymax)`, or 'corners' for the format `(xmin, ymin, xmax, ymax)`.
        normalize_coords (bool, optional): Set to `True` if the model outputs relative coordinates (i.e. coordinates in [0,1])
            and you wish to transform these relative coordinates back to absolute coordinates. If the model outputs
            relative coordinates, but you do not want to convert them back to absolute coordinates, set this to `False`.
            Do not set this to `True` if the model already outputs absolute coordinates, as that would result in incorrect
            coordinates. Requires `img_height` and `img_width` if set to `True`.

    Returns:
        None.
    '''

    convert_to_3_channels = ConvertTo3Channels()
    resize = Resize(height=img_height, width=img_width)
    if data_generator_mode == 'resize':
        transformations = [convert_to_3_channels,
                           resize]
    elif data_generator_mode == 'pad':
        random_pad = RandomPadFixedAR(patch_aspect_ratio=img_width/img_height, clip_boxes=False)
        transformations = [convert_to_3_channels,
                           random_pad,
                           resize]
    else:
        raise ValueError("Unexpected argument value: `data_generator_mode` can be either of 'resize' or 'pad', but received '{}'.".format(data_generator_mode))

    # Set the generator parameters.
    generator = data_generator.generate(batch_size=batch_size,
                                        shuffle=False,
                                        transformations=transformations,
                                        label_encoder=None,
                                        returns={'processed_images',
                                                 'image_ids',
                                                 'inverse_transform'},
                                        keep_images_without_gt=True)
    # Put the results in this list.
    results = []
    # Compute the number of batches to iterate over the entire dataset.
    n_images = data_generator.get_dataset_size()
    print("Number of images in the evaluation dataset: {}".format(n_images))
    n_batches = int(ceil(n_images / batch_size))
    # Loop over all batches.
    tr = trange(n_batches, file=sys.stdout)
    tr.set_description('Producing results file')
    for i in tr:
        # Generate batch.
        batch_X, batch_image_ids, batch_inverse_transforms = next(generator)
        # Predict.
        y_pred = model.predict(batch_X)
        # If the model was created in 'training' mode, the raw predictions need to
        # be decoded and filtered, otherwise that's already taken care of.
        if model_mode == 'training':
            # Decode.
            y_pred = decode_detections(y_pred,
                                       confidence_thresh=confidence_thresh,
                                       iou_threshold=iou_threshold,
                                       top_k=top_k,
                                       input_coords=pred_coords,
                                       normalize_coords=normalize_coords,
                                       img_height=img_height,
                                       img_width=img_width)
        else:
            # Filter out the all-zeros dummy elements of `y_pred`
            # (a separate loop variable avoids shadowing the batch index `i`).
            y_pred_filtered = []
            for j in range(len(y_pred)):
                y_pred_filtered.append(y_pred[j][y_pred[j, :, 0] != 0])
            y_pred = y_pred_filtered
        # Convert the predicted box coordinates for the original images.
        y_pred = apply_inverse_transforms(y_pred, batch_inverse_transforms)

        # Convert each predicted box into the results format.
        for k, batch_item in enumerate(y_pred):
            for box in batch_item:
                class_id = box[0]
                # Transform the consecutive class IDs back to the original COCO category IDs.
                cat_id = classes_to_cats[class_id]
                # Round the box coordinates to reduce the JSON file size.
                xmin = float(round(box[2], 1))
                ymin = float(round(box[3], 1))
                xmax = float(round(box[4], 1))
                ymax = float(round(box[5], 1))
                width = xmax - xmin
                height = ymax - ymin
                bbox = [xmin, ymin, width, height]
                result = {}
                result['image_id'] = batch_image_ids[k]
                result['category_id'] = cat_id
                result['score'] = float(round(box[1], 3))
                result['bbox'] = bbox
                results.append(result)

    with open(out_file, 'w') as f:
        json.dump(results, f)

    print("Prediction results saved in '{}'".format(out_file))
Example no. 29
def _main_(args):
    print('Hello World! This is {:s}'.format(args.desc))

    # config_path = args.conf
    # with open(config_path) as config_buffer:    
    #     config = json.loads(config_buffer.read())
    #############################################################
    #   Set model parameters
    #############################################################
    img_height          = 300  # Height of the model input images
    img_width           = 300  # Width of the model input images
    img_channels        = 3  # Number of color channels of the model input images
    mean_color          = [123, 117, 104]  # The per-channel mean of the images in the dataset. Do not change this value if you're using any of the pre-trained weights.
    swap_channels       = [2, 1, 0]  # The color channel order in the original SSD is BGR, so we'll have the model reverse the color channel order of the input images.
    n_classes           = 20  # Number of positive classes, e.g. 20 for Pascal VOC, 80 for MS COCO
    scales_pascal       = [0.1, 0.2, 0.37, 0.54, 0.71, 0.88, 1.05]  # The anchor box scaling factors used in the original SSD300 for the Pascal VOC datasets
    scales_coco         = [0.07, 0.15, 0.33, 0.51, 0.69, 0.87, 1.05]  # The anchor box scaling factors used in the original SSD300 for the MS COCO datasets
    scales              = scales_pascal
    aspect_ratios       = [[1.0, 2.0, 0.5],
                           [1.0, 2.0, 0.5, 3.0, 1.0 / 3.0],
                           [1.0, 2.0, 0.5, 3.0, 1.0 / 3.0],
                           [1.0, 2.0, 0.5, 3.0, 1.0 / 3.0],
                           [1.0, 2.0, 0.5],
                           [1.0, 2.0, 0.5]]  # The anchor box aspect ratios used in the original SSD300; the order matters
    two_boxes_for_ar1   = True
    steps               = [8, 16, 32, 64, 100, 300]  # The space between two adjacent anchor box center points for each predictor layer.
    offsets             = [0.5, 0.5, 0.5, 0.5, 0.5, 0.5]  # The offsets of the first anchor box center points from the top and left borders of the image as a fraction of the step size for each predictor layer.
    clip_boxes          = False  # Whether or not to clip the anchor boxes to lie entirely within the image boundaries
    variances           = [0.1, 0.1, 0.2, 0.2]  # The variances by which the encoded target coordinates are divided as in the original implementation
    normalize_coords    = True

    #############################################################
    #   Create the model
    #############################################################
    # 1: Build the Keras model.
    model = ssd_300(image_size=(img_height, img_width, img_channels),
                    n_classes=n_classes,
                    mode='training',
                    l2_regularization=0.0005,
                    scales=scales,
                    aspect_ratios_per_layer=aspect_ratios,
                    two_boxes_for_ar1=two_boxes_for_ar1,
                    steps=steps,
                    offsets=offsets,
                    clip_boxes=clip_boxes,
                    variances=variances,
                    normalize_coords=normalize_coords,
                    subtract_mean=mean_color,
                    swap_channels=swap_channels)
    # 2: Load some weights into the model. (No weights are loaded in this example.)

    # 3: Instantiate an optimizer and the SSD loss function and compile the model.
    adam = Adam(lr=0.001, beta_1=0.9, beta_2=0.999, epsilon=1e-08, decay=0.0)
    ssd_loss = SSDLoss(neg_pos_ratio=3, alpha=1.0)
    model.compile(optimizer=adam, loss=ssd_loss.compute_loss)

    #############################################################
    #   Prepare the data
    #############################################################
    # 1: Instantiate two `DataGenerator` objects: One for training, one for validation.
    train_dataset = DataGenerator(load_images_into_memory=False, hdf5_dataset_path=None)
    val_dataset = DataGenerator(load_images_into_memory=False, hdf5_dataset_path=None)

    # 2: Parse the image and label lists for the training and validation datasets. This can take a while.
    VOC_2007_images_dir = '/home/minhnc-lab/WORKSPACES/AI/data/VOC/VOCtrainval_06-Nov-2007/VOCdevkit/VOC2007/JPEGImages'
    VOC_2007_annotations_dir = '/home/minhnc-lab/WORKSPACES/AI/data/VOC/VOCtrainval_06-Nov-2007/VOCdevkit/VOC2007/Annotations'
    VOC_2007_train_image_set_filename = '/home/minhnc-lab/WORKSPACES/AI/data/VOC/VOCtrainval_06-Nov-2007/VOCdevkit/VOC2007/ImageSets/Main/train.txt'
    VOC_2007_val_image_set_filename = '/home/minhnc-lab/WORKSPACES/AI/data/VOC/VOCtrainval_06-Nov-2007/VOCdevkit/VOC2007/ImageSets/Main/val.txt'
    # VOC_2007_trainval_image_set_filename = '/home/minhnc-lab/WORKSPACES/AI/data/VOC/VOCtrainval_06-Nov-2007/VOCdevkit/VOC2007/ImageSets/Main/trainval.txt'
    # VOC_2007_test_image_set_filename = '/home/minhnc-lab/WORKSPACES/AI/data/VOC/VOCtest_06-Nov-2007/VOCdevkit/VOC2007/ImageSets/Main/test.txt'

    classes = ['background',
               'aeroplane', 'bicycle', 'bird', 'boat',
               'bottle', 'bus', 'car', 'cat',
               'chair', 'cow', 'diningtable', 'dog',
               'horse', 'motorbike', 'person', 'pottedplant',
               'sheep', 'sofa', 'train', 'tvmonitor']

    train_dataset.parse_xml(images_dirs=[VOC_2007_images_dir],
                            image_set_filenames=[VOC_2007_train_image_set_filename],
                            annotations_dirs=[VOC_2007_annotations_dir],
                            classes=classes,
                            include_classes='all',
                            exclude_truncated=False,
                            exclude_difficult=False,
                            ret=False)
    val_dataset.parse_xml(images_dirs=[VOC_2007_images_dir],
                          image_set_filenames=[VOC_2007_val_image_set_filename],
                          annotations_dirs=[VOC_2007_annotations_dir],
                          classes=classes,
                          include_classes='all',
                          exclude_truncated=False,
                          exclude_difficult=True,
                          ret=False)

    # Note: the HDF5 file names below are kept from the original tutorial and
    # do not match the 2007-train / 2007-val splits actually parsed above.
    train_dataset.create_hdf5_dataset(file_path='dataset_pascal_voc_07+12_trainval.h5',
                                      resize=False,
                                      variable_image_size=True,
                                      verbose=True)

    val_dataset.create_hdf5_dataset(file_path='dataset_pascal_voc_07_test.h5',
                                    resize=False,
                                    variable_image_size=True,
                                    verbose=True)
    # 3: Set the batch size.
    batch_size = 8  # Change the batch size if you like, or if you run into GPU memory issues.

    # 4: Set the image transformations for pre-processing and data augmentation options.
    ssd_data_augmentation = SSDDataAugmentation(img_height=img_height,
                                                img_width=img_width,
                                                background=mean_color)
    convert_to_3_channels = ConvertTo3Channels()
    resize = Resize(height=img_height, width=img_width)

    # 5: Instantiate an encoder that can encode ground truth labels into the format needed by the SSD loss function.
    predictor_sizes = [model.get_layer('conv4_3_norm_mbox_conf').output_shape[1:3],
                       model.get_layer('fc7_mbox_conf').output_shape[1:3],
                       model.get_layer('conv6_2_mbox_conf').output_shape[1:3],
                       model.get_layer('conv7_2_mbox_conf').output_shape[1:3],
                       model.get_layer('conv8_2_mbox_conf').output_shape[1:3],
                       model.get_layer('conv9_2_mbox_conf').output_shape[1:3]]

    ssd_input_encoder = SSDInputEncoder(img_height=img_height,
                                        img_width=img_width,
                                        n_classes=n_classes,
                                        predictor_sizes=predictor_sizes,
                                        scales=scales,
                                        aspect_ratios_per_layer=aspect_ratios,
                                        two_boxes_for_ar1=two_boxes_for_ar1,
                                        steps=steps,
                                        offsets=offsets,
                                        clip_boxes=clip_boxes,
                                        variances=variances,
                                        matching_type='multi',
                                        pos_iou_threshold=0.5,
                                        neg_iou_limit=0.5,
                                        normalize_coords=normalize_coords)

    # 6: Create the generator handles that will be passed to Keras' `fit_generator()` function.
    train_generator = train_dataset.generate(batch_size=batch_size,
                                             shuffle=True,
                                             transformations=[ssd_data_augmentation],
                                             label_encoder=ssd_input_encoder,
                                             returns={'processed_images',
                                                      'encoded_labels'},
                                             keep_images_without_gt=False)

    val_generator = val_dataset.generate(batch_size=batch_size,
                                         shuffle=False,
                                         transformations=[convert_to_3_channels,
                                                          resize],
                                         label_encoder=ssd_input_encoder,
                                         returns={'processed_images',
                                                  'encoded_labels'},
                                         keep_images_without_gt=False)

    # Get the number of samples in the training and validations datasets.
    train_dataset_size = train_dataset.get_dataset_size()
    val_dataset_size = val_dataset.get_dataset_size()

    print("Number of images in the training dataset:\t{:>6}".format(train_dataset_size))
    print("Number of images in the validation dataset:\t{:>6}".format(val_dataset_size))

    #############################################################
    #   Kick off the training
    #############################################################
    # Define model callbacks.
    model_checkpoint = ModelCheckpoint(
        filepath='ssd300_pascal_07+12_epoch-{epoch:02d}_loss-{loss:.4f}_val_loss-{val_loss:.4f}.h5',
        monitor='val_loss',
        verbose=1,
        save_best_only=True,
        save_weights_only=False,
        mode='auto',
        period=1)

    csv_logger = CSVLogger(filename='ssd300_pascal_07+12_training_log.csv',
                           separator=',',
                           append=True)

    # `lr_schedule` is not defined in this excerpt; a plausible definition is
    # sketched after this example.
    learning_rate_scheduler = LearningRateScheduler(schedule=lr_schedule,
                                                    verbose=1)

    terminate_on_nan = TerminateOnNaN()

    callbacks = [model_checkpoint,
                 csv_logger,
                 learning_rate_scheduler,
                 terminate_on_nan]

    # Train
    initial_epoch = 0
    final_epoch = 120
    steps_per_epoch = 1000

    history = model.fit_generator(generator=train_generator,
                                  steps_per_epoch=steps_per_epoch,
                                  epochs=final_epoch,
                                  callbacks=callbacks,
                                  validation_data=val_generator,
                                  validation_steps=ceil(val_dataset_size / batch_size),
                                  initial_epoch=initial_epoch)

    #############################################################
    #   Run the evaluation
    #############################################################
    # 1: Set the generator for the predictions.
    predict_generator = val_dataset.generate(batch_size=1,
                                             shuffle=True,
                                             transformations=[convert_to_3_channels,
                                                              resize],
                                             label_encoder=None,
                                             returns={'processed_images',
                                                      'filenames',
                                                      'inverse_transform',
                                                      'original_images',
                                                      'original_labels'},
                                             keep_images_without_gt=False)

    # 2: Generate samples.
    batch_images, batch_filenames, batch_inverse_transforms, batch_original_images, batch_original_labels = next(
        predict_generator)

    i = 0  # Which batch item to look at

    print("Image:", batch_filenames[i])
    print()
    print("Ground truth boxes:\n")
    print(np.array(batch_original_labels[i]))

    # 3: Make predictions.
    y_pred = model.predict(batch_images)

    # 4: Decode the raw predictions in `y_pred`.
    y_pred_decoded = decode_detections(y_pred,
                                       confidence_thresh=0.5,
                                       iou_threshold=0.4,
                                       top_k=200,
                                       normalize_coords=normalize_coords,
                                       img_height=img_height,
                                       img_width=img_width)

    # 5: Convert the predictions for the original image.
    y_pred_decoded_inv = apply_inverse_transforms(y_pred_decoded, batch_inverse_transforms)
    np.set_printoptions(precision=2, suppress=True, linewidth=90)
    print("Predicted boxes:\n")
    print('   class   conf xmin   ymin   xmax   ymax')
    print(y_pred_decoded_inv[i])

    # 6: Draw the predicted boxes onto the image
    # Set the colors for the bounding boxes
    colors = plt.cm.hsv(np.linspace(0, 1, n_classes + 1)).tolist()
    classes = ['background',
               'aeroplane', 'bicycle', 'bird', 'boat',
               'bottle', 'bus', 'car', 'cat',
               'chair', 'cow', 'diningtable', 'dog',
               'horse', 'motorbike', 'person', 'pottedplant',
               'sheep', 'sofa', 'train', 'tvmonitor']

    plt.figure(figsize=(20, 12))
    plt.imshow(batch_original_images[i])

    current_axis = plt.gca()

    for box in batch_original_labels[i]:
        xmin = box[1]
        ymin = box[2]
        xmax = box[3]
        ymax = box[4]
        label = '{}'.format(classes[int(box[0])])
        current_axis.add_patch(
            plt.Rectangle((xmin, ymin), xmax - xmin, ymax - ymin, color='green', fill=False, linewidth=2))
        current_axis.text(xmin, ymin, label, size='x-large', color='white', bbox={'facecolor': 'green', 'alpha': 1.0})

    for box in y_pred_decoded_inv[i]:
        xmin = box[2]
        ymin = box[3]
        xmax = box[4]
        ymax = box[5]
        color = colors[int(box[0])]
        label = '{}: {:.2f}'.format(classes[int(box[0])], box[1])
        current_axis.add_patch(
            plt.Rectangle((xmin, ymin), xmax - xmin, ymax - ymin, color=color, fill=False, linewidth=2))
        current_axis.text(xmin, ymin, label, size='x-large', color='white', bbox={'facecolor': color, 'alpha': 1.0})
Example no. 30
    def on_epoch_end(self, epoch, logs=None):
        # Compute the F1 score by decoding the raw predictions with NMS.
        # This method belongs to a custom Keras callback; `hp` is assumed to
        # be a project-local helper module.

        # Make predictions; start with a zero placeholder row so results can
        # be appended along axis 0 (the placeholder is dropped later).
        predictions = np.zeros(shape=self.output_shape)
        if self.label_csv is None:
            for batch in hp.get_batch(32, self.data):
                pred = self.model.predict(batch)
                predictions = np.append(predictions, pred, axis=0)
        else:
            file_label = pd.read_csv(self.label_csv)
            # get all images' names
            file_column = file_label.columns
            img_val = file_label[file_column[0]].unique()

            normalized_label = []

            # Iterate over images
            for start_i in range(0, len(img_val), 32):
                end_i = start_i + 32
                input_ = []
                for img_name in img_val[start_i:end_i]:
                    img = imread(self.path_img + '/' + img_name)
                    height = img.shape[0]
                    width = img.shape[1]

                    # Ground-truth boxes for this image (first and last CSV columns stripped).
                    original_label = file_label[file_label[file_column[0]] == img_name].values[:, 1:-1]

                    # Change format from (xmin, xmax, ymin, ymax) to (x, y, width, height).
                    new_label = []
                    for o_label in original_label:
                        new_label.append([o_label[0], o_label[2], o_label[1] - o_label[0], o_label[3] - o_label[2]])

                    # Normalize box coordinates to [0, 1].
                    new_label = hp.normilize_boxes(new_label, width, height)
                    normalized_label.append(new_label)

                    # Resize the image to the model's 300x300 input
                    # (`scipy.misc.imresize` is deprecated in newer SciPy).
                    resized_img = misc.imresize(img, size=(300, 300))
                    input_.append(resized_img)
                input_ = np.array(input_)
                input_ = input_.reshape(-1, 300, 300, 3)
                pred = self.model.predict(input_)
                predictions = np.append(predictions, pred, axis=0)

        predictions = predictions[1:]  # drop the initial all-zeros placeholder
                    
        # Decode predictions with confidence thresholding and NMS.
        pred_decod = decode_detections(predictions,
                                       confidence_thresh=self.confidence,
                                       iou_threshold=self.iou,
                                       top_k=self.top_k,
                                       normalize_coords=self.normalize_coords,
                                       img_height=self.height,
                                       img_width=self.width)
        
        pred_decod = np.array(pred_decod)
            
        # Remove class and confidence from predictions
        pred_decod = hp.clean_predictions(pred_decod, id_class=1)
        pred_decod = hp.adjust_predictions(pred_decod)
        pred_decod = hp.get_coordinates(pred_decod)
        
        aux_decod = []
        for item in pred_decod:
            aux_decod.append(hp.normilize_boxes(item, self.width, self.height))
        pred_decod = aux_decod

        # Calculate performance
        if self.label_csv is None:
            presicion, recall, f1_score = hp.cal_performance(self.label, pred_decod, verborse=self.verborse, iou=self.iou_f1)
        else:
            presicion, recall, f1_score = hp.cal_performance(normalized_label, pred_decod, verborse=self.verborse, iou=self.iou_f1)
        print('F1 score:', f1_score)
        
        self.history.append([epoch, presicion, recall, f1_score])
        
        # save file
        history_f1 = pd.DataFrame(self.history, columns=['epoch', 'presicion', 'recall', 'f1 score'])
        history_f1.to_csv(self.path_csv, index=False)
                
        if f1_score > self.best_f1:
            # Save the model whenever the F1 score improves.
            print('Improved F1 score from', self.best_f1, 'to', f1_score)
            self.best_f1 = f1_score
            self.model.save(self.path_save)