Example #1
import torch
from torch.utils.data import DataLoader

# Assumed project layout (typical of YOLOv3 training repos): config values,
# the dataset class, and the helpers live in sibling modules.
import config
from dataset import YOLODataset
from utils import cells_to_bboxes, nms, plot_image


def test():
    anchors = config.ANCHORS

    transform = config.test_transforms

    dataset = YOLODataset(
        "COCO/train.csv",
        "COCO/images/images/",
        "COCO/labels/labels_new/",
        S=[13, 26, 52],
        anchors=anchors,
        transform=transform,
    )
    S = [13, 26, 52]
    # Anchors in config are relative to the image; multiplying by the grid
    # size S (equivalently, dividing by 1 / S) expresses them in cell units.
    scaled_anchors = torch.tensor(anchors) * (
        torch.tensor(S).unsqueeze(1).unsqueeze(1).repeat(1, 3, 2))
    loader = DataLoader(dataset=dataset, batch_size=1, shuffle=True)
    for x, y in loader:
        boxes = []

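        # y is a list of three target tensors, one per scale; each is shaped
        # (batch, anchors_per_scale, S, S, 6). anchors_per_scale happens to
        # equal the number of scales (3), so the loop index works for both.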
        for i in range(y[0].shape[1]):
            anchor = scaled_anchors[i]
            print(anchor.shape)
            print(y[i].shape)
            boxes += cells_to_bboxes(y[i],
                                     is_preds=False,
                                     S=y[i].shape[2],
                                     anchors=anchor)[0]
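        # With iou_threshold=1 no overlapping boxes are suppressed; only the
        # confidence threshold (0.7) filters the candidate list.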
        boxes = nms(boxes,
                    iou_threshold=1,
                    threshold=0.7,
                    box_format="midpoint")
        print(boxes)
        plot_image(x[0].permute(1, 2, 0).to("cpu"), boxes)
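
As a quick sanity check on the anchor scaling above, here is a minimal standalone sketch; the anchor values are illustrative, assuming the usual 3 scales x 3 anchors x (w, h) layout of config.ANCHORS:

import torch

# Hypothetical anchors, relative to the full image, in the common
# 3 scales x 3 anchors x (w, h) layout.
anchors = [
    [(0.28, 0.22), (0.38, 0.48), (0.90, 0.78)],
    [(0.07, 0.15), (0.15, 0.11), (0.14, 0.29)],
    [(0.02, 0.03), (0.04, 0.07), (0.08, 0.06)],
]
S = [13, 26, 52]

scaled = torch.tensor(anchors) * torch.tensor(S).unsqueeze(1).unsqueeze(1).repeat(1, 3, 2)
print(scaled.shape)  # torch.Size([3, 3, 2])
print(scaled[0, 0])  # tensor([3.6400, 2.8600]) == (0.28 * 13, 0.22 * 13)
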
import logging
import time

import cv2
import numpy as np
from PIL import Image

# NUM_CHANNELS, IMG_W, IMG_H, and this example's nms() are assumed to come
# from the surrounding project (e.g. its settings and model modules).


def run_inference(image, model, sess, mode, sign_map):
    """
	Run inference on a given image

	Arguments:
		* image: Numpy array representing a single RGB image
		* model: Dict of tensor references returned by SSDModel()
		* sess: TensorFlow session reference
		* mode: String of either "image", "video", or "demo"

	Returns:
		* Numpy array representing annotated image
	"""
    # Save original image in memory
    image = np.asarray(image)
    image_orig = np.copy(image)  # untouched copy used for annotation

    # Get relevant tensors
    x = model['x']
    is_training = model['is_training']
    preds_conf = model['preds_conf']
    preds_loc = model['preds_loc']
    probs = model['probs']

    # Convert image to a PIL Image, resize it, convert to grayscale if
    # necessary, then convert back to a numpy array
    image = Image.fromarray(image)
    orig_w, orig_h = image.size
    logging.info("Original image width: {:d}, height: {:d}".format(
        orig_w, orig_h))
    if NUM_CHANNELS == 1:
        image = image.convert('L')  # 8-bit grayscale
    image = image.resize((IMG_W, IMG_H),
                         Image.LANCZOS)  # high-quality downsampling filter
    logging.info("Resized image size: {}".format(image.size))
    image = np.asarray(image)

    images = np.array([image])  # create a "batch" of 1 image
    if NUM_CHANNELS == 1:
        # Grayscale images need an extra trailing dimension of size 1
        images = np.expand_dims(images, axis=-1)

    # Perform object detection
    t0 = time.time()  # track the duration of object detection + NMS
    preds_conf_val, preds_loc_val, probs_val = sess.run(
        [preds_conf, preds_loc, probs],
        feed_dict={
            x: images,
            is_training: False
        })
    if mode != 'video':
        logging.info('Detection took %.1f ms (%.2f fps)' %
                     ((time.time() - t0) * 1000, 1 / (time.time() - t0)))

    # Gather class predictions and confidence values
    y_pred_conf = preds_conf_val[0]  # batch size of 1, so just take [0]
    y_pred_conf = y_pred_conf.astype('float32')
    prob = probs_val[0]

    # Gather localization predictions
    y_pred_loc = preds_loc_val[0]

    # Perform NMS
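    # This example's nms() takes class confidences and localization offsets
    # and returns rows of [x1, y1, x2, y2, cls, cls_prob] (see below)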
    boxes = nms(y_pred_conf, y_pred_loc, prob)
    # logging.info("Boxes: {}".format(boxes))
    if mode != 'video':
        logging.info('Inference + NMS took %.1f ms (%.2f fps)' %
                     ((time.time() - t0) * 1000, 1 / (time.time() - t0)))

    # Rescale boxes' coordinates back to the original image's dimensions
    # Recall boxes = [[x1, y1, x2, y2, cls, cls_prob], [...], ...]
    scale = np.array(
        [orig_w / IMG_W, orig_h / IMG_H, orig_w / IMG_W, orig_h / IMG_H])
    if len(boxes) > 0:
        boxes[:, :4] = boxes[:, :4] * scale
    # logging.info("标注矩形2:{}".format(boxes))

    # Draw and annotate boxes over original image, and return annotated image
    image = image_orig
    for box in boxes:
        # Get box parameters
        box_coords = [int(round(x)) for x in box[:4]]
        cls = int(box[4])
        cls_prob = box[5]

        # Annotate image
        image = cv2.rectangle(image, tuple(box_coords[:2]),
                              tuple(box_coords[2:]), (0, 255, 0))
        label_str = '%s %.3f' % (sign_map[cls], cls_prob)
        image = cv2.putText(image, label_str, (box_coords[0], box_coords[1]),
                            0, 0.5, (0, 255, 0), 1, cv2.LINE_AA)

    return image
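
For context, a minimal driver for run_inference might look like the sketch below. The SSDModel() call, checkpoint path, image path, and sign_map contents are all illustrative assumptions, not the project's actual wiring:

import logging

import numpy as np
import tensorflow as tf
from PIL import Image

logging.basicConfig(level=logging.INFO)

# Hypothetical class-ID -> sign-name mapping; the real one is project-specific.
sign_map = {0: 'stop', 1: 'pedestrianCrossing'}

with tf.Session() as sess:
    model = SSDModel()  # dict of tensor references, as described above
    saver = tf.train.Saver()
    saver.restore(sess, 'checkpoints/model.ckpt')  # assumed checkpoint path

    image = np.asarray(Image.open('test.png').convert('RGB'))
    annotated = run_inference(image, model, sess, 'image', sign_map)
    Image.fromarray(annotated).save('test_annotated.png')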