Exemplo n.º 1
0
def filter_prediction(mc, boxes, probs, cls_idx):
    """Select candidate detections, then run class-wise NMS on them.

    Candidates are the ``mc.TOP_N_DETECTION`` most probable predictions when
    that setting is positive and smaller than the number of predictions;
    otherwise they are the predictions whose probability exceeds
    ``mc.PROB_THRESH``.

    Args:
      mc: config object providing TOP_N_DETECTION, PROB_THRESH, CLASSES and
        NMS_THRESH.
      boxes: array of boxes, indexed in step with ``probs``.
      probs: array of probabilities.
      cls_idx: array of class indices, indexed in step with ``probs``.

    Returns:
      Three parallel lists (boxes, probabilities, class indices) with the
      entries flagged by ``util.nms``, grouped by ascending class index.
    """
    if len(probs) > mc.TOP_N_DETECTION > 0:
        # Indices of the N largest probabilities, highest first.
        chosen = probs.argsort()[::-1][:mc.TOP_N_DETECTION]
    else:
        chosen = np.nonzero(probs > mc.PROB_THRESH)[0]
    probs, boxes, cls_idx = probs[chosen], boxes[chosen], cls_idx[chosen]

    final_boxes, final_probs, final_cls_idx = [], [], []
    for c in range(mc.CLASSES):
        members = np.nonzero(cls_idx == c)[0]
        keep = util.nms(boxes[members], probs[members], mc.NMS_THRESH)
        # keep[pos] refers to the pos-th member of this class.
        for pos, kept in enumerate(keep):
            if not kept:
                continue
            src = members[pos]
            final_boxes.append(boxes[src])
            final_probs.append(probs[src])
            final_cls_idx.append(c)
    return final_boxes, final_probs, final_cls_idx
Exemplo n.º 2
0
    def filter_prediction(self, boxes, probs, cls_idx, backgroud_id=-1):
        """Select candidate detections and run class-wise NMS on them.

        Candidates are the ``mc.TOP_N_DETECTION`` most probable predictions
        when that setting is positive and smaller than the number of
        predictions; otherwise they are the predictions whose probability
        exceeds ``mc.PROB_THRESH``.

        Args:
          boxes: array of [cx, cy, w, h].
          probs: array of probabilities
          cls_idx: array of class indices
          backgroud_id: class index to leave out of the result; a negative
            value (the default) keeps every class.
        Returns:
          final_boxes: list of kept bounding boxes.
          final_probs: list of kept probabilities
          final_cls_idx: list of kept class indices
        """
        mc = self.mc

        if len(probs) > mc.TOP_N_DETECTION > 0:
            # Indices of the N largest probabilities, highest first.
            chosen = probs.argsort()[::-1][:mc.TOP_N_DETECTION]
        else:
            chosen = np.nonzero(probs > mc.PROB_THRESH)[0]
        probs, boxes, cls_idx = probs[chosen], boxes[chosen], cls_idx[chosen]

        final_boxes, final_probs, final_cls_idx = [], [], []
        for c in range(mc.CLASSES):
            # The background class, when one is configured, is never reported.
            if backgroud_id >= 0 and c == backgroud_id:
                continue

            members = np.nonzero(cls_idx == c)[0]
            keep = util.nms(boxes[members], probs[members], mc.NMS_THRESH)

            # keep[pos] refers to the pos-th member of this class.
            for pos, kept in enumerate(keep):
                if not kept:
                    continue
                src = members[pos]
                final_boxes.append(boxes[src])
                final_probs.append(probs[src])
                final_cls_idx.append(c)
        return final_boxes, final_probs, final_cls_idx
Exemplo n.º 3
0
def detect_onet(onet, image, bboxes, device):
    """Run the O-Net stage: re-score candidate boxes and predict landmarks.

    Each candidate box is cropped from ``image`` (zero-padded where
    ``correct_bboxes`` reports it leaves the image), resized to 48x48 and
    passed through ``onet``. Boxes whose face probability exceeds 0.98 are
    kept, calibrated with the predicted offsets and de-duplicated with NMS.

    Args:
        onet: callable network; given a float tensor of crops it returns
            ``(landmark, offset, prob)``.
        image: array of shape (height, width, channel).
        bboxes: array whose columns 0-3 are x1, y1, x2, y2 and whose
            column 4 receives the score.
        device: torch device the crops are moved to before inference.

    Returns:
        Tuple ``(bboxes, landmarks)`` restricted to the boxes that survive
        thresholding and NMS. Landmark columns 0-4 are x coordinates and
        columns 5-9 are y coordinates, in image pixels.
    """
    size = 48
    thresholds = 0.98  # face detection threshold
    nms_thresholds = 0.7
    img_h, img_w, _ = image.shape

    num_boxes = len(bboxes)
    [dy, edy, dx, edx, y, ey, x, ex, w,
     h] = correct_bboxes(bboxes, img_w, img_h)

    img_boxes = np.zeros((num_boxes, 3, size, size))

    for i in range(num_boxes):
        # Paste the visible part of the box into a zero canvas of the full
        # box size, so out-of-image regions stay black.
        img_box = np.zeros((h[i], w[i], 3))
        img_box[dy[i]:(edy[i] + 1), dx[i]:(edx[i] + 1), :] = \
            image[y[i]:(ey[i] + 1), x[i]:(ex[i] + 1), :]

        img_box = cv2.resize(img_box, (size, size),
                             interpolation=cv2.INTER_LINEAR)
        img_boxes[i, :, :, :] = preprocess(img_box)

    # Inference only: no autograd graph is needed for the forward pass.
    with torch.no_grad():
        landmark, offset, prob = onet(torch.FloatTensor(img_boxes).to(device))
    landmarks = landmark.detach().cpu().numpy()  # shape [n_boxes, 10]
    offsets = offset.detach().cpu().numpy()  # shape [n_boxes, 4]
    probs = prob.detach().cpu().numpy()  # shape [n_boxes, 2]

    keep = np.where(probs[:, 1] > thresholds)[0]
    bboxes = bboxes[keep]
    bboxes[:, 4] = probs[keep, 1].reshape((-1, ))  # score from this stage
    offsets = offsets[keep]
    landmarks = landmarks[keep]

    # Landmarks are predicted relative to the box; map them to image pixels.
    box_w = bboxes[:, 2] - bboxes[:, 0] + 1.0
    box_h = bboxes[:, 3] - bboxes[:, 1] + 1.0
    xmin, ymin = bboxes[:, 0], bboxes[:, 1]

    landmarks[:, 0:5] = np.expand_dims(
        xmin, 1) + np.expand_dims(box_w, 1) * landmarks[:, 0:5]
    landmarks[:, 5:10] = np.expand_dims(
        ymin, 1) + np.expand_dims(box_h, 1) * landmarks[:, 5:10]

    bboxes = calibrate_box(bboxes, offsets)
    keep = nms(bboxes, nms_thresholds, mode='min')
    bboxes = bboxes[keep]
    landmarks = landmarks[keep]

    return bboxes, landmarks
Exemplo n.º 4
0
 def run_det(self):
     """Run the model over ``self.loader`` and write boxes per image.

     For every batch the ground-truth boxes are written under
     ``cal_map/gt`` and the NMS-filtered detections under ``cal_map/det``,
     both keyed by the image file name.
     """
     for batch in tqdm(self.loader):
         out = self.model(batch['input'].to(device='cuda'))
         # Only the first sample of the batch is looked up and written.
         img_id = batch['img_id'].numpy()[0]
         file_name = self.loadImgs(ids=[img_id])[0]['file_name']
         detections = nms(
             self.model.convert_pred(out, scores_thresh=0.5), 0.5, 0.5)
         gt_boxes = batch['gt_boxes'].numpy()[0]
         self.write(gt_boxes, file_name, det_dir='cal_map/gt')
         self.write(detections, file_name, det_dir='cal_map/det')
def predict(img, model, nms_iou=0.45, conf_thresh=0.45):
    """Run the detector on one image and time the forward pass.

    Args:
        img: image array; its first two dimensions are taken as the frame
            size.
        model: object exposing ``predict_on_batch``.
        nms_iou: threshold forwarded to ``util.nms``.
        conf_thresh: threshold forwarded to ``util.postprocess_boxes``.

    Returns:
        Tuple ``(bboxes, exec_time)`` where ``bboxes`` is the output of
        ``util.nms`` and ``exec_time`` is the duration of
        ``predict_on_batch`` in seconds.
    """
    input_size = cfg.YOLO.INPUT_SIZE
    frame_size = img.shape[:2]

    img_data = util.image_preporcess(img.copy(), [input_size, input_size])
    img_data = img_data[np.newaxis, ...].astype(np.float32)

    # perf_counter is monotonic, so the interval cannot be distorted by
    # wall-clock adjustments the way time.time() differences can.
    start = time.perf_counter()
    # Keep the model outputs at indices 1, 3 and 5.
    pred_bbox = model.predict_on_batch(img_data)[1:6:2]
    exec_time = time.perf_counter() - start

    # Flatten every output to (-1, last_dim) and stack them.
    pred_bbox = [tf.reshape(x, (-1, tf.shape(x)[-1])) for x in pred_bbox]
    pred_bbox = tf.concat(pred_bbox, axis=0)

    bboxes = util.postprocess_boxes(pred_bbox, frame_size, input_size,
                                    conf_thresh)
    bboxes = util.nms(bboxes, nms_iou, method='nms')
    return bboxes, exec_time
  def filter_prediction(self, boxes, probs, cls_idx):
    """Filter bounding box predictions with probability threshold and
    non-maximum suppression.

    Predictions are first cut at ``mc.PROB_THRESH``; if more than
    ``mc.TOP_N_DETECTION`` remain (and that setting is positive) only the
    most probable ones are kept; finally NMS is applied per class.

    Args:
      boxes: array of [cx, cy, w, h].
      probs: array of probabilities
      cls_idx: array of class indices
    Returns:
      final_boxes: list of filtered bounding boxes.
      final_probs: list of filtered probabilities
      final_cls_idx: list of filtered class indices
    """
    mc = self.mc

    # Probability pre-filter. This must use the probability threshold:
    # NMS_THRESH is an overlap threshold and only belongs in util.nms.
    confident = np.nonzero(probs > mc.PROB_THRESH)[0]
    probs = probs[confident]
    boxes = boxes[confident]
    cls_idx = cls_idx[confident]

    if 0 < mc.TOP_N_DETECTION < len(probs):
      # Indices of the N largest probabilities, highest first.
      order = probs.argsort()[:-mc.TOP_N_DETECTION - 1:-1]
      probs = probs[order]
      boxes = boxes[order]
      cls_idx = cls_idx[order]

    final_boxes = []
    final_probs = []
    final_cls_idx = []

    for c in range(mc.CLASSES):
      idx_per_class = np.nonzero(cls_idx == c)[0]
      keep = util.nms(boxes[idx_per_class], probs[idx_per_class], mc.NMS_THRESH)
      # keep[pos] refers to the pos-th prediction of this class.
      for pos, kept in enumerate(keep):
        if kept:
          src = idx_per_class[pos]
          final_boxes.append(boxes[src])
          final_probs.append(probs[src])
          final_cls_idx.append(c)
    return final_boxes, final_probs, final_cls_idx
Exemplo n.º 7
0
def detect_rnet(rnet, image, bboxes, device):
    """Run the R-Net stage: re-score and refine candidate boxes.

    Each candidate box is cropped from ``image`` (zero-padded where
    ``correct_bboxes`` reports it leaves the image), resized to 24x24 and
    passed through ``rnet``. Boxes whose face probability exceeds 0.8 are
    kept, de-duplicated with NMS, calibrated with the predicted offsets,
    squared and rounded.

    Args:
        rnet: callable network; given a float tensor of crops it returns
            ``(offset, prob)``.
        image: array of shape (height, width, channel).
        bboxes: array of candidate boxes; column 4 receives the score.
        device: torch device the crops are moved to before inference.

    Returns:
        Array of refined boxes; columns 0-3 are rounded coordinates.
    """
    size = 24
    thresholds = 0.8  # face detection threshold
    nms_thresholds = 0.7
    img_h, img_w, _ = image.shape

    num_boxes = len(bboxes)
    [dy, edy, dx, edx, y, ey, x, ex, w,
     h] = correct_bboxes(bboxes, img_w, img_h)

    img_boxes = np.zeros((num_boxes, 3, size, size))

    for i in range(num_boxes):
        # Paste the visible part of the box into a zero canvas of the full
        # box size, so out-of-image regions stay black.
        img_box = np.zeros((h[i], w[i], 3))
        img_box[dy[i]:(edy[i] + 1), dx[i]:(edx[i] + 1), :] = \
            image[y[i]:(ey[i] + 1), x[i]:(ex[i] + 1), :]

        img_box = cv2.resize(img_box, (size, size),
                             interpolation=cv2.INTER_LINEAR)
        img_boxes[i, :, :, :] = preprocess(img_box)

    # Inference only: no autograd graph is needed for the forward pass.
    with torch.no_grad():
        offset, prob = rnet(torch.FloatTensor(img_boxes).to(device))
    offsets = offset.detach().cpu().numpy()  # shape [n_boxes, 4]
    probs = prob.detach().cpu().numpy()  # shape [n_boxes, 2]

    keep = np.where(probs[:, 1] > thresholds)[0]
    bboxes = bboxes[keep]
    bboxes[:, 4] = probs[keep, 1].reshape((-1, ))  # score from this stage
    offsets = offsets[keep]

    # NMS runs before calibration, so offsets are filtered with the same keep.
    keep = nms(bboxes, nms_thresholds)
    bboxes = bboxes[keep]
    bboxes = calibrate_box(bboxes, offsets[keep])
    bboxes = convert_to_square(bboxes)
    bboxes[:, 0:4] = np.round(bboxes[:, 0:4])

    return bboxes
Exemplo n.º 8
0
def detect_pnet(pnet, image, min_face_size, device):
    """Run the P-Net stage over an image pyramid and propose face boxes.

    The image is rescaled so that a face of ``min_face_size`` pixels maps
    to the 12-pixel window P-Net sees, then repeatedly shrunk by a factor
    of 0.707 while its shorter side still exceeds 12 pixels. At every scale
    the windows whose face probability exceeds 0.7 become candidate boxes,
    which are de-duplicated per scale and again across scales.

    Args:
        pnet: callable network; given a float tensor it returns
            ``(offset, prob)``.
        image: array of shape (height, width, channel).
        min_face_size: smallest face size, in pixels, to look for.
        device: torch device the scaled images are moved to.

    Returns:
        Array of shape [n_boxes, 5] with columns x1, y1, x2, y2, score;
        coordinates are calibrated, squared and rounded. An empty array of
        shape (0, 5) is returned when no window passes the threshold at any
        scale (including when the image is too small to build a pyramid).
    """
    thresholds = 0.7  # face detection threshold
    nms_thresholds = 0.7

    # BUILD AN IMAGE PYRAMID
    height, width, _ = image.shape
    min_length = min(height, width)

    min_detection_size = 12
    factor = 0.707  # sqrt(0.5)

    # Scale the image so that the minimum size we can detect equals the
    # minimum face size we want to detect.
    m = min_detection_size / min_face_size
    min_length *= m

    scales = []
    factor_count = 0
    while min_length > min_detection_size:
        scales.append(m * factor**factor_count)
        min_length *= factor
        factor_count += 1

    # Applying P-Net is equivalent, in some sense, to moving a 12x12 window
    # with stride 2.
    stride, cell_size = 2, 12

    # One array of candidates per scale that produced any.
    bounding_boxes = []

    with torch.no_grad():
        for scale in scales:
            sw, sh = math.ceil(width * scale), math.ceil(height * scale)
            img = cv2.resize(image, (sw, sh), interpolation=cv2.INTER_LINEAR)
            img = torch.FloatTensor(preprocess(img)).to(device)
            offset, prob = pnet(img)
            # Probability of a face at each sliding-window position.
            probs = prob.detach().cpu().numpy()[0, 1, :, :]
            # Transformations towards the true bounding boxes.
            offsets = offset.detach().cpu().numpy()

            # Row and column indices of windows that probably hold a face.
            rows, cols = np.where(probs > thresholds)
            if rows.size == 0:
                continue

            box_offsets = np.array(
                [offsets[0, i, rows, cols] for i in range(4)])
            score = probs[rows, cols]

            # P-Net ran on a scaled image, so map the windows back to the
            # original image coordinates.
            boxes = np.vstack([
                np.round((stride * cols + 1.0) / scale),
                np.round((stride * rows + 1.0) / scale),
                np.round((stride * cols + 1.0 + cell_size) / scale),
                np.round((stride * rows + 1.0 + cell_size) / scale),
                score, box_offsets
            ]).T

            # Per-scale NMS: the selection has to be applied, not discarded.
            keep = nms(boxes[:, 0:5], overlap_threshold=0.5)
            bounding_boxes.append(boxes[keep])

        if not bounding_boxes:
            # np.vstack rejects an empty list; report "no candidates" as an
            # empty result with the documented column layout instead.
            return np.empty((0, 5))

        # Collect boxes (with scores and offsets) from all scales.
        bounding_boxes = np.vstack(bounding_boxes)

        keep = nms(bounding_boxes[:, 0:5], nms_thresholds)
        bounding_boxes = bounding_boxes[keep]

        # Use the offsets predicted by P-Net to transform the boxes.
        bboxes = calibrate_box(bounding_boxes[:, 0:5], bounding_boxes[:, 5:])
        # shape [n_boxes, 5],  x1, y1, x2, y2, score

        bboxes = convert_to_square(bboxes)
        bboxes[:, 0:4] = np.round(bboxes[:, 0:4])

        return bboxes
Exemplo n.º 9
0
    def filter_prediction(self, boxes, points, probs, poses, ages, cls_idx):
        """Select candidate detections and run class-wise NMS on them.

        Candidates are the ``mc.TOP_N_DETECTION`` most probable predictions
        when that setting is positive and smaller than the number of
        predictions; otherwise they are the predictions whose probability
        exceeds ``mc.PROB_THRESH``. All per-detection arrays are filtered
        with the same indices.

        Args:
          boxes: array of [cx, cy, w, h].
          points: array of per-detection points, indexed like ``probs``.
          probs: array of probabilities
          poses: array of per-detection poses, indexed like ``probs``.
          ages: array of per-detection ages, indexed like ``probs``.
          cls_idx: array of class indices
        Returns:
          final_boxes, final_points, final_probs, final_poses, final_ages,
          final_cls_idx: parallel lists with the entries flagged by
          ``util.nms``, grouped by ascending class index.
        """
        mc = self.mc

        if len(probs) > mc.TOP_N_DETECTION > 0:
            # Indices of the N largest probabilities, highest first.
            chosen = probs.argsort()[::-1][:mc.TOP_N_DETECTION]
        else:
            chosen = np.nonzero(probs > mc.PROB_THRESH)[0]

        probs, boxes, points = probs[chosen], boxes[chosen], points[chosen]
        cls_idx, poses, ages = cls_idx[chosen], poses[chosen], ages[chosen]

        final_boxes, final_points, final_probs = [], [], []
        final_cls_idx, final_poses, final_ages = [], [], []

        for c in range(mc.CLASSES):
            members = np.nonzero(cls_idx == c)[0]
            keep = util.nms(boxes[members], probs[members], mc.NMS_THRESH)

            # keep[pos] refers to the pos-th member of this class.
            for pos, kept in enumerate(keep):
                if not kept:
                    continue
                src = members[pos]
                final_points.append(points[src])
                final_boxes.append(boxes[src])
                final_probs.append(probs[src])
                final_poses.append(poses[src])
                final_ages.append(ages[src])
                final_cls_idx.append(c)

        return (final_boxes, final_points, final_probs, final_poses,
                final_ages, final_cls_idx)
Exemplo n.º 10
0
    # Predicted class of each of the S * S entries: the class with the
    # highest probability.
    pred_cates = torch.argmax(pred_probs, dim=1)
    # Classification score: class probability times the confidence of the
    # most confident box of the same entry.
    pred_confidences_idxs = torch.argmax(pred_confidences, dim=1)
    pred_cate_probs = pred_probs[range(S * S), pred_cates] \
                      * pred_confidences[range(S * S), pred_confidences_idxs]
    # Predicted bounding box: the four values of the most confident box
    # (boxes are laid out four values at a time along dim 1 of pred_bboxs).
    pred_cate_bboxs = torch.zeros(S * S, 4)
    pred_cate_bboxs[:, 0] = pred_bboxs[range(S * S), pred_confidences_idxs * 4]
    pred_cate_bboxs[:, 1] = pred_bboxs[range(S * S),
                                       pred_confidences_idxs * 4 + 1]
    pred_cate_bboxs[:, 2] = pred_bboxs[range(S * S),
                                       pred_confidences_idxs * 4 + 2]
    pred_cate_bboxs[:, 3] = pred_bboxs[range(S * S),
                                       pred_confidences_idxs * 4 + 3]

    # Scale the predicted bounding boxes back to the original image.
    pred_bboxs = util.deform_bboxs(pred_cate_bboxs, data_dict, S)

    # NOTE(review): nms_rects / nms_scores / nms_cates are not used below;
    # the drawing call receives the pre-NMS predictions. Confirm whether the
    # NMS results were meant to be drawn instead.
    nms_rects, nms_scores, nms_cates = util.nms(pred_bboxs, pred_cate_probs,
                                                pred_cates)
    # Draw the annotated boxes and the predicted boxes on the source image.
    dst = draw.plot_bboxs(data_dict['src'], data_dict['bndboxs'],
                          data_dict['name_list'], cate_list, pred_bboxs,
                          pred_cates, pred_cate_probs)
    cv2.imwrite('./detect.png', dst)
    # BGR -> RGB
    dst = cv2.cvtColor(dst, cv2.COLOR_BGR2RGB)
    draw.show(dst)
Exemplo n.º 11
0
from utils.checkpoint import load_checkpoint
from utils.util import nms
import os
import cv2
import numpy as np
import time
# Demo: run the detector over the validation set and show every image with
# its detections drawn; press Esc to stop, any other key for the next image.
cfg = dark53_yolo

loader = DataLoader(yoloPascal(cfg, 'val'), batch_size=1, shuffle=False)
model = Yolodet(cfg, pretrained=False)
load_checkpoint(model, 'weights/dark_yolo/model_273.pth')
model.eval()
model.cuda()
for batch in loader:
    # NOTE(review): this times only the Python call; if CUDA work is queued
    # asynchronously the printed fps is optimistic. Confirm before relying
    # on it.
    start = time.time()
    out = model(batch['input'].to(device='cuda'))
    print('fps: {}'.format(1 / (time.time() - start)))
    file_name = loader.dataset.coco.loadImgs(
        ids=[batch['img_id'].numpy()[0]])[0]['file_name']
    img_path = os.path.join(loader.dataset.img_dir, file_name)
    image = cv2.imread(img_path)
    if image is None:
        # cv2.imread signals an unreadable file by returning None.
        raise FileNotFoundError('cannot read image: {}'.format(img_path))
    shape = image.shape
    bboxes = model.convert_pred(out, shape, 0.4)
    bboxes = nms(bboxes, 0.4, 0.4)
    for bb in bboxes:
        # np.int was removed from NumPy; the builtin int is the same dtype.
        x, y, x1, y1 = bb.astype(int)[:4]
        cv2.rectangle(image, (x, y), (x1, y1), (255, 0, 0), 3)
    cv2.imshow('', image)
    if cv2.waitKey(0) & 0xff == 27:
        break
cv2.destroyAllWindows()
Exemplo n.º 12
0
        print('the shape of scores is {}'.format(scores.shape))

        boxes_total.append(boxes)
        scores_total.append(scores)

    # Merge the per-iteration box and score arrays collected above.
    boxes_total = np.concatenate(boxes_total, axis=0)
    scores_total = np.concatenate(scores_total, axis=0)

    # implement nms for each class
    for i in range(num_classes):
        # Keep the class score as a trailing column so that every row is
        # [box..., score]; column 4 is then used as the score.
        score_per_class = scores_total[..., i:i + 1]
        rectangles = np.concatenate([boxes_total, score_per_class], axis=-1)
        # Only rows scoring above 0.6 for this class go through NMS.
        have_object = np.where(rectangles[..., 4] > 0.6)[0]
        # print(have_object)
        rectangles = rectangles[have_object]
        pick = nms(rectangles, threshold=0.3)
        # pick = tf.image.non_max_suppression(rectangles[..., 0:4], rectangles[..., 4], 20, 0.3)

        # print(pick)
        # boxes_pick = tf.gather(rectangles, pick)
        # NOTE(review): truth-testing `pick` assumes nms returns a list; a
        # NumPy array with more than one element would raise here - confirm.
        if pick:
            boxes_pick = rectangles[pick]
            for box in boxes_pick:
                # cv2.rectangle(street_cv2, (int(box[0]), int(box[1])), (int(box[2]), int(box[3])), (0, 0, 255), 2)

                # correct box: clamp the corners to the image bounds
                # (presumably street.size is (width, height) - TODO confirm)
                x1, y1, x2, y2, _ = box
                left = int(max(0, x1))
                top = int(max(0, y1))
                right = int(min(street.size[0], x2))
                bottom = int(min(street.size[1], y2))
Exemplo n.º 13
0
from config import hrnet_yolo, dark53_yolo
from utils.checkpoint import load_checkpoint
from utils.util import nms
import os
import cv2
import numpy as np
import time
# Demo: run the detector over the validation set and show every image with
# its detections drawn; press Esc to stop, any other key for the next image.
cfg = dark53_yolo

loader = DataLoader(yoloPascal(cfg, 'val'), batch_size=1, shuffle=False)
model = Yolodet(cfg, pretrained=False)
load_checkpoint(model, 'weights/dark_yolo/model_13.pth')
model.eval()
model.cuda()
for batch in loader:
    # NOTE(review): this times only the Python call; if CUDA work is queued
    # asynchronously the printed fps is optimistic. Confirm before relying
    # on it.
    start = time.time()
    out = model(batch['input'].to(device='cuda'))
    print('fps: {}'.format(1 / (time.time() - start)))
    file_name = loader.dataset.coco.loadImgs(
        ids=[batch['img_id'].numpy()[0]])[0]['file_name']
    img_path = os.path.join(loader.dataset.img_dir, file_name)
    image = cv2.imread(img_path)
    if image is None:
        # cv2.imread signals an unreadable file by returning None.
        raise FileNotFoundError('cannot read image: {}'.format(img_path))
    shape = image.shape
    bboxes = model.convert_pred(out, shape, 0.5)
    bboxes = nms(bboxes, 0.5, 0.5)
    for bb in bboxes:
        # np.int was removed from NumPy; the builtin int is the same dtype.
        x, y, x1, y1 = bb.astype(int)[:4]
        cv2.rectangle(image, (x, y), (x1, y1), (255, 0, 0), 3)
    cv2.imshow('', image)
    if cv2.waitKey(0) & 0xff == 27:
        break
cv2.destroyAllWindows()