Example #1
def rnet_boxes(img,
               rnet,
               bounding_boxes,
               thresholds=THRESHOLDS,
               nms_thresholds=NMS_THRESHOLDS,
               show_boxes=True):
    rnet.eval()
    img_boxes = get_image_boxes(bounding_boxes, img, size=24)
    img_boxes = torch.FloatTensor(img_boxes)
    img_boxes = img_boxes.to(
        torch.device('cuda' if torch.cuda.is_available() else 'cpu'))
    output = rnet(img_boxes)
    probs = output[0].data.cpu().numpy()  # shape [n_boxes, 1]
    offsets = output[1].data.cpu().numpy()  # shape [n_boxes, 4]

    keep = np.where(probs[:, 0] > thresholds[1])[0]
    bounding_boxes = bounding_boxes[keep]
    bounding_boxes[:, 4] = probs[keep, 0].reshape((-1, ))
    offsets = offsets[keep]

    keep = nms(bounding_boxes, nms_thresholds[1])
    bounding_boxes = bounding_boxes[keep]
    bounding_boxes = calibrate_box(bounding_boxes, offsets[keep])
    bounding_boxes = convert_to_square(bounding_boxes)
    bounding_boxes[:, 0:4] = np.round(bounding_boxes[:, 0:4])
    if show_boxes: show_bboxes(img, bounding_boxes, []).show()
    return bounding_boxes
Example #2
def run_first_stage(image, net, scale, threshold):
    """ 
        Run P-Net, generate bounding boxes, and do NMS.
    """
    width, height = image.size
    sw, sh = math.ceil(width * scale), math.ceil(height * scale)
    img = image.resize((sw, sh), Image.BILINEAR)
    # img = np.asarray(img, 'float32')
    # preprocess: normalize the image and convert it to a tensor
    img = transforms.ToTensor()(img).unsqueeze(0)
    img = img.to(torch.device('cuda' if torch.cuda.is_available() else 'cpu'))
    # print('img:', img)

    output = net(img)
    # batch = 1 since there is only one image, hence the leading [0, ...]
    # channel 1 of the output holds the probability of face=True
    probs = output[0].data.cpu().numpy()[0, 0, :, :]
    # offsets shape[4, o_h,o_w]
    offsets = output[1].data.cpu().numpy()
    # print('offsets:', offsets)
    # boxes
    boxes = _generate_bboxes(probs, offsets, scale, threshold)
    if len(boxes) == 0:
        return None

    # [[x1,y1,x2,y2,score,offsets],[]...]
    # use only the 4 coordinates plus the confidence score for NMS
    keep = nms(boxes[:, 0:5], overlap_threshold=0.5)
    return boxes[keep]
Example #3
 def predict(self, image, n, overlap_thresh):
     boxes, scores = self.edgebox.getproposals(image)  # edgeboxes also gives scores
     boxes = util.cv2_to_numpy(boxes)
     boxes, scores, _ = util.topn(boxes, scores, n)
     boxes, scores, _ = util.nms(boxes, scores, overlap_thresh)
     scores = self.infer(image, boxes)
     return boxes, scores
Example #4
def pnet_boxes(img, pnet, min_face_size=MIN_FACE_SIZE, thresholds=THRESHOLDS, nms_thresholds=NMS_THRESHOLDS,
               show_boxes=True):
    pnet.eval()
    width, height = img.size
    min_length = min(height, width)
    # print('img min_length is {}'.format(min_length))
    min_detection_size = 12
    factor = 0.707  # sqrt(0.5)
    scales = []
    # min_face_size comes from the caller (MIN_FACE_SIZE by default)
    m = min_detection_size / min_face_size
    # scale the original image so that the smallest face becomes 12 px
    min_length *= m
    # going from the smallest face at 12 px down to the whole image at 12 px, save the corresponding scale factors (all less than 1)
    factor_count = 0
    while min_length > min_detection_size:
        scales.append(m * factor ** factor_count)
        min_length *= factor
        factor_count += 1

    # STAGE 1
    bounding_boxes = []
    for s in scales:  # run P-Net on different scales
        boxes = run_first_stage(img, pnet, scale=s, threshold=thresholds[0])
        bounding_boxes.append(boxes)
        # bounding_boxes shape: [scales, boxes_num_each_scale, 5]
    # stack the boxes found at every scale into one list
    # a list of arrays, [total_boxes_num, 5] in total
    bounding_boxes = [i for i in bounding_boxes if i is not None]
    # print(bounding_boxes)
    # bounding_boxes = np.array(bounding_boxes)
    # print(bounding_boxes.shape, img.size)
    try:
        _ = bounding_boxes[0]
        # print('bounding_boxes:{}'.format(len(bounding_boxes)))
        # print('bounding_boxes[0]:{}'.format(len(bounding_boxes[0])))
    except Exception:
        print(bounding_boxes)
        img.show()
    if len(bounding_boxes) == 0:
        return None
    bounding_boxes = np.vstack(bounding_boxes)
    # print(bounding_boxes.shape)

    keep = nms(bounding_boxes[:, 0:5], nms_thresholds[0])
    bounding_boxes = bounding_boxes[keep]
    # print('bounding_boxes:{}'.format(bounding_boxes[:, 4] > 0.5))
    # fine-tune the x1, y1, x2, y2 positions according to w and h
    bounding_boxes = calibrate_box(bounding_boxes[:, 0:5], bounding_boxes[:, 5:])
    # convert the detected boxes to squares
    bounding_boxes = convert_to_square(bounding_boxes)
    bounding_boxes[:, 0:4] = np.round(bounding_boxes[:, 0:4])
    # print('bounding_boxes:{}'.format(bounding_boxes[:, 4] > 0.5))
    # print('bounding_boxes:', len(bounding_boxes), bounding_boxes)
    if show_boxes: show_bboxes(img, bounding_boxes, []).show()
    return bounding_boxes
Example #5
def _worker_match(img, template_name, channel):
    global _worker_templates

    if template_name not in _worker_templates:
        return []

    # Store bounding boxes.
    boxes = []

    # Get the template.
    template_images, threshold, center = _worker_templates[template_name]

    for scale, (template_color, template_gray) in template_images.items():
        # Select a channel.
        template = template_gray
        if channel is not None:
            template = template_color[:, :, channel]

        # Find the center.
        w, h = template.shape[::-1]
        scaled_center = None
        if center is None:
            scaled_center = (w // 2, h // 2)
        else:
            scaled_center = (center[0] * scale, center[1] * scale)

        # Make sure the template is smaller than the image.
        if w >= img.shape[1] or h >= img.shape[0]:
            continue

        # Search for the template.
        res = cv2.matchTemplate(img, template, cv2.TM_SQDIFF_NORMED)

        # Threshold the result.
        match_locations = np.where(res <= threshold)

        # Find all locations.
        for (x, y) in zip(match_locations[1], match_locations[0]):
            v = res[y, x]
            x = int(x + scaled_center[0])
            y = int(y + scaled_center[1])

            boxes.append((x - w // 2, y - h // 2, x + w // 2, y + h // 2))

    # Run non maximum suppression.
    boxes = util.nms(np.asarray(boxes), 0.0)
    if len(boxes) == 0:
        return []

    # Return the centers of the boxes.
    centers_x = (boxes[:, 2] + boxes[:, 0]) / 2
    centers_y = (boxes[:, 3] + boxes[:, 1]) / 2
    centers = list(zip(centers_x, centers_y))

    return centers
Example #6
def onet_boxes(img,
               onet,
               bounding_boxes,
               thresholds=THRESHOLDS,
               nms_thresholds=NMS_THRESHOLDS,
               show_boxes=True):
    onet.eval()
    img_boxes = get_image_boxes(bounding_boxes, img, size=48)
    if len(img_boxes) == 0:
        return [], []
    img_boxes = torch.FloatTensor(img_boxes)
    img_boxes = img_boxes.to(
        torch.device('cuda' if torch.cuda.is_available() else 'cpu'))
    output = onet(img_boxes)

    probs = output[0].data.cpu().numpy()  # shape [n_boxes, 1]
    offsets = output[1].data.cpu().numpy()  # shape [n_boxes, 4]
    landmarks = output[2].data.cpu().numpy()  # shape [n_boxes, 10]

    keep = np.where(probs[:, 0] > thresholds[2])[0]
    bounding_boxes = bounding_boxes[keep]
    # update the original confidence with the larger network's score
    bounding_boxes[:, 4] = probs[keep, 0].reshape((-1, ))
    offsets = offsets[keep]
    landmarks = landmarks[keep]

    # compute landmark points
    width = bounding_boxes[:, 2] - bounding_boxes[:, 0] + 1.0
    height = bounding_boxes[:, 3] - bounding_boxes[:, 1] + 1.0
    xmin, ymin = bounding_boxes[:, 0], bounding_boxes[:, 1]
    # print('width:{},\nheight:{},\nxmin:{},\nymin:{}\n'.format(width, height, xmin, ymin))
    # landmarks: the first 5 values are x, the last 5 are y
    # starting from the top-left corner, use w and h to get the pixel coordinates of each facial landmark
    landmarks_pixel = np.zeros(landmarks.shape)
    landmarks_pixel[:, 0:5] = (
        np.expand_dims(xmin, 1) +
        np.expand_dims(width, 1) * landmarks[:, 0::2]).copy()
    landmarks_pixel[:, 5:10] = (
        np.expand_dims(ymin, 1) +
        np.expand_dims(height, 1) * landmarks[:, 1::2]).copy()
    # for i in landmarks:print(i)
    bounding_boxes = calibrate_box(bounding_boxes, offsets)
    keep = nms(bounding_boxes, nms_thresholds[2], mode='min')
    bounding_boxes = bounding_boxes[keep]
    landmarks_pixel = landmarks_pixel[keep]
    if show_boxes: show_bboxes(img, bounding_boxes, landmarks_pixel).show()
    return bounding_boxes, landmarks_pixel
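Examples #4, #1 and #6 above implement the three MTCNN stages (P-Net, R-Net, O-Net). A minimal sketch of how they would typically be chained is shown below; `pnet`, `rnet`, `onet` and the input image are assumed to be loaded elsewhere, and this glue code is hypothetical rather than taken from any of the repositories above.

# Hypothetical glue code chaining the three MTCNN stages from Examples #4, #1 and #6.
from PIL import Image

img = Image.open('face.jpg')  # assumed input image
proposals = pnet_boxes(img, pnet, show_boxes=False)                  # stage 1: coarse proposals
if proposals is not None and len(proposals) > 0:
    refined = rnet_boxes(img, rnet, proposals, show_boxes=False)     # stage 2: refine and re-score
    boxes, landmarks = onet_boxes(img, onet, refined, show_boxes=False)  # stage 3: final boxes + landmarks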
Example #7
def detect(split="val", root_path="sandbox", year=2012, gpu=True):

    m = MatroidModel("matroid/Everyday-Objects.matroid", gpu)
    voc_train = VOCDetection("~/data/voc/",
                             image_set=split,
                             download=True,
                             year=str(year))
    # voc_train = VOCDetection("/deep/group/haosheng/voc/", image_set=split)

    GROUNDTRUTH_PATH = os.path.join("Object-Detection-Metrics", "groundtruths")
    PREDICTION_PATH = os.path.join("Object-Detection-Metrics", "detections")
    os.makedirs(GROUNDTRUTH_PATH, exist_ok=True)
    os.makedirs(PREDICTION_PATH, exist_ok=True)

    for img, target in tqdm(voc_train):
        # Ground Truth
        file_name = target['annotation']['filename'].replace("jpg", "txt")
        with open(os.path.join(GROUNDTRUTH_PATH, file_name), "w") as f:
            objs = target['annotation']['object']
            if not isinstance(objs, list):
                objs = [objs]
            for obj in objs:
                name = obj['name']
                bbox = obj['bndbox']
                xmin, ymin, xmax, ymax = bbox['xmin'], bbox['ymin'], bbox[
                    'xmax'], bbox['ymax']
                f.write(f"{name} {xmin} {ymin} {xmax} {ymax}\n")

        # Prediction
        with open(os.path.join(PREDICTION_PATH, file_name), "w") as f:
            boxes, probs = m.predict(img)
            preds = nms(boxes, probs)

            h, w = img.size
            for label, confidence, bbox in preds:
                xmin, ymin, xmax, ymax = bbox[0] * h, bbox[1] * w, bbox[2] * h, bbox[3] * w
                name = VOC_LABEL2NAME[label]
                f.write(
                    f"{name} {confidence} {xmin:.0f} {ymin:.0f} {xmax:.0f} {ymax:.0f}\n"
                )
Example #8
    def forward(self, loc_data, conf_data, prior_data):
        """
        Args:
            loc_data: (tensor) Loc preds from loc layers
                Shape: [batch,num_priors*4]
            conf_data: (tensor) Conf preds from conf layers
                Shape: [batch*num_priors,num_classes]
            prior_data: (tensor) Prior boxes and variances from priorbox layers
                Shape: [1,num_priors,4]
        """
        num = loc_data.size(0)  # batch size
        num_priors = prior_data.size(0)
        output = torch.zeros(num, self.num_classes, self.top_k, 5)
        conf_preds = conf_data.view(num, num_priors,
                                    self.num_classes).transpose(2, 1)

        # Decode predictions into bboxes.
        for i in range(num):
            decoded_boxes = decode(loc_data[i], prior_data, self.variance)
            # For each class, perform nms
            conf_scores = conf_preds[i].clone()

            for cl in range(1, self.num_classes):
                c_mask = conf_scores[cl].gt(self.conf_thresh)
                scores = conf_scores[cl][c_mask]
                if scores.size(0) == 0:
                    continue
                l_mask = c_mask.unsqueeze(1).expand_as(decoded_boxes)
                boxes = decoded_boxes[l_mask].view(-1, 4)
                # idx of highest scoring and non-overlapping boxes per class
                ids, count = nms(boxes, scores, self.nms_thresh, self.top_k)
                output[i, cl, :count] = \
                    torch.cat((scores[ids[:count]].unsqueeze(1),
                               boxes[ids[:count]]), 1)
        flt = output.contiguous().view(num, -1, 5)
        _, idx = flt[:, :, 0].sort(1, descending=True)
        _, rank = idx.sort(1)
        flt[(rank < self.top_k).unsqueeze(-1).expand_as(flt)].fill_(0)
        return output
Example #9
    def hard_negative_mine(self, folder, epochs):
        # this function is used to train the SVM by hard negative mining
        # initialize the SVM with default weights from the positive sample means
        self.initialize_svm(folder)
        util.printProgressBar(0, epochs, prefix='Starting', suffix='complete')
        for epoch in range(epochs):
            # STEP 1: Mine for negative samples
            for i, img in enumerate(self.imgs):
                boxes, scores = self.edgebox.getproposals(img)
                if not isinstance(boxes, tuple):  # if boxes is a tuple, it is empty
                    boxes = util.cv2_to_numpy(boxes)
                    boxes, scores = self.filter_negsamples(boxes, scores, i)
                    if boxes.shape[0] > self.n:  # only do it if there are more than n samples
                        boxes, scores, _ = util.topn(boxes, scores, self.n)
                    if boxes.shape[0] > 0:  # only do it if some boxes survive
                        boxes, scores, _ = util.nms(boxes, scores,
                                                    self.overlap_thresh)
                        self.neg_rects.append(boxes.tolist())
            #    else:
            #        print("no boxes detected!")

            # STEP 2: Add those samples into dataset
            self.populate_data(True)
            self.populate_data(False)

            # STEP 3: Prepare data
            X, y = self.prepare_data()

            # STEP 4: train the svm
            self.train_svm(X, y)
            util.printProgressBar(epoch + 1,
                                  epochs,
                                  prefix='Epoch %d' % (epoch + 1),
                                  suffix='complete')
        print('Training successfully finished after %d epochs' % epochs)
Example #10
def test_img(img, model):
    pts_all = []
    scores_all = []

    # original size
    pts, scores, _, _, _ = testutil.run_pipeline(img, model)
    pts_all += pts
    scores_all += scores

    # crop ratio 2
    imgs2, metas2 = util.crop_images(img, 2)
    for i, m in zip(imgs2, metas2):
        pts, scores, _, _, _ = testutil.run_pipeline(i, model)
        pts = util.restore_pts(pts, m)
        pts_all += pts
        scores_all += scores

    # crop ratio 4
    imgs2, metas2 = util.crop_images(img, 4)
    for i, m in zip(imgs2, metas2):
        pts, scores, _, _, _ = testutil.run_pipeline(i, model)
        pts = util.restore_pts(pts, m)
        pts_all += pts
        scores_all += scores

    pts, scores = util.nms(pts_all, scores_all)
    # print(scores)

    # for i in range(len(pts)):
    # 	if scores[i]<0.3:
    # 		continue
    # 	imgcp = img.copy()
    # 	skltn = vis_skeleton(imgcp, pts[i])
    # 	cv2.imwrite('outputs/skt_%d.png'%i, skltn)

    return pts, scores
Example #11
def get_bboxes(outputs,
               proposals,
               num_proposals,
               num_classes,
               im_shape,
               im_scale,
               max_per_image=100,
               thresh=0.001,
               nms_thresh=0.4):
    """
    Returns bounding boxes for detected objects, organized by class.

    Transforms the proposals from the region proposal network to bounding box predictions
    using the bounding box regressions from the classification network:
    (1) Applying bounding box regressions to the region proposals.
    (2) For each class, take proposed boxes where the corresponding objectness score is greater
        than THRESH.
    (3) Apply non-maximum suppression across classes using NMS_THRESH
    (4) Limit the maximum number of detections over all classes to MAX_PER_IMAGE

    Arguments:
        outputs (list of tensors): Faster-RCNN model outputs
        proposals (Tensor): Proposed boxes from the model's proposalLayer
        num_proposals (int): Number of proposals
        num_classes (int): Number of classes
        im_shape (tuple): Shape of image
        im_scale (float): Scaling factor of image
        max_per_image (int): Maximum number of allowed detections per image. Default is 100.
                             None indicates no enforced maximum.
        thresh (float): Threshold for objectness score. Default is 0.001.
        nms_thresh (float): Threshold for non-maximum suppression. Default is 0.4.

    Returns:
        detections (array): Array of bounding box detections in a N x 6 array. Each bounding box
                            has the following attributes: [xmin, ymin, xmax, ymax, score, class]

    """

    proposals = proposals.get()[:num_proposals, :]  # remove padded proposals
    boxes = proposals[:, 1:5] / im_scale  # scale back to real image space

    # obtain bounding box corrections from the frcn layers
    scores = outputs[2][0].get()[:, :num_proposals].T
    bbox_deltas = outputs[2][1].get()[:, :num_proposals].T

    # apply bounding box corrections to the region proposals
    pred_boxes = util.bbox_transform_inv(boxes, bbox_deltas)
    pred_boxes = util.clip_boxes(pred_boxes, im_shape)

    detections = []
    # Skip the background class, start processing from class 1
    for j in range(1, num_classes):
        inds = np.where(scores[:, j] > thresh)[0]

        # obtain class-specific boxes and scores
        cls_labels = j * np.ones((len(inds), 1))
        cls_scores = scores[inds, j]
        cls_boxes = pred_boxes[inds, j * 4:(j + 1) * 4]
        cls_dets = np.hstack(
            (cls_boxes, cls_scores[:, np.newaxis], cls_labels)
        ).astype(np.float32, copy=False)

        # apply non-max suppression
        keep = util.nms(cls_dets, nms_thresh)
        cls_dets = cls_dets[keep, :]

        # store results
        if cls_dets.size != 0:
            detections.append(cls_dets)  # detections[j] = cls_dets

    # guard against no detections
    if len(detections) != 0:
        detections = np.vstack(detections)

    # Limit to max_per_image detections *over all classes*
    if max_per_image is not None:
        if len(detections) > max_per_image:
            # compute threshold needed to keep the top max_per_image
            image_thresh = np.sort(detections[:, -2])[-max_per_image]

            keep = np.where(detections[:, -2] >= image_thresh)[0]
            detections = detections[keep, :]

    # For each bounding box:
    # [xmin, ymin, xmax, ymax, score, class]

    return detections
Example #12
def main():
    args = parse_cmdline()
    img_fn = os.path.abspath(args.img_fn)
    if not os.path.exists(img_fn):
        print('Not found: {}'.format(img_fn))
        sys.exit(-1)
    else:
        print('Target image: {}'.format(img_fn))

    # Load target image
    target_image = util.load_target_image(img_fn)

    # Get object proposals
    object_proposals = util.get_object_proposals(target_image)

    # Setup computation graph
    graph_params = setup_graph()

    # Model initialize
    sess = tf.Session(graph=graph_params['graph'])
    sess.run(tf.global_variables_initializer())
    if os.path.exists('models'):
        save_path = os.path.join('models', 'deep_logo_model')
        graph_params['saver'].restore(sess, save_path)
        print('Model restored')
    else:
        print('Initialized')

    # Logo recognition
    results = []
    for obj_proposal in object_proposals:
        x, y, w, h = obj_proposal
        crop_image = target_image[y:y + h, x:x + w]
        results.append(
            logo_recognition(sess, crop_image, obj_proposal, graph_params))

    del_idx = []
    for i, result in enumerate(results):
        if result['pred_class'] == common.CLASS_NAME[-1]:
            del_idx.append(i)
    results = np.delete(results, del_idx)

    # Non-max suppression
    nms_results = util.nms(results, pred_prob_th=0.999999, iou_th=0.4)

    # Draw rectangles on the target image
    fig, ax = plt.subplots(ncols=1, nrows=1, figsize=(6, 6))
    ax.imshow(target_image)

    for result in nms_results:
        print(result)
        (x, y, w, h) = result['obj_proposal']
        ax.text(x,
                y,
                result['pred_class'],
                fontsize=13,
                bbox=dict(facecolor='red', alpha=0.7))
        rect = mpatches.Rectangle((x, y),
                                  w,
                                  h,
                                  fill=False,
                                  edgecolor='red',
                                  linewidth=1)
        ax.add_patch(rect)
    plt.show()
Example #13
def evaluate(model,
             image_path,
             target_path,
             iou_thres,
             conf_thres,
             nms_thres,
             image_size,
             batch_size,
             num_workers,
             device,
             output=False):
    model.eval()

    dataSet = utilData.ListDataset(image_path,
                                   target_path,
                                   augment=False,
                                   img_size=image_size)
    dataLoader = torch.utils.data.DataLoader(dataSet,
                                             batch_size=batch_size,
                                             shuffle=False,
                                             num_workers=num_workers,
                                             collate_fn=dataSet.collate_fn)

    labels = []
    correct = 0
    error = 0
    entire_time = 0
    if output and not os.path.isdir('./dog_dataset/eval/result_image'):
        os.mkdir('./dog_dataset/eval/result_image')
    for paths, images, targets in tqdm.tqdm(
            dataLoader, desc='Evaluate method', leave=False):
        if targets is None:
            continue

        labels.extend(targets[:, 1].tolist())
        targets[:, 1:] = util.get_xxyy_from_xywh(targets[:, 1:])
        targets[:, 1:] *= image_size

        start_time = time.time()
        with torch.no_grad():
            images = images.to(device)
            outputs = model(images)
            outputs = util.nms(outputs, conf_thres, nms_thres)
        entire_time += time.time() - start_time
        if output:
            for i, path in enumerate(paths):
                img = cv2.imread(path)
                h, w, a = img.shape
                if h > w: pad = [0, 0, (h - w) // 2, (h - w) - ((h - w) // 2)]
                else: pad = [(w - h) // 2, (w - h) - ((w - h) // 2), 0, 0]
                img = cv2.copyMakeBorder(img,
                                         pad[0],
                                         pad[1],
                                         pad[2],
                                         pad[3],
                                         cv2.BORDER_CONSTANT,
                                         value=[0, 0, 0])
                img = cv2.resize(img, (image_size, image_size),
                                 interpolation=cv2.INTER_AREA)
                splited_path = os.path.split(path)
                if outputs[i] is None:
                    if targets[i, 1:].sum() == 0:
                        correct += 1
                    else:
                        error += 1
                        print('outputnone error', path)
                    cv2.imwrite(
                        './dog_dataset/eval/result_image/' + splited_path[1],
                        img)
                    continue
                for box in outputs[i]:
                    img = cv2.rectangle(img, (int(box[0]), int(box[1])),
                                        (int(box[2]), int(box[3])),
                                        (0, 255, 0), 3)
                if targets[i, 1:].sum() == 0:
                    error += 1
                    print('targetnone error', path)
                    cv2.imwrite(
                        './dog_dataset/eval/result_image/' + splited_path[1],
                        img)
                    continue
                ious = util.get_bbox_iou(outputs[i], targets[i:i + 1, 1:],
                                         True)
                for iou in ious:
                    if iou >= iou_thres: correct += 1 / len(ious)
                    else:
                        error += 1 / len(ious)
                        print(iou, path)
                cv2.imwrite(
                    './dog_dataset/eval/result_image/' + splited_path[1], img)
        else:
            for i, path in enumerate(paths):
                if outputs[i] is None:
                    if targets[i, 1:].sum() == 0:
                        correct += 1
                    else:
                        error += 1
                    continue
                ious = util.get_bbox_iou(outputs[i], targets[i:i + 1, 1:],
                                         True)
                for iou in ious:
                    if iou >= iou_thres: correct += 1 / len(ious)
                    else: error += 1 / len(ious)
    return correct, error, correct / (correct + error) * 100
Example #14
def get_bboxes(outputs, proposals, num_proposals, num_classes,
               im_shape, im_scale, max_per_image=100, thresh=0.001, nms_thresh=0.4):
    """
    Returns bounding boxes for detected objects, organized by class.

    Transforms the proposals from the region proposal network to bounding box predictions
    using the bounding box regressions from the classification network:
    (1) Applying bounding box regressions to the region proposals.
    (2) For each class, take proposed boxes where the corresponding objectness score is greater
        than THRESH.
    (3) Apply non-maximum suppression across classes using NMS_THRESH
    (4) Limit the maximum number of detections over all classes to MAX_PER_IMAGE

    Arguments:
        outputs (list of tensors): Faster-RCNN model outputs
        proposals (Tensor): Proposed boxes from the model's proposalLayer
        num_proposals (int): Number of proposals
        num_classes (int): Number of classes
        im_shape (tuple): Shape of image
        im_scale (float): Scaling factor of image
        max_per_image (int): Maximum number of allowed detections per image. Default is 100.
                             None indicates no enforced maximum.
        thresh (float): Threshold for objectness score. Default is 0.001.
        nms_thresh (float): Threshold for non-maximum suppression. Default is 0.4.

    Returns:
        detections (array): Array of bounding box detections in a N x 6 array. Each bounding box
                            has the following attributes: [xmin, ymin, xmax, ymax, score, class]

    """

    proposals = proposals.get()[:num_proposals, :]  # remove padded proposals
    boxes = proposals[:, 1:5] / im_scale  # scale back to real image space

    # obtain bounding box corrections from the frcn layers
    scores = outputs[2][0].get()[:, :num_proposals].T
    bbox_deltas = outputs[2][1].get()[:, :num_proposals].T

    # apply bounding box corrections to the region proposals
    pred_boxes = util.bbox_transform_inv(boxes, bbox_deltas)
    pred_boxes = util.clip_boxes(pred_boxes, im_shape)

    detections = []
    # Skip the background class, start processing from class 1
    for j in range(1, num_classes):
        inds = np.where(scores[:, j] > thresh)[0]

        # obtain class-specific boxes and scores
        cls_labels = j * np.ones((len(inds), 1))
        cls_scores = scores[inds, j]
        cls_boxes = pred_boxes[inds, j * 4:(j + 1) * 4]
        cls_dets = np.hstack((cls_boxes, cls_scores[:, np.newaxis],
                              cls_labels)).astype(np.float32, copy=False)

        # apply non-max suppression
        keep = util.nms(cls_dets, nms_thresh)
        cls_dets = cls_dets[keep, :]

        # store results
        if cls_dets.size != 0:
            detections.append(cls_dets)  # detections[j] = cls_dets

    # guard against no detections
    if len(detections) != 0:
        detections = np.vstack(detections)

    # Limit to max_per_image detections *over all classes*
    if max_per_image is not None:
        if len(detections) > max_per_image:
            # compute threshold needed to keep the top max_per_image
            image_thresh = np.sort(detections[:, -2])[-max_per_image]

            keep = np.where(detections[:, -2] >= image_thresh)[0]
            detections = detections[keep, :]

    # For each bounding box:
    # [xmin, ymin, xmax, ymax, score, class]

    return detections
Example #15
test_outputs = []

start = time.time()

for i, fname in enumerate(os.listdir(test_jpg_dir)):
    print(f"Predicting boxes for image # {i+1}\r", end="")
    fpath = os.path.join(test_jpg_dir, fname)
    fid = fname[:-4]

    boxes_pred1, scores1 = util.get_detection_from_file(fpath, model1, sz)
    boxes_pred2, scores2 = util.get_detection_from_file(fpath, model2, sz)

    indices1 = np.where(scores1 > score_threshold1)[0]
    scores1 = scores1[indices1]
    boxes_pred1 = boxes_pred1[indices1]
    boxes_pred1, scores1 = util.nms(boxes_pred1, scores1, nms_threshold)

    indices2 = np.where(scores2 > score_threshold2)[0]
    scores2 = scores2[indices2]
    boxes_pred2 = boxes_pred2[indices2]
    boxes_pred2, scores2 = util.nms(boxes_pred2, scores2, nms_threshold)

    boxes_pred = np.concatenate((boxes_pred1, boxes_pred2))
    scores = np.concatenate((scores1, scores2))

    boxes_pred, scores = util.averages(boxes_pred, scores, wt_overlap,
                                       solo_min)
    util.shrink(boxes_pred, shrink_factor)

    output = ''
    for j, bb in enumerate(boxes_pred):
Example #16
def main():
    args = parse_cmdline()
    img_fn = os.path.abspath(args.img_fn)
    save_img = args.save_img
    if not os.path.exists(img_fn):
        print('Not found: {}'.format(img_fn))
        sys.exit(-1)
    else:
        print('Target image: {}'.format(img_fn))

    # Load target image
    target_image = cv2.imread(img_fn)

    # Get object proposals
    object_proposals = util.get_object_proposals(target_image)

    # Setup computation graph
    graph_params = setup_graph()

    # Model initialize
    sess = tf.Session(graph=graph_params['graph'])
    sess.run(tf.global_variables_initializer())
    if os.path.exists('models'):
        save_path = os.path.join('models', 'deep_traffic_sign_model')
        graph_params['saver'].restore(sess, save_path)
        print('Model restored')
    else:
        print('Initialized')

    # traffic sign recognition
    results = []
    for obj_proposal in object_proposals:
        x, y, w, h = obj_proposal
        crop_image = target_image[y:y + h, x:x + w]
        results.append(
            traffic_sign_recognition(sess, crop_image, obj_proposal,
                                     graph_params))
    """
    del_idx = []
    for i, result in enumerate(results):
        if result['pred_class'] == common.CLASS_NAME[-1]:
            del_idx.append(i)
    results = np.delete(results, del_idx)
    """
    # Non-max suppression
    nms_results = util.nms(results, pred_prob_th=0.999999, iou_th=0.4)

    # Draw rectangles on the target image
    fig, ax = plt.subplots(ncols=1, nrows=1, figsize=(6, 6))
    ax.imshow(cv2.cvtColor(target_image, cv2.COLOR_BGR2RGB))

    for result in nms_results:
        print(result)
        (x, y, w, h) = result['obj_proposal']
        ax.text(x,
                y,
                cls2name(result['pred_class']),
                fontsize=13,
                bbox=dict(facecolor='red', alpha=0.7))
        rect = mpatches.Rectangle((x, y),
                                  w,
                                  h,
                                  fill=False,
                                  edgecolor='red',
                                  linewidth=1)
        ax.add_patch(rect)
    plt.show()

    # save the target image
    save_fname = os.path.splitext(os.path.basename(img_fn))[0] + '_result.jpg'
    if save_img:
        fig.savefig(save_fname, bbox_inches='tight', pad_inches=0.0)
Example #17
def getFace(image):
    raw_img_bgr = np.asarray(image)
    raw_img = cv2.cvtColor(raw_img_bgr, cv2.COLOR_BGR2RGB)
    raw_img_f = raw_img.astype(np.float32)

    scales = calc_scales(model_face, raw_img, clusters)

    bboxes = np.empty(shape=(0, 5))  # initialize output

    for s in scales:  # process input at different scales
        img = cv2.resize(raw_img_f, (0, 0),
                         fx=s,
                         fy=s,
                         interpolation=cv2.INTER_LINEAR)
        img = img - average_image
        img = img[np.newaxis, :]

        # we don't run every template on every scale; compute the ids of templates to keep (tids) and to ignore
        tids = list(range(4, 12)) + ([] if s <= 1.0 else list(range(18, 25)))
        ignoredTids = list(set(range(0, clusters.shape[0])) - set(tids))

        # run through the net
        score_final_tf = sess.run(score_final, feed_dict={x: img})

        # collect scores
        score_cls_tf = score_final_tf[:, :, :, :25]
        score_reg_tf = score_final_tf[:, :, :, 25:125]
        prob_cls_tf = expit(score_cls_tf)
        prob_cls_tf[0, :, :, ignoredTids] = 0.0

        tmp_bboxes = calc_bounding_boxes(prob_cls_tf, score_reg_tf,
                                         score_cls_tf, s)
        bboxes = np.vstack((bboxes, tmp_bboxes))

    refined_idx = nms(bboxes, nms_thresh)
    refined_bboxes = bboxes[refined_idx]

    # convert PIL Image to OpenCV Image
    image_cv = cv2.cvtColor(np.array(image), cv2.COLOR_RGB2BGR)
    origin_img = image_cv.copy()

    if len(bboxes) == 0:
        return False
    for refined_bbox in refined_bboxes:
        bbox = refined_bbox.astype(np.int64)

        cv2.rectangle(origin_img, (bbox[0], bbox[1]), (bbox[2], bbox[3]),
                      (255, 0, 0), 2)

        orig_w = image_cv.shape[0]
        orig_h = image_cv.shape[1]

        face_h = int((bbox[3] - bbox[1]) * 1.4)
        face_w = int((bbox[2] - bbox[0]) * 1.4)
        face_x = bbox[0] - int(face_w * 0.2)
        face_y = bbox[1] - int(face_h * 0.2)

        if face_x < 0:
            face_x = 0
        if face_y < 0:
            face_y = 0
        if face_w > orig_w:
            face_w = orig_w - 2
        if face_h > orig_h:
            face_h = orig_h - 2

        crop_face = image_cv[face_y:face_y + face_h,
                             face_x:face_x + face_w].copy()
        cv2.imwrite('cropped.jpg', crop_face)
        # crop_face2 = crop_face.copy()

        new_x = bbox[0] if bbox[0] > 0 else 0
        new_y = bbox[1] if bbox[1] > 0 else 0
        new_w = bbox[2] - new_x
        new_h = bbox[3] - new_y

        age, gender, face_cv2 = ageGender(crop_face)
        print(age, gender)
        # print(new_x, new_y, new_w, new_h)
        # age, gender = age_gender_detector(image_cv, face_x, face_y, face_w, face_h)
        # print(age, gender)

        if age is not False:
            font = cv2.FONT_HERSHEY_SIMPLEX
            bottomLeftCornerOfText = (bbox[0], bbox[1] - 15)
            fontScale = 1
            fontColor = (0, 0, 255)
            lineType = 2

            cv2.putText(origin_img, age + gender, bottomLeftCornerOfText, font,
                        fontScale, fontColor, lineType)

        # cv2.rectangle(image_cv, (bbox[0], bbox[1]), (bbox[2], bbox[3]), (255, 0, 0), 2)

    cv2.imwrite('output.jpg', origin_img)
    cv2.namedWindow('output', cv2.WINDOW_NORMAL)
    cv2.imshow('output', origin_img)
    cv2.resizeWindow('output', 600, 600)
    cv2.waitKey(0)
    cv2.destroyAllWindows()

    return len(refined_bboxes)
Example #18
for i in range(4998):
    png_name = 'test{:04d}.png'.format(i)
    fpath = os.path.join(test_jpg_dir, png_name)
    print(f"\rPredicting boxes for image : {fpath}", end="", flush=True)

    boxes_pred_list = []
    scores_list = []

    for model in models:

        boxes_pred, scores = util.get_detection_from_file(fpath, model, sz)

        indices = np.where(scores > score_threshold)[0]
        scores = scores[indices]
        boxes_pred = boxes_pred[indices]
        boxes_pred, scores = util.nms(boxes_pred, scores, nms_threshold)

        boxes_pred_list.append(boxes_pred)
        scores_list.append(scores)

    boxes_pred_np = np.concatenate(boxes_pred_list, axis=0)
    scores_np = np.concatenate(scores_list, axis=0)

    boxes_pred_np, scores_np = util.averages(boxes_pred_np, scores_np,
                                             wt_overlap, solo_min)
    util.shrink(boxes_pred_np, shrink_factor)

    hasBbox = False
    for j, bb in enumerate(boxes_pred_np):
        x1 = int(bb[0])
        y1 = int(bb[1])
Example #19
def main(file_name, graph_params, sess):
    img_fn = os.path.join("images", file_name)
    if not os.path.exists(img_fn):
        print('Not found: {}'.format(img_fn))
        sys.exit(-1)
    else:
        print('Target image: {}'.format(img_fn))

    # Load target image
    target_image = util.load_target_image(img_fn)

    #cv.normalize(target_image, target_image, 0, 255, cv.NORM_MINMAX)
    # limg = np.arcsinh(target_image)
    # limg /= limg.max()
    # low = np.percentile(limg, 0.25)
    # high = np.percentile(limg, 99.5)
    # opt_img = skie.exposure.rescale_intensity(limg, in_range=(low, high))
    # target_image = opt_img
    # target_image = target_image.astype(np.float64)

    # Get object proposals
    object_proposals = util.get_object_proposals(target_image)

    # Logo recognition
    results = []
    for obj_proposal in object_proposals:
        x, y, w, h = obj_proposal
        crop_image = target_image[y:y + h, x:x + w]
        results.append(
            logo_recognition(sess, crop_image, obj_proposal, graph_params))

    del_idx = []
    for i, result in enumerate(results):
        if result['pred_class'] == common.CLASS_NAME[-1]:
            del_idx.append(i)
    results = np.delete(results, del_idx)

    # Non-max suppression
    nms_results = util.nms(results, pred_prob_th=0.9, iou_th=0.4)

    # Draw rectangles on the target image
    fig, ax = plt.subplots(ncols=1, nrows=1, figsize=(6, 6))
    ax.imshow(target_image)
    for result in nms_results:
        print(result)
        (x, y, w, h) = result['obj_proposal']
        ax.text(x,
                y,
                "{} {:.2f}".format(result['pred_class'], result['pred_prob']),
                fontsize=13,
                bbox=dict(facecolor='red', alpha=0.7))
        rect = mpatches.Rectangle((x, y),
                                  w,
                                  h,
                                  fill=False,
                                  edgecolor='red',
                                  linewidth=1)
        ax.add_patch(rect)

    #img = BytesIO()
    plt.tight_layout()
    plt.savefig(os.path.join("results", file_name),
                bbox_inches='tight',
                pad_inches=0)
Example #20
    def forward(self, bottom, top):
        # Algorithm:
        #
        # for each (H, W) location i
        #   generate A anchor boxes centered on cell i
        #   apply predicted bbox deltas at cell i to each of the A anchors
        # clip predicted boxes to image
        # remove predicted boxes with either height or width < threshold
        # sort all (proposal, score) pairs by score from highest to lowest
        # take top pre_nms_topN proposals before NMS
        # apply NMS with threshold 0.7 to remaining proposals
        # take after_nms_topN proposals after NMS
        # return the top proposals (-> RoIs top, scores top)

        assert bottom[0].shape[0] == 1, \
            'Only single item batches are supported'

        # the first set of _num_anchors channels are bg probs
        # the second set are the fg probs, which we want
        scores = bottom[0][:, self._num_anchors:, :, :]
        bbox_deltas = bottom[1]
        im_info = [float(x.get()) for x in bottom[2]]

        if DEBUG:
            print('im_size: ({}, {})'.format(im_info[0], im_info[1]))
            print('scale: {}'.format(im_info[2]))

        # 1. Generate proposals from bbox deltas and shifted anchors
        height, width = scores.shape[-2:]

        if DEBUG:
            print('score map size: {}'.format(scores.shape))

        # Enumerate all shifts
        shift_x = np.arange(0, width) * self._feat_stride
        shift_y = np.arange(0, height) * self._feat_stride
        shift_x, shift_y = np.meshgrid(shift_x, shift_y)
        shifts = np.vstack((shift_x.ravel(), shift_y.ravel(), shift_x.ravel(),
                            shift_y.ravel())).transpose()

        # Enumerate all shifted anchors:
        #
        # add A anchors (1, A, 4) to
        # cell K shifts (K, 1, 4) to get
        # shift anchors (K, A, 4)
        # reshape to (K*A, 4) shifted anchors
        A = self._num_anchors
        K = shifts.shape[0]

        # Generate anchors in same order as we do in neon for unit testing
        # anchors = self._anchors.reshape((1, A, 4)) + \
        #          shifts.reshape((1, K, 4)).transpose((1, 0, 2))
        anchors = self._anchors.reshape((1, A, 4)).transpose((1, 0, 2)) + \
            shifts.reshape((1, K, 4))

        anchors = anchors.reshape((K * A, 4))

        # Transpose and reshape predicted bbox transformations to get them
        # into the same order as the anchors:
        #
        # bbox deltas will be (1, 4 * A, H, W) format
        # transpose to (1, H, W, 4 * A)
        # reshape to (1 * H * W * A, 4) where rows are ordered by (h, w, a)
        # in slowest to fastest order
        # bbox_deltas = bbox_deltas.transpose((0, 2, 3, 1)).reshape((-1, 4))

        # Re-order proposals to match neon for unit testing
        # bbox_deltas = bbox_deltas.reshape((38, 50, 9, 4)).transpose((2, 0, 1, 3)).reshape((-1,4))

        # Same story for the scores:
        #
        # scores are (1, A, H, W) format
        # transpose to (1, H, W, A)
        # reshape to (1 * H * W * A, 1) where rows are ordered by (h, w, a)
        # scores = scores.transpose((0, 2, 3, 1)).reshape((-1, 1))

        # Also re-order scores
        # scores = scores.reshape((38, 50, 9, 1)).transpose((2, 0, 1, 3)).reshape((-1, 1))

        scores = scores.reshape((-1, 1))

        # Convert anchors into proposals via bbox transformations
        proposals = bbox_transform_inv(anchors, bbox_deltas)

        # 2. clip predicted boxes to image
        proposals = clip_boxes(proposals, im_info[:2])

        # 3. remove predicted boxes with either height or width < threshold
        # (NOTE: convert min_size to input image scale stored in im_info[2])
        keep = _filter_boxes(proposals, self.min_size * im_info[2])

        proposals = proposals[keep, :]
        scores = scores[keep]

        if DEBUG:
            print("(CAFFE) len(keep) before nms: {}".format(len(keep)))

        # 4. sort all (proposal, score) pairs by score from highest to lowest
        # 5. take top pre_nms_topN (e.g. 6000)
        order = scores.ravel().argsort()[::-1]
        if self.pre_nms_topN > 0:
            order = order[:self.pre_nms_topN]
        proposals = proposals[order, :]
        scores = scores[order]

        if DEBUG:
            print("(CAFFE) len(proposals) after get_top_N: {}".format(
                len(proposals)))

        # 6. apply nms (e.g. threshold = 0.7)
        # 7. take after_nms_topN (e.g. 300)
        # 8. return the top proposals (-> RoIs top)
        keep = nms(np.hstack((proposals, scores)), self.nms_thresh)

        if DEBUG:
            print("(CAFFE) len(keep) before clipping: {}".format(len(keep)))

        if self.post_nms_topN > 0:
            keep = keep[:self.post_nms_topN]
        proposals = proposals[keep, :]
        scores = scores[keep]

        if DEBUG:
            print("(CAFFE) len(keep) after nms: {}".format(len(keep)))

        # Output rois blob
        # Our RPN implementation only supports a single input image, so all
        # batch inds are 0
        batch_inds = np.zeros((proposals.shape[0], 1), dtype=np.float32)
        blob = np.hstack((batch_inds, proposals.astype(np.float32,
                                                       copy=False)))
        top[0] = blob
        top[1] = scores
Example #21
    def infer(self, imagePath, confidenceThreshold, minHeight, maxHeight):

        # preparing input

        im = cv2.imread(imagePath).astype(self.net.blobs["data"].data.dtype)

        im -= list(map(int, self.config["channel_shift"]))

        pad = int(self.config["pad"][0])
        h, w = im.shape[0:2]

        padH = (pad - (h % pad)) % pad
        padW = (pad - (w % pad)) % pad

        padded = np.zeros(dtype=self.net.blobs["data"].data.dtype,
                          shape=(h + padH, w + padW, im.shape[2]))
        padded[:h, :w, :] = im[...]
        im = padded
        h += padH
        w += padW

        im = im.transpose(2, 0, 1)  # hwc -> chw

        im_input = im[np.newaxis, ...]

        self.net.blobs["data"].reshape(*im_input.shape)
        # self.net.blobs["data"].data[...] = im_input

        imSrc = None
        # imSrc = im_input[0,...].copy()
        # imSrc = imSrc.transpose(1,2,0)
        # imSrc += list(map(int,self.config["channel_shift"]))
        #
        # # bgr -> rgb
        # temp = imSrc[...,0].copy()
        # imSrc[...,0] = imSrc[...,2]
        # imSrc[...,2] = temp[...]
        #
        # imSrc[imSrc<0] = 0
        # imSrc = Image.fromarray( imSrc.astype(np.uint8) )

        # running net

        # self.net.forward()
        im_input2 = np.empty(shape=im_input.shape, dtype=im_input.dtype)
        im_input2[...] = im_input[...]
        im_input = im_input2
        forwardKwargs = {"data": im_input.astype(np.float32, copy=False)}
        self.net.forward(**forwardKwargs)

        # processing output

        outScores = self.net.blobs[self.config["score_blob"][0]].data
        outBoxes = self.net.blobs[self.config["bb_reg_blob"][0]].data

        # NOTE: this early return makes the anchor decoding and NMS code below unreachable
        return outScores, outBoxes, im_input

        scales = self.config["scales"]
        scales = list(map(int, scales))
        stride = int(self.config["stride"][0])
        lenScales = len(scales)

        boxes, scores = [], []
        for i in range(lenScales):
            for y in range(outScores.shape[2]):
                for x in range(outScores.shape[3]):

                    # anchors & bbox regression

                    currentScore = outScores[0, i + lenScales, y, x]

                    if currentScore > confidenceThreshold:
                        size = scales[i] * stride

                        xCorr = outBoxes[0, 4 * i, y, x] * size
                        yCorr = outBoxes[0, 4 * i + 1, y, x] * size
                        wCorr = np.exp(outBoxes[0, 4 * i + 2, y, x]) * size
                        hCorr = np.exp(outBoxes[0, 4 * i + 3, y, x]) * size

                        xCenter = x * stride + xCorr + stride / 2
                        yCenter = y * stride + yCorr + stride / 2

                        x1 = xCenter - (wCorr / 2)
                        x2 = xCenter + (wCorr / 2)
                        y1 = yCenter - (hCorr / 2)
                        y2 = yCenter + (hCorr / 2)

                        if (x1 >= 0 and y1 >= 0 and x2 <= w and y2 <= h
                                and y2 - y1 + 1 >= minHeight
                                and y2 - y1 + 1 <= maxHeight):
                            boxes.append([x1, y1, x2, y2])
                            scores.append(currentScore)

        if len(scores) == 0:
            return [], [], imSrc

        # grouping detections (nms)

        nmsIouThreshold = float(self.config["iou_threshold"][0])
        groupedScores, groupedBoxes = util.nms(scores, boxes, nmsIouThreshold)

        return groupedScores, groupedBoxes, imSrc
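Every example above delegates the suppression step to an external `nms` / `util.nms` helper whose exact signature differs from repo to repo (a boxes-plus-scores array, separate boxes and scores, dict results, and so on). For reference, below is a minimal greedy IoU-based NMS sketch in NumPy, assuming boxes arrive as an [N, 5] array of [x1, y1, x2, y2, score]; it is a generic illustration, not the util.nms used by any of the repositories above.

# Minimal greedy IoU-based NMS sketch (generic illustration, not any repo's util.nms).
import numpy as np

def nms_sketch(dets, thresh):
    """dets: [N, 5] array of [x1, y1, x2, y2, score]; returns indices of boxes to keep."""
    x1, y1, x2, y2 = dets[:, 0], dets[:, 1], dets[:, 2], dets[:, 3]
    scores = dets[:, 4]
    areas = (x2 - x1 + 1) * (y2 - y1 + 1)
    order = scores.argsort()[::-1]  # indices sorted by descending score
    keep = []
    while order.size > 0:
        i = order[0]
        keep.append(i)  # keep the highest-scoring remaining box
        # intersection of box i with every remaining box
        xx1 = np.maximum(x1[i], x1[order[1:]])
        yy1 = np.maximum(y1[i], y1[order[1:]])
        xx2 = np.minimum(x2[i], x2[order[1:]])
        yy2 = np.minimum(y2[i], y2[order[1:]])
        w = np.maximum(0.0, xx2 - xx1 + 1)
        h = np.maximum(0.0, yy2 - yy1 + 1)
        inter = w * h
        iou = inter / (areas[i] + areas[order[1:]] - inter)
        # drop boxes whose IoU with box i exceeds the threshold
        order = order[np.where(iou <= thresh)[0] + 1]
    return keep

# Example usage (boxes shaped like the P-Net output in Example #4):
# keep = nms_sketch(bounding_boxes[:, 0:5], 0.7)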