Example #1
class Pelee_Det(object):
    def __init__(self):

        self.anchor_config = anchors(cfg.model)
        self.priorbox = PriorBox(self.anchor_config)
        self.net = build_net('test', cfg.model.input_size, cfg.model)
        init_net(self.net, cfg, args.trained_model)
        self.net.eval()

        self.num_classes = cfg.model.num_classes

        with torch.no_grad():
            self.priors = self.priorbox.forward()
            self.net = self.net.cuda()
            self.priors = self.priors.cuda()
            cudnn.benchmark = True
        self._preprocess = BaseTransform(cfg.model.input_size,
                                         cfg.model.rgb_means, (2, 0, 1))
        self.detector = Detect(self.num_classes, cfg.loss.bkg_label,
                               self.anchor_config)

    def detect(self, image):

        loop_start = time.time()
        w, h = image.shape[1], image.shape[0]
        img = self._preprocess(image).unsqueeze(0)
        if cfg.test_cfg.cuda:
            img = img.cuda()
        scale = torch.Tensor([w, h, w, h])
        out = self.net(img)
        boxes, scores = self.detector.forward(out, self.priors)
        boxes = (boxes[0] * scale).cpu().numpy()
        scores = scores[0].cpu().numpy()
        allboxes = []
        # for j in [2, 6, 7, 14, 15]:
        for j in range(1, len(ch_labels)):
            inds = np.where(scores[:, j] > cfg.test_cfg.score_threshold)[0]
            if len(inds) == 0:
                continue
            c_bboxes = boxes[inds]
            c_scores = scores[inds, j]
            c_dets = np.hstack(
                (c_bboxes, c_scores[:, np.newaxis])).astype(np.float32,
                                                            copy=False)
            soft_nms = cfg.test_cfg.soft_nms
            # the soft_nms config flag doubles as nms()'s force_cpu argument
            keep = nms(c_dets, cfg.test_cfg.iou, force_cpu=soft_nms)
            keep = keep[:cfg.test_cfg.keep_per_class]
            c_dets = c_dets[keep, :]
            allboxes.extend([_.tolist() + [j] for _ in c_dets])

        loop_time = time.time() - loop_start
        allboxes = np.array(allboxes)
        if allboxes.size == 0:
            # nothing above threshold: return the untouched frame
            return [], image
        boxes = allboxes[:, :4]
        scores = allboxes[:, 4]
        cls_inds = allboxes[:, 5]
        infos, im2show = draw_detection(image, boxes, scores, cls_inds, -1,
                                        args.thresh)
        return infos, im2show
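The per-class post-processing above (threshold the class scores, run NMS, clip to a per-class top-k) recurs in nearly every example below. As a reference, here is that pattern factored into one minimal, self-contained helper; the name filter_by_class is ours, and the NMS implementation is passed in because each repo ships its own nms():

import numpy as np

def filter_by_class(boxes, scores, num_classes, score_thr, iou_thr,
                    nms_fn, keep_per_class=200):
    """Return detections as rows of [x1, y1, x2, y2, score, class_id].

    boxes:  (N, 4) array, already scaled to image coordinates.
    scores: (N, num_classes) array; column 0 is the background class.
    nms_fn: any nms(dets, iou_thr) -> kept-indices implementation.
    """
    results = []
    for j in range(1, num_classes):  # skip background (class 0)
        inds = np.where(scores[:, j] > score_thr)[0]
        if len(inds) == 0:
            continue
        c_dets = np.hstack(
            (boxes[inds], scores[inds, j][:, np.newaxis])).astype(
                np.float32, copy=False)
        keep = nms_fn(c_dets, iou_thr)[:keep_per_class]
        results.extend(d.tolist() + [j] for d in c_dets[keep])
    return np.array(results)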
Example #2
# loop head restored from the identical read loop in Example #11
while True:
    if cam < 0:
        try:
            fname = next(im_iter)
        except StopIteration:
            break
        if 'm2det' in fname:
            continue  # ignore the already-detected output images
        image = cv2.imread(fname, cv2.IMREAD_COLOR)
    else:
        ret, image = capture.read()
        if not ret:
            cv2.destroyAllWindows()
            capture.release()
            break

    loop_start = time.time()
    w, h = image.shape[1], image.shape[0]
    img = _preprocess(image).unsqueeze(0)
    if cfg.test_cfg.cuda:
        img = img.cuda()
    scale = torch.Tensor([w, h, w, h])
    out = net(img)
    boxes, scores = detector.forward(out, priors)
    boxes = (boxes[0] * scale).cpu().numpy()
    scores = scores[0].cpu().numpy()
    allboxes = []
    for j in range(1, cfg.model.m2det_config.num_classes):
        inds = np.where(scores[:, j] > cfg.test_cfg.score_threshold)[0]
        if len(inds) == 0:
            continue
        c_bboxes = boxes[inds]
        c_scores = scores[inds, j]
        c_dets = np.hstack((c_bboxes, c_scores[:, np.newaxis])).astype(
            np.float32, copy=False)
        soft_nms = cfg.test_cfg.soft_nms
        # the soft_nms config flag doubles as nms()'s force_cpu argument
        keep = nms(c_dets, cfg.test_cfg.iou, force_cpu=soft_nms)
Example #3
def demo(v_f):
    cfg = Config.fromfile(config_f)
    anchor_config = anchors(cfg)
    priorbox = PriorBox(anchor_config)
    net = build_net('test',
                    size=cfg.model.input_size,
                    config=cfg.model.m2det_config)
    init_net(net, cfg, checkpoint_path)
    net.eval().to(device)
    with torch.no_grad():
        priors = priorbox.forward().to(device)
    _preprocess = BaseTransform(
        cfg.model.input_size, cfg.model.rgb_means, (2, 0, 1))
    detector = Detect(cfg.model.m2det_config.num_classes,
                      cfg.loss.bkg_label, anchor_config)
    logging.info('detector initiated.')

    cap = cv2.VideoCapture(v_f)
    logging.info('detect on: {}'.format(v_f))
    width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    logging.info('video width: {}, height: {}'.format(width, height))
    # 'mp4v' matches the .mp4 container; 'MJPG' would normally go in an .avi
    out_video = cv2.VideoWriter("result.mp4", cv2.VideoWriter_fourcc(*'mp4v'), 24, (width, height))

    while True:
        ret, image = cap.read()
        if not ret:
            out_video.release()
            cv2.destroyAllWindows()
            cap.release()
            break
        w, h = image.shape[1], image.shape[0]
        img = _preprocess(image).unsqueeze(0).to(device)
        scale = torch.Tensor([w, h, w, h])
        out = net(img)
        boxes, scores = detector.forward(out, priors)
        boxes = (boxes[0]*scale).cpu().numpy()
        scores = scores[0].cpu().numpy()
        allboxes = []
        for j in range(1, cfg.model.m2det_config.num_classes):
            inds = np.where(scores[:, j] > cfg.test_cfg.score_threshold)[0]
            if len(inds) == 0:
                continue
            c_bboxes = boxes[inds]
            c_scores = scores[inds, j]
            c_dets = np.hstack((c_bboxes, c_scores[:, np.newaxis])).astype(
                np.float32, copy=False)
            soft_nms = cfg.test_cfg.soft_nms
            # the soft_nms config flag doubles as nms()'s force_cpu argument
            keep = nms(c_dets, cfg.test_cfg.iou, force_cpu=soft_nms)
            keep = keep[:cfg.test_cfg.keep_per_class]
            c_dets = c_dets[keep, :]
            allboxes.extend([_.tolist()+[j] for _ in c_dets])
        if len(allboxes) > 0:
            allboxes = np.array(allboxes)
            # [boxes, scores, label_id] -> [id, score, boxes] 0, 1, 2, 3, 4, 5
            allboxes = allboxes[:, [5, 4, 0, 1, 2, 3]]
            logging.info('allboxes shape: {}'.format(allboxes.shape))
            res = visualize_det_cv2(image, allboxes, classes=classes, thresh=0.2)
            # res = visualize_det_cv2_fancy(image, allboxes, classes=classes, thresh=0.2, r=4, d=6)
            cv2.imshow('rr', res)
            out_video.write(res)
            cv2.waitKey(1)
Example #4
def main():
    mean = (104, 117, 123)
    print('loading model!')
    if deform:
        from model.dualrefinedet_vggbn import build_net
        net = build_net('test',
                        size=ssd_dim,
                        num_classes=num_classes,
                        c7_channel=1024,
                        def_groups=deform,
                        multihead=multihead,
                        bn=bn)
    else:
        from model.refinedet_vgg import build_net
        net = build_net('test',
                        size=ssd_dim,
                        num_classes=num_classes,
                        use_refine=refine,
                        c7_channel=1024,
                        bn=bn)
    net.load_state_dict(torch.load(trained_model))
    net.eval()
    print('Finished loading model!', trained_model)
    net = net.to(device)
    detector = Detect(num_classes, 0, top_k, confidence_threshold,
                      nms_threshold)
    priorbox = PriorBox(cfg)
    with torch.no_grad():
        priors = priorbox.forward().to(device)
    for i, line in enumerate(open(img_set, 'r')):
        # if i==10:
        #     break
        if 'COCO' in dataset:
            image_name = line[:-1]
            image_id = int(image_name.split('_')[-1])
        elif 'VOC' in dataset:
            image_name = line[:-1]
            image_id = -1
        else:
            image_name, image_id = line.split(' ')
            image_id = image_id[:-1]
        print(i, image_name, image_id)
        image_path = os.path.join(img_root, image_name + '.jpg')
        image = cv2.imread(image_path, 1)
        h, w, _ = image.shape
        image_draw = cv2.resize(image.copy(), (640, 480))
        im_trans = base_transform(image, ssd_dim, mean)
        ######################## Detection ########################
        with torch.no_grad():
            x = torch.from_numpy(im_trans).unsqueeze(0).permute(0, 3, 1,
                                                                2).to(device)
            if 'RefineDet' in backbone and refine:
                arm_loc, _, loc, conf = net(x)
            else:
                loc, conf = net(x)
                arm_loc = None
            detections = detector.forward(loc,
                                          conf,
                                          priors,
                                          arm_loc_data=arm_loc)
        ############################################################
        out = list()
        for j in range(1, detections.size(1)):
            dets = detections[0, j, :]
            if dets.sum() == 0:
                continue
            mask = dets[:, 0].gt(0.).expand(dets.size(-1), dets.size(0)).t()
            dets = torch.masked_select(dets, mask).view(-1, dets.size(-1))
            boxes = dets[:, 1:-1] if dets.size(-1) == 6 else dets[:, 1:]
            boxes[:, 0] *= w
            boxes[:, 2] *= w
            boxes[:, 1] *= h
            boxes[:, 3] *= h
            scores = dets[:, 0].cpu().numpy()
            boxes_np = boxes.cpu().numpy()

            for b, s in zip(boxes_np, scores):
                if save_dir:
                    out.append(
                        [int(b[0]),
                         int(b[1]),
                         int(b[2]),
                         int(b[3]), j - 1, s])
                    if 'COCO' in dataset:
                        det_list.append({
                            'image_id':
                            image_id,
                            'category_id':
                            labelmap[j],
                            'bbox': [
                                float('{:.1f}'.format(b[0])),
                                float('{:.1f}'.format(b[1])),
                                float('{:.1f}'.format(b[2] - b[0] + 1)),
                                float('{:.1f}'.format(b[3] - b[1] + 1))
                            ],
                            'score':
                            float('{:.2f}'.format(s))
                        })
                    else:
                        results_file.write(
                            str(image_id) + ' ' + str(j) + ' ' + str(s) + ' ' +
                            str(np.around(b[0], 2)) + ' ' +
                            str(np.around(b[1], 2)) + ' ' +
                            str(np.around(b[2], 2)) + ' ' +
                            str(np.around(b[3], 2)) + '\n')
                if display:
                    cv2.rectangle(image_draw,
                                  (int(b[0] / w * 640), int(b[1] / h * 480)),
                                  (int(b[2] / w * 640), int(b[3] / h * 480)),
                                  (0, 255, 0),
                                  thickness=1)

                    cls = class_name[j] if 'COCO' in dataset else str(
                        labelmap[j - 1])
                    put_str = cls + ':' + str(np.around(s, decimals=2))
                    cv2.putText(
                        image_draw,
                        put_str,
                        (int(b[0] / w * 640), int(b[1] / h * 480) - 10),
                        cv2.FONT_HERSHEY_DUPLEX,
                        0.5,
                        color=(0, 255, 0),
                        thickness=1)
        if display:
            cv2.imshow('frame', image_draw)
            ch = cv2.waitKey(0)
            if ch == 115:  # 's' key
                if save_dir:
                    print('save: ', line)
                    torch.save(
                        out, os.path.join(save_dir, '%s.pkl' % str(line[:-1])))
                    cv2.imwrite(
                        os.path.join(save_dir, '%s.jpg' % str(line[:-1])),
                        image)
                    cv2.imwrite(
                        os.path.join(save_dir, '%s_box.jpg' % str(line[:-1])),
                        image_draw)

    cv2.destroyAllWindows()
    if save_dir:
        if 'COCO' in dataset:
            json.dump(det_list, results_file)
        results_file.close()
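For reference, Example #4 converts corner-format boxes to COCO's [x, y, width, height] before writing JSON. A minimal sketch of just that conversion (the helper name is ours; the +1 follows the inclusive-pixel convention used above):

def xyxy_to_coco_xywh(b):
    # [x1, y1, x2, y2] -> [x, y, w, h], rounded to one decimal place
    return [float('{:.1f}'.format(b[0])),
            float('{:.1f}'.format(b[1])),
            float('{:.1f}'.format(b[2] - b[0] + 1)),
            float('{:.1f}'.format(b[3] - b[1] + 1))]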
Example #5
class ObjDetector(object):
    def __init__(self, img_size=300, thresh=0.56):
        assert img_size == 300 or img_size == 512, 'net input image size must be 300 or 512'
        self.labels_name = LABELS_SET
        self.labels_numb = len(LABELS_SET)
        self.img_size = img_size
        self.cfg = VOC_300 if img_size == 300 else VOC_512
        self.thresh = thresh
        self.gpu_is_available = torch.cuda.is_available()
        self.gpu_numb = torch.cuda.device_count()
        self.net = build_net('test', self.img_size, self.labels_numb)
        self.detect = Detect(self.labels_numb, 0, self.cfg)
        self.transform = BaseTransform(self.img_size)

        # load net weights
        state_dict = torch.load(trained_model, map_location='cpu')
        new_state_dict = OrderedDict()
        for k, v in state_dict.items():
            head = k[:7]
            if head == 'module.':
                name = k[7:]  # remove `module.`
            else:
                name = k
            new_state_dict[name] = v
        self.net.load_state_dict(new_state_dict)
        self.net.eval()
        print('Finished loading model!')

        if self.gpu_numb > 1:
            self.net = torch.nn.DataParallel(self.net,
                                             device_ids=list(
                                                 range(self.gpu_numb)))

        # set net gpu or cpu model
        if self.gpu_is_available:
            self.net.cuda()
            cudnn.benchmark = True

        # define box generator
        priorbox = PriorBox(self.cfg)
        with torch.no_grad():
            self.priors = priorbox.forward()
            if self.gpu_is_available:
                self.priors = self.priors.cuda()

    def __net__(self, img):
        scale = torch.Tensor(
            [img.shape[1], img.shape[0], img.shape[1], img.shape[0]])
        with torch.no_grad():
            x = self.transform(img).unsqueeze(0)
            if self.gpu_is_available:
                x = x.cuda()
                scale = scale.cuda()

        # get net output
        out = self.net(x)
        boxes, scores = self.detect.forward(out, self.priors)
        boxes = boxes[0]
        scores = scores[0]

        # scale each detection back up to the image
        boxes *= scale
        boxes = boxes.cpu().numpy()
        scores = scores.cpu().numpy()
        return boxes, scores

    def __call__(self, image):
        """
        :param image: rgb image
        :return: {'label_name':[x1,y1,x2,y2,score],...}
        """
        boxes = np.empty((0, 4))
        scores = np.empty((0, self.labels_numb))

        for img, p in self.__chips__(image):
            b = [p[0], p[1], p[0], p[1]]
            boxes_t, scores_t = self.__net__(img)
            boxes_t += list(map(float, b))
            boxes = np.vstack((boxes, boxes_t))
            scores = np.vstack((scores, scores_t))

        # filter bounding boxes
        results = dict()
        for j in range(1, self.labels_numb):
            inds = np.where(scores[:, j] > self.thresh)[0]
            if len(inds) == 0:
                continue
            c_bboxes = boxes[inds]
            c_scores = scores[inds, j]
            c_dets = np.hstack(
                (c_bboxes, c_scores[:, np.newaxis])).astype(np.float32,
                                                            copy=False)
            keep = nms(c_dets, 0.45, force_cpu=0)
            c_dets = c_dets[keep, :]
            results[self.labels_name[j]] = c_dets
        return results

    def __chips__(self, image):
        h, w, _ = image.shape
        x = w // 2
        y = h // 2
        boxes = []
        if min(h, w) > 1500:
            boxes.append((0, 0, x, y))
            boxes.append((x, 0, w, y))
            boxes.append((0, y, x, h))
            boxes.append((x, y, w, h))
            boxes.append((x // 2, y // 2, x + x // 2, y + y // 2))
        else:
            boxes.append((0, 0, w, h))
        for p in boxes:
            yield image[p[1]:p[3], p[0]:p[2]], p

    def draw(self, image, results):
        # draw bounding boxes
        for label, boxes in results.items():
            for value in boxes:
                x1 = int(value[0])
                y1 = int(value[1])
                x2 = int(value[2])
                y2 = int(value[3])
                # label name and scores
                text = label + ',' + "%.2f" % value[4]
                # select color
                indx = self.labels_name.index(label) % len(COLORS)
                color = COLORS[indx]
                # draw bounding box
                cv2.rectangle(image, (x1, y1), (x2, y2), color, 2)
                # draw label
                font = cv2.FONT_HERSHEY_SIMPLEX
                font_scale = 0.58
                size = cv2.getTextSize(text, font, font_scale, 1)
                # text_w = size[0][0]
                text_h = size[0][1]
                cv2.putText(image, text, (x1, max((y1 - text_h), 0)), font,
                            font_scale, color, 1)
        return image
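A sketch of how ObjDetector above might be driven end to end, assuming the module-level trained_model path is set; the image paths here are placeholders:

import cv2

detector = ObjDetector(img_size=300, thresh=0.56)
image = cv2.imread('test.jpg')         # placeholder input path
results = detector(image)              # {'label_name': [[x1, y1, x2, y2, score], ...]}
image = detector.draw(image, results)
cv2.imwrite('test_result.jpg', image)  # placeholder output path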
Example #6
def test(img_path, model_path='weights/RFB_vgg_COCO_30.3.pth'):
    trained_model = model_path
    cuda = torch.cuda.is_available()
    if 'mobile' in model_path:
        cfg = COCO_mobile_300
    else:
        cfg = COCO_300

    priorbox = PriorBox(cfg)
    with torch.no_grad():
        priors = priorbox.forward()
        if cuda:
            priors = priors.cuda()
    numclass = 81

    img = cv2.imread(img_path)
    scale = torch.Tensor(
        [img.shape[1], img.shape[0], img.shape[1], img.shape[0]])
    if 'mobile' in model_path:
        net = build_rfb_mobilenet('test', 300, numclass)  # initialize detector
    else:
        net = build_rfb_vgg_net('test', 300, numclass)  # initialize detector

    transform = BaseTransform(net.size, (123, 117, 104), (2, 0, 1))
    with torch.no_grad():
        x = transform(img).unsqueeze(0)  # Variable() wrapper is a no-op since PyTorch 0.4
        if cuda:
            x = x.cuda()
            scale = scale.cuda()
    state_dict = torch.load(trained_model)['state_dict']
    # create new OrderedDict that does not contain `module.`
    from collections import OrderedDict
    new_state_dict = OrderedDict()
    for k, v in state_dict.items():
        head = k[:7]
        if head == 'module.':
            name = k[7:]  # remove `module.`
        else:
            name = k
        new_state_dict[name] = v
    net.load_state_dict(new_state_dict)
    net.eval()
    if cuda:
        net = net.cuda()
        cudnn.benchmark = True
    else:
        net = net.cpu()
    print('Finished loading model!')
    # print(net)
    detector = Detect(numclass, 0, cfg)

    tic = time.time()
    out = net(x)  # forward pass

    boxes, scores = detector.forward(out, priors)
    print('Finished in {}'.format(time.time() - tic))
    boxes = boxes[0]
    scores = scores[0]
    boxes *= scale
    boxes = boxes.cpu().numpy()
    scores = scores.cpu().numpy()
    # Create figure and axes
    # Display the image
    # scale each detection back up to the image
    for j in range(1, numclass):
        # print(max(scores[:, j]))
        inds = np.where(scores[:, j] > 0.6)[0]  # conf > 0.6
        if len(inds) == 0:
            continue
        c_bboxes = boxes[inds]
        c_scores = scores[inds, j]
        c_dets = np.hstack((c_bboxes, c_scores[:,
                                               np.newaxis])).astype(np.float32,
                                                                    copy=False)
        keep = nms(c_dets, 0.6)
        c_dets = c_dets[keep, :]
        c_bboxes = c_dets[:, :4]

        # print(c_bboxes.shape)
        # print(c_bboxes.shape[0])
        if c_bboxes.shape[0] != 0:
            # print(c_bboxes.shape)
            print('{}: {}'.format(j, c_bboxes))
            for box in c_bboxes:
                x1, y1, x2, y2 = map(int, box)  # cv2 drawing needs int coords
                cv2.rectangle(img, (x1, y1), (x2, y2), (0, 255, 0), 1, 0)
                cv2.putText(img, '{}'.format(j), (x1, y1),
                            cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2,
                            cv2.LINE_AA)
    cv2.imshow('rr', img)
    cv2.waitKey(0)
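Examples #5-#7 all repeat the same loop for stripping the 'module.' prefix that nn.DataParallel prepends to checkpoint keys. A minimal standalone version of that idiom (the function name is ours):

from collections import OrderedDict

def strip_module_prefix(state_dict):
    """Remove the 'module.' prefix added by nn.DataParallel."""
    new_state_dict = OrderedDict()
    for k, v in state_dict.items():
        name = k[7:] if k.startswith('module.') else k
        new_state_dict[name] = v
    return new_state_dict

# usage: net.load_state_dict(strip_module_prefix(torch.load(path)['state_dict']))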
Example #7
class Detector(object):

    def __init__(self, model_path):
        # self.net_name = net_name
        self.model_path = model_path

        self.num_classes = 81
        self.cuda = torch.cuda.is_available()

        self.label_map_list = list(coco_label_map.values())

        self._init_model()

    def _init_model(self):
        cuda = self.cuda
        if '300' in self.model_path:
            cfg = COCO_300
            self.img_dim = 300
            print('Model input size is 300')
        else:
            cfg = COCO_512
            self.img_dim = 512
            print('Model input size is 512')

        priorbox = PriorBox(cfg)
        with torch.no_grad():
            priors = priorbox.forward()
            # keep priors on the CPU when no GPU is available
            self.priors = priors.cuda() if cuda else priors

        self.net = build_rfb_vgg_net('test', self.img_dim, self.num_classes)  # initialize detector
        state_dict = torch.load(self.model_path)['state_dict']
        # create new OrderedDict that does not contain `module.`
        from collections import OrderedDict
        new_state_dict = OrderedDict()
        for k, v in state_dict.items():
            head = k[:7]
            if head == 'module.':
                name = k[7:]  # remove `module.`
            else:
                name = k
            new_state_dict[name] = v
        self.net.load_state_dict(new_state_dict)
        self.net.eval()
        if cuda:
            self.net = self.net.cuda()
            cudnn.benchmark = True
        else:
            self.net = self.net.cpu()
        print('Finished loading model!')
        # print(net)
        self.detector = Detect(self.num_classes, 0, cfg)

    def predict_on_img(self, img):
        scale = torch.Tensor([img.shape[1], img.shape[0],
                              img.shape[1], img.shape[0]])
        transform = BaseTransform(self.net.size, (123, 117, 104), (2, 0, 1))
        with torch.no_grad():
            x = transform(img).unsqueeze(0)  # Variable() wrapper is a no-op since PyTorch 0.4
            if self.cuda:
                x = x.cuda()
                scale = scale.cuda()
        tic = time.time()
        out = self.net(x)  # forward pass
        boxes, scores = self.detector.forward(out, self.priors)
        print('Finished in {}'.format(time.time() - tic))
        boxes = boxes[0]
        scores = scores[0]
        boxes *= scale
        boxes = boxes.cpu().numpy()
        scores = scores.cpu().numpy()
        return boxes, scores

    def predict_on_video(self, v_f):
        cap = cv2.VideoCapture(v_f)

        while cap.isOpened():
            ok, frame = cap.read()
            if ok:
                img = frame
                boxes, scores = self.predict_on_img(frame)
                # print(boxes.shape)
                # print(scores.shape)
                # scale each detection back up to the image
                tic = time.time()
                for j in range(1, self.num_classes):
                    # print(max(scores[:, j]))
                    inds = np.where(scores[:, j] > 0.6)[0]  # conf > 0.6
                    if len(inds) == 0:
                        continue
                    c_bboxes = boxes[inds]
                    c_scores = scores[inds, j]
                    c_dets = np.hstack((c_bboxes, c_scores[:, np.newaxis])).astype(
                        np.float32, copy=False)
                    keep = nms(c_dets, 0.6)
                    c_dets = c_dets[keep, :]
                    c_bboxes = c_dets[:, :4]

                    # print(c_bboxes.shape)
                    # print(c_bboxes.shape[0])
                    if c_bboxes.shape[0] != 0:
                        # print(c_bboxes.shape)
                        # print('{}: {}'.format(j, c_bboxes))
                        for box in c_bboxes:
                            label = self.label_map_list[j - 1]
                            x1, y1, x2, y2 = map(int, box)  # cv2 drawing needs int coords
                            cv2.rectangle(img, (x1, y1), (x2, y2), (0, 255, 0), 1, 0)
                            cv2.putText(img, label, (x1, y1), cv2.FONT_HERSHEY_SIMPLEX, 0.5,
                                        (0, 255, 0), 1, cv2.LINE_AA)
                # print('post process time: {}'.format(time.time() - tic))
                cv2.imshow('rr', frame)
                cv2.waitKey(1)
            else:
                print('Done')
                cap.release()
                break
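A possible entry point for the Detector class above; the checkpoint and video paths are placeholders, not from the original:

if __name__ == '__main__':
    det = Detector('weights/RFB_vgg_COCO_300.pth')  # placeholder checkpoint path
    det.predict_on_video('demo.mp4')                # placeholder video path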
Example #8
from layers.functions import Detect, PriorBox
from data.config import VOC_320
import torch

top_k = 200
confidence_threshold = 0.5
nms_threshold = 0.45

priorbox = PriorBox(VOC_320)
detector = Detect(21, 0, top_k, confidence_threshold, nms_threshold)
with torch.no_grad():
    priors = priorbox.forward()
    loc = torch.randn(1, 6375, 4)   # (batch, num_priors, 4)
    conf = torch.randn(6375, 21)    # (num_priors, num_classes)
    arm_loc = torch.randn(1, 6375, 4)

# pass arm_loc_data=arm_loc instead to exercise the ARM refinement branch
out = detector.forward(loc, conf, priors, arm_loc_data=None)
Example #9
    def detect(self, file_name, object):  # note: 'object' shadows the Python builtin
        print(file_name)
        start_time = time.time()
        img = cv2.imread(file_name.strip())
        if img is None:
            QtWidgets.QMessageBox.information(self, "Alert",
                                              "Please select images")
            return
        scale = torch.Tensor(
            [img.shape[1], img.shape[0], img.shape[1], img.shape[0]])
        detector = Detect(object.numclass, 0, object.cfg)
        transform = BaseTransform(object.net.size, (123, 117, 104), (2, 0, 1))
        with torch.no_grad():
            x = transform(img).unsqueeze(0)
            if object.cuda:
                x = x.cuda()
                scale = scale.cuda()
        out = object.net(x)
        with torch.no_grad():
            priors = object.priorbox.forward()
            if object.cuda:
                priors = priors.cuda()
        boxes, scores = detector.forward(out, priors)
        boxes = boxes[0]
        scores = scores[0]
        boxes *= scale
        boxes = boxes.cpu().numpy()
        scores = scores.cpu().numpy()

        result_set = []
        for j in range(1, object.numclass):
            inds = np.where(scores[:, j] > 0.2)[0]  # conf > 0.2
            if len(inds) == 0:
                continue
            c_bboxes = boxes[inds]
            c_scores = scores[inds, j]
            c_dets = np.hstack(
                (c_bboxes, c_scores[:, np.newaxis])).astype(np.float32,
                                                            copy=False)
            keep = object.nms_py(c_dets, 0.6)
            c_dets = c_dets[keep, :]
            c_bboxes = c_dets[:, :4]
            for bbox in c_bboxes:
                # Create a Rectangle patch
                rect = patches.Rectangle((int(bbox[0]), int(bbox[1])),
                                         int(bbox[2]) - int(bbox[0]) + 1,
                                         int(bbox[3]) - int(bbox[1]) + 1,
                                         linewidth=1,
                                         edgecolor='r')
                result_set.append(str(rect))
                cv2.rectangle(img, (int(bbox[0]), int(bbox[1])),
                              (int(bbox[2]), int(bbox[3])), (255, 0, 0), 2)

        # write the annotated image once, after all boxes are drawn
        cv2.imwrite("my_test.png", img)

        end_time = time.time()
        print(end_time - start_time)
        img_data = QtGui.QPixmap("my_test.png")
        height = object.height()
        width = int(object.height() / img_data.height() * img_data.width())
        img_data = img_data.scaled(width, height)
        object.label.resize(width, height)
        object.label.setPixmap(img_data)
        self.setFocus()
Example #10
def test_net(net, testset):

    net.eval()
    test_image_nums = len(testset)
    detector = Detect()  # TODO
    num_classes = 2  # TODO
    all_boxes = [[[] for _ in range(test_image_nums)]
                 for _ in range(num_classes)]
    all_landmarks = [[[] for _ in range(test_image_nums)]
                     for _ in range(num_classes)]
    for idx in tqdm(range(test_image_nums)):
        with torch.no_grad():
            # image = testset.pull_image(idx)  #  TODO
            image = cv2.imread(testset[idx])  #  TODO

            target_size = 1600
            max_size = 2150
            # alternative (target_size, max_size) pairs: (2000, 3000), (640, 900), (640, 640)
            im_shape = image.shape  # H, W, C

            im_size_min = min(im_shape[0:2])
            im_size_max = max(im_shape[0:2])
            im_scale = float(target_size) / float(im_size_min)
            # prevent bigger axis from being more than max_size:
            if round(im_scale * im_size_max) > max_size:
                im_scale = float(max_size) / float(im_size_max)
            scales = [im_scale]

            for im_scale in scales:
                if im_scale != 1.0:
                    image_new = cv2.resize(image,
                                           None,
                                           None,
                                           fx=im_scale,
                                           fy=im_scale,
                                           interpolation=cv2.INTER_LINEAR)
                else:
                    image_new = image.copy()
                PIXEL_MEANS = np.array([0.406, 0.456, 0.485])  # bgr mean
                PIXEL_STDS = np.array([0.225, 0.224, 0.229])
                PIXEL_SCALE = 255.0
                im_tensor = np.zeros(
                    (3, image_new.shape[0], image_new.shape[1]))
                for i in range(3):
                    # BGR -> RGB, scale to [0, 1], normalize per channel
                    im_tensor[i, :, :] = (image_new[:, :, 2 - i] / PIXEL_SCALE
                                          - PIXEL_MEANS[2 - i]) / PIXEL_STDS[2 - i]
                im_tensor = im_tensor.astype(np.float32)
                print("im_tensor: ", im_tensor.shape)
                im_tensor = torch.from_numpy(im_tensor)
                im_tensor = im_tensor.unsqueeze(0)
                if args.gpu:
                    im_tensor = im_tensor.cuda()
                net_out = net(im_tensor)

                if cfg.FACE_LANDMARK:
                    scores, boxes, landmarks = detector.forward(
                        net_out, im_tensor.shape[2:])
                else:
                    scores, boxes = detector.forward(net_out,
                                                     im_tensor.shape[2:])
                scores = scores.cpu().numpy()
                boxes = boxes.cpu().numpy() / im_scale  # back to original-image scale
                if cfg.FACE_LANDMARK:
                    landmarks = landmarks.cpu().numpy() / im_scale
                print(scores.shape)
                print(boxes.shape)
                # TODO split as a function
                for cls in range(1, num_classes):
                    inds = np.where(scores[:, cls] > args.score_thresh)[0]
                    if len(inds) == 0:
                        # no detections above the threshold for this class
                        all_boxes[cls][idx] = np.empty([0, 5],
                                                       dtype=np.float32)
                        if cfg.FACE_LANDMARK:
                            all_landmarks[cls][idx] = np.empty(
                                [0, 10], dtype=np.float32)
                        continue
                    c_boxes = boxes[inds]
                    c_scores = scores[inds, cls]
                    c_dets = np.hstack(
                        (c_boxes, c_scores[:, np.newaxis])).astype(np.float32,
                                                                   copy=False)
                    if cfg.FACE_LANDMARK:
                        c_landmarks = landmarks[inds]
                    keep = nms(c_dets, args.nms_overlap,
                               force_cpu=True)  # TODO: soft_nms
                    box_num = 150
                    keep = keep[:box_num]  # keep only the highest-scoring boxes
                    c_dets = c_dets[keep, :]
                    all_boxes[cls][idx] = c_dets
                    if cfg.FACE_LANDMARK:
                        # filter landmarks with the same keep indices as the boxes
                        c_landmarks = c_landmarks[keep, :]
                        all_landmarks[cls][idx] = c_landmarks

                bbx = all_boxes[1][idx]
                lmks = all_landmarks[1][idx]
            DEBUG_I = True
            if DEBUG_I:
                for jj in range(bbx.shape[0]):
                    sf = (int(bbx[jj][0]), int(bbx[jj][1]))
                    st = (int(bbx[jj][2]), int(bbx[jj][3]))
                    print(sf, st)
                    cv2.rectangle(image, sf, st, (0, 0, 255), thickness=2)
                    # optionally draw the five facial landmarks, e.g.
                    # cv2.circle(image, (lmks[jj][0, 0], lmks[jj][0, 1]),
                    #            radius=1, color=(0, 0, 255), thickness=2)
                cv2.imwrite("images/img.jpg", image)
Example #11
def detect_parking_spaces(dir,
                          threshold=0.2,
                          save=False,
                          show=False,
                          cam=-1,
                          gpu=False,
                          config='training/m2det/configs/m2det512_vgg.py',
                          weights='training/m2det/weights/m2det512_vgg.pth'):
    print('Detect Parking Spaces Program')
    cfg = Config.fromfile(config)
    anchor_config = anchors(cfg)

    priorbox = PriorBox(anchor_config)
    net = build_net('test',
                    size=cfg.model.input_size,
                    config=cfg.model.m2det_config)
    init_net(net, cfg, weights)
    net.eval()
    if not gpu:
        cfg.test_cfg.cuda = False

    with torch.no_grad():
        priors = priorbox.forward()
        if cfg.test_cfg.cuda:
            net = net.cuda()
            priors = priors.cuda()
            cudnn.benchmark = True
        else:
            net = net.cpu()
    print_info('===> Finished constructing and loading model')

    _preprocess = BaseTransform(cfg.model.input_size, cfg.model.rgb_means,
                                (2, 0, 1))
    detector = Detect(cfg.model.m2det_config.num_classes, cfg.loss.bkg_label,
                      anchor_config)

    base = int(np.ceil(pow(cfg.model.m2det_config.num_classes, 1. / 3)))
    colors = [
        _to_color(x, base) for x in range(cfg.model.m2det_config.num_classes)
    ]
    cats = [
        _.strip().split(',')[-1]
        for _ in open('training/m2det/data/coco_labels.txt', 'r').readlines()
    ]
    labels = tuple(['__background__'] + cats)

    im_path = dir + '/images'
    if cam >= 0:
        capture = cv2.VideoCapture(cam)
    im_fnames = sorted((fname for fname in os.listdir(im_path)
                        if os.path.splitext(fname)[-1] == '.jpg'))
    im_fnames = (os.path.join(im_path, fname) for fname in im_fnames)
    im_iter = iter(im_fnames)

    save_dir = dir + '/detection_images'
    os.makedirs(save_dir, exist_ok=True)
    locs_list = {}
    while True:
        if cam < 0:
            try:
                fname = next(im_iter)
            except StopIteration:
                break
            image = cv2.imread(fname, cv2.IMREAD_COLOR)
        else:
            ret, image = capture.read()
            if not ret:
                cv2.destroyAllWindows()
                capture.release()
                break

        loop_start = time.time()
        w, h = image.shape[1], image.shape[0]
        img = _preprocess(image).unsqueeze(0)
        if cfg.test_cfg.cuda:
            img = img.cuda()
        scale = torch.Tensor([w, h, w, h])
        out = net(img)
        if not gpu:
            priors = priors.cpu()

        boxes, scores = detector.forward(out, priors)
        boxes = (boxes[0] * scale).cpu().numpy()
        scores = scores[0].cpu().numpy()
        allboxes = []

        for j in range(1, cfg.model.m2det_config.num_classes):
            inds = np.where(scores[:, j] > cfg.test_cfg.score_threshold)[0]
            if len(inds) == 0:
                continue
            c_bboxes = boxes[inds]
            c_scores = scores[inds, j]
            c_dets = np.hstack(
                (c_bboxes, c_scores[:, np.newaxis])).astype(np.float32,
                                                            copy=False)
            soft_nms = cfg.test_cfg.soft_nms
            # the soft_nms config flag doubles as nms()'s force_cpu argument
            keep = nms(c_dets, cfg.test_cfg.iou, force_cpu=soft_nms)
            keep = keep[:cfg.test_cfg.keep_per_class]
            c_dets = c_dets[keep, :]
            allboxes.extend([_.tolist() + [j] for _ in c_dets])

        loop_time = time.time() - loop_start
        allboxes = np.array(allboxes)
        if allboxes.size == 0:
            continue  # nothing detected in this frame
        boxes = allboxes[:, :4]
        scores = allboxes[:, 4]
        cls_inds = allboxes[:, 5]
        # print('\n'.join(['pos:{}, ids:{}, score:{:.3f}'.format('(%.1f,%.1f,%.1f,%.1f)' % (o[0],o[1],o[2],o[3]) \
        #         ,labels[int(oo)],ooo) for o,oo,ooo in zip(boxes,cls_inds,scores)]))
        fps = 1.0 / float(loop_time) if cam >= 0 else -1
        im2show, loc = draw_detection(image,
                                      boxes,
                                      scores,
                                      cls_inds,
                                      fps,
                                      threshold,
                                      colors=colors,
                                      labels=labels)
        locs_list[fname] = loc

        if im2show.shape[0] > 1100:
            im2show = cv2.resize(im2show, (int(
                1000. * float(im2show.shape[1]) / im2show.shape[0]), 1000))
        if show:
            cv2.imshow('test', im2show)
            if cam < 0:
                cv2.waitKey(1000)
            else:
                if cv2.waitKey(1) & 0xFF == ord('q'):
                    cv2.destroyAllWindows()
                    capture.release()
                    break
        if save:
            name = os.path.splitext(os.path.basename(fname))[0]
            cv2.imwrite(f"{save_dir}/{name}.jpg", im2show)

    save_name = dir + '/labels/split.txt'
    with open(save_name, 'wb') as f:
        pickle.dump(locs_list, f)
Example #12
def main():
    mean = (104, 117, 123)
    if 'FPN' in backbone:
        from model.refinedet_vgg import build_net
        static_net = build_net('test',
                               size=ssd_dim,
                               num_classes=num_classes,
                               c7_channel=c7_channel,
                               bn=bn)
        net = build_net('test',
                        size=ssd_dim,
                        num_classes=num_classes,
                        c7_channel=c7_channel,
                        bn=bn)
    else:
        from model.ssd4scale_vgg import build_net
        static_net = build_net('test',
                               size=ssd_dim,
                               num_classes=num_classes,
                               c7_channel=c7_channel,
                               bn=bn)
        net = build_net('test',
                        size=ssd_dim,
                        num_classes=num_classes,
                        c7_channel=c7_channel,
                        bn=bn,
                        deform=deform)

    print('loading model!')
    static_net.load_state_dict(torch.load(static_dir))
    static_net.eval()
    static_net = static_net.to(device)
    net.load_state_dict(torch.load(trn_dir))
    net.eval()
    net = net.to(device)
    print('Finished loading model!', static_dir, trn_dir)
    detector = Detect(num_classes, 0, top_k, confidence_threshold,
                      nms_threshold)
    priorbox = PriorBox(cfg)
    with torch.no_grad():
        priors = priorbox.forward().to(device)

    frame_num = 0
    cap = cv2.VideoCapture(video_name)
    w, h = (int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)),
            int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT)))
    cap.set(cv2.CAP_PROP_POS_FRAMES, frame_num)
    size = (640, 480)
    if save_dir:
        fourcc = cv2.VideoWriter_fourcc('M', 'J', 'P', 'G')
        record = cv2.VideoWriter(
            os.path.join(save_dir,
                         video_name.split('/')[-1].split('.')[0] + '.avi'),
            fourcc, cap.get(cv2.CAP_PROP_FPS), size)
    # static_flag = True
    offset_list = list()
    ref_loc = list()
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break
        h, w, _ = frame.shape
        frame_draw = frame.copy()
        im_trans = base_transform(frame, ssd_dim, mean)
        with torch.no_grad():
            x = torch.from_numpy(im_trans).unsqueeze(0).permute(0, 3, 1,
                                                                2).to(device)
            if frame_num % interval == 0:
                static_out = static_net(x, ret_loc=deform)
                priors_static = center_size(
                    decode(static_out[0][0], priors, [0.1, 0.2]))
                if deform:
                    ref_loc = static_out[2]  # alternative: [o * args.loose for o in static_out[2]]
                    offset_list = list()
            out = net(x,
                      ref_loc=ref_loc,
                      offset_list=offset_list,
                      ret_off=(False, True)[deform and not offset_list])
            detections = detector.forward(out[0],
                                          out[1],
                                          priors_static,
                                          scale=torch.cuda.FloatTensor(
                                              [w, h, w, h]))
            if len(detections) == 3:
                offset_list = out[2]
                ref_loc = list()
            # if static_flag:
            #     ref_mask = mask.clone()
            #     print('static')
            #     static_flag = False
            # else:
            #     time1 = time.time()
            #     s_score = (mask * ref_mask).sum().float() / (mask + ref_mask).sum().float()
            #     static_flag = (False, True)[s_score < 0.45]
            #     time2 = time.time()
            #     print(s_score, 'match time:', time2 - time1)
        out = list()
        for j in range(1, detections.size(1)):
            if detections[0, j, :, :].sum() == 0:
                continue
            for k in range(detections.size(2)):
                dets = detections[0, j, k, :]
                if dets.sum() == 0:
                    continue
                boxes = dets[1:-1] if dets.size(0) == 6 else dets[1:]
                identity = dets[-1] if dets.size(0) == 6 else -1
                x_min = int(boxes[0] * w)
                x_max = int(boxes[2] * w)
                y_min = int(boxes[1] * h)
                y_max = int(boxes[3] * h)

                score = dets[0]
                if score > confidence_threshold:
                    put_str = VID_CLASSES_name[j - 1] + ':' + '%.2f' % float(score)
                    color = (255, 0, 0)
                    cv2.rectangle(frame_draw, (x_min, y_min), (x_max, y_max),
                                  color,
                                  thickness=2)
                    cv2.putText(frame_draw,
                                put_str, (x_min + 10, y_min - 10),
                                cv2.FONT_HERSHEY_DUPLEX,
                                0.8,
                                color=color,
                                thickness=1)
        print(str(frame_num))
        frame_num += 1
        frame_show = cv2.resize(frame_draw, size)
        cv2.imshow('frame', frame_show)
        if save_dir:
            record.write(frame_show)
        ch = cv2.waitKey(1)
        if ch == 32:  # space: pause playback
            while 1:
                in_ch = cv2.waitKey(10)
                if in_ch == 115:  # 's'
                    if save_dir:
                        print('save: ', frame_num)
                        torch.save(
                            out,
                            os.path.join(save_dir, '_%s.pkl' % str(frame_num)))
                        cv2.imwrite(
                            os.path.join(save_dir, '%s.jpg' % str(frame_num)),
                            frame)
                elif in_ch == 32:  # space: resume
                    break

    cap.release()
    if save_dir:
        record.release()
    cv2.destroyAllWindows()