Example #1
class Detector(object):
    def __init__(self):
        self.vdo = cv2.VideoCapture()
        self.yolo3 = YOLO3("YOLO3/cfg/yolo_v3.cfg",
                           "YOLO3/yolov3.weights",
                           "YOLO3/cfg/coco.names",
                           is_xywh=True)
        self.deepsort = DeepSort("deep/checkpoint/ckpt.t7")
        self.class_names = self.yolo3.class_names
        self.write_video = True

    def open(self, video_path):
        assert os.path.isfile(video_path), "Error: path error"
        self.vdo.open(video_path)
        self.im_width = int(self.vdo.get(cv2.CAP_PROP_FRAME_WIDTH))
        self.im_height = int(self.vdo.get(cv2.CAP_PROP_FRAME_HEIGHT))
        self.area = 0, 0, self.im_width, self.im_height
        if self.write_video:
            fourcc = cv2.VideoWriter_fourcc(*'MJPG')
            self.output = cv2.VideoWriter("demo.avi", fourcc, 20,
                                          (self.im_width, self.im_height))
        return self.vdo.isOpened()

    def detect(self):
        xmin, ymin, xmax, ymax = self.area
        while self.vdo.grab():
            start = time.time()
            _, ori_im = self.vdo.retrieve()
            im = ori_im[ymin:ymax, xmin:xmax, (2, 1, 0)]
            bbox_xywh, cls_conf, cls_ids = self.yolo3(im)

            #bbox_xyxy = torch.zeros_like(bbox_xywh, dtype=bbox_xywh.dtype)
            #bbox_xyxy[:, 0] = bbox_xywh[:, 0] - bbox_xywh[:, 2] / 2
            #bbox_xyxy[:, 1] = bbox_xywh[:, 1] - bbox_xywh[:, 3] / 2
            #bbox_xyxy[:, 2] = bbox_xywh[:, 0] + bbox_xywh[:, 2] / 2
            #bbox_xyxy[:, 3] = bbox_xywh[:, 1] + bbox_xywh[:, 3] / 2

            if bbox_xywh is not None:
                mask = cls_ids == 0
                bbox_xywh = bbox_xywh[mask]
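                # dilate the box height slightly (per the note in Example #9:
                # "bbox dilation just in case bbox too small")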
                bbox_xywh[:, 3] *= 1.2
                cls_conf = cls_conf[mask]
                outputs = self.deepsort.update(bbox_xywh, cls_conf, im)
                if len(outputs) > 0:
                    bbox_xyxy = outputs[:, :4]
                    identities = outputs[:, -1]
                    ori_im = draw_bboxes(ori_im,
                                         bbox_xyxy,
                                         identities,
                                         offset=(xmin, ymin))

            end = time.time()
            print("time: {}s, fps: {}".format(end - start, 1 / (end - start)))

            cv2.imshow("test", ori_im)
            cv2.waitKey(1)

            if self.write_video:
                self.output.write(ori_im)
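
A minimal driver sketch for this snippet; Detector is the class above, and the video path is a placeholder assumption:

if __name__ == "__main__":
    detector = Detector()
    if detector.open("input.avi"):  # placeholder path
        detector.detect()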
Example #2
class Detector(object):

    def __init__(self):
        self.vdo = cv2.VideoCapture()
        self.deepsort = DeepSort("deep/checkpoint/ckpt.t7")
        self.write_video = True

    def open(self, video_path):

        assert os.path.isfile(video_path), "Error: path error"
        self.vdo.open(video_path)
        self.im_width = int(self.vdo.get(cv2.CAP_PROP_FRAME_WIDTH))
        self.im_height = int(self.vdo.get(cv2.CAP_PROP_FRAME_HEIGHT))
        self.area = 0, 0, self.im_width, self.im_height
        if self.write_video:
            fourcc = cv2.VideoWriter_fourcc(*'MJPG')
            self.output = cv2.VideoWriter("demo1.avi", fourcc, 20, (self.im_width, self.im_height))

        return self.vdo.isOpened()



    def detect(self):
        xmin, ymin, xmax, ymax = self.area
        frame_no = 0
        avg_fps = 0.0
        
        while self.vdo.grab():
            frame_no += 1
            _, ori_im = self.vdo.retrieve()
            im = ori_im[ymin:ymax, xmin:xmax]



            results = test_net(im, net, detector, args.cuda,
                               BaseTransform(net.size, rgb_means, (2, 0, 1)),
                               top_k, thresh=0.4)
            # RFBNet usage tutorial
            bbox_xywh, cls_conf = bbox_to_xywh_cls_conf(results)

            if bbox_xywh is not None:
                outputs = self.deepsort.update(bbox_xywh, cls_conf, im)
                if len(outputs) > 0:
                    bbox_xyxy = outputs[:, :4]
                    identities = outputs[:, -1]
                    ori_im = draw_bboxes(ori_im, bbox_xyxy, identities, offset=(xmin, ymin))
                    
            cv2.imshow("test", ori_im)
            cv2.waitKey(1)

            if self.write_video:
                self.output.write(ori_im)
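
bbox_to_xywh_cls_conf is called above but not defined in this snippet. A plausible sketch, modeled on the _bbox_to_xywh_cls_conf helper in Example #3 below; the np.asarray copy and the 0.4 default threshold (mirroring thresh=0.4 above) are assumptions:

import numpy as np

def bbox_to_xywh_cls_conf(bbox, min_confidence=0.4):
    # assumed input layout: rows of [x1, y1, x2, y2, conf]
    bbox = np.asarray(bbox, dtype=np.float64)
    bbox = bbox[bbox[:, 4] > min_confidence, :]
    if len(bbox) == 0:
        return None, None  # matches the `if bbox_xywh is not None` check above
    # in-place conversion: widths/heights first, then centers
    bbox[:, 2] -= bbox[:, 0]      # w
    bbox[:, 3] -= bbox[:, 1]      # h
    bbox[:, 0] += bbox[:, 2] / 2  # cx
    bbox[:, 1] += bbox[:, 3] / 2  # cy
    return bbox[:, :4], bbox[:, 4]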
Example #3
class Detector(object):
    def __init__(self, centernet_opt, args):
        # CenterNet detector
        self.detector = detector_factory[centernet_opt.task](centernet_opt)
        # Deep SORT
        self.deepsort = DeepSort(args.deepsort_checkpoint,
                                 args.max_cosine_distance, args.use_cuda)
        self.args = args

    def run(self, video_path, output_path):
        # open input video
        assert os.path.isfile(video_path), "Error: invalid video path"
        vdo = cv2.VideoCapture()
        vdo.open(video_path)
        # open output video
        im_width = int(vdo.get(cv2.CAP_PROP_FRAME_WIDTH))
        im_height = int(vdo.get(cv2.CAP_PROP_FRAME_HEIGHT))
        fourcc = cv2.VideoWriter_fourcc(*"MJPG")
        output_vdo = cv2.VideoWriter(output_path, fourcc, 20, (im_width, im_height))
        # track each frame in video
        start_time = time.time()
        frame_cnt = 0
        while vdo.grab():
            frame_cnt += 1
            _, ori_im = vdo.retrieve()
            im = ori_im[0:im_height, 0:im_width]
            detection = self.detector.run(im)["results"][1]
            bbox_xywh, conf = Detector._bbox_to_xywh_cls_conf(detection, self.args.min_confidence)
            outputs = self.deepsort.update(bbox_xywh, conf, im)
            if len(outputs) > 0:
                bbox_xyxy = outputs[:, :4]
                identities = outputs[:, -1]
                ori_im = draw_bboxes(ori_im, bbox_xyxy, identities)
            elapsed_time = time.time() - start_time
            print("Frame {:05d}, Time {:.3f}s, FPS {:.3f}".format(
                frame_cnt, elapsed_time, frame_cnt / elapsed_time))
            output_vdo.write(ori_im)

    @staticmethod
    def _bbox_to_xywh_cls_conf(bbox, min_confidence):
        bbox = bbox[bbox[:, 4] > min_confidence, :]
        bbox[:, 2] = bbox[:, 2] - bbox[:, 0]
        bbox[:, 3] = bbox[:, 3] - bbox[:, 1]
        bbox[:, 0] = bbox[:, 0] + bbox[:, 2] / 2
        bbox[:, 1] = bbox[:, 1] + bbox[:, 3] / 2
        return bbox[:, :4], bbox[:, 4]
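
Because the conversion in _bbox_to_xywh_cls_conf is done in place, the column order matters: widths and heights must be computed before the centers overwrite x1/y1. A quick sanity check using the Detector class above (illustrative numbers only):

import numpy as np

bbox = np.array([[10., 20., 50., 80., 0.9]])  # x1, y1, x2, y2, conf
xywh, conf = Detector._bbox_to_xywh_cls_conf(bbox, min_confidence=0.5)
print(xywh)  # [[30. 50. 40. 60.]] -> cx, cy, w, h
print(conf)  # [0.9]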
Example #4
class DeepsortTracker(object):
    def __init__(self, config=config):
        self.config = config

        self.deepsort = DeepSort(config.deepsort_checkpoint, use_cuda=config.use_cuda)

    def detect(self, img, boxes_x1y1x2y2conf):
        box_xcycwh = []
        box_conf = []
        for box_x1y1x2y2conf in boxes_x1y1x2y2conf:
            box = box_x1y1x2y2conf
            box_xcycwh.append(np.array(
                [(box[0] + box[2]) // 2, (box[1] + box[3]) // 2,
                 box[2] - box[0], box[3] - box[1]], dtype=np.int32))
            box_conf.append(box[4])
        box_xcycwh = np.array(box_xcycwh)
        outputs, track_states = self.deepsort.update(box_xcycwh, box_conf, img)
        if len(outputs) == 0:
            return [], [], track_states
        box_x1y1x2y2 = outputs[:, :4]
        identities = outputs[:, -1]
        return box_x1y1x2y2, identities, track_states
Example #5
class DeepSortDetector(object):
    def __init__(self,
                 cfg,
                 weights,
                 video_path,
                 deep_checkpoint="deep_sort/deep/checkpoint/resnet50_last.pt",
                 output_file=None,
                 img_size=512,
                 display=True,
                 max_dist=0.2,
                 display_width=800,
                 display_height=600,
                 save_path=None,
                 json_path='./data/pascal_voc_classes.json'):
        device = torch.device(
            'cuda') if torch.cuda.is_available() else torch.device('cpu')
        # init opencv video capturer
        self.vidCap = cv2.VideoCapture()
        # init a detector
        self.yolov3 = InferYOLOv3(cfg, img_size, weights, device, json_path)
        # init a deepsort tracker
        self.deepsort = DeepSort(deep_checkpoint, max_dist)
        # settings
        self.display = display
        self.video_path = video_path
        self.output_file = output_file
        self.save_path = save_path

        if self.display:
            cv2.namedWindow("Test", cv2.WINDOW_NORMAL)
            cv2.resizeWindow("Test", display_width, display_height)

    # define a video writer named self.output
    def __enter__(self):
        assert os.path.isfile(self.video_path), "Error: path error"
        self.vidCap.open(self.video_path)
        self.im_width = int(self.vidCap.get(cv2.CAP_PROP_FRAME_WIDTH))
        self.im_height = int(self.vidCap.get(cv2.CAP_PROP_FRAME_HEIGHT))
        #self.im_width = 1280
        #self.im_height = 720

        if self.save_path is not None:
            fourcc = cv2.VideoWriter_fourcc(*'MJPG')
            self.output = cv2.VideoWriter(self.save_path, fourcc, 15.0,
                                          (self.im_width, self.im_height))
        assert self.vidCap.isOpened()
        return self

    def __exit__(self, exc_type, exc_value, exc_traceback):
        if exc_type:
            print(exc_type, exc_value, exc_traceback)

    # this is the key function that detects and counts fish
    def detect(self):
        json_path = './data/pascal_voc_classes.json'
        with open(json_path, 'r') as json_file:
            class_dict = json.load(json_file)
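        # invert the name -> id mapping into an id -> name lookup for display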
        category_index = {v: k for k, v in class_dict.items()}
        # All these classes will be counted as 'catch'
        list_of_catch = ["nephrops", "flat_fish", "round_fish"]
        # these classes will be counted as 'by-catch'
        list_of_bycatch = ["other"]
        LABELS = ['flat_fish', 'round_fish', 'nephrops', 'other']
        # store the object information; key: id, value: class
        all_obj_info = {}

        frame_no = -1
        num_frames, nephrops_count, flatfish_count, roundfish_count, other_count = 0, 0, 0, 0, 0
        catch_ratio, bycatch_ratio = 0, 0
        # skip_no = 2

        if self.output_file:
            f = open(self.output_file, "w")

        while self.vidCap.grab():
            frame_no += 1

            # skip frames every n frames
            # if frame_no % skip_no != 0:
            #     continue

            # start time
            total_begin = time.time()

            _, img = self.vidCap.retrieve()
            #img = img[:, :1280]

            # yolov3
            yolo_begin = time.time()
            # get the detections: bbx coordinates, confidences, classes
            bbox_xyxy_ori, cls_conf, cls_ids = self.yolov3.predict(img)
            print(cls_ids)

            # [x1,y1,x2,y2]
            yolo_end = time.time()

            # deepsort
            ds_begin = time.time()
            if bbox_xyxy_ori is not None:
                # convert the coordinates
                bbox_cxcywh = xyxy2xywh(bbox_xyxy_ori)
                # use the tracker to update
                outputs = self.deepsort.update(bbox_cxcywh, cls_conf, cls_ids,
                                               img)

                if len(outputs) > 0:
                    # [x1,y1,x2,y2] id class
                    # now we can fetch the bbx info, ids and classes
                    bbox_xyxy = outputs[:, :4]
                    ids = outputs[:, -2]
                    object_class = outputs[:, -1]
                    print(ids)
                    print(object_class)

                    ## obj_id and class alignment has some problems;
                    #  it is hard to make it very accurate and needs improvement
                    # A previous attempt tried to realign ids with cls_ids by
                    # offsetting the index whenever the two lists differed in
                    # length (len(cls_ids) == len(ids) - k for k = 1..10); it
                    # was replaced by the direct id -> class mapping below.
                    for i in range(len(ids)):
                        if ids[i] not in all_obj_info:
                            all_obj_info[ids[i]] = object_class[i]
                        else:
                            continue
                    print(all_obj_info)

                    # draw the bbx
                    img = draw_box(img, bbox_xyxy_ori, cls_ids, cls_conf,
                                   category_index)
                    #img = draw_bboxes(img, bbox_xyxy, ids)

                    # frame,id,tlwh,1,-1,-1,-1
                    # record the info
                    if self.output_file:
                        bbox_tlwh = xyxy2xywh(bbox_xyxy)
                        for i in range(len(bbox_tlwh)):
                            write_line = "%d,%d,%d,%d,%d,%d,1,-1,-1,-1\n" % (
                                frame_no + 1, outputs[i, -1],
                                int(bbox_tlwh[i][0]), int(bbox_tlwh[i][1]),
                                int(bbox_tlwh[i][2]), int(bbox_tlwh[i][3]))
                            f.write(write_line)
            ds_end = time.time()

            total_end = time.time()

            # count the current number of each category
            cur_categories = list(all_obj_info.values())
            flatfish_count = cur_categories.count(1)
            roundfish_count = cur_categories.count(2)
            nephrops_count = cur_categories.count(3)
            other_count = cur_categories.count(4)
            # start from frame 3
            if frame_no >= 3:
                catch_ratio = round(
                    (flatfish_count + roundfish_count + nephrops_count) /
                    (flatfish_count + roundfish_count + nephrops_count +
                     other_count), 2)
                bycatch_ratio = round(
                    other_count / (flatfish_count + roundfish_count +
                                   nephrops_count + other_count), 2)
            else:
                catch_ratio = None
                bycatch_ratio = None

            # print info to the console
            if frame_no is not None:
                print(
                    "frame:%04d|det:%.4f|deep sort:%.4f|total:%.4f|det p:%.2f%%|fps:%.2f"
                    % (frame_no, (yolo_end - yolo_begin), (ds_end - ds_begin),
                       (total_end - total_begin),
                       ((yolo_end - yolo_begin) * 100 /
                        (total_end - total_begin)),
                       (1 / (total_end - total_begin))))
            # display all the count info on the screen
            if self.display:
                img = np.uint8(img)
                displayNephropsCount(img, nephrops_count)
                displayFlatfishCount(img, flatfish_count)
                displayRoundfishCount(img, roundfish_count)
                displayOtherfishCount(img, other_count)
                displayCatchRatio(img, catch_ratio)
                displayByCatchRatio(img, bycatch_ratio)
                cv2.putText(img,
                            'FPS {:.1f}'.format(1 / (total_end - total_begin)),
                            (20, 280), cv2.FONT_HERSHEY_SIMPLEX, 0.8,
                            (255, 255, 255), 2, cv2.LINE_AA)
                cv2.imshow("Test", img)
                cv2.waitKey(1)

                # press Q to quit
                if cv2.waitKey(1) & 0xFF == ord('q'):
                    break
            # determine if output the new video
            if self.save_path:
                self.output.write(img)

        if self.output_file:
            f.close()
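
DeepSortDetector implements __enter__/__exit__, so it is meant to be driven as a context manager. A minimal usage sketch; every path below is a placeholder assumption:

with DeepSortDetector("yolov3.cfg", "yolov3_best.pt", "catch.mp4",
                      save_path="out.avi") as detector:
    detector.detect()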
Example #6
class Detector(object):
    def __init__(self):
        self.vdo = cv2.VideoCapture()
        self.yolo3 = YOLO3("YOLO3/cfg/yolo_v3.cfg",
                           "/local/b/cam2/data/HumanBehavior/yolov3.weights",
                           "YOLO3/cfg/coco.names",
                           is_xywh=True)
        self.deepsort = DeepSort("/local/b/cam2/data/HumanBehavior/ckpt.t7")
        self.class_names = self.yolo3.class_names
        self.write_video = True

    def open(self, video_path):
        assert os.path.isfile(video_path), "Error: path error"
        self.vdo.open(video_path)
        self.im_width = int(self.vdo.get(cv2.CAP_PROP_FRAME_WIDTH))
        self.im_height = int(self.vdo.get(cv2.CAP_PROP_FRAME_HEIGHT))
        self.area = 0, 0, self.im_width, self.im_height
        if self.write_video:
            fourcc = cv2.VideoWriter_fourcc(*'MJPG')
            self.output = cv2.VideoWriter("demo.avi", fourcc, 30,
                                          (self.im_width, self.im_height))
        return self.vdo.isOpened()

    def detect(self):
        xmin, ymin, xmax, ymax = self.area

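        # NOTE: detect() builds its own Darknet model below and bypasses the
        # self.yolo3 instance created in __init__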
        model = Darknet("./yolov3/cfg/yolov3.cfg")
        model.load_weights("/local/b/cam2/data/HumanBehavior/yolov3.weights")
        model.cuda()
        model.eval()
        print("loaded YOLO")

        while self.vdo.grab():
            start = time.time()
            _, ori_im = self.vdo.retrieve()
            im = ori_im[ymin:ymax, xmin:xmax, (2, 1, 0)]

            #bbox_xywh, cls_conf, cls_ids = self.yolo3(im)
            '''
            print("xy: \n", bbox_xywh)
            print("conf: \n", cls_conf)
            print("ids: \n", cls_ids)
            print("-----------------")
            '''
            bbox_xywh, cls_conf, cls_ids = detect_frame(model, im)
            '''
            print("xy: \n", bbox_xywh)
            print("conf: \n", cls_conf)
            print("ids: \n", cls_ids)
            print("-----------------")
            '''
            if bbox_xywh is not None:
                mask = cls_ids == 0
                bbox_xywh = bbox_xywh[mask]
                bbox_xywh[:, 3] *= 1.2
                cls_conf = cls_conf[mask]
                outputs = self.deepsort.update(bbox_xywh, cls_conf, im)
                if len(outputs) > 0:
                    bbox_xyxy = outputs[:, :4]
                    identities = outputs[:, -1]
                    ori_im = draw_bboxes(ori_im,
                                         bbox_xyxy,
                                         identities,
                                         offset=(xmin, ymin))

            end = time.time()
            print("time: {}s, fps: {}".format(end - start, 1 / (end - start)))

            #cv2.imshow("test", ori_im)
            #cv2.waitKey(1)

            if self.write_video:
                self.output.write(ori_im)
        print("done...")
Example #7
class Detector(object):
    def __init__(self, args):
        self.args = args
        use_cuda = bool(strtobool(self.args.use_cuda))

        self.vdo = cv2.VideoCapture()
        self.yolo3 = YOLOv3(args.yolo_cfg, args.yolo_weights, args.yolo_names, is_xywh=True,
                            conf_thresh=args.conf_thresh, nms_thresh=args.nms_thresh, use_cuda=use_cuda)
        self.deepsort = DeepSort(args.deepsort_checkpoint, use_cuda=use_cuda)
        self.class_names = self.yolo3.class_names

    def __enter__(self):
        assert os.path.isfile(self.args.VIDEO_PATH), "Error: path error"
        self.vdo.open(self.args.VIDEO_PATH)
        self.im_width = int(self.vdo.get(cv2.CAP_PROP_FRAME_WIDTH))
        self.im_height = int(self.vdo.get(cv2.CAP_PROP_FRAME_HEIGHT))
        self.end_frame = min(int(self.vdo.get(cv2.CAP_PROP_FRAME_COUNT)), self.args.end_frame)

        if self.args.save_path:
            fourcc = cv2.VideoWriter_fourcc(*'mp4v')
            self.output = cv2.VideoWriter(self.args.save_path, fourcc, 30, (self.im_width, self.im_height))

        assert self.vdo.isOpened()
        return self

    def __exit__(self, exc_type, exc_value, exc_traceback):
        if exc_type:
            print(exc_type, exc_value, exc_traceback)

    def detect(self):
        bbox = {}
        i = 0
        while self.vdo.grab() and i <= self.end_frame:
            start = time.time()
            bbox[i] = {}
            _, ori_im = self.vdo.retrieve()
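            # NOTE: the RGB conversion below is immediately overwritten, so the
            # detector is effectively fed the original BGR frame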
            im = cv2.cvtColor(ori_im, cv2.COLOR_BGR2RGB)
            im = ori_im
            bbox_xcycwh, cls_conf, cls_ids = self.yolo3(im)
            if bbox_xcycwh is not None:
                # select class person
                mask = cls_ids == 0

                bbox_xcycwh = bbox_xcycwh[mask]
                bbox_xcycwh[:, 3:] *= 1.2

                cls_conf = cls_conf[mask]
                outputs, scores = self.deepsort.update(bbox_xcycwh, cls_conf, im)
                if len(outputs) > 0:
                    bbox_xyxy = outputs[:, :4]
                    identities = outputs[:, -1]
                    states = outputs[:, 4]
                    time_since_updates = outputs[:, 5]
                    for j in range(len(outputs)):
                        bbox[i][int(identities[j])] = [int(bbox_xyxy[j][0]), int(bbox_xyxy[j][1]), int(bbox_xyxy[j][2]),
                                                       int(bbox_xyxy[j][3]), StateLetters[states[j]],
                                                       int(time_since_updates[j]), scores[j]]

            if i % 10 == 0:
                print(f"processing frame {i}, t/frame={time.time()-start}")

            i += 1

        import pickle
        import json
        fileName = self.args.VIDEO_PATH.replace('_original', '').rsplit(".", 1)[0] + "_track"
        pickle.dump(bbox, open(fileName+'.pkl', "wb"))
        json.dump(bbox, open(fileName+'.json', "w"), sort_keys=True, indent=4, separators=(',', ': '))
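
The tracks are dumped twice, as a pickle and as pretty-printed JSON, keyed by frame index and then track id. Reading them back is symmetric; the file name below is a placeholder, since the real stem is derived from VIDEO_PATH:

import pickle

with open("video_track.pkl", "rb") as fh:  # placeholder name
    bbox = pickle.load(fh)
# bbox[frame][track_id] == [x1, y1, x2, y2, state_letter, time_since_update, score]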
Example #8
class Detector(object):
    def __init__(self, args):
        self.args = args

        if args.display:
            cv2.namedWindow("test", cv2.WINDOW_NORMAL)
            cv2.resizeWindow("test", args.display_width, args.display_height)

        self.vdo = cv2.VideoCapture()
        self.yolo3 = YOLOv3(args.yolo_cfg, args.yolo_weights, args.yolo_names, is_xywh=True, conf_thresh=args.conf_thresh, nms_thresh=args.nms_thresh)
        self.deepsort = DeepSort(args.deepsort_checkpoint)
        self.class_names = self.yolo3.class_names


        #self.maskrcnn = 

    def __enter__(self):
        assert os.path.isfile(self.args.VIDEO_PATH), "Error: path error"
        self.vdo.open(self.args.VIDEO_PATH)
        self.im_width = int(self.vdo.get(cv2.CAP_PROP_FRAME_WIDTH))
        self.im_height = int(self.vdo.get(cv2.CAP_PROP_FRAME_HEIGHT))

        if self.args.save_path:
            fourcc =  cv2.VideoWriter_fourcc(*'MJPG')
            self.output = cv2.VideoWriter(self.args.save_path, fourcc, 30, (self.im_width,self.im_height))

        assert self.vdo.isOpened()
        return self

    
    def __exit__(self, exc_type, exc_value, exc_traceback):
        if exc_type:
            print(exc_type, exc_value, exc_traceback)
        

    def return_user_dict(self):
        return self.user_entry_dict

    def detect(self):
        #xmin, ymin, xmax, ymax = self.area
        jump_flag = 1 
        start = time.time()
        while self.vdo.grab(): 
            #multicore
            #pool = mp.Pool(processes=6) #6-core
            _, ori_im = self.vdo.retrieve()
            im_height, im_width = ori_im.shape[:2]
            x_max = 5
            y_max = 5
            x_grid = int(im_width / x_max)
            y_grid = int(im_height / y_max)
            display_im = ori_im
            
            # for i in range(1, x_max + 1):
            #     cv2.line(ori_im, (x_grid * i, 0), (x_grid * i, im_height), (0, 255, 255), 3)
            # for i in range(1, y_max + 1):
            #     cv2.line(ori_im, (0, y_grid * i), (im_width, y_grid * i), (0, 255, 255), 3)
            # for i in range(len(unseen_frame)):
            #     if unseen_frame[i] > -1:
            #         unseen_frame[i] += 1 
            if jump_flag % 2 == 0:  # process every other frame
                #start = time.time()

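                # send the serialized user_entry_dict to the remote host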
                clientsocket = socket(AF_INET, SOCK_STREAM)
                clientsocket.connect(('140.114.79.179', 10523))
                clientsocket.send(pickle.dumps(user_entry_dict))

                im = cv2.cvtColor(ori_im, cv2.COLOR_BGR2RGB)
                #img = ori_im
                
                bbox_xcycwh, cls_conf, cls_ids = self.yolo3(im)
                cv2.circle(ori_im, (3900, 2100), 50, (255,0,0),-1)
                
                if bbox_xcycwh is not None:
                    # select class person
                    mask = cls_ids==0

                    bbox_xcycwh = bbox_xcycwh[mask]
                    bbox_xcycwh[:,3:] *= 1.2

                    cls_conf = cls_conf[mask]
                    outputs = self.deepsort.update(bbox_xcycwh, cls_conf, im)
                    for output in outputs:
                        if output[4] > len(people_path):
                            for i in range(0, output[4] - len(people_path)):
                                people_path.append([])
                                direction_start.append(0)
                                unseen_frame.append(-1)
                        people_path[output[4] - 1].append(np.array(([(output[0] + output[2]) / 2, output[3]])))
                        coordinate = output[:4]
                        bbox_area = get_bbox_area(coordinate)
                        
                        features = []
                        if bbox_area > area_threshold:
                            try:
                                if area_dic[output[-1]] < bbox_area:
                                    area_dic[output[-1]] = bbox_area
                                    roiImg = im[output[1]:output[3], output[0]:output[2]]  # img[y, x]
                                    features = mask_ouput(roiImg)  # features = [[t-shirt, 0.9, [coordinates]], ...]
                                    features = merge_color(roiImg, features)
                                    #result = pool.apply_async(subroi, (ori_im, output))
                                    #results.append(result)
                                    #for wait()
                                    print("re: ---------------", features)

                            except KeyError:
                                area_dic.setdefault(output[-1], bbox_area)
                                roiImg = im[output[1]:output[3], output[0]:output[2]]  # img[y, x]
                                features = mask_ouput(roiImg)  # features = [[t-shirt, 0.9, [coordinates]], ...]
                                features = merge_color(roiImg, features)
                                #result = pool.apply_async(subroi, (ori_im, output))
                                #results.append(result)
                                print("wait---------------")
                            
                            
                            if output[-1] not in user_entry_dict:
                                user_entry_dict.setdefault(output[-1], [features, exix_point, CAMERA_ID, []])  # add entry id
                            else:
                                for feature in features:
                                    flag = 1
                                    for i in range(len(user_entry_dict[output[-1]][0])):
                                        if feature[0] in user_entry_dict[output[-1]][0][i]:
                                            user_entry_dict[output[-1]][0][i][1] = max(user_entry_dict[output[-1]][0][i][1], feature[1])  # update the confidence
                                            flag = 0
                                    if flag == 1:
                                        user_entry_dict[output[-1]][0].append(feature)
                            print(user_entry_dict)
                            
                        #call project.py
                            find_grids( output, [x_grid, y_grid], 0.3, user_entry_dict[output[-1]])


                        x = []
                        y = []
                        for i in range(direction_start[output[4] - 1], len(people_path[output[4] - 1])):
                            x.append(people_path[output[4] - 1][i][0])
                            y.append(people_path[output[4] - 1][i][1])
                        path_x = (output[0] + output[2]) / 2
                        path_y = output[3]
                        if len(x) > 1:
                            a, b, c = pu.cal_simple_linear_regression_coefficients(x, y)
                            #print(abs(a * path_x + b * path_y + c) / math.sqrt(a * a + b * b))
                            if abs(a * path_x + b * path_y + c) / math.sqrt(a * a + b * b) > 200 and unseen_frame[output[4] - 1] < 10:
                                continue
                            if abs(a * path_x + b * path_y + c) / math.sqrt(a * a + b * b) < distance_threshold:
                                #print("projection")
                                path_x, path_y = pu.find_projection(a, b, c, path_x, path_y)
                                if len(people_path[output[4] - 1]) > 0:
                                    prev_x = people_path[output[4] - 1][len(people_path[output[4] - 1]) - 1][0]
                                    prev_y = people_path[output[4] - 1][len(people_path[output[4] - 1]) - 1][1]
                                    velocity = math.sqrt((path_x - prev_x) * (path_x - prev_x) + (path_y - prev_y) * (path_y - prev_y)) * 30 / (unseen_frame[output[4] - 1] + 1)
                                    #print("velocity: {}".format(velocity))
                            else:
                                #print("turn")
                                direction_start[output[4] - 1] = len(people_path[output[4] - 1])
                        people_path[output[4] - 1].append(np.array((path_x, path_y)))
                        unseen_frame[output[4] - 1] = 0
                    if len(outputs) > 0:
                        bbox_xyxy = outputs[:,:4]
                        identities = outputs[:,-1]
                        ori_im = draw_bboxes(ori_im, bbox_xyxy, identities)
                        for id in identities:
                            for i in range(1, len(people_path[id-1])):
                                cv2.line(ori_im, (int(people_path[id-1][i-1][0]), int(people_path[id-1][i-1][1])), 
                                (int(people_path[id-1][i][0]), int(people_path[id-1][i][1])), (0, 0, 255), 3)
                        #pool.close()
                        #pool.join()
                    # for result in results:
                    #     print(result.get())
                #end = time.time()
                #print("time: {}s, fps: {}".format(end-start, 1/(end-start)))
                print(area_dic)
            jump_flag+=1
            if self.args.display:
                cv2.imshow("test", ori_im)
                cv2.waitKey(1)

            if self.args.save_path:
                self.output.write(ori_im)
        end = time.time()
        print(end-start)
Example #9
class Detector(object):
    def __init__(self, args):
        self.args = args
        use_cuda = bool(strtobool(self.args.use_cuda))

        #self.vdo = cv2.VideoCapture()
        self.imgList = natsort.natsorted(glob.glob(self.args.imgs_path))
        self.detectron2 = Detectron2()

        # Initialize coordinate mapper
        self.myCoordMapper = coord_mapper.CoordMapperCSG(
            match_code='HUN-BEL 2. Half')
        self.fps = 6

        self.deepsort = DeepSort(args.deepsort_checkpoint,
                                 lambdaParam=0.6,
                                 coordMapper=self.myCoordMapper,
                                 max_dist=1.0,
                                 min_confidence=0.1,
                                 nms_max_overlap=0.7,
                                 max_iou_distance=0.7,
                                 max_age=self.fps * 3,
                                 n_init=3,
                                 nn_budget=50,
                                 use_cuda=use_cuda)

    def __enter__(self):
        #assert os.path.isfile(self.args.video_path), "Error: path error"
        #self.vdo.open(self.args.video_path)
        #self.im_width = int(self.vdo.get(cv2.CAP_PROP_FRAME_WIDTH))
        #self.im_height = int(self.vdo.get(cv2.CAP_PROP_FRAME_HEIGHT))

        img = cv2.imread(self.imgList[0])
        self.im_height, self.im_width, _ = img.shape

        # NOTE: the output video is written at self.fps frames per second
        if self.args.save_path:
            fourcc = cv2.VideoWriter_fourcc(*'XVID')
            self.output = cv2.VideoWriter(self.args.save_path, fourcc,
                                          self.fps,
                                          (self.im_width, self.im_height))

        #assert self.vdo.isOpened()
        return self

    def __exit__(self, exc_type, exc_value, exc_traceback):
        if exc_type:
            print(exc_type, exc_value, exc_traceback)

    def detect(self):
        # check whether there is a next frame
        results = []
        allDetection = dict()
        idx_frame = 0

        #while self.vdo.grab():
        while idx_frame < len(self.imgList):
            start = time.time()

            # Retrieve next frame
            #_, im = self.vdo.retrieve()
            im = cv2.imread(self.imgList[idx_frame])
            # im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB) # only for images

            # Detect object on image
            bbox_xcycwh, cls_conf, cls_ids = self.detectron2.detect(im)
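            # keep only detections whose bottom-center point maps to a valid
            # world coordinate (image2xy returns None for unmappable points)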
            detection_mask = [(xc, yc + (h / 2))
                              for xc, yc, w, h in bbox_xcycwh]
            detection_mask = self.myCoordMapper.image2xy(detection_mask)
            detection_mask = [
                False if x is None else True for x in detection_mask
            ]
            bbox_xcycwh = bbox_xcycwh[detection_mask]
            cls_conf = cls_conf[detection_mask]
            cls_ids = cls_ids[detection_mask]

            # TODO: do we need a null check here?
            if bbox_xcycwh is not None:  # and len(bbox_xcycwh) > 0
                # NOTE: this is a double check, since all returned boxes are person objects (asserted in the detect function)
                # select class person
                mask = cls_ids == 0
                cls_conf = cls_conf[mask]

                # NOTE: only the height is multiplied by 1.2, why?
                # ANSWER: bbox dilation just in case the bbox is too small; delete this line if using a better pedestrian detector
                # TODO: Uncomment 1.1
                bbox_xcycwh = bbox_xcycwh[mask]
                #bbox_xcycwh[:, 3:] *= 1.1

                idx_frame += 1
                # draw all detection boxes
                bb_xyxy = [[xc - w / 2, yc - h / 2, xc + w / 2, yc + h / 2]
                           for xc, yc, w, h in bbox_xcycwh]
                bb_xyxy = [
                    x for x, conf in zip(bb_xyxy, cls_conf)
                    if conf > self.deepsort.min_confidence
                ]
                all1 = [None] * len(bb_xyxy)
                im = draw_bboxes(im, bb_xyxy, all1)

                # Do tracking
                outputs, deadtracks = self.deepsort.update(
                    bbox_xcycwh, cls_conf, im)
                print('len outputs:{0}, len deadtracks:{1}'.format(
                    len(outputs), len(deadtracks)))

                # Draw boxes for visualization
                if len(outputs) > 0:
                    bbox_xyxy = outputs[:, :4]
                    identities = outputs[:, -1]
                    im = draw_bboxes(im, bbox_xyxy, identities)

                    # Write to file
                    bbox_tlwh = [
                        self.deepsort._xyxy_to_tlwh(bb) for bb in bbox_xyxy
                    ]
                    results.append((idx_frame - 1, bbox_tlwh, identities))

                im = draw_frameNum(im, (2514, 330), idx_frame - 1)

                # Draw boxes for dead tracks for debugging
                if len(deadtracks) > 0:
                    bbox_xyxy = [x[:4] for x in deadtracks]
                    labels = [x[-1] for x in deadtracks]
                    im = draw_dead_bboxes(im, bbox_xyxy, labels)

            end = time.time()
            print(
                "time: {}s, fps: {}, frame: {}".format(end - start,
                                                       1 / (end - start),
                                                       idx_frame - 1), '\n',
                '-' * 30, '\n')

            if self.args.save_path:
                self.output.write(im)

        # Write all tracked objs to file
        write_results(self.args.result_path, results, 'mot')
Example #10
class Detector(object):
    def __init__(self, args):
        self.args = args
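        # display is forced off here, so the window setup below never runs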
        args.display = False
        if args.display:
            cv2.namedWindow("test", cv2.WINDOW_NORMAL)
            cv2.resizeWindow("test", args.display_width, args.display_height)

        self.vdo = cv2.VideoCapture()
        self.yolo3 = YOLOv3(args.yolo_cfg,
                            args.yolo_weights,
                            args.yolo_names,
                            is_xywh=True,
                            conf_thresh=args.conf_thresh,
                            nms_thresh=args.nms_thresh)
        self.deepsort = DeepSort(args.deepsort_checkpoint)
        self.class_names = self.yolo3.class_names

    def __enter__(self):
        assert os.path.isfile(self.args.VIDEO_PATH), "Error: path error"
        self.vdo.open(self.args.VIDEO_PATH)
        self.im_width = int(self.vdo.get(cv2.CAP_PROP_FRAME_WIDTH))
        self.im_height = int(self.vdo.get(cv2.CAP_PROP_FRAME_HEIGHT))

        if self.args.save_path:
            fourcc = cv2.VideoWriter_fourcc(*'MJPG')
            self.output = cv2.VideoWriter(self.args.save_path, fourcc, 20,
                                          (self.im_width, self.im_height))

        assert self.vdo.isOpened()
        return self

    def __exit__(self, exc_type, exc_value, exc_traceback):
        if exc_type:
            print(exc_type, exc_value, exc_traceback)

    def detect(self):
        while self.vdo.grab():
            start = time.time()
            _, ori_im = self.vdo.retrieve()
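            # NOTE: the RGB conversion below is immediately overwritten, so the
            # detector is effectively fed the original BGR frame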
            im = cv2.cvtColor(ori_im, cv2.COLOR_BGR2RGB)
            im = ori_im
            bbox_xcycwh, cls_conf, cls_ids = self.yolo3(im)
            if bbox_xcycwh is not None:
                # select class person
                mask = cls_ids == 0

                bbox_xcycwh = bbox_xcycwh[mask]
                bbox_xcycwh[:, 3:] *= 1.2

                cls_conf = cls_conf[mask]
                outputs = self.deepsort.update(bbox_xcycwh, cls_conf, im)
                if len(outputs) > 0:
                    bbox_xyxy = outputs[:, :4]
                    identities = outputs[:, -1]
                    ori_im = draw_bboxes(ori_im, bbox_xyxy, identities)

            end = time.time()
            print("time: {}s, fps: {}".format(end - start, 1 / (end - start)))

            if self.args.display:
                cv2.imshow("test", ori_im)
                cv2.waitKey(1)

            if self.args.save_path:
                self.output.write(ori_im)
Example #11
class Detector(object):
    def __init__(self, opt):
        self.opt = opt
        self.vdo = cv2.VideoCapture()

        #centerNet detector
        self.detector = detector_factory[opt.task](opt)
        self.deepsort = DeepSort("deep/checkpoint/ckpt.t7")

        self.write_video = True

    def open(self, video_path):
        opt = self.opt

        if opt.input_type == 'webcam':
            self.vdo.open(opt.webcam_ind)

        elif opt.input_type == 'ipcam':
            # load cam key, secret
            with open("cam_secret.txt") as f:
                lines = f.readlines()
                key = lines[0].strip()
                secret = lines[1].strip()

            self.vdo.open(opt.ipcam_url.format(key, secret, opt.ipcam_no))

        # video
        else:
            assert os.path.isfile(opt.vid_path), "Error: path error"
            self.vdo.open(opt.vid_path)

        self.im_width = int(self.vdo.get(cv2.CAP_PROP_FRAME_WIDTH))
        self.im_height = int(self.vdo.get(cv2.CAP_PROP_FRAME_HEIGHT))

        self.area = 0, 0, self.im_width, self.im_height
        if self.write_video:
            fourcc = cv2.VideoWriter_fourcc(*'MJPG')
            self.output = cv2.VideoWriter("demo1.avi", fourcc, 20,
                                          (self.im_width, self.im_height))
        #return self.vdo.isOpened()

    def detect(self):
        xmin, ymin, xmax, ymax = self.area
        frame_no = 0
        avg_fps = 0.0
        while self.vdo.grab():

            frame_no += 1
            start = time.time()
            _, ori_im = self.vdo.retrieve()
            im = ori_im[ymin:ymax, xmin:xmax]
            #im = ori_im[ymin:ymax, xmin:xmax, :]

            #start_center =  time.time()

            results = self.detector.run(im)['results']
            bbox_xywh, cls_conf = bbox_to_xywh_cls_conf(results)

            if bbox_xywh is not None:
                outputs = self.deepsort.update(bbox_xywh, cls_conf, im)

                if len(outputs) > 0:
                    bbox_xyxy = outputs[:, :4]
                    identities = outputs[:, -1]
                    ori_im = draw_bboxes(ori_im,
                                         bbox_xyxy,
                                         identities,
                                         offset=(xmin, ymin))

            end = time.time()
            #print("deep time: {}s, fps: {}".format(end - start_deep_sort, 1 / (end - start_deep_sort)))

            fps = 1 / (end - start)

            avg_fps += fps
            print("centernet time: {}s, fps: {}, avg fps : {}".format(
                end - start, fps, avg_fps / frame_no))

            cv2.imshow("test", ori_im)
            cv2.waitKey(1)

            if self.write_video:
                self.output.write(ori_im)
Example #12

if video_capture.isOpened():
  video_writer = cv2.VideoWriter("output.avi", cv2.VideoWriter_fourcc(*'MJPG'), fps, (width, height))
  while video_capture.isOpened():
    ret, frame = video_capture.read()
    if not ret:
      break
      
    start = time.time()
    xmin, ymin, xmax, ymax = 0, 0, width, height
    im = frame[ymin:ymax, xmin:xmax, (2,1,0)]
    bbox_xywh, cls_conf, cls_ids = yolo3(im)
    if bbox_xywh is not None:
        mask = cls_ids==0
        bbox_xywh = bbox_xywh[mask]
        bbox_xywh[:,3] *= 1.2
        cls_conf = cls_conf[mask]
        outputs = deepsort.update(bbox_xywh, cls_conf, im)
        if len(outputs) > 0:
            bbox_xyxy = outputs[:,:4]
            identities = outputs[:,-1]
            frame = draw_bboxes(frame, bbox_xyxy, identities, offset=(xmin,ymin))

    end = time.time()
    print("time: {}s, fps: {}".format(end-start, 1/(end-start)))
            
    video_writer.write(frame)
  video_capture.release()
  video_writer.release()
  # convert AVI to MP4
  !ffmpeg -y -loglevel info -i output.avi output.mp4
else:
  print("can't open the given input video file!")
Example #13
class MOTTracker(object):
    def __init__(self, args):
        self.args = args
        # if args.display:
        #     cv2.namedWindow("test", cv2.WINDOW_NORMAL)
        #     cv2.resizeWindow("test", args.display_width, args.display_height)
        self.open_video()
        #self.yolo3 = YOLOv3(args.yolo_cfg, args.yolo_weights, args.yolo_names,use_cuda=args.use_cuda, is_xywh=True, conf_thresh=args.conf_thresh, nms_thresh=args.nms_thresh)
        self.command_type = args.mot_type
        threshold = np.array([0.7, 0.8, 0.9])
        crop_size = [112, 112]
        if self.command_type == 'face':
            self.mtcnn = MtcnnDetector(threshold, crop_size, args.detect_model)
        elif self.command_type == 'person':
            self.person_detect = RetinanetDetector(args)
        self.deepsort = DeepSort(args.feature_model,
                                 args.face_load_num,
                                 use_cuda=args.use_cuda,
                                 mot_type=self.command_type)
        self.kf = KalmanFilter()
        self.meanes_track = []
        self.convariances_track = []
        self.id_cnt_dict = dict()
        self.moveTrack = MoveTrackerRun(self.kf)
        self.img_clarity = BlurDetection()
        self.score = 60.0

    def open_video(self):
        if not os.path.isfile(self.args.VIDEO_PATH):
            raise Exception("Error:input video path is not exist")
        self.vdo = cv2.VideoCapture(self.args.VIDEO_PATH)
        self.im_width = int(self.vdo.get(cv2.CAP_PROP_FRAME_WIDTH))
        self.im_height = int(self.vdo.get(cv2.CAP_PROP_FRAME_HEIGHT))
        if self.args.save_dir:
            if not os.path.exists(self.args.save_dir):
                os.makedirs(self.args.save_dir)
            #fourcc =  cv2.VideoWriter_fourcc(*'MJPG')
            #self.output = cv2.VideoWriter(self.args.save_path, fourcc, 20, (self.im_width,self.im_height))
        if not self.vdo.isOpened():
            raise Exception('failed to open the video')

    def xcycah2xcyc(self, xyah):
        xyah = np.array(xyah)
        xyah = xyah[:, :4]
        w = xyah[:, 2] * xyah[:, 3]
        h = xyah[:, 3]
        xc = xyah[:, 0]  #+ w/2
        yc = xyah[:, 1]  #+ h/2
        return np.vstack([xc, yc, w, h]).T

    def xcycah2xyxy(self, xcycah):
        xcycah = np.array(xcycah)
        xcycah = xcycah[:, :4]
        w = xcycah[:, 2] * xcycah[:, 3]
        h = xcycah[:, 3]
        x2 = xcycah[:, 0] + w / 2
        y2 = xcycah[:, 1] + h / 2
        x1 = xcycah[:, 0] - w / 2
        y1 = xcycah[:, 1] - h / 2
        return np.vstack([x1, y1, x2, y2]).T

    def xyxy2xcyc(self, xywh):
        w = xywh[:, 2] - xywh[:, 0]
        h = xywh[:, 3] - xywh[:, 1]
        xc = xywh[:, 0] + w / 2
        yc = xywh[:, 1] + h / 2
        return np.vstack([xc, yc, w, h]).T

    def xyxy2xywh(self, xywh):
        w = xywh[:, 2] - xywh[:, 0]
        h = xywh[:, 3] - xywh[:, 1]
        return np.vstack([xywh[:, 0], xywh[:, 1], w, h]).T

    def xywh2xcycwh(self, xywh):
        xywh = np.array(xywh)
        xc = xywh[:, 0] + xywh[:, 2] / 2
        yc = xywh[:, 1] + xywh[:, 3] / 2
        return np.vstack([xc, yc, xywh[:, 2], xywh[:, 3]]).T

    def xywh2xyxy(self, xywh):
        xywh = np.array(xywh)
        x2 = xywh[:, 0] + xywh[:, 2]
        y2 = xywh[:, 1] + xywh[:, 3]
        return np.vstack([xywh[:, 0], xywh[:, 1], x2, y2]).T

    def xcyc2xcycah(self, bbox_xcycwh):
        bbox_xcycwh = np.array(bbox_xcycwh, dtype=np.float32)
        xc = bbox_xcycwh[:, 0]  #- bbox_xcycwh[:,2]/2
        yc = bbox_xcycwh[:, 1]  #- bbox_xcycwh[:,3]/2
        a = bbox_xcycwh[:, 2] / bbox_xcycwh[:, 3]
        return np.vstack([xc, yc, a, bbox_xcycwh[:, 3]]).T

    def widerbox(self, boxes):
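        # pad each box by 30% of its width/height on every side, clipped to
        # the frame; e.g. (200, 200, 300, 300) -> (170, 170, 330, 330)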
        x1 = boxes[:, 0]
        y1 = boxes[:, 1]
        x2 = boxes[:, 2]
        y2 = boxes[:, 3]
        boxw = x2 - x1
        boxh = y2 - y1
        x1 = np.maximum(0, x1 - 0.3 * boxw)
        y1 = np.maximum(0, y1 - 0.3 * boxh)
        x2 = np.minimum(self.im_width, x2 + 0.3 * boxw)
        y2 = np.minimum(self.im_height, y2 + 0.3 * boxh)
        return np.vstack([x1, y1, x2, y2]).T

    def save_track_results(self, bbox_xyxy, img, identities, offset=[0, 0]):
        for i, box in enumerate(bbox_xyxy):
            x1, y1, x2, y2 = [int(i) for i in box]
            x1 += offset[0]
            x2 += offset[0]
            y1 += offset[1]
            y2 += offset[1]
            x1 = min(max(x1, 0), self.im_width - 1)
            y1 = min(max(y1, 0), self.im_height - 1)
            x2 = min(max(x2, 0), self.im_width - 1)
            y2 = min(max(y2, 0), self.im_height - 1)
            # box text and bar
            id = str(identities[i]) if identities is not None else '0'
            crop_img = img[y1:y2, x1:x2, :]
            if self.img_clarity._blurrDetection(crop_img) > self.score:
                tmp_cnt = self.id_cnt_dict.setdefault(id, 0)
                self.id_cnt_dict[id] = tmp_cnt + 1
                save_dir = os.path.join(self.args.save_dir, id)
                if not os.path.exists(save_dir):
                    os.makedirs(save_dir)
                save_path = os.path.join(save_dir,
                                         id + '_' + str(tmp_cnt) + '.jpg')
                cv2.imwrite(save_path, crop_img)
            else:
                continue

    def detect(self):
        cnt = 0
        update_fg = True
        detect_fg = True
        total_time = 0
        outputs = []
        while self.vdo.isOpened():
            start = time.time()
            ret, ori_im = self.vdo.read()
            if not ret:
                break
            im = cv2.cvtColor(ori_im, cv2.COLOR_BGR2RGB)
            im = np.array([im])
            if cnt % 5 == 0 or detect_fg:
                # bbox_xcycwh, cls_conf, cls_ids = self.yolo3(im)
                # mask = cls_ids==0
                # bbox_xcycwh = bbox_xcycwh[mask]
                # bbox_xcycwh[:,3:] *= 1.2
                # cls_conf = cls_conf[mask]
                if self.command_type == 'face':
                    rectangles = self.mtcnn.detectFace(im, True)
                    rectangles = rectangles[0]
                    if len(rectangles) < 1:
                        continue
                    bboxes = rectangles[:, :4]
                    bboxes = self.widerbox(bboxes)
                    #
                    bbox_xcycwh = self.xyxy2xcyc(bboxes)
                    cls_conf = rectangles[:, 4]
                elif self.command_type == 'person':
                    bboxes, cls_conf = self.person_detect.test_img_org(ori_im)
                    if len(bboxes) == 0:
                        continue
                    bbox_xcycwh = self.xywh2xcycwh(bboxes)
                #outputs = bboxes #self.xywh2xyxy(bboxes)
                update_fg = True
                box_xcycah = self.xcyc2xcycah(bbox_xcycwh)
                self.moveTrack.track_init(box_xcycah)
                self.moveTrack.track_predict()
                self.moveTrack.track_update(box_xcycah)
                # detect_xywh = self.xyxy2xywh(bboxes) if self.command_type=='face' else bboxes
                # self.tracker_run.init(ori_im,detect_xywh.tolist())
                detect_fg = False
            else:
                if len(bbox_xcycwh) > 0:
                    start1 = time.time()
                    self.moveTrack.track_predict()
                    bbox_xcycwh = self.xcycah2xcyc(self.moveTrack.means_track)
                    #outputs = self.xcycah2xyxy(self.moveTrack.means_track)
                    # boxes_tmp = self.tracker_run.update(ori_im)
                    # bbox_xcycwh = self.xywh2xcycwh(boxes_tmp)
                    end1 = time.time()
                    print('only tracker time consume:', end1 - start1)
                    #outputs = self.xywh2xyxy(boxes_tmp)
                    update_fg = False
                    detect_fg = False
                else:
                    detect_fg = True
            if len(bbox_xcycwh) > 0:
                outputs = self.deepsort.update(bbox_xcycwh, cls_conf, ori_im,
                                               update_fg)
            end = time.time()
            consume = end - start
            if len(outputs) > 0:
                #outputs = rectangles
                bbox_xyxy = outputs[:, :4]
                identities = outputs[:, -1]  #np.zeros(outputs.shape[0])
                ori_im = draw_bboxes(ori_im, bbox_xyxy, identities)
                #self.save_track_results(bbox_xyxy,ori_im,identities)
            print("frame: {} time: {}s, fps: {}".format(
                cnt, consume, 1 / (end - start)))
            cnt += 1
            cv2.imshow("test", ori_im)
            c = cv2.waitKey(1) & 0xFF
            if c == 27 or c == ord('q'):
                break
            #if self.args.save_path:
            #   self.output.write(ori_im)
            total_time += consume
        self.vdo.release()
        cv2.destroyAllWindows()
        print("video ave fps and total_time: ", cnt / total_time, total_time)
Example #14
class Detector(object):
    def __init__(self,
                 detections_file: str,
                 resolution: tuple,
                 fps: int,
                 input_images_dir: str,
                 output_video_path: str,
                 output_result_path: str,
                 use_cuda: bool,
                 lambdaParam: float,
                 max_dist: float,
                 min_confidence: float,
                 nms_max_overlap: float,
                 max_iou_distance: float,
                 max_age: int,
                 n_init: int,
                 nn_budget: int,
                 model_path='deep_sort/deep/checkpoint/ckpt.t7',
                 early_stopping=None):

        self.detections_file = detections_file  # the pickle file that contains all detections
        self.input_images_dir = input_images_dir  # the directory holding the 2.5K images, named {frameNum}.jpg
        self.output_video_path = output_video_path  # where the visualization video is saved
        self.output_result_path = output_result_path  # where the output is saved in CSV format
        self.early_stopping = early_stopping

        assert self.output_result_path is not None and self.detections_file is not None

        self._use_cuda = use_cuda
        self.fps = fps
        self.resolution = resolution
        # Initialize coordinate mapper
        self.myCoordMapper = coord_mapper.CoordMapperCSG(
            match_code='HUN-BEL 1. Half')

        self.deepsort = DeepSort(model_path=model_path,
                                 lambdaParam=lambdaParam,
                                 coordMapper=self.myCoordMapper,
                                 max_dist=max_dist,
                                 min_confidence=min_confidence,
                                 nms_max_overlap=nms_max_overlap,
                                 max_iou_distance=max_iou_distance,
                                 max_age=max_age,
                                 n_init=n_init,
                                 nn_budget=nn_budget,
                                 use_cuda=self._use_cuda,
                                 resolution=(self.resolution[0] * 2,
                                             self.resolution[1]),
                                 fps=self.fps)

    def initVideoOutput(self):
        if self.input_images_dir is None or self.output_video_path is None:
            return

        # every image here is 2.5K
        imgList = natsort.natsorted(glob.glob(self.input_images_dir))
        self.dict_frame2path = {
            int(path.split('/')[-1].split('.')[0]): path
            for path in imgList
        }

        self.out_vid_height, self.out_vid_width = self.resolution[
            1], self.resolution[0] * 2

        fourcc = cv2.VideoWriter_fourcc(*'XVID')
        self.output = cv2.VideoWriter(
            self.output_video_path, fourcc, self.fps,
            (self.out_vid_width, self.out_vid_height))

    def writeVideoOutput(self,
                         frameNum,
                         list_detections,
                         tracks,
                         deadtracks,
                         draw_detections=True,
                         draw_tracks=True,
                         draw_deadtracks=True):
        if self.input_images_dir is None or self.output_video_path is None:
            return

        # read the matching frame
        img = cv2.imread(self.dict_frame2path[frameNum])
        # resize it to the output resolution
        img = cv2.resize(img, (self.out_vid_width, self.out_vid_height),
                         interpolation=cv2.INTER_AREA)

        # draw the detection boxes
        if draw_detections:
            bb_xyxy = [det['box'] for det in list_detections]
            all1 = [None] * len(bb_xyxy)
            img = draw_bboxes(img, bb_xyxy, all1)

        resizeFactor = self.resolution[0] / 2560

        # draw the tracks
        if len(tracks) > 0 and draw_tracks:
            bbox_xyxy = tracks[:, :4] * resizeFactor
            identities = tracks[:, 4]
            img = draw_bboxes(img, bbox_xyxy, identities)

        # Draw boxes for dead tracks for debugging
        if len(deadtracks) > 0 and draw_deadtracks:
            bbox_xyxy = [x[:4] for x in deadtracks]
            bbox_xyxy = [np.array(c) * resizeFactor for c in bbox_xyxy]
            labels = [x[4] for x in deadtracks]
            img = draw_dead_bboxes(img, bbox_xyxy, labels)

        # Also draw the frame number
        img = draw_frameNum(
            img, (self.out_vid_width // 2, self.out_vid_height // 10),
            frameNum)

        # Write to file
        self.output.write(img)

    def closeVideoOutput(self):
        if self.input_images_dir is None or self.output_video_path is None:
            return
        self.output.release()

    def writeResults(self, frameNum, tracks, ts_start, ts_end):
        '''
        tracks : np.array = List[ [x1, y1, x2, y2, tID, xWorld, yWorld] ]
        '''
        if len(tracks) == 0:
            return

        list_tracks = [{
            'frame': frameNum,
            'ts_start': ts_start,
            'ts_end': ts_end,
            'xTL': xTL,
            'yTL': yTL,
            'xBR': xBR,
            'yBR': yBR,
            'tID': tID,
            'xWorld': xWorld,
            'yWorld': yWorld
        } for xTL, yTL, xBR, yBR, tID, xWorld, yWorld in tracks]

        pd.DataFrame(list_tracks).to_csv(
            self.output_result_path,
            mode='a',
            index=False,
            header=(not os.path.exists(self.output_result_path)))

    def doTrackingOnDetectionFile(self):
        '''
        The detections pickle file looks like this:
        dict( frameNum : List[dict_detection] )

        dict_detection = {'worldXY' : tuple(X, Y), 'box' : [xTL, yTL, xBR, yBR],
                          'bigBox' : [xTL, yTL, xBR, yBR], 'score' : float, 'image' : np.array(NxM),
                          'team' : one of ['red', 'yellow', 'other', 'more player from different team']}
        '''
        # Calculate frame skipping (assumes a 60 fps source video)
        assert 60 % self.fps == 0
        stepFrame = 60 // self.fps

        print('Reading detections pickle')
        # Read in detection pickle
        with open(self.detections_file, 'rb') as handle:
            dict_detections = pickle.load(handle)
        print('Done')

        self.initVideoOutput()

        for frameNum in sorted(dict_detections.keys()):
            if (frameNum % stepFrame) != 0:
                continue
            #list_dets = dict_detections[frameNum]
            # Keep only the home-team (red) detections
            list_dets = [
                x for x in dict_detections[frameNum] if x['team'] in ['red']
            ]
            print('Frame', frameNum)

            # Since we filter for red players, a frame may have zero detections
            if len(list_dets) > 0:
                self.doTrackingForOneFrame(frameNum, list_dets)

            if self.early_stopping is not None and frameNum >= self.early_stopping:
                break

        # Finally, close the video output if there is one
        self.closeVideoOutput()

    def doTrackingForOneFrame(self, frameNum, list_of_detections):
        '''
        list_of_detections : List[
            {'worldXY' : tuple(X, Y), 'box' : [xTL, yTL, xBR, yBR],
             'bigBox' : [xTL, yTL, xBR, yBR], 'score' : float, 'image' : np.array(NxM)}
        ]
        '''

        ts_start = time.time()

        # Build the bboxes converted to (cX, cY, W, H)
        # IMPORTANT: since these will be plotted, we need the bboxes on the small image
        bbox_xcycwh = [det['bigBox'] for det in list_of_detections]
        bbox_xcycwh = [[(xBR + xTL) / 2, (yBR + yTL) / 2, (xBR - xTL),
                        (yBR - yTL)] for xTL, yTL, xBR, yBR in bbox_xcycwh]
        cls_conf = [det['score'] for det in list_of_detections]
        bbox_imgs = [det['image'] for det in list_of_detections]
        worldCoordXY = [det['worldXY'] for det in list_of_detections]

        outputs, deadtracks = self.deepsort.update(bbox_xcycwh, cls_conf,
                                                   bbox_imgs, worldCoordXY)

        ts_end = time.time()

        self.writeVideoOutput(frameNum, list_of_detections, outputs,
                              deadtracks)

        self.writeResults(frameNum, outputs, ts_start, ts_end)
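
A side note, not part of the example: the tlbr to (cx, cy, w, h) conversion done inline in doTrackingForOneFrame above recurs in almost every example on this page. A minimal NumPy sketch of both directions (helper names are mine; DeepSort.update in these examples takes (cx, cy, w, h) boxes):

import numpy as np

def tlbr_to_xcycwh(boxes):
    """[x1, y1, x2, y2] -> [cx, cy, w, h], the format DeepSort.update expects."""
    boxes = np.asarray(boxes, dtype=float)
    out = boxes.copy()
    out[:, 2] = boxes[:, 2] - boxes[:, 0]      # w
    out[:, 3] = boxes[:, 3] - boxes[:, 1]      # h
    out[:, 0] = boxes[:, 0] + out[:, 2] / 2    # cx
    out[:, 1] = boxes[:, 1] + out[:, 3] / 2    # cy
    return out

def xcycwh_to_tlbr(boxes):
    """Inverse conversion, handy before drawing with draw_bboxes."""
    boxes = np.asarray(boxes, dtype=float)
    out = boxes.copy()
    out[:, 0] = boxes[:, 0] - boxes[:, 2] / 2  # x1
    out[:, 1] = boxes[:, 1] - boxes[:, 3] / 2  # y1
    out[:, 2] = boxes[:, 0] + boxes[:, 2] / 2  # x2
    out[:, 3] = boxes[:, 1] + boxes[:, 3] / 2  # y2
    return out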
class Detector(object):
    def __init__(self):
        self.vdo = cv2.VideoCapture()
        self.yolo3 = YOLOv3("YOLOv3/cfg/yolo_v3.cfg",
                            "YOLOv3/yolov3.weights",
                            "YOLOv3/cfg/coco.names",
                            is_xywh=True)
        self.deepsort = DeepSort("deep_sort/deep/checkpoint/ckpt.t7")
        self.class_names = self.yolo3.class_names
        self.write_video = True

    def open(self, video_path):
        assert os.path.isfile(video_path), "Error: path error"
        self.vdo.open(video_path)
        self.im_width = int(self.vdo.get(cv2.CAP_PROP_FRAME_WIDTH))
        self.im_height = int(self.vdo.get(cv2.CAP_PROP_FRAME_HEIGHT))
        self.area = 0, 0, self.im_width, self.im_height
        if self.write_video:
            fourcc = cv2.VideoWriter_fourcc(*'MJPG')
            self.output = cv2.VideoWriter("demo.avi", fourcc, 20,
                                          (self.im_width, self.im_height))
        return self.vdo.isOpened()

    def detect(self):

        # Configure depth and color streams
        pipeline = rs.pipeline()
        config = rs.config()
        config.enable_stream(rs.stream.depth, 640, 480, rs.format.z16, 30)
        config.enable_stream(rs.stream.color, 640, 480, rs.format.bgr8, 30)

        # Start streaming
        profile = pipeline.start(config)

        xmin, ymin, xmax, ymax = 0, 0, 640, 480

        try:
            while True:
                start = time.time()
                # Wait for a coherent pair of frames: depth and color
                frames = pipeline.wait_for_frames()
                depth_frame = frames.get_depth_frame()
                color_frame = frames.get_color_frame()
                if not depth_frame or not color_frame:
                    continue

                # Convert images to numpy arrays
                depth_image = np.asanyarray(depth_frame.get_data())
                color_image = np.asanyarray(color_frame.get_data())

                ori_im = color_image
                im = ori_im[ymin:ymax, xmin:xmax,
                            (2, 1, 0)]  # reorder color channels BGR -> RGB
                bbox_xywh, cls_conf, cls_ids = self.yolo3(im)

                if bbox_xywh is not None:
                    mask = cls_ids == 0
                    bbox_xywh = bbox_xywh[mask]
                    bbox_xywh[:, 3] *= 1.2
                    cls_conf = cls_conf[mask]
                    outputs = self.deepsort.update(bbox_xywh, cls_conf, im)
                    if len(outputs) > 0:
                        bbox_xyxy = outputs[:, :4]
                        identities = outputs[:, -1]
                        #ori_im = draw_bboxes(ori_im, bbox_xyxy, identities, offset=(xmin,ymin))

                        # Modification of draw_bboxes
                        offset = (xmin, ymin)
                        for i, box in enumerate(bbox_xyxy):
                            x1, y1, x2, y2 = [int(i) for i in box]
                            # the image origin (0, 0) is the top-left corner
                            # (x1, y1) is the top-left and (x2, y2) the bottom-right corner, in pixels
                            x1 += offset[0]
                            x2 += offset[0]
                            y1 += offset[1]
                            y2 += offset[1]

                            boxed_depth = depth_image[y1:y2, x1:x2]

                            # #get closest depth in xyxy box
                            # min_depth = np.amin(boxed_depth)
                            # min_result = np.where(boxed_depth == min_depth)
                            # listOfCordinates = list(zip(min_result[0], min_result[1]))
                            # for cord in listOfCordinates:
                            #     min_pixel = cord #only use first cordinate
                            #     break
                            # min_pixel = list(min_pixel)
                            #  #revert to pixel in original depth before sliced
                            # min_pixel[0] += y1
                            # min_pixel[1] += x1

                            # Get real distance (depth_scale is constant; it could be fetched once outside the loop)
                            depth_scale = profile.get_device(
                            ).first_depth_sensor().get_depth_scale()
                            depth = boxed_depth * depth_scale  # raw depth units -> meters
                            #real_dist,_,_,_ = cv2.mean(depth) #meters unit
                            real_dist = np.median(depth)  # median is robust to background pixels

                            # Get real Width
                            # d434's FOV Horizontal:91.2
                            width_scale = (2 * real_dist * math.tan(
                                math.radians(91.2 / 2))) / 640
                            real_width = width_scale * (x2 - x1)

                            # Get real Height
                            # d434's FOV Vertical:65.5
                            height_scale = (2 * real_dist * math.tan(
                                math.radians(65.5 / 2))) / 480
                            real_height = height_scale * (y2 - y1)

                            # box text and bar
                            track_id = int(
                                identities[i]) if identities is not None else 0
                            color = COLORS_10[track_id % len(COLORS_10)]
                            label = '{} {}, d={:.3f} w={:.3f} h={:.3f}'.format(
                                "object", track_id, real_dist, real_width,
                                real_height)
                            print(label)
                            print('top-left and bottom-right pixels:')
                            print('(', x1, ',', y1, ')    (', x2, ',', y2, ')')

                end = time.time()
                print("time: {}s, fps: {}".format(end - start,
                                                  1 / (end - start)))

                #if self.write_video:
                #    self.output.write(ori_im)

        finally:
            # Stop streaming
            pipeline.stop()
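
The RealSense example above recovers metric size from the median box depth and the camera's field of view. A self-contained sketch of that geometry (the 91.2°/65.5° FOV values are the ones hard-coded above; treat them as assumptions for your own camera):

import math

def pixel_to_metric(px_len, depth_m, fov_deg, img_px):
    """Metric length of px_len pixels at distance depth_m, given the FOV
    along that axis (degrees) and the image size along that axis (pixels)."""
    visible_extent = 2 * depth_m * math.tan(math.radians(fov_deg / 2))  # meters visible at that depth
    return visible_extent / img_px * px_len

# e.g. a 120 px wide box at 2.0 m, 91.2° horizontal FOV, 640 px wide image:
# pixel_to_metric(120, 2.0, 91.2, 640) ≈ 0.77 m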
示例#16
0
def main():
    print('Connecting to camera')
    # cap = cv2.VideoCapture(0)
    cap = ThreadedVideoCapture(
        'rtsp://*****:*****@[email protected]:554/Streaming/Channels/101/')
    # cap = ThreadedVideoCapture('rtsp://*****:*****@[email protected]/H264?ch=1&subtype=0')
    assert cap.isOpened(), 'Unable to connect to camera'
    width, height = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)), int(
        cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    cam_fps = int(cap.get(cv2.CAP_PROP_FPS))

    device = 'cuda:0' if torch.cuda.is_available() else 'cpu'

    print('Loading models')
    detector = Detector('weights/yolov5s.pt',
                        img_size=(640, 640),
                        conf_thresh=0.5,
                        iou_thresh=0.5,
                        agnostic_nms=False,
                        device=device)
    deepsort = DeepSort('weights/ckpt.t7',
                        max_dist=0.2,
                        min_confidence=0.3,
                        nms_max_overlap=0.5,
                        max_iou_distance=0.7,
                        max_age=100,
                        lingering_age=5,
                        n_init=5,
                        nn_budget=100,
                        device=device)
    bboxes_visualizer = BBoxVisualizer()
    fps_estimator = IncrementalMeanTracker(max_count=cam_fps * 5)
    person_cls_id = detector.names.index('person')  # get id of 'person' class

    print(f'Starting capture, camera_fps={cam_fps}')
    # Start
    cap.start()
    win_name = 'MICA ReID Demo'
    cv2.namedWindow(win_name, cv2.WINDOW_GUI_NORMAL | cv2.WINDOW_FREERATIO)
    cv2.resizeWindow(win_name, width, height)
    frame_id = 0
    pbar = tqdm(desc=win_name)
    while True:
        start_it = time.time()
        ret, img = cap.read()
        if not ret:
            print('Unable to read camera')
            break
        detections = detector.detect([img])[0]

        num_people = 0
        if detections is not None:
            detections = detections[detections[:, -1].eq(
                person_cls_id)]  # filter person
            xywh, confs = parse_detection(detections)
            outputs = deepsort.update(xywh, confs, img)
            num_people = len(outputs)
            bboxes_visualizer.remove([
                t.track_id for t in deepsort.tracker.tracks
                if t.time_since_update > 3 or t.is_deleted()
            ])
            bboxes_visualizer.update(outputs)
            # draw detections
            for pid in outputs[:, -1]:
                bboxes_visualizer.box(img,
                                      pid,
                                      label=f'Person {pid}',
                                      line_thickness=5,
                                      trail_trajectory=True,
                                      trail_bbox=False)
        # draw counting
        count_str = f'Number of people: {num_people}'
        img = bboxes_visualizer.text(img,
                                     count_str, (960, 25),
                                     fontScale=0.8,
                                     box_alpha=0.4,
                                     color=(255, 255, 255),
                                     box_color=(0, 0, 0))

        # show
        cv2.imshow(win_name, img)
        key = cv2.waitKey(1)
        elapsed_time = time.time() - start_it
        fps = fps_estimator.update(1 / elapsed_time)

        desc = f'[{frame_id:06d}] num_detections={num_people} fps={fps:.02f} elapsed_time={elapsed_time:.03f}'
        pbar.update()
        pbar.set_description(desc)
        # check key pressed
        if key == ord('q') or key == 27:  # q or esc to quit
            break
        elif key == ord('r'):  # r to reset tracking
            deepsort.reset()
            bboxes_visualizer.clear()
        elif key == 32:  # space to pause
            key = cv2.waitKey(0)
            if key == ord('q') or key == 27:
                break
        frame_id += 1
    cv2.destroyAllWindows()
    cap.release()
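
ThreadedVideoCapture is used above but not defined in this snippet. A common pattern behind such a wrapper is a reader thread that keeps draining the RTSP stream so read() always returns the freshest frame instead of a stale buffered one; a minimal sketch under that assumption:

import threading
import cv2

class ThreadedVideoCapture:
    """cv2.VideoCapture wrapper; a daemon thread keeps only the latest frame."""

    def __init__(self, src):
        self.cap = cv2.VideoCapture(src)
        self.lock = threading.Lock()
        self.ret, self.frame = False, None
        self.running = False
        self.thread = threading.Thread(target=self._reader, daemon=True)

    def _reader(self):
        while self.running:
            ret, frame = self.cap.read()
            with self.lock:
                self.ret, self.frame = ret, frame

    def start(self):
        self.running = True
        self.thread.start()

    def read(self):
        with self.lock:
            frame = None if self.frame is None else self.frame.copy()
            return self.ret, frame

    def isOpened(self):
        return self.cap.isOpened()

    def get(self, prop):
        return self.cap.get(prop)

    def release(self):
        self.running = False
        if self.thread.is_alive():
            self.thread.join(timeout=1)
        self.cap.release()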
示例#17
0
            bbox_xywh[:, 3] = bbox_xywh[:, 3] - bbox_xywh[:, 1]
            bbox_xywh[:, 0] = bbox_xywh[:, 0] + (bbox_xywh[:, 2]) / 2
            bbox_xywh[:, 1] = bbox_xywh[:, 1] + (bbox_xywh[:, 3]) / 2

            cls_conf = output[:, 5]
            cls_ids = output[:, 7]

            if bbox_xywh is not None:
                mask = cls_ids == 0.0
                bbox_xywh = bbox_xywh[mask]
                cls_conf = cls_conf[mask]
                #if bbox_xywh[0]==0 and bbox_xywh[1]==0 and bbox_xywh[2]==0 and bbox_xywh[3]==0:continue
                #print("***********{}".format(bbox_xywh))
                #cv2.imshow("debug",orig_im)
                #cv2.waitKey(0)
                outputs = deepsort.update(bbox_xywh, cls_conf,
                                          orig_im)  # bbox + ID, ndarray of shape (N, 5)
                #######################################################################################
                # print('outputs = {}'.format(outputs))
                # outputs = np.array(outputs)
                # print(outputs)
                #
                # now_time = time.time()
                # diff_time = now_time-last_time
                # last_time = now_time
                # print('diff_time = {}'.format(diff_time))
                #
                # distance = []
                # speed = []
                # # a = time.time()
                # for i in range(outputs.shape[0]):
                #     if last.shape[0] == 0:
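
The commented-out block above hints at estimating per-track speed from consecutive frames. A hedged sketch of that idea (assuming, as in the other examples, that deepsort.update returns rows of [x1, y1, x2, y2, track_id]):

import time
import numpy as np

prev_centers = {}  # track_id -> (cx, cy) from the previous processed frame
prev_time = None

def update_speeds(outputs):
    """Pixel-per-second speed of each track between consecutive calls."""
    global prev_time
    now = time.time()
    speeds = {}
    for x1, y1, x2, y2, tid in outputs[:, :5]:
        center = ((x1 + x2) / 2, (y1 + y2) / 2)
        if prev_time is not None and tid in prev_centers:
            px, py = prev_centers[tid]
            dist = np.hypot(center[0] - px, center[1] - py)
            speeds[int(tid)] = dist / (now - prev_time)
        prev_centers[tid] = center
    prev_time = now
    return speeds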
示例#18
0
sort = DeepSort('checkpoint/net', n_init=2)
# paddle.enable_static()
for i in tqdm(ds.file_list):
    image_name = i[0]
    im = cv2.imread(image_name)
    start = time.time()
    result = model.predict(im)
    # print('infer time:{:.6f}s'.format(time.time()-start))
    # print('detected num:', len(result))
    # paddle.disable_static()
    font = cv2.FONT_HERSHEY_SIMPLEX
    threshold = 0.1
    result = list(filter(lambda x: x['score'] > threshold, result))
    bboxes = np.array(list(map(lambda v: np.array(v['bbox']), result)))
    confidence = list(map(lambda v: v['score'], result))
    track = sort.update(bboxes, confidence, im)
    if INTERACTIVE:
        for value in result:
            xmin, ymin, w, h = np.array(value['bbox']).astype(int)  # np.int was removed in NumPy 1.24
            cls = value['category']
            score = value['score']
            cv2.rectangle(im, (xmin, ymin), (xmin + w, ymin + h), (255, 0, 0), 4)
            cv2.putText(im, '{:s} {:.3f}'.format(cls, score),
                        (xmin, ymin), font, 0.5, (0, 225, 0), thickness=1)
    for value in track:
        x, y, w, h, track_id, conf = value  # renamed from 'track' to avoid shadowing the track list
        if INTERACTIVE:
            cv2.rectangle(im, (x, y), (x + w, y + h), (0, 255, 0), 4)
            cv2.putText(im, '{:d} {:d}'.format(track_id, track_id),
                        (x, y), font, 0.5, (255, 0, 0), thickness=2)
        evaluator.write_target(track_id, left=x, top=y, width=w, height=h, conf=1)  # int(confidence[0]))
示例#19
0
    detector = YOLOv3(0.5, 0.4)

    frame_idx = 0
    for im in video.list:
        frame_idx += 1

        start = time.time()
        #print('detection:')
        detections = detector.detect(im)
        imgs = []

        for d in detections:
            d = d[:4].astype(int)  # np.int was removed in NumPy 1.24
            #print(d)
            imgs.append(im[d[1]:d[3], d[0]:d[2], :])
        detections, ids = deepsort.update(detections, imgs)

        for detection, id in zip(detections, ids):
            detection = detection.astype(int)
            img = crop_img(im, detection[:4])
            label = "id:{}".format(id)
            t_size = cv2.getTextSize(label, cv2.FONT_HERSHEY_PLAIN, 2, 2)[0]
            cv2.putText(im, label,
                        (detection[0], detection[1] + t_size[1] + 4),
                        cv2.FONT_HERSHEY_PLAIN, 2, COLORS_10[id], 2)
            params = predictor.predict(img)
            kpt = predictor.pst68(params, detection)
            new_box = parse_roi_box_from_landmark(kpt)
            detection = new_box.astype(int)
            img = crop_img(im, detection[:4])
            params = predictor.predict(img)
示例#20
0
def main():
    args = get_parser().parse_args()
    if args.display:
        cv2.namedWindow("out_vid", cv2.WINDOW_NORMAL)
        cv2.resizeWindow("out_vid", 960, 720)
    sort = Sort()
    deepsort = DeepSort(args.deepsort_checkpoint,
                        nms_max_overlap=args.nms_max_overlap,
                        use_cuda=bool(strtobool(args.use_cuda)))
    assert os.path.isfile(
        os.path.join(args.input, 'via_export_json.json'
                     )), "Error: path error, via_export_json.json not found"
    '''
    if args.out_vid:
        out_vid = cv2.VideoWriter(
            filename=args.out_vid,
            fourcc=cv2.VideoWriter_fourcc(*'MJPG'),
            fps=args.fps,
            frameSize=(1920, 1440),
        )
    '''
    if args.out_txt:
        out_txt = open(args.out_txt, "w+")

    total_counter = [0] * 1000
    json_file = os.path.join(args.input, 'via_export_json.json')
    with open(json_file) as f:
        imgs_anns = json.load(f)
    for idx, v in tqdm(enumerate(imgs_anns.values()),
                       total=len(imgs_anns.values())):
        filename = os.path.join(args.input, v["filename"])
        annos = v["regions"]
        polys = []
        dets = []
        for anno in annos:
            region_attributes = anno["region_attributes"]
            if not region_attributes:
                break
            anno = anno["shape_attributes"]
            if anno["name"] != "polygon":
                break
            px = anno["all_points_x"]
            py = anno["all_points_y"]
            poly = np.array([[x, y] for x, y in zip(px, py)],
                            np.int32).reshape((-1, 1, 2))
            if int(region_attributes["category_id"]):
                dets.append(
                    [np.min(px),
                     np.min(py),
                     np.max(px),
                     np.max(py), 1])
                polys.append(poly)
        start = time.time()
        im = cv2.imread(filename)
        current_counter = []
        if args.tracker == 'sort':
            if len(dets):
                dets = np.array(dets)
            else:
                dets = np.empty((0, 5))
            outputs = sort.update(dets)
            outputs = np.array([element.clip(min=0)
                                for element in outputs]).astype(int)
        else:
            if len(dets):
                ccwh_boxes = []
                for det in dets:
                    ccwh_boxes.append([(det[0] + det[2]) / 2,
                                       (det[1] + det[3]) / 2, det[2] - det[0],
                                       det[3] - det[1]])
                ccwh_boxes = np.array(ccwh_boxes)
                confidences = np.ones(len(dets))
                outputs, __ = deepsort.update(ccwh_boxes, confidences, im)
            else:
                outputs = []
        if len(outputs):
            tlbr_boxes = outputs[:, :4]
            identities = current_counter = outputs[:, -1]
            ordered_identities = []
            for identity in identities:
                if not total_counter[identity]:
                    total_counter[identity] = max(total_counter) + 1
                ordered_identities.append(total_counter[identity])
            im = draw_bboxes(im,
                             tlbr_boxes,
                             ordered_identities,
                             binary_masks=[])
            if args.out_txt:
                for i in range(len(ordered_identities)):
                    tlbr = tlbr_boxes[i]
                    line = [
                        idx + 1, ordered_identities[i], tlbr[0], tlbr[1],
                        tlbr[2] - tlbr[0], tlbr[3] - tlbr[1], 1, 1, 1
                    ]
                    out_txt.write(",".join(str(item) for item in line) + "\n")
        end = time.time()
        im = draw_polys(im, polys)
        im = cv2.putText(im, "Frame ID: " + str(idx), (20, 20), 0, 5e-3 * 200,
                         (0, 255, 0), 2)
        time_fps = "Time: {}s, fps: {}".format(round(end - start, 2),
                                               round(1 / (end - start), 2))
        im = cv2.putText(im, time_fps, (20, 60), 0, 5e-3 * 200, (0, 255, 0), 3)
        im = cv2.putText(im, 'Groundtruth2' + args.tracker, (20, 100), 0,
                         5e-3 * 200, (0, 255, 0), 3)
        im = cv2.putText(im,
                         "Current Hand Counter: " + str(len(current_counter)),
                         (20, 140), 0, 5e-3 * 200, (0, 255, 0), 2)
        im = cv2.putText(im, "Total Hand Counter: " + str(max(total_counter)),
                         (20, 180), 0, 5e-3 * 200, (0, 255, 0), 2)
        if args.display:
            cv2.imshow("out_vid", im)
            cv2.waitKey(1)
        '''
示例#21
0
class Detector(object):
    def __init__(self, centernet_opt, args):
        # CenterNet detector
        self.detector = detector_factory[centernet_opt.task](centernet_opt)
        # Deep SORT
        self.deepsort = DeepSort(args.deepsort_checkpoint,
                                 args.max_cosine_distance, args.use_cuda, args.use_original_model)
        self.debug = args.debug
        if self.debug and not os.path.exists(args.debug_dir):
            os.mkdir(args.debug_dir)
        self.args = args

    def run(self, sequence_dir, output_file):
        assert os.path.isdir(sequence_dir), "Invalid sequence dir: {}".format(sequence_dir)
        seq_info = gather_sequence_info(sequence_dir, None)
        print("Start to handle sequence: {} (image size: {}, frame {} - {})".format(
            seq_info["sequence_name"], seq_info["image_size"], seq_info["min_frame_idx"],
            seq_info["max_frame_idx"]))
        start_time = time.time()
        frame_cnt = 0
        results = []
        for frame in range(seq_info["min_frame_idx"], seq_info["max_frame_idx"] + 1):
            frame_image = seq_info["image_filenames"][frame]
            frame_cnt += 1
            image = cv2.imread(frame_image)
            detection_result = self.detector.run(frame_image)["results"][1]
            xywh, conf = Detector._bbox_to_xywh_cls_conf(detection_result, self.args.min_confidence)
            output = self.deepsort.update(xywh, conf, image)
            for x1, y1, x2, y2, track_id in output:
                results.append((
                    frame, track_id, x1, y1, x2 - x1, y2 - y1  # tlwh
                ))
            elapsed_time = time.time() - start_time
            print("Frame {:05d}, Time {:.3f}s, FPS {:.3f}".format(
                frame_cnt, elapsed_time, frame_cnt / elapsed_time))
            if self.debug:
                detect_xyxy = detection_result[detection_result[:, 4] > self.args.min_confidence, :4]
                detect_image = draw_bboxes(image, detect_xyxy)
                cv2.imwrite(os.path.join(self.args.debug_dir,
                                         "{}-{:05}-detect.jpg".format(seq_info["sequence_name"], frame)), detect_image)
                if len(output) == 0:
                    continue
                image = cv2.imread(frame_image)
                track_image = draw_bboxes(image, output[:, :4], output[:, -1])
                cv2.imwrite(os.path.join(self.args.debug_dir,
                                         "{}-{:05}-track.jpg".format(seq_info["sequence_name"], frame)), track_image)

        print("Done. Now write output to {}".format(args.output_file))
        with open(output_file, mode="w") as f:
            for row in results:
                f.write("%d,%d,%.2f,%.2f,%.2f,%.2f,1,-1,-1,-1\n" % (
                    row[0], row[1], row[2], row[3], row[4], row[5]))

    @staticmethod
    def _bbox_to_xywh_cls_conf(bbox, min_confidence):
        bbox = bbox[bbox[:, 4] > min_confidence, :]
        bbox[:, 2] = bbox[:, 2] - bbox[:, 0]
        bbox[:, 3] = bbox[:, 3] - bbox[:, 1]
        bbox[:, 0] = bbox[:, 0] + bbox[:, 2] / 2
        bbox[:, 1] = bbox[:, 1] + bbox[:, 3] / 2
        return bbox[:, :4], bbox[:, 4]
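
A quick sanity check of _bbox_to_xywh_cls_conf above, with hypothetical values (the boolean filter copies the array, so the caller's boxes are left untouched):

import numpy as np

# one detection: x1=10, y1=20, x2=50, y2=100, score=0.9
bbox = np.array([[10., 20., 50., 100., 0.9]])
xywh, conf = Detector._bbox_to_xywh_cls_conf(bbox, min_confidence=0.5)
print(xywh)  # [[30. 60. 40. 80.]] -> (cx, cy, w, h)
print(conf)  # [0.9]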
示例#22
0
def main():
    args = get_parser().parse_args()
    if args.display:
        cv2.namedWindow("out_vid", cv2.WINDOW_NORMAL)
        cv2.resizeWindow("out_vid", 960, 720)
    sort = Sort()
    deepsort = DeepSort(args.deepsort_checkpoint,
                        nms_max_overlap=args.nms_max_overlap,
                        use_cuda=bool(strtobool(args.use_cuda)))
    assert os.path.isfile(
        args.input), "Error: path error, input file not found"

    if args.out_vid:
        out_vid = cv2.VideoWriter(
            filename=args.out_vid,
            fourcc=cv2.VideoWriter_fourcc(*'MJPG'),
            fps=args.fps,
            frameSize=(1920, 1440),
        )

    if args.out_txt:
        out_txt = open(args.out_txt, "w+")
    total_counter = [0] * 1000
    inp_vid = cv2.VideoCapture(args.input)
    num_frames = int(inp_vid.get(cv2.CAP_PROP_FRAME_COUNT))
    predictor = DefaultPredictor(setup_cfg(args))
    for frameID in tqdm(range(num_frames)):
        ret, im = inp_vid.read()
        start = time.time()
        dets, masks, region = detectron2(im, args, predictor)
        if args.region_based:
            im = region
        if args.tracker == 'sort':
            if len(dets):
                dets = np.array(dets)
            else:
                dets = np.empty((0, 5))
            outputs = sort.update(dets)
            outputs = np.array([element.clip(min=0)
                                for element in outputs]).astype(int)
        else:
            if len(dets):
                ccwh_boxes = []
                for det in dets:
                    ccwh_boxes.append([(det[0] + det[2]) / 2,
                                       (det[1] + det[3]) / 2, det[2] - det[0],
                                       det[3] - det[1]])
                ccwh_boxes = np.array(ccwh_boxes)
                confidences = np.ones(len(dets))
                outputs, __ = deepsort.update(ccwh_boxes, confidences, im)
            else:
                outputs = []
        current_counter = []
        if len(outputs):
            tlbr_boxes = outputs[:, :4]
            identities = current_counter = outputs[:, -1]
            ordered_identities = []
            for identity in identities:
                if not total_counter[identity]:
                    total_counter[identity] = max(total_counter) + 1
                ordered_identities.append(total_counter[identity])
            im = draw_bboxes(im,
                             tlbr_boxes,
                             ordered_identities,
                             binary_masks=masks)
            if args.out_txt:
                for i in range(len(ordered_identities)):
                    tlbr = tlbr_boxes[i]
                    line = [
                        frameID + 1, ordered_identities[i], tlbr[0], tlbr[1],
                        tlbr[2] - tlbr[0], tlbr[3] - tlbr[1], 1, 1, 1
                    ]
                    out_txt.write(",".join(str(item) for item in line) + "\n")

        end = time.time()
        im = cv2.putText(im, "Frame ID: " + str(frameID + 1), (20, 30), 0,
                         5e-3 * 200, (0, 255, 0), 2)
        time_fps = "Time: {}s, fps: {}".format(round(end - start, 2),
                                               round(1 / (end - start), 2))
        im = cv2.putText(im, time_fps, (20, 60), 0, 5e-3 * 200, (0, 255, 0), 3)
        im = cv2.putText(
            im,
            os.path.basename(args.config_file) + ' ' + args.tracker, (20, 90),
            0, 5e-3 * 200, (0, 255, 0), 3)
        im = cv2.putText(im,
                         "Current Hand Counter: " + str(len(current_counter)),
                         (20, 120), 0, 5e-3 * 200, (0, 255, 0), 2)
        im = cv2.putText(im, "Total Hand Counter: " + str(max(total_counter)),
                         (20, 150), 0, 5e-3 * 200, (0, 255, 0), 2)
        if args.display:
            cv2.imshow("out_vid", im)
            cv2.waitKey(1)
        if args.out_vid:
            out_vid.write(im)
示例#23
0
class Detector(object):
    def __init__(self, args):
        self.args = args
        if args.display:
            cv2.namedWindow("test", cv2.WINDOW_NORMAL)
            cv2.resizeWindow("test", args.display_width, args.display_height)
        device = torch.device(
            'cuda') if torch.cuda.is_available() else torch.device('cpu')
        self.vdo = cv2.VideoCapture()
        self.yolo3 = InferYOLOv3(args.yolo_cfg,
                                 args.img_size,
                                 args.yolo_weights,
                                 args.data_cfg,
                                 device,
                                 conf_thres=args.conf_thresh,
                                 nms_thres=args.nms_thresh)
        self.deepsort = DeepSort(args.deepsort_checkpoint)
        self.class_names = self.yolo3.classes

    def __enter__(self):
        assert os.path.isfile(self.args.VIDEO_PATH), "Error: path error"
        self.vdo.open(self.args.VIDEO_PATH)
        self.im_width = int(self.vdo.get(cv2.CAP_PROP_FRAME_WIDTH))
        self.im_height = int(self.vdo.get(cv2.CAP_PROP_FRAME_HEIGHT))

        if self.args.save_path:
            fourcc = cv2.VideoWriter_fourcc(*'MJPG')
            self.output = cv2.VideoWriter(self.args.save_path, fourcc, 20,
                                          (self.im_width, self.im_height))

        assert self.vdo.isOpened()
        return self

    def __exit__(self, exc_type, exc_value, exc_traceback):
        if exc_type:
            print(exc_type, exc_value, exc_traceback)

    def detect(self):
        frame_cnt = -1
        while self.vdo.grab():
            frame_cnt += 1

            # process two of every three frames (skip when frame_cnt % 3 == 0)
            if frame_cnt % 3 == 0:
                continue

            start = time.time()
            _, ori_im = self.vdo.retrieve()
            # im = cv2.cvtColor(ori_im, cv2.COLOR_BGR2RGB)
            im = ori_im

            t1_begin = time.time()
            bbox_xxyy, cls_conf, cls_ids = self.yolo3.predict(im)
            t1_end = time.time()

            t2_begin = time.time()
            if bbox_xxyy is not None:
                # select class cow
                # mask = cls_ids == 0
                # bbox_xxyy = bbox_xxyy[mask]

                # bbox_xxyy[:, 3:] *= 1.2
                # cls_conf = cls_conf[mask]

                bbox_xcycwh = xyxy2xywh(bbox_xxyy)
                outputs = self.deepsort.update(bbox_xcycwh, cls_conf, im)

                if len(outputs) > 0:
                    bbox_xyxy = outputs[:, :4]
                    identities = outputs[:, -1]
                    ori_im = draw_bboxes(ori_im, bbox_xyxy, identities)
            t2_end = time.time()

            end = time.time()
            print(
                "frame:%d|det:%.4f|sort:%.4f|total:%.4f|det p:%.2f%%|fps:%.2f"
                % (frame_cnt, (t1_end - t1_begin), (t2_end - t2_begin),
                   (end - start), ((t1_end - t1_begin) * 100 /
                                   ((end - start))), (1 / (end - start))))
            if self.args.display:
                cv2.imshow("test", ori_im)
                cv2.waitKey(1)

            if self.args.save_path:
                self.output.write(ori_im)
class Detector(object):
    def __init__(self, args):
        self.args = args
        use_cuda = bool(strtobool(self.args.use_cuda))
        if args.display:
            cv2.namedWindow("test", cv2.WINDOW_NORMAL)
            cv2.resizeWindow("test", args.display_width, args.display_height)

        if not args.image_input:
            self.vdo = cv2.VideoCapture()
        cfg = get_cfg()
        #cfg.merge_from_file("detectron2_repo/configs/COCO-Keypoints/keypoint_rcnn_X_101_32x8d_FPN_3x.yaml")
        #cfg.MODEL.WEIGHTS = "detectron2://COCO-Keypoints/keypoint_rcnn_X_101_32x8d_FPN_3x/139686956/model_final_5ad38f.pkl"
        cfg.merge_from_file("../detectron2_repo/configs/Misc/cascade_mask_rcnn_X_152_32x8d_FPN_IN5k_gn_dconv.yaml")
        cfg.MODEL.WEIGHTS = args.detectron2_weights
        #"detectron2://Misc/cascade_mask_rcnn_X_152_32x8d_FPN_IN5k_gn_dconv/18131413/model_0039999_e76410.pkl"
        cfg.MODEL.MASK_ON = False
        cfg.MODEL.ROI_HEADS.NUM_CLASSES = 1 
        #cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.5
        cfg.MODEL.ROI_HEADS.NMS_THRESH_TEST = 0.5

        self.predictor = DefaultPredictor(cfg)
        self.deepsort = DeepSort(args.deepsort_checkpoint, use_cuda=use_cuda, extractor_type=args.extractor_type, game_id=args.game_id, team_0=args.team_0)
        #self.class_names = self.yolo3.class_names

    def __enter__(self):
        if not args.image_input:
            assert os.path.isfile(self.args.VIDEO_PATH), "Error: path error"
            self.vdo.open(self.args.VIDEO_PATH)
            assert self.vdo.isOpened()
            self.im_width = int(self.vdo.get(cv2.CAP_PROP_FRAME_WIDTH))
            self.im_height = int(self.vdo.get(cv2.CAP_PROP_FRAME_HEIGHT))
        else:
            self.img_list = sorted(glob.glob(os.path.join(self.args.VIDEO_PATH, "*")))
            img_test = cv2.imread(self.img_list[0])
            self.im_height, self.im_width = img_test.shape[:2]

        if self.args.save_path:
            fourcc = cv2.VideoWriter_fourcc(*'MJPG')
            self.output = cv2.VideoWriter(self.args.save_path, fourcc, args.save_fps, (self.im_width, self.im_height))
            
        if self.args.save_frames:
            if os.path.exists('supervisely'):
                import shutil
                shutil.rmtree('supervisely')
            os.makedirs('supervisely')
            os.makedirs('supervisely/img')
        
        if self.args.save_txt:
            self.txt = open('gt.txt', "w")
        
        return self

    def __exit__(self, exc_type, exc_value, exc_traceback):
        if exc_type:
            print(exc_type, exc_value, exc_traceback)

    def detect(self):
        
        start = time.time()
        if not args.image_input:            

            start_second = 0
            end_second = 8

            fps = self.vdo.get(cv2.CAP_PROP_FPS)

            print('fps: ', fps)

            start_frameid = start_second * fps
            end_frameid = end_second * fps
        # frame_id must exist before the first print in the loop, for both input modes
        frame_id = 0
            
        if self.args.update_tracks:
            shutil.copytree(self.args.detections_dir, self.args.detections_dir + '_tracked')
        
        while True:

            start = time.time()  # per-frame timer
            print(f'FRAME_ID: {frame_id}')
            logging.debug(f'FRAME_ID: {frame_id}')
            
            new_sequence = False
            
            if not args.image_input:                
                frame_id = int(round(self.vdo.get(cv2.CAP_PROP_POS_FRAMES)))
                if frame_id < start_frameid:
                    continue
                elif frame_id > end_frameid:
                    break           
                _, ori_im = self.vdo.read() # retrieve()
            else:
                if frame_id >= len(self.img_list):
                    break
                    
                if frame_id > 1:
                    prev_im = ori_im
                    
                ori_im = cv2.imread(self.img_list[frame_id])
                    
                if frame_id > 1:

                    im1_gray = cv2.cvtColor(prev_im, cv2.COLOR_RGB2GRAY)
                    im2_gray = cv2.cvtColor(ori_im, cv2.COLOR_RGB2GRAY)

                    cc, _ = cv2.findTransformECC(im1_gray, im2_gray, warp_matrix, warp_mode, criteria, None, 1)
                    
                    new_sequence = cc < args.ecc_threshold
                    logging.debug(f'ECC: {cc}')

                
                frame_id += 1
                
            logging.debug(f'NEW_SEQUENCE: {new_sequence}')
            
            if self.args.save_frames:
                if not args.image_input:
                    cv2.imwrite(f'./supervisely/img/img_{frame_id:05}.jpg', ori_im)
                else:
                    cv2.imwrite(f'./supervisely/img/' + self.img_list[frame_id-1][-13:], ori_im)
            
            im = ori_im
            draw_im = ori_im  # fallback so the display/save calls below work when nothing is drawn
            predictions = self.predictor(im)
            
            instances = predictions["instances"]

            if instances.pred_classes.numel() > 0:                

                #print(instances.pred_classes)
                
                mask = instances.pred_classes == 0

                scores = instances.scores[mask]
                pred_boxes = instances.pred_boxes[mask]

                xcyc = pred_boxes.get_centers()
                wh = pred_boxes.tensor[:, 2:] - pred_boxes.tensor[:, :2] + torch.ones(pred_boxes.tensor[:, 2:].size()).cuda()
                
                wh_min, _ = torch.min(wh, 1)            
                
                # if "pred_masks" in instances.keys():
                #	pred_masks = instances["pred_masks"][mask]

                size_mask = wh_min >= 4  # drop boxes with a side shorter than 4 px
                bbox_xcycwh = torch.cat((xcyc, wh), 1)[size_mask].detach().cpu().numpy()
                cls_conf = scores[size_mask].detach().cpu().numpy()  # keep scores aligned with the filtered boxes
                
                # if an annotations dir is given, use its ground-truth boxes instead of the detector output
                if self.args.detections_dir != "":
                    ann_dir = os.path.join(self.args.detections_dir)
                    
                    ann = os.path.basename(self.img_list[frame_id-1]) + ".json"
                    ann_path = os.path.join(ann_dir, 'MOT', 'ann', ann)
                    
                    with open(ann_path) as f:
                        ann_dict = json.load(f)
                    bboxes = []
                    for obj in ann_dict['objects']:
                        bbox = obj["points"]["exterior"]
                        bbox = bbox[0]+bbox[1]
                        bbox = [min(bbox[0], bbox[2]), min(bbox[1], bbox[3]), max(bbox[0], bbox[2]), max(bbox[1], bbox[3])]
                        bboxes.append([(bbox[2]+bbox[0])/2, (bbox[3]+bbox[1])/2, bbox[2]-bbox[0], bbox[3]-bbox[1]])
                        
                    bbox_xcycwh = np.array(bboxes)
                    cls_conf = np.ones(bbox_xcycwh.shape[0])
                
                #print(bbox_xcycwh, cls_conf)

                #bbox_xcycwh[:, 3:] *= 1.2

                outputs, detections = self.deepsort.update(bbox_xcycwh, cls_conf, im, new_sequence, frame_id-1, self.img_list[frame_id-1])
                self.deepsort.export('/content')
                if len(outputs) > 0:                    
                    bbox_xyxy = outputs[:, :4]
                    #dh = ((0.1/1.2)*(bbox_xyxy[:,3]-bbox_xyxy[:,1])).astype(int)
                    #bbox_xyxy[:,1] += dh
                    #bbox_xyxy[:,3] -= dh
                    identities = outputs[:, 4]
                    match_method = outputs[:, 5]
                    number = outputs[:, 6]
                    number_bbox = outputs[:, 7:11]
                    detection_id = outputs[:, 11]
                    min_cost = outputs[:, 12]
                    draw_im = draw_bboxes(frame_id, new_sequence, ori_im, bbox_xyxy, identities, match_method, number, number_bbox, detection_id, min_cost)
                    
                    if self.args.save_txt:
                        for j in range(bbox_xyxy.shape[0]):
                            x1 = bbox_xyxy[j,0]
                            y1 = bbox_xyxy[j,1]
                            x2 = bbox_xyxy[j,2]
                            y2 = bbox_xyxy[j,3]
                            self.txt.write(f'{frame_id},{identities[j]},{x1},{y1},{x2-x1},{y2-y1},1,0,-1,-1\n')
                if self.args.update_tracks:                    
                    ann_path = os.path.join(self.args.detections_dir + '_tracked', 'MOT', 'ann', ann)
                    print(ann_path)
                    
                    for idx, obj in enumerate(ann_dict['objects']):
                        obj["tags"] = [{"name": "track_id", "value": detections[idx].track_id}]
                        
                    with open(ann_path, 'w') as f:
                        json.dump(ann_dict, f)

            end = time.time()
            print("time: {}s, fps: {}".format(end - start, 1 / (end - start)))

            if self.args.display:
                cv2.imshow("test", draw_im)
                cv2.waitKey(1)

            if self.args.save_path:
                self.output.write(draw_im)
示例#25
0
class Detector(object):
    def __init__(self, opt):
        self.vdo = cv2.VideoCapture()
        #self.yolo_info = YOLO3("YOLO3/cfg/yolo_v3.cfg", "YOLO3/yolov3.weights", "YOLO3/cfg/coco.names", is_xywh=True)


        #centerNet detector
        self.detector = detector_factory[opt.task](opt)
        self.deepsort = DeepSort("deep/checkpoint/ckpt.t7")
        # self.deepsort = DeepSort("deep/checkpoint/ori_net_last.pth")


        self.write_video = True

    def open(self, video_path):
        assert os.path.isfile(video_path), "Error: path error"
        self.vdo.open(video_path)
        self.im_width = int(self.vdo.get(cv2.CAP_PROP_FRAME_WIDTH))
        self.im_height = int(self.vdo.get(cv2.CAP_PROP_FRAME_HEIGHT))
        self.area = 0, 0, self.im_width, self.im_height
        if self.write_video:
            fourcc = cv2.VideoWriter_fourcc(*'MJPG')
            self.output = cv2.VideoWriter("demo1.avi", fourcc, 20, (self.im_width, self.im_height))
        return self.vdo.isOpened()

    def detect(self):
        xmin, ymin, xmax, ymax = self.area
        frame_no = 0
        while self.vdo.grab():

            frame_no +=1
            start = time.time()
            _, ori_im = self.vdo.retrieve()
            im = ori_im[ymin:ymax, xmin:xmax, (2, 1, 0)]


            #start_center =  time.time()

            person_id = 1
            confidence = 0.5
            # only person ( id == 1)
            bbox = self.detector.run(im)['results'][person_id]
            #bbox = ret['results'][person_id]
            bbox = bbox[bbox[:, 4] >  confidence, :]
            #box_info = ret['results']

            # convert xyxy -> (cx, cy, w, h): deepsort.update expects center coordinates
            bbox[:, 2] = bbox[:, 2] - bbox[:, 0]  # w
            bbox[:, 3] = bbox[:, 3] - bbox[:, 1]  # h
            bbox[:, 0] = bbox[:, 0] + bbox[:, 2] / 2  # cx
            bbox[:, 1] = bbox[:, 1] + bbox[:, 3] / 2  # cy


            #start_deep_sort = time.time()


            cls_conf = bbox[:, 4]

            outputs = self.deepsort.update(bbox[:,:4], cls_conf, im)



            if len(outputs) > 0:
                bbox_xyxy = outputs[:, :4]
                identities = outputs[:, -1]
                ori_im = draw_bboxes(ori_im, bbox_xyxy, identities, offset=(xmin, ymin))


            end = time.time()
            #print("deep time: {}s, fps: {}".format(end - start_deep_sort, 1 / (end - start_deep_sort)))

            print("centernet time: {}s, fps: {}".format(end - start, 1 / (end - start)))
            cv2.imshow("test", ori_im)
            cv2.waitKey(1)

            if self.write_video:
                self.output.write(ori_im)
class Detector(object):
    def __init__(self, args):
        self.args = args
        if args.display:
            cv2.namedWindow("test", cv2.WINDOW_NORMAL)
            cv2.resizeWindow("test", args.display_width, args.display_height)

        self.vdo = cv2.VideoCapture()
        self.yolo3 = YOLOv3(args.yolo_cfg,
                            args.yolo_weights,
                            args.yolo_names,
                            is_xywh=True,
                            conf_thresh=args.conf_thresh,
                            nms_thresh=args.nms_thresh)
        self.deepsort = DeepSort(args.deepsort_checkpoint)
        self.class_names = self.yolo3.class_names

    def __enter__(self):
        assert os.path.isfile(self.args.VIDEO_PATH), "Error: path error"
        self.vdo.open(self.args.VIDEO_PATH)
        self.im_width = int(self.vdo.get(cv2.CAP_PROP_FRAME_WIDTH))
        self.im_height = int(self.vdo.get(cv2.CAP_PROP_FRAME_HEIGHT))

        if self.args.save_path:
            fourcc = cv2.VideoWriter_fourcc(*'MJPG')
            self.output = cv2.VideoWriter(self.args.save_path, fourcc, 30,
                                          (self.im_width, self.im_height))

        assert self.vdo.isOpened()
        return self

    def __exit__(self, exc_type, exc_value, exc_traceback):
        if exc_type:
            print(exc_type, exc_value, exc_traceback)

    def detect(self):
        # multiprocessing pool for the asynchronous sub-ROI crops below
        pool = mp.Pool(processes=6)
        #xmin, ymin, xmax, ymax = self.area
        jump_flag = 1
        while self.vdo.grab():
            _, ori_im = self.vdo.retrieve()
            im_height, im_width = ori_im.shape[:2]
            x_max = 10
            y_max = 10
            x_grid = int(im_width / x_max)
            y_grid = int(im_height / y_max)
            for i in range(1, x_max + 1):
                cv2.line(ori_im, (x_grid * i, 0), (x_grid * i, im_height),
                         (0, 255, 255), 3)
            for i in range(1, y_max + 1):
                cv2.line(ori_im, (0, y_grid * i), (im_width, y_grid * i),
                         (0, 255, 255), 3)
            for i in range(len(unseen_frame)):
                if unseen_frame[i] > -1:
                    unseen_frame[i] += 1
            if jump_flag % 2 == 0:  # process every other frame
                start = time.time()
                # im = cv2.cvtColor(ori_im, cv2.COLOR_BGR2RGB)
                im = ori_im

                bbox_xcycwh, cls_conf, cls_ids = self.yolo3(im)
                cv2.circle(ori_im, (3900, 2100), 50, (255, 0, 0), -1)

                if bbox_xcycwh is not None:
                    # select class person
                    mask = cls_ids == 0

                    bbox_xcycwh = bbox_xcycwh[mask]
                    bbox_xcycwh[:, 3:] *= 1.2

                    cls_conf = cls_conf[mask]
                    outputs = self.deepsort.update(bbox_xcycwh, cls_conf, im)
                    for output in outputs:
                        if output[4] > len(people_path):
                            for i in range(0, output[4] - len(people_path)):
                                people_path.append([])
                                direction_start.append(0)
                                unseen_frame.append(-1)
                        people_path[output[4] - 1].append(
                            np.array(([(output[0] + output[2]) / 2,
                                       output[3]])))
                        coordinate = output[:4]
                        bbox_area = get_bbox_area(coordinate)
                        try:
                            if area_dic[output[-1]] < bbox_area:
                                area_dic[output[-1]] = bbox_area
                                pool.apply_async(subroi, (ori_im, output))
                                print("---------------")

                        except KeyError:
                            area_dic.setdefault(output[-1], bbox_area)
                            pool.apply_async(subroi, (ori_im, output))
                            print("---------------")
                        x = []
                        y = []
                        for i in range(direction_start[output[4] - 1],
                                       len(people_path[output[4] - 1])):
                            x.append(people_path[output[4] - 1][i][0])
                            y.append(people_path[output[4] - 1][i][1])
                        path_x = (output[0] + output[2]) / 2
                        path_y = output[3]
                        if (len(x) > 1):
                            a, b, c = pu.cal_simple_linear_regression_coefficients(
                                x, y)
                            #print(abs(a * path_x + b * path_y + c) / math.sqrt(a * a + b * b))
                            if abs(a * path_x + b * path_y + c) / math.sqrt(
                                    a * a + b * b) > 200 and unseen_frame[
                                        output[4] - 1] < 10:
                                continue
                            if abs(a * path_x + b * path_y + c) / math.sqrt(
                                    a * a + b * b) < distance_threshold:
                                #print("projection")
                                path_x, path_y = pu.find_projection(
                                    a, b, c, path_x, path_y)
                                if len(people_path[output[4] - 1]) > 0:
                                    prev_x = people_path[output[4] - 1][
                                        len(people_path[output[4] - 1]) - 1][0]
                                    prev_y = people_path[output[4] - 1][
                                        len(people_path[output[4] - 1]) - 1][1]
                                    velocity = math.sqrt(
                                        (path_x - prev_x) * (path_x - prev_x) +
                                        (path_y - prev_y) *
                                        (path_y - prev_y)) * 30 / (
                                            unseen_frame[output[4] - 1] + 1)
                                    print("velocity: {}".format(velocity))
                            else:
                                #print("turn")
                                direction_start[output[4] - 1] = len(
                                    people_path[output[4] - 1])
                        people_path[output[4] - 1].append(
                            np.array((path_x, path_y)))
                        unseen_frame[output[4] - 1] = 0
                    if len(outputs) > 0:
                        bbox_xyxy = outputs[:, :4]
                        identities = outputs[:, -1]
                        ori_im = draw_bboxes(ori_im, bbox_xyxy, identities)
                        for id in identities:
                            for i in range(1, len(people_path[id - 1])):
                                cv2.line(ori_im,
                                         (int(people_path[id - 1][i - 1][0]),
                                          int(people_path[id - 1][i - 1][1])),
                                         (int(people_path[id - 1][i][0]),
                                          int(people_path[id - 1][i][1])),
                                         (0, 0, 255), 3)
                end = time.time()
                print("time: {}s, fps: {}".format(end - start,
                                                  1 / (end - start)))
                print(area_dic)
            jump_flag += 1
            if self.args.display:
                cv2.imshow("test", ori_im)
                cv2.waitKey(1)

            if self.args.save_path:
                self.output.write(ori_im)
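
pu.cal_simple_linear_regression_coefficients and pu.find_projection are used above but not shown. Assuming they fit a line ax + by + c = 0 through the recent path points and orthogonally project a point onto it (consistent with the |ax + by + c| / sqrt(a² + b²) distance used above), a minimal sketch:

import numpy as np

def fit_line(xs, ys):
    """Least-squares line through the points, as (a, b, c) with ax + by + c = 0."""
    xs, ys = np.asarray(xs, float), np.asarray(ys, float)
    if np.ptp(xs) < 1e-9:  # degenerate case, vertical line: x = const
        return 1.0, 0.0, -xs.mean()
    slope, intercept = np.polyfit(xs, ys, 1)  # y = slope * x + intercept
    return slope, -1.0, intercept             # slope*x - y + intercept = 0

def project_point(a, b, c, x0, y0):
    """Foot of the perpendicular from (x0, y0) to the line ax + by + c = 0."""
    d = (a * x0 + b * y0 + c) / (a * a + b * b)
    return x0 - a * d, y0 - b * d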
class Detector(object):
    def __init__(self, args):
        self.args = args
        use_cuda = bool(strtobool(self.args.use_cuda))
        params = Params(f'projects/{self.args.project}.yml')
        self.submit = True
        self.cam_id = 1
        self.object_list = []
        self.object_list_tracks = []
        if args.display:
            pass
            # cv2.namedWindow("test", cv2.WINDOW_NORMAL)
            # cv2.resizeWindow("test", args.display_width, args.display_height)

        self.vdo = cv2.VideoCapture()
        self.efficientdet = EfficientDetBackbone(
            num_classes=len(params.obj_list),
            compound_coef=self.args.compound_coef,
            ratios=eval(params.anchors_ratios),
            scales=eval(params.anchors_scales)).cuda()
        # self.yolo3 = YOLOv3(args.yolo_cfg, args.yolo_weights, args.yolo_names, is_xywh=True, conf_thresh=args.conf_thresh, nms_thresh=args.nms_thresh, use_cuda=use_cuda)

        self.deepsort = DeepSort(args.deepsort_checkpoint, use_cuda=True)
        # self.class_names = self.yolo3.class_names
        self.efficientdet.load_state_dict(torch.load(
            args.detector_weights_path),
                                          strict=False)

    def __enter__(self):
        self.im_width = 1920
        self.im_height = 1280

        if self.args.save_path:
            fourcc = cv2.VideoWriter_fourcc(*'DIVX')
            self.output = cv2.VideoWriter(self.args.save_path, fourcc, 10,
                                          (self.im_width, self.im_height))

        return self

    def __exit__(self, exc_type, exc_value, exc_traceback):
        if exc_type:
            print(exc_type, exc_value, exc_traceback)

    def detect(self):
        for tf_idx, tfrecord in enumerate(tqdm(tfrecord_paths[2:])):
            self.object_list = []
            self.object_list_tracks = []
            training_set = TUMuchTrackingDataset(tfrecord_path=tfrecord,
                                                 transform=tfs,
                                                 cam_id=self.cam_id)
            training_generator = DataLoader(training_set, **training_params)
            for it, data in enumerate(training_generator):

                imgs = data['img'].to(torch.device("cuda:0"))

                if self.submit:
                    meta = data['meta']
                with torch.no_grad():
                    features, regression, classification, anchors = self.efficientdet(
                        imgs)

                out = postprocess(imgs, anchors, regression, classification,
                                  regressBoxes, clipBoxes, threshold,
                                  iou_threshold)
                # convert rois from (x1, y1, x2, y2) to (cx, cy, w, h)

                boxes = out[0]["rois"]
                for idx in range(out[0]["rois"].shape[0]):
                    x1, y1, x2, y2 = out[0]["rois"][idx]
                    w, h = x2 - x1, y2 - y1

                    boxes[idx][0] = x1 + w / 2
                    boxes[idx][1] = y1 + h / 2
                    boxes[idx][2] = w
                    boxes[idx][3] = h
                bbox_xcycwh, cls_conf, cls_ids = boxes, out[0]["scores"], out[
                    0]["class_ids"]

                if bbox_xcycwh is not None:

                    mask = cls_ids <= 4

                    bbox_xcycwh = bbox_xcycwh[mask]
                    try:
                        bbox_xcycwh[:, 3:] *= 1  # no-op here; other examples pad height with *= 1.2
                    except:  # skip frames whose boxes come back malformed
                        continue

                    cls_conf = cls_conf[mask]

                    # CHW float tensor -> HWC uint8 image for DeepSORT/OpenCV
                    im = imgs.cpu().numpy()
                    im = im[0, :, :, :]

                    im = np.swapaxes(im, 0, 2)
                    im = np.swapaxes(im, 0, 1)
                    im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB)
                    im = im * 255
                    im = im.astype(np.uint8)
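                    # More concise equivalent of the conversion above (sketch):
                    #   im = (imgs[0].cpu().numpy().transpose(1, 2, 0)
                    #         * 255).astype(np.uint8)
                    #   im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB)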
                    outputs = self.deepsort.update(bbox_xcycwh, cls_conf,
                                                   out[0]["class_ids"], im)
                    if len(outputs) > 0:

                        bbox_xyxy = outputs[:, :4]
                        identities = outputs[:, -2]
                        track_class = outputs[:, -1]

                        if self.submit:
                            for box_idx in range(bbox_xyxy.shape[0]):
                                o = meta[:][0]

                                box = label_pb2.Label.Box()

                                box.center_x = (bbox_xyxy[box_idx, 0] +
                                                bbox_xyxy[box_idx, 2]) / 2
                                box.center_y = (bbox_xyxy[box_idx, 1] +
                                                bbox_xyxy[box_idx, 3]) / 2
                                box.length = (bbox_xyxy[box_idx, 2] -
                                              bbox_xyxy[box_idx, 0])
                                box.width = (bbox_xyxy[box_idx, 3] -
                                             bbox_xyxy[box_idx, 1])

                                o.object.box.CopyFrom(box)
                                o.score = 0.9  # CHECK THIS
                                # Use correct type.

                                # MAP THIS TO CORRECT CLASSES
                                o.object.type = to_waymo_classes[
                                    track_class[box_idx]]

                                self.object_list.append(copy.deepcopy(o))

                                o.object.id = str(identities[box_idx])
                                self.object_list_tracks.append(
                                    copy.deepcopy(o))
                        if self.args.save_path:
                            draw_bboxes(im, bbox_xyxy, identities)

                if self.args.display:
                    pass

                # note: self.output exists only if save_path was set before
                # __enter__ ran, so don't reassign save_path inside the loop
                if self.args.save_path:
                    self.output.write(im)
            # write per-camera detection results
            objects = metrics_pb2.Objects()
            for o in self.object_list:
                objects.objects.append(o)
            with open(
                    "./output/detection/sub_camid_{}.bin".format(self.cam_id),
                    'ab') as f:
                f.write(objects.SerializeToString())

            # write per-camera tracking results (same boxes plus track ids)
            objects = metrics_pb2.Objects()
            for o in self.object_list_tracks:
                objects.objects.append(o)
            with open(
                    "./output/tracking/sub_camid_{}.bin".format(self.cam_id),
                    'ab') as f:
                f.write(objects.SerializeToString())
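A quick way to sanity-check the .bin files written above (a sketch, assuming the waymo-open-dataset package that provides metrics_pb2 is installed; the append-mode writes above rely on protobuf's message-concatenation semantics, under which repeated fields merge):

from waymo_open_dataset.protos import metrics_pb2

with open("./output/detection/sub_camid_0.bin", "rb") as f:
    objects = metrics_pb2.Objects()
    # concatenated serialized Objects messages merge their repeated fields
    objects.MergeFromString(f.read())
print("recovered", len(objects.objects), "objects")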
Example #28
class DeepSortDetector(object):
    """[summary]
    Arguments:
        yolov3:
            cfg - yolov3.cfg
            weights - weights/best.pt
            data - coco.data
            conf_thres - 0.5
            nms_thres - 0.4
            img_size - 416
        deep sort:
            deep_checkpoint - "deep_sort/deep/checkpoint/ckpt.t7"
            max_dist - 0.2
        video_path - "./data/videosample/vidoe1.mp4"
        output_file - "./data/videoresults/video1.txt"
        display_width - 800
        display_height - 600
        save_path = "./video1_out.mp4"
    """

    def __init__(
            self,
            cfg,
            weights,
            video_path,
            deep_checkpoint="deep_sort/deep/checkpoint/ckpt.t7",
            data="dataset1.data",
            output_file=None,
            img_size=416,
            display=False,
            nms_thres=0.4,
            conf_thres=0.5,
            max_dist=0.2,
            display_width=800,
            display_height=600,
            save_path=None):
        device = torch.device(
            'cuda') if torch.cuda.is_available() else torch.device('cpu')
        self.vidCap = cv2.VideoCapture()
        self.yolov3 = InferYOLOv3(cfg, img_size, weights, data, device,
                                  conf_thres, nms_thres)
        self.deepsort = DeepSort(deep_checkpoint,
                                 max_dist)
        self.display = display
        self.video_path = video_path
        self.output_file = output_file
        self.save_path = save_path

        if self.display:
            cv2.namedWindow("Test", cv2.WINDOW_NORMAL)
            cv2.resizeWindow("Test", display_width, display_height)

    def __enter__(self):
        assert os.path.isfile(self.video_path), "Error: path error"
        self.vidCap.open(self.video_path)
        self.im_width = int(self.vidCap.get(cv2.CAP_PROP_FRAME_WIDTH))
        self.im_height = int(self.vidCap.get(cv2.CAP_PROP_FRAME_HEIGHT))

        if self.save_path is not None:
            fourcc = cv2.VideoWriter_fourcc(*'MJPG')
            self.output = cv2.VideoWriter(self.save_path, fourcc, 20,
                                          (self.im_width, self.im_height))
        assert self.vidCap.isOpened()
        return self

    def __exit__(self, exc_type, exc_value, exc_traceback):
        if exc_type:
            print(exc_type, exc_value, exc_traceback)

    def detect(self):
        frame_no = -1
        # skip_no = 2

        if self.output_file:
            f = open(self.output_file, "w")

        while self.vidCap.grab():
            frame_no += 1

            # skip frames every n frames
            # if frame_no % skip_no != 0:
            #     continue

            # start time
            total_begin = time.time()

            _, img = self.vidCap.retrieve()

            # YOLOv3 stage
            yolo_begin = time.time()
            bbox_xyxy, cls_conf, cls_ids = self.yolov3.predict(img)
            # [x1,y1,x2,y2]
            yolo_end = time.time()

            # DeepSORT stage
            ds_begin = time.time()
            if bbox_xyxy is not None:
                bbox_cxcywh = xyxy2xywh(bbox_xyxy)

                outputs = self.deepsort.update(bbox_cxcywh, cls_conf, img)

                if len(outputs) > 0:
                    # [x1,y1,x2,y2] id
                    bbox_xyxy = outputs[:, :4]
                    ids = outputs[:, -1]
                    img = draw_bboxes(img, bbox_xyxy, ids)

                    # frame,id,tlwh,1,-1,-1,-1
                    if self.output_file:
                        bbox_tlwh = xyxy2xywh(bbox_xyxy)
                        for i in range(len(bbox_tlwh)):
                            write_line = "%d,%d,%d,%d,%d,%d,1,-1,-1,-1\n" % (
                                frame_no + 1, outputs[i, -1],
                                int(bbox_tlwh[i][0]), int(bbox_tlwh[i][1]),
                                int(bbox_tlwh[i][2]), int(bbox_tlwh[i][3]))
                            f.write(write_line)
            ds_end = time.time()

            total_end = time.time()

            if frame_no % 500 == 0:
                print("frame:%04d|det:%.4f|deep sort:%.4f|total:%.4f|det p:%.2f%%|fps:%.2f"
                      % (frame_no,
                         yolo_end - yolo_begin,
                         ds_end - ds_begin,
                         total_end - total_begin,
                         (yolo_end - yolo_begin) * 100 / (total_end - total_begin),
                         1 / (total_end - total_begin)))

            if self.display:
                cv2.imshow("Test", img)
                cv2.waitKey(1)

            if self.save_path:
                self.output.write(img)

        if self.output_file:
            f.close()
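One caveat in the MOT-style writer above: the MOT challenge text format expects top-left x/y plus width/height, while xyxy2xywh (as its use with DeepSORT elsewhere in these examples suggests) likely returns center-based boxes. A minimal top-left converter, shown here as a hypothetical helper rather than part of the original code:

import numpy as np

def xyxy2tlwh(bbox_xyxy: np.ndarray) -> np.ndarray:
    """Convert [x1, y1, x2, y2] rows to [top-left x, top-left y, w, h]."""
    tlwh = bbox_xyxy.copy()
    tlwh[:, 2] = bbox_xyxy[:, 2] - bbox_xyxy[:, 0]  # width
    tlwh[:, 3] = bbox_xyxy[:, 3] - bbox_xyxy[:, 1]  # height
    return tlwh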
Example #29
def main():
    print('Connecting to camera')
    cap = cv2.VideoCapture(0)
    # cap = cv2.VideoCapture('rtsp://*****:*****@[email protected]/H264?ch=1&subtype=0')  #  - rtsp://admin:comvis@[email protected]:554/H.264
    assert cap.isOpened(), 'Unable to connect to camera'
    device = 'cuda:0' if torch.cuda.is_available() else 'cpu'

    print('Loading models')
    detector = Detector('weights/yolov5s.pt',
                        img_size=(640, 640),
                        conf_thresh=0.4,
                        iou_thresh=0.5,
                        agnostic_nms=False,
                        device=device)
    deepsort = DeepSort('weights/ckpt.t7',
                        max_dist=0.2,
                        min_confidence=0.3,
                        nms_max_overlap=0.5,
                        max_iou_distance=0.7,
                        max_age=70,
                        n_init=3,
                        nn_budget=100,
                        device=device)
    bboxes_visualizer = BoundingBoxesVisualizer()
    fps_estimator = MeanEstimator()
    person_cls_id = detector.names.index('person')  # get id of 'person' class

    width, height = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)), int(
        cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    cam_fps = int(cap.get(cv2.CAP_PROP_FPS))
    print(f'Starting capture, camera_fps={cam_fps}')

    # Start of demo
    win_name = 'MICA ReID Demo'
    cv2.namedWindow(win_name, cv2.WINDOW_GUI_NORMAL | cv2.WINDOW_FREERATIO)
    cv2.resizeWindow(win_name, width, height)
    frame_id = 0
    while True:
        start_it = time.time()
        ret, img = cap.read()
        if not ret:
            print('Unable to read camera')
            break
        detections = detector.detect([img])[0]

        num_people = 0
        if detections is not None:
            detections = detections[detections[:, -1].eq(
                person_cls_id)]  # filter person
            xywh, confs = parse_detection(detections)
            outputs = deepsort.update(xywh, confs, img)
            num_people = len(outputs)
            bboxes_visualizer.remove([
                t.track_id for t in deepsort.tracker.tracks
                if t.time_since_update > 3 or t.is_deleted()
            ])
            bboxes_visualizer.update(outputs)
            # draw detections
            for pid in outputs[:, -1]:
                bboxes_visualizer.plot(img,
                                       pid,
                                       label=f'Person {pid}',
                                       line_thickness=5,
                                       trail_trajectory=True,
                                       trail_bbox=False)
        # draw counting
        overlay = img.copy()
        count_str = f'Number of people: {num_people}'
        text_size = cv2.getTextSize(count_str, 0, fontScale=0.5,
                                    thickness=1)[0]
        cv2.rectangle(overlay, (10, 10 + 10),
                      (15 + text_size[0], 10 + 20 + text_size[1]),
                      (255, 255, 255), -1)
        img = cv2.addWeighted(overlay, 0.4, img, 0.6, 0)
        cv2.putText(img,
                    count_str, (12, 10 + 15 + text_size[1]),
                    0,
                    0.5, (0, 0, 0),
                    thickness=1,
                    lineType=cv2.LINE_AA)

        # show
        cv2.imshow(win_name, img)
        key = cv2.waitKey(1)
        elapsed_time = time.time() - start_it
        fps = fps_estimator.update(1 / elapsed_time)
        print(
            f'[{frame_id:06d}] num_detections={num_people} fps={fps:.02f} elapsed_time={elapsed_time:.03f}'
        )
        # check key pressed
        if key == ord('q') or key == 27:  # q or esc to quit
            break
        elif key == ord('r'):  # r to reset tracking
            deepsort.reset()
            bboxes_visualizer.clear()
        elif key == 32:  # space to pause
            key = cv2.waitKey(0)
            if key == ord('q') or key == 27:
                break
        frame_id += 1
    cv2.destroyAllWindows()
    cap.release()
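For reference, parse_detection above presumably splits YOLOv5's (N, 6) detection tensor of [x1, y1, x2, y2, conf, cls] rows into the center-format boxes and confidences DeepSORT expects; a minimal sketch under that assumption (the helper's real signature may differ):

import torch

def parse_detection(detections: torch.Tensor):
    """Split [x1, y1, x2, y2, conf, cls] rows into (xywh, confs)."""
    xyxy = detections[:, :4]
    xywh = xyxy.clone()
    xywh[:, 0] = (xyxy[:, 0] + xyxy[:, 2]) / 2  # center x
    xywh[:, 1] = (xyxy[:, 1] + xyxy[:, 3]) / 2  # center y
    xywh[:, 2] = xyxy[:, 2] - xyxy[:, 0]        # width
    xywh[:, 3] = xyxy[:, 3] - xyxy[:, 1]        # height
    return xywh, detections[:, 4]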
Example #30
class Detector(object):
    def __init__(self, args):
        self.args = args
        use_cuda = bool(strtobool(self.args.use_cuda))
        if args.display:
            cv2.namedWindow("test", cv2.WINDOW_NORMAL)
            cv2.resizeWindow("test", args.display_width, args.display_height)

        self.vdo = cv2.VideoCapture()
        self.detectron2 = Detectron2(args)

        self.deepsort = DeepSort(args.deepsort_checkpoint, use_cuda=use_cuda)
        self.total_counter = [0] * 100

    def __enter__(self):
        assert os.path.isfile(self.args.VIDEO_PATH), "Error: path error"
        self.vdo.open(self.args.VIDEO_PATH)
        self.im_width = int(self.vdo.get(cv2.CAP_PROP_FRAME_WIDTH))
        self.im_height = int(self.vdo.get(cv2.CAP_PROP_FRAME_HEIGHT))

        if self.args.save_path:
            fourcc = cv2.VideoWriter_fourcc(*'MJPG')
            self.output = cv2.VideoWriter(self.args.save_path, fourcc, 20,
                                          (self.im_width, self.im_height))

        assert self.vdo.isOpened()
        return self

    def __exit__(self, exc_type, exc_value, exc_traceback):
        if exc_type:
            print(exc_type, exc_value, exc_traceback)

    def detect(self):
        while self.vdo.grab():
            start = time.time()
            _, im = self.vdo.retrieve()
            # im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB)
            print('-------------------- DEMO started --------------------')
            bbox_xcycwh, cls_conf, cls_ids, cls_masks, bbox_xyxy_detectron2 = self.detectron2.detect(
                im)

            current_counter = []
            if len(bbox_xcycwh):
                mask = cls_ids == 0  # select the 'person' class
                bbox_xcycwh = bbox_xcycwh[mask]
                cls_conf = cls_conf[mask]
                binary_masks = cls_masks[mask]

                # draw detections after NMS (white boxes)
                outputs, detections = self.deepsort.update(
                    bbox_xcycwh, cls_conf, im)
                im = draw_detections(detections, im)
                if len(outputs):
                    bbox_xyxy = outputs[:, :4]
                    identities = current_counter = outputs[:, -1]
                    # remap raw track ids to consecutive display ids
                    ordered_identities = []
                    for identity in identities:
                        if not self.total_counter[identity]:
                            self.total_counter[identity] = max(
                                self.total_counter) + 1
                        ordered_identities.append(self.total_counter[identity])
                    im = draw_bboxes(im, bbox_xyxy, ordered_identities,
                                     binary_masks)

            end = time.time()
            time_fps = "time: {}s, fps: {}".format(round(end - start, 2),
                                                   round(1 / (end - start), 2))
            im = cv2.putText(
                im, "Total Person Counter: " + str(max(self.total_counter)),
                (int(20), int(120)), 0, 5e-3 * 200, (0, 255, 0), 2)
            im = cv2.putText(
                im, "Current Person Counter: " + str(len(current_counter)),
                (int(20), int(80)), 0, 5e-3 * 200, (0, 255, 0), 2)
            im = cv2.putText(im, time_fps, (int(20), int(40)), 0, 5e-3 * 200,
                             (0, 255, 0), 3)
            if self.args.display:
                cv2.imshow("test", im)
                cv2.waitKey(1)

            if self.args.save_path:
                self.output.write(im)
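The total_counter list in this last example doubles as an id-remapping table: each new DeepSORT track id is assigned the next consecutive display number, and max(self.total_counter) is the running total. Because the list is fixed at 100 entries, it raises an IndexError once a raw track id reaches 100; a dict-based variant (a sketch, not the original author's code) avoids that limit:

id_map = {}

def remap_id(track_id: int) -> int:
    """Map arbitrary DeepSORT track ids to consecutive display ids."""
    if track_id not in id_map:
        id_map[track_id] = len(id_map) + 1
    return id_map[track_id]

# running total of unique tracks seen so far: len(id_map)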