Example No. 1
    def __init__(self, model_path):
        self.min_confidence = 0.3
        self.nms_max_overlap = 1.0

        self.extractor = Extractor(model_path, use_cuda=True)

        max_cosine_distance = 0.2
        nn_budget = 100
        metric = NearestNeighborDistanceMetric("cosine", max_cosine_distance, nn_budget)
        self.tracker = Tracker(metric)
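
A minimal instantiation sketch for the constructor above; the checkpoint path and the module layout are assumptions for illustration, not part of the example.

from deep_sort import DeepSort  # assumed module layout

deepsort = DeepSort("checkpoints/ckpt.t7")  # re-ID checkpoint path is a placeholder
# The constructor wires three pieces together:
#   - Extractor: embeds each detection crop into an appearance feature vector
#   - NearestNeighborDistanceMetric("cosine", 0.2, 100): gates matches by cosine distance
#     and keeps at most 100 feature vectors per track (the nn_budget)
#   - Tracker: Kalman prediction plus assignment over the gated cost matrix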
Example No. 2
 def __init__(self, model_path, use_cuda, map_location_flag, bad_time, left_time):
     self.min_confidence = 0.3
     self.nms_max_overlap = 1.0
     self.extractor = Extractor(model_path, use_cuda=use_cuda, map_location_flag=map_location_flag)
     # Dwell time after which a track is flagged as a violation
     self.bad_time = bad_time
     max_cosine_distance = 0.2
     nn_budget = 100
     metric = NearestNeighborDistanceMetric("cosine", max_cosine_distance, nn_budget)
     self.tracker = Tracker(metric,left_time=left_time)
Example No. 3
 def __init__(self, model_path):
     super(DeepSort, self).__init__()
     self.min_confidence = 0.3  # Filter detections by confidence: boxes (and their features) below this score are discarded
     self.nms_max_overlap = 1.0  # Non-maximum suppression threshold: removes duplicate boxes on the same target
     self.extractor = Extractor(model_path,
                                use_cuda=True)  # extracts deep appearance features from each detection's image patch
     max_cosine_distance = 0.2
     nn_budget = 100
     metric = NearestNeighborDistanceMetric("cosine", max_cosine_distance,
                                            nn_budget)
     self.tracker = Tracker(metric)
Example No. 4
    def __init__(self, model_path):
        # Detection confidence threshold: detections below self.min_confidence are ignored when building the detection list
        self.min_confidence = 0.25
        self.nms_max_overlap = 1.0  # non-maximum suppression threshold (original value 1.0)
        # With nms_max_overlap = 1.0, NMS keeps all detections
        self.extractor = Extractor(model_path, use_cuda=True)

        max_cosine_distance = 0.2  # cosine-distance gating threshold; tuning this can reduce ID switches
        # nn_budget caps the per-track feature gallery: only the features from the most recent
        # nn_budget appearances of a track are stored (e.g. a budget of 10 keeps the last 10 feature vectors)
        nn_budget = 100
        metric = NearestNeighborDistanceMetric("cosine", max_cosine_distance,
                                               nn_budget)
        self.tracker = Tracker(metric)
Example No. 5
 def __init__(self,
              model_path='yolov3/of_model/yolov3_model_python/',
              gpu_ids='0',
              model_name='resid',
              confidence_l=0.2,
              confidence_h=0.4,
              max_cosine_distance=0.2,
              max_iou_distance=0.7,
              save_feature=False,
              use_filter=False,
              init_extractor=True,
              max_age=30,
              std_Q_w=1e-1,
              std_Q_wv=1e-3,
              std_R_w=5e-2,
              cls_=0):
     self.confidence_l = confidence_l
     self.confidence_h = confidence_h
     self.iou_thresh_l = 0.24
     self.iou_thresh = 0.5
     self.nms_max_overlap = 1.0
     self.extractor = None
     self.height, self.width = None, None
     if init_extractor:
         self.extractor = Extractor(model_name=model_name,
                                    load_path=model_path,
                                    gpu_ids=gpu_ids,
                                    cls=cls_)
     max_iou = max_iou_distance
     nn_budget = 100
     metric = NearestNeighborDistanceMetric("cosine", max_cosine_distance,
                                            nn_budget)
     self.tracker = Tracker(metric,
                            max_iou_distance=max_iou,
                            max_age=max_age,
                            std_Q_w=std_Q_w,
                            std_Q_wv=std_Q_wv,
                            std_R_w=std_R_w)
     self.all_feature = None
     self.save_feature = save_feature
     self.count = 1
     self.result = []
     self.use_filter = use_filter
Example No. 6
def recognize_from_video():
    results = []
    idx_frame = 0

    # net initialize
    detector = init_detector(args.env_id)
    extractor = ailia.Net(EX_MODEL_PATH, EX_WEIGHT_PATH, env_id=args.env_id)

    # tracker class instance
    metric = NearestNeighborDistanceMetric(
        "cosine", MAX_COSINE_DISTANCE, NN_BUDGET
    )
    tracker = Tracker(
        metric,
        max_iou_distance=0.7,
        max_age=70,
        n_init=3
    )

    capture = webcamera_utils.get_capture(args.video)

    # create video writer
    if args.savepath is not None:
        writer = webcamera_utils.get_writer(
            args.savepath,
            int(capture.get(cv2.CAP_PROP_FRAME_HEIGHT)),
            int(capture.get(cv2.CAP_PROP_FRAME_WIDTH)),
        )
    else:
        writer = None

    print('Start Inference...')
    while(True):
        idx_frame += 1
        ret, frame = capture.read()
        if (cv2.waitKey(1) & 0xFF == ord('q')) or not ret:
            break

        # In order to use ailia.Detector, the input should have 4 channels.
        input_img = cv2.cvtColor(frame, cv2.COLOR_BGR2BGRA)
        h, w = frame.shape[0], frame.shape[1]

        # do detection
        detector.compute(input_img, THRESHOLD, IOU)
        bbox_xywh, cls_conf, cls_ids = get_detector_result(detector, h, w)

        # select person class
        mask = cls_ids == 0
        bbox_xywh = bbox_xywh[mask]

        # bbox dilation just in case bbox too small,
        # delete this line if using a better pedestrian detector
        bbox_xywh[:, 3:] *= 1.2
        cls_conf = cls_conf[mask]

        # do tracking
        img_crops = []
        for box in bbox_xywh:
            x1, y1, x2, y2 = xywh_to_xyxy(box, h, w)
            img_crops.append(frame[y1:y2, x1:x2])

        if img_crops:
            # preprocess
            img_batch = np.concatenate([
                normalize_image(resize(img), 'ImageNet')[np.newaxis, :, :, :]
                for img in img_crops
            ], axis=0).transpose(0, 3, 1, 2)

            # TODO better to pass a batch at once
            # features = extractor.predict(img_batch)
            features = []
            for img in img_batch:
                features.append(extractor.predict(img[np.newaxis, :, :, :])[0])
            features = np.array(features)
        else:
            features = np.array([])

        bbox_tlwh = xywh_to_tlwh(bbox_xywh)
        detections = [
            Detection(bbox_tlwh[i], conf, features[i])
            for i, conf in enumerate(cls_conf) if conf > MIN_CONFIDENCE
        ]

        # run non-maximum suppression
        boxes = np.array([d.tlwh for d in detections])
        scores = np.array([d.confidence for d in detections])
        nms_max_overlap = 1.0
        indices = non_max_suppression(boxes, nms_max_overlap, scores)
        detections = [detections[i] for i in indices]

        # update tracker
        tracker.predict()
        tracker.update(detections)

        # update bbox identities
        outputs = []
        for track in tracker.tracks:
            if not track.is_confirmed() or track.time_since_update > 1:
                continue
            box = track.to_tlwh()
            x1, y1, x2, y2 = tlwh_to_xyxy(box, h, w)
            track_id = track.track_id
            outputs.append(np.array([x1, y1, x2, y2, track_id], dtype=int))
        if len(outputs) > 0:
            outputs = np.stack(outputs, axis=0)

        # draw box for visualization
        if len(outputs) > 0:
            bbox_tlwh = []
            bbox_xyxy = outputs[:, :4]
            identities = outputs[:, -1]
            frame = draw_boxes(frame, bbox_xyxy, identities)

            for bb_xyxy in bbox_xyxy:
                bbox_tlwh.append(xyxy_to_tlwh(bb_xyxy))

            results.append((idx_frame - 1, bbox_tlwh, identities))

        cv2.imshow('frame', frame)

        if writer is not None:
            writer.write(frame)

        if args.savepath is not None:
            write_results(args.savepath.split('.')[0] + '.txt', results, 'mot')
        else:
            write_results('result.txt', results, 'mot')

    capture.release()
    cv2.destroyAllWindows()
    if writer is not None:
        writer.release()

    print(f'Save results to {args.savepath}')
    print('Script finished successfully.')
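
The box-format helpers used above (xywh_to_tlwh, xywh_to_xyxy, tlwh_to_xyxy) are imported from elsewhere in this project. A plausible implementation, assuming the detector returns center-based (cx, cy, w, h) boxes as NumPy arrays, is sketched below.

import numpy as np

def xywh_to_tlwh(bbox_xywh):
    # (cx, cy, w, h) -> (top-left x, top-left y, w, h)
    bbox_tlwh = bbox_xywh.copy()
    bbox_tlwh[:, 0] = bbox_xywh[:, 0] - bbox_xywh[:, 2] / 2.
    bbox_tlwh[:, 1] = bbox_xywh[:, 1] - bbox_xywh[:, 3] / 2.
    return bbox_tlwh

def xywh_to_xyxy(bbox_xywh, height, width):
    # (cx, cy, w, h) -> corner coordinates clipped to the image
    x, y, w, h = bbox_xywh
    x1 = max(int(x - w / 2), 0)
    x2 = min(int(x + w / 2), width - 1)
    y1 = max(int(y - h / 2), 0)
    y2 = min(int(y + h / 2), height - 1)
    return x1, y1, x2, y2

def tlwh_to_xyxy(bbox_tlwh, height, width):
    # (top-left x, top-left y, w, h) -> corner coordinates clipped to the image
    x, y, w, h = bbox_tlwh
    x1 = max(int(x), 0)
    x2 = min(int(x + w), width - 1)
    y1 = max(int(y), 0)
    y2 = min(int(y + h), height - 1)
    return x1, y1, x2, y2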
Example No. 7
class DeepSort(object):
    def __init__(self, model_path):
        self.min_confidence = 0.3
        self.nms_max_overlap = 1.0

        self.extractor = Extractor(model_path, use_cuda=True)

        max_cosine_distance = 0.2
        nn_budget = 100
        metric = NearestNeighborDistanceMetric("cosine", max_cosine_distance,
                                               nn_budget)
        self.tracker = Tracker(metric)

    def update(self, bbox_xywh, confidences, ori_img):
        self.height, self.width = ori_img.shape[:2]
        # generate detections
        features = self._get_features(bbox_xywh, ori_img)
        detections = [
            Detection(bbox_xywh[i], conf, features[i])
            for i, conf in enumerate(confidences) if conf > self.min_confidence
        ]

        # run non-maximum suppression
        boxes = np.array([d.tlwh for d in detections])
        scores = np.array([d.confidence for d in detections])
        indices = non_max_suppression(boxes, self.nms_max_overlap, scores)
        detections = [detections[i] for i in indices]

        # update tracker
        self.tracker.predict()
        self.tracker.update(detections)

        # output bbox identities
        outputs = []
        for track in self.tracker.tracks:
            if not track.is_confirmed() or track.time_since_update > 1:
                continue
            box = track.to_tlwh()
            x1, y1, x2, y2 = self._xywh_to_xyxy(box)
            track_id = track.track_id
            outputs.append(np.array([x1, y1, x2, y2, track_id], dtype=int))
        if len(outputs) > 0:
            outputs = np.stack(outputs, axis=0)
        return outputs

    def _xywh_to_xyxy(self, bbox_xywh):
        x, y, w, h = bbox_xywh
        x1 = max(int(x - w / 2), 0)
        x2 = min(int(x + w / 2), self.width - 1)
        y1 = max(int(y - h / 2), 0)
        y2 = min(int(y + h / 2), self.height - 1)
        return x1, y1, x2, y2

    def _get_features(self, bbox_xywh, ori_img):
        features = []
        for box in bbox_xywh:
            x1, y1, x2, y2 = self._xywh_to_xyxy(box)
            im = ori_img[y1:y2, x1:x2]
            feature = self.extractor(im)[0]
            features.append(feature)
        features = np.stack(features, axis=0)
        return features
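
A short usage sketch for the class above. The detector call and the file paths are placeholders; update() expects center-based (cx, cy, w, h) boxes, as _xywh_to_xyxy implies, together with per-box confidences and the original BGR frame.

import cv2
import numpy as np

deepsort = DeepSort("checkpoints/ckpt.t7")  # re-ID checkpoint path is a placeholder
cap = cv2.VideoCapture("input.mp4")
while True:
    ret, frame = cap.read()
    if not ret:
        break
    # my_detector is a placeholder returning center-based boxes (cx, cy, w, h) and confidences
    bbox_xywh, confidences = my_detector(frame)
    outputs = deepsort.update(np.asarray(bbox_xywh), np.asarray(confidences), frame)
    for x1, y1, x2, y2, track_id in outputs:
        cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 2)
        cv2.putText(frame, str(track_id), (x1, y1 - 5), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 255, 0), 2)
cap.release()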
Example No. 8
class DeepSort(object):
    def __init__(self, model_path, use_cuda, map_location_flag, bad_time, left_time):
        self.min_confidence = 0.3
        self.nms_max_overlap = 1.0
        self.extractor = Extractor(model_path, use_cuda=use_cuda, map_location_flag=map_location_flag)
        # Dwell time after which a track is flagged as a violation
        self.bad_time = bad_time
        max_cosine_distance = 0.2
        nn_budget = 100
        metric = NearestNeighborDistanceMetric("cosine", max_cosine_distance, nn_budget)
        self.tracker = Tracker(metric, left_time=left_time)

    def update(self, bbox_xywh, confidences, ori_img, all_name,start_time):
        self.height, self.width = ori_img.shape[:2]
        # generate detections
        features = self._get_features(bbox_xywh, ori_img)
        detections = [Detection(bbox_xywh[i], conf, features[i], all_name[i],start_time) for i, conf in enumerate(confidences) if
                      conf > self.min_confidence]

        # run non-maximum suppression
        boxes = np.array([d.tlwh for d in detections])
        scores = np.array([d.confidence for d in detections])

        # non-maximum suppression
        indices = non_max_suppression(boxes, self.nms_max_overlap, scores)

        detections = [detections[i] for i in indices]

        # update tracker
        self.tracker.predict()
        self.tracker.update(detections,start_time)
        # output bbox identities
        outputs = []

        return_name = []
        stay_time_all=[]
        for track in self.tracker.tracks:
            # Alternative: skip only unconfirmed tracks
            # if not track.is_confirmed():
            if not track.is_confirmed() or track.time_since_update > 1:
                continue
            # Only output tracks in violation (dwell time exceeds bad_time)
            if (start_time - track.start_time) < self.bad_time:
                continue
            box = track.to_tlwh()
            x1, y1, x2, y2 = self._xywh_to_xyxy(box)
            track_id = track.track_id

            class_name = track.class_name
            # print(class_name)
            outputs.append(np.array([x1, y1, x2, y2, track_id], dtype=int))
            return_name.append(class_name)
            part = [class_name + str(track_id), start_time - track.start_time]
            stay_time_all.append(part)
        if len(outputs) > 0:
            outputs = np.stack(outputs, axis=0)
        # Dwell time could also be recorded for tracks in both tentative and confirmed states:
        # stay_time_all = [[track.class_name, track.track_id, start_time - track.start_time] \
        #                  for track in self.tracker.tracks \
        #                  if (start_time - track.start_time) > self.bad_time and track.is_confirmed]
        return outputs, return_name, stay_time_all

    def _xywh_to_xyxy(self, bbox_xywh):
        x,y,w,h = bbox_xywh
        x1 = max(int(x-w/2),0)
        x2 = min(int(x+w/2),self.width-1)
        y1 = max(int(y-h/2),0)
        y2 = min(int(y+h/2),self.height-1)
        return x1,y1,x2,y2
    
    def _get_features(self, bbox_xywh, ori_img):
        features = []
        for box in bbox_xywh:
            x1,y1,x2,y2 = self._xywh_to_xyxy(box)
            im = ori_img[y1:y2,x1:x2]
            feature = self.extractor(im)[0]
            features.append(feature)
        if len(features):
            features = np.stack(features, axis=0)
        else:
            features = np.array([])
        return features
Example No. 9
class DeepSort(object):
    def __init__(self, model_path):
        # Detection confidence threshold: detections below self.min_confidence are ignored when building the detection list
        self.min_confidence = 0.25
        self.nms_max_overlap = 1.0  # non-maximum suppression threshold (original value 1.0)
        # With nms_max_overlap = 1.0, NMS keeps all detections
        self.extractor = Extractor(model_path, use_cuda=True)

        max_cosine_distance = 0.2  # cosine-distance gating threshold; tuning this can reduce ID switches
        # nn_budget caps the per-track feature gallery: only the features from the most recent
        # nn_budget appearances of a track are stored (e.g. a budget of 10 keeps the last 10 feature vectors)
        nn_budget = 100
        metric = NearestNeighborDistanceMetric("cosine", max_cosine_distance,
                                               nn_budget)
        self.tracker = Tracker(metric)

    def update(self, bbox_xywh, confidences, class_num, ori_img):
        self.height, self.width = ori_img.shape[:2]
        # generate detections
        detections = []
        try:
            features = self._get_features(bbox_xywh, ori_img)
            for i, conf in enumerate(confidences):
                if conf >= self.min_confidence and features.any():
                    # Detection: see the corresponding class in detection.py
                    detections.append(
                        Detection(bbox_xywh[i], conf, class_num[i],
                                  features[i]))
                else:
                    pass
        except Exception as ex:
            # TODO Error: OpenCV(4.1.1) /io/opencv/modules/imgproc/src/resize.cpp:3720: error: (-215:Assertion failed) !ssize.empty() in function 'resize'
            print("{} Error: {}".format(
                time.strftime("%H:%M:%S", time.localtime()), ex))
            # print('Error or video finish ')

        # run non-maximum suppression
        boxes = np.array([d.tlwh for d in detections])
        scores = np.array([d.confidence for d in detections])
        indices = non_max_suppression(boxes, self.nms_max_overlap,
                                      scores)  # e.g. indices = [0] or [0, 1]
        detections = [detections[i]
                      for i in indices]  # keep only the detections selected by NMS
        # print(detections[0].confidence)
        # confidence: 0.5057685971260071
        # print(detections)
        # [bbox_xywh: [1508.47619629  483.33926392   34.95910645   77.69906616],
        #  confidence: 0.5140249729156494,
        #  bbox_xywh: [1678.99377441  526.4251709    36.55554199   80.11364746],
        #  confidence: 0.5057685971260071]

        # update tracker
        self.tracker.predict()
        # detections is now the NMS-filtered list
        self.tracker.update(detections)
        # print("confidence {}".format(detections[0].confidence))

        # output bbox identities
        # each Track stores the per-target state
        outputs = []
        # self.tracker.tracks holds the active Track instances
        for track in self.tracker.tracks:
            if not track.is_confirmed() or track.time_since_update > 1:
                continue
            box = track.to_tlwh()  # (top-left x, top-left y, width, height), refreshed every frame
            x1, y1, x2, y2 = self._xywh_to_xyxy_centernet(box)  # tlwh -> corner coordinates (x1, y1, x2, y2)

            # Draw the motion trajectory
            # Trajectory point: center of the detection box
            center = (int((x1 + x2) / 2), int((y1 + y2) / 2))  # record the center point of every frame
            # Alternative: trajectory at the bottom of the detection box
            # center = (int((x1+x2)/2), int((y2)))  # record the bottom point of every frame

            points[track.track_id].append(center)  # FIFO queue recording the trajectory of the box center
            # print(points[1][-1])  # inspect the last stored center point of track id 1
            # for j in range(1, len(points[track.track_id])):
            #     if points[track.track_id][j - 1] is None or points[track.track_id][j] is None:
            #        continue
            #     # thickness = int(np.sqrt(32 / float(j + 1)) * 2)  # heavy first point, thinner lines afterwards
            #     cv2.line(ori_img, (points[track.track_id][j-1]), (points[track.track_id][j]), (8, 196, 255), thickness=3, lineType=cv2.LINE_AA)

            track_id = track.track_id
            confidences = track.confidence * 100
            cls_num = track.class_num
            # print("track_id {} confidences {}".format(track_id,confidences))

            outputs.append(
                np.array([x1, y1, x2, y2, track_id, confidences, cls_num],
                         dtype=int))

        if len(outputs) > 0:
            outputs = np.stack(outputs, axis=0)

        return outputs, points

    # for CenterNet: (x1, y1, w, h) -> (x1, y1, x2, y2)

    def _xywh_to_xyxy_centernet(self, bbox_xywh):
        x1, y1, w, h = bbox_xywh
        x1 = max(x1, 0)
        y1 = max(y1, 0)
        x2 = min(int(x1 + w), self.width - 1)
        y2 = min(int(y1 + h), self.height - 1)
        return int(x1), int(y1), x2, y2

    # for YOLO: (center x, center y, w, h) -> (x1, y1, x2, y2)
    def _xywh_to_xyxy_yolo(self, bbox_xywh):
        x, y, w, h = bbox_xywh
        x1 = max(int(x - w / 2), 0)
        x2 = min(int(x + w / 2), self.width - 1)
        y1 = max(int(y - h / 2), 0)
        y2 = min(int(y + h / 2), self.height - 1)
        return x1, y1, x2, y2

    def _get_features(self, bbox_xywh, ori_img):
        # TODO
        features = []
        for box in bbox_xywh:
            x1, y1, x2, y2 = self._xywh_to_xyxy_centernet(box)
            im = ori_img[y1:y2, x1:x2]
            feature = self.extractor(im)[0]
            features.append(feature)
        if len(features):
            features = np.stack(features, axis=0)
        else:
            features = np.array([])
        return features
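
The update() method above appends to a module-level points container that is not defined in this example. A plausible definition, assumed here, is a defaultdict of bounded deques so that each track id keeps only its most recent trajectory points in FIFO order.

from collections import defaultdict, deque

# Assumed definition of the module-level `points` used by update() above:
# one bounded FIFO deque of (x, y) centers per track id.
points = defaultdict(lambda: deque(maxlen=30))  # maxlen of 30 is an arbitrary choice for this sketch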
Example No. 10
def recognize_from_video():
    try:
        print('[INFO] Webcam mode is activated')
        RECORD_TIME = 80
        capture = cv2.VideoCapture(int(args.video))
        if not capture.isOpened():
            print("[ERROR] webcamera not found")
            sys.exit(1)
    except ValueError:
        if check_file_existance(args.video):
            capture = cv2.VideoCapture(args.video)

    frame_rate = capture.get(cv2.CAP_PROP_FPS)
    if FRAME_SKIP:
        action_recognize_fps = int(args.fps)
    else:
        action_recognize_fps = frame_rate

    if args.savepath != "":
        size = (int(capture.get(cv2.CAP_PROP_FRAME_WIDTH)),
                int(capture.get(cv2.CAP_PROP_FRAME_HEIGHT)))
        fmt = cv2.VideoWriter_fourcc('m', 'p', '4', 'v')
        writer = cv2.VideoWriter(args.savepath, fmt, action_recognize_fps,
                                 size)
    else:
        writer = None

    # pose estimation
    env_id = ailia.get_gpu_environment_id()
    print(f'env_id: {env_id}')
    if args.arch == "lw_human_pose":
        pose = ailia.PoseEstimator(MODEL_PATH,
                                   WEIGHT_PATH,
                                   env_id=env_id,
                                   algorithm=ALGORITHM)

        detector = None
    else:
        detector = ailia.Detector(DETECTOR_MODEL_PATH,
                                  DETECTOR_WEIGHT_PATH,
                                  len(COCO_CATEGORY),
                                  format=ailia.NETWORK_IMAGE_FORMAT_RGB,
                                  channel=ailia.NETWORK_IMAGE_CHANNEL_FIRST,
                                  range=ailia.NETWORK_IMAGE_RANGE_U_FP32,
                                  algorithm=ailia.DETECTOR_ALGORITHM_YOLOV3,
                                  env_id=env_id)

        pose = ailia.Net(POSE_MODEL_PATH, POSE_WEIGHT_PATH, env_id=env_id)

    # tracker class instance
    extractor = ailia.Net(EX_MODEL_PATH, EX_WEIGHT_PATH, env_id=env_id)
    metric = NearestNeighborDistanceMetric("cosine", MAX_COSINE_DISTANCE,
                                           NN_BUDGET)
    tracker = Tracker(metric, max_iou_distance=0.7, max_age=70, n_init=3)

    # action recognition
    env_id = ailia.get_gpu_environment_id()
    print(f'env_id: {env_id}')
    model = ailia.Net(ACTION_MODEL_PATH, ACTION_WEIGHT_PATH, env_id=env_id)

    action_data = {}

    frame_nb = int(capture.get(cv2.CAP_PROP_FRAME_COUNT))
    idx_frame = 0

    time_start = time.time()
    while (True):
        time_curr = time.time()
        if args.video == '0' and time_curr - time_start > RECORD_TIME:
            break
        ret, frame = capture.read()

        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
        if (not ret) or (frame_nb >= 1 and idx_frame >= frame_nb):
            break

        if FRAME_SKIP:
            mod = round(frame_rate / action_recognize_fps)
            if mod >= 1:
                if idx_frame % mod != 0:
                    idx_frame = idx_frame + 1
                    continue

        input_image, input_data = adjust_frame_size(
            frame,
            frame.shape[0],
            frame.shape[1],
        )
        input_data = cv2.cvtColor(input_data, cv2.COLOR_BGR2BGRA)

        # inference
        if args.arch == "lw_human_pose":
            _ = pose.compute(input_data)
        else:
            detector.compute(input_data, THRESHOLD, IOU)

        # deepsort format
        h, w = input_image.shape[0], input_image.shape[1]
        if args.arch == "lw_human_pose":
            bbox_xywh, cls_conf, cls_ids = get_detector_result_lw_human_pose(
                pose, h, w)
        else:
            bbox_xywh, cls_conf, cls_ids = get_detector_result(detector, h, w)

        mask = cls_ids == 0
        bbox_xywh = bbox_xywh[mask]

        # bbox dilation just in case bbox too small,
        # delete this line if using a better pedestrian detector
        if args.arch == "pose_resnet":
            # bbox_xywh[:, 3:] *= 1.2   #May need to be removed in the future
            cls_conf = cls_conf[mask]

        # do tracking
        img_crops = []
        for box in bbox_xywh:
            x1, y1, x2, y2 = xywh_to_xyxy(box, h, w)
            img_crops.append(input_image[y1:y2, x1:x2])

        if img_crops:
            # preprocess
            img_batch = np.concatenate([
                normalize_image(resize(img), 'ImageNet')[np.newaxis, :, :, :]
                for img in img_crops
            ], axis=0).transpose(0, 3, 1, 2)

            # TODO better to pass a batch at once
            # features = extractor.predict(img_batch)
            features = []
            for img in img_batch:
                features.append(extractor.predict(img[np.newaxis, :, :, :])[0])
            features = np.array(features)
        else:
            features = np.array([])

        bbox_tlwh = xywh_to_tlwh(bbox_xywh)
        detections = [
            Detection(bbox_tlwh[i], conf, features[i])
            for i, conf in enumerate(cls_conf) if conf > MIN_CONFIDENCE
        ]

        # run non-maximum suppression
        boxes = np.array([d.tlwh for d in detections])
        scores = np.array([d.confidence for d in detections])
        nms_max_overlap = 1.0
        indices = non_max_suppression(boxes, nms_max_overlap, scores)
        detections = [detections[i] for i in indices]

        # update tracker
        tracker.predict()
        tracker.update(detections)

        # update bbox identities
        outputs = []
        for track in tracker.tracks:
            if not track.is_confirmed() or track.time_since_update > 1:
                continue
            box = track.to_tlwh()
            x1, y1, x2, y2 = tlwh_to_xyxy(box, h, w)
            track_id = track.track_id
            outputs.append(np.array([x1, y1, x2, y2, track_id], dtype=int))
        if len(outputs) > 0:
            outputs = np.stack(outputs, axis=0)

        # action detection
        actions = []
        persons = []
        if len(outputs) > 0:
            bbox_xyxy = outputs[:, :4]
            identities = outputs[:, -1]
            for i, box in enumerate(bbox_xyxy):
                id = identities[i]

                if not (id in action_data):
                    action_data[id] = np.zeros(
                        (ailia.POSE_KEYPOINT_CNT - 1, TIME_RANGE, 3))

                # action recognition
                action, person = action_recognition(box, input_image, pose,
                                                    detector, model,
                                                    action_data[id])
                actions.append(action)
                persons.append(person)

        # draw box for visualization
        if len(outputs) > 0:
            bbox_tlwh = []
            bbox_xyxy = outputs[:, :4]
            identities = outputs[:, -1]
            frame = draw_boxes(input_image, bbox_xyxy, identities, actions,
                               action_data, (0, 0))

            for bb_xyxy in bbox_xyxy:
                bbox_tlwh.append(xyxy_to_tlwh(bb_xyxy))

        # draw skeleton
        for person in persons:
            if person is not None:
                display_result(input_image, person)

        if writer is not None:
            writer.write(input_image)

            # show progress
            if idx_frame == "0":
                print()
            print("\r" + str(idx_frame + 1) + " / " + str(frame_nb), end="")
            if idx_frame == frame_nb - 1:
                print()

        cv2.imshow('frame', input_image)

        idx_frame = idx_frame + 1

    if writer is not None:
        writer.release()

    capture.release()
    cv2.destroyAllWindows()
    print('Script finished successfully.')
Example No. 11
class KCTracker(object):
    def __init__(self,
                 model_path='yolov3/of_model/yolov3_model_python/',
                 gpu_ids='0',
                 model_name='resid',
                 confidence_l=0.2,
                 confidence_h=0.4,
                 max_cosine_distance=0.2,
                 max_iou_distance=0.7,
                 save_feature=False,
                 use_filter=False,
                 init_extractor=True,
                 max_age=30,
                 std_Q_w=1e-1,
                 std_Q_wv=1e-3,
                 std_R_w=5e-2,
                 cls_=0):
        self.confidence_l = confidence_l
        self.confidence_h = confidence_h
        self.iou_thresh_l = 0.24
        self.iou_thresh = 0.5
        self.nms_max_overlap = 1.0
        self.extractor = None
        self.height, self.width = None, None
        if init_extractor:
            self.extractor = Extractor(model_name=model_name,
                                       load_path=model_path,
                                       gpu_ids=gpu_ids,
                                       cls=cls_)
        max_iou = max_iou_distance
        nn_budget = 100
        metric = NearestNeighborDistanceMetric("cosine", max_cosine_distance,
                                               nn_budget)
        self.tracker = Tracker(metric,
                               max_iou_distance=max_iou,
                               max_age=max_age,
                               std_Q_w=std_Q_w,
                               std_Q_wv=std_Q_wv,
                               std_R_w=std_R_w)
        self.all_feature = None
        self.save_feature = save_feature
        self.count = 1
        self.result = []
        self.use_filter = use_filter
        #print('batch mode')

    def saveResult(self, file_name):
        if os.path.exists(file_name):
            os.remove(file_name)
        self.result = np.array(self.result)  # frameid_pid_tlwhc
        if self.use_filter:
            self.result = removeUnMoveLowConfObj(self.result)
        else:
            self.result = removeSmallOrBigBbox(self.result)
        writeResult(self.result, file_name)
        print('save result:', file_name)

    def getFeatureFromImage(self, bbox_tlwhcs, data, input_type, type):
        bbox_tlwhs = bbox_tlwhcs[:, 0:4]
        features = None
        if input_type == 'img':
            self.height, self.width = data.shape[:2]
            try:
                features = self._get_features_batch(bbox_tlwhs, data, type)
            except Exception as e:
                print(e)
        else:  # input_type == 'feature'
            features = data
        return features

    def update(self, frame_id, bbox_tlwhcs, ori_img, input_type='img', type=0):

        #print('ini boxs number:',len(bbox_tlwhcs))
        # print('ini confs number:',len(confidences))
        if len(bbox_tlwhcs) == 0:
            self.count += 1
            return [], []
        confidences = bbox_tlwhcs[:, -1]
        mask_l = (confidences >= self.confidence_l) & (confidences <
                                                       self.confidence_h)
        mask_h = confidences >= self.confidence_h
        bbox_tlwhcs_low = bbox_tlwhcs[mask_l, :]
        bbox_tlwhcs_true = bbox_tlwhcs[mask_h, :]

        bbox_tlwhcs_new = []
        bbox_tlwhcs_temp = bbox_tlwhcs_low.copy()
        for track in self.tracker.tracks:
            if not track.is_confirmed() or track.time_since_update > 1:
                continue
            if len(bbox_tlwhcs_temp) == 0:
                continue
            box_tlwh_temp = track.to_tlwh()
            ious_ = iou(box_tlwh_temp, bbox_tlwhcs_temp[:, 0:4])
            iou_max_ind = np.argmax(ious_)
            if ious_[iou_max_ind] > self.iou_thresh_l:
                bbox_tlwhcs_new.append(bbox_tlwhcs_temp[iou_max_ind])
                bbox_tlwhcs_temp = np.delete(bbox_tlwhcs_temp, iou_max_ind, axis=0)

        bbox_tlwhcs_new = np.array(bbox_tlwhcs_true.tolist() + bbox_tlwhcs_new)
        if len(bbox_tlwhcs_new) == 0:
            self.count += 1
            return [], []
        #try:
        #    indices = non_max_suppression(bbox_tlwhcs_new[:, 0:4], 0.6, bbox_tlwhcs_new[:, 4])
        #    bbox_tlwhcs_new = np.array([bbox_tlwhcs_new[i] for i in indices])
        #except Exception as e:
        #    print(e)
        #    return [], []
        if len(bbox_tlwhcs_new) == 0:
            self.count += 1
            return [], []
        bbox_tlwhs_new = bbox_tlwhcs_new[:, 0:4]
        confidences_new = bbox_tlwhcs_new[:, 4]

        features = self.getFeatureFromImage(bbox_tlwhcs_new, ori_img,
                                            input_type, type)

        if self.save_feature:
            if self.all_feature is None and len(features):
                self.all_feature = features
            else:
                self.all_feature = np.vstack((self.all_feature, features))

        detections = [
            Detection(bbox_tlwhs_new[i], conf, features[i], i)
            for i, conf in enumerate(confidences_new)
        ]
        # update tracker
        self.tracker.predict()
        self.tracker.update(detections, self.confidence_h)
        self.count += 1

        # output bbox identities
        outputs = []
        for track in self.tracker.tracks:
            if not track.is_confirmed() or track.time_since_update > 1:
                continue
            box_tlwh = track.to_tlwh()  # tlwh
            x1, y1, x2, y2 = self._tlwh_to_xyxy(box_tlwh)
            track_id = track.track_id
            conf = track.confidence
            ori_id = track.ori_id
            outputs.append(np.array([track_id, x1, y1, x2, y2, conf, ori_id]))
            self.result.append(
                np.array([
                    frame_id, track_id, x1, y1, box_tlwh[2], box_tlwh[3], conf
                ]))
        bbox_tlwhcs_results = []
        for i, bbox in enumerate(bbox_tlwhcs):
            track_id_ = -1
            for output in outputs:
                if int(output[6]) == i:
                    track_id_ = output[0]
            #if track_id_ == -1:
            #    continue
            box_tlwh = bbox[0:4]
            conf_ = bbox[4]
            x1, y1, x2, y2 = self._tlwh_to_xyxy(box_tlwh)
            bbox_tlwhcs_results.append(
                np.array([x1, y1, x2, y2, conf_, track_id_]))

        if len(bbox_tlwhcs_results) > 0:
            bbox_tlwhcs_results = np.stack(bbox_tlwhcs_results, axis=0)

        return bbox_tlwhcs_results, features

    # for CenterNet: (x1, y1, w, h) -> (x1, y1, x2, y2)
    def _tlwh_to_xyxy(self, bbox_tlwh):
        x1, y1, w, h = bbox_tlwh
        x2 = x1 + w
        y2 = y1 + h
        return x1, y1, x2, y2

    def _tlwh_to_limit_xyxy(self, bbox_tlwh):
        x1, y1, w, h = bbox_tlwh
        x1 = max(x1, 0)
        y1 = max(y1, 0)
        x2 = min(int(x1 + w), self.width - 1)
        y2 = min(int(y1 + h), self.height - 1)
        return int(x1), int(y1), x2, y2

    # for YOLO: (center x, center y, w, h) -> (x1, y1, x2, y2)
    def _cxcywh_to_xyxy(self, bbox_xywh):
        x, y, w, h = bbox_xywh
        x1 = max(int(x - w / 2), 0)
        x2 = min(int(x + w / 2), self.width - 1)
        y1 = max(int(y - h / 2), 0)
        y2 = min(int(y + h / 2), self.height - 1)
        return x1, y1, x2, y2

    def _get_features_batch(self, bbox_tlwhs, ori_img, type):
        imgs = []
        if self.width is None:
            self.height, self.width = ori_img.shape[:2]
        for box in bbox_tlwhs:
            x1, y1, x2, y2 = self._tlwh_to_limit_xyxy(box)
            im = ori_img[int(y1):int(y2), int(x1):int(x2)]
            imgs.append(im)
        features = self.extractor(imgs, 20, feature_type=type)
        return features

    def _get_features(self, bbox_tlwh, ori_img):
        features = []
        if self.width is None:
            self.height, self.width = ori_img.shape[:2]
        for box in bbox_tlwh:
            x1, y1, x2, y2 = self._tlwh_to_limit_xyxy(box)
            im = ori_img[int(y1):int(y2), int(x1):int(x2)]
            feature = self.extractor(im)[0]
            features.append(feature)
        if len(features):
            features = np.stack(features, axis=0)
        else:
            features = np.array([])
        return features

    def saveFeature(self, filename=None):
        if filename is not None:
            np.save(filename, self.all_feature)
            print('save feature:', filename)
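
A brief usage sketch for KCTracker. The detection layout (rows of top-left x, y, w, h, confidence) follows the way update() slices bbox_tlwhcs; the detector and the paths below are placeholders.

import cv2
import numpy as np

tracker = KCTracker(model_path="reid_model/", gpu_ids="0", use_filter=False)  # paths are placeholders
cap = cv2.VideoCapture("input.mp4")
frame_id = 1
while True:
    ret, frame = cap.read()
    if not ret:
        break
    # my_detector is a placeholder returning rows of (top-left x, top-left y, w, h, confidence)
    bbox_tlwhcs = np.asarray(my_detector(frame), dtype=np.float32)
    results, feats = tracker.update(frame_id, bbox_tlwhcs, frame, input_type='img')
    for x1, y1, x2, y2, conf, track_id in results:
        if track_id < 0:
            continue  # detection was not associated with a confirmed track
        cv2.rectangle(frame, (int(x1), int(y1)), (int(x2), int(y2)), (0, 255, 0), 2)
    frame_id += 1
tracker.saveResult("result.txt")
cap.release()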
Example No. 12
class DeepSort(object):  # alternatively DeepSort(torch.jit.ScriptModule); processes the video frame by frame
    def __init__(self, model_path):
        super(DeepSort, self).__init__()
        self.min_confidence = 0.3  # Filter detections by confidence: boxes (and their features) below this score are discarded
        self.nms_max_overlap = 1.0  # Non-maximum suppression threshold: removes duplicate boxes on the same target
        self.extractor = Extractor(model_path,
                                   use_cuda=True)  # extracts deep appearance features from each detection's image patch
        max_cosine_distance = 0.2
        nn_budget = 100
        metric = NearestNeighborDistanceMetric("cosine", max_cosine_distance,
                                               nn_budget)
        self.tracker = Tracker(metric)

    # @script_method
    def update(self, bbox_xywh, confidences, ori_img):
        self.height, self.width = ori_img.shape[:2]
        # generate detections; `features` holds the appearance feature vectors
        features = self._get_features(bbox_xywh, ori_img)
        # each Detection holds self.tlwh (top-left x, y, w, h), self.confidence and self.feature
        # the detections are stored as ndarrays
        # the confidence filter and the NMS step below could arguably be removed
        detections = [
            Detection(bbox_xywh[i], conf, features[i])
            for i, conf in enumerate(confidences) if conf > self.min_confidence
        ]

        # run non-maximum suppression
        boxes = np.array([d.tlwh for d in detections])
        scores = np.array([d.confidence for d in detections])
        indices = non_max_suppression(boxes, self.nms_max_overlap, scores)
        detections = [detections[i] for i in indices]

        # update tracker
        self.tracker.predict()
        self.tracker.update(detections)

        # output bbox identities
        outputs = []
        for track in self.tracker.tracks:
            if not track.is_confirmed() or track.time_since_update > 1:
                continue
            box = track.to_tlwh()
            x1, y1, x2, y2 = self._xywh_to_xyxy(box)
            track_id = track.track_id
            outputs.append(np.array([x1, y1, x2, y2, track_id], dtype=int))
        if len(outputs) > 0:
            outputs = np.stack(outputs, axis=0)
        return outputs

    # @script_method
    def _xywh_to_xyxy(self, bbox_xywh):
        x, y, w, h = bbox_xywh
        x1 = max(int(x - w / 2), 0)
        x2 = min(int(x + w / 2), self.width - 1)
        y1 = max(int(y - h / 2), 0)
        y2 = min(int(y + h / 2), self.height - 1)
        return x1, y1, x2, y2

    # @script_method
    def _get_features(self, bbox_xywh, ori_img):
        features = []
        for box in bbox_xywh:
            x1, y1, x2, y2 = self._xywh_to_xyxy(box)
            im = ori_img[y1:y2, x1:x2]
            feature = self.extractor(im)[0]
            features.append(feature)
        if len(features):
            features = np.stack(features, axis=0)
        else:
            features = np.array([])
        return features