Example 1
    def update_detection(self, im_blob, img_0):
        """
        Update the detection results for a video or image sequence.
        :rtype: dict
        :param im_blob: network input tensor (1×3×H×W)
        :param img_0: original input image (BGR)
        :return: dict mapping each class id to its filtered detections
        """
        width = img_0.shape[1]
        height = img_0.shape[0]
        inp_height = im_blob.shape[2]
        inp_width = im_blob.shape[3]

        c = np.array([width * 0.5, height * 0.5], dtype=np.float32)  # center
        s = max(float(inp_width) / float(inp_height) * height, width) * 1.0
        meta = {
            'c': c,
            's': s,
            'out_height': inp_height // self.opt.down_ratio,
            'out_width': inp_width // self.opt.down_ratio
        }

        # ----- get detections
        with torch.no_grad():
            dets_dict = defaultdict(list)

            output = self.model.forward(im_blob)[-1]

            # detect outputs
            hm = output['hm'].sigmoid_()
            wh = output['wh']
            reg = output['reg'] if self.opt.reg_offset else None

            # decode detection and classification results
            dets, inds, cls_inds_mask = mot_decode(
                heatmap=hm,
                wh=wh,
                reg=reg,
                num_classes=self.opt.num_classes,
                cat_spec_wh=self.opt.cat_spec_wh,
                K=self.opt.K)

            # post-process the detections
            dets = self.post_process(dets, meta)
            dets = self.merge_outputs([dets])
            # dets = self.merge_outputs(dets)[1]

            # ----- parse detections for each class
            for cls_id in range(self.opt.num_classes):  # cls_id starts from 0
                cls_dets = dets[cls_id + 1]

                # filter out detections whose score is too low
                remain_inds = cls_dets[:, 4] > self.opt.conf_thres
                cls_dets = cls_dets[remain_inds]
                # print(cls_dets)
                dets_dict[cls_id] = cls_dets

        return dets_dict
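update_detection expects a preprocessed blob alongside the raw frame. A minimal sketch of that preprocessing, using a plain resize as a simplified stand-in for the repo's letterbox helper (make_blob and its defaults are illustrative, not part of the source):

import cv2
import numpy as np
import torch

def make_blob(img_0, net_h=608, net_w=1088, device='cpu'):
    """Resize a BGR image to the net input size and convert it to a
    1x3xHxW float tensor in [0, 1]."""
    img = cv2.resize(img_0, (net_w, net_h))       # H x W x C, still BGR
    img = img[:, :, ::-1].transpose(2, 0, 1)      # BGR -> RGB, HWC -> CHW
    img = np.ascontiguousarray(img, dtype=np.float32) / 255.0
    return torch.from_numpy(img).unsqueeze(0).to(device)

# usage (assuming `detector` exposes update_detection as above):
# img_0 = cv2.imread('frame.jpg')
# dets_dict = detector.update_detection(make_blob(img_0), img_0)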
Example 2
    def update_detection(self, im_blob, img_0):
        """
        Update the detection results for a video or image sequence.
        :rtype: dict
        :param im_blob: network input tensor (1×3×H×W)
        :param img_0: original input image (BGR)
        :return: dict mapping each class id to its filtered detections
        """
        height, width = img_0.shape[0], img_0.shape[1]  # H, W of original input image
        net_height, net_width = im_blob.shape[2], im_blob.shape[3]  # H, W of net input

        c = np.array([width * 0.5, height * 0.5], dtype=np.float32)  # image center
        s = max(float(net_width) / float(net_height) * height, width) * 1.0

        h_out = net_height // self.opt.down_ratio
        w_out = net_width // self.opt.down_ratio

        # ----- get detections
        with torch.no_grad():
            dets_dict = defaultdict(list)

            # --- network output
            output = self.model.forward(im_blob)[-1]

            # --- detection outputs
            hm = output['hm'].sigmoid_()
            wh = output['wh']
            reg = output['reg'] if self.opt.reg_offset else None

            # --- decode results of detection
            dets, inds, cls_inds_mask = mot_decode(heatmap=hm,
                                                   wh=wh,
                                                   reg=reg,
                                                   num_classes=self.opt.num_classes,
                                                   cat_spec_wh=self.opt.cat_spec_wh,
                                                   K=self.opt.K)

            # --- map to original image coordinate system
            # meta = {'c': c,
            #         's': s,
            #         'out_height': h_out,
            #         'out_width': w_out}
            # dets = self.post_process(dets, meta)  # using affine matrix
            dets = map2orig(dets, h_out, w_out, height, width, self.opt.num_classes)  # translate and scale
            # dets = self.merge_outputs([dets])

            # --- parse detections of each class
            for cls_id in range(self.opt.num_classes):  # cls_id starts from index 0
                cls_dets = dets[cls_id]

                # filter out low conf score dets
                remain_inds = cls_dets[:, 4] > self.opt.conf_thres
                cls_dets = cls_dets[remain_inds]
                dets_dict[cls_id] = cls_dets

        return dets_dict
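The map2orig helper itself is not shown in this listing. A minimal sketch of the same translate-and-scale idea, under the assumption that the net input was produced by a centered, aspect-preserving letterbox (the repo's actual helper may differ):

import numpy as np

def map_boxes_to_orig(boxes, h_out, w_out, h_orig, w_orig):
    """Map x1,y1,x2,y2 boxes from the output grid back to original image
    coordinates by undoing the letterbox scale and padding."""
    ratio = min(float(w_out) / w_orig, float(h_out) / h_orig)
    pad_x = (w_out - w_orig * ratio) * 0.5
    pad_y = (h_out - h_orig * ratio) * 0.5
    boxes = boxes.copy()
    boxes[:, [0, 2]] = (boxes[:, [0, 2]] - pad_x) / ratio
    boxes[:, [1, 3]] = (boxes[:, [1, 3]] - pad_y) / ratio
    return boxes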
Example 3
    def save_result(self, output, batch, results):
        reg = output['reg'] if self.opt.reg_offset else None
        dets = mot_decode(
            output['hm'], output['wh'], reg=reg,
            cat_spec_wh=self.opt.cat_spec_wh, K=self.opt.K)
        dets = dets.detach().cpu().numpy().reshape(1, -1, dets.shape[2])
        dets_out = ctdet_post_process(
            dets.copy(), batch['meta']['c'].cpu().numpy(),
            batch['meta']['s'].cpu().numpy(),
            output['hm'].shape[2], output['hm'].shape[3], output['hm'].shape[1])
        results[batch['meta']['img_id'].cpu().numpy()[0]] = dets_out[0]
Example 4
    def save_result(self, output, batch, results):
        reg = output['reg'] if self.opt.reg_offset else None
        dets = mot_decode(heatmap=output['hm'],
                          wh=output['wh'],
                          reg=reg,
                          cat_spec_wh=self.opt.cat_spec_wh,
                          K=self.opt.K)
        dets = dets.detach().cpu().numpy().reshape(1, -1, dets.shape[2])

        dets_out = ctdet_post_process(dets.copy(),
                                      batch['meta']['c'].cpu().numpy(),  # center
                                      batch['meta']['s'].cpu().numpy(),  # scale
                                      output['hm'].shape[2],  # height
                                      output['hm'].shape[3],  # width
                                      output['hm'].shape[1])  # num_classes

        results[batch['meta']['img_id'].cpu().numpy()[0]] = dets_out[0]
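Examples 1, 5, 7, and 10 all recompute the same center/scale meta inline. Factored out for clarity (a refactoring sketch, not a helper that exists in the repo):

import numpy as np

def ctdet_meta(height, width, net_h, net_w, down_ratio=4):
    """Build the CenterNet-style post-processing meta: image center `c`,
    scale `s`, and the output grid size after downsampling."""
    c = np.array([width * 0.5, height * 0.5], dtype=np.float32)
    s = max(float(net_w) / float(net_h) * height, width) * 1.0
    return {'c': c, 's': s,
            'out_height': net_h // down_ratio,
            'out_width': net_w // down_ratio}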
Example 5
    def update(self, im_blob, img0):
        self.frame_id += 1
        activated_starcks = []
        refind_stracks = []
        lost_stracks = []
        removed_stracks = []

        width = img0.shape[1]
        height = img0.shape[0]
        inp_height = im_blob.shape[2]
        inp_width = im_blob.shape[3]
        c = np.array([width / 2., height / 2.], dtype=np.float32)
        s = max(float(inp_width) / float(inp_height) * height, width) * 1.0
        meta = {
            'c': c,
            's': s,
            'out_height': inp_height // self.opt.down_ratio,
            'out_width': inp_width // self.opt.down_ratio
        }
        ''' Step 1: Network forward, get detections & embeddings'''
        with torch.no_grad():
            output = self.model(im_blob)[-1]
            hm = output['hm'].sigmoid_()
            wh = output['wh']
            id_feature = output['id']
            id_feature = F.normalize(id_feature, dim=1)

            reg = output['reg'] if self.opt.reg_offset else None
            dets, inds = mot_decode(hm,
                                    wh,
                                    reg=reg,
                                    cat_spec_wh=self.opt.cat_spec_wh,
                                    K=self.opt.K)
            id_feature = _tranpose_and_gather_feat(id_feature, inds)
            id_feature = id_feature.squeeze(0)
            id_feature = id_feature.cpu().numpy()
        # id_feature shape, e.g. torch.Size([1, 128, 6])
        dets = self.post_process(dets, meta)
        dets = self.merge_outputs([dets])[1]

        remain_inds = dets[:, 4] > self.opt.conf_thres
        dets = dets[remain_inds]
        id_feature = id_feature[remain_inds]

        # visualize detections (debug)
        '''
        for i in range(0, dets.shape[0]):
            bbox = dets[i][0:4]
            cv2.rectangle(img0, (bbox[0], bbox[1]),
                          (bbox[2], bbox[3]),
                          (0, 255, 0), 2)
        cv2.imshow('dets', img0)
        cv2.waitKey(0)
        id0 = id0-1
        '''

        if len(dets) > 0:
            '''Detections'''
            occlution = np.zeros(shape=(len(dets), len(dets)))
            for i in range(len(dets)):
                for j in range(i + 1, len(dets)):
                    occ1, occ2 = tlbr_occlution(dets[i, :4], dets[j, :4])
                    occlution[i, j] = occ1
                    occlution[j, i] = occ2
            occlution = np.sum(occlution, axis=0)
            detections = [
                STrack(STrack.tlbr_to_tlwh(tlbrs[:4]), tlbrs[4], f,
                       self.opt.maxLen, occ)
                for (tlbrs, f, occ) in zip(dets[:, :5], id_feature, occlution)
            ]
        else:
            detections = []
        ''' Add newly detected tracklets to tracked_stracks'''
        unconfirmed = []
        tracked_stracks = []  # type: list[STrack]
        for track in self.tracked_stracks:
            if not track.is_activated:
                unconfirmed.append(track)
            else:
                tracked_stracks.append(track)
        ''' Step 2: First association, with embedding'''
        strack_pool = joint_stracks(tracked_stracks, self.lost_stracks)
        # Predict the current location with KF
        #for strack in strack_pool:
        #strack.predict()
        STrack.multi_predict(strack_pool)
        if self.opt.queue_dist:
            # we don't use occlusion in detections for now
            dists = matching.queue_embedding_distance(strack_pool,
                                                      detections,
                                                      self.opt,
                                                      metric="cosine",
                                                      occlution=None)
        else:
            dists = matching.embedding_distance(strack_pool, detections)
        #dists = matching.gate_cost_matrix(self.kalman_filter, dists, strack_pool, detections)
        dists = matching.fuse_motion(self.kalman_filter, dists, strack_pool, detections,
                                     only_position=self.opt.only_position,
                                     lambda_=self.opt.lambda_)
        matches, u_track, u_detection = matching.linear_assignment(
            dists, thresh=self.opt.matching_threshold)

        for itracked, idet in matches:
            track = strack_pool[itracked]
            det = detections[idet]
            if track.state == TrackState.Tracked:
                if self.opt.queue_dist:
                    track.update(detections[idet], self.frame_id,
                                 occlution[idet])
                else:
                    track.update(detections[idet], self.frame_id)
                activated_starcks.append(track)
            else:
                track.re_activate(det, self.frame_id, new_id=False)
                refind_stracks.append(track)
        ''' Step 3: Second association, with IOU'''
        detections = [detections[i] for i in u_detection]
        r_tracked_stracks = [
            strack_pool[i] for i in u_track
            if strack_pool[i].state == TrackState.Tracked
        ]
        dists = matching.iou_distance(r_tracked_stracks, detections)
        matches, u_track, u_detection = matching.linear_assignment(dists,
                                                                   thresh=0.5)

        for itracked, idet in matches:
            track = r_tracked_stracks[itracked]
            det = detections[idet]
            if track.state == TrackState.Tracked:
                if self.opt.queue_dist:
                    track.update(det, self.frame_id, occlution[idet])
                else:
                    track.update(det, self.frame_id)
                activated_starcks.append(track)
            else:
                track.re_activate(det, self.frame_id, new_id=False)
                refind_stracks.append(track)

        for it in u_track:
            track = r_tracked_stracks[it]
            if not track.state == TrackState.Lost:
                track.mark_lost()
                lost_stracks.append(track)
        '''Deal with unconfirmed tracks, usually tracks with only one beginning frame'''
        detections = [detections[i] for i in u_detection]
        dists = matching.iou_distance(unconfirmed, detections)
        matches, u_unconfirmed, u_detection = matching.linear_assignment(
            dists, thresh=0.7)
        for itracked, idet in matches:
            if self.opt.queue_dist:
                unconfirmed[itracked].update(detections[idet], self.frame_id,
                                             occlution[idet])
            else:
                unconfirmed[itracked].update(detections[idet], self.frame_id)
            activated_starcks.append(unconfirmed[itracked])
        for it in u_unconfirmed:
            track = unconfirmed[it]
            track.mark_removed()
            removed_stracks.append(track)
        """ Step 4: Init new stracks"""
        for inew in u_detection:
            track = detections[inew]
            if track.score < self.det_thresh:
                continue
            track.activate(self.kalman_filter, self.frame_id)
            activated_starcks.append(track)
        """ Step 5: Update state"""
        for track in self.lost_stracks:
            if self.frame_id - track.end_frame > self.max_time_lost:
                track.mark_removed()
                removed_stracks.append(track)

        # print('Ramained match {} s'.format(t4-t3))

        self.tracked_stracks = [
            t for t in self.tracked_stracks if t.state == TrackState.Tracked
        ]
        self.tracked_stracks = joint_stracks(self.tracked_stracks,
                                             activated_starcks)
        self.tracked_stracks = joint_stracks(self.tracked_stracks,
                                             refind_stracks)
        self.lost_stracks = sub_stracks(self.lost_stracks,
                                        self.tracked_stracks)
        self.lost_stracks.extend(lost_stracks)
        self.lost_stracks = sub_stracks(self.lost_stracks,
                                        self.removed_stracks)
        self.removed_stracks.extend(removed_stracks)
        self.tracked_stracks, self.lost_stracks = remove_duplicate_stracks(
            self.tracked_stracks, self.lost_stracks)
        # collect activated tracks for output
        output_stracks = [
            track for track in self.tracked_stracks if track.is_activated
        ]

        logger.debug('===========Frame {}=========='.format(self.frame_id))
        logger.debug('Activated: {}'.format(
            [track.track_id for track in activated_starcks]))
        logger.debug('Refind: {}'.format(
            [track.track_id for track in refind_stracks]))
        logger.debug('Lost: {}'.format(
            [track.track_id for track in lost_stracks]))
        logger.debug('Removed: {}'.format(
            [track.track_id for track in removed_stracks]))

        return output_stracks
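The two association passes above rely on matching.embedding_distance and matching.iou_distance. Minimal stand-in sketches of the costs they compute (assuming L2-normalized features, as the F.normalize call above guarantees; the repo's versions are vectorized differently):

import numpy as np

def embedding_distance_sketch(track_feats, det_feats):
    """Cosine-distance cost matrix between rows of L2-normalized features."""
    if len(track_feats) == 0 or len(det_feats) == 0:
        return np.zeros((len(track_feats), len(det_feats)), dtype=np.float32)
    sim = np.asarray(track_feats) @ np.asarray(det_feats).T  # dot = cosine
    return np.maximum(0.0, 1.0 - sim)

def iou_distance_sketch(a_tlbrs, b_tlbrs):
    """1 - IoU cost matrix between two lists of x1,y1,x2,y2 boxes."""
    cost = np.ones((len(a_tlbrs), len(b_tlbrs)), dtype=np.float32)
    for i, a in enumerate(a_tlbrs):
        for j, b in enumerate(b_tlbrs):
            iw = max(0.0, min(a[2], b[2]) - max(a[0], b[0]))
            ih = max(0.0, min(a[3], b[3]) - max(a[1], b[1]))
            inter = iw * ih
            union = ((a[2] - a[0]) * (a[3] - a[1])
                     + (b[2] - b[0]) * (b[3] - b[1]) - inter)
            cost[i, j] = 1.0 - inter / union if union > 0 else 1.0
    return cost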
Example 6
def test_single(img_path, dev):
    """
    :param img_path:
    :param dev:
    :return:
    """
    if not os.path.isfile(img_path):
        print('[Err]: invalid image path.')
        return

    # Head dimensions of the net
    heads = {'hm': 5, 'reg': 2, 'wh': 2, 'id': 128}

    # Load model and put to device
    net = create_model(arch='resdcn_18', heads=heads, head_conv=256)
    model_path = '/mnt/diskb/even/MCMOT/exp/mot/default/mcmot_last_det_resdcn_18.pth'
    net = load_model(model=net, model_path=model_path)
    net = net.to(dev)
    net.eval()
    print(net)

    # Read image
    img_0 = cv2.imread(img_path)  # BGR
    assert img_0 is not None, 'Failed to load ' + img_path

    # Padded resize
    h_in, w_in = 608, 1088  # (608, 1088) (320, 640)
    img, _, _, _ = letterbox(img=img_0, height=h_in, width=w_in)

    # Preprocess image: BGR -> RGB and H×W×C -> C×H×W
    img = img[:, :, ::-1].transpose(2, 0, 1)
    img = np.ascontiguousarray(img, dtype=np.float32)
    img /= 255.0

    # Convert to tensor and put to device
    blob = torch.from_numpy(img).unsqueeze(0).to(dev)

    with torch.no_grad():
        # Network output
        output = net.forward(blob)[-1]

        # Tracking output
        hm = output['hm'].sigmoid_()
        reg = output['reg']
        wh = output['wh']
        id_feature = output['id']
        id_feature = F.normalize(id_feature, dim=1)  # L2 normalization for the feature vectors

        # Decode output (num_classes=5, cat_spec_wh=False, K=128)
        dets, inds, cls_inds_mask = mot_decode(hm, wh, reg, 5, False, 128)

        # Get ReID feature vector by object class
        cls_id_feats = []  # topK feature vectors of each object class
        for cls_id in range(5):  # cls_id starts from 0
            # get inds of each object class
            cls_inds = inds[:, cls_inds_mask[cls_id]]

            # gather feats for each object class
            cls_id_feature = _tranpose_and_gather_feat(id_feature, cls_inds)  # inds: 1×128
            cls_id_feature = cls_id_feature.squeeze(0)  # n × FeatDim
            if dev == 'cpu':
                cls_id_feature = cls_id_feature.numpy()
            else:
                cls_id_feature = cls_id_feature.cpu().numpy()
            cls_id_feats.append(cls_id_feature)

        # Convert back to original image coordinate system
        height_0, width_0 = img_0.shape[0], img_0.shape[1]  # H, W of original input image
        dets = map2orig(dets, h_in // 4, w_in // 4, height_0, width_0, 5)  # translate and scale

        # Parse detections of each class
        dets_dict = defaultdict(list)
        for cls_id in range(5):  # cls_id starts from index 0
            cls_dets = dets[cls_id]

            # filter out low conf score dets
            remain_inds = cls_dets[:, 4] > 0.4
            cls_dets = cls_dets[remain_inds]
            # cls_id_feature = cls_id_feats[cls_id][remain_inds]  # if need re-id
            dets_dict[cls_id] = cls_dets

    # Visualize detection results
    img_draw = plot_detects(img_0, dets_dict, 5, frame_id=0, fps=30.0)
    # cv2.imshow('Detection', img_draw)
    # cv2.waitKey()
    cv2.imwrite('/mnt/diskb/even/MCMOT/results/00000.jpg', img_draw)
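The per-class feature gathering above hinges on _tranpose_and_gather_feat. A minimal stand-in sketch matching the shape conventions used here (B x C x H x W map, flattened H*W long indices), not necessarily the repo's exact implementation:

import torch

def gather_feat_sketch(fmap, inds):
    """Pick per-index feature vectors from a B x C x H x W map using
    flattened H*W indices (long tensor); returns B x K x C."""
    b, c, h, w = fmap.shape
    flat = fmap.permute(0, 2, 3, 1).reshape(b, h * w, c)  # B x HW x C
    inds = inds.unsqueeze(2).expand(b, inds.size(1), c)   # B x K x C
    return flat.gather(1, inds)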
Example 7
def gen_det(opt,
            batch_size=12,
            img_size=(1088, 608)):
    data_cfg = opt.data_cfg
    with open(data_cfg) as f:
        data_cfg_dict = json.load(f)
    test_path = data_cfg_dict['test']
    dataset_root = data_cfg_dict['root']
    if opt.gpus[0] >= 0:
        opt.device = torch.device('cuda')
    else:
        opt.device = torch.device('cpu')
    print('Creating model...')
    model = create_model(opt.arch, opt.heads, opt.head_conv)
    model = load_model(model, opt.load_model)
    # model = torch.nn.DataParallel(model)
    model = model.to(opt.device)
    model.eval()
    # dummy_input = torch.rand(1, 3, 1088, 608).cuda()  # dummy input for graph visualization
    # with SummaryWriter(comment='model') as w:
    #     w.add_graph(model, dummy_input)
    # Get dataloader
    transforms = T.Compose([T.ToTensor()])
    dataset = DetDataset(dataset_root, test_path, img_size, augment=False, transforms=transforms)
    dataloader = torch.utils.data.DataLoader(dataset, batch_size=batch_size, shuffle=False,
                                             num_workers=0, drop_last=False)
    seen = 0
    dataloader = tqdm(dataloader)
    for batch_i, (imgs, paths, shapes) in enumerate(dataloader):
        seen += batch_size
        if seen < 3148:  # skip images already processed (resume point)
            continue
        path = paths[0]
        split = path.split("/")
        split[0] += "/"
        if "MOT16-03" in path:
            continue
        if "MOT16-01" in path:
            continue
        # if int(split[-1].strip(".jpg")) < 736:
        #     continue
        output = model(imgs.cuda())[-1]
        origin_shape = shapes[0]
        width = origin_shape[1]
        height = origin_shape[0]
        inp_height = img_size[1]
        inp_width = img_size[0]
        c = np.array([width / 2., height / 2.], dtype=np.float32)
        s = max(float(inp_width) / float(inp_height) * height, width) * 1.0
        meta = {'c': c, 's': s,
                'out_height': inp_height // opt.down_ratio,
                'out_width': inp_width // opt.down_ratio}
        hm = output['hm'].sigmoid_()
        wh = output['wh']
        reg = output['reg'] if opt.reg_offset else None
        opt.K = 200
        detections, inds = mot_decode(hm, wh, reg=reg, cat_spec_wh=opt.cat_spec_wh, K=opt.K)
        # Compute average precision for each sample
        for si, _ in enumerate(imgs):
            seen += 1
            # path = paths[si]
            # img0 = cv2.imread(path)

            dets = detections[si]
            dets = dets.unsqueeze(0)
            dets = post_process(opt, dets, meta)
            dets = merge_outputs(opt, [dets])[1]
            if dets is None:
                continue
            path = paths[si]
            split = path.split("/")
            split[0] += "/"
            det_file = os.path.join(*split[:-2], "det", "FairMOT_det.txt")
            with open(det_file, "a+") as f:
                frame_id = int(split[-1].strip(".jpg"))
                img1 = cv2.imread(path)
                remain_inds = dets[:, 4] > 0.4
                dets = dets[remain_inds]
                xywh = xyxy2ct_xywh(dets[:, :4])

                for t in range(len(dets)):
                    x1, y1, x2, y2 = dets[t, :4]
                    f.write(
                        "%d,-1, %.2f, %.2f, %.2f, %.2f, %.2f, -1,-1,-1\n" % (
                            frame_id, xywh[t, 0], xywh[t, 1], xywh[t, 2], xywh[t, 3], dets[t, 4]))
                    cv2.rectangle(img1, (int(x1), int(y1)), (int(x2), int(y2)), (0, 255, 0), 4)
                cv2.imshow("", img1)
                cv2.waitKey(100)
                # cv2.imwrite('pred.jpg', img1)

    return None
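The rows written to FairMOT_det.txt above follow the MOT detection layout "frame,-1,x,y,w,h,score,-1,-1,-1". A small hypothetical helper for reading them back, mirroring the f.write call above:

def parse_mot_det_line(line):
    """Parse one detection row written by gen_det above."""
    vals = line.strip().split(',')
    frame_id = int(vals[0])
    x, y, w, h, score = (float(v) for v in vals[2:7])
    return frame_id, (x, y, w, h), score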
Example 8
    def update_tracking(self, im_blob, img_0):
        """
        :param im_blob:
        :param img_0:
        :return:
        """
        # update frame id
        self.frame_id += 1

        # record tracking results: originally a single-class list,
        # replaced by defaultdict(list) keyed by class id for multi-class tracking
        activated_starcks_dict = defaultdict(list)
        refind_stracks_dict = defaultdict(list)
        lost_stracks_dict = defaultdict(list)
        removed_stracks_dict = defaultdict(list)
        output_stracks_dict = defaultdict(list)

        height, width = img_0.shape[0], img_0.shape[1]  # H, W of original input image
        net_height, net_width = im_blob.shape[2], im_blob.shape[3]  # H, W of net input

        c = np.array([width * 0.5, height * 0.5], dtype=np.float32)
        s = max(float(net_width) / float(net_height) * height, width) * 1.0
        h_out = net_height // self.opt.down_ratio
        w_out = net_width // self.opt.down_ratio
        ''' Step 1: Network forward, get detections & embeddings'''
        with torch.no_grad():
            output = self.model.forward(im_blob)[-1]

            hm = output['hm'].sigmoid_()
            wh = output['wh']
            reg = output['reg'] if self.opt.reg_offset else None
            id_feature = output['id']
            id_feature = F.normalize(id_feature, dim=1)  # L2 normalize

            # decode detection and classification results
            dets, inds, cls_inds_mask = mot_decode(
                heatmap=hm,
                wh=wh,
                reg=reg,
                num_classes=self.opt.num_classes,
                cat_spec_wh=self.opt.cat_spec_wh,
                K=self.opt.K)

            # ----- get ReID feature vector by object class
            cls_id_feats = []  # topK feature vectors of each object class
            for cls_id in range(self.opt.num_classes):  # cls_id starts from 0
                # get inds of each object class
                cls_inds = inds[:, cls_inds_mask[cls_id]]

                # gather feats for each object class
                cls_id_feature = _tranpose_and_gather_feat(
                    id_feature, cls_inds)  # inds: 1×128
                cls_id_feature = cls_id_feature.squeeze(0)  # n × FeatDim
                cls_id_feature = cls_id_feature.cpu().numpy()
                cls_id_feats.append(cls_id_feature)

        # post-process detections
        # meta = {'c': c,
        #         's': s,
        #         'out_height': h_out,
        #         'out_width': w_out}
        # dets = self.post_process(dets, meta)  # using affine matrix
        # dets = self.merge_outputs([dets])

        dets = map2orig(dets, h_out, w_out, height, width,
                        self.opt.num_classes)  # translate and scale

        # ----- parse detections for each class
        for cls_id in range(self.opt.num_classes):  # cls_id starts from 0
            cls_dets = dets[cls_id]
            '''
            # visualize intermediate detections (per class)
            for i in range(0, cls_dets.shape[0]):
                bbox = cls_dets[i][0:4]
                cv2.rectangle(img0,
                              (bbox[0], bbox[1]),  # left-top point
                              (bbox[2], bbox[3]),  # right-down point
                              [0, 255, 255],  # yellow
                              2)
                cv2.putText(img0,
                            id2cls[cls_id],
                            (bbox[0], bbox[1]),
                            cv2.FONT_HERSHEY_PLAIN,
                            1.3,
                            [0, 0, 255],  # red
                            2)
            cv2.imshow('{}'.format(id2cls[cls_id]), img0)
            cv2.waitKey(0)
            '''

            # filter out detections whose score is too low
            remain_inds = cls_dets[:, 4] > self.opt.conf_thres
            cls_dets = cls_dets[remain_inds]
            cls_id_feature = cls_id_feats[cls_id][remain_inds]

            if len(cls_dets) > 0:
                '''Detections, tlbrs: top left bottom right score'''
                cls_detections = [
                    STrack(STrack.tlbr_to_tlwh(tlbrs[:4]),
                           tlbrs[4],
                           feat,
                           buff_size=30)
                    for (tlbrs, feat) in zip(cls_dets[:, :5], cls_id_feature)
                ]
            else:
                cls_detections = []

            # reset the track ids for a different object class
            for track in cls_detections:
                track.reset_track_id()
            ''' Add newly detected tracklets to tracked_stracks'''
            unconfirmed_dict = defaultdict(list)
            tracked_stracks_dict = defaultdict(list)  # key: cls_id, value: list[STrack]
            for track in self.tracked_stracks_dict[cls_id]:
                if not track.is_activated:
                    unconfirmed_dict[cls_id].append(track)
                else:
                    tracked_stracks_dict[cls_id].append(track)
            ''' Step 2: First association, with embedding'''
            strack_pool_dict = defaultdict(list)
            strack_pool_dict[cls_id] = joint_stracks(
                tracked_stracks_dict[cls_id], self.lost_stracks_dict[cls_id])

            # Predict the current location with KF
            # for strack in strack_pool:
            STrack.multi_predict(strack_pool_dict[cls_id])
            dists = matching.embedding_distance(strack_pool_dict[cls_id],
                                                cls_detections)
            dists = matching.fuse_motion(self.kalman_filter, dists,
                                         strack_pool_dict[cls_id],
                                         cls_detections)
            matches, u_track, u_detection = matching.linear_assignment(
                dists, thresh=0.7)  # thresh=0.7

            for i_tracked, i_det in matches:
                track = strack_pool_dict[cls_id][i_tracked]
                det = cls_detections[i_det]
                if track.state == TrackState.Tracked:
                    track.update(cls_detections[i_det], self.frame_id)
                    activated_starcks_dict[cls_id].append(
                        track)  # for multi-class
                else:
                    track.re_activate(det, self.frame_id, new_id=False)
                    refind_stracks_dict[cls_id].append(track)
            ''' Step 3: Second association, with IOU'''
            cls_detections = [cls_detections[i] for i in u_detection]
            r_tracked_stracks = [
                strack_pool_dict[cls_id][i] for i in u_track
                if strack_pool_dict[cls_id][i].state == TrackState.Tracked
            ]
            dists = matching.iou_distance(r_tracked_stracks, cls_detections)
            matches, u_track, u_detection = matching.linear_assignment(
                dists, thresh=0.5)  # thresh=0.5

            for i_tracked, i_det in matches:
                track = r_tracked_stracks[i_tracked]
                det = cls_detections[i_det]
                if track.state == TrackState.Tracked:
                    track.update(det, self.frame_id)
                    activated_starcks_dict[cls_id].append(track)
                else:
                    track.re_activate(det, self.frame_id, new_id=False)
                    refind_stracks_dict[cls_id].append(track)

            for it in u_track:
                track = r_tracked_stracks[it]
                if not track.state == TrackState.Lost:
                    track.mark_lost()
                    lost_stracks_dict[cls_id].append(track)
            '''Deal with unconfirmed tracks, usually tracks with only one beginning frame'''
            cls_detections = [cls_detections[i] for i in u_detection]
            dists = matching.iou_distance(unconfirmed_dict[cls_id],
                                          cls_detections)
            matches, u_unconfirmed, u_detection = matching.linear_assignment(
                dists, thresh=0.7)
            for i_tracked, i_det in matches:
                unconfirmed_dict[cls_id][i_tracked].update(
                    cls_detections[i_det], self.frame_id)
                activated_starcks_dict[cls_id].append(
                    unconfirmed_dict[cls_id][i_tracked])
            for it in u_unconfirmed:
                track = unconfirmed_dict[cls_id][it]
                track.mark_removed()
                removed_stracks_dict[cls_id].append(track)
            """ Step 4: Init new stracks"""
            for i_new in u_detection:
                track = cls_detections[i_new]

                if track.score < self.det_thresh:
                    continue

                track.activate(self.kalman_filter, self.frame_id)
                activated_starcks_dict[cls_id].append(track)
            """ Step 5: Update state"""
            for track in self.lost_stracks_dict[cls_id]:
                if self.frame_id - track.end_frame > self.max_time_lost:
                    track.mark_removed()
                    removed_stracks_dict[cls_id].append(track)

            # print('Ramained match {} s'.format(t4-t3))
            self.tracked_stracks_dict[cls_id] = [
                t for t in self.tracked_stracks_dict[cls_id]
                if t.state == TrackState.Tracked
            ]
            self.tracked_stracks_dict[cls_id] = joint_stracks(
                self.tracked_stracks_dict[cls_id],
                activated_starcks_dict[cls_id])
            self.tracked_stracks_dict[cls_id] = joint_stracks(
                self.tracked_stracks_dict[cls_id], refind_stracks_dict[cls_id])
            self.lost_stracks_dict[cls_id] = sub_stracks(
                self.lost_stracks_dict[cls_id],
                self.tracked_stracks_dict[cls_id])
            self.lost_stracks_dict[cls_id].extend(lost_stracks_dict[cls_id])
            self.lost_stracks_dict[cls_id] = sub_stracks(
                self.lost_stracks_dict[cls_id],
                self.removed_stracks_dict[cls_id])
            self.removed_stracks_dict[cls_id].extend(
                removed_stracks_dict[cls_id])
            self.tracked_stracks_dict[cls_id], self.lost_stracks_dict[
                cls_id] = remove_duplicate_stracks(
                    self.tracked_stracks_dict[cls_id],
                    self.lost_stracks_dict[cls_id])

            # collect activated tracks for output
            output_stracks_dict[cls_id] = [
                track for track in self.tracked_stracks_dict[cls_id]
                if track.is_activated
            ]

            logger.debug('===========Frame {}=========='.format(self.frame_id))
            logger.debug('Activated: {}'.format(
                [track.track_id for track in activated_starcks_dict[cls_id]]))
            logger.debug('Refind: {}'.format(
                [track.track_id for track in refind_stracks_dict[cls_id]]))
            logger.debug('Lost: {}'.format(
                [track.track_id for track in lost_stracks_dict[cls_id]]))
            logger.debug('Removed: {}'.format(
                [track.track_id for track in removed_stracks_dict[cls_id]]))

        return output_stracks_dict
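A sketch of consuming the per-class dict returned above. It assumes each STrack exposes tlwh and score (score is used above; tlwh is the usual FairMOT accessor) and that an id2cls mapping like the one referenced in the commented-out visualization exists:

def summarize_tracks(output_stracks_dict, id2cls):
    """Flatten the per-class track dict into (class, id, tlwh, score) rows."""
    rows = []
    for cls_id, tracks in output_stracks_dict.items():
        for track in tracks:
            rows.append((id2cls[cls_id], track.track_id,
                         tuple(track.tlwh), track.score))
    return rows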
Example 9
    def update_tracking(self, im_blob, img_0):
        """
        :param im_blob:
        :param img_0:
        :return:
        """
        # update frame id
        self.frame_id += 1

        # ----- reset the track ids for all object classes in the first frame
        if self.frame_id == 1:
            MCTrack.init_count(self.opt.num_classes)
        # -----

        # record tracking results, key: class_id
        activated_tracks_dict = defaultdict(list)
        refined_tracks_dict = defaultdict(list)
        lost_tracks_dict = defaultdict(list)
        removed_tracks_dict = defaultdict(list)
        output_tracks_dict = defaultdict(list)

        height, width = img_0.shape[0], img_0.shape[1]  # H, W of original input image
        net_height, net_width = im_blob.shape[2], im_blob.shape[3]  # H, W of net input

        c = np.array([width * 0.5, height * 0.5], dtype=np.float32)
        s = max(float(net_width) / float(net_height) * height, width) * 1.0
        h_out = net_height // self.opt.down_ratio
        w_out = net_width // self.opt.down_ratio

        ''' Step 1: Network forward, get detections & embeddings'''
        with torch.no_grad():
            output = self.model.forward(im_blob)[-1]

            hm = output['hm'].sigmoid_()
            wh = output['wh']
            reg = output['reg'] if self.opt.reg_offset else None
            id_feature = output['id']

            # L2 normalize the reid feature vector
            id_feature = F.normalize(id_feature, dim=1)

            #  detection decoding
            dets, inds, cls_inds_mask = mot_decode(heatmap=hm,
                                                   wh=wh,
                                                   reg=reg,
                                                   num_classes=self.opt.num_classes,
                                                   cat_spec_wh=self.opt.cat_spec_wh,
                                                   K=self.opt.K)

            # ----- get ReID feature vector by object class
            cls_id_feats = []  # topK feature vectors of each object class
            for cls_id in range(self.opt.num_classes):  # cls_id starts from 0
                # get inds of each object class
                cls_inds = inds[:, cls_inds_mask[cls_id]]

                # gather feats for each object class
                cls_id_feature = _tranpose_and_gather_feat(id_feature, cls_inds)  # inds: 1×128
                cls_id_feature = cls_id_feature.squeeze(0)  # n × FeatDim
                cls_id_feature = cls_id_feature.cpu().numpy()
                cls_id_feats.append(cls_id_feature)

        # translate and scale
        dets = map2orig(dets, h_out, w_out, height, width, self.opt.num_classes)

        # ----- parse each object class
        for cls_id in range(self.opt.num_classes):  # cls_id starts from 0
            cls_dets = dets[cls_id]

            # filter out low confidence detections
            remain_inds = cls_dets[:, 4] > self.opt.conf_thres
            cls_dets = cls_dets[remain_inds]
            cls_id_feature = cls_id_feats[cls_id][remain_inds]

            if len(cls_dets) > 0:
                '''Detections, tlbrs: top left bottom right score'''
                cls_detects = [
                    MCTrack(MCTrack.tlbr_to_tlwh(tlbrs[:4]), tlbrs[4], feat, self.opt.num_classes, cls_id, 30)
                    for (tlbrs, feat) in zip(cls_dets[:, :5], cls_id_feature)
                ]
            else:
                cls_detects = []

            ''' Add newly detected tracks to tracked_tracks'''
            unconfirmed_dict = defaultdict(list)
            tracked_tracks_dict = defaultdict(list)
            for track in self.tracked_tracks_dict[cls_id]:
                if not track.is_activated:
                    unconfirmed_dict[cls_id].append(track)
                else:
                    tracked_tracks_dict[cls_id].append(track)

            ''' Step 2: First association, with embedding'''
            # building tracking pool for the current frame
            track_pool_dict = defaultdict(list)
            track_pool_dict[cls_id] = join_tracks(tracked_tracks_dict[cls_id], self.lost_tracks_dict[cls_id])

            # Predict the current location with KF
            Track.multi_predict(track_pool_dict[cls_id])
            dists = matching.embedding_distance(track_pool_dict[cls_id], cls_detects)
            dists = matching.fuse_motion(self.kalman_filter, dists, track_pool_dict[cls_id], cls_detects)
            matches, u_track, u_detection = matching.linear_assignment(dists, thresh=0.7)  # thresh=0.7

            for i_tracked, i_det in matches:
                track = track_pool_dict[cls_id][i_tracked]
                det = cls_detects[i_det]
                if track.state == TrackState.Tracked:
                    track.update(cls_detects[i_det], self.frame_id)
                    activated_tracks_dict[cls_id].append(track)  # for multi-class
                else:
                    track.re_activate(det, self.frame_id, new_id=False)
                    refined_tracks_dict[cls_id].append(track)

            ''' Step 3: Second association, with IOU'''
            cls_detects = [cls_detects[i] for i in u_detection]
            r_tracked_tracks = [track_pool_dict[cls_id][i]
                                 for i in u_track if track_pool_dict[cls_id][i].state == TrackState.Tracked]
            dists = matching.iou_distance(r_tracked_tracks, cls_detects)
            matches, u_track, u_detection = matching.linear_assignment(dists, thresh=0.5)  # thresh=0.5

            for i_tracked, i_det in matches:
                track = r_tracked_tracks[i_tracked]
                det = cls_detects[i_det]
                if track.state == TrackState.Tracked:
                    track.update(det, self.frame_id)
                    activated_tracks_dict[cls_id].append(track)
                else:
                    track.re_activate(det, self.frame_id, new_id=False)
                    refined_tracks_dict[cls_id].append(track)

            for it in u_track:
                track = r_tracked_tracks[it]
                if not track.state == TrackState.Lost:
                    track.mark_lost()
                    lost_tracks_dict[cls_id].append(track)

            '''Deal with unconfirmed tracks, usually tracks with only one beginning frame'''
            cls_detects = [cls_detects[i] for i in u_detection]
            dists = matching.iou_distance(unconfirmed_dict[cls_id], cls_detects)
            matches, u_unconfirmed, u_detection = matching.linear_assignment(dists, thresh=0.7)
            for i_tracked, i_det in matches:
                unconfirmed_dict[cls_id][i_tracked].update(cls_detects[i_det], self.frame_id)
                activated_tracks_dict[cls_id].append(unconfirmed_dict[cls_id][i_tracked])
            for it in u_unconfirmed:
                track = unconfirmed_dict[cls_id][it]
                track.mark_removed()
                removed_tracks_dict[cls_id].append(track)

            """ Step 4: Init new tracks"""
            for i_new in u_detection:
                track = cls_detects[i_new]

                if track.score < self.det_thresh:
                    continue

                track.activate(self.kalman_filter, self.frame_id)
                activated_tracks_dict[cls_id].append(track)

            """ Step 5: Update state"""
            for track in self.lost_tracks_dict[cls_id]:
                if self.frame_id - track.end_frame > self.max_time_lost:
                    track.mark_removed()
                    removed_tracks_dict[cls_id].append(track)

            # print('Ramained match {} s'.format(t4-t3))
            self.tracked_tracks_dict[cls_id] = [t for t in self.tracked_tracks_dict[cls_id] if
                                                t.state == TrackState.Tracked]
            self.tracked_tracks_dict[cls_id] = join_tracks(self.tracked_tracks_dict[cls_id],
                                                           activated_tracks_dict[cls_id])
            self.tracked_tracks_dict[cls_id] = join_tracks(self.tracked_tracks_dict[cls_id],
                                                           refined_tracks_dict[cls_id])
            self.lost_tracks_dict[cls_id] = sub_tracks(self.lost_tracks_dict[cls_id],
                                                       self.tracked_tracks_dict[cls_id])
            self.lost_tracks_dict[cls_id].extend(lost_tracks_dict[cls_id])
            self.lost_tracks_dict[cls_id] = sub_tracks(self.lost_tracks_dict[cls_id],
                                                       self.removed_tracks_dict[cls_id])
            self.removed_tracks_dict[cls_id].extend(removed_tracks_dict[cls_id])
            self.tracked_tracks_dict[cls_id], self.lost_tracks_dict[cls_id] = remove_duplicate_tracks(
                self.tracked_tracks_dict[cls_id],
                self.lost_tracks_dict[cls_id])

            # collect activated tracks for output
            output_tracks_dict[cls_id] = [track for track in self.tracked_tracks_dict[cls_id] if track.is_activated]

            logger.debug('===========Frame {}=========='.format(self.frame_id))
            logger.debug('Activated: {}'.format(
                [track.track_id for track in activated_tracks_dict[cls_id]]))
            logger.debug('Refind: {}'.format(
                [track.track_id for track in refined_tracks_dict[cls_id]]))
            logger.debug('Lost: {}'.format(
                [track.track_id for track in lost_tracks_dict[cls_id]]))
            logger.debug('Removed: {}'.format(
                [track.track_id for track in removed_tracks_dict[cls_id]]))

        return output_tracks_dict
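Example 8 resets track ids per class with reset_track_id(), while this version seeds per-class counters once via MCTrack.init_count on the first frame. A minimal sketch of what such per-class id bookkeeping might look like (hypothetical; the repo's implementation is not shown in this listing):

from itertools import count

class TrackIdBook:
    """One independent track-id counter per object class."""
    def __init__(self, num_classes):
        self._counters = {c: count(1) for c in range(num_classes)}

    def next_id(self, cls_id):
        return next(self._counters[cls_id])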
Example 10
def test_det(
        opt,
        batch_size=12,
        img_size=(1088, 608),
        iou_thres=0.5,
        print_interval=40,
):
    data_cfg = opt.data_cfg
    with open(data_cfg) as f:
        data_cfg_dict = json.load(f)
    nC = 1
    test_path = data_cfg_dict['test']
    dataset_root = data_cfg_dict['root']
    if opt.gpus[0] >= 0:
        opt.device = torch.device('cuda')
    else:
        opt.device = torch.device('cpu')
    print('Creating model...')
    model = create_model(opt.arch, opt.heads, opt.head_conv)
    model = load_model(model, opt.load_model)
    # model = torch.nn.DataParallel(model)
    model = model.to(opt.device)
    model.eval()

    # Get dataloader
    transforms = T.Compose([T.ToTensor()])
    dataset = DetDataset(dataset_root,
                         test_path,
                         img_size,
                         augment=False,
                         transforms=transforms)
    dataloader = torch.utils.data.DataLoader(dataset,
                                             batch_size=batch_size,
                                             shuffle=False,
                                             num_workers=8,
                                             drop_last=False,
                                             collate_fn=collate_fn)
    mean_mAP, mean_R, mean_P, seen = 0.0, 0.0, 0.0, 0
    print('%11s' * 5 % ('Image', 'Total', 'P', 'R', 'mAP'))
    outputs, mAPs, mR, mP, TP, confidence, pred_class, target_class, jdict = \
        [], [], [], [], [], [], [], [], []
    AP_accum, AP_accum_count = np.zeros(nC), np.zeros(nC)
    for batch_i, (imgs, targets, paths, shapes,
                  targets_len) in enumerate(dataloader):
        t = time.time()
        # seen += batch_size

        output = model(imgs.cuda())[-1]
        origin_shape = shapes[0]
        width = origin_shape[1]
        height = origin_shape[0]
        inp_height = img_size[1]
        inp_width = img_size[0]
        c = np.array([width / 2., height / 2.], dtype=np.float32)
        s = max(float(inp_width) / float(inp_height) * height, width) * 1.0
        meta = {
            'c': c,
            's': s,
            'out_height': inp_height // opt.down_ratio,
            'out_width': inp_width // opt.down_ratio
        }
        hm = output['hm'].sigmoid_()
        wh = output['wh']
        reg = output['reg'] if opt.reg_offset else None
        opt.K = 200
        detections, inds = mot_decode(hm,
                                      wh,
                                      reg=reg,
                                      cat_spec_wh=opt.cat_spec_wh,
                                      K=opt.K)
        # Compute average precision for each sample
        targets = [targets[i][:int(l)] for i, l in enumerate(targets_len)]
        for si, labels in enumerate(targets):
            seen += 1
            # path = paths[si]
            # img0 = cv2.imread(path)
            dets = detections[si]
            dets = dets.unsqueeze(0)
            dets = post_process(opt, dets, meta)
            dets = merge_outputs(opt, [dets])[1]

            # remain_inds = dets[:, 4] > opt.det_thres
            # dets = dets[remain_inds]
            if dets is None:
                # If there are labels but no detections mark as zero AP
                if labels.size(0) != 0:
                    mAPs.append(0), mR.append(0), mP.append(0)
                continue

            # If no labels add number of detections as incorrect
            correct = []
            if labels.size(0) == 0:
                # correct.extend([0 for _ in range(len(detections))])
                mAPs.append(0), mR.append(0), mP.append(0)
                continue
            else:
                target_cls = labels[:, 0]

                # Extract target boxes as (x1, y1, x2, y2)
                target_boxes = ct_xywh2xyxy(labels[:, 2:6])
                target_boxes[:, 0] *= width
                target_boxes[:, 2] *= width
                target_boxes[:, 1] *= height
                target_boxes[:, 3] *= height
                '''
                path = paths[si]
                img0 = cv2.imread(path)
                img1 = cv2.imread(path)
                for t in range(len(target_boxes)):
                    x1 = target_boxes[t, 0]
                    y1 = target_boxes[t, 1]
                    x2 = target_boxes[t, 2]
                    y2 = target_boxes[t, 3]
                    cv2.rectangle(img0, (x1, y1), (x2, y2), (0, 255, 0), 4)
                cv2.imwrite('gt.jpg', img0)
                for t in range(len(dets)):
                    x1 = dets[t, 0]
                    y1 = dets[t, 1]
                    x2 = dets[t, 2]
                    y2 = dets[t, 3]
                    cv2.rectangle(img1, (x1, y1), (x2, y2), (0, 255, 0), 4)
                cv2.imwrite('pred.jpg', img1)
                abc = ace
                '''

                detected = []
                for *pred_bbox, conf in dets:
                    obj_pred = 0
                    pred_bbox = torch.FloatTensor(pred_bbox).view(1, -1)
                    # Compute iou with target boxes
                    iou = bbox_iou(pred_bbox, target_boxes, x1y1x2y2=True)[0]
                    # Extract index of largest overlap
                    best_i = np.argmax(iou)
                    # If overlap exceeds threshold and classification is correct mark as correct
                    if iou[best_i] > iou_thres and obj_pred == labels[
                            best_i, 0] and best_i not in detected:
                        correct.append(1)
                        detected.append(best_i)
                    else:
                        correct.append(0)

            # Compute Average Precision (AP) per class
            AP, AP_class, R, P = ap_per_class(
                tp=correct,
                conf=dets[:, 4],
                pred_cls=np.zeros_like(dets[:, 4]),  # detections[:, 6]
                target_cls=target_cls)

            # Accumulate AP per class
            AP_accum_count += np.bincount(AP_class, minlength=nC)
            AP_accum += np.bincount(AP_class, minlength=nC, weights=AP)

            # Compute mean AP across all classes in this image, and append to image list
            mAPs.append(AP.mean())
            mR.append(R.mean())
            mP.append(P.mean())

            # Means of all images
            mean_mAP = np.sum(mAPs) / (AP_accum_count + 1E-16)
            mean_R = np.sum(mR) / (AP_accum_count + 1E-16)
            mean_P = np.sum(mP) / (AP_accum_count + 1E-16)

        if batch_i % print_interval == 0:
            # Print image mAP and running mean mAP
            print(('%11s%11s' + '%11.3g' * 4 + 's') %
                  (seen, dataloader.dataset.nF, mean_P, mean_R, mean_mAP,
                   time.time() - t))
    # Print mAP per class
    print('%11s' * 5 % ('Image', 'Total', 'P', 'R', 'mAP'))

    print('AP: %-.4f\n\n' % (AP_accum[0] / (AP_accum_count[0] + 1E-16)))

    # Return mAP
    return mean_mAP, mean_R, mean_P
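ap_per_class does the precision/recall bookkeeping behind the mAP numbers printed above. A minimal sketch of that bookkeeping for a single class, assuming the 0/1 correctness list is already sorted by descending confidence:

import numpy as np

def precision_recall_sketch(tp, n_gt):
    """Running precision/recall from a confidence-sorted 0/1 match list."""
    tp = np.asarray(tp, dtype=np.float32)
    tp_cum = np.cumsum(tp)
    fp_cum = np.cumsum(1.0 - tp)
    recall = tp_cum / max(n_gt, 1)
    precision = tp_cum / np.maximum(tp_cum + fp_cum, 1e-16)
    return precision, recall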
Example 11
    def update(self, im_blob, img0):
        self.frame_id += 1

        # record tracking results
        activated_starcks = []
        refind_stracks = []
        lost_stracks = []
        removed_stracks = []

        width = img0.shape[1]
        height = img0.shape[0]
        inp_height = im_blob.shape[2]
        inp_width = im_blob.shape[3]

        c = np.array([width / 2., height / 2.], dtype=np.float32)
        s = max(float(inp_width) / float(inp_height) * height, width) * 1.0
        meta = {
            'c': c,
            's': s,
            'out_height': inp_height // self.opt.down_ratio,
            'out_width': inp_width // self.opt.down_ratio
        }
        ''' Step 1: Network forward, get detections & embeddings'''
        with torch.no_grad():  # no gradients needed for forward inference
            output = self.model.forward(im_blob)[-1]

            hm = output['hm'].sigmoid_()
            # print("hm shape ", hm.shape, "hm:\n", hm)

            wh = output['wh']
            # print("wh shape ", wh.shape, "wh:\n", wh)

            id_feature = output['id']
            id_feature = F.normalize(id_feature, dim=1)

            reg = output['reg'] if self.opt.reg_offset else None
            # print("reg shape ", reg.shape, "reg:\n", reg)

            # decode detection and classification results
            dets, inds = mot_decode(
                heat=hm,  # heatmap
                wh=wh,
                reg=reg,
                cat_spec_wh=self.opt.cat_spec_wh,
                K=self.opt.K)

            # gather the feature vectors used for Re-ID
            id_feature = _tranpose_and_gather_feat(id_feature, inds)
            id_feature = id_feature.squeeze(0)  # K × FeatDim
            id_feature = id_feature.cpu().numpy()

        # post-process detections
        dets = self.post_process(dets, meta)
        dets = self.merge_outputs([dets])[1]

        # filter out detections whose score is too low
        remain_inds = dets[:, 4] > self.opt.conf_thres
        dets = dets[remain_inds]
        id_feature = id_feature[remain_inds]

        # visualize bboxes (debug)
        '''
        for i in range(0, dets.shape[0]):
            bbox = dets[i][0:4]
            cv2.rectangle(img0, 
                          (bbox[0], bbox[1]),  # left-top point
                          (bbox[2], bbox[3]),  # right-down point
                          (0, 255, 0), 
                          2)
        cv2.imshow('dets', img0)
        cv2.waitKey(0)
        id0 = id0-1
        '''

        if len(dets) > 0:
            '''Detections, tlbrs: top left bottom right score'''
            detections = [
                STrack(STrack.tlbr_to_tlwh(tlbrs[:4]),
                       tlbrs[4],
                       feat,
                       buff_size=30)
                for (tlbrs, feat) in zip(dets[:, :5], id_feature)
            ]
        else:
            detections = []
        ''' Add newly detected tracklets to tracked_stracks'''
        unconfirmed = []
        tracked_stracks = []  # type: list[STrack]
        for track in self.tracked_stracks:
            if not track.is_activated:
                unconfirmed.append(track)
            else:
                tracked_stracks.append(track)
        ''' Step 2: First association, with embedding'''
        strack_pool = joint_stracks(tracked_stracks, self.lost_stracks)

        # Predict the current location with KF
        # for strack in strack_pool:
        # strack.predict()
        STrack.multi_predict(strack_pool)
        dists = matching.embedding_distance(strack_pool, detections)
        # dists = matching.gate_cost_matrix(self.kalman_filter, dists, strack_pool, detections)
        dists = matching.fuse_motion(self.kalman_filter, dists, strack_pool,
                                     detections)
        matches, u_track, u_detection = matching.linear_assignment(dists,
                                                                   thresh=0.7)

        for i_tracked, i_det in matches:
            track = strack_pool[i_tracked]
            det = detections[i_det]
            if track.state == TrackState.Tracked:
                track.update(detections[i_det], self.frame_id)
                activated_starcks.append(track)
            else:
                track.re_activate(det, self.frame_id, new_id=False)
                refind_stracks.append(track)
        ''' Step 3: Second association, with IOU'''
        detections = [detections[i] for i in u_detection]
        r_tracked_stracks = [
            strack_pool[i] for i in u_track
            if strack_pool[i].state == TrackState.Tracked
        ]
        dists = matching.iou_distance(r_tracked_stracks, detections)
        matches, u_track, u_detection = matching.linear_assignment(dists,
                                                                   thresh=0.5)

        for i_tracked, i_det in matches:
            track = r_tracked_stracks[i_tracked]
            det = detections[i_det]
            if track.state == TrackState.Tracked:
                track.update(det, self.frame_id)
                activated_starcks.append(track)
            else:
                track.re_activate(det, self.frame_id, new_id=False)
                refind_stracks.append(track)

        for it in u_track:
            track = r_tracked_stracks[it]
            if not track.state == TrackState.Lost:
                track.mark_lost()
                lost_stracks.append(track)
        '''Deal with unconfirmed tracks, usually tracks with only one beginning frame'''
        detections = [detections[i] for i in u_detection]
        dists = matching.iou_distance(unconfirmed, detections)
        matches, u_unconfirmed, u_detection = matching.linear_assignment(
            dists, thresh=0.7)
        for i_tracked, i_det in matches:
            unconfirmed[i_tracked].update(detections[i_det], self.frame_id)
            activated_starcks.append(unconfirmed[i_tracked])
        for it in u_unconfirmed:
            track = unconfirmed[it]
            track.mark_removed()
            removed_stracks.append(track)
        """ Step 4: Init new stracks"""
        for i_new in u_detection:
            track = detections[i_new]
            if track.score < self.det_thresh:
                continue
            track.activate(self.kalman_filter, self.frame_id)
            activated_starcks.append(track)
        """ Step 5: Update state"""
        for track in self.lost_stracks:
            if self.frame_id - track.end_frame > self.max_time_lost:
                track.mark_removed()
                removed_stracks.append(track)

        # print('Ramained match {} s'.format(t4-t3))

        self.tracked_stracks = [
            t for t in self.tracked_stracks if t.state == TrackState.Tracked
        ]
        self.tracked_stracks = joint_stracks(self.tracked_stracks,
                                             activated_starcks)
        self.tracked_stracks = joint_stracks(self.tracked_stracks,
                                             refind_stracks)
        self.lost_stracks = sub_stracks(self.lost_stracks,
                                        self.tracked_stracks)
        self.lost_stracks.extend(lost_stracks)
        self.lost_stracks = sub_stracks(self.lost_stracks,
                                        self.removed_stracks)
        self.removed_stracks.extend(removed_stracks)
        self.tracked_stracks, self.lost_stracks = remove_duplicate_stracks(
            self.tracked_stracks, self.lost_stracks)

        # collect activated tracks for output
        output_stracks = [
            track for track in self.tracked_stracks if track.is_activated
        ]

        logger.debug('===========Frame {}=========='.format(self.frame_id))
        logger.debug('Activated: {}'.format(
            [track.track_id for track in activated_starcks]))
        logger.debug('Refind: {}'.format(
            [track.track_id for track in refind_stracks]))
        logger.debug('Lost: {}'.format(
            [track.track_id for track in lost_stracks]))
        logger.debug('Removed: {}'.format(
            [track.track_id for track in removed_stracks]))

        return output_stracks
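A per-frame driver sketch for the single-class update() above. It reuses the make_blob preprocessing sketch from Example 1; the tracker object is assumed to expose update(im_blob, img0) and frame_id exactly as in this example:

def run_tracker(tracker, frames, device='cpu'):
    """Run the tracker over an iterable of BGR frames and collect results."""
    results = []
    for img0 in frames:
        blob = make_blob(img0, device=device)  # preprocessing sketch from Example 1
        for track in tracker.update(blob, img0):
            results.append((tracker.frame_id, track.track_id,
                            tuple(track.tlwh), track.score))
    return results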