def update_detection(self, im_blob, img_0):
    """
    Run the detector on one frame and group the confident boxes by class.

    :param im_blob: network input; dims 2 and 3 are read as height and width
    :param img_0: original image; ``shape[0]`` / ``shape[1]`` are read as H / W
    :rtype: dict
    :return: mapping ``cls_id`` (0-based) -> detections of that class whose
             column 4 (score) exceeds ``self.opt.conf_thres``
    """
    orig_h, orig_w = img_0.shape[0], img_0.shape[1]
    net_h, net_w = im_blob.shape[2], im_blob.shape[3]

    # Centre / scale / output-size description handed to post_process.
    meta = {
        'c': np.array([orig_w * 0.5, orig_h * 0.5], dtype=np.float32),
        's': max(float(net_w) / float(net_h) * orig_h, orig_w) * 1.0,
        'out_height': net_h // self.opt.down_ratio,
        'out_width': net_w // self.opt.down_ratio,
    }

    with torch.no_grad():
        per_class = defaultdict(list)

        # Last element of the model output holds the head tensors.
        heads_out = self.model.forward(im_blob)[-1]
        heatmap = heads_out['hm'].sigmoid_()
        box_size = heads_out['wh']
        if self.opt.reg_offset:
            offset = heads_out['reg']
        else:
            offset = None

        # Decode detections (the index outputs are not needed here).
        decoded, _inds, _cls_inds_mask = mot_decode(
            heatmap=heatmap,
            wh=box_size,
            reg=offset,
            num_classes=self.opt.num_classes,
            cat_spec_wh=self.opt.cat_spec_wh,
            K=self.opt.K)

        # Post-process, then merge; the merged result is indexed from 1.
        merged = self.merge_outputs([self.post_process(decoded, meta)])

        for cls_id in range(self.opt.num_classes):
            candidates = merged[cls_id + 1]
            keep = candidates[:, 4] > self.opt.conf_thres  # drop low scores
            per_class[cls_id] = candidates[keep]

    return per_class
def update_detection(self, im_blob, img_0):
    """
    Run the detector on one frame and group the confident boxes by class.

    Detections are decoded on the network output grid and mapped back to the
    original image with ``map2orig``.

    :param im_blob: network input; dims 2 and 3 are read as height and width
    :param img_0: original image; ``shape[0]`` / ``shape[1]`` are read as H / W
    :rtype: dict
    :return: mapping ``cls_id`` (0-based) -> detections of that class whose
             column 4 (score) exceeds ``self.opt.conf_thres``
    """
    height, width = img_0.shape[0], img_0.shape[1]  # H, W of original image
    net_height, net_width = im_blob.shape[2], im_blob.shape[3]  # H, W of net input

    # Size of the network output grid.
    h_out = net_height // self.opt.down_ratio
    w_out = net_width // self.opt.down_ratio

    with torch.no_grad():
        dets_dict = defaultdict(list)

        # --- network output (last element holds the head tensors)
        output = self.model.forward(im_blob)[-1]

        # --- detection heads
        hm = output['hm'].sigmoid_()
        wh = output['wh']
        reg = output['reg'] if self.opt.reg_offset else None

        # --- decode detections (the index outputs are not needed here)
        dets, _inds, _cls_inds_mask = mot_decode(
            heatmap=hm,
            wh=wh,
            reg=reg,
            num_classes=self.opt.num_classes,
            cat_spec_wh=self.opt.cat_spec_wh,
            K=self.opt.K)

        # --- translate and scale back to the original image coordinates
        dets = map2orig(dets, h_out, w_out, height, width,
                        self.opt.num_classes)

        # --- keep only confident detections of each class
        for cls_id in range(self.opt.num_classes):  # cls_id starts from 0
            cls_dets = dets[cls_id]
            dets_dict[cls_id] = cls_dets[cls_dets[:, 4] > self.opt.conf_thres]

    return dets_dict
def save_result(self, output, batch, results):
    """
    Decode one batch of network output and store it in ``results``.

    :param output: dict of head tensors; reads ``'hm'``, ``'wh'`` and, when
                   ``self.opt.reg_offset`` is set, ``'reg'``
    :param batch: dict whose ``'meta'`` entry provides ``'c'``, ``'s'`` and
                  ``'img_id'`` tensors
    :param results: dict updated in place, keyed by the first image id
    """
    reg = output['reg'] if self.opt.reg_offset else None
    decoded = mot_decode(
        output['hm'], output['wh'], reg=reg,
        cat_spec_wh=self.opt.cat_spec_wh, K=self.opt.K)

    # The other mot_decode call sites in this file unpack a tuple whose first
    # item is the detections; calling .detach() on that tuple would raise
    # AttributeError, so accept both a tuple and a bare tensor.
    dets = decoded[0] if isinstance(decoded, (tuple, list)) else decoded

    dets = dets.detach().cpu().numpy().reshape(1, -1, dets.shape[2])

    meta = batch['meta']
    hm = output['hm']
    dets_out = ctdet_post_process(
        dets.copy(),
        meta['c'].cpu().numpy(),
        meta['s'].cpu().numpy(),
        hm.shape[2], hm.shape[3], hm.shape[1])
    results[meta['img_id'].cpu().numpy()[0]] = dets_out[0]
def save_result(self, output, batch, results):
    """
    Decode one batch of network output and store it in ``results``.

    :param output: dict of head tensors; reads ``'hm'``, ``'wh'`` and, when
                   ``self.opt.reg_offset`` is set, ``'reg'``
    :param batch: dict whose ``'meta'`` entry provides ``'c'``, ``'s'`` and
                  ``'img_id'`` tensors
    :param results: dict updated in place, keyed by the first image id
    """
    reg = output['reg'] if self.opt.reg_offset else None
    decoded = mot_decode(heatmap=output['hm'],
                         wh=output['wh'],
                         reg=reg,
                         cat_spec_wh=self.opt.cat_spec_wh,
                         K=self.opt.K)

    # The other mot_decode call sites in this file unpack a tuple whose first
    # item is the detections; calling .detach() on that tuple would raise
    # AttributeError, so accept both a tuple and a bare tensor.
    dets = decoded[0] if isinstance(decoded, (tuple, list)) else decoded

    dets = dets.detach().cpu().numpy().reshape(1, -1, dets.shape[2])

    meta = batch['meta']
    hm = output['hm']
    dets_out = ctdet_post_process(dets.copy(),
                                  meta['c'].cpu().numpy(),  # center
                                  meta['s'].cpu().numpy(),  # scale
                                  hm.shape[2],  # height
                                  hm.shape[3],  # width
                                  hm.shape[1])  # num_classes
    results[meta['img_id'].cpu().numpy()[0]] = dets_out[0]
def update(self, im_blob, img0):
    """
    Advance the tracker by one frame and return the currently active tracks.

    Runs the network on ``im_blob``, turns the confident detections into
    ``STrack`` candidates, associates them with existing tracks (embedding
    distance fused with motion, then IoU, then the unconfirmed tracks),
    starts new tracks and refreshes the tracked / lost / removed lists kept
    on ``self``.

    :param im_blob: network input; dims 2 and 3 are read as height and width
    :param img0: original image; ``shape[0]`` / ``shape[1]`` are read as H / W
    :return: tracks in ``self.tracked_stracks`` whose ``is_activated`` is set
    """
    self.frame_id += 1
    activated_starcks = []
    refind_stracks = []
    lost_stracks = []
    removed_stracks = []

    width = img0.shape[1]
    height = img0.shape[0]
    inp_height = im_blob.shape[2]
    inp_width = im_blob.shape[3]
    c = np.array([width / 2., height / 2.], dtype=np.float32)
    s = max(float(inp_width) / float(inp_height) * height, width) * 1.0
    meta = {
        'c': c,
        's': s,
        'out_height': inp_height // self.opt.down_ratio,
        'out_width': inp_width // self.opt.down_ratio
    }

    # Step 1: network forward, get detections & embeddings
    with torch.no_grad():
        output = self.model(im_blob)[-1]
        hm = output['hm'].sigmoid_()
        wh = output['wh']
        id_feature = output['id']
        id_feature = F.normalize(id_feature, dim=1)

        reg = output['reg'] if self.opt.reg_offset else None
        dets, inds = mot_decode(hm, wh, reg=reg,
                                cat_spec_wh=self.opt.cat_spec_wh,
                                K=self.opt.K)
        id_feature = _tranpose_and_gather_feat(id_feature, inds)
        id_feature = id_feature.squeeze(0)
        id_feature = id_feature.cpu().numpy()

    dets = self.post_process(dets, meta)
    dets = self.merge_outputs([dets])[1]

    # Keep only confident detections and their embeddings.
    remain_inds = dets[:, 4] > self.opt.conf_thres
    dets = dets[remain_inds]
    id_feature = id_feature[remain_inds]

    if len(dets) > 0:
        # Pairwise occlusion between boxes, summed per detection.
        occlution = np.zeros(shape=(len(dets), len(dets)))
        for i in range(len(dets)):
            for j in range(i + 1, len(dets)):
                occ1, occ2 = tlbr_occlution(dets[i, :4], dets[j, :4])
                occlution[i, j] = occ1
                occlution[j, i] = occ2
        occlution = np.sum(occlution, axis=0)
        detections = [
            STrack(STrack.tlbr_to_tlwh(tlbrs[:4]), tlbrs[4], f,
                   self.opt.maxLen, occ)
            for (tlbrs, f, occ) in zip(dets[:, :5], id_feature, occlution)
        ]
        # One occlusion value per entry of `detections`. It is filtered in
        # lock-step with `detections` below so that indices returned by the
        # later assignment stages keep pointing at the right value.
        det_occlusions = list(occlution)
    else:
        detections = []
        det_occlusions = []

    # Split existing tracks into unconfirmed and confirmed ones.
    unconfirmed = []
    tracked_stracks = []  # type: list[STrack]
    for track in self.tracked_stracks:
        if not track.is_activated:
            unconfirmed.append(track)
        else:
            tracked_stracks.append(track)

    # Step 2: first association, with embedding
    strack_pool = joint_stracks(tracked_stracks, self.lost_stracks)
    # Predict the current location with KF
    STrack.multi_predict(strack_pool)
    if self.opt.queue_dist:
        # occlusion is not used in the detection distance for now
        dists = matching.queue_embedding_distance(
            strack_pool, detections, self.opt, metric="cosine",
            occlution=None)
    else:
        dists = matching.embedding_distance(strack_pool, detections)
    dists = matching.fuse_motion(self.kalman_filter, dists, strack_pool,
                                 detections,
                                 only_position=self.opt.only_position,
                                 lambda_=self.opt.lambda_)
    matches, u_track, u_detection = matching.linear_assignment(
        dists, thresh=self.opt.matching_threshold)

    for itracked, idet in matches:
        track = strack_pool[itracked]
        det = detections[idet]
        if track.state == TrackState.Tracked:
            if self.opt.queue_dist:
                track.update(det, self.frame_id, det_occlusions[idet])
            else:
                track.update(det, self.frame_id)
            activated_starcks.append(track)
        else:
            track.re_activate(det, self.frame_id, new_id=False)
            refind_stracks.append(track)

    # Step 3: second association, with IOU
    detections = [detections[i] for i in u_detection]
    det_occlusions = [det_occlusions[i] for i in u_detection]
    r_tracked_stracks = [
        strack_pool[i] for i in u_track
        if strack_pool[i].state == TrackState.Tracked
    ]
    dists = matching.iou_distance(r_tracked_stracks, detections)
    matches, u_track, u_detection = matching.linear_assignment(dists,
                                                               thresh=0.5)

    for itracked, idet in matches:
        track = r_tracked_stracks[itracked]
        det = detections[idet]
        if track.state == TrackState.Tracked:
            if self.opt.queue_dist:
                track.update(det, self.frame_id, det_occlusions[idet])
            else:
                track.update(det, self.frame_id)
            activated_starcks.append(track)
        else:
            track.re_activate(det, self.frame_id, new_id=False)
            refind_stracks.append(track)

    for it in u_track:
        track = r_tracked_stracks[it]
        if not track.state == TrackState.Lost:
            track.mark_lost()
            lost_stracks.append(track)

    # Deal with unconfirmed tracks, usually tracks with only one beginning
    # frame.
    detections = [detections[i] for i in u_detection]
    det_occlusions = [det_occlusions[i] for i in u_detection]
    dists = matching.iou_distance(unconfirmed, detections)
    matches, u_unconfirmed, u_detection = matching.linear_assignment(
        dists, thresh=0.7)
    for itracked, idet in matches:
        if self.opt.queue_dist:
            unconfirmed[itracked].update(detections[idet], self.frame_id,
                                         det_occlusions[idet])
        else:
            unconfirmed[itracked].update(detections[idet], self.frame_id)
        activated_starcks.append(unconfirmed[itracked])
    for it in u_unconfirmed:
        track = unconfirmed[it]
        track.mark_removed()
        removed_stracks.append(track)

    # Step 4: init new stracks
    for inew in u_detection:
        track = detections[inew]
        if track.score < self.det_thresh:
            continue
        track.activate(self.kalman_filter, self.frame_id)
        activated_starcks.append(track)

    # Step 5: update state
    for track in self.lost_stracks:
        if self.frame_id - track.end_frame > self.max_time_lost:
            track.mark_removed()
            removed_stracks.append(track)

    self.tracked_stracks = [
        t for t in self.tracked_stracks if t.state == TrackState.Tracked
    ]
    self.tracked_stracks = joint_stracks(self.tracked_stracks,
                                         activated_starcks)
    self.tracked_stracks = joint_stracks(self.tracked_stracks,
                                         refind_stracks)
    self.lost_stracks = sub_stracks(self.lost_stracks, self.tracked_stracks)
    self.lost_stracks.extend(lost_stracks)
    self.lost_stracks = sub_stracks(self.lost_stracks, self.removed_stracks)
    self.removed_stracks.extend(removed_stracks)
    self.tracked_stracks, self.lost_stracks = remove_duplicate_stracks(
        self.tracked_stracks, self.lost_stracks)

    # Only activated tracks are reported to the caller.
    output_stracks = [
        track for track in self.tracked_stracks if track.is_activated
    ]

    logger.debug('===========Frame {}=========='.format(self.frame_id))
    logger.debug('Activated: {}'.format(
        [track.track_id for track in activated_starcks]))
    logger.debug('Refind: {}'.format(
        [track.track_id for track in refind_stracks]))
    logger.debug('Lost: {}'.format(
        [track.track_id for track in lost_stracks]))
    logger.debug('Removed: {}'.format(
        [track.track_id for track in removed_stracks]))

    return output_stracks
def test_single(img_path, dev):
    """
    Run the detector on a single image and write the annotated result.

    :param img_path: path of the image to process
    :param dev: device the network and the input blob are moved to
    :return: None (returns early after printing when the path is not a file)
    """
    if not os.path.isfile(img_path):
        print('[Err]: invalid image path.')
        return

    # Values the original hard-codes at each use site.
    num_classes = 5
    top_k = 128
    down_ratio = 4
    conf_thres = 0.4

    # Output channels of each network head
    heads = {'hm': 5, 'reg': 2, 'wh': 2, 'id': 128}

    # Build the network, load the weights and move it to the device
    model_path = '/mnt/diskb/even/MCMOT/exp/mot/default/mcmot_last_det_resdcn_18.pth'
    net = load_model(
        model=create_model(arch='resdcn_18', heads=heads, head_conv=256),
        model_path=model_path)
    net = net.to(dev)
    net.eval()
    print(net)

    # Read the image (BGR)
    img_0 = cv2.imread(img_path)
    assert img_0 is not None, 'Failed to load ' + img_path

    # Padded resize to the network input size
    h_in, w_in = 608, 1088  # (608, 1088) (320, 640)
    resized = letterbox(img=img_0, height=h_in, width=w_in)[0]

    # BGR -> RGB, HxWxC -> CxHxW, then scale to [0, 1]
    chw = resized[:, :, ::-1].transpose(2, 0, 1)
    chw = np.ascontiguousarray(chw, dtype=np.float32)
    chw /= 255.0

    # Add a batch dimension and move to the device
    blob = torch.from_numpy(chw).unsqueeze(0).to(dev)

    with torch.no_grad():
        # Last element of the model output holds the head tensors
        head_out = net.forward(blob)[-1]

        hm = head_out['hm'].sigmoid_()
        reg = head_out['reg']
        wh = head_out['wh']
        # L2-normalised re-id feature map
        id_feature = F.normalize(head_out['id'], dim=1)

        # Decode the detections
        dets, inds, cls_inds_mask = mot_decode(hm, wh, reg, num_classes,
                                               False, top_k)

        # Gather the re-id feature vectors of every object class
        cls_id_feats = []
        for cls_id in range(num_classes):  # cls_id starts from 0
            cls_inds = inds[:, cls_inds_mask[cls_id]]
            feat = _tranpose_and_gather_feat(id_feature, cls_inds)
            feat = feat.squeeze(0)  # n x FeatDim
            feat = feat.numpy() if dev == 'cpu' else feat.cpu().numpy()
            cls_id_feats.append(feat)

        # Map the boxes back to the original image coordinate system
        height_0, width_0 = img_0.shape[0], img_0.shape[1]
        dets = map2orig(dets, h_in // down_ratio, w_in // down_ratio,
                        height_0, width_0, num_classes)

        # Keep the confident detections of each class
        dets_dict = defaultdict(list)
        for cls_id in range(num_classes):
            candidates = dets[cls_id]
            dets_dict[cls_id] = candidates[candidates[:, 4] > conf_thres]

    # Draw the detections and save the picture
    img_draw = plot_detects(img_0, dets_dict, num_classes, frame_id=0,
                            fps=30.0)
    cv2.imwrite('/mnt/diskb/even/MCMOT/results/00000.jpg', img_draw)
def gen_det(opt, batch_size=12, img_size=(1088, 608)):
    """
    Run the detector over the test split and append detections to text files.

    For every processed image the boxes scoring above 0.4 are appended to
    ``<sequence dir>/det/FairMOT_det.txt`` (derived from the image path) and
    drawn in an OpenCV window.

    :param opt: options object; reads ``data_cfg``, ``gpus``, ``arch``,
                ``heads``, ``head_conv``, ``load_model``, ``down_ratio``,
                ``reg_offset``, ``cat_spec_wh``; sets ``device`` and ``K``
    :param batch_size: batch size of the data loader
    :param img_size: network input size, read as ``(width, height)``
    :return: None
    """
    # Context manager so the config file is closed even if parsing fails.
    with open(opt.data_cfg) as cfg_file:
        data_cfg_dict = json.load(cfg_file)
    test_path = data_cfg_dict['test']
    dataset_root = data_cfg_dict['root']

    if opt.gpus[0] >= 0:
        opt.device = torch.device('cuda')
    else:
        opt.device = torch.device('cpu')
    print('Creating model...')
    model = create_model(opt.arch, opt.heads, opt.head_conv)
    model = load_model(model, opt.load_model)
    model = model.to(opt.device)
    model.eval()

    # Get dataloader
    transforms = T.Compose([T.ToTensor()])
    dataset = DetDataset(dataset_root, test_path, img_size, augment=False,
                         transforms=transforms)
    dataloader = torch.utils.data.DataLoader(dataset,
                                             batch_size=batch_size,
                                             shuffle=False,
                                             num_workers=0,
                                             drop_last=False)
    seen = 0
    dataloader = tqdm(dataloader)
    for imgs, paths, shapes in dataloader:
        # NOTE(review): hard-coded skip of early batches and of two MOT16
        # sequences, kept as-is; `seen` is also incremented per image below.
        # Confirm whether this resume/debug logic is still wanted.
        seen += batch_size
        if seen < 3148:
            continue
        path = paths[0]
        if "MOT16-03" in path:
            continue
        if "MOT16-01" in path:
            continue

        # NOTE(review): the meta below is built from shapes[0] only and is
        # reused for every image of the batch — confirm that is intended.
        origin_shape = shapes[0]
        width = origin_shape[1]
        height = origin_shape[0]
        inp_height = img_size[1]
        inp_width = img_size[0]
        c = np.array([width / 2., height / 2.], dtype=np.float32)
        s = max(float(inp_width) / float(inp_height) * height, width) * 1.0
        meta = {'c': c,
                's': s,
                'out_height': inp_height // opt.down_ratio,
                'out_width': inp_width // opt.down_ratio}

        opt.K = 200
        # Inference only: skip autograd bookkeeping, and use the device that
        # was selected above instead of unconditionally calling .cuda().
        with torch.no_grad():
            output = model(imgs.to(opt.device))[-1]
            hm = output['hm'].sigmoid_()
            wh = output['wh']
            reg = output['reg'] if opt.reg_offset else None
            detections, inds = mot_decode(hm, wh, reg=reg,
                                          cat_spec_wh=opt.cat_spec_wh,
                                          K=opt.K)

        # Write out the detections of every image in the batch
        for si, _ in enumerate(imgs):
            seen += 1
            dets = detections[si]
            dets = dets.unsqueeze(0)
            dets = post_process(opt, dets, meta)
            dets = merge_outputs(opt, [dets])[1]
            if dets is None:
                continue

            path = paths[si]
            split = path.split("/")
            # Restore the separator after the first component (the root of
            # an absolute path) that str.split dropped.
            split[0] += "/"
            det_file = os.path.join(*split[:-2], "det", "FairMOT_det.txt")
            # File stem as frame number; str.strip(".jpg") would strip a
            # character set from both ends rather than the extension.
            frame_id = int(os.path.splitext(split[-1])[0])
            img1 = cv2.imread(path)

            remain_inds = dets[:, 4] > 0.4
            dets = dets[remain_inds]
            xywh = xyxy2ct_xywh(dets[:, :4])

            with open(det_file, "a+") as f:
                for t in range(len(dets)):
                    x1 = dets[t, 0]
                    y1 = dets[t, 1]
                    x2 = dets[t, 2]
                    y2 = dets[t, 3]
                    f.write(
                        "%d,-1, %.2f, %.2f, %.2f, %.2f, %.2f, -1,-1,-1\n" % (
                            frame_id, xywh[t, 0], xywh[t, 1], xywh[t, 2],
                            xywh[t, 3], dets[t, 4]))
                    # Drawing functions expect integer pixel coordinates.
                    cv2.rectangle(img1, (int(x1), int(y1)),
                                  (int(x2), int(y2)), (0, 255, 0), 4)
            cv2.imshow("", img1)
            cv2.waitKey(100)
    return None
def update_tracking(self, im_blob, img_0):
    """
    Advance the multi-class tracker by one frame.

    Runs the network, decodes detections and re-id features per object class,
    and then, independently for every class, associates detections with the
    existing tracks (embedding distance fused with motion, then IoU, then the
    unconfirmed tracks), starts new tracks and refreshes the per-class
    tracked / lost / removed lists kept on ``self``.

    :param im_blob: network input; dims 2 and 3 are read as height and width
    :param img_0: original image; ``shape[0]`` / ``shape[1]`` are read as H / W
    :return: defaultdict mapping ``cls_id`` -> activated tracks of that class
    """
    # update frame id
    self.frame_id += 1

    # per-frame tracking results, keyed by class id
    activated_starcks_dict = defaultdict(list)
    refind_stracks_dict = defaultdict(list)
    lost_stracks_dict = defaultdict(list)
    removed_stracks_dict = defaultdict(list)
    output_stracks_dict = defaultdict(list)

    height, width = img_0.shape[0], img_0.shape[1]  # H, W of original image
    net_height, net_width = im_blob.shape[2], im_blob.shape[3]  # H, W of net input
    h_out = net_height // self.opt.down_ratio
    w_out = net_width // self.opt.down_ratio

    # Step 1: network forward, get detections & embeddings
    with torch.no_grad():
        output = self.model.forward(im_blob)[-1]

        hm = output['hm'].sigmoid_()
        wh = output['wh']
        reg = output['reg'] if self.opt.reg_offset else None
        id_feature = output['id']
        id_feature = F.normalize(id_feature, dim=1)  # L2 normalize

        # decode detections and class assignment
        dets, inds, cls_inds_mask = mot_decode(
            heatmap=hm,
            wh=wh,
            reg=reg,
            num_classes=self.opt.num_classes,
            cat_spec_wh=self.opt.cat_spec_wh,
            K=self.opt.K)

        # ----- get ReID feature vectors by object class
        cls_id_feats = []
        for cls_id in range(self.opt.num_classes):  # cls_id starts from 0
            # indices belonging to this object class
            cls_inds = inds[:, cls_inds_mask[cls_id]]

            # gather the features at those indices
            cls_id_feature = _tranpose_and_gather_feat(id_feature, cls_inds)
            cls_id_feature = cls_id_feature.squeeze(0)  # n x FeatDim
            cls_id_feature = cls_id_feature.cpu().numpy()
            cls_id_feats.append(cls_id_feature)

        # translate and scale back to the original image coordinates
        dets = map2orig(dets, h_out, w_out, height, width,
                        self.opt.num_classes)

    # ----- process every object class independently
    for cls_id in range(self.opt.num_classes):  # cls_id starts from 0
        cls_dets = dets[cls_id]

        # drop detections with a low score, and their features
        remain_inds = cls_dets[:, 4] > self.opt.conf_thres
        cls_dets = cls_dets[remain_inds]
        cls_id_feature = cls_id_feats[cls_id][remain_inds]

        if len(cls_dets) > 0:
            # tlbrs: top, left, bottom, right, score
            cls_detections = [
                STrack(STrack.tlbr_to_tlwh(tlbrs[:4]), tlbrs[4], feat,
                       buff_size=30)
                for (tlbrs, feat) in zip(cls_dets[:, :5], cls_id_feature)
            ]
        else:
            cls_detections = []

        # reset the track ids for a different object class
        for track in cls_detections:
            track.reset_track_id()

        # Split this class's tracks into unconfirmed and confirmed ones.
        unconfirmed = []
        tracked_stracks = []  # type: list[STrack]
        for track in self.tracked_stracks_dict[cls_id]:
            if not track.is_activated:
                unconfirmed.append(track)
            else:
                tracked_stracks.append(track)

        # Step 2: first association, with embedding
        strack_pool = joint_stracks(tracked_stracks,
                                    self.lost_stracks_dict[cls_id])

        # Predict the current location with KF
        STrack.multi_predict(strack_pool)
        dists = matching.embedding_distance(strack_pool, cls_detections)
        dists = matching.fuse_motion(self.kalman_filter, dists, strack_pool,
                                     cls_detections)
        matches, u_track, u_detection = matching.linear_assignment(
            dists, thresh=0.7)

        for i_tracked, i_det in matches:
            track = strack_pool[i_tracked]
            det = cls_detections[i_det]
            if track.state == TrackState.Tracked:
                track.update(det, self.frame_id)
                activated_starcks_dict[cls_id].append(track)
            else:
                track.re_activate(det, self.frame_id, new_id=False)
                refind_stracks_dict[cls_id].append(track)

        # Step 3: second association, with IOU
        cls_detections = [cls_detections[i] for i in u_detection]
        r_tracked_stracks = [
            strack_pool[i] for i in u_track
            if strack_pool[i].state == TrackState.Tracked
        ]
        dists = matching.iou_distance(r_tracked_stracks, cls_detections)
        matches, u_track, u_detection = matching.linear_assignment(
            dists, thresh=0.5)

        for i_tracked, i_det in matches:
            track = r_tracked_stracks[i_tracked]
            det = cls_detections[i_det]
            if track.state == TrackState.Tracked:
                track.update(det, self.frame_id)
                activated_starcks_dict[cls_id].append(track)
            else:
                track.re_activate(det, self.frame_id, new_id=False)
                refind_stracks_dict[cls_id].append(track)

        for it in u_track:
            track = r_tracked_stracks[it]
            if not track.state == TrackState.Lost:
                track.mark_lost()
                lost_stracks_dict[cls_id].append(track)

        # Deal with unconfirmed tracks, usually tracks with only one
        # beginning frame.
        cls_detections = [cls_detections[i] for i in u_detection]
        dists = matching.iou_distance(unconfirmed, cls_detections)
        matches, u_unconfirmed, u_detection = matching.linear_assignment(
            dists, thresh=0.7)
        for i_tracked, i_det in matches:
            unconfirmed[i_tracked].update(cls_detections[i_det],
                                          self.frame_id)
            activated_starcks_dict[cls_id].append(unconfirmed[i_tracked])
        for it in u_unconfirmed:
            track = unconfirmed[it]
            track.mark_removed()
            removed_stracks_dict[cls_id].append(track)

        # Step 4: init new stracks
        for i_new in u_detection:
            track = cls_detections[i_new]
            if track.score < self.det_thresh:
                continue
            track.activate(self.kalman_filter, self.frame_id)
            activated_starcks_dict[cls_id].append(track)

        # Step 5: update state
        for track in self.lost_stracks_dict[cls_id]:
            if self.frame_id - track.end_frame > self.max_time_lost:
                track.mark_removed()
                removed_stracks_dict[cls_id].append(track)

        self.tracked_stracks_dict[cls_id] = [
            t for t in self.tracked_stracks_dict[cls_id]
            if t.state == TrackState.Tracked
        ]
        self.tracked_stracks_dict[cls_id] = joint_stracks(
            self.tracked_stracks_dict[cls_id],
            activated_starcks_dict[cls_id])
        self.tracked_stracks_dict[cls_id] = joint_stracks(
            self.tracked_stracks_dict[cls_id], refind_stracks_dict[cls_id])
        self.lost_stracks_dict[cls_id] = sub_stracks(
            self.lost_stracks_dict[cls_id],
            self.tracked_stracks_dict[cls_id])
        self.lost_stracks_dict[cls_id].extend(lost_stracks_dict[cls_id])
        self.lost_stracks_dict[cls_id] = sub_stracks(
            self.lost_stracks_dict[cls_id],
            self.removed_stracks_dict[cls_id])
        self.removed_stracks_dict[cls_id].extend(
            removed_stracks_dict[cls_id])
        (self.tracked_stracks_dict[cls_id],
         self.lost_stracks_dict[cls_id]) = remove_duplicate_stracks(
            self.tracked_stracks_dict[cls_id],
            self.lost_stracks_dict[cls_id])

        # Only activated tracks are reported to the caller.
        output_stracks_dict[cls_id] = [
            track for track in self.tracked_stracks_dict[cls_id]
            if track.is_activated
        ]

        logger.debug('===========Frame {}=========='.format(self.frame_id))
        logger.debug('Activated: {}'.format(
            [track.track_id for track in activated_starcks_dict[cls_id]]))
        logger.debug('Refind: {}'.format(
            [track.track_id for track in refind_stracks_dict[cls_id]]))
        logger.debug('Lost: {}'.format(
            [track.track_id for track in lost_stracks_dict[cls_id]]))
        logger.debug('Removed: {}'.format(
            [track.track_id for track in removed_stracks_dict[cls_id]]))

    return output_stracks_dict
def update_tracking(self, im_blob, img_0):
    """
    Advance the multi-class tracker by one frame.

    On the first frame the per-class track id counters are initialised via
    ``MCTrack.init_count``. The network is then run, detections and re-id
    features are decoded per object class, and every class is associated
    independently (embedding distance fused with motion, then IoU, then the
    unconfirmed tracks) before new tracks are started and the per-class
    tracked / lost / removed lists on ``self`` are refreshed.

    :param im_blob: network input; dims 2 and 3 are read as height and width
    :param img_0: original image; ``shape[0]`` / ``shape[1]`` are read as H / W
    :return: defaultdict mapping ``cls_id`` -> activated tracks of that class
    """
    # update frame id
    self.frame_id += 1

    # ----- reset the track ids for all object classes in the first frame
    if self.frame_id == 1:
        MCTrack.init_count(self.opt.num_classes)

    # per-frame tracking results, key: class_id
    activated_tracks_dict = defaultdict(list)
    refined_tracks_dict = defaultdict(list)
    lost_tracks_dict = defaultdict(list)
    removed_tracks_dict = defaultdict(list)
    output_tracks_dict = defaultdict(list)

    height, width = img_0.shape[0], img_0.shape[1]  # H, W of original image
    net_height, net_width = im_blob.shape[2], im_blob.shape[3]  # H, W of net input
    h_out = net_height // self.opt.down_ratio
    w_out = net_width // self.opt.down_ratio

    # Step 1: network forward, get detections & embeddings
    with torch.no_grad():
        output = self.model.forward(im_blob)[-1]

        hm = output['hm'].sigmoid_()
        wh = output['wh']
        reg = output['reg'] if self.opt.reg_offset else None
        id_feature = output['id']

        # L2 normalize the reid feature vector
        id_feature = F.normalize(id_feature, dim=1)

        # detection decoding
        dets, inds, cls_inds_mask = mot_decode(
            heatmap=hm,
            wh=wh,
            reg=reg,
            num_classes=self.opt.num_classes,
            cat_spec_wh=self.opt.cat_spec_wh,
            K=self.opt.K)

        # ----- get ReID feature vectors by object class
        cls_id_feats = []
        for cls_id in range(self.opt.num_classes):  # cls_id starts from 0
            # indices belonging to this object class
            cls_inds = inds[:, cls_inds_mask[cls_id]]

            # gather the features at those indices
            cls_id_feature = _tranpose_and_gather_feat(id_feature, cls_inds)
            cls_id_feature = cls_id_feature.squeeze(0)  # n x FeatDim
            cls_id_feature = cls_id_feature.cpu().numpy()
            cls_id_feats.append(cls_id_feature)

        # translate and scale back to the original image coordinates
        dets = map2orig(dets, h_out, w_out, height, width,
                        self.opt.num_classes)

    # ----- process every object class independently
    for cls_id in range(self.opt.num_classes):  # cls_id starts from 0
        cls_dets = dets[cls_id]

        # filter out low confidence detections, and their features
        remain_inds = cls_dets[:, 4] > self.opt.conf_thres
        cls_dets = cls_dets[remain_inds]
        cls_id_feature = cls_id_feats[cls_id][remain_inds]

        if len(cls_dets) > 0:
            # tlbrs: top, left, bottom, right, score
            cls_detects = [
                MCTrack(MCTrack.tlbr_to_tlwh(tlbrs[:4]), tlbrs[4], feat,
                        self.opt.num_classes, cls_id, 30)
                for (tlbrs, feat) in zip(cls_dets[:, :5], cls_id_feature)
            ]
        else:
            cls_detects = []

        # Split this class's tracks into unconfirmed and confirmed ones.
        unconfirmed = []
        tracked_tracks = []
        for track in self.tracked_tracks_dict[cls_id]:
            if not track.is_activated:
                unconfirmed.append(track)
            else:
                tracked_tracks.append(track)

        # Step 2: first association, with embedding
        # build the tracking pool for the current frame
        track_pool = join_tracks(tracked_tracks,
                                 self.lost_tracks_dict[cls_id])

        # Predict the current location with KF
        Track.multi_predict(track_pool)
        dists = matching.embedding_distance(track_pool, cls_detects)
        dists = matching.fuse_motion(self.kalman_filter, dists, track_pool,
                                     cls_detects)
        matches, u_track, u_detection = matching.linear_assignment(
            dists, thresh=0.7)

        for i_tracked, i_det in matches:
            track = track_pool[i_tracked]
            det = cls_detects[i_det]
            if track.state == TrackState.Tracked:
                track.update(det, self.frame_id)
                activated_tracks_dict[cls_id].append(track)
            else:
                track.re_activate(det, self.frame_id, new_id=False)
                refined_tracks_dict[cls_id].append(track)

        # Step 3: second association, with IOU
        cls_detects = [cls_detects[i] for i in u_detection]
        r_tracked_tracks = [
            track_pool[i] for i in u_track
            if track_pool[i].state == TrackState.Tracked
        ]
        dists = matching.iou_distance(r_tracked_tracks, cls_detects)
        matches, u_track, u_detection = matching.linear_assignment(
            dists, thresh=0.5)

        for i_tracked, i_det in matches:
            track = r_tracked_tracks[i_tracked]
            det = cls_detects[i_det]
            if track.state == TrackState.Tracked:
                track.update(det, self.frame_id)
                activated_tracks_dict[cls_id].append(track)
            else:
                track.re_activate(det, self.frame_id, new_id=False)
                refined_tracks_dict[cls_id].append(track)

        for it in u_track:
            track = r_tracked_tracks[it]
            if not track.state == TrackState.Lost:
                track.mark_lost()
                lost_tracks_dict[cls_id].append(track)

        # Deal with unconfirmed tracks, usually tracks with only one
        # beginning frame.
        cls_detects = [cls_detects[i] for i in u_detection]
        dists = matching.iou_distance(unconfirmed, cls_detects)
        matches, u_unconfirmed, u_detection = matching.linear_assignment(
            dists, thresh=0.7)
        for i_tracked, i_det in matches:
            unconfirmed[i_tracked].update(cls_detects[i_det], self.frame_id)
            activated_tracks_dict[cls_id].append(unconfirmed[i_tracked])
        for it in u_unconfirmed:
            track = unconfirmed[it]
            track.mark_removed()
            removed_tracks_dict[cls_id].append(track)

        # Step 4: init new tracks
        for i_new in u_detection:
            track = cls_detects[i_new]
            if track.score < self.det_thresh:
                continue
            track.activate(self.kalman_filter, self.frame_id)
            activated_tracks_dict[cls_id].append(track)

        # Step 5: update state
        for track in self.lost_tracks_dict[cls_id]:
            if self.frame_id - track.end_frame > self.max_time_lost:
                track.mark_removed()
                removed_tracks_dict[cls_id].append(track)

        self.tracked_tracks_dict[cls_id] = [
            t for t in self.tracked_tracks_dict[cls_id]
            if t.state == TrackState.Tracked
        ]
        self.tracked_tracks_dict[cls_id] = join_tracks(
            self.tracked_tracks_dict[cls_id], activated_tracks_dict[cls_id])
        self.tracked_tracks_dict[cls_id] = join_tracks(
            self.tracked_tracks_dict[cls_id], refined_tracks_dict[cls_id])
        self.lost_tracks_dict[cls_id] = sub_tracks(
            self.lost_tracks_dict[cls_id], self.tracked_tracks_dict[cls_id])
        self.lost_tracks_dict[cls_id].extend(lost_tracks_dict[cls_id])
        self.lost_tracks_dict[cls_id] = sub_tracks(
            self.lost_tracks_dict[cls_id], self.removed_tracks_dict[cls_id])
        self.removed_tracks_dict[cls_id].extend(removed_tracks_dict[cls_id])
        (self.tracked_tracks_dict[cls_id],
         self.lost_tracks_dict[cls_id]) = remove_duplicate_tracks(
            self.tracked_tracks_dict[cls_id],
            self.lost_tracks_dict[cls_id])

        # Only activated tracks are reported to the caller.
        output_tracks_dict[cls_id] = [
            track for track in self.tracked_tracks_dict[cls_id]
            if track.is_activated
        ]

        logger.debug('===========Frame {}=========='.format(self.frame_id))
        logger.debug('Activated: {}'.format(
            [track.track_id for track in activated_tracks_dict[cls_id]]))
        logger.debug('Refind: {}'.format(
            [track.track_id for track in refined_tracks_dict[cls_id]]))
        logger.debug('Lost: {}'.format(
            [track.track_id for track in lost_tracks_dict[cls_id]]))
        logger.debug('Removed: {}'.format(
            [track.track_id for track in removed_tracks_dict[cls_id]]))

    return output_tracks_dict
def test_det(
        opt,
        batch_size=12,
        img_size=(1088, 608),
        iou_thres=0.5,
        print_interval=40,
):
    """
    Evaluate single-class detection quality (precision / recall / AP) on the test split.

    Side effects: overwrites ``opt.device`` (from ``opt.gpus``) and ``opt.K`` (set to 200).

    :param opt: options object; this function reads data_cfg, gpus, arch, heads, head_conv,
                load_model, down_ratio, reg_offset and cat_spec_wh from it
    :param batch_size: batch size handed to the DataLoader
    :param img_size: network input size, indexed as (width, height)
    :param iou_thres: a prediction counts as correct only if its best IoU exceeds this value
    :param print_interval: running means are printed every ``print_interval`` batches
    :return: (mean_mAP, mean_R, mean_P)
    """
    # The context manager closes the config file even when JSON parsing fails.
    with open(opt.data_cfg) as f:
        data_cfg_dict = json.load(f)

    nC = 1  # number of classes that are evaluated
    test_path = data_cfg_dict['test']
    dataset_root = data_cfg_dict['root']

    if opt.gpus[0] >= 0:
        opt.device = torch.device('cuda')
    else:
        opt.device = torch.device('cpu')

    print('Creating model...')
    model = create_model(opt.arch, opt.heads, opt.head_conv)
    model = load_model(model, opt.load_model)
    # model = torch.nn.DataParallel(model)
    model = model.to(opt.device)
    model.eval()

    # Get dataloader
    transforms = T.Compose([T.ToTensor()])
    dataset = DetDataset(dataset_root, test_path, img_size, augment=False, transforms=transforms)
    dataloader = torch.utils.data.DataLoader(dataset,
                                             batch_size=batch_size,
                                             shuffle=False,
                                             num_workers=8,
                                             drop_last=False,
                                             collate_fn=collate_fn)

    mean_mAP, mean_R, mean_P, seen = 0.0, 0.0, 0.0, 0
    print('%11s' * 5 % ('Image', 'Total', 'P', 'R', 'mAP'))
    mAPs, mR, mP = [], [], []
    AP_accum, AP_accum_count = np.zeros(nC), np.zeros(nC)

    for batch_i, (imgs, targets, _paths, shapes, targets_len) in enumerate(dataloader):
        t = time.time()

        # Evaluation only: no autograd graph is needed, and the batch must live on the
        # same device as the model (the original hard-coded .cuda() broke CPU runs).
        with torch.no_grad():
            output = model(imgs.to(opt.device))[-1]
            hm = output['hm'].sigmoid_()
            wh = output['wh']
            reg = output['reg'] if opt.reg_offset else None
            opt.K = 200
            detections, _inds = mot_decode(hm, wh, reg=reg, cat_spec_wh=opt.cat_spec_wh, K=opt.K)

        # NOTE(review): only the first shape of the batch is used for every sample;
        # presumably all images of a batch share one original size - confirm.
        origin_shape = shapes[0]
        width = origin_shape[1]
        height = origin_shape[0]
        inp_height = img_size[1]
        inp_width = img_size[0]
        c = np.array([width / 2., height / 2.], dtype=np.float32)
        s = max(float(inp_width) / float(inp_height) * height, width) * 1.0
        meta = {
            'c': c,
            's': s,
            'out_height': inp_height // opt.down_ratio,
            'out_width': inp_width // opt.down_ratio
        }

        # Keep only the first targets_len[i] label rows of each sample
        targets = [targets[i][:int(n_labels)] for i, n_labels in enumerate(targets_len)]

        # Compute average precision for each sample
        for si, labels in enumerate(targets):
            seen += 1

            dets = detections[si].unsqueeze(0)
            dets = post_process(opt, dets, meta)
            dets = merge_outputs(opt, [dets])[1]

            if dets is None:
                # If there are labels but no detections mark as zero AP
                if labels.size(0) != 0:
                    mAPs.append(0)
                    mR.append(0)
                    mP.append(0)
                continue

            if labels.size(0) == 0:
                # No labels for this sample: recorded as zero AP / R / P
                mAPs.append(0)
                mR.append(0)
                mP.append(0)
                continue

            target_cls = labels[:, 0]

            # Extract target boxes as (x1, y1, x2, y2)
            target_boxes = ct_xywh2xyxy(labels[:, 2:6])
            # Scale by the original image size (assumes normalized label coordinates - TODO confirm)
            target_boxes[:, 0] *= width
            target_boxes[:, 2] *= width
            target_boxes[:, 1] *= height
            target_boxes[:, 3] *= height

            obj_pred = 0  # every prediction is treated as class 0
            correct = []
            detected = []
            for *pred_bbox, _conf in dets:
                pred_bbox = torch.FloatTensor(pred_bbox).view(1, -1)

                # Compute iou with target boxes
                iou = bbox_iou(pred_bbox, target_boxes, x1y1x2y2=True)[0]

                # Extract index of largest overlap
                best_i = np.argmax(iou)

                # If overlap exceeds threshold and classification is correct mark as correct;
                # each target can be claimed by one prediction only
                if iou[best_i] > iou_thres and obj_pred == labels[best_i, 0] \
                        and best_i not in detected:
                    correct.append(1)
                    detected.append(best_i)
                else:
                    correct.append(0)

            # Compute Average Precision (AP) per class
            AP, AP_class, R, P = ap_per_class(
                tp=correct,
                conf=dets[:, 4],
                pred_cls=np.zeros_like(dets[:, 4]),  # detections[:, 6]
                target_cls=target_cls)

            # Accumulate AP per class
            AP_accum_count += np.bincount(AP_class, minlength=nC)
            AP_accum += np.bincount(AP_class, minlength=nC, weights=AP)

            # Compute mean AP across all classes in this image, and append to image list
            mAPs.append(AP.mean())
            mR.append(R.mean())
            mP.append(P.mean())

            # Means of all images.
            # These are length-nC arrays because AP_accum_count is an array.
            mean_mAP = np.sum(mAPs) / (AP_accum_count + 1E-16)
            mean_R = np.sum(mR) / (AP_accum_count + 1E-16)
            mean_P = np.sum(mP) / (AP_accum_count + 1E-16)

        if batch_i % print_interval == 0:
            # Print image mAP and running mean mAP
            print(('%11s%11s' + '%11.3g' * 4 + 's') %
                  (seen, dataloader.dataset.nF, mean_P, mean_R, mean_mAP, time.time() - t))

    # Print mAP per class
    print('%11s' * 5 % ('Image', 'Total', 'P', 'R', 'mAP'))
    print('AP: %-.4f\n\n' % (AP_accum[0] / (AP_accum_count[0] + 1E-16)))

    # Return mAP
    return mean_mAP, mean_R, mean_P
def update(self, im_blob, img0):
    """
    Advance the tracker by one frame.

    Runs the network on ``im_blob``, builds detections with Re-ID features, associates
    them with existing tracks (embedding distance first, then IoU), starts new tracks,
    and updates the tracked / lost / removed track lists kept on ``self``.

    :param im_blob: network input; dims 2 and 3 are read as input height and width
    :param img0: original frame; shape[0] and shape[1] are read as height and width
    :return: tracks of self.tracked_stracks whose is_activated flag is set
    """
    self.frame_id += 1

    # Per-frame record of track state changes
    newly_activated = []
    re_found = []
    newly_lost = []
    newly_removed = []

    img_h, img_w = img0.shape[0], img0.shape[1]
    net_h, net_w = im_blob.shape[2], im_blob.shape[3]
    center = np.array([img_w / 2., img_h / 2.], dtype=np.float32)
    scale = max(float(net_w) / float(net_h) * img_h, img_w) * 1.0
    meta = {
        'c': center,
        's': scale,
        'out_height': net_h // self.opt.down_ratio,
        'out_width': net_w // self.opt.down_ratio
    }

    def _assign(track, det):
        # A matched track that is still Tracked is updated in place;
        # any other matched track is re-activated under its old id.
        if track.state == TrackState.Tracked:
            track.update(det, self.frame_id)
            newly_activated.append(track)
        else:
            track.re_activate(det, self.frame_id, new_id=False)
            re_found.append(track)

    # ----- Step 1: network forward, get detections & embeddings
    with torch.no_grad():  # inference needs no gradients
        output = self.model.forward(im_blob)[-1]
        hm = output['hm'].sigmoid_()
        wh = output['wh']
        id_feature = F.normalize(output['id'], dim=1)
        reg = output['reg'] if self.opt.reg_offset else None

        # decode detection results
        dets, inds = mot_decode(
            heat=hm,  # heatmap
            wh=wh,
            reg=reg,
            cat_spec_wh=self.opt.cat_spec_wh,
            K=self.opt.K)

        # gather the Re-ID feature vector of every decoded detection
        id_feature = _tranpose_and_gather_feat(id_feature, inds)
        id_feature = id_feature.squeeze(0)
        id_feature = id_feature.cpu().numpy()

    # post-process the detections
    dets = self.post_process(dets, meta)
    dets = self.merge_outputs([dets])[1]

    # drop detections whose score is not above the confidence threshold
    keep = dets[:, 4] > self.opt.conf_thres
    dets = dets[keep]
    id_feature = id_feature[keep]

    # Wrap each detection row (tlbr + score) and its feature into an STrack
    detections = []
    if len(dets) > 0:
        for tlbrs, feat in zip(dets[:, :5], id_feature):
            detections.append(
                STrack(STrack.tlbr_to_tlwh(tlbrs[:4]), tlbrs[4], feat, buff_size=30))

    # Split current tracks into activated ones and not-yet-activated (unconfirmed) ones
    unconfirmed = []
    tracked_stracks = []  # type: list[STrack]
    for track in self.tracked_stracks:
        (tracked_stracks if track.is_activated else unconfirmed).append(track)

    # ----- Step 2: first association, with embedding
    strack_pool = joint_stracks(tracked_stracks, self.lost_stracks)

    # Predict the current location with KF
    STrack.multi_predict(strack_pool)

    dists = matching.embedding_distance(strack_pool, detections)
    dists = matching.fuse_motion(self.kalman_filter, dists, strack_pool, detections)
    matches, u_track, u_detection = matching.linear_assignment(dists, thresh=0.7)
    for i_tracked, i_det in matches:
        _assign(strack_pool[i_tracked], detections[i_det])

    # ----- Step 3: second association, with IOU
    detections = [detections[i] for i in u_detection]
    r_tracked_stracks = [
        strack_pool[i] for i in u_track
        if strack_pool[i].state == TrackState.Tracked
    ]
    dists = matching.iou_distance(r_tracked_stracks, detections)
    matches, u_track, u_detection = matching.linear_assignment(dists, thresh=0.5)
    for i_tracked, i_det in matches:
        _assign(r_tracked_stracks[i_tracked], detections[i_det])

    # Tracks still unmatched after both rounds become lost
    for it in u_track:
        track = r_tracked_stracks[it]
        if track.state != TrackState.Lost:
            track.mark_lost()
            newly_lost.append(track)

    # Deal with unconfirmed tracks, usually tracks with only one beginning frame
    detections = [detections[i] for i in u_detection]
    dists = matching.iou_distance(unconfirmed, detections)
    matches, u_unconfirmed, u_detection = matching.linear_assignment(dists, thresh=0.7)
    for i_tracked, i_det in matches:
        candidate = unconfirmed[i_tracked]
        candidate.update(detections[i_det], self.frame_id)
        newly_activated.append(candidate)
    for it in u_unconfirmed:
        track = unconfirmed[it]
        track.mark_removed()
        newly_removed.append(track)

    # ----- Step 4: init new stracks
    for i_new in u_detection:
        track = detections[i_new]
        if track.score < self.det_thresh:
            continue
        track.activate(self.kalman_filter, self.frame_id)
        newly_activated.append(track)

    # ----- Step 5: update state
    for track in self.lost_stracks:
        if self.frame_id - track.end_frame > self.max_time_lost:
            track.mark_removed()
            newly_removed.append(track)

    self.tracked_stracks = [
        trk for trk in self.tracked_stracks if trk.state == TrackState.Tracked
    ]
    self.tracked_stracks = joint_stracks(self.tracked_stracks, newly_activated)
    self.tracked_stracks = joint_stracks(self.tracked_stracks, re_found)
    self.lost_stracks = sub_stracks(self.lost_stracks, self.tracked_stracks)
    self.lost_stracks.extend(newly_lost)
    self.lost_stracks = sub_stracks(self.lost_stracks, self.removed_stracks)
    self.removed_stracks.extend(newly_removed)
    self.tracked_stracks, self.lost_stracks = remove_duplicate_stracks(
        self.tracked_stracks, self.lost_stracks)

    # Only activated tracks are reported to the caller
    output_stracks = [trk for trk in self.tracked_stracks if trk.is_activated]

    logger.debug('===========Frame {}=========='.format(self.frame_id))
    logger.debug('Activated: {}'.format([trk.track_id for trk in newly_activated]))
    logger.debug('Refind: {}'.format([trk.track_id for trk in re_found]))
    logger.debug('Lost: {}'.format([trk.track_id for trk in newly_lost]))
    logger.debug('Removed: {}'.format([trk.track_id for trk in newly_removed]))

    return output_stracks