def postprocess_detections(self, class_logits, box_regression, proposals, image_shapes):
    # type: (Tensor, Tensor, List[Tensor], List[Tuple[int, int]]) -> Tuple[List[Tensor], List[Tensor], List[Tensor]]
    device = class_logits.device
    num_classes = class_logits.shape[-1]

    boxes_per_image = [len(boxes_in_image) for boxes_in_image in proposals]
    pred_boxes = self.box_coder.decode(box_regression, proposals)

    pred_scores = F.softmax(class_logits, -1)

    # split boxes and scores per image
    if len(boxes_per_image) == 1:
        # TODO: remove this when ONNX supports dynamic split sizes
        # and just assign to pred_boxes instead of pred_boxes_list
        pred_boxes_list = [pred_boxes]
        pred_scores_list = [pred_scores]
    else:
        pred_boxes_list = pred_boxes.split(boxes_per_image, 0)
        pred_scores_list = pred_scores.split(boxes_per_image, 0)

    all_boxes = []
    all_scores = []
    all_labels = []
    for boxes, scores, image_shape in zip(pred_boxes_list, pred_scores_list, image_shapes):
        boxes = box_ops.clip_boxes_to_image(boxes, image_shape)

        # create labels for each prediction
        labels = torch.arange(num_classes, device=device)
        labels = labels.view(1, -1).expand_as(scores)

        # remove predictions with the background label
        boxes = boxes[:, 1:]
        scores = scores[:, 1:]
        labels = labels[:, 1:]

        # batch everything, by making every class prediction be a separate instance
        boxes = boxes.reshape(-1, 4)
        scores = scores.reshape(-1)
        labels = labels.reshape(-1)

        # remove low scoring boxes
        inds = torch.nonzero(scores > self.score_thresh).squeeze(1)
        boxes, scores, labels = boxes[inds], scores[inds], labels[inds]

        # remove empty boxes
        keep = box_ops.remove_small_boxes(boxes, min_size=1e-2)
        boxes, scores, labels = boxes[keep], scores[keep], labels[keep]

        # non-maximum suppression, independently done per class
        keep = box_ops.batched_nms(boxes, scores, labels, self.nms_thresh)
        # keep only topk scoring predictions
        keep = keep[:self.detections_per_img]
        boxes, scores, labels = boxes[keep], scores[keep], labels[keep]

        all_boxes.append(boxes)
        all_scores.append(scores)
        all_labels.append(labels)

    return all_boxes, all_scores, all_labels
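# A minimal standalone sketch of what box_ops.clip_boxes_to_image computes above,
# written to match torchvision's documented behavior (not the library source):
# x-coordinates are clamped to [0, width] and y-coordinates to [0, height].
import torch

def clip_boxes_to_image_ref(boxes, size):
    # boxes: (N, 4) in (x1, y1, x2, y2); size: (height, width)
    h, w = size
    x = boxes[..., 0::2].clamp(min=0, max=w)  # x1, x2
    y = boxes[..., 1::2].clamp(min=0, max=h)  # y1, y2
    return torch.stack([x[..., 0], y[..., 0], x[..., 1], y[..., 1]], dim=-1)

# e.g. a box poking past a 100x100 image is pulled back to the border:
# clip_boxes_to_image_ref(torch.tensor([[-5., 10., 120., 90.]]), (100, 100))
# -> tensor([[  0.,  10., 100.,  90.]])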
def filter_proposals(self, loc_delta, anchors):
    decoded_boxes = decode(loc_delta, anchors, self.variance)
    decoded_boxes = box_ops.clip_boxes_to_image(decoded_boxes, self.img_size)
    keep = box_ops.remove_small_boxes(decoded_boxes, self.min_size)
    decoded_boxes = decoded_boxes[keep]
    return decoded_boxes
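# `decode` is external here; a sketch assuming the standard SSD parameterization,
# where `variance` is a pair of scaling factors (commonly (0.1, 0.2)) and anchors
# are stored as (cx, cy, w, h). This is an assumption, not this repo's source.
import torch

def decode_ref(loc, anchors, variance):
    # loc: (N, 4) predicted offsets; anchors: (N, 4) as (cx, cy, w, h)
    boxes = torch.cat((
        anchors[:, :2] + loc[:, :2] * variance[0] * anchors[:, 2:],
        anchors[:, 2:] * torch.exp(loc[:, 2:] * variance[1])), dim=1)
    boxes[:, :2] -= boxes[:, 2:] / 2  # (cx, cy, w, h) -> (x1, y1, x2, y2)
    boxes[:, 2:] += boxes[:, :2]
    return boxes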
def get_linear_boxes(self):
    box = []
    for t in self.track_actives:
        box.append(self.get_linear_box(t))
    box = torch.stack([torch.Tensor(t) for t in box], 0)
    box = clip_boxes_to_image(box, self.img.shape[-2:])
    return box
def ssm_postprocess_detections(self, class_logits, box_regression, proposals, image_shapes):
    device = class_logits.device
    num_classes = class_logits.shape[-1]

    boxes_per_image = [len(boxes_in_image) for boxes_in_image in proposals]
    pred_boxes = self.box_coder.decode(box_regression, proposals)

    pred_scores = F.softmax(class_logits, -1)

    # split boxes and scores per image
    pred_boxes = pred_boxes.split(boxes_per_image, 0)
    pred_scores = pred_scores.split(boxes_per_image, 0)

    al_idx = 0
    all_boxes = torch.empty([0, 4]).cuda()
    all_scores = torch.tensor([]).cuda()
    all_labels = []
    CONF_THRESH = 0.5  # a higher threshold yields more active-learning samples
    for boxes, scores, image_shape in zip(pred_boxes, pred_scores, image_shapes):
        boxes = box_ops.clip_boxes_to_image(boxes, image_shape)

        # create labels for each prediction
        labels = torch.arange(num_classes, device=device)
        labels = labels.view(1, -1).expand_as(scores)

        # remove predictions with the background label
        boxes = boxes[:, 1:]
        scores = scores[:, 1:]
        labels = labels[:, 1:]

        # low-confidence image: flag it for active learning and skip
        if torch.max(scores) < CONF_THRESH:
            al_idx = 1
            continue

        for cls_ind in range(num_classes - 1):
            cls_boxes = boxes[:, cls_ind]
            cls_scores = scores[:, cls_ind]
            cls_labels = labels[:, cls_ind]

            # batch everything, by making every class prediction be a separate instance
            cls_boxes = cls_boxes.reshape(-1, 4)
            cls_scores = cls_scores.flatten()
            cls_labels = cls_labels.flatten()

            # non-maximum suppression, independently done per class
            keep = box_ops.batched_nms(cls_boxes, cls_scores, cls_labels, self.nms_thresh)
            # keep only topk scoring predictions
            keep = keep[:self.detections_per_img]
            cls_boxes, cls_scores, cls_labels = cls_boxes[keep], cls_scores[keep], cls_labels[keep]

            # remove low scoring boxes
            inds = torch.nonzero(cls_scores > self.score_thresh).squeeze(1)
            if len(inds) == 0:
                continue
            for j in inds:
                all_boxes = torch.cat((all_boxes, cls_boxes[j].unsqueeze(0)), 0)
                k = keep[j]
                all_scores = torch.cat((all_scores, scores[k].unsqueeze(0)), 0)
                all_labels.append(judge_y(scores[k]))

    return [all_boxes], [all_scores], [all_labels], al_idx
def filter_proposals(
    self,
    proposals: Tensor,
    objectness: Tensor,
    image_shapes: List[Tuple[int, int]],
    num_anchors_per_level: List[int],
) -> Tuple[List[Tensor], List[Tensor]]:
    num_images = proposals.shape[0]
    device = proposals.device
    # do not backprop through objectness
    objectness = objectness.detach()
    objectness = objectness.reshape(num_images, -1)

    levels = [
        torch.full((n,), idx, dtype=torch.int64, device=device)
        for idx, n in enumerate(num_anchors_per_level)
    ]
    levels = torch.cat(levels, 0)
    levels = levels.reshape(1, -1).expand_as(objectness)

    # select top_n boxes independently per level before applying nms
    top_n_idx = self._get_top_n_idx(objectness, num_anchors_per_level)

    image_range = torch.arange(num_images, device=device)
    batch_idx = image_range[:, None]

    objectness = objectness[batch_idx, top_n_idx]
    levels = levels[batch_idx, top_n_idx]
    proposals = proposals[batch_idx, top_n_idx]

    objectness_prob = torch.sigmoid(objectness)

    final_boxes = []
    final_scores = []
    for boxes, scores, lvl, img_shape in zip(proposals, objectness_prob, levels, image_shapes):
        boxes = box_ops.clip_boxes_to_image(boxes, img_shape)

        # remove small boxes
        keep = box_ops.remove_small_boxes(boxes, self.min_size)
        boxes, scores, lvl = boxes[keep], scores[keep], lvl[keep]

        # remove low scoring boxes
        # use >= for backwards compatibility
        keep = torch.where(scores >= self.score_thresh)[0]
        boxes, scores, lvl = boxes[keep], scores[keep], lvl[keep]

        # non-maximum suppression, independently done per level
        keep = box_ops.batched_nms(boxes, scores, lvl, self.nms_thresh)

        # keep only topk scoring predictions
        keep = keep[:self.post_nms_top_n()]
        boxes, scores = boxes[keep], scores[keep]

        final_boxes.append(boxes)
        final_scores.append(scores)
    return final_boxes, final_scores
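# `self._get_top_n_idx` selects the pre-NMS top-k proposals per FPN level and
# shifts the per-level indices into the concatenated anchor index space. A sketch
# consistent with torchvision's RPN; check your version's source, as this is an
# assumption about a helper that is not shown here.
def _get_top_n_idx(self, objectness, num_anchors_per_level):
    # type: (Tensor, List[int]) -> Tensor
    r = []
    offset = 0
    for ob in objectness.split(num_anchors_per_level, 1):
        num_anchors = ob.shape[1]
        pre_nms_top_n = min(self.pre_nms_top_n(), num_anchors)
        _, top_n_idx = ob.topk(pre_nms_top_n, dim=1)
        r.append(top_n_idx + offset)  # shift into the concatenated index space
        offset += num_anchors
    return torch.cat(r, dim=1)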
def postprocess_detections(self, class_logits, box_regression, proposals, image_shapes):
    device = class_logits.device
    num_classes = class_logits.shape[-1]

    boxes_per_image = [len(boxes_in_image) for boxes_in_image in proposals]
    pred_boxes = self.box_coder.decode(box_regression, proposals)

    pred_scores = F.softmax(class_logits, -1)

    # split boxes and scores per image
    pred_boxes = pred_boxes.split(boxes_per_image, 0)
    pred_scores = pred_scores.split(boxes_per_image, 0)

    all_boxes = []
    all_scores = []
    all_labels = []
    for boxes, scores, image_shape in zip(pred_boxes, pred_scores, image_shapes):
        boxes = box_ops.clip_boxes_to_image(boxes, image_shape)

        # create labels for each prediction
        labels = torch.arange(num_classes, device=device)
        labels = labels.view(1, -1).expand_as(scores)

        # remove predictions with the background label
        boxes = boxes[:, 1:]
        scores = scores[:, 1:]
        labels = labels[:, 1:]

        # batch everything, by making every class prediction be a separate instance
        boxes = boxes.reshape(-1, 4)
        scores = scores.flatten()
        labels = labels.flatten()

        # remove low scoring boxes
        inds = torch.nonzero(scores > self.score_thresh).squeeze(1)
        boxes, scores, labels = boxes[inds], scores[inds], labels[inds]

        # remove empty boxes
        # TODO: looks like min_size=1.0 is not enough for us and we need min_size=1.01,
        # because we are using int(.) when discretizing the bbox and maybe there is a
        # problem with floats, so we get a - b > 1.0 but int(a) - int(b) == 0
        keep = box_ops.remove_small_boxes(boxes, min_size=1.01)
        boxes, scores, labels = boxes[keep], scores[keep], labels[keep]

        # non-maximum suppression, independently done per class
        keep = box_ops.batched_nms(boxes, scores, labels, self.nms_thresh)
        # keep only topk scoring predictions
        keep = keep[:self.detections_per_img]
        boxes, scores, labels = boxes[keep], scores[keep], labels[keep]

        all_boxes.append(boxes)
        all_scores.append(scores)
        all_labels.append(labels)

    return all_boxes, all_scores, all_labels
def postprocess_detections(self,
                           class_logits,    # type: Tensor
                           box_regression,  # type: Tensor
                           proposals,       # type: List[Tensor]
                           image_shapes     # type: List[Tuple[int, int]]
                           ):
    # type: (...) -> Tuple[List[Tensor], List[Tensor], List[Tensor]]
    device = class_logits.device
    num_classes = class_logits.shape[-1]

    boxes_per_image = [boxes_in_image.shape[0] for boxes_in_image in proposals]
    pred_boxes = self.box_coder.decode(box_regression, proposals)

    pred_scores = F.softmax(class_logits, -1)

    pred_boxes_list = pred_boxes.split(boxes_per_image, 0)
    pred_scores_list = pred_scores.split(boxes_per_image, 0)

    all_boxes = []
    all_scores = []
    all_labels = []
    for boxes, scores, image_shape in zip(pred_boxes_list, pred_scores_list, image_shapes):
        boxes = box_ops.clip_boxes_to_image(boxes, image_shape)

        # create labels for each prediction
        labels = torch.arange(num_classes, device=device)
        labels = labels.view(1, -1).expand_as(scores)

        # remove predictions with the background label
        boxes = boxes[:, 1:]
        scores = scores[:, 1:]
        labels = labels[:, 1:]

        # batch everything, by making every class prediction be a separate instance
        boxes = boxes.reshape(-1, 4)
        scores = scores.reshape(-1)
        labels = labels.reshape(-1)

        # remove low scoring boxes
        inds = torch.where(scores > self.score_thresh)[0]
        boxes, scores, labels = boxes[inds], scores[inds], labels[inds]

        # remove empty boxes
        keep = box_ops.remove_small_boxes(boxes, min_size=1e-2)
        boxes, scores, labels = boxes[keep], scores[keep], labels[keep]

        # non-maximum suppression, independently done per class
        keep = box_ops.batched_nms(boxes, scores, labels, self.nms_thresh)
        # keep only topk scoring predictions
        keep = keep[:self.detections_per_img]
        boxes, scores, labels = boxes[keep], scores[keep], labels[keep]

        all_boxes.append(boxes)
        all_scores.append(scores)
        all_labels.append(labels)

    return all_boxes, all_scores, all_labels
def regress_tracks(self, blob, plot_compare=False, frame=None):
    """Regress the position of the tracks and also check their scores."""
    if self.finetuning_config["enabled"]:
        scores = []
        pos = []
        for track in self.tracks:
            # Regress with the finetuned bbox head for each track
            assert track.box_head is not None
            assert track.box_predictor is not None
            box, score = self.obj_detect.predict_boxes(
                track.pos,
                box_head=track.box_head,
                box_predictor=track.box_predictor)
            if plot_compare:
                box_no_finetune, score_no_finetune = self.obj_detect.predict_boxes(track.pos)
                plot_compare_bounding_boxes(box, box_no_finetune, blob['img'])
            scores.append(score)
            bbox = clip_boxes_to_image(box, blob['img'].shape[-2:])
            pos.append(bbox)
        scores = torch.cat(scores)
        pos = torch.cat(pos)
    else:
        pos = self.get_pos()
        boxes, scores = self.obj_detect.predict_boxes(pos)
        pos = clip_boxes_to_image(boxes, blob['img'].shape[-2:])

    s = []
    for i in range(len(self.tracks) - 1, -1, -1):
        t = self.tracks[i]
        t.score = scores[i]
        if scores[i] <= self.regression_person_thresh:
            self.tracks_to_inactive([t])
        else:
            s.append(scores[i])
            # t.prev_pos = t.pos
            t.pos = pos[i].view(1, -1)

    scores_of_active_tracks = torch.Tensor(s[::-1]).to(device)
    return scores_of_active_tracks
def check_vis_results(self):
    # dummy inputs, do not affect the vis results
    last_pos_1 = [t.last_pos[-2] for t in self.tracks]
    last_pos_2 = [t.last_pos[-1] for t in self.tracks]  # same as t.pos
    last_pos_1 = torch.cat(last_pos_1, 0)
    last_pos_2 = torch.cat(last_pos_2, 0)

    curr_pos = self.get_pos()
    curr_pos = clip_boxes_to_image(curr_pos, self.last_image.shape[-2:])
    conv_features, repr_features = self.get_pooled_features(curr_pos)

    if isinstance(self.vis_model, (MotionModel, MotionModelV2)):
        _, vis = self.vis_model(conv_features, repr_features, last_pos_1, last_pos_2)
    elif isinstance(self.vis_model, BackboneMotionModel):
        img = [self.last_image.cuda()]
        target = [{"boxes": curr_pos}]
        _, vis = self.vis_model(img, target, last_pos_1, last_pos_2)
    elif isinstance(self.vis_model, MotionModelReID):
        historical_reid_features = [
            torch.cat(list(t.features), 0) for t in self.tracks
        ]
        curr_reid_features = self.reid_network.test_rois(
            self.last_image.unsqueeze(0), curr_pos)
        _, vis = self.vis_model(historical_reid_features, curr_reid_features,
                                conv_features, repr_features, last_pos_1, last_pos_2)
    elif isinstance(self.vis_model, (MotionModelSimpleReID, MotionModelV3)):
        early_reid_features = torch.stack([
            torch.mean(torch.cat(t.early_features, 0), 0) for t in self.tracks
        ], 0)
        curr_reid_features = self.reid_network.test_rois(
            self.last_image.unsqueeze(0), curr_pos)
        _, vis = self.vis_model(early_reid_features, curr_reid_features,
                                conv_features, repr_features, last_pos_1, last_pos_2)
    elif isinstance(self.vis_model, MotionModelSimpleReIDV2):
        early_reid_features = torch.stack([
            torch.mean(torch.cat(t.early_features, 0), 0) for t in self.tracks
        ], 0)
        curr_reid_features = self.reid_network.test_rois(
            self.last_image.unsqueeze(0), curr_pos)
        _, vis = self.vis_model(early_reid_features, curr_reid_features,
                                repr_features, last_pos_1, last_pos_2)

    for i, t in enumerate(self.tracks):
        t.vis = vis[i].item()
def regress_tracks(self, blob, prev_boxes):
    """Regress the position of the tracks and also check their scores."""
    if prev_boxes is None:
        prev_boxes = self.get_pos()
        boxes_to_shift = prev_boxes
    else:
        boxes_to_shift = self.get_pos()
    enlarged_boxes = clip_boxes_to_image(
        self.enlarge_boxes(boxes_to_shift), blob['img'].shape[-2:])

    positions = enlarged_boxes
    if self.use_correlation:
        correlated_boxes = self.obj_detect.predict_with_correlation(
            prev_boxes, enlarged_boxes, boxes_to_shift)
        correlated_boxes = clip_boxes_to_image(correlated_boxes,
                                               blob['img'].shape[-2:])
        positions = correlated_boxes

    if self.write_debug_images:
        plot_tracktor_image(blob, positions, [t.id for t in self.tracks],
                            "2_after_correlation")

    boxes, scores = self.obj_detect.predict_boxes(positions)
    pos = clip_boxes_to_image(boxes, blob['img'].shape[-2:])

    s = []
    for i in range(len(self.tracks) - 1, -1, -1):
        t = self.tracks[i]
        t.score = scores[i]
        if scores[i] <= self.regression_person_thresh:
            self.tracks_to_inactive([t])
            self.score_killed_tracks.append({
                'id': t.id,
                'frame': self.im_index
            })
        else:
            s.append(scores[i])
            # t.prev_pos = t.pos
            t.pos = pos[i].view(1, -1)

    return torch.Tensor(s[::-1]).cuda()
def match(self, boxes, targets):
    m, n = boxes.shape[0], targets.shape[0]
    idx = torch.zeros((m, n))
    for i in range(m):
        for j in range(n):
            if boxes[i, 1] == targets[j, 1]:
                idx[i, j] = 1
                break
    # boolean masks: keep only rows/columns that found a match
    # (a ">" comparison avoids indexing with uint8 sums greater than 1)
    boxes = boxes[idx.sum(dim=1) > 0]
    targets = targets[idx.sum(dim=0) > 0]
    boxes = boxes[torch.argsort(boxes[:, 1])]
    targets = targets[torch.argsort(targets[:, 1])]
    boxes[:, 2:6] = clip_boxes_to_image(boxes[:, 2:6],
                                        (self.img_size[1], self.img_size[0]))
    targets[:, 2:6] = clip_boxes_to_image(targets[:, 2:6],
                                          (self.img_size[1], self.img_size[0]))
    return boxes, targets
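# Hypothetical usage of match(); the column layout is an assumption -- the
# function itself only requires that column 1 holds the id used for matching and
# that columns 2:6 hold (x1, y1, x2, y2). `evaluator` stands in for the defining
# object, whose img_size is presumably (width, height) given the swap in the
# clip call above.
boxes = torch.tensor([[0., 1., -5., 10., 700., 300.],
                      [0., 3., 30., 30., 60., 80.]])
targets = torch.tensor([[0., 1., 0., 12., 640., 290.]])
# only id 1 appears on both sides; its rows survive, sorted by id, with the
# box coordinates clipped to the image
matched_boxes, matched_targets = evaluator.match(boxes, targets)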
def box_decoder(self, box_regression, proposals, image_shapes):
    boxes_per_image = [len(boxes_in_image) for boxes_in_image in proposals]
    pred_boxes = self.box_coder.decode(box_regression, proposals)
    pred_boxes = pred_boxes.split(boxes_per_image, 0)

    all_boxes = []
    for boxes, image_shape in zip(pred_boxes, image_shapes):
        boxes = box_ops.clip_boxes_to_image(boxes, image_shape)
        boxes = boxes[:, 1:]
        boxes = boxes.reshape(-1, 4)
        all_boxes.append(boxes)
    return all_boxes
def postprocess_detections(self, class_logits, box_regression, proposals, image_shapes):
    device = class_logits.device
    num_classes = class_logits.shape[-1]

    boxes_per_image = [len(boxes_in_image) for boxes_in_image in proposals]
    pred_boxes = self.box_coder.decode(box_regression, proposals)

    pred_scores = F.softmax(class_logits, -1)

    # split boxes and scores per image
    pred_boxes = pred_boxes.split(boxes_per_image, 0)
    pred_scores = pred_scores.split(boxes_per_image, 0)

    all_boxes = []
    all_scores = []
    all_labels = []
    for boxes, scores, image_shape in zip(pred_boxes, pred_scores, image_shapes):
        boxes = box_ops.clip_boxes_to_image(boxes, image_shape)

        # create labels for each prediction
        labels = torch.arange(num_classes, device=device)
        labels = labels.view(1, -1).expand_as(scores)

        # remove predictions with the background label
        boxes = boxes[:, 1:]
        scores = scores[:, 1:]
        labels = labels[:, 1:]
        # boxes = boxes[:, 0]
        # scores = scores[:, 0]
        # labels = labels[:, 0]

        # batch everything, by making every class prediction be a separate instance
        boxes = boxes.reshape(-1, 4)
        scores = scores.flatten()
        labels = labels.flatten()

        # remove low scoring boxes
        inds = torch.nonzero(scores > self.score_thresh).squeeze(1)
        boxes, scores, labels = boxes[inds], scores[inds], labels[inds]

        # non-maximum suppression, independently done per class
        keep = box_ops.batched_nms(boxes, scores, labels, self.nms_thresh)
        # keep only topk scoring predictions
        keep = keep[:self.detections_per_img]
        boxes, scores, labels = boxes[keep], scores[keep], labels[keep]

        all_boxes.append(boxes)
        all_scores.append(scores)
        all_labels.append(labels)

    return all_boxes, all_scores, all_labels
def forward(self, cls_logits: torch.Tensor, reg_deltas: torch.Tensor,
            fmap_dims: Tuple[int, int], img_dims: Tuple[int, int],
            nms_threshold: float = .7, keep_pre_nms: int = 1000,
            keep_post_nms: int = 300, dtype=torch.float32, device='cpu'):
    """
    Params:
        cls_logits: torch.Tensor(bs x (h'*w'*nA) x 1)
        reg_deltas: torch.Tensor(bs x (h'*w'*nA) x 4)
        fmap_dims: Tuple[int, int] as h', w'
        img_dims: Tuple[int, int] as h, w

    Returns:
        batched_dets: List[torch.Tensor(N, 5)] as xmin, ymin, xmax, ymax, score
    """
    bs = cls_logits.size(0)

    if self.cached_fmap_dims != fmap_dims:
        # generate anchors for each input
        self.anchors = self.anchor_generator(fmap_dims, img_dims,
                                             dtype=dtype, device=device)
        self.cached_fmap_dims = fmap_dims

    batched_dets: List[torch.Tensor] = []

    scores = torch.sigmoid(cls_logits.detach()).reshape(bs, -1)
    offsets = reg_deltas.detach().reshape(bs, -1, 4)

    # convert offsets to boxes
    # bs,N,4 | N,4 => bs,N,4 as xmin,ymin,xmax,ymax
    boxes = offsets2boxes(offsets, self.anchors)

    # TODO: vectorize this loop
    for i in range(bs):
        single_boxes = boxes[i]
        single_scores = scores[i]
        N = single_scores.size(0)

        # select top n
        _, selected_ids = single_scores.topk(min(keep_pre_nms, N))
        single_scores, single_boxes = single_scores[selected_ids], single_boxes[selected_ids]

        # clip boxes
        single_boxes = box_ops.clip_boxes_to_image(single_boxes, img_dims)

        # nms
        keep = box_ops.nms(single_boxes, single_scores, nms_threshold)
        single_scores, single_boxes = single_scores[keep], single_boxes[keep]

        # post_n
        keep_post_nms = min(keep_post_nms, single_boxes.size(0))
        single_scores, single_boxes = single_scores[:keep_post_nms], single_boxes[:keep_post_nms]

        batched_dets.append(torch.cat([single_boxes, single_scores.unsqueeze(-1)], dim=-1))

    return batched_dets
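# `offsets2boxes` is defined elsewhere in this repo; a sketch assuming the
# standard Faster R-CNN delta parameterization (dx, dy, dw, dh) relative to
# anchor centers and sizes. This is an assumption, not the repo's source.
import torch

def offsets2boxes_ref(deltas, anchors):
    # deltas: (bs, N, 4) as (dx, dy, dw, dh); anchors: (N, 4) as xyxy
    widths = anchors[:, 2] - anchors[:, 0]
    heights = anchors[:, 3] - anchors[:, 1]
    ctr_x = anchors[:, 0] + 0.5 * widths
    ctr_y = anchors[:, 1] + 0.5 * heights

    dx, dy, dw, dh = deltas.unbind(-1)          # each (bs, N), broadcasts with (N,)
    pred_ctr_x = dx * widths + ctr_x
    pred_ctr_y = dy * heights + ctr_y
    pred_w = torch.exp(dw) * widths
    pred_h = torch.exp(dh) * heights

    return torch.stack([
        pred_ctr_x - 0.5 * pred_w, pred_ctr_y - 0.5 * pred_h,
        pred_ctr_x + 0.5 * pred_w, pred_ctr_y + 0.5 * pred_h,
    ], dim=-1)                                   # (bs, N, 4) as xyxy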
def predict_boxes(self, boxes):
    device = list(self.parameters())[0].device
    boxes = boxes.to(device)

    try:
        boxes = resize_boxes(boxes, self.original_image_sizes[0],
                             self.preprocessed_images.image_sizes[0])
    except IndexError:
        print(boxes.size())
        raise

    proposals = [boxes]

    box_features = self.roi_heads.box_roi_pool(
        self.features, proposals, self.preprocessed_images.image_sizes)
    box_features = self.roi_heads.box_head(box_features)
    class_logits, box_regression = self.roi_heads.box_predictor(box_features)

    pred_boxes = self.roi_heads.box_coder.decode(box_regression, proposals)
    pred_scores = F.softmax(class_logits, -1)

    # strip the background class, undo the resize, and clip to the original image
    pred_boxes = pred_boxes[:, 1:].squeeze(dim=1).detach()
    pred_boxes = resize_boxes(pred_boxes,
                              self.preprocessed_images.image_sizes[0],
                              self.original_image_sizes[0])
    pred_scores = pred_scores[:, 1:].squeeze(dim=1).detach()

    pred_boxes = box_ops.clip_boxes_to_image(pred_boxes,
                                             self.original_image_sizes[0])
    if self.version == 'v2':
        # cache per-box features keyed by the integer box coordinates
        for box, box_feature in zip(pred_boxes, box_features):
            key = ','.join(str(int(c)) for c in box)
            self.box_features[key] = box_feature
    return pred_boxes, pred_scores
def postprocess_detections(
        self,
        pred_scores,   # type: Tensor
        pred_boxes,    # type: Tensor
        proposals,     # type: List[Tensor]
        image_shapes   # type: List[Tuple[int, int]]
):
    # type: (...) -> Tuple[List[Tensor], List[Tensor], List[Tensor]]
    boxes_per_image = [boxes_in_image.shape[0] for boxes_in_image in proposals]
    pred_boxes_list = pred_boxes.split(boxes_per_image, 0)
    pred_scores_list = pred_scores.split(boxes_per_image, 0)

    all_boxes = []
    all_scores = []
    all_labels = []
    for boxes, scores, image_shape in zip(pred_boxes_list, pred_scores_list, image_shapes):
        boxes = box_ops.clip_boxes_to_image(boxes, image_shape)

        # drop the background-class scores
        scores = scores[:, 1:]
        # take the best foreground class per box
        scores, labels = scores.max(dim=1)
        labels += 1  # object labels start from 1 (0 is background)

        # remove low scoring boxes
        inds = torch.where(scores > self.score_thresh)[0]
        boxes, scores, labels = boxes[inds], scores[inds], labels[inds]

        # remove empty boxes
        keep = box_ops.remove_small_boxes(boxes, min_size=1e-2)
        boxes, scores, labels = boxes[keep], scores[keep], labels[keep]

        # non-maximum suppression, independently done per class
        keep = box_ops.batched_nms(boxes, scores, labels, self.nms_thresh)
        # keep only topk scoring predictions
        keep = keep[:self.detections_per_img]
        boxes, scores, labels = boxes[keep], scores[keep], labels[keep]

        all_boxes.append(boxes)
        all_scores.append(scores)
        all_labels.append(labels)

    return all_boxes, all_scores, all_labels
def regress_tracks(self, blob):
    """Regress the position of the tracks and also check their scores."""
    pos = self.get_pos()

    # regress
    boxes, scores = self.obj_detect.predict_boxes(
        blob['img'], pos)  # raw boxes -> (x1, y1, x2, y2), N x 4
    pos = clip_boxes_to_image(boxes, blob['img'].shape[-2:])

    s = []
    for i in range(len(self.tracks) - 1, -1, -1):
        t = self.tracks[i]
        t.score = scores[i]
        # get crf metrics
        if t.track_count > 2:
            crf_metric = self.get_crf_metrics(t)
            # append this frame's per-track crf metric
            self.all_metric_info.append(crf_metric)

    # do crf inference
    if len(self.all_metric_info):
        marg_tuples = self.crf_inference(self.all_metric_info)
        for t_id, marg in marg_tuples:
            vals = range(t_id.n_opts)
            if len(t_id.labels) > 0:
                vals = t_id.labels
            map_rv = np.argmax(marg)
            if map_rv == 0:
                self.tracks_to_inactive([
                    inactive_t for inactive_t in self.tracks
                    if inactive_t.id == eval(t_id.name)
                ])
    else:
        print("the crf metric list is empty, skipping crf inference and using scores instead")

    # use the score metric to deactivate tracks; the score-only case is not
    # handled separately here -- we assume crf-based deactivation runs first,
    # followed by score-based deactivation
    for i in range(len(self.tracks) - 1, -1, -1):
        t = self.tracks[i]
        if scores[i] <= self.regression_person_thresh:
            self.tracks_to_inactive([t])
        else:
            s.append(scores[i])
            # t.prev_pos = t.pos
            t.pos = pos[i].view(1, -1)

    return torch.Tensor(s[::-1]).cuda()
def generate_anchors(self, x: Tensor) -> None:
    anchors = torch.cat([
        _generate_anchors(self.input_size, x.size(-1), listify(anchor_sizes),
                          self.aspect_ratios, stride)
        for anchor_sizes, stride in zip(self.anchor_sizes, self.strides)
    ], dim=0)
    # Filter anchors
    anchors = box_ops.clip_boxes_to_image(anchors, (self.input_size, self.input_size))
    keep = box_ops.remove_small_boxes(anchors, 1e-3)
    self.anchors = anchors[keep]
def postprocess_detections(self, class_logits, box_regression, proposals, image_shapes):
    device = class_logits.device
    num_classes = class_logits.shape[-1]  # background, GGO, C

    boxes_per_image = [len(boxes_in_image) for boxes_in_image in proposals]
    pred_boxes = self.box_coder.decode(box_regression, proposals)

    pred_scores = F.softmax(class_logits, -1)

    # split boxes and scores per image
    pred_boxes = pred_boxes.split(boxes_per_image, 0)
    pred_scores = pred_scores.split(boxes_per_image, 0)

    all_boxes = []
    all_scores = []
    all_labels = []
    for boxes, scores, image_shape in zip(pred_boxes, pred_scores, image_shapes):
        boxes = box_ops.clip_boxes_to_image(boxes, image_shape)

        # create labels for each prediction
        labels = torch.arange(num_classes, device=device)
        labels = labels.view(1, -1).expand_as(scores)

        # remove predictions with the background label
        boxes = boxes[:, 1:]
        scores = scores[:, 1:]
        labels = labels[:, 1:]

        # batch everything, by making every class prediction be a separate instance
        boxes = boxes.reshape(-1, 4)
        scores = scores.flatten()
        labels = labels.flatten()

        # for the prediction/segmentation: keep only boxes exceeding the
        # threshold (scores > box_score_thresh)
        inds = torch.nonzero(scores > self.score_thresh).squeeze(1)
        boxes, scores, labels = boxes[inds], scores[inds], labels[inds]

        # since we have 2 predictions per RoI, map the flattened indices back
        # to the corresponding RoI via floor division by 2
        roi_inds = torch.arange(labels.size(0), device=device) // 2

        # non-maximum suppression, independently done per class
        keep = box_ops.batched_nms(boxes, scores, labels, self.nms_thresh)
        # keep only the topk scoring predictions, sorted; discard the rest
        keep = keep[:self.detections_per_img]
        boxes, scores, labels = boxes[keep], scores[keep], labels[keep]

        all_boxes.append(boxes)
        all_scores.append(scores)
        all_labels.append(labels)

    return all_boxes, all_scores, all_labels
def __getitem__(self, index):
    single_item = self.roidb[index]
    # Image
    im = imread(single_item['img_path'])  # RGB, HWC, 0-255
    # im = np.array(Image.open(single_item['img_path']).convert('RGB'))
    if len(im.shape) == 2:
        im = im[:, :, np.newaxis]
        im = np.concatenate((im, im, im), axis=2)
    # divide by 255 for PyTorch pre-trained models
    if self.div:
        im = im / 255.
    # flip the channels, RGB to BGR, for caffe pre-trained models
    if self.BGR:
        im = im[:, :, ::-1]
    if single_item['flipped']:
        im = im[:, ::-1, :]
    im = im.astype(np.float32, copy=False)
    image = torch.from_numpy(im).permute(2, 0, 1)  # HWC to CHW

    # Targets
    gt_boxes = single_item['boxes'].astype(np.int32, copy=False)
    gt_boxes = torch.from_numpy(gt_boxes).float()  # TODO(BUG): dtype
    # clip boxes whose coordinates fall outside the image resolution
    gt_boxes = clip_boxes_to_image(gt_boxes, tuple(image.shape[1:]))

    target = dict(
        boxes=gt_boxes,  # (num_boxes, 4)
        labels=torch.from_numpy(single_item['gt_classes']).int(),
        pids=torch.from_numpy(single_item['gt_pids']).long(),
        img_name=single_item['img_name'],
    )

    if 'mask_path' in single_item:
        # foreground mask, (h, w) in {0, 255}
        mask = imread(single_item['mask_path']).astype(np.int32, copy=False)
        assert np.ndim(mask) == 2
        if single_item['flipped']:
            mask = mask[:, ::-1]
        # 3D tensor (1, H, W) in {0, 1}
        target['mask'] = torch.from_numpy(mask.copy())[None] / 255.

    item = dict(image=image, target=target)

    # visualization
    # util.plot_gt_on_img([image], [target], write_path=
    #     "/home/caffe/code/deep-person-search/cache/img_with_gt_box/gt%d.jpg" % np.random.choice(list(range(10)), 1))
    return item
def get_boxes(self, box_regression, proposals, image_shapes):
    """Get boxes from proposals."""
    boxes_per_image = [len(boxes_in_image) for boxes_in_image in proposals]
    pred_boxes = self.box_coder.decode(box_regression, proposals)
    pred_boxes = pred_boxes.split(boxes_per_image, 0)

    all_boxes = []
    for boxes, image_shape in zip(pred_boxes, image_shapes):
        boxes = box_ops.clip_boxes_to_image(boxes, image_shape)
        # remove predictions with the background label
        boxes = boxes[:, 1:].reshape(-1, 4)
        all_boxes.append(boxes)
    return all_boxes
def crop(img: Image, target: Dict[str, Any],
         region: Tuple[int, int, int, int]) -> Tuple[Image, Dict[str, Any]]:
    """
    Args:
        region: (top, left, h, w)
    """
    # crop image
    src_w, src_h = img.size
    img = TF.crop(img, *region)
    target = deepcopy(target)
    top, left, h, w = region

    # set new image size
    if "size" in target.keys():
        target["size"] = (h, w)

    fields: List[str] = list()
    for k, v in target.items():
        if isinstance(v, Tensor):
            fields.append(k)

    # crop bounding boxes (stored as normalized cxcywh)
    if "boxes" in target:
        boxes = target["boxes"]
        boxes[:, [0, 2]] *= src_w
        boxes[:, [1, 3]] *= src_h
        boxes = box_op.box_convert(boxes, "cxcywh", "xyxy")
        boxes -= torch.tensor([left, top, left, top])
        boxes = box_op.clip_boxes_to_image(boxes, (h, w))
        keep = box_op.remove_small_boxes(boxes, 1)
        boxes[:, [0, 2]] /= w
        boxes[:, [1, 3]] /= h
        boxes = box_op.box_convert(boxes, "xyxy", "cxcywh")
        target["boxes"] = boxes
        for field in fields:
            target[field] = target[field][keep]

    if "masks" in target:
        target['masks'] = target['masks'][:, top:top + h, left:left + w]
        keep = target['masks'].flatten(1).any(1)
        for field in fields:
            target[field] = target[field][keep]

    return img, target
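# Hypothetical usage of crop(): the boxes are expected as normalized
# (cx, cy, w, h), since the function scales them by the source size and converts
# to xyxy before clipping. Dummy image and target for illustration only.
from PIL import Image as PILImage
import torch

img = PILImage.new("RGB", (640, 480))
target = {
    "size": (480, 640),
    "boxes": torch.tensor([[0.5, 0.5, 0.2, 0.3]]),  # one box centered in the image
    "labels": torch.tensor([1]),
}
# crop a 200x300 (h x w) window whose top-left corner is at (top=40, left=100)
img, target = crop(img, target, (40, 100, 200, 300))
# the surviving box is clipped to the window and re-normalized to its size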
def filter_proposals(self, proposals, objectness, image_shapes, num_anchors_per_level):
    # type: (Tensor, Tensor, List[Tuple[int, int]], List[int]) -> Tuple[List[Tensor], List[Tensor]]
    num_images = proposals.shape[0]
    device = proposals.device
    # do not backprop through objectness
    objectness = objectness.detach()
    objectness = objectness.reshape(num_images, -1)

    levels = [
        torch.full((n,), idx, dtype=torch.int64, device=device)
        for idx, n in enumerate(num_anchors_per_level)
    ]
    levels = torch.cat(levels, 0)
    levels = levels.reshape(1, -1).expand_as(objectness)

    # select top_n boxes independently per level before applying nms
    top_n_idx = self._get_top_n_idx(objectness, num_anchors_per_level)

    image_range = torch.arange(num_images, device=device)
    batch_idx = image_range[:, None]

    objectness = objectness[batch_idx, top_n_idx]
    levels = levels[batch_idx, top_n_idx]
    proposals = proposals[batch_idx, top_n_idx]

    final_boxes = []
    final_scores = []
    for boxes, scores, lvl, img_shape in zip(proposals, objectness, levels, image_shapes):
        # For ONNX export, Clip's min/max cannot be traced as tensors.
        if torchvision._is_tracing():
            boxes = _onnx_clip_boxes_to_image(boxes, img_shape)
        else:
            boxes = box_ops.clip_boxes_to_image(boxes, img_shape)
        keep = box_ops.remove_small_boxes(boxes, self.min_size)
        boxes, scores, lvl = boxes[keep], scores[keep], lvl[keep]

        # non-maximum suppression, independently done per level
        keep = box_ops.batched_nms(boxes, scores, lvl, self.nms_thresh)
        # keep only topk scoring predictions
        keep = keep[:self.post_nms_top_n()]
        boxes, scores = boxes[keep], scores[keep]

        final_boxes.append(boxes)
        final_scores.append(scores)
    return final_boxes, final_scores
def __call__(self, proposals, image_shapes):
    # randomly choose an augmentation op
    augmentop = random.choice(self.ops)
    prealization = []
    for pboxes, image_shape in zip(proposals, image_shapes):
        # noinspection PyArgumentList
        boxes = augmentop(pboxes)
        # make sure it still fits within the image bounds
        boxes = box_ops.clip_boxes_to_image(boxes, image_shape)
        prealization.append(boxes)
    return prealization
def regress_tracks(self, blob):
    '''Regresses the position of the tracks and also checks their scores.'''
    pos = self.get_pos()
    boxes, scores = self.obj_detect.predict_boxes(blob['img'], pos)
    pos = clip_boxes_to_image(boxes, blob['img'].shape[-2:])

    s = []
    for i in range(len(self.tracks) - 1, -1, -1):
        t = self.tracks[i]
        t.score = scores[i]
        if scores[i] < self.regression_person_thresh:
            self.tracks_to_inactive([t])
        else:
            s.append(scores[i])
            t.pos = pos[i].view(1, -1)
    return torch.Tensor(s[::-1]).cuda()
def regress_tracks(self, blob):
    pos = self.get_pos()

    # regress
    boxes, scores = self.obj_detect.predict_boxes(pos)
    pos = clip_boxes_to_image(boxes, blob['img'].shape[-2:])

    s = []
    for i in range(len(self.tracks) - 1, -1, -1):
        t = self.tracks[i]
        t.score = scores[i]
        if scores[i] <= self.regression_person_thresh and not self.kill_oracle:
            self.tracks_to_inactive([t])
        else:
            s.append(scores[i])
            if self.regress:
                t.pos = pos[i].view(1, -1)
    return torch.Tensor(s[::-1]).cuda()
def regress_tracks(self, boxes, cls_conf, img):
    """Regress the position of the tracks and also check their scores."""
    pos = self.get_pos()

    # the regressed boxes and confidences are supplied by the caller here
    scores = cls_conf
    pos = clip_boxes_to_image(boxes, img.shape[-2:])

    s = []
    for i in range(len(self.tracks) - 1, -1, -1):
        t = self.tracks[i]
        t.score = scores[i]
        if scores[i] <= self.regression_person_thresh:
            self.tracks_to_inactive([t])
        else:
            s.append(scores[i])
            # t.prev_pos = t.pos
            t.pos = pos[i].view(1, -1)
    return torch.Tensor(s[::-1]).cuda()
def regress_tracks(self, blob):
    """Regress the position of the tracks and also check their scores."""
    pos = self.get_pos()
    # print('pos: ', pos)

    # regress
    # FIX THIS: can I just replace this with detect? Don't think so.
    # boxes, scores = self.obj_detect.detect(pos)
    boxes, scores = self.obj_detect.predict_boxes(pos)
    pos = clip_boxes_to_image(boxes, blob['img'].shape[-2:])

    s = []
    for i in range(len(self.tracks) - 1, -1, -1):
        t = self.tracks[i]
        t.score = scores[i]
        if scores[i] <= self.regression_person_thresh:
            self.tracks_to_inactive([t])
        else:
            s.append(scores[i])
            # t.prev_pos = t.pos
            t.pos = pos[i].view(1, -1)
    return torch.Tensor(s[::-1]).cuda()
def filter_proposals(self, proposals, objectness, image_shapes, num_anchors_per_level):
    num_images = proposals.shape[0]
    device = proposals.device
    # do not backprop through objectness
    objectness = objectness.detach()
    objectness = objectness.reshape(num_images, -1)

    levels = [
        torch.full((n,), idx, dtype=torch.int64, device=device)
        for idx, n in enumerate(num_anchors_per_level)
    ]
    levels = torch.cat(levels, 0)
    levels = levels.reshape(1, -1).expand_as(objectness)

    # select top_n boxes independently per level before applying nms
    top_n_idx = self._get_top_n_idx(objectness, num_anchors_per_level)
    batch_idx = torch.arange(num_images, device=device)[:, None]
    objectness = objectness[batch_idx, top_n_idx]
    levels = levels[batch_idx, top_n_idx]
    proposals = proposals[batch_idx, top_n_idx]

    final_boxes = []
    final_scores = []
    for boxes, scores, lvl, img_shape in zip(proposals, objectness, levels, image_shapes):
        boxes = box_ops.clip_boxes_to_image(boxes, img_shape)
        keep = box_ops.remove_small_boxes(boxes, self.min_size)
        boxes, scores, lvl = boxes[keep], scores[keep], lvl[keep]

        # non-maximum suppression, independently done per level
        # lvl = torch.tensor(np.arange(len(lvl))).to(device)
        keep = box_ops.batched_nms(boxes, scores, lvl, self.nms_thresh)
        # keep only topk scoring predictions
        keep = keep[:self.post_nms_top_n]
        boxes, scores = boxes[keep], scores[keep]

        final_boxes.append(boxes)
        final_scores.append(scores)
    return final_boxes, final_scores
def step(self, blob):
    """This function should be called every timestep to perform tracking with a blob
    containing the image information.
    """
    for t in self.tracks:
        # add current position to last_pos list
        t.last_pos.append(t.pos.clone())

    ###########################
    # Look for new detections #
    ###########################

    self.obj_detect.load_image(blob['img'])

    if self.public_detections:
        dets = blob['dets'].squeeze(dim=0)
        if dets.nelement() > 0:
            boxes, scores = self.obj_detect.predict_boxes(dets)
        else:
            boxes = scores = torch.zeros(0).cuda()
    else:
        boxes, scores = self.obj_detect.detect(blob['img'])

    if boxes.nelement() > 0:
        boxes = clip_boxes_to_image(boxes, blob['img'].shape[-2:])

        # Filter out detections with too low a person score
        inds = torch.gt(scores, self.detection_person_thresh).nonzero().view(-1)
    else:
        inds = torch.zeros(0).cuda()

    if inds.nelement() > 0:
        det_pos = boxes[inds]
        det_scores = scores[inds]
    else:
        det_pos = torch.zeros(0).cuda()
        det_scores = torch.zeros(0).cuda()

    ##################
    # Predict tracks #
    ##################

    num_tracks = 0
    nms_inp_reg = torch.zeros(0).cuda()
    if len(self.tracks):
        # align
        if self.do_align:
            self.align(blob)

        # apply motion model
        if self.motion_model_cfg['enabled']:
            self.motion()
            self.tracks = [t for t in self.tracks if t.has_positive_area()]

        # regress
        person_scores = self.regress_tracks(blob)

        if len(self.tracks):
            # create nms input
            # nms here if tracks overlap
            keep = nms(self.get_pos(), person_scores, self.regression_nms_thresh)

            self.tracks_to_inactive([
                self.tracks[i] for i in list(range(len(self.tracks)))
                if i not in keep
            ])

            if keep.nelement() > 0 and self.do_reid:
                new_features = self.get_appearances(blob)
                self.add_features(new_features)

    #####################
    # Create new tracks #
    #####################

    # !!! Here NMS is used to filter out detections that are already covered by tracks. This is
    # !!! done by iterating through the active tracks one by one, assigning them a bigger score
    # !!! than 1 (maximum score for detections) and then filtering the detections with NMS.
    # !!! In the paper this is done by calculating the overlap with existing tracks, but the
    # !!! result stays the same.
    if det_pos.nelement() > 0:
        keep = nms(det_pos, det_scores, self.detection_nms_thresh)
        det_pos = det_pos[keep]
        det_scores = det_scores[keep]

        # check with every track in a single run (problem if tracks delete each other)
        for t in self.tracks:
            nms_track_pos = torch.cat([t.pos, det_pos])
            nms_track_scores = torch.cat(
                [torch.tensor([2.0]).to(det_scores.device), det_scores])
            keep = nms(nms_track_pos, nms_track_scores, self.detection_nms_thresh)

            keep = keep[torch.ge(keep, 1)] - 1

            det_pos = det_pos[keep]
            det_scores = det_scores[keep]

            if keep.nelement() == 0:
                break

    if det_pos.nelement() > 0:
        new_det_pos = det_pos
        new_det_scores = det_scores

        # try to reidentify tracks
        new_det_pos, new_det_scores, new_det_features = self.reid(
            blob, new_det_pos, new_det_scores)

        # add new
        if new_det_pos.nelement() > 0:
            self.add(new_det_pos, new_det_scores, new_det_features)

    ####################
    # Generate Results #
    ####################

    for t in self.tracks:
        if t.id not in self.results.keys():
            self.results[t.id] = {}
        self.results[t.id][self.im_index] = np.concatenate(
            [t.pos[0].cpu().numpy(), np.array([t.score])])

    for t in self.inactive_tracks:
        t.count_inactive += 1

    self.inactive_tracks = [
        t for t in self.inactive_tracks
        if t.has_positive_area() and t.count_inactive <= self.inactive_patience
    ]

    self.im_index += 1
    self.last_image = blob['img'][0]
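# A standalone illustration of the NMS trick flagged with !!! above: give the
# existing track a score of 2.0 (above the detection maximum of 1.0), run NMS,
# and any detection overlapping the track gets suppressed. Dummy numbers only.
import torch
from torchvision.ops import nms

track_pos = torch.tensor([[10., 10., 50., 50.]])
det_pos = torch.tensor([[12., 11., 49., 52.],       # overlaps the track
                        [200., 200., 240., 260.]])  # far away
det_scores = torch.tensor([0.9, 0.8])

cand_pos = torch.cat([track_pos, det_pos])
cand_scores = torch.cat([torch.tensor([2.0]), det_scores])
keep = nms(cand_pos, cand_scores, iou_threshold=0.5)
keep = keep[keep >= 1] - 1  # drop the track itself, map back to detection indices
det_pos, det_scores = det_pos[keep], det_scores[keep]
# only the far-away detection survives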