def forward(self, features, proposals, image_shapes, targets=None):
    """
    Arguments:
        features (List[Tensor])
        proposals (List[Tensor[N, 4]])
        image_shapes (List[Tuple[H, W]])
        targets (List[Dict])
    """
    if targets is not None:
        for t in targets:
            assert t["boxes"].dtype.is_floating_point, 'target boxes must be of float type'
            assert t["labels"].dtype == torch.int64, 'target labels must be of int64 type'
            assert t["poses"].dtype.is_floating_point
            if self.has_keypoint:
                assert t["keypoints"].dtype == torch.float32, 'target keypoints must be of float type'

    if self.training:
        proposals, matched_idxs, labels, regression_targets = self.select_training_samples(
            proposals, targets)

    box_features = self.box_roi_pool(features, proposals, image_shapes)  # torch.Size([bs*1000, 256, 7, 7])
    box_features = self.box_head(box_features)  # torch.Size([bs*1000, 1024])
    class_logits, box_regression = self.box_predictor(box_features)  # torch.Size([bs*1000, 2]), torch.Size([bs*1000, 8])

    # result is a list of dicts; each dict holds the predictions for one image
    result, losses = [], {}
    if self.training:
        loss_classifier, loss_box_reg = fastrcnn_loss(
            class_logits, box_regression, labels, regression_targets)
        losses = dict(loss_classifier=loss_classifier, loss_box_reg=loss_box_reg)
    else:
        boxes, scores, labels = self.postprocess_detections(
            class_logits, box_regression, proposals, image_shapes)
        num_images = len(boxes)
        for i in range(num_images):
            result.append(
                dict(
                    boxes=boxes[i],
                    labels=labels[i],
                    scores=scores[i],
                ))

    # Mask R-CNN branch
    if self.has_mask:
        mask_proposals = [p["boxes"] for p in result]
        if self.training:
            # during training, only focus on positive boxes
            num_images = len(proposals)
            mask_proposals = []
            pos_matched_idxs = []
            for img_id in range(num_images):
                pos = torch.nonzero(labels[img_id] > 0).squeeze(1)
                mask_proposals.append(proposals[img_id][pos])
                pos_matched_idxs.append(matched_idxs[img_id][pos])

        mask_features = self.mask_roi_pool(features, mask_proposals, image_shapes)
        mask_features = self.mask_head(mask_features)
        mask_logits = self.mask_predictor(mask_features)

        loss_mask = {}
        if self.training:
            gt_masks = [t["masks"] for t in targets]
            gt_labels = [t["labels"] for t in targets]
            loss_mask = maskrcnn_loss(mask_logits, mask_proposals,
                                      gt_masks, gt_labels, pos_matched_idxs)
            loss_mask = dict(loss_mask=loss_mask)
        else:
            labels = [r["labels"] for r in result]
            masks_probs = maskrcnn_inference(mask_logits, labels)
            for mask_prob, r in zip(masks_probs, result):
                r["masks"] = mask_prob

        losses.update(loss_mask)

    if self.has_keypoint:
        keypoint_proposals = [p["boxes"] for p in result]
        if self.training:
            # during training, only focus on positive boxes
            num_images = len(proposals)
            keypoint_proposals = []
            pos_matched_idxs = []
            for img_id in range(num_images):
                pos = torch.nonzero(labels[img_id] > 0).squeeze(1)
                keypoint_proposals.append(proposals[img_id][pos])  # shape=(num_pos, 4)
                pos_matched_idxs.append(matched_idxs[img_id][pos])

        keypoint_features = self.keypoint_roi_pool(features, keypoint_proposals, image_shapes)
        keypoint_features = self.keypoint_head(keypoint_features)
        keypoint_logits = self.keypoint_predictor(keypoint_features)

        loss_keypoint = {}
        if self.training:
            gt_keypoints = [t["keypoints"] for t in targets]
            loss_keypoint = keypointrcnn_loss(keypoint_logits, keypoint_proposals,
                                              gt_keypoints, pos_matched_idxs)
            loss_keypoint = dict(loss_keypoint=loss_keypoint)
        else:
            keypoints_probs, kp_scores = keypointrcnn_inference(keypoint_logits, keypoint_proposals)
            for keypoint_prob, kps, r in zip(keypoints_probs, kp_scores, result):
                r["keypoints"] = keypoint_prob
                r["keypoints_scores"] = kps

        losses.update(loss_keypoint)

    if self.has_pose:
        pose_proposals = [p["boxes"] for p in result]
        if self.training:
            # during training, only focus on positive boxes
            num_images = len(proposals)
            pose_proposals = []
            pos_matched_idxs = []
            for img_id in range(num_images):
                pos = torch.nonzero(labels[img_id] > 0).squeeze(1)  # indices of the proposals assigned as positive samples
                pose_proposals.append(proposals[img_id][pos])  # proposal boxes (xmin, ymin, xmax, ymax)
                pos_matched_idxs.append(matched_idxs[img_id][pos])  # which target pose each proposal is matched to

        pose_features = self.pose_roi_pool(features, pose_proposals, image_shapes)
        pose_features = self.pose_head(pose_features)
        pose_regression = self.pose_predictor(pose_features)

        loss_pose = {}
        if self.training:
            gt_poses = [t["poses"] for t in targets]  # a list of (rx, ry, rz, tz)
            loss_pose = posercnn_loss(pose_regression, gt_poses, labels, pos_matched_idxs)
            loss_pose = dict(loss_pose=loss_pose)
        else:
            pred_poses = postprocess_poses(pose_regression, pose_proposals)
            for poses, r in zip(pred_poses, result):
                r['poses'] = poses

        losses.update(loss_pose)

    if self.has_trans:
        trans_proposals = [p["boxes"] for p in result]
        if self.training:
            # during training, only focus on positive boxes
            num_images = len(proposals)
            trans_proposals = []
            pos_matched_idxs = []
            for img_id in range(num_images):
                # keep_only_positive_boxes
                pos = torch.nonzero(labels[img_id] > 0).squeeze(1)  # indices of the proposals assigned as positive samples
                trans_proposals.append(proposals[img_id][pos])  # proposal boxes (xmin, ymin, xmax, ymax)
                pos_matched_idxs.append(matched_idxs[img_id][pos])  # which target pose each proposal is matched to

        box_features = torch.cat(trans_proposals, dim=0)  # [N, 4], N = batch_size * num_proposals_per_image
        trans_features = self.translation_head(box_features)
        trans_pred = self.translation_predictor(trans_features, pose_features)

        loss_trans = {}
        if self.training:
            gt_trans = [t["translations"] for t in targets]
            # the translation loss weight in 6DVNET is 0.05
            loss_trans = 0.05 * trans_loss(trans_pred, gt_trans, labels, pos_matched_idxs)
            loss_trans = dict(loss_trans=loss_trans)
        else:
            pred_trans = postprocess_trans(trans_pred, trans_proposals)
            for translations, r in zip(pred_trans, result):
                r['translations'] = translations

        losses.update(loss_trans)

    return result, losses
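
# The pose branch above calls a `posercnn_loss` helper that is not shown here. Below is a
# minimal sketch of what such a loss could look like, assuming a class-agnostic 4-d pose
# regression (rx, ry, rz, tz) and a smooth L1 objective over positive proposals; the actual
# implementation may be class-specific or weight the terms differently.
import torch
import torch.nn.functional as F


def posercnn_loss_sketch(pose_regression, gt_poses, labels, pos_matched_idxs):
    # pose_regression: Tensor[num_pos_total, 4], predictions for the positive proposals
    # gt_poses: List[Tensor[num_gt_i, 4]], one tensor of target poses per image
    # labels: unused in this class-agnostic sketch (a class-specific head would use it)
    # pos_matched_idxs: List[Tensor[num_pos_i]], gt index matched to each positive proposal
    pose_targets = torch.cat(
        [gt[idxs] for gt, idxs in zip(gt_poses, pos_matched_idxs)], dim=0)
    if pose_targets.numel() == 0:
        return pose_regression.sum() * 0
    return F.smooth_l1_loss(pose_regression, pose_targets)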

def forward(self, features, proposals, image_shapes, targets=None):
    """
    Args:
        features: List
        proposals: List
        image_shapes: List
        targets: List (Default value = None)

    Returns:
        result (List[Dict]) and losses (Dict)
    """
    maskrcnn_loss_func = maskrcnn_loss
    fastrcnn_loss_func = fastrcnn_loss
    keypointrcnn_loss_func = keypointrcnn_loss

    eval_when_train = not self.training
    try:
        if self._eval_when_train:
            eval_when_train = True
    except AttributeError:
        pass

    if self.maskrcnn_loss_customized is not None:
        maskrcnn_loss_func = self.maskrcnn_loss_customized
    if self.fastrcnn_loss_customized is not None:
        fastrcnn_loss_func = self.fastrcnn_loss_customized
    if self.keypointrcnn_loss_customized is not None:
        keypointrcnn_loss_func = self.keypointrcnn_loss_customized

    if self.training:
        (
            proposals,
            matched_idxs,
            labels,
            regression_targets,
        ) = self.select_training_samples(proposals, targets)

    box_features = self.box_roi_pool(features, proposals, image_shapes)
    box_features = self.box_head(box_features)
    class_logits, box_regression = self.box_predictor(box_features)

    result, losses = [], {}
    if self.training:
        loss_classifier, loss_box_reg = fastrcnn_loss_func(
            class_logits, box_regression, labels, regression_targets)
        losses = dict(loss_classifier=loss_classifier, loss_box_reg=loss_box_reg)
    if eval_when_train:
        boxes, scores, labels = self.postprocess_detections(
            class_logits, box_regression, proposals, image_shapes)
        num_images = len(boxes)
        for i in range(num_images):
            result.append(
                dict(
                    boxes=boxes[i],
                    labels=labels[i],
                    scores=scores[i],
                ))

    if self.has_mask:
        mask_proposals = [p["boxes"] for p in result]
        if self.training:
            # during training, only focus on positive boxes
            num_images = len(proposals)
            mask_proposals = []
            pos_matched_idxs = []
            for img_id in range(num_images):
                pos = torch.nonzero(labels[img_id] > 0).squeeze(1)
                mask_proposals.append(proposals[img_id][pos])
                pos_matched_idxs.append(matched_idxs[img_id][pos])

        mask_features = self.mask_roi_pool(features, mask_proposals, image_shapes)
        mask_features = self.mask_head(mask_features)
        mask_logits = self.mask_predictor(mask_features)

        loss_mask = {}
        if self.training:
            gt_masks = [t["masks"] for t in targets]
            gt_labels = [t["labels"] for t in targets]
            loss_mask = maskrcnn_loss_func(mask_logits, mask_proposals,
                                           gt_masks, gt_labels, pos_matched_idxs)
            loss_mask = dict(loss_mask=loss_mask)
        if eval_when_train:
            labels = [r["labels"] for r in result]
            masks_probs = maskrcnn_inference(mask_logits, labels)
            for mask_prob, r in zip(masks_probs, result):
                r["masks"] = mask_prob

        losses.update(loss_mask)

    if self.has_keypoint():
        keypoint_proposals = [p["boxes"] for p in result]
        if self.training:
            # during training, only focus on positive boxes
            num_images = len(proposals)
            keypoint_proposals = []
            pos_matched_idxs = []
            for img_id in range(num_images):
                pos = torch.nonzero(labels[img_id] > 0).squeeze(1)
                keypoint_proposals.append(proposals[img_id][pos])
                pos_matched_idxs.append(matched_idxs[img_id][pos])

        keypoint_features = self.keypoint_roi_pool(features, keypoint_proposals, image_shapes)
        keypoint_features = self.keypoint_head(keypoint_features)
        keypoint_logits = self.keypoint_predictor(keypoint_features)

        loss_keypoint = {}
        if self.training:
            gt_keypoints = [t["keypoints"] for t in targets]
            loss_keypoint = keypointrcnn_loss_func(keypoint_logits, keypoint_proposals,
                                                   gt_keypoints, pos_matched_idxs)
            loss_keypoint = dict(loss_keypoint=loss_keypoint)
        if eval_when_train:
            keypoints_probs, kp_scores = keypointrcnn_inference(keypoint_logits, keypoint_proposals)
            for keypoint_prob, kps, r in zip(keypoints_probs, kp_scores, result):
                r["keypoints"] = keypoint_prob
                r["keypoints_scores"] = kps

        losses.update(loss_keypoint)

    return result, losses
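
# Usage sketch for the `_eval_when_train` switch above: because the flag is read via a
# try/except, it can be set on an existing RoIHeads instance to make a training step also
# return post-processed detections alongside the losses. `roi_heads`, `features`,
# `proposals`, `image_shapes`, and `targets` are placeholders for objects produced by the
# rest of the detection pipeline.
roi_heads._eval_when_train = True
roi_heads.train()
detections, losses = roi_heads(features, proposals, image_shapes, targets)
# `detections` is now populated even though the training losses were computed as well.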

def forward(self, features, proposals, image_shapes, targets=None, return_loss=False):
    """
    Arguments:
        features (List[Tensor])
        proposals (List[Tensor[N, 4]])
        image_shapes (List[Tuple[H, W]])
        targets (List[Dict])
        return_loss (Bool): return the loss (even if we are in eval mode)
    """
    if targets is not None:
        for t in targets:
            assert t["boxes"].dtype.is_floating_point, 'target boxes must be of float type'
            assert t["labels"].dtype == torch.int64, 'target labels must be of int64 type'
            # if self.has_keypoint:
            #     assert t["keypoints"].dtype == torch.float32, 'target keypoints must be of float type'

    if self.training or return_loss:
        proposals, matched_idxs, labels, regression_targets = self.select_training_samples(proposals, targets)

    box_features = self.box_roi_pool(features, proposals, image_shapes)
    box_features = self.box_head(box_features)
    class_logits, box_regression = self.box_predictor(box_features)

    result, losses = [], {}
    if self.training or return_loss:
        loss_classifier, loss_box_reg = fastrcnn_loss(
            class_logits, box_regression, labels, regression_targets)
        losses = dict(loss_classifier=loss_classifier, loss_box_reg=loss_box_reg)
    else:
        boxes, scores, labels = self.postprocess_detections(class_logits, box_regression,
                                                            proposals, image_shapes)
        num_images = len(boxes)
        for i in range(num_images):
            result.append(
                dict(
                    boxes=boxes[i],
                    labels=labels[i],
                    scores=scores[i],
                ))

    if self.has_mask:
        mask_proposals = [p["boxes"] for p in result]
        if self.training or return_loss:
            # during training, only focus on positive boxes
            num_images = len(proposals)
            mask_proposals = []
            pos_matched_idxs = []
            for img_id in range(num_images):
                pos = torch.nonzero(labels[img_id] > 0).squeeze(1)
                mask_proposals.append(proposals[img_id][pos])
                pos_matched_idxs.append(matched_idxs[img_id][pos])

        mask_features = self.mask_roi_pool(features, mask_proposals, image_shapes)
        mask_features = self.mask_head(mask_features)
        mask_logits = self.mask_predictor(mask_features)

        loss_mask = {}
        if self.training or return_loss:
            gt_masks = [t["masks"] for t in targets]
            gt_labels = [t["labels"] for t in targets]
            loss_mask = maskrcnn_loss(mask_logits, mask_proposals,
                                      gt_masks, gt_labels, pos_matched_idxs)
            loss_mask = dict(loss_mask=loss_mask)
        else:
            labels = [r["labels"] for r in result]
            masks_probs = maskrcnn_inference(mask_logits, labels)
            for mask_prob, r in zip(masks_probs, result):
                r["masks"] = mask_prob

        losses.update(loss_mask)

    if self.has_keypoint:
        keypoint_proposals = [p["boxes"] for p in result]
        if self.training or return_loss:
            # during training, only focus on positive boxes
            num_images = len(proposals)
            keypoint_proposals = []
            pos_matched_idxs = []
            for img_id in range(num_images):
                pos = torch.nonzero(labels[img_id] > 0).squeeze(1)
                keypoint_proposals.append(proposals[img_id][pos])
                pos_matched_idxs.append(matched_idxs[img_id][pos])

        keypoint_features = self.keypoint_roi_pool(features, keypoint_proposals, image_shapes)
        keypoint_features = self.keypoint_head(keypoint_features)
        keypoint_logits = self.keypoint_predictor(keypoint_features)

        loss_keypoint = {}
        if self.training or return_loss:
            gt_keypoints = [t["keypoints"] for t in targets]
            loss_keypoint = keypointrcnn_loss(
                keypoint_logits, keypoint_proposals, gt_keypoints, pos_matched_idxs)
            loss_keypoint = dict(loss_keypoint=loss_keypoint)
        else:
            keypoints_probs, kp_scores = keypointrcnn_inference(keypoint_logits, keypoint_proposals)
            for keypoint_prob, kps, r in zip(keypoints_probs, kp_scores, result):
                r["keypoints"] = keypoint_prob
                r["keypoints_scores"] = kps

        losses.update(loss_keypoint)

    return result, losses
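
# Usage sketch for the `return_loss` flag above: it lets a validation loop obtain the
# training losses while the model stays in eval mode. `roi_heads`, `features`,
# `proposals`, `image_shapes`, and `targets` are placeholders for objects produced by
# the rest of the detection pipeline.
import torch

roi_heads.eval()
with torch.no_grad():
    _, val_losses = roi_heads(features, proposals, image_shapes,
                              targets=targets, return_loss=True)
total_val_loss = sum(val_losses.values())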

def forward(self, features, proposals, image_shapes, targets=None):
    # type: (Dict[str, Tensor], List[Tensor], List[Tuple[int, int]], Optional[List[Dict[str, Tensor]]])
    """
    Arguments:
        features (List[Tensor])
        proposals (List[Tensor[N, 4]])
        image_shapes (List[Tuple[H, W]])
        targets (List[Dict])
    """
    if targets is not None:
        for t in targets:
            # TODO: https://github.com/pytorch/pytorch/issues/26731
            floating_point_types = (torch.float, torch.double, torch.half)
            assert t["boxes"].dtype in floating_point_types, 'target boxes must be of float type'
            assert t["labels"].dtype == torch.int64, 'target labels must be of int64 type'
            if self.has_keypoint():
                assert t["keypoints"].dtype == torch.float32, 'target keypoints must be of float type'

    if self.training:
        proposals, matched_idxs, labels, regression_targets, attrs = self.select_training_samples(
            proposals, targets)
    else:
        labels = None
        regression_targets = None
        matched_idxs = None

    box_features = self.box_roi_pool(features, proposals, image_shapes)
    box_features = self.box_head(box_features)
    class_logits, box_regression = self.box_predictor(box_features)
    attr_logits = self.attr_predictor(box_features)

    result = torch.jit.annotate(List[Dict[str, torch.Tensor]], [])
    losses = {}
    if self.training:
        assert labels is not None and regression_targets is not None
        # labels: class id assigned to each proposal after matching with the ground truth
        # class_logits: num_class-dimensional output
        # loss_classifier, loss_box_reg = fastrcnn_loss(
        #     class_logits, box_regression, labels, regression_targets)
        loss_classifier, loss_box_reg, loss_attribute = self.triplercnn_loss(
            class_logits, box_regression, labels, regression_targets,
            attr_logits, attrs, self.pos_weight)
        # print(attr_logits)
        # print("*" * 10)
        # print(attrs)
        # set_trace()
        # TODO: add attribute classify loss
        losses = {
            "loss_classifier": loss_classifier,
            "loss_box_reg": loss_box_reg,
            "loss_attribute": loss_attribute,
        }
    else:
        boxes, scores, labels, ascores = self.postprocess_detections(
            class_logits, attr_logits, box_regression, proposals, image_shapes)
        num_images = len(boxes)
        # set_trace()
        for i in range(num_images):
            result.append({
                "boxes": boxes[i],
                "labels": labels[i],
                "scores": scores[i],
                "ascores": ascores[i],
            })

    if self.has_mask():
        mask_proposals = [p["boxes"] for p in result]  # predicted boxes
        # print(mask_proposals)  # empty during training
        if self.training:
            assert matched_idxs is not None
            # during training, only focus on positive boxes
            num_images = len(proposals)  # batch size
            mask_proposals = []
            pos_matched_idxs = []
            for img_id in range(num_images):
                pos = torch.nonzero(labels[img_id] > 0).squeeze(1)  # indices of proposals whose class id is non-background
                mask_proposals.append(proposals[img_id][pos])
                pos_matched_idxs.append(matched_idxs[img_id][pos])  # index of the gt box assigned to each non-background proposal
            # print(mask_proposals[0].shape, mask_proposals[1].shape)  # [(nr_roi1, 4), ...]
        else:
            pos_matched_idxs = None

        if self.mask_roi_pool is not None:
            mask_features = self.mask_roi_pool(features, mask_proposals, image_shapes)
            mask_features = self.mask_head(mask_features)
            mask_logits = self.mask_predictor(mask_features)
        else:
            mask_logits = torch.tensor(0)
            raise Exception("Expected mask_roi_pool to be not None")

        loss_mask = {}
        if self.training:
            assert targets is not None
            assert pos_matched_idxs is not None
            assert mask_logits is not None
            gt_masks = [t["masks"] for t in targets]
            gt_labels = [t["labels"] for t in targets]  # [(nr_objs,), (nr_objs2,)]
            # batch_size, (nr_objs, 800, 800), (nr_objs2, 800, 800), (X, 47, 28, 28); X: total number of RoIs over the two images
            # print(len(gt_masks), gt_masks[0].shape, gt_masks[1].shape, mask_logits.shape)
            # print(gt_labels[0].shape, gt_labels[1].shape)
            rcnn_loss_mask = maskrcnn_loss(mask_logits, mask_proposals,
                                           gt_masks, gt_labels, pos_matched_idxs)
            loss_mask = {"loss_mask": rcnn_loss_mask}
        else:
            labels = [r["labels"] for r in result]
            masks_probs = maskrcnn_inference(mask_logits, labels)
            for mask_prob, r in zip(masks_probs, result):
                r["masks"] = mask_prob

        losses.update(loss_mask)

    # keep none checks in if conditional so torchscript will conditionally
    # compile each branch
    if self.keypoint_roi_pool is not None and self.keypoint_head is not None \
            and self.keypoint_predictor is not None:
        keypoint_proposals = [p["boxes"] for p in result]
        if self.training:
            # during training, only focus on positive boxes
            num_images = len(proposals)
            keypoint_proposals = []
            pos_matched_idxs = []
            assert matched_idxs is not None
            for img_id in range(num_images):
                pos = torch.nonzero(labels[img_id] > 0).squeeze(1)
                keypoint_proposals.append(proposals[img_id][pos])
                pos_matched_idxs.append(matched_idxs[img_id][pos])
        else:
            pos_matched_idxs = None

        keypoint_features = self.keypoint_roi_pool(features, keypoint_proposals, image_shapes)
        keypoint_features = self.keypoint_head(keypoint_features)
        keypoint_logits = self.keypoint_predictor(keypoint_features)

        loss_keypoint = {}
        if self.training:
            assert targets is not None
            assert pos_matched_idxs is not None
            gt_keypoints = [t["keypoints"] for t in targets]
            rcnn_loss_keypoint = keypointrcnn_loss(keypoint_logits, keypoint_proposals,
                                                   gt_keypoints, pos_matched_idxs)
            loss_keypoint = {"loss_keypoint": rcnn_loss_keypoint}
        else:
            assert keypoint_logits is not None
            assert keypoint_proposals is not None
            keypoints_probs, kp_scores = keypointrcnn_inference(
                keypoint_logits, keypoint_proposals)
            for keypoint_prob, kps, r in zip(keypoints_probs, kp_scores, result):
                r["keypoints"] = keypoint_prob
                r["keypoints_scores"] = kps

        losses.update(loss_keypoint)

    return result, losses
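
# The `triplercnn_loss` used above is specific to this codebase. Below is a minimal sketch
# of the attribute term it adds on top of the standard Fast R-CNN losses, assuming `attrs`
# is a list of per-proposal multi-hot attribute targets and `pos_weight` is the positive
# class weighting for BCE; the names, shapes, and exact weighting are assumptions.
import torch
import torch.nn.functional as F
from torchvision.models.detection.roi_heads import fastrcnn_loss


def triplercnn_loss_sketch(class_logits, box_regression, labels,
                           regression_targets, attr_logits, attrs, pos_weight):
    # standard Fast R-CNN classification and box regression losses
    loss_classifier, loss_box_reg = fastrcnn_loss(
        class_logits, box_regression, labels, regression_targets)
    # multi-label attribute loss over all sampled proposals
    attr_targets = torch.cat(attrs, dim=0).to(attr_logits.dtype)
    loss_attribute = F.binary_cross_entropy_with_logits(
        attr_logits, attr_targets, pos_weight=pos_weight)
    return loss_classifier, loss_box_reg, loss_attribute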

def forward(self, features, proposals, image_shapes, targets=None, task_heads=None):
    """
    Arguments:
        features (List[Tensor])
        proposals (List[Tensor[N, 4]])
        image_shapes (List[Tuple[H, W]])
        targets (List[Dict])
    """
    if targets is not None:
        for t in targets:
            # TODO: https://github.com/pytorch/pytorch/issues/26731
            floating_point_types = (torch.float, torch.double, torch.half)
            assert t["boxes"].dtype in floating_point_types, 'target boxes must be of float type'
            assert t["labels"].dtype == torch.int64, 'target labels must be of int64 type'

    if self.training:
        proposals, matched_idxs, labels, regression_targets = self.select_training_samples(proposals, targets)
    else:
        labels = None
        regression_targets = None
        matched_idxs = None

    box_features = self.box_roi_pool(features, proposals, image_shapes)
    box_features = self.box_head(box_features)
    class_logits, box_regression = self.box_predictor(box_features)
    task_results = {task_head.name: task_head(box_features) for task_head in task_heads}

    result = torch.jit.annotate(List[Dict[str, torch.Tensor]], [])
    losses = {}
    if self.training:
        assert labels is not None and regression_targets is not None
        loss_classifier, loss_box_reg = fastrcnn_loss(
            class_logits, box_regression, labels, regression_targets)
        losses = {
            "loss_classifier": loss_classifier,
            "loss_box_reg": loss_box_reg,
        }
        # Calculate losses for all the tasks
        for name, preds in task_results.items():
            actuals = []
            for idxs, target in zip(matched_idxs, targets):
                actuals.append(target[name][idxs])
            actuals = torch.cat(actuals, dim=0)
            # Discount the loss for the task so that the model still prioritises learning
            # the box / mask properly
            losses[f"loss_{name}"] = F.cross_entropy(preds, actuals)
    else:
        boxes, scores, labels = self.postprocess_detections(class_logits, box_regression,
                                                            proposals, image_shapes)
        boxes_per_image = [len(boxes_in_image) for boxes_in_image in proposals]
        if len(boxes_per_image) == 1:
            task_results_list = task_results
        else:
            task_results_list = {name: preds.split(boxes_per_image, 0)
                                 for name, preds in task_results.items()}
        num_images = len(boxes)
        for i in range(num_images):
            result.append(
                {
                    "boxes": boxes[i],
                    "labels": labels[i],
                    "scores": scores[i],
                    "task_results": {name: preds[i] for name, preds in task_results_list.items()},
                }
            )

    if self.has_mask():
        mask_proposals = [p["boxes"] for p in result]
        if self.training:
            assert matched_idxs is not None
            # during training, only focus on positive boxes
            num_images = len(proposals)
            mask_proposals = []
            pos_matched_idxs = []
            for img_id in range(num_images):
                pos = torch.nonzero(labels[img_id] > 0).squeeze(1)
                mask_proposals.append(proposals[img_id][pos])
                pos_matched_idxs.append(matched_idxs[img_id][pos])
        else:
            pos_matched_idxs = None

        if self.mask_roi_pool is not None:
            mask_features = self.mask_roi_pool(features, mask_proposals, image_shapes)
            mask_features = self.mask_head(mask_features)
            mask_logits = self.mask_predictor(mask_features)
        else:
            mask_logits = torch.tensor(0)
            raise Exception("Expected mask_roi_pool to be not None")

        loss_mask = {}
        if self.training:
            assert targets is not None
            assert pos_matched_idxs is not None
            assert mask_logits is not None
            gt_masks = [t["masks"] for t in targets]
            gt_labels = [t["labels"] for t in targets]
            rcnn_loss_mask = maskrcnn_loss(
                mask_logits, mask_proposals, gt_masks, gt_labels, pos_matched_idxs)
            loss_mask = {
                "loss_mask": rcnn_loss_mask
            }
        else:
            labels = [r["labels"] for r in result]
            masks_probs = maskrcnn_inference(mask_logits, labels)
            for mask_prob, r in zip(masks_probs, result):
                r["masks"] = mask_prob

        losses.update(loss_mask)

    return result, losses
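
# The forward above expects each entry in `task_heads` to be a callable module with a
# `.name` attribute whose output is scored with F.cross_entropy against `target[name]`.
# A minimal sketch of such a head; the feature size and number of classes here are
# assumptions for illustration.
import torch.nn as nn


class TaskHead(nn.Module):
    def __init__(self, name, in_features=1024, num_classes=3):
        super().__init__()
        self.name = name                      # key used in `task_results` and `loss_{name}`
        self.fc = nn.Linear(in_features, num_classes)

    def forward(self, box_features):
        # box_features: Tensor[num_proposals, in_features] produced by self.box_head
        return self.fc(box_features)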

def forward(self, features, proposals, image_shapes, targets=None):
    # type: (Dict[str, Tensor], List[Tensor], List[Tuple[int, int]], Optional[List[Dict[str, Tensor]]])
    """
    Arguments:
        features (List[Tensor])
        proposals (List[Tensor[N, 4]])
        image_shapes (List[Tuple[H, W]])
        targets (List[Dict])
    """
    if targets is not None:
        for t in targets:
            # TODO: https://github.com/pytorch/pytorch/issues/26731
            floating_point_types = (torch.float, torch.double, torch.half)
            assert t["boxes"].dtype in floating_point_types, 'target boxes must be of float type'
            assert t["labels"].dtype == torch.int64, 'target labels must be of int64 type'
            if self.has_keypoint():
                assert t["keypoints"].dtype == torch.float32, 'target keypoints must be of float type'

    if self.training:
        proposals, matched_idxs, labels, regression_targets = self.select_training_samples(
            proposals, targets)
    else:
        labels = None
        regression_targets = None
        matched_idxs = None

    # compute bbox embedding (Nongt_dim is the number of non-gt proposals used by the
    # relation module, defined elsewhere in this codebase)
    position_matrix = extract_position_matrix(proposals, Nongt_dim)
    position_embedding = extract_position_embedding(position_matrix, feat_dim=64)

    box_features = self.box_roi_pool(features, proposals, image_shapes)
    box_features = self.box_head(box_features, position_embedding)
    class_logits, box_regression = self.box_predictor(box_features)

    result = torch.jit.annotate(List[Dict[str, torch.Tensor]], [])
    losses = {}
    if self.training:
        assert labels is not None and regression_targets is not None
        loss_classifier, loss_box_reg = fastrcnn_loss(
            class_logits, box_regression, labels, regression_targets)
        losses = {
            "loss_classifier": loss_classifier,
            "loss_box_reg": loss_box_reg
        }
    else:
        boxes, scores, labels = self.postprocess_detections(
            class_logits, box_regression, proposals, image_shapes)
        num_images = len(boxes)
        for i in range(num_images):
            result.append({
                "boxes": boxes[i],
                "labels": labels[i],
                "scores": scores[i],
            })

    if self.has_mask():
        mask_proposals = [p["boxes"] for p in result]
        if self.training:
            assert matched_idxs is not None
            # during training, only focus on positive boxes
            num_images = len(proposals)
            mask_proposals = []
            pos_matched_idxs = []
            for img_id in range(num_images):
                pos = torch.nonzero(labels[img_id] > 0).squeeze(1)
                mask_proposals.append(proposals[img_id][pos])
                pos_matched_idxs.append(matched_idxs[img_id][pos])
        else:
            pos_matched_idxs = None

        if self.mask_roi_pool is not None:
            mask_features = self.mask_roi_pool(features, mask_proposals, image_shapes)
            mask_features = self.mask_head(mask_features)
            mask_logits = self.mask_predictor(mask_features)
        else:
            mask_logits = torch.tensor(0)
            raise Exception("Expected mask_roi_pool to be not None")

        loss_mask = {}
        if self.training:
            assert targets is not None
            assert pos_matched_idxs is not None
            assert mask_logits is not None
            gt_masks = [t["masks"] for t in targets]
            gt_labels = [t["labels"] for t in targets]
            rcnn_loss_mask = maskrcnn_loss(mask_logits, mask_proposals,
                                           gt_masks, gt_labels, pos_matched_idxs)
            loss_mask = {"loss_mask": rcnn_loss_mask}
        else:
            labels = [r["labels"] for r in result]
            masks_probs = maskrcnn_inference(mask_logits, labels)
            for mask_prob, r in zip(masks_probs, result):
                r["masks"] = mask_prob

        losses.update(loss_mask)

    # keep none checks in if conditional so torchscript will conditionally
    # compile each branch
    if self.keypoint_roi_pool is not None and self.keypoint_head is not None \
            and self.keypoint_predictor is not None:
        keypoint_proposals = [p["boxes"] for p in result]
        if self.training:
            # during training, only focus on positive boxes
            num_images = len(proposals)
            keypoint_proposals = []
            pos_matched_idxs = []
            assert matched_idxs is not None
            for img_id in range(num_images):
                pos = torch.nonzero(labels[img_id] > 0).squeeze(1)
                keypoint_proposals.append(proposals[img_id][pos])
                pos_matched_idxs.append(matched_idxs[img_id][pos])
        else:
            pos_matched_idxs = None

        keypoint_features = self.keypoint_roi_pool(features, keypoint_proposals, image_shapes)
        keypoint_features = self.keypoint_head(keypoint_features)
        keypoint_logits = self.keypoint_predictor(keypoint_features)

        loss_keypoint = {}
        if self.training:
            assert targets is not None
            assert pos_matched_idxs is not None
            gt_keypoints = [t["keypoints"] for t in targets]
            rcnn_loss_keypoint = keypointrcnn_loss(keypoint_logits, keypoint_proposals,
                                                   gt_keypoints, pos_matched_idxs)
            loss_keypoint = {"loss_keypoint": rcnn_loss_keypoint}
        else:
            assert keypoint_logits is not None
            assert keypoint_proposals is not None
            keypoints_probs, kp_scores = keypointrcnn_inference(
                keypoint_logits, keypoint_proposals)
            for keypoint_prob, kps, r in zip(keypoints_probs, kp_scores, result):
                r["keypoints"] = keypoint_prob
                r["keypoints_scores"] = kps

        losses.update(loss_keypoint)

    return result, losses
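
# The `extract_position_matrix` / `extract_position_embedding` helpers above follow the
# geometric relation features of "Relation Networks for Object Detection" (Hu et al., 2018).
# A sketch of the sinusoidal embedding step, assuming `position_matrix` has shape
# [N, M, 4] (log-space box offsets) and `feat_dim` is divisible by 8; the implementation
# in this codebase may differ in details.
import torch


def extract_position_embedding_sketch(position_matrix, feat_dim=64, wave_length=1000.0):
    # geometrically spaced frequencies, feat_dim/8 of them, as in the paper
    num_freq = feat_dim // 8
    dim_t = torch.arange(num_freq, dtype=torch.float32, device=position_matrix.device)
    dim_t = wave_length ** (8.0 / feat_dim * dim_t)
    # [N, M, 4, 1] / [num_freq] -> [N, M, 4, num_freq]
    pos = 100.0 * position_matrix.unsqueeze(-1) / dim_t
    # concatenate sin and cos along the frequency dim, then flatten to [N, M, feat_dim]
    embedding = torch.cat([pos.sin(), pos.cos()], dim=-1)
    return embedding.flatten(start_dim=2)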

def roi_heads_forward(self, features, proposals, image_shapes, targets=None):
    """Hack into the torchvision model to obtain features for training the caption model;
    training is assumed to be False.
    https://github.com/pytorch/vision/blob/master/torchvision/models/detection/roi_heads.py
    """
    box_features = self.box_roi_pool(features, proposals, image_shapes)
    box_features = self.box_head(box_features)
    class_logits, box_regression = self.box_predictor(box_features)

    result = torch.jit.annotate(List[Dict[str, torch.Tensor]], [])
    boxes, scores, labels, box_features = roi_postprocess_detections(
        self, class_logits, box_regression, proposals, image_shapes, box_features)
    num_images = len(boxes)
    for i in range(num_images):
        result.append({
            "boxes_features": box_features[i],
            "boxes": boxes[i],
            "labels": labels[i],
            "scores": scores[i]
        })

    if self.has_mask():
        mask_proposals = [p["boxes"] for p in result]
        if self.mask_roi_pool is not None:
            mask_features = self.mask_roi_pool(features, mask_proposals, image_shapes)
            mask_features = self.mask_head(mask_features)
            mask_logits = self.mask_predictor(mask_features)
        else:
            mask_logits = torch.tensor(0)
            raise Exception("Expected mask_roi_pool to be not None")

        labels = [r["labels"] for r in result]
        masks_probs = maskrcnn_inference(mask_logits, labels)
        for mask_prob, r in zip(masks_probs, result):
            r["masks_features"] = mask_features
            r["masks"] = mask_prob

    # keep none checks in if conditional so torchscript will conditionally
    # compile each branch
    if self.keypoint_roi_pool is not None and self.keypoint_head is not None \
            and self.keypoint_predictor is not None:
        keypoint_proposals = [p["boxes"] for p in result]
        keypoint_features = self.keypoint_roi_pool(features, keypoint_proposals, image_shapes)
        keypoint_features = self.keypoint_head(keypoint_features)
        keypoint_logits = self.keypoint_predictor(keypoint_features)

        assert keypoint_logits is not None
        assert keypoint_proposals is not None
        keypoints_probs, kp_scores = keypointrcnn_inference(keypoint_logits, keypoint_proposals)
        for keypoint_prob, kps, r in zip(keypoints_probs, kp_scores, result):
            r["keypoints_features"] = keypoint_features
            r["keypoints"] = keypoint_prob
            r["keypoints_scores"] = kps

    return result, dict()
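
# Usage sketch for the hook above: bind `roi_heads_forward` onto the RoIHeads of a
# torchvision detection model so that detections come back with the pooled
# `boxes_features` / `masks_features` needed by the caption model. The specific model
# and weights below are only an example choice.
import types
import torchvision

model = torchvision.models.detection.maskrcnn_resnet50_fpn(pretrained=True)
model.roi_heads.forward = types.MethodType(roi_heads_forward, model.roi_heads)
model.eval()  # the hook assumes eval mode (no targets, no losses)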

def forward(self, features, proposals, image_shapes, targets=None):
    """
    Arguments:
        features (List[Tensor])
        proposals (List[Tensor[N, 4]])
        image_shapes (List[Tuple[H, W]])
        targets (List[Dict])
    """
    feature_dims = np.array([features[layer].shape[1] for layer in features])
    if np.all(feature_dims == self.in_channels):
        # RGB only
        features_rgb = features
    elif np.all(feature_dims == 2 * self.in_channels):
        # RGB-depth 6 channel, two backbones
        from collections import OrderedDict
        features_rgb = OrderedDict()
        for key in features.keys():
            features_rgb[key] = features[key][:, :self.in_channels]
    else:
        # RGB-D 4 channel
        features_rgb = features

    # Detection
    if self.training:
        proposals, matched_idxs, labels, regression_targets = self.select_training_samples(
            proposals, targets)

    box_features = self.box_roi_pool(features_rgb, proposals, image_shapes)
    box_features = self.box_head(box_features)
    class_logits, box_regression = self.box_predictor(box_features)

    result, losses = [], {}
    if self.training:
        loss_classifier, loss_box_reg = fastrcnn_loss(
            class_logits, box_regression, labels, regression_targets)
        losses = dict(loss_classifier=loss_classifier, loss_box_reg=loss_box_reg)
    else:
        boxes, scores, labels = self.postprocess_detections(
            class_logits, box_regression, proposals, image_shapes)
        num_images = len(boxes)
        for i in range(num_images):
            if boxes[i].shape[0] == 0:
                return result, losses
            result.append(
                dict(
                    boxes=boxes[i],
                    labels=labels[i],
                    scores=scores[i],
                ))

    # Proposals selected by the detection stage are shared by all other branches
    box_proposals = [p["boxes"] for p in result]
    if self.training:
        # during training, only focus on positive boxes
        num_images = len(proposals)
        box_proposals = []
        pos_matched_idxs = []
        for img_id in range(num_images):
            pos = torch.nonzero(labels[img_id] > 0).squeeze(1)
            box_proposals.append(proposals[img_id][pos])
            pos_matched_idxs.append(matched_idxs[img_id][pos])

    # Multi-stage RoIAlign shared by all other branches
    shared_features_rgb = self.shared_roi_pool(features_rgb, box_proposals, image_shapes)

    # Segmentation
    mask_features = self.mask_head(shared_features_rgb)
    mask_logits = self.mask_predictor(mask_features)

    loss_mask = {}
    masks_on_features = None
    if self.training:
        gt_masks = [t["masks"] for t in targets]
        gt_labels = [t["labels"] for t in targets]
        gt_is = [t["instance_ids"] for t in targets]
        loss_mask, masks_for_paf, masks_for_vote = maskrcnn_loss_updated(
            mask_logits, box_proposals, gt_masks, gt_labels, gt_is, pos_matched_idxs)
        loss_mask = dict(loss_mask=loss_mask)
    else:
        ref_labels = [r["labels"] for r in result]
        masks_probs = maskrcnn_inference(mask_logits, ref_labels)
        for mask_prob, r in zip(masks_probs, result):
            r["masks"] = mask_prob

    losses.update(loss_mask)

    if self.with_paf_branch:
        paf_features = self.paf_head(shared_features_rgb)
        paf_logits = self.paf_predictor(paf_features)

        loss_paf = {}
        if self.training:
            gt_pafs = [t["target_pafs"] for t in targets]
            loss_paf = paf_loss_updated(paf_logits, masks_for_paf, pos_matched_idxs,
                                        gt_pafs, gt_labels)
            if torch.isnan(loss_paf):
                print('error')
            loss_paf = dict(loss_paf=loss_paf)
        else:
            paf_ref_labels = torch.cat(ref_labels) - 1
            N, _, H, W = paf_logits.shape
            paf_logits = paf_logits.view(N, -1, 2, H, W)[torch.arange(N), paf_ref_labels]
            paf_probs = [paf_logits]
            for paf_prob, r in zip(paf_probs, result):
                r["pafs"] = F.normalize(paf_prob, dim=1)

        losses.update(loss_paf)

    if self.input_mode == config.INPUT_RGBD:
        # shared_features_rgb actually has 4-channel RGB-D input here
        shared_features = self.attention_block(shared_features_rgb)
        bs, c, _, _ = shared_features.shape
        # shared_features = shared_features.view(bs, c, -1)  # for conv1d

        if self.with_3d_keypoints:
            keypoint_features = self.vote_keypoint_head(shared_features)
            # keypoint_features = keypoint_features.view(bs, self.keypoint_dim_reduced, 14, 14)
            keypoint_offsets = self.vote_keypoint_predictor(keypoint_features)

            loss_keypoint = {}
            if self.training:
                gt_3d_keypoints = [t["frame"][:, :3] for t in targets]
                ori_depth = [t["ori_image_depth"] for t in targets]
                gt_labels = [t["labels"] for t in targets]
                loss_keypoint = vote_keypoint_loss(
                    keypoint_offsets, box_proposals, ori_depth, gt_3d_keypoints,
                    pos_matched_idxs, masks_for_vote, gt_labels)
                loss_keypoint = dict(loss_keypoint=loss_keypoint)
            else:
                ref_labels = torch.cat(ref_labels) - 1
                N, _, H, W = keypoint_offsets.shape
                keypoint_offsets = keypoint_offsets.view(N, -1, 3, H, W)[torch.arange(N), ref_labels]
                keypoints = [keypoint_offsets]
                for kps, r in zip(keypoints, result):
                    r["keypoints_offset"] = kps

            losses.update(loss_keypoint)

        if self.with_axis_keypoints:
            keypoint_features = self.orientation_keypoint_head(shared_features)
            # keypoint_features = keypoint_features.view(bs, self.keypoint_dim_reduced, 14, 14)
            axis_keypoint_offsets = self.orientation_keypoint_predictor(keypoint_features)
            N, _, H, W = axis_keypoint_offsets.shape
            axis_keypoint_offsets = axis_keypoint_offsets.view(N, -1, 2, 3, H, W)

            loss_orientation = {}
            if self.training:
                gt_3d_keypoints = [t["axis_keypoints"] for t in targets]
                ori_depth = [t["ori_image_depth"] for t in targets]
                loss_orientation = vote_orientation_loss(
                    axis_keypoint_offsets, box_proposals, ori_depth, gt_3d_keypoints,
                    pos_matched_idxs, masks_for_vote, gt_labels)
                loss_orientation = dict(loss_orientation=loss_orientation)
            else:
                axis_keypoint_offsets = axis_keypoint_offsets[torch.arange(N), ref_labels]
                axis_keypoints = [axis_keypoint_offsets]
                for kps, r in zip(axis_keypoints, result):
                    r["axis_keypoint_offsets"] = kps

            losses.update(loss_orientation)

        if self.regress_axis:
            keypoint_features = self.axis_head(shared_features)
            keypoint_features = keypoint_features.view(bs, self.keypoint_dim_reduced, 14, 14)
            axis_keypoint_offsets = self.axis_predictor(keypoint_features)
            N, _, H, W = axis_keypoint_offsets.shape
            axis_keypoint_offsets = axis_keypoint_offsets.view(N, -1, 4, H, W)

            loss_axis = {}
            if self.training:
                gt_3d_keypoints = [t["axis_keypoints"] for t in targets]
                ori_depth = [t["ori_image_depth"] for t in targets]
                loss_axis = vote_axis_loss(axis_keypoint_offsets, box_proposals, ori_depth,
                                           gt_3d_keypoints, pos_matched_idxs,
                                           masks_for_vote, gt_labels)
                loss_axis = dict(loss_axis=loss_axis)
            else:
                axis_keypoint_offsets = axis_keypoint_offsets[torch.arange(N), ref_labels]
                axis_keypoints = [axis_keypoint_offsets]
                for kps, r in zip(axis_keypoints, result):
                    r["axis_offsets"] = kps

            losses.update(loss_axis)

        if self.estimate_norm_vector:
            keypoint_features = self.norm_vector_head(shared_features)
            keypoint_features = keypoint_features.view(bs, self.keypoint_dim_reduced, 14, 14)
            norm_vectors = self.norm_vector_predictor(keypoint_features)
            N, _, H, W = norm_vectors.shape
            norm_vectors = norm_vectors.view(N, -1, 3, H, W)

            loss_norm_vector = {}
            if self.training:
                gt_3d_keypoints = [t["axis_keypoints"] for t in targets]
                loss_norm_vector = calculate_norm_vectors(
                    norm_vectors, gt_3d_keypoints, pos_matched_idxs,
                    masks_for_vote, gt_labels)
                loss_norm_vector = dict(loss_norm_vector=loss_norm_vector)
            else:
                norm_vectors = F.normalize(norm_vectors, dim=2)
                norm_vectors = norm_vectors[torch.arange(N), ref_labels]
                estimate_norm_vectors = [norm_vectors]
                for norm_v, r in zip(estimate_norm_vectors, result):
                    r["norm_vector"] = norm_v

            losses.update(loss_norm_vector)

    return result, losses
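
# The `shared_roi_pool` used above (one RoIAlign whose output is reused by the mask, PAF,
# and voting branches) can be built with torchvision's MultiScaleRoIAlign. The feature map
# names, output size, and sampling ratio below are assumptions for illustration only.
from torchvision.ops import MultiScaleRoIAlign

shared_roi_pool = MultiScaleRoIAlign(
    featmap_names=["0", "1", "2", "3"],  # FPN levels used for pooling
    output_size=14,                      # 14x14 RoIs for the dense branches
    sampling_ratio=2,
)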