def batch_label_anchors(self, gt_boxes, gt_classes, filter_valid=True):
    """Labels a batch of anchors with ground truth inputs.

    Args:
        gt_boxes: list of float tensors with shape [N_i, 4], one per image. Each row
            stores [y0, x0, y1, x1] for the four corners of a box.
        gt_classes: list of integer tensors of ground truth classes, one per image.
        filter_valid: filter out any boxes w/ gt class <= -1 before assigning.

    Returns:
        cls_targets_out, box_targets_out: per-level lists of stacked target tensors
            with shape [batch, height_l, width_l, ...].
        num_positives_out: tensor with the positive anchor count for each image.
    """
    batch_size = len(gt_boxes)
    assert batch_size == len(gt_classes)
    num_levels = self.anchors.max_level - self.anchors.min_level + 1
    cls_targets_out = [[] for _ in range(num_levels)]
    box_targets_out = [[] for _ in range(num_levels)]
    num_positives_out = []

    anchor_box_list = BoxList(self.anchors.boxes)
    for i in range(batch_size):
        last_sample = i == batch_size - 1
        if filter_valid:
            valid_idx = gt_classes[i] > -1  # filter gt targets w/ label <= -1
            gt_box_list = BoxList(gt_boxes[i][valid_idx])
            gt_class_i = gt_classes[i][valid_idx]
        else:
            gt_box_list = BoxList(gt_boxes[i])
            gt_class_i = gt_classes[i]
        cls_targets, box_targets, matches = self.target_assigner.assign(
            anchor_box_list, gt_box_list, gt_class_i)

        # class labels start from 1 and the background class = -1
        cls_targets = (cls_targets - 1).long()

        # Unpack the flat cls/box target arrays into per-level feature maps.
        count = 0
        for level in range(self.anchors.min_level, self.anchors.max_level + 1):
            level_idx = level - self.anchors.min_level
            feat_size = self.anchors.feat_sizes[level]
            steps = feat_size[0] * feat_size[1] * self.anchors.get_anchors_per_location()
            cls_targets_out[level_idx].append(
                cls_targets[count:count + steps].view([feat_size[0], feat_size[1], -1]))
            box_targets_out[level_idx].append(
                box_targets[count:count + steps].view([feat_size[0], feat_size[1], -1]))
            count += steps
            if last_sample:
                cls_targets_out[level_idx] = torch.stack(cls_targets_out[level_idx])
                box_targets_out[level_idx] = torch.stack(box_targets_out[level_idx])

        num_positives_out.append((matches.match_results > -1).float().sum())
        if last_sample:
            num_positives_out = torch.stack(num_positives_out)

    return cls_targets_out, box_targets_out, num_positives_out
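# Usage sketch for batch_label_anchors (illustrative, not from the original file;
# the Anchors / AnchorLabeler constructor arguments below are assumptions, not
# confirmed signatures):
#
#   anchors = Anchors(min_level=3, max_level=7, num_scales=3,
#                     aspect_ratios=[1., 2., 0.5], anchor_scale=4., image_size=512)
#   labeler = AnchorLabeler(anchors, num_classes=90, match_threshold=0.5)
#
#   # gt_boxes / gt_classes: one tensor per image; padded rows carry class -1,
#   # so filter_valid=True drops them before assignment.
#   cls_t, box_t, num_pos = labeler.batch_label_anchors(gt_boxes, gt_classes)
#   # cls_t[l]: [B, H_l, W_l, A] long tensor, -1 marks background anchors
#   # box_t[l]: [B, H_l, W_l, A * 4] float regression targets
#   # num_pos:  [B] tensor of positive-anchor counts, one entry per image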
def label_anchors(self, gt_boxes, gt_labels):
    """Labels anchors with ground truth inputs.

    Args:
        gt_boxes: A float tensor with shape [N, 4] representing ground truth boxes.
            For each row, it stores [y0, x0, y1, x1] for the four corners of a box.
        gt_labels: An integer tensor with shape [N, 1] representing ground truth classes.

    Returns:
        cls_targets_out: ordered list with one entry per level from min_level to
            max_level. Each entry is a tensor with shape [height_l, width_l, num_anchors],
            where height_l and width_l are the dimensions of the class logits at the
            l-th level.
        box_targets_out: ordered list with one entry per level from min_level to
            max_level. Each entry is a tensor with shape [height_l, width_l, num_anchors * 4],
            where height_l and width_l are the dimensions of the bounding box regression
            output at the l-th level.
        num_positives: scalar tensor storing the number of positives in an image.
    """
    cls_targets_out = []
    box_targets_out = []

    gt_box_list = BoxList(gt_boxes)
    anchor_box_list = BoxList(self.anchors.boxes)

    # cls_weights, box_weights are not used
    cls_targets, _, box_targets, _, matches = self.target_assigner.assign(
        anchor_box_list, gt_box_list, gt_labels)

    # class labels start from 1 and the background class = -1
    cls_targets -= 1
    cls_targets = cls_targets.long()

    # Unpack the flat cls/box target arrays into per-level feature maps.
    count = 0
    for level in range(self.anchors.min_level, self.anchors.max_level + 1):
        feat_size = self.feat_size[level]
        steps = feat_size ** 2 * self.anchors.get_anchors_per_location()
        indices = torch.arange(count, count + steps, device=cls_targets.device)
        count += steps
        cls_targets_out.append(
            torch.index_select(cls_targets, 0, indices).view([feat_size, feat_size, -1]))
        box_targets_out.append(
            torch.index_select(box_targets, 0, indices).view([feat_size, feat_size, -1]))

    num_positives = (matches.match_results != -1).float().sum()

    return cls_targets_out, box_targets_out, num_positives
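# Worked example of the unpack arithmetic above (illustrative numbers, not from
# the original): with min_level=3, max_level=7, a 512x512 image and 9 anchors
# per location, the flat target tensors are split level by level as
#   level 3: 64 * 64 * 9 = 36864 rows -> view(64, 64, 9) / view(64, 64, 36)
#   level 4: 32 * 32 * 9 =  9216 rows
#   level 5: 16 * 16 * 9 =  2304 rows
#   level 6:  8 *  8 * 9 =   576 rows
#   level 7:  4 *  4 * 9 =   144 rows
# count advances by `steps` each iteration, so the index ranges tile the full
# anchor set exactly once, in the same order the anchors were generated.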
def label_anchors(self, gt_boxes, gt_classes, filter_valid=True):
    """Labels anchors with ground truth inputs.

    Args:
        gt_boxes: A float tensor with shape [N, 4] representing ground truth boxes.
            For each row, it stores [y0, x0, y1, x1] for the four corners of a box.
        gt_classes: An integer tensor with shape [N, 1] representing ground truth classes.
        filter_valid: Filter out any boxes w/ gt class <= -1 before assigning.

    Returns:
        cls_targets_out: ordered list with one entry per level from min_level to
            max_level. Each entry is a tensor with shape [height_l, width_l, num_anchors],
            where height_l and width_l are the dimensions of the class logits at the
            l-th level.
        box_targets_out: ordered list with one entry per level from min_level to
            max_level. Each entry is a tensor with shape [height_l, width_l, num_anchors * 4],
            where height_l and width_l are the dimensions of the bounding box regression
            output at the l-th level.
        num_positives: scalar tensor storing the number of positives in an image.
    """
    cls_targets_out = []
    box_targets_out = []

    if filter_valid:
        valid_idx = gt_classes > -1  # filter gt targets w/ label <= -1
        gt_boxes = gt_boxes[valid_idx]
        gt_classes = gt_classes[valid_idx]

    cls_targets, box_targets, matches = self.target_assigner.assign(
        BoxList(self.anchors.boxes), BoxList(gt_boxes), gt_classes)

    # class labels start from 1 and the background class = -1
    cls_targets = (cls_targets - 1).long()

    # Unpack the flat cls/box target arrays into per-level feature maps.
    count = 0
    for level in range(self.anchors.min_level, self.anchors.max_level + 1):
        feat_size = self.anchors.feat_sizes[level]
        steps = feat_size[0] * feat_size[1] * self.anchors.get_anchors_per_location()
        cls_targets_out.append(
            cls_targets[count:count + steps].view([feat_size[0], feat_size[1], -1]))
        box_targets_out.append(
            box_targets[count:count + steps].view([feat_size[0], feat_size[1], -1]))
        count += steps

    num_positives = (matches.match_results > -1).float().sum()

    return cls_targets_out, box_targets_out, num_positives
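# Usage sketch for the single-image label_anchors (illustrative; constructor
# arguments are assumptions, not confirmed signatures):
#
#   labeler = AnchorLabeler(anchors, num_classes=90, match_threshold=0.5)
#   cls_t, box_t, num_pos = labeler.label_anchors(gt_boxes, gt_classes)
#   # cls_t[l]: [H_l, W_l, A] long tensor of class targets, -1 = background
#   # box_t[l]: [H_l, W_l, A * 4] float tensor of box regression targets
#   # num_pos:  scalar count of anchors matched to a ground truth box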
def batch_label_anchors(self, batch_size: int, gt_boxes, gt_classes):
    """Labels a batch of anchors with ground truth inputs.

    Args:
        batch_size: number of images in the batch.
        gt_boxes: list of float tensors with shape [N_i, 4], one per image.
        gt_classes: list of integer tensors of ground truth classes, one per image.

    Returns:
        cls_targets_out, box_targets_out: per-level lists of stacked target tensors.
        num_positives_out: tensor with the positive anchor count for each image.
    """
    num_levels = self.anchors.max_level - self.anchors.min_level + 1
    cls_targets_out = [[] for _ in range(num_levels)]
    box_targets_out = [[] for _ in range(num_levels)]
    num_positives_out = []

    # FIXME this may be a bottleneck, would be faster if batched, or should be done in loader/dataset?
    anchor_box_list = BoxList(self.anchors.boxes)
    for i in range(batch_size):
        last_sample = i == batch_size - 1
        # cls_weights, box_weights are not used
        cls_targets, _, box_targets, _, matches = self.target_assigner.assign(
            anchor_box_list, BoxList(gt_boxes[i]), gt_classes[i])

        # class labels start from 1 and the background class = -1
        cls_targets -= 1
        cls_targets = cls_targets.long()

        # Unpack the flat cls/box target arrays into per-level feature maps.
        for level in range(self.anchors.min_level, self.anchors.max_level + 1):
            level_index = level - self.anchors.min_level
            feat_size = self.feat_size[level]
            indices = self._get_indices(cls_targets.device, level)
            cls_targets_out[level_index].append(
                torch.index_select(cls_targets, 0, indices).view([feat_size, feat_size, -1]))
            box_targets_out[level_index].append(
                torch.index_select(box_targets, 0, indices).view([feat_size, feat_size, -1]))
            if last_sample:
                cls_targets_out[level_index] = torch.stack(cls_targets_out[level_index])
                box_targets_out[level_index] = torch.stack(box_targets_out[level_index])

        num_positives_out.append((matches.match_results != -1).float().sum())
        if last_sample:
            num_positives_out = torch.stack(num_positives_out)

    return cls_targets_out, box_targets_out, num_positives_out
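# _get_indices is referenced above but not defined in this section. A plausible
# sketch (an assumption, not the original helper): cache a torch.arange over the
# flat anchor rows belonging to each level, since the anchors are laid out
# level-major, so index_select can pull out one level's targets cheaply:
#
#   def _get_indices(self, device, level):
#       # hypothetical per-level cache; self._indices_cache is assumed to be
#       # initialized to an empty dict in __init__
#       if level not in self._indices_cache:
#           a = self.anchors.get_anchors_per_location()
#           start = sum(self.feat_size[l] ** 2 * a
#                       for l in range(self.anchors.min_level, level))
#           steps = self.feat_size[level] ** 2 * a
#           self._indices_cache[level] = torch.arange(start, start + steps)
#       return self._indices_cache[level].to(device)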