def _init_box_head(cls, cfg, input_shape):
    """Assemble the box pooler, box head and WSDDN predictor.

    Args:
        cfg: config node. Reads ``MODEL.ROI_HEADS.IN_FEATURES`` and the
            ``MODEL.ROI_BOX_HEAD.POOLER_*`` entries; ``cfg`` itself is also
            forwarded to ``build_box_head`` and ``WSDDNOutputLayers``.
        input_shape: mapping from feature name to an object exposing
            ``stride`` and ``channels``.

    Returns:
        dict with the keys ``box_in_features``, ``box_pooler``, ``box_head``
        and ``box_predictor``.

    Raises:
        AssertionError: if the selected features do not all report the same
            channel count.
    """
    in_features = cfg.MODEL.ROI_HEADS.IN_FEATURES
    roi_box_cfg = cfg.MODEL.ROI_BOX_HEAD
    resolution = roi_box_cfg.POOLER_RESOLUTION
    # One scale per input feature: the reciprocal of that feature's stride.
    scales = tuple(1.0 / input_shape[name].stride for name in in_features)
    ratio = roi_box_cfg.POOLER_SAMPLING_RATIO
    kind = roi_box_cfg.POOLER_TYPE

    # The same head/predictor is shared by every feature map (as in FPN),
    # so all of them must provide an identical number of channels.
    channel_counts = [input_shape[name].channels for name in in_features]
    assert len(set(channel_counts)) == 1, channel_counts
    shared_channels = channel_counts[0]

    box_pooler = ROIPooler(
        output_size=resolution,
        scales=scales,
        sampling_ratio=ratio,
        pooler_type=kind,
    )

    # "box head" and "box predictor" are kept as two objects for historical
    # reasons; they are always used together.
    pooled_shape = ShapeSpec(channels=shared_channels, height=resolution, width=resolution)
    box_head = build_box_head(cfg, pooled_shape)
    box_predictor = WSDDNOutputLayers(cfg, box_head.output_shape)

    return {
        "box_in_features": in_features,
        "box_pooler": box_pooler,
        "box_head": box_head,
        "box_predictor": box_predictor,
    }
def _init_box_head(cls, cfg, input_shape):
    """Build the rotated-box pooler, box head and rotated box predictor.

    Same construction as the other box-head initializers, except that the
    predictor is ``RotatedFastRCNNOutputLayers`` and the pooler type is
    required to be ``"ROIAlignRotated"``.

    Args:
        cfg: config node. Reads ``MODEL.ROI_HEADS.IN_FEATURES`` and the
            ``MODEL.ROI_BOX_HEAD.POOLER_*`` entries; ``cfg`` is also forwarded
            to ``build_box_head`` and ``RotatedFastRCNNOutputLayers``.
        input_shape: mapping from feature name to an object exposing
            ``stride`` and ``channels``.

    Returns:
        dict with the keys ``box_in_features``, ``box_pooler``, ``box_head``
        and ``box_predictor``.

    Raises:
        AssertionError: if the pooler type is not ``"ROIAlignRotated"``, or if
            the selected features do not all report the same channel count.
    """
    # fmt: off
    in_features       = cfg.MODEL.ROI_HEADS.IN_FEATURES
    pooler_resolution = cfg.MODEL.ROI_BOX_HEAD.POOLER_RESOLUTION
    pooler_scales     = tuple(1.0 / input_shape[k].stride for k in in_features)
    sampling_ratio    = cfg.MODEL.ROI_BOX_HEAD.POOLER_SAMPLING_RATIO
    pooler_type       = cfg.MODEL.ROI_BOX_HEAD.POOLER_TYPE
    # fmt: on
    assert pooler_type in ["ROIAlignRotated"], pooler_type

    # A single head is shared by every input feature map, so the channel
    # counts must agree. Verify it here instead of silently assuming it, so a
    # misconfiguration fails at construction time with the offending counts.
    in_channels = [input_shape[f].channels for f in in_features]
    assert len(set(in_channels)) == 1, in_channels
    in_channels = in_channels[0]

    box_pooler = ROIPooler(
        output_size=pooler_resolution,
        scales=pooler_scales,
        sampling_ratio=sampling_ratio,
        pooler_type=pooler_type,
    )
    box_head = build_box_head(
        cfg, ShapeSpec(channels=in_channels, height=pooler_resolution, width=pooler_resolution)
    )
    # The predictor class is what distinguishes this from the standard ROI heads.
    box_predictor = RotatedFastRCNNOutputLayers(cfg, box_head.output_shape)
    return {
        "box_in_features": in_features,
        "box_pooler": box_pooler,
        "box_head": box_head,
        "box_predictor": box_predictor,
    }
def _init_box_head(self, cfg):
    """Create ``self.box_pooler``, ``self.box_head`` and ``self.box_predictor``.

    Args:
        cfg: config node. The ``MODEL.ROI_BOX_HEAD.POOLER_*`` entries are read
            here and ``cfg`` is forwarded to ``build_box_head``.

    Raises:
        AssertionError: if the features listed in ``self.in_features`` do not
            all have the same entry in ``self.feature_channels``.
    """
    roi_box_cfg = cfg.MODEL.ROI_BOX_HEAD
    resolution = roi_box_cfg.POOLER_RESOLUTION
    # One scale per input feature: the reciprocal of its stride.
    scales = tuple(1.0 / self.feature_strides[name] for name in self.in_features)
    ratio = roi_box_cfg.POOLER_SAMPLING_RATIO
    kind = roi_box_cfg.POOLER_TYPE

    # The predictors are shared across all feature maps (as in FPN), hence
    # every feature map has to provide the same number of channels.
    channel_counts = [self.feature_channels[name] for name in self.in_features]
    assert len(set(channel_counts)) == 1, channel_counts
    shared_channels = channel_counts[0]

    self.box_pooler = ROIPooler(
        output_size=resolution,
        scales=scales,
        sampling_ratio=ratio,
        pooler_type=kind,
    )

    # "box head" and "box predictor" are separate objects for historical
    # reasons; they are always used together.
    pooled_shape = ShapeSpec(channels=shared_channels, height=resolution, width=resolution)
    self.box_head = build_box_head(cfg, pooled_shape)
    self.box_predictor = FastRCNNOutputLayers(
        self.box_head.output_size,
        self.num_classes,
        self.cls_agnostic_bbox_reg,
    )
def _init_box_head(cls, cfg, input_shape):
    """Assemble pooler, box head, WSDDN predictor and the OICR refinery layers.

    Args:
        cfg: config node. Reads ``MODEL.ROI_HEADS.IN_FEATURES``, the
            ``MODEL.ROI_BOX_HEAD`` pooler entries and ``CLS_AGNOSTIC_BBOX_REG``,
            ``WSL.REFINE_NUM`` / ``REFINE_MIST`` / ``REFINE_REG`` / ``VIS_TEST``,
            ``OUTPUT_DIR`` and ``VIS_PERIOD``. ``cfg`` is also forwarded to the
            head and output-layer constructors.
        input_shape: mapping from feature name to an object exposing
            ``stride`` and ``channels``.

    Returns:
        dict with the keys ``box_in_features``, ``box_pooler``, ``box_head``,
        ``box_predictor``, ``output_dir``, ``vis_test``, ``vis_period``,
        ``refine_K``, ``refine_mist``, ``refine_reg``, ``box_refinery`` and
        ``cls_agnostic_bbox_reg``.

    Raises:
        AssertionError: if the selected features do not all report the same
            channel count.
    """
    in_features = cfg.MODEL.ROI_HEADS.IN_FEATURES
    roi_box_cfg = cfg.MODEL.ROI_BOX_HEAD
    resolution = roi_box_cfg.POOLER_RESOLUTION
    scales = tuple(1.0 / input_shape[name].stride for name in in_features)
    ratio = roi_box_cfg.POOLER_SAMPLING_RATIO
    kind = roi_box_cfg.POOLER_TYPE

    # Predictors are shared across feature maps, so channel counts must match.
    channel_counts = [input_shape[name].channels for name in in_features]
    assert len(set(channel_counts)) == 1, channel_counts
    shared_channels = channel_counts[0]

    box_pooler = ROIPooler(
        output_size=resolution,
        scales=scales,
        sampling_ratio=ratio,
        pooler_type=kind,
    )

    # "box head" and "box predictor" are separate objects for historical
    # reasons; they are always used together.
    pooled_shape = ShapeSpec(channels=shared_channels, height=resolution, width=resolution)
    box_head = build_box_head(cfg, pooled_shape)
    box_predictor = WSDDNOutputLayers(cfg, box_head.output_shape)

    cls_agnostic_bbox_reg = roi_box_cfg.CLS_AGNOSTIC_BBOX_REG

    wsl_cfg = cfg.WSL
    num_refinements = wsl_cfg.REFINE_NUM
    refine_mist = wsl_cfg.REFINE_MIST
    refine_reg = wsl_cfg.REFINE_REG

    # One refinement output layer per stage; each receives its stage index.
    box_refinery = [
        OICROutputLayers(cfg, box_head.output_shape, stage)
        for stage in range(num_refinements)
    ]

    return {
        "box_in_features": in_features,
        "box_pooler": box_pooler,
        "box_head": box_head,
        "box_predictor": box_predictor,
        "output_dir": cfg.OUTPUT_DIR,
        "vis_test": wsl_cfg.VIS_TEST,
        "vis_period": cfg.VIS_PERIOD,
        "refine_K": num_refinements,
        "refine_mist": refine_mist,
        "refine_reg": refine_reg,
        "box_refinery": box_refinery,
        "cls_agnostic_bbox_reg": cls_agnostic_bbox_reg,
    }
def _init_box_head(self, cfg):
    """Create the pooler plus three head/predictor pairs on ``self``.

    The pairs are, in construction order: association, box, and light
    direction. All three heads are built by ``build_box_head`` from the same
    pooled-feature shape.

    Args:
        cfg: config node. The ``MODEL.ROI_BOX_HEAD.POOLER_*`` entries are read
            here and ``cfg`` is forwarded to ``build_box_head``.

    Raises:
        AssertionError: if the features listed in ``self.in_features`` do not
            all have the same entry in ``self.feature_channels``.
    """
    roi_box_cfg = cfg.MODEL.ROI_BOX_HEAD
    resolution = roi_box_cfg.POOLER_RESOLUTION
    scales = tuple(1.0 / self.feature_strides[name] for name in self.in_features)
    ratio = roi_box_cfg.POOLER_SAMPLING_RATIO
    kind = roi_box_cfg.POOLER_TYPE

    # All input features must agree on the channel count.
    channel_counts = [self.feature_channels[name] for name in self.in_features]
    assert len(set(channel_counts)) == 1, channel_counts
    shared_channels = channel_counts[0]

    self.box_pooler = ROIPooler(
        output_size=resolution,
        scales=scales,
        sampling_ratio=ratio,
        pooler_type=kind,
    )

    pooled_shape = ShapeSpec(channels=shared_channels, width=resolution, height=resolution)

    # Association branch: predicts over ``num_classes - 1`` classes.
    self.association_box_head = build_box_head(cfg, pooled_shape)
    self.association_box_predictor = FastRCNNOutputLayers(
        self.association_box_head.output_size,
        self.num_classes - 1,
        cls_agnostic_bbox_reg=False,
    )

    # Regular box branch.
    self.box_head = build_box_head(cfg, pooled_shape)
    self.box_predictor = FastRCNNOutputLayers(
        self.box_head.output_size,
        self.num_classes,
        cls_agnostic_bbox_reg=False,
    )

    # Light-direction branch.
    self.light_direction_head = build_box_head(cfg, pooled_shape)
    self.light_direction_predictor = LightdirectionOutputLayer(
        self.light_direction_head.output_size
    )
def _init_box_head(self, cfg, input_shape):
    """Create the box pooler, box head and EQL box predictor on ``self``.

    Also stores ``train_on_pred_boxes``, ``lambda_`` and ``prior_prob`` from
    the config, and loads the image-count frequencies into ``self.freq_info``.

    Args:
        cfg: config node. Reads the ``MODEL.ROI_BOX_HEAD`` pooler entries and
            ``TRAIN_ON_PRED_BOXES``, plus ``MODEL.ROI_HEADS.LAMBDA`` and
            ``MODEL.ROI_HEADS.PRIOR_PROB``; ``cfg`` is forwarded to
            ``build_box_head``.
        input_shape: mapping from feature name to an object exposing
            ``stride`` and ``channels``.

    Raises:
        AssertionError: if the features in ``self.in_features`` do not all
            report the same channel count.
    """
    roi_box_cfg = cfg.MODEL.ROI_BOX_HEAD
    resolution = roi_box_cfg.POOLER_RESOLUTION
    scales = tuple(1.0 / input_shape[name].stride for name in self.in_features)
    ratio = roi_box_cfg.POOLER_SAMPLING_RATIO
    kind = roi_box_cfg.POOLER_TYPE
    self.train_on_pred_boxes = roi_box_cfg.TRAIN_ON_PRED_BOXES

    roi_heads_cfg = cfg.MODEL.ROI_HEADS
    self.lambda_ = roi_heads_cfg.LAMBDA
    self.prior_prob = roi_heads_cfg.PRIOR_PROB

    # The predictors are shared across all feature maps (as in FPN), hence
    # every feature map has to provide the same number of channels.
    channel_counts = [input_shape[name].channels for name in self.in_features]
    assert len(set(channel_counts)) == 1, channel_counts
    shared_channels = channel_counts[0]

    self.box_pooler = ROIPooler(
        output_size=resolution,
        scales=scales,
        sampling_ratio=ratio,
        pooler_type=kind,
    )

    # "box head" and "box predictor" are separate objects for historical
    # reasons; they are always used together.
    pooled_shape = ShapeSpec(channels=shared_channels, height=resolution, width=resolution)
    self.box_head = build_box_head(cfg, pooled_shape)
    self.box_predictor = EQLFastRCNNOutputLayers(
        self.box_head.output_size,
        self.num_classes,
        self.cls_agnostic_bbox_reg,
        prior_prob=self.prior_prob,
    )

    # Frequency information, kept as a plain attribute (no buffer registration).
    image_counts = get_image_count_frequency()
    self.freq_info = torch.FloatTensor(image_counts)
def _init_box_head(cls, cfg, input_shape):
    """Assemble pooler, box head and a loss-specific box predictor.

    The predictor class is chosen by ``cfg.MODEL.ROI_HEADS.LOSS``:
    ``"CrossEntropy"`` selects ``FastRCNNOutputLayers`` and ``"FocalLoss"``
    selects ``FastRCNNFocaltLossOutputLayers``.

    Args:
        cfg: config node. Reads ``MODEL.ROI_HEADS.IN_FEATURES``,
            ``MODEL.ROI_HEADS.LOSS`` and the ``MODEL.ROI_BOX_HEAD.POOLER_*``
            entries; ``cfg`` is forwarded to the head/predictor constructors.
        input_shape: mapping from feature name to an object exposing
            ``stride`` and ``channels``.

    Returns:
        dict with the keys ``box_in_features``, ``box_pooler``, ``box_head``
        and ``box_predictor``.

    Raises:
        AssertionError: if the selected features do not all report the same
            channel count.
        ValueError: if ``cfg.MODEL.ROI_HEADS.LOSS`` is not one of the two
            supported names.
    """
    in_features = cfg.MODEL.ROI_HEADS.IN_FEATURES
    roi_box_cfg = cfg.MODEL.ROI_BOX_HEAD
    resolution = roi_box_cfg.POOLER_RESOLUTION
    scales = tuple(1.0 / input_shape[name].stride for name in in_features)
    ratio = roi_box_cfg.POOLER_SAMPLING_RATIO
    kind = roi_box_cfg.POOLER_TYPE

    # All input features must agree on the channel count.
    channel_counts = [input_shape[name].channels for name in in_features]
    assert len(set(channel_counts)) == 1, channel_counts
    shared_channels = channel_counts[0]

    box_pooler = ROIPooler(
        output_size=resolution,
        scales=scales,
        sampling_ratio=ratio,
        pooler_type=kind,
    )

    pooled_shape = ShapeSpec(channels=shared_channels, height=resolution, width=resolution)
    box_head = build_box_head(cfg, pooled_shape)

    loss_name = cfg.MODEL.ROI_HEADS.LOSS
    if loss_name == "CrossEntropy":
        box_predictor = FastRCNNOutputLayers(cfg, box_head.output_shape)
    elif loss_name == "FocalLoss":
        box_predictor = FastRCNNFocaltLossOutputLayers(cfg, box_head.output_shape)
    else:
        raise ValueError("Unknown ROI head loss.")

    return {
        "box_in_features": in_features,
        "box_pooler": box_pooler,
        "box_head": box_head,
        "box_predictor": box_predictor,
    }
def __init__(self, cfg):
    """Build backbone, max-pool, neck and a 1000-way linear classifier.

    Args:
        cfg: config node forwarded to ``build_backbone`` and
            ``build_box_head``.
    """
    super().__init__()

    self.backbone = build_backbone(cfg)
    self.maxpool = nn.MaxPool2d(2, stride=2, padding=0)

    # The neck is a box head fed with the backbone's "res5" channel count at
    # a fixed 7x7 spatial size.
    res5_channels = self.backbone.output_shape()["res5"].channels
    self.neck = build_box_head(
        cfg,
        ShapeSpec(channels=res5_channels, height=7, width=7),
    )

    # ``output_size`` may be a plain int or a multi-dimensional size; in the
    # latter case use the product of its entries as the feature length.
    neck_out = self.neck.output_size
    feature_len = neck_out if isinstance(neck_out, int) else np.prod(neck_out)

    self.linear = nn.Linear(feature_len, 1000)
    # Sec 5.1 in "Accurate, Large Minibatch SGD: Training ImageNet in 1 Hour":
    # "The 1000-way fully-connected layer is initialized by
    # drawing weights from a zero-mean Gaussian with standard deviation of 0.01."
    nn.init.normal_(self.linear.weight, std=0.01)
    nn.init.constant_(self.linear.bias, 0.0)
def _init_box_head(cls, cfg, input_shape):
    """Build the box pooler, box head and EOPSN box predictor.

    When ``cfg.DATASETS.UNSEEN_LABEL_SET`` is a non-empty path, the file is
    read as one class name per line, the names are mapped to their indices in
    the training metadata's ``thing_classes``, and two label-conversion
    tensors are derived and handed to the predictor. When the path is empty
    both converters are ``None``.

    Args:
        cfg: config node. Reads ``MODEL.ROI_HEADS.IN_FEATURES``, the
            ``MODEL.ROI_BOX_HEAD.POOLER_*`` entries,
            ``DATASETS.UNSEEN_LABEL_SET``, ``DATASETS.TRAIN[0]`` and
            ``MODEL.EOPSN.{IGNORE_UNLABELED_REGION, UNLABELED_REGION,
            PREDICTOR}``; ``cfg`` is forwarded to the head/predictor
            constructors.
        input_shape: mapping from feature name to an object exposing
            ``stride`` and ``channels``.

    Returns:
        dict with the keys ``box_in_features``, ``box_pooler``, ``box_head``
        and ``box_predictor``.

    Raises:
        AssertionError: if the selected features do not all report the same
            channel count.
        KeyError: if a name in the unseen-label file is not present in the
            metadata's ``thing_classes``.
        ValueError: if ``cfg.MODEL.EOPSN.PREDICTOR`` is neither ``'baseline'``
            nor ``'eopsn'`` (previously this surfaced as an
            ``UnboundLocalError`` at the ``return``).
    """
    # fmt: off
    in_features       = cfg.MODEL.ROI_HEADS.IN_FEATURES
    pooler_resolution = cfg.MODEL.ROI_BOX_HEAD.POOLER_RESOLUTION
    pooler_scales     = tuple(1.0 / input_shape[k].stride for k in in_features)
    sampling_ratio    = cfg.MODEL.ROI_BOX_HEAD.POOLER_SAMPLING_RATIO
    pooler_type       = cfg.MODEL.ROI_BOX_HEAD.POOLER_TYPE
    # fmt: on

    unseen_path = cfg.DATASETS.UNSEEN_LABEL_SET
    meta = MetadataCatalog.get(cfg.DATASETS.TRAIN[0])
    if unseen_path != '':
        # Map every "thing" class name to its index, then translate the names
        # listed in the file (one per line) into a sorted list of indices.
        meta_info = {e: i for i, e in enumerate(meta.thing_classes)}
        with open(unseen_path, 'r') as f:
            unseen_label_set = sorted(meta_info[line.replace('\n', '')] for line in f)

        # NOTE(review): these statements mutate the metadata object in place;
        # building the head more than once for the same dataset would append
        # 'unknown' again - confirm this is only executed once per process.
        # NOTE(review): 201 -> 54 is a hard-coded dataset-id / contiguous-id
        # pair for the added 'unknown' stuff class - confirm against the dataset.
        meta.stuff_classes.append('unknown')
        meta.stuff_colors.append([20, 220, 60])
        meta.stuff_dataset_id_to_contiguous_id[201] = 54

        # One slot per thing class plus one extra slot, or two extra slots
        # when the unlabeled region is enabled and not ignored.
        if cfg.MODEL.EOPSN.IGNORE_UNLABELED_REGION or not cfg.MODEL.EOPSN.UNLABELED_REGION:
            label_converter = torch.ones(len(meta.thing_classes) + 1)
        else:
            label_converter = torch.ones(len(meta.thing_classes) + 2)
        # Zero out the unseen classes.
        for i in unseen_label_set:
            label_converter[i] = 0

        # reverse_label_converter: indices of the slots that are still 1.
        # label_converter: running count of kept slots minus one.
        reverse_label_converter = label_converter.nonzero()[:, 0].long()
        label_converter = torch.cumsum(label_converter, 0).long() - 1

        if cfg.MODEL.EOPSN.UNLABELED_REGION:
            if cfg.MODEL.EOPSN.IGNORE_UNLABELED_REGION:
                reverse_label_converter[-1] = -1
            else:
                # Shift the last entry one position up and mark the slot
                # before it with -1.
                reverse_label_converter[-1] = reverse_label_converter[-2]
                reverse_label_converter[-2] = -1
    else:
        reverse_label_converter = None
        label_converter = None

    # If StandardROIHeads is applied on multiple feature maps (as in FPN),
    # then we share the same predictors and therefore the channel counts must be the same
    in_channels = [input_shape[f].channels for f in in_features]
    # Check all channel counts are equal
    assert len(set(in_channels)) == 1, in_channels
    in_channels = in_channels[0]

    box_pooler = ROIPooler(
        output_size=pooler_resolution,
        scales=pooler_scales,
        sampling_ratio=sampling_ratio,
        pooler_type=pooler_type,
    )
    # Here we split "box head" and "box predictor", which is mainly due to historical reasons.
    # They are used together so the "box predictor" layers should be part of the "box head".
    # New subclasses of ROIHeads do not need "box predictor"s.
    box_head = build_box_head(
        cfg, ShapeSpec(channels=in_channels, height=pooler_resolution, width=pooler_resolution)
    )

    predictor_name = cfg.MODEL.EOPSN.PREDICTOR
    if predictor_name == 'baseline':
        box_predictor = FastRCNNOutputLayers_baseline(
            cfg, box_head.output_shape, label_converter, reverse_label_converter)
    elif predictor_name == 'eopsn':
        # Imported lazily, only when this predictor is selected.
        from .eopsn_predictor import FastRCNNOutputLayers_eopsn
        box_predictor = FastRCNNOutputLayers_eopsn(
            cfg, box_head.output_shape, label_converter, reverse_label_converter)
    else:
        # Without this branch ``box_predictor`` would be unbound below.
        raise ValueError(
            "Unknown cfg.MODEL.EOPSN.PREDICTOR: {!r} "
            "(expected 'baseline' or 'eopsn')".format(predictor_name)
        )

    return {
        "box_in_features": in_features,
        "box_pooler": box_pooler,
        "box_head": box_head,
        "box_predictor": box_predictor,
    }
def _init_box_head(self, cfg, input_shape):
    """Create the spatial and spatio-temporal box poolers, heads and predictors.

    Stores a number of ``cfg.MODEL.SPATIOTEMPORAL`` options on ``self``,
    builds two poolers that differ only in pooler type, optionally builds the
    spatio-temporal head/classifier (when ``self.st_cls`` is truthy), always
    builds the regular box head/predictor, and optionally freezes the latter
    two via ``self.freeze_component``.

    Args:
        cfg: config node. Reads the ``MODEL.ROI_BOX_HEAD`` pooler entries and
            ``TRAIN_ON_PRED_BOXES``, several ``MODEL.SPATIOTEMPORAL`` entries
            and ``MODEL.PROPOSAL_GENERATOR.MIN_SIZE``; ``cfg`` is forwarded to
            ``build_st_box_head`` and ``build_box_head``.
        input_shape: mapping from feature name to an object exposing
            ``stride`` and ``channels``.

    Raises:
        AssertionError: if the features in ``self.in_features`` do not all
            report the same channel count.
    """
    roi_box_cfg = cfg.MODEL.ROI_BOX_HEAD
    st_cfg = cfg.MODEL.SPATIOTEMPORAL

    resolution = roi_box_cfg.POOLER_RESOLUTION
    scales = tuple(1.0 / input_shape[name].stride for name in self.in_features)
    ratio = roi_box_cfg.POOLER_SAMPLING_RATIO
    spatial_kind = roi_box_cfg.POOLER_TYPE
    st_kind = st_cfg.ST_POOLER_TYPE

    self.train_on_pred_boxes = roi_box_cfg.TRAIN_ON_PRED_BOXES
    self.st_cls = st_cfg.ST_CLS
    self.spatial_cls = st_cfg.SPATIAL_CLS
    self.longterm_proposals = st_cfg.ROI_BOX_HEAD.REF_POST_NMS_TOP_N
    self.st_box_head_name = st_cfg.ROI_BOX_HEAD.NAME
    self.long_term = st_cfg.LONG_TERM
    self.min_box_side_len = cfg.MODEL.PROPOSAL_GENERATOR.MIN_SIZE

    self.st_cls_short_term_aggregation = st_cfg.ST_CLS_SHORT_TERM_AGGREGATION
    self.proposal_tracking = st_cfg.PROPOSAL_TRACKING
    self.test_tracking_type = st_cfg.TEST_TRACKING_TYPE

    # The predictors are shared across all feature maps (as in FPN), hence
    # every feature map has to provide the same number of channels.
    channel_counts = [input_shape[name].channels for name in self.in_features]
    assert len(set(channel_counts)) == 1, channel_counts
    shared_channels = channel_counts[0]

    self.long_term_proposal_matcher = Matcher(
        [0.3],  # TODO: config(cfg.MODEL.ROI_HEADS.IOU_THRESHOLDS)
        [0, 1],  # TODO: config(cfg.MODEL.ROI_HEADS.IOU_LABELS)
        allow_low_quality_matches=False,
    )

    # Two poolers with identical geometry; only the pooler type differs.
    self.box_pooler = ROIPooler(
        output_size=resolution,
        scales=scales,
        sampling_ratio=ratio,
        pooler_type=spatial_kind,
    )
    self.st_box_pooler = ROIPooler(
        output_size=resolution,
        scales=scales,
        sampling_ratio=ratio,
        pooler_type=st_kind,
    )

    # "box head" and "box predictor" are separate objects for historical
    # reasons; they are always used together.
    if self.st_cls:
        self.st_box_head = build_st_box_head(
            cfg,
            ShapeSpec(channels=shared_channels, height=resolution, width=resolution),
        )
        self.st_cls_predictor = StClassificationOutputLayers(
            self.st_box_head.output_size,
            self.num_classes,
        )

    self.box_head = build_box_head(
        cfg,
        ShapeSpec(channels=shared_channels, height=resolution, width=resolution),
    )
    self.box_predictor = FastRCNNOutputLayers(
        self.box_head.output_size,
        self.num_classes,
        self.cls_agnostic_bbox_reg,
    )

    if st_cfg.FREEZE_SPATIAL_HEAD:
        for component in (self.box_head, self.box_predictor):
            self.freeze_component(component)