def from_config(cls, cfg):
    """Build constructor kwargs for a distillation detector from the config.

    Returns the student components (backbone, RPN, ROI heads) plus a frozen
    teacher model and the teacher's own input format and pixel statistics,
    along with the KD settings (``cfg.KD`` — presumably knowledge
    distillation; confirm against the consumer of ``kd_args``).
    """
    backbone = build_backbone(cfg)
    # output_shape() is needed by both the proposal generator and the ROI
    # heads; compute it once instead of twice.
    output_shape = backbone.output_shape()
    return {
        "backbone": backbone,
        "proposal_generator": build_proposal_generator(cfg, output_shape),
        "roi_heads": build_roi_heads(cfg, output_shape),
        "input_format": cfg.INPUT.FORMAT,
        "vis_period": cfg.VIS_PERIOD,
        "pixel_mean": cfg.MODEL.PIXEL_MEAN,
        "pixel_std": cfg.MODEL.PIXEL_STD,
        "kd_args": cfg.KD,
        "teacher": build_teacher(cfg),
        "teacher_input_format": cfg.TEACHER.INPUT.FORMAT,
        "teacher_pixel_mean": cfg.TEACHER.MODEL.PIXEL_MEAN,
        "teacher_pixel_std": cfg.TEACHER.MODEL.PIXEL_STD,
    }
def __init__(self, cfg):
    """Detector with an auxiliary RPN/ROI-heads branch sharing one backbone.

    The auxiliary branch is built with the same builders as the main branch,
    so the two branches have identical architectures but separate weights.
    """
    super().__init__()
    self.backbone = build_backbone(cfg)
    # output_shape() is consumed by four component builders; compute once.
    output_shape = self.backbone.output_shape()
    self.proposal_generator = build_proposal_generator(cfg, output_shape)
    self.roi_heads = build_roi_heads(cfg, output_shape)
    self.vis_period = cfg.VIS_PERIOD
    self.input_format = cfg.INPUT.FORMAT
    # Auxiliary branch: reuses the standard builders (a dedicated
    # build_aux_proposal_generator was previously considered and dropped).
    self.auxiliary_proposal_generator = build_proposal_generator(cfg, output_shape)
    self.auxiliary_roi_heads = build_roi_heads(cfg, output_shape)
    # Mean and std must describe the same number of channels.
    assert len(cfg.MODEL.PIXEL_MEAN) == len(cfg.MODEL.PIXEL_STD)
    # Buffers (not parameters): move with the module and are checkpointed.
    self.register_buffer("pixel_mean", torch.Tensor(cfg.MODEL.PIXEL_MEAN).view(-1, 1, 1))
    self.register_buffer("pixel_std", torch.Tensor(cfg.MODEL.PIXEL_STD).view(-1, 1, 1))
def from_config(cls, cfg):
    """Build constructor kwargs for a proposal-loading detector.

    The proposal generator is intentionally ``None``: proposals are expected
    to be loaded from precomputed files (``cfg.MODEL.LOAD_PROPOSALS``).
    """
    backbone = build_backbone(cfg)
    return {
        "backbone": backbone,
        "proposal_generator": None,
        "load_proposals": cfg.MODEL.LOAD_PROPOSALS,
        "roi_heads": build_roi_heads(cfg, backbone.output_shape()),
        "input_format": cfg.INPUT.FORMAT,
        "vis_period": cfg.VIS_PERIOD,
        "pixel_mean": cfg.MODEL.PIXEL_MEAN,
        "pixel_std": cfg.MODEL.PIXEL_STD,
        # Enable "cpg" only for the ROI-heads variants that use it
        # (presumably class peak/gradient maps for weak supervision — confirm
        # against the consumer). any(...) replaces the redundant
        # "True if ... else False" form.
        "cpg": any(
            key in cfg.MODEL.ROI_HEADS.NAME
            for key in ("CSC", "WSJDS", "XROIHeads")
        ),
    }
def __init__(self, cfg):
    """SOGNet-style panoptic model: detection + semantic seg + panoptic head."""
    super().__init__()
    self.device = torch.device(cfg.MODEL.DEVICE)
    # loss weight
    self.instance_loss_weight = cfg.MODEL.SOGNET.INSTANCE_LOSS_WEIGHT
    # options when combining instance & semantic outputs
    # TODO: build inference
    self.stuff_area_limit = cfg.MODEL.SOGNET.POSTPROCESS.STUFF_AREA_LIMIT
    # Stuff classes = all semantic classes minus the "thing" classes that the
    # ROI heads handle.
    self.stuff_num_classes = (
        cfg.MODEL.SEM_SEG_HEAD.NUM_CLASSES - cfg.MODEL.ROI_HEADS.NUM_CLASSES)
    self.combine_on = cfg.MODEL.SOGNET.COMBINE.ENABLED
    if self.combine_on:
        self.combine_overlap_threshold = cfg.MODEL.SOGNET.COMBINE.OVERLAP_THRESH
        self.combine_stuff_area_limit = cfg.MODEL.SOGNET.COMBINE.STUFF_AREA_LIMIT
        self.combine_instances_confidence_threshold = (
            cfg.MODEL.SOGNET.COMBINE.INSTANCES_CONFIDENCE_THRESH)
    self.backbone = build_backbone(cfg)
    # output_shape() feeds three component builders; compute it once.
    output_shape = self.backbone.output_shape()
    self.proposal_generator = build_proposal_generator(cfg, output_shape)
    self.roi_heads = build_roi_heads(cfg, output_shape)
    self.sem_seg_head = build_sem_seg_head(cfg, output_shape)
    self.panoptic_head = build_panoptic_head(cfg)
    # Normalization constants. Generalized from a hard-coded .view(3, 1, 1)
    # to the actual channel count, with the mean/std length check used by the
    # sibling models in this file.
    assert len(cfg.MODEL.PIXEL_MEAN) == len(cfg.MODEL.PIXEL_STD)
    num_channels = len(cfg.MODEL.PIXEL_MEAN)
    pixel_mean = torch.Tensor(cfg.MODEL.PIXEL_MEAN).to(self.device).view(
        num_channels, 1, 1)
    pixel_std = torch.Tensor(cfg.MODEL.PIXEL_STD).to(self.device).view(
        num_channels, 1, 1)
    # Closure captures tensors already on the target device.
    self.normalizer = lambda x: (x - pixel_mean) / pixel_std
    self.to(self.device)
def test_rroi_heads(self):
    """Regression test: RRPN + RROIHeads losses must match golden values."""
    torch.manual_seed(121)  # fixed seed so all losses are reproducible
    cfg = get_cfg()
    cfg.MODEL.PROPOSAL_GENERATOR.NAME = "RRPN"
    cfg.MODEL.ANCHOR_GENERATOR.NAME = "RotatedAnchorGenerator"
    cfg.MODEL.ROI_HEADS.NAME = "RROIHeads"
    cfg.MODEL.ROI_BOX_HEAD.NAME = "FastRCNNConvFCHead"
    cfg.MODEL.ROI_BOX_HEAD.NUM_FC = 2
    # Rotated boxes carry 5 regression targets, hence 5 weights.
    cfg.MODEL.RPN.BBOX_REG_WEIGHTS = (1, 1, 1, 1, 1)
    cfg.MODEL.RPN.HEAD_NAME = "StandardRPNHead"
    cfg.MODEL.ROI_BOX_HEAD.POOLER_TYPE = "ROIAlignRotated"
    cfg.MODEL.ROI_BOX_HEAD.BBOX_REG_WEIGHTS = (10, 10, 5, 5, 1)
    # Tiny synthetic batch: 2 images, one random "res4" feature map.
    num_images = 2
    images_tensor = torch.rand(num_images, 20, 30)
    image_sizes = [(10, 10), (20, 30)]
    images = ImageList(images_tensor, image_sizes)
    num_channels = 1024
    features = {"res4": torch.rand(num_images, num_channels, 1, 2)}
    feature_shape = {"res4": ShapeSpec(channels=num_channels, stride=16)}
    image_shape = (15, 15)
    # Ground truth uses the rotated-box (cx, cy, w, h, angle) layout.
    gt_boxes0 = torch.tensor([[2, 2, 2, 2, 30], [4, 4, 4, 4, 0]],
                             dtype=torch.float32)
    gt_instance0 = Instances(image_shape)
    gt_instance0.gt_boxes = RotatedBoxes(gt_boxes0)
    gt_instance0.gt_classes = torch.tensor([2, 1])
    gt_boxes1 = torch.tensor([[1.5, 5.5, 1, 3, 0], [8.5, 4, 3, 2, -50]],
                             dtype=torch.float32)
    gt_instance1 = Instances(image_shape)
    gt_instance1.gt_boxes = RotatedBoxes(gt_boxes1)
    gt_instance1.gt_classes = torch.tensor([1, 2])
    gt_instances = [gt_instance0, gt_instance1]
    proposal_generator = build_proposal_generator(cfg, feature_shape)
    roi_heads = build_roi_heads(cfg, feature_shape)
    with EventStorage():  # capture events in a new storage to discard them
        proposals, proposal_losses = proposal_generator(
            images, features, gt_instances)
        _, detector_losses = roi_heads(images, features, proposals,
                                       gt_instances)
        # Merge RPN and ROI-head losses so one loop can check them all.
        detector_losses.update(proposal_losses)
    expected_losses = {
        "loss_cls": 4.365657806396484,
        "loss_box_reg": 0.0015851043863222003,
        "loss_rpn_cls": 0.2427729219198227,
        "loss_rpn_loc": 0.3646621108055115,
    }
    # Any produced loss missing from expected_losses is compared against 0.0.
    succ = all(
        torch.allclose(detector_losses[name],
                       torch.tensor(expected_losses.get(name, 0.0)))
        for name in detector_losses.keys())
    self.assertTrue(
        succ,
        "Losses has changed! New losses: {}".format(
            {k: v.item() for k, v in detector_losses.items()}),
    )
def __init__(self, cfg):
    """Wrap a Detectron2 RPN and ROI heads built from a fixed feature layout.

    Detectron2 builders expect a ``{feature name: ShapeSpec}`` mapping; here
    each RPN input feature is assumed to be a 256-channel map, with strides
    4/8/16/32 assigned in the order of ``cfg.MODEL.RPN.IN_FEATURES``.
    """
    super().__init__()
    strides = [4, 8, 16, 32]
    input_shape = {
        feature_name: ShapeSpec(channels=256, stride=stride)
        for feature_name, stride in zip(cfg.MODEL.RPN.IN_FEATURES, strides)
    }
    self.rpn = build_proposal_generator(cfg, input_shape=input_shape)
    self.roi_heads = build_roi_heads(cfg, input_shape)
def test_rroi_heads(self):
    """Regression test: RROIHeads losses must match golden values.

    Unlike the variant above, the feature shapes come from a real backbone's
    output_shape() and only the ROI-head losses are checked.
    """
    torch.manual_seed(121)  # fixed seed so the loss values are reproducible
    cfg = get_cfg()
    cfg.MODEL.PROPOSAL_GENERATOR.NAME = "RRPN"
    cfg.MODEL.ANCHOR_GENERATOR.NAME = "RotatedAnchorGenerator"
    cfg.MODEL.ROI_HEADS.NAME = "RROIHeads"
    cfg.MODEL.ROI_BOX_HEAD.NAME = "FastRCNNConvFCHead"
    cfg.MODEL.ROI_BOX_HEAD.NUM_FC = 2
    # Rotated boxes carry 5 regression targets, hence 5 weights.
    cfg.MODEL.RPN.BBOX_REG_WEIGHTS = (1, 1, 1, 1, 1)
    cfg.MODEL.RPN.HEAD_NAME = "StandardRPNHead"
    cfg.MODEL.ROI_BOX_HEAD.POOLER_TYPE = "ROIAlignRotated"
    cfg.MODEL.ROI_BOX_HEAD.BBOX_REG_WEIGHTS = (10, 10, 5, 5, 1)
    backbone = build_backbone(cfg)
    # Tiny synthetic batch: 2 images, one random "res4" feature map.
    num_images = 2
    images_tensor = torch.rand(num_images, 20, 30)
    image_sizes = [(10, 10), (20, 30)]
    images = ImageList(images_tensor, image_sizes)
    num_channels = 1024
    features = {"res4": torch.rand(num_images, num_channels, 1, 2)}
    image_shape = (15, 15)
    # Ground truth uses the rotated-box (cx, cy, w, h, angle) layout.
    gt_boxes0 = torch.tensor([[2, 2, 2, 2, 30], [4, 4, 4, 4, 0]],
                             dtype=torch.float32)
    gt_instance0 = Instances(image_shape)
    gt_instance0.gt_boxes = RotatedBoxes(gt_boxes0)
    gt_instance0.gt_classes = torch.tensor([2, 1])
    gt_boxes1 = torch.tensor([[1.5, 5.5, 1, 3, 0], [8.5, 4, 3, 2, -50]],
                             dtype=torch.float32)
    gt_instance1 = Instances(image_shape)
    gt_instance1.gt_boxes = RotatedBoxes(gt_boxes1)
    gt_instance1.gt_classes = torch.tensor([1, 2])
    gt_instances = [gt_instance0, gt_instance1]
    proposal_generator = build_proposal_generator(cfg, backbone.output_shape())
    roi_heads = build_roi_heads(cfg, backbone.output_shape())
    with EventStorage():  # capture events in a new storage to discard them
        proposals, proposal_losses = proposal_generator(
            images, features, gt_instances)
        _, detector_losses = roi_heads(images, features, proposals,
                                       gt_instances)
    expected_losses = {
        "loss_cls": torch.tensor(4.381618499755859),
        "loss_box_reg": torch.tensor(0.0011829272843897343),
    }
    for name in expected_losses.keys():
        err_msg = "detector_losses[{}] = {}, expected losses = {}".format(
            name, detector_losses[name], expected_losses[name])
        self.assertTrue(
            torch.allclose(detector_losses[name], expected_losses[name]),
            err_msg)
def __init__(self, cfg):
    """Spatiotemporal detector: aggregates per-frame features across a video.

    Maintains short-term and long-term feature buffers, and optionally
    freezes the backbone and/or proposal generator so that only the temporal
    components are trained.
    """
    super().__init__()
    self.device = torch.device(cfg.MODEL.DEVICE)
    self.backbone = build_backbone(cfg)
    self.proposal_generator = build_proposal_generator(cfg, self.backbone.output_shape())
    self.roi_heads = build_roi_heads(cfg, self.backbone.output_shape())
    self.vis_period = cfg.VIS_PERIOD
    self.input_format = cfg.INPUT.FORMAT
    # State used when stepping through a video frame by frame.
    self.current_video = None
    self.frame_idx = 0
    if cfg.MODEL.SPATIOTEMPORAL.FREEZE_BACKBONE:
        self.freeze_component(self.backbone)
    if cfg.MODEL.SPATIOTEMPORAL.FREEZE_PROPOSAL_GENERATOR:
        self.freeze_component(self.proposal_generator)
    self.long_term = cfg.MODEL.SPATIOTEMPORAL.LONG_TERM
    self.temporal_dropout = cfg.MODEL.SPATIOTEMPORAL.TEMPORAL_DROPOUT
    self.num_frames = cfg.MODEL.SPATIOTEMPORAL.NUM_FRAMES
    self.num_keyframes = cfg.MODEL.SPATIOTEMPORAL.NUM_KEYFRAMES
    self.keyframe_interval = cfg.MODEL.SPATIOTEMPORAL.KEYFRAME_INTERVAL
    # -1 means "last frame in the window" until forward aggregation widens it.
    self.reference_frame_idx = -1
    if cfg.MODEL.SPATIOTEMPORAL.FORWARD_AGGREGATION:
        # (f_{t-NUM_FRAMES}, ..., f_{t-1}, f_t, f_{t+1}, ..., f_{t+NUM_FRAMES})
        self.num_frames = (2 * self.num_frames) + 1
        # The reference frame sits in the middle of the widened window.
        self.reference_frame_idx = cfg.MODEL.SPATIOTEMPORAL.NUM_FRAMES
    if self.temporal_dropout:
        assert cfg.MODEL.SPATIOTEMPORAL.FORWARD_AGGREGATION, "Temporal dropout without forward aggregation."
    if self.temporal_dropout:
        self.reference_frame_idx = cfg.MODEL.SPATIOTEMPORAL.NUM_FRAMES
        # NOTE(review): training reference index is fixed at 1 under temporal
        # dropout — presumably frames are subsampled at train time; confirm.
        self.train_reference_frame_idx = 1
    else:
        self.train_reference_frame_idx = self.reference_frame_idx
    # Sliding windows of cached per-frame features and keyframe ROIs.
    self.short_term_feature_buffer = deque(maxlen=self.num_frames)
    self.long_term_feature_buffer = deque(maxlen=self.num_keyframes)
    self.long_term_roi_buffer = deque(maxlen=self.num_keyframes)
    # RPN buffers
    self.predict_proposals = None
    self.predict_objectness_logits = None
    # Mean and std must describe the same number of channels.
    assert len(cfg.MODEL.PIXEL_MEAN) == len(cfg.MODEL.PIXEL_STD)
    num_channels = len(cfg.MODEL.PIXEL_MEAN)
    pixel_mean = torch.Tensor(cfg.MODEL.PIXEL_MEAN).to(self.device).view(num_channels, 1, 1)
    pixel_std = torch.Tensor(cfg.MODEL.PIXEL_STD).to(self.device).view(num_channels, 1, 1)
    # Closure captures tensors already on the target device.
    self.normalizer = lambda x: (x - pixel_mean) / pixel_std
    self.to(self.device)
def from_config(cls, cfg):
    """Assemble constructor kwargs from the config.

    Builds the backbone once, derives its output shape, and feeds that shape
    to the proposal generator, ROI heads, and unsupervised head.
    """
    backbone = build_backbone(cfg)
    shape = backbone.output_shape()
    kwargs = {
        "backbone": backbone,
        "proposal_generator": build_proposal_generator(cfg, shape),
        "roi_heads": build_roi_heads(cfg, shape),
        "unsupervised_head": build_unsupervised_head(cfg, shape),
        "input_format": cfg.INPUT.FORMAT,
        "vis_period": cfg.VIS_PERIOD,
        "pixel_mean": cfg.MODEL.PIXEL_MEAN,
        "pixel_std": cfg.MODEL.PIXEL_STD,
    }
    return kwargs
def build_teacher(cfg):
    """Build a frozen teacher detector from ``cfg.TEACHER``.

    Retina-style meta-architectures are single-stage, so they get neither a
    proposal generator nor ROI heads (both ``None``). Every parameter is
    frozen so the teacher is never updated during training.

    Returns:
        Teacher: the assembled, fully-frozen teacher model.
    """
    teacher_cfg = cfg.TEACHER
    backbone = build_backbone(teacher_cfg)
    # "not 'Retina' in ..." replaced with the positive, idiomatic test.
    if 'Retina' in teacher_cfg.MODEL.META_ARCHITECTURE:
        proposal_generator = None
        roi_heads = None
    else:
        # output_shape() feeds both builders; compute it once.
        output_shape = backbone.output_shape()
        proposal_generator = build_proposal_generator(teacher_cfg, output_shape)
        roi_heads = build_roi_heads(teacher_cfg, output_shape)
    teacher = Teacher(backbone, proposal_generator, roi_heads)
    for param in teacher.parameters():
        param.requires_grad = False
    return teacher
def __init__(self, cfg):
    """Bottom-up-attention (BUA) detector with an optional extraction mode."""
    super().__init__()
    bua_cfg = cfg.MODEL.BUA
    self.device = torch.device(cfg.MODEL.DEVICE)
    # Whether to mimic the original Caffe implementation's behavior.
    self.bua_caffe = bua_cfg.CAFFE
    self.backbone = build_backbone(cfg)
    self.proposal_generator = build_proposal_generator(
        cfg, self.backbone.output_shape())
    self.roi_heads = build_roi_heads(cfg, self.backbone.output_shape())
    # Mean and std must describe the same number of channels.
    assert len(cfg.MODEL.PIXEL_MEAN) == len(cfg.MODEL.PIXEL_STD)
    # Feature-extraction switches.
    self.extract_on = bua_cfg.EXTRACT_FEATS
    self.extractor = bua_cfg.EXTRACTOR
    self.to(self.device)
def __init__(self, cfg):
    """Two-stage detector that normalizes inputs via a stored closure."""
    super().__init__()
    self.device = torch.device(cfg.MODEL.DEVICE)
    self.backbone = build_backbone(cfg)
    self.proposal_generator = build_proposal_generator(
        cfg, self.backbone.output_shape())
    self.roi_heads = build_roi_heads(cfg, self.backbone.output_shape())
    self.vis_period = cfg.VIS_PERIOD
    self.input_format = cfg.INPUT.FORMAT
    mean_values, std_values = cfg.MODEL.PIXEL_MEAN, cfg.MODEL.PIXEL_STD
    # Mean and std must describe the same number of channels.
    assert len(mean_values) == len(std_values)
    channels = len(mean_values)
    mean_tensor = torch.Tensor(mean_values).to(self.device).view(channels, 1, 1)
    std_tensor = torch.Tensor(std_values).to(self.device).view(channels, 1, 1)
    # Closure captures tensors already on the target device.
    self.normalizer = lambda image: (image - mean_tensor) / std_tensor
    self.to(self.device)
def test_roi_heads(self):
    """Regression test: StandardROIHeads losses must match golden values."""
    torch.manual_seed(121)  # fixed seed so the loss values are reproducible
    cfg = get_cfg()
    cfg.MODEL.ROI_HEADS.NAME = "StandardROIHeads"
    cfg.MODEL.ROI_BOX_HEAD.NAME = "FastRCNNConvFCHead"
    cfg.MODEL.ROI_BOX_HEAD.NUM_FC = 2
    cfg.MODEL.ROI_BOX_HEAD.POOLER_TYPE = "ROIAlignV2"
    cfg.MODEL.ROI_BOX_HEAD.BBOX_REG_WEIGHTS = (10, 10, 5, 5)
    backbone = build_backbone(cfg)
    # Tiny synthetic batch: 2 images, one random "res4" feature map.
    num_images = 2
    images_tensor = torch.rand(num_images, 20, 30)
    image_sizes = [(10, 10), (20, 30)]
    images = ImageList(images_tensor, image_sizes)
    num_channels = 1024
    features = {"res4": torch.rand(num_images, num_channels, 1, 2)}
    image_shape = (15, 15)
    # Axis-aligned ground truth in (x1, y1, x2, y2) layout.
    gt_boxes0 = torch.tensor([[1, 1, 3, 3], [2, 2, 6, 6]], dtype=torch.float32)
    gt_instance0 = Instances(image_shape)
    gt_instance0.gt_boxes = Boxes(gt_boxes0)
    gt_instance0.gt_classes = torch.tensor([2, 1])
    gt_boxes1 = torch.tensor([[1, 5, 2, 8], [7, 3, 10, 5]], dtype=torch.float32)
    gt_instance1 = Instances(image_shape)
    gt_instance1.gt_boxes = Boxes(gt_boxes1)
    gt_instance1.gt_classes = torch.tensor([1, 2])
    gt_instances = [gt_instance0, gt_instance1]
    proposal_generator = build_proposal_generator(cfg, backbone.output_shape())
    roi_heads = build_roi_heads(cfg, backbone.output_shape())
    with EventStorage():  # capture events in a new storage to discard them
        proposals, proposal_losses = proposal_generator(
            images, features, gt_instances)
        _, detector_losses = roi_heads(images, features, proposals,
                                       gt_instances)
    expected_losses = {
        "loss_cls": torch.tensor(4.4236516953),
        "loss_box_reg": torch.tensor(0.0091214813),
    }
    for name in expected_losses.keys():
        self.assertTrue(
            torch.allclose(detector_losses[name], expected_losses[name]))
def __init__(self, cfg):
    """Detector with an attention module and a feature-transformation branch.

    The transformation branch gets its own copies of the ROI box head and
    predictor, plus precomputed positive/negative feature centers loaded
    from disk.
    """
    super().__init__()
    self.device = torch.device(cfg.MODEL.DEVICE)
    self.backbone = build_backbone(cfg)
    self.attention = build_attention(cfg)
    # Optional attention supervision: summed MSE when enabled, else disabled.
    self.mse_loss = nn.MSELoss(
        reduction="sum") if cfg.MODEL.ATTENTION_LOSS else None
    self.mse_weight = cfg.MODEL.ATTENTION_LOSS_WEIGHT
    self.proposal_generator = build_proposal_generator(
        cfg, self.backbone.output_shape())
    self.roi_heads = build_roi_heads(cfg, self.backbone.output_shape())
    self.vis_period = cfg.VIS_PERIOD
    self.input_format = cfg.INPUT.FORMAT
    # NOTE(review): purpose of this 10x10 placeholder layer is unclear from
    # this file — confirm whether it is still needed.
    self.tmp = nn.Linear(10, 10)
    # Precomputed transform centers, unpickled and moved to the model device.
    # NOTE(review): pickle.load on a config-supplied path; ensure the file is
    # trusted. The file handle is not explicitly closed.
    trans_center = pickle.load(open(cfg.MODEL.TRANSFORM_CENTER, 'rb'))
    trans_center['pos_center'] = torch.FloatTensor(
        trans_center['pos_center']).to(self.device)
    trans_center['neg_center'] = torch.FloatTensor(
        trans_center['neg_center']).to(self.device)
    self.trans_center = trans_center
    self.transformation = build_transformation()
    # Independent copies for the transform branch: weights start equal to the
    # ROI heads' but are updated separately afterwards.
    self.box_head = deepcopy(self.roi_heads.box_head)
    self.box_predictor = deepcopy(self.roi_heads.box_predictor)
    # Optional per-element smooth-L1 loss for the transform branch.
    self.sl1_loss = nn.SmoothL1Loss(
        reduction="none") if cfg.MODEL.TRANSFORM_LOSS else None
    self.sl1_weight = cfg.MODEL.TRANSFORM_LOSS_WEIGHT
    self.reg_loss = cfg.MODEL.REG_LOSS
    # Mean and std must describe the same number of channels.
    assert len(cfg.MODEL.PIXEL_MEAN) == len(cfg.MODEL.PIXEL_STD)
    num_channels = len(cfg.MODEL.PIXEL_MEAN)
    pixel_mean = torch.Tensor(cfg.MODEL.PIXEL_MEAN).to(self.device).view(
        num_channels, 1, 1)
    pixel_std = torch.Tensor(cfg.MODEL.PIXEL_STD).to(self.device).view(
        num_channels, 1, 1)
    # Closure captures tensors already on the target device.
    self.normalizer = lambda x: (x - pixel_mean) / pixel_std
    self.to(self.device)
def __init__(self, cfg):
    """Panoptic FPN: shared backbone feeding RPN, ROI heads, and a semantic
    segmentation head."""
    super().__init__()
    panoptic_cfg = cfg.MODEL.PANOPTIC_FPN
    self.instance_loss_weight = panoptic_cfg.INSTANCE_LOSS_WEIGHT
    # Options for combining instance and semantic outputs at inference.
    combine_cfg = panoptic_cfg.COMBINE
    self.combine_on = combine_cfg.ENABLED
    self.combine_overlap_threshold = combine_cfg.OVERLAP_THRESH
    self.combine_stuff_area_limit = combine_cfg.STUFF_AREA_LIMIT
    self.combine_instances_confidence_threshold = (
        combine_cfg.INSTANCES_CONFIDENCE_THRESH)
    self.backbone = build_backbone(cfg)
    self.proposal_generator = build_proposal_generator(
        cfg, self.backbone.output_shape())
    self.roi_heads = build_roi_heads(cfg, self.backbone.output_shape())
    self.sem_seg_head = build_sem_seg_head(cfg, self.backbone.output_shape())
    # Buffers (not parameters): move with the module and are checkpointed.
    self.register_buffer(
        "pixel_mean", torch.Tensor(cfg.MODEL.PIXEL_MEAN).view(-1, 1, 1))
    self.register_buffer(
        "pixel_std", torch.Tensor(cfg.MODEL.PIXEL_STD).view(-1, 1, 1))
def __init__(self, cfg):
    """Detector with an auxiliary attention module and optional MSE loss."""
    super().__init__()
    self.device = torch.device(cfg.MODEL.DEVICE)
    self.backbone = build_backbone(cfg)
    self.attention = build_attention(cfg)
    # Optional attention supervision: summed MSE when enabled, else disabled.
    if cfg.MODEL.ATTENTION_LOSS:
        self.mse_loss = nn.MSELoss(reduction="sum")
    else:
        self.mse_loss = None
    self.mse_weight = cfg.MODEL.ATTENTION_LOSS_WEIGHT
    self.proposal_generator = build_proposal_generator(
        cfg, self.backbone.output_shape())
    self.roi_heads = build_roi_heads(cfg, self.backbone.output_shape())
    self.vis_period = cfg.VIS_PERIOD
    self.input_format = cfg.INPUT.FORMAT
    # NOTE(review): purpose of this 10x10 placeholder layer is unclear from
    # this file — confirm whether it is still needed.
    self.tmp = nn.Linear(10, 10)
    means, stds = cfg.MODEL.PIXEL_MEAN, cfg.MODEL.PIXEL_STD
    # Mean and std must describe the same number of channels.
    assert len(means) == len(stds)
    n_channels = len(means)
    mean_tensor = torch.Tensor(means).to(self.device).view(n_channels, 1, 1)
    std_tensor = torch.Tensor(stds).to(self.device).view(n_channels, 1, 1)
    # Closure captures tensors already on the target device.
    self.normalizer = lambda x: (x - mean_tensor) / std_tensor
    self.to(self.device)