def __init__(self, num_classes, in_channels, feat_channels=256, anchor_generator=dict(type='AnchorGenerator', scales=[8, 16, 32], ratios=[0.5, 1.0, 2.0], strides=[4, 8, 16, 32, 64]), bbox_coder=dict(type='DeltaXYWHBBoxCoder', target_means=(.0, .0, .0, .0), target_stds=(1.0, 1.0, 1.0, 1.0)), reg_decoded_bbox=False, background_label=None, loss_cls=dict(type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0), loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=1.0), train_cfg=None, test_cfg=None): super(AnchorHead, self).__init__() self.in_channels = in_channels self.num_classes = num_classes self.feat_channels = feat_channels self.use_sigmoid_cls = loss_cls.get('use_sigmoid', False) # TODO better way to determine whether sample or not self.sampling = loss_cls['type'] not in [ 'FocalLoss', 'GHMC', 'QualityFocalLoss' ] if self.use_sigmoid_cls: self.cls_out_channels = num_classes else: self.cls_out_channels = num_classes + 1 if self.cls_out_channels <= 0: raise ValueError(f'num_classes={num_classes} is too small') self.reg_decoded_bbox = reg_decoded_bbox self.background_label = (num_classes if background_label is None else background_label) # background_label should be either 0 or num_classes assert (self.background_label == 0 or self.background_label == num_classes) self.bbox_coder = build_bbox_coder(bbox_coder) self.loss_cls = build_loss(loss_cls) self.loss_bbox = build_loss(loss_bbox) self.train_cfg = train_cfg self.test_cfg = test_cfg if self.train_cfg: self.assigner = build_assigner(self.train_cfg.assigner) # use PseudoSampler when sampling is False if self.sampling and hasattr(self.train_cfg, 'sampler'): sampler_cfg = self.train_cfg.sampler else: sampler_cfg = dict(type='PseudoSampler') self.sampler = build_sampler(sampler_cfg, context=self) self.fp16_enabled = False self.anchor_generator = build_anchor_generator(anchor_generator) # usually the numbers of anchors for each level are the same # except SSD detectors self.num_anchors = self.anchor_generator.num_base_anchors[0] self._init_layers()
def __init__( self, num_classes, in_channels, feat_channels=256, approx_anchor_generator=dict( type='AnchorGenerator', octave_base_scale=8, scales_per_octave=3, ratios=[0.5, 1.0, 2.0], strides=[4, 8, 16, 32, 64]), square_anchor_generator=dict( type='AnchorGenerator', ratios=[1.0], scales=[8], strides=[4, 8, 16, 32, 64]), anchor_coder=dict( type='DeltaXYWHBBoxCoder', target_means=[.0, .0, .0, .0], target_stds=[1.0, 1.0, 1.0, 1.0] ), bbox_coder=dict( type='DeltaXYWHBBoxCoder', target_means=[.0, .0, .0, .0], target_stds=[1.0, 1.0, 1.0, 1.0] ), reg_decoded_bbox=False, deform_groups=4, loc_filter_thr=0.01, train_cfg=None, test_cfg=None, loss_loc=dict( type='FocalLoss', use_sigmoid=True, gamma=2.0, alpha=0.25, loss_weight=1.0), loss_shape=dict(type='BoundedIoULoss', beta=0.2, loss_weight=1.0), loss_cls=dict( type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0), loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0), init_cfg=dict(type='Normal', layer='Conv2d', std=0.01, override=dict(type='Normal', name='conv_loc', std=0.01, bias_prob=0.01))): # yapf: disable super(AnchorHead, self).__init__(init_cfg) self.in_channels = in_channels self.num_classes = num_classes self.feat_channels = feat_channels self.deform_groups = deform_groups self.loc_filter_thr = loc_filter_thr # build approx_anchor_generator and square_anchor_generator assert (approx_anchor_generator['octave_base_scale'] == square_anchor_generator['scales'][0]) assert (approx_anchor_generator['strides'] == square_anchor_generator['strides']) self.approx_anchor_generator = build_anchor_generator( approx_anchor_generator) self.square_anchor_generator = build_anchor_generator( square_anchor_generator) self.approxs_per_octave = self.approx_anchor_generator \ .num_base_anchors[0] self.reg_decoded_bbox = reg_decoded_bbox # one anchor per location self.num_anchors = 1 self.use_sigmoid_cls = loss_cls.get('use_sigmoid', False) self.loc_focal_loss = loss_loc['type'] in ['FocalLoss'] self.sampling = loss_cls['type'] not in ['FocalLoss'] self.ga_sampling = train_cfg is not None and hasattr( train_cfg, 'ga_sampler') if self.use_sigmoid_cls: self.cls_out_channels = self.num_classes else: self.cls_out_channels = self.num_classes + 1 # build bbox_coder self.anchor_coder = build_bbox_coder(anchor_coder) self.bbox_coder = build_bbox_coder(bbox_coder) # build losses self.loss_loc = build_loss(loss_loc) self.loss_shape = build_loss(loss_shape) self.loss_cls = build_loss(loss_cls) self.loss_bbox = build_loss(loss_bbox) self.train_cfg = train_cfg self.test_cfg = test_cfg if self.train_cfg: self.assigner = build_assigner(self.train_cfg.assigner) # use PseudoSampler when sampling is False if self.sampling and hasattr(self.train_cfg, 'sampler'): sampler_cfg = self.train_cfg.sampler else: sampler_cfg = dict(type='PseudoSampler') self.sampler = build_sampler(sampler_cfg, context=self) self.ga_assigner = build_assigner(self.train_cfg.ga_assigner) if self.ga_sampling: ga_sampler_cfg = self.train_cfg.ga_sampler else: ga_sampler_cfg = dict(type='PseudoSampler') self.ga_sampler = build_sampler(ga_sampler_cfg, context=self) self.fp16_enabled = False self._init_layers()
def __init__(self, num_classes, in_channels, regress_ranges=((-1, 64), (64, 128), (128, 256), (256, 512), (512, INF)), center_sampling=False, center_sample_radius=1.5, sync_num_pos=True, gradient_mul=0.1, bbox_norm_type='reg_denom', loss_cls_fl=dict(type='FocalLoss', use_sigmoid=True, gamma=2.0, alpha=0.25, loss_weight=1.0), use_vfl=True, loss_cls=dict(type='VarifocalLoss', use_sigmoid=True, alpha=0.75, gamma=2.0, iou_weighted=True, loss_weight=1.0), loss_bbox=dict(type='GIoULoss', loss_weight=1.5), loss_bbox_refine=dict(type='GIoULoss', loss_weight=2.0), norm_cfg=dict(type='GN', num_groups=32, requires_grad=True), use_atss=True, reg_cls_branch=False, anchor_generator=dict(type='AnchorGenerator', ratios=[1.0], octave_base_scale=8, scales_per_octave=1, center_offset=0.0, strides=[8, 16, 32, 64, 128]), **kwargs): # dcn base offsets, adapted from reppoints_head.py self.num_out = len(anchor_generator["strides"]) self.anchor_generator = build_anchor_generator(anchor_generator) self.num_anchors = self.anchor_generator.num_base_anchors[0] self.reg_cls_branch = reg_cls_branch super(FCOSHead, self).__init__(num_classes, in_channels, norm_cfg=norm_cfg, **kwargs) self.regress_ranges = regress_ranges self.reg_denoms = [ regress_range[-1] for regress_range in regress_ranges ] self.reg_denoms[-1] = self.reg_denoms[-2] * 2 self.center_sampling = center_sampling self.center_sample_radius = center_sample_radius self.sync_num_pos = sync_num_pos self.bbox_norm_type = bbox_norm_type self.gradient_mul = gradient_mul self.use_vfl = use_vfl if self.use_vfl: self.loss_cls = build_loss(loss_cls) else: self.loss_cls = build_loss(loss_cls_fl) self.loss_bbox = build_loss(loss_bbox) self.loss_bbox_refine = build_loss(loss_bbox_refine) # for getting ATSS targets self.use_atss = use_atss self.use_sigmoid_cls = loss_cls.get('use_sigmoid', False) self.anchor_center_offset = anchor_generator['center_offset'] self.num_anchors = self.anchor_generator.num_base_anchors[0] self.sampling = False if self.train_cfg: self.assigner = build_assigner(self.train_cfg.assigner) sampler_cfg = dict(type='PseudoSampler') self.sampler = build_sampler(sampler_cfg, context=self)
def __init__(self, num_classes, in_channels, out_channels=(1024, 512, 256), anchor_generator=dict(type='YOLOAnchorGenerator', base_sizes=[[(116, 90), (156, 198), (373, 326)], [(30, 61), (62, 45), (59, 119)], [(10, 13), (16, 30), (33, 23)]], strides=[32, 16, 8]), bbox_coder=dict(type='YOLOBBoxCoder'), featmap_strides=[32, 16, 8], one_hot_smoother=0., conv_cfg=None, norm_cfg=dict(type='BN', requires_grad=True), act_cfg=dict(type='LeakyReLU', negative_slope=0.1), loss_cls=dict(type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0), loss_conf=dict(type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0), loss_xy=dict(type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0), loss_wh=dict(type='MSELoss', loss_weight=1.0), train_cfg=None, test_cfg=None): super(YOLOV3Head, self).__init__() # Check params assert (len(in_channels) == len(out_channels) == len(featmap_strides)) self.num_classes = num_classes self.in_channels = in_channels self.out_channels = out_channels self.featmap_strides = featmap_strides self.train_cfg = train_cfg self.test_cfg = test_cfg if self.train_cfg: self.assigner = build_assigner(self.train_cfg.assigner) if hasattr(self.train_cfg, 'sampler'): sampler_cfg = self.train_cfg.sampler else: sampler_cfg = dict(type='PseudoSampler') self.sampler = build_sampler(sampler_cfg, context=self) self.one_hot_smoother = one_hot_smoother self.conv_cfg = conv_cfg self.norm_cfg = norm_cfg self.act_cfg = act_cfg self.bbox_coder = build_bbox_coder(bbox_coder) self.anchor_generator = build_anchor_generator(anchor_generator) self.loss_cls = build_loss(loss_cls) self.loss_conf = build_loss(loss_conf) self.loss_xy = build_loss(loss_xy) self.loss_wh = build_loss(loss_wh) # usually the numbers of anchors for each level are the same # except SSD detectors self.num_anchors = self.anchor_generator.num_base_anchors[0] assert len( self.anchor_generator.num_base_anchors) == len(featmap_strides) self._init_layers()
def __init__(self, num_classes, in_channels, regress_ranges=((-1, 64), (64, 128), (128, 256), (256, 512), (512, INF)), center_sampling=False, center_sample_radius=1.5, sync_num_pos=True, gradient_mul=0.1, bbox_norm_type='reg_denom', loss_cls_fl=dict(type='FocalLoss', use_sigmoid=True, gamma=2.0, alpha=0.25, loss_weight=1.0), use_vfl=True, loss_cls=dict(type='VarifocalLoss', use_sigmoid=True, alpha=0.75, gamma=2.0, iou_weighted=True, loss_weight=1.0), loss_bbox=dict(type='GIoULoss', loss_weight=1.5), loss_bbox_refine=dict(type='GIoULoss', loss_weight=2.0), norm_cfg=dict(type='GN', num_groups=32, requires_grad=True), use_atss=True, anchor_generator=dict(type='AnchorGenerator', ratios=[1.0], octave_base_scale=8, scales_per_octave=1, center_offset=0.0, strides=[8, 16, 32, 64, 128]), **kwargs): # dcn base offsets, adapted from reppoints_head.py self.num_dconv_points = 9 self.dcn_kernel = int(np.sqrt(self.num_dconv_points)) self.dcn_pad = int((self.dcn_kernel - 1) / 2) dcn_base = np.arange(-self.dcn_pad, self.dcn_pad + 1).astype(np.float64) dcn_base_y = np.repeat(dcn_base, self.dcn_kernel) dcn_base_x = np.tile(dcn_base, self.dcn_kernel) dcn_base_offset = np.stack([dcn_base_y, dcn_base_x], axis=1).reshape( (-1)) self.dcn_base_offset = torch.tensor(dcn_base_offset).view(1, -1, 1, 1) super(FCOSHead, self).__init__(num_classes, in_channels, norm_cfg=norm_cfg, **kwargs) self.regress_ranges = regress_ranges self.reg_denoms = [ regress_range[-1] for regress_range in regress_ranges ] self.reg_denoms[-1] = self.reg_denoms[-2] * 2 self.center_sampling = center_sampling self.center_sample_radius = center_sample_radius self.sync_num_pos = sync_num_pos self.bbox_norm_type = bbox_norm_type self.gradient_mul = gradient_mul self.use_vfl = use_vfl if self.use_vfl: self.loss_cls = build_loss(loss_cls) else: self.loss_cls = build_loss(loss_cls_fl) self.loss_bbox = build_loss(loss_bbox) self.loss_bbox_refine = build_loss(loss_bbox_refine) # for getting ATSS targets self.use_atss = use_atss self.use_sigmoid_cls = loss_cls.get('use_sigmoid', False) self.anchor_generator = build_anchor_generator(anchor_generator) self.anchor_center_offset = anchor_generator['center_offset'] self.num_anchors = self.anchor_generator.num_base_anchors[0] self.sampling = False if self.train_cfg: self.assigner = build_assigner(self.train_cfg.assigner) sampler_cfg = dict(type='PseudoSampler') self.sampler = build_sampler(sampler_cfg, context=self)
def __init__(self, num_classes, in_channels, stacked_convs=4, feat_channels=256, approx_anchor_generator=dict( type='AnchorGenerator', octave_base_scale=4, scales_per_octave=3, ratios=[0.5, 1.0, 2.0], strides=[8, 16, 32, 64, 128]), square_anchor_generator=dict( type='AnchorGenerator', ratios=[1.0], scales=[4], strides=[8, 16, 32, 64, 128]), conv_cfg=None, norm_cfg=None, bbox_coder=dict( type='BucketingBBoxCoder', num_buckets=14, scale_factor=3.0), reg_decoded_bbox=False, background_label=None, train_cfg=None, test_cfg=None, loss_cls=dict( type='FocalLoss', use_sigmoid=True, gamma=2.0, alpha=0.25, loss_weight=1.0), loss_bbox_cls=dict( type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.5), loss_bbox_reg=dict( type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=1.5)): super(SABLRetinaHead, self).__init__() self.in_channels = in_channels self.num_classes = num_classes self.feat_channels = feat_channels self.num_buckets = bbox_coder['num_buckets'] self.side_num = int(np.ceil(self.num_buckets / 2)) assert (approx_anchor_generator['octave_base_scale'] == square_anchor_generator['scales'][0]) assert (approx_anchor_generator['strides'] == square_anchor_generator['strides']) self.approx_anchor_generator = build_anchor_generator( approx_anchor_generator) self.square_anchor_generator = build_anchor_generator( square_anchor_generator) self.approxs_per_octave = ( self.approx_anchor_generator.num_base_anchors[0]) # one anchor per location self.num_anchors = 1 self.stacked_convs = stacked_convs self.conv_cfg = conv_cfg self.norm_cfg = norm_cfg self.reg_decoded_bbox = reg_decoded_bbox self.background_label = ( num_classes if background_label is None else background_label) self.use_sigmoid_cls = loss_cls.get('use_sigmoid', False) self.sampling = loss_cls['type'] not in [ 'FocalLoss', 'GHMC', 'QualityFocalLoss' ] if self.use_sigmoid_cls: self.cls_out_channels = num_classes else: self.cls_out_channels = num_classes + 1 self.bbox_coder = build_bbox_coder(bbox_coder) self.loss_cls = build_loss(loss_cls) self.loss_bbox_cls = build_loss(loss_bbox_cls) self.loss_bbox_reg = build_loss(loss_bbox_reg) self.train_cfg = train_cfg self.test_cfg = test_cfg if self.train_cfg: self.assigner = build_assigner(self.train_cfg.assigner) # use PseudoSampler when sampling is False if self.sampling and hasattr(self.train_cfg, 'sampler'): sampler_cfg = self.train_cfg.sampler else: sampler_cfg = dict(type='PseudoSampler') self.sampler = build_sampler(sampler_cfg, context=self) self.fp16_enabled = False self._init_layers()
def __init__(self, num_classes, in_channels, train_cfg, test_cfg, feat_channels=256, use_direction_classifier=True, anchor_generator=dict( type='Anchor3DRangeGenerator', range=[0, -39.68, -1.78, 69.12, 39.68, -1.78], strides=[2], sizes=[[1.6, 3.9, 1.56]], rotations=[0, 1.57], custom_values=[], reshape_out=False), assigner_per_size=False, assign_per_class=False, diff_rad_by_sin=True, dir_offset=0, dir_limit_offset=1, bbox_coder=dict(type='DeltaXYZWLHRBBoxCoder'), loss_cls=dict( type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0), loss_bbox=dict( type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=2.0), loss_dir=dict(type='CrossEntropyLoss', loss_weight=0.2)): super().__init__() self.in_channels = in_channels self.num_classes = num_classes self.feat_channels = feat_channels self.diff_rad_by_sin = diff_rad_by_sin self.use_direction_classifier = use_direction_classifier self.train_cfg = train_cfg self.test_cfg = test_cfg self.assigner_per_size = assigner_per_size self.assign_per_class = assign_per_class self.dir_offset = dir_offset self.dir_limit_offset = dir_limit_offset self.fp16_enabled = False # build anchor generator self.anchor_generator = build_anchor_generator(anchor_generator) # In 3D detection, the anchor stride is connected with anchor size self.num_anchors = self.anchor_generator.num_base_anchors # num_rot * num_size # build box coder self.bbox_coder = build_bbox_coder(bbox_coder) self.box_code_size = self.bbox_coder.code_size # build loss function self.use_sigmoid_cls = loss_cls.get('use_sigmoid', False) self.sampling = loss_cls['type'] not in ['FocalLoss', 'GHMC'] if not self.use_sigmoid_cls: self.num_classes += 1 self.loss_cls = build_loss(loss_cls) self.loss_bbox = build_loss(loss_bbox) self.loss_dir = build_loss(loss_dir) self.fp16_enabled = False self._init_layers() self._init_assigner_sampler()
def __init__( self, num_classes=80, in_channels=(512, 1024, 512, 256, 256, 256), cls_weight=1.0, ## bbox_weight=1.0, ## anchor_generator=dict(type='SSDAnchorGenerator', scale_major=False, input_size=300, strides=[8, 16, 32, 64, 100, 300], ratios=([2], [2, 3], [2, 3], [2, 3], [2], [2]), basesize_ratio_range=(0.1, 0.9)), bbox_coder=dict( type='DeltaXYWHBBoxCoder', clip_border=True, target_means=[.0, .0, .0, .0], target_stds=[1.0, 1.0, 1.0, 1.0], ), reg_decoded_bbox=False, train_cfg=None, test_cfg=None): super(AnchorHead, self).__init__() self.num_classes = num_classes self.in_channels = in_channels self.cls_out_channels = num_classes + 1 # add background class self.anchor_generator = build_anchor_generator(anchor_generator) num_anchors = self.anchor_generator.num_base_anchors reg_convs = [] cls_convs = [] for i in range(len(in_channels)): reg_convs.append( nn.Conv2d(in_channels[i], num_anchors[i] * 4, kernel_size=3, padding=1)) cls_convs.append( nn.Conv2d(in_channels[i], num_anchors[i] * (num_classes + 1), kernel_size=3, padding=1)) self.reg_convs = nn.ModuleList(reg_convs) self.cls_convs = nn.ModuleList(cls_convs) self.bbox_coder = build_bbox_coder(bbox_coder) self.reg_decoded_bbox = reg_decoded_bbox self.use_sigmoid_cls = False self.cls_focal_loss = False self.train_cfg = train_cfg self.test_cfg = test_cfg # set sampling=False for archor_target self.sampling = False if self.train_cfg: self.assigner = build_assigner(self.train_cfg.assigner) # SSD sampling=False so use PseudoSampler sampler_cfg = dict(type='PseudoSampler') self.sampler = build_sampler(sampler_cfg, context=self) self.fp16_enabled = False ########################## self.loss_cls_history = [] self.loss_bbox_history = [] self.cls_weight = cls_weight ## self.bbox_weight = bbox_weight ##
def __init__(self, num_classes=80, in_channels=(512, 1024, 512, 256, 256, 256), anchor_generator=dict( type='SSDAnchorGenerator', scale_major=False, input_size=300, strides=[8, 16, 32, 64, 100, 300], ratios=([2], [2, 3], [2, 3], [2, 3], [2], [2]), basesize_ratio_range=(0.1, 0.9)), background_label=None, bbox_coder=dict( type='DeltaXYWHBBoxCoder', target_means=[.0, .0, .0, .0], target_stds=[1.0, 1.0, 1.0, 1.0], ), reg_decoded_bbox=False, train_cfg=None, test_cfg=None): super(AnchorHead, self).__init__() self.num_classes = num_classes self.in_channels = in_channels self.cls_out_channels = num_classes + 1 # add background class self.anchor_generator = build_anchor_generator(anchor_generator) num_anchors = self.anchor_generator.num_base_anchors reg_convs = [] cls_convs = [] for i in range(len(in_channels)): reg_convs.append( nn.Conv2d( in_channels[i], num_anchors[i] * 4, kernel_size=3, padding=1)) cls_convs.append( nn.Conv2d( in_channels[i], num_anchors[i] * (num_classes + 1), kernel_size=3, padding=1)) self.reg_convs = nn.ModuleList(reg_convs) self.cls_convs = nn.ModuleList(cls_convs) self.background_label = ( num_classes if background_label is None else background_label) # background_label should be either 0 or num_classes assert (self.background_label == 0 or self.background_label == num_classes) self.bbox_coder = build_bbox_coder(bbox_coder) self.reg_decoded_bbox = reg_decoded_bbox self.use_sigmoid_cls = False self.cls_focal_loss = False self.train_cfg = train_cfg self.test_cfg = test_cfg # set sampling=False for archor_target self.sampling = False if self.train_cfg: self.assigner = build_assigner(self.train_cfg.assigner) # SSD sampling=False so use PseudoSampler sampler_cfg = dict(type='PseudoSampler') self.sampler = build_sampler(sampler_cfg, context=self) self.fp16_enabled = False self.TP = 0 self.FP = 0 self.num_GT = 0 self.num_PRED = 0 self.stats = torch.zeros((21, 2)) self.matrix = torch.zeros((21, 21), dtype=torch.long) print('we are using loss4 head')
def __init__(self, num_classes=80, in_channels=(512, 1024, 512, 256, 256, 256), stacked_convs=0, feat_channels=256, use_depthwise=False, conv_cfg=None, norm_cfg=None, act_cfg=None, anchor_generator=dict( type='SSDAnchorGenerator', scale_major=False, input_size=300, strides=[8, 16, 32, 64, 100, 300], ratios=([2], [2, 3], [2, 3], [2, 3], [2], [2]), basesize_ratio_range=(0.1, 0.9)), bbox_coder=dict( type='DeltaXYWHBBoxCoder', clip_border=True, target_means=[.0, .0, .0, .0], target_stds=[1.0, 1.0, 1.0, 1.0], ), reg_decoded_bbox=False, train_cfg=None, test_cfg=None, init_cfg=dict( type='Xavier', layer='Conv2d', distribution='uniform', bias=0)): super(AnchorHead, self).__init__(init_cfg) self.num_classes = num_classes self.in_channels = in_channels self.stacked_convs = stacked_convs self.feat_channels = feat_channels self.use_depthwise = use_depthwise self.conv_cfg = conv_cfg self.norm_cfg = norm_cfg self.act_cfg = act_cfg self.cls_out_channels = num_classes + 1 # add background class self.anchor_generator = build_anchor_generator(anchor_generator) self.num_anchors = self.anchor_generator.num_base_anchors self._init_layers() self.bbox_coder = build_bbox_coder(bbox_coder) self.reg_decoded_bbox = reg_decoded_bbox self.use_sigmoid_cls = False self.cls_focal_loss = False self.train_cfg = train_cfg self.test_cfg = test_cfg # set sampling=False for archor_target self.sampling = False if self.train_cfg: self.assigner = build_assigner(self.train_cfg.assigner) # SSD sampling=False so use PseudoSampler sampler_cfg = dict(type='PseudoSampler') self.sampler = build_sampler(sampler_cfg, context=self) self.fp16_enabled = False self.draw_assign_results_path = self.train_cfg.draw_assign_results_path if hasattr(self.train_cfg, 'draw_assign_results_path') else '' self.draw_label = self.train_cfg.draw_label if hasattr(self.train_cfg, 'draw_label') else []
def train_detector(model, dataset, cfg, distributed=False, validate=False, timestamp=None, meta=None): logger = get_root_logger(cfg.log_level) # prepare data loaders dataset = dataset if isinstance(dataset, (list, tuple)) else [dataset] if 'imgs_per_gpu' in cfg.data: logger.warning('"imgs_per_gpu" is deprecated in MMDet V2.0. ' 'Please use "samples_per_gpu" instead') if 'samples_per_gpu' in cfg.data: logger.warning( f'Got "imgs_per_gpu"={cfg.data.imgs_per_gpu} and ' f'"samples_per_gpu"={cfg.data.samples_per_gpu}, "imgs_per_gpu"' f'={cfg.data.imgs_per_gpu} is used in this experiments') else: logger.warning( 'Automatically set "samples_per_gpu"="imgs_per_gpu"=' f'{cfg.data.imgs_per_gpu} in this experiments') cfg.data.samples_per_gpu = cfg.data.imgs_per_gpu data_loaders = [ build_dataloader( ds, cfg.data.samples_per_gpu, cfg.data.workers_per_gpu, # cfg.gpus will be ignored if distributed len(cfg.gpu_ids), dist=distributed, seed=cfg.seed) for ds in dataset ] # put model on gpus if distributed: find_unused_parameters = cfg.get('find_unused_parameters', False) # Sets the `find_unused_parameters` parameter in # torch.nn.parallel.DistributedDataParallel model = MMDistributedDataParallel( model.cuda(), device_ids=[torch.cuda.current_device()], broadcast_buffers=False, find_unused_parameters=find_unused_parameters) else: model = MMDataParallel( model.cuda(cfg.gpu_ids[0]), device_ids=cfg.gpu_ids) # build runner optimizer = build_optimizer(model, cfg.optimizer) if 'runner' not in cfg: cfg.runner = { 'type': 'EpochBasedRunner', 'max_epochs': cfg.total_epochs } warnings.warn( 'config is now expected to have a `runner` section, ' 'please set `runner` in your config.', UserWarning) else: if 'total_epochs' in cfg: assert cfg.total_epochs == cfg.runner.max_epochs runner = build_runner( cfg.runner, default_args=dict( model=model, optimizer=optimizer, work_dir=cfg.work_dir, logger=logger, meta=meta)) # an ugly workaround to make .log and .log.json filenames the same runner.timestamp = timestamp # fp16 setting fp16_cfg = cfg.get('fp16', None) if fp16_cfg is not None: optimizer_config = Fp16OptimizerHook( **cfg.optimizer_config, **fp16_cfg, distributed=distributed) elif distributed and 'type' not in cfg.optimizer_config: optimizer_config = OptimizerHook(**cfg.optimizer_config) else: optimizer_config = cfg.optimizer_config # register hooks runner.register_training_hooks(cfg.lr_config, optimizer_config, cfg.checkpoint_config, cfg.log_config, cfg.get('momentum_config', None)) if distributed: if isinstance(runner, EpochBasedRunner): runner.register_hook(DistSamplerSeedHook()) # register eval hooks if validate: # Support batch_size > 1 in validation val_samples_per_gpu = cfg.data.val.pop('samples_per_gpu', 1) if val_samples_per_gpu > 1: # Replace 'ImageToTensor' to 'DefaultFormatBundle' cfg.data.val.pipeline = replace_ImageToTensor( cfg.data.val.pipeline) val_dataset = build_dataset(cfg.data.val, dict(test_mode=True)) val_dataloader = build_dataloader( val_dataset, samples_per_gpu=val_samples_per_gpu, workers_per_gpu=cfg.data.workers_per_gpu, dist=distributed, shuffle=False) eval_cfg = cfg.get('evaluation', {}) eval_cfg['by_epoch'] = cfg.runner['type'] != 'IterBasedRunner' eval_hook = DistEvalHook if distributed else EvalHook runner.register_hook(eval_hook(val_dataloader, **eval_cfg)) # user-defined hooks if cfg.get('custom_hooks', None): custom_hooks = cfg.custom_hooks assert isinstance(custom_hooks, list), \ f'custom_hooks expect list type, but got {type(custom_hooks)}' for hook_cfg in cfg.custom_hooks: assert isinstance(hook_cfg, dict), \ 'Each item in custom_hooks expects dict type, but got ' \ f'{type(hook_cfg)}' hook_cfg = hook_cfg.copy() priority = hook_cfg.pop('priority', 'NORMAL') hook = build_from_cfg(hook_cfg, HOOKS) runner.register_hook(hook, priority=priority) if cfg.resume_from: runner.resume(cfg.resume_from) elif cfg.load_from: runner.load_checkpoint(cfg.load_from) # runner.run(data_loaders, cfg.workflow) anchor_generator = build_anchor_generator(cfg.model.rpn_head.anchor_generator) assigner = build_assigner(cfg.model.train_cfg.rpn.assigner) total_num_targets = torch.tensor([0] * 5) for iteration, data in enumerate(data_loaders): for i in data: # print(i.keys()) img_metas = i['img_metas']._data # print(img_metas) num_imgs = len(img_metas) images = i['img']._data gt_bboxes = i['gt_bboxes']._data h, w = images[0].size()[-2:] features_shape = [] for i in range(2, 7): f_shape = [int(h/(2**i)), int(w/(2**i))] features_shape.append(f_shape) multi_level_anchors = anchor_generator.grid_anchors( features_shape) anchor_list = [multi_level_anchors for _ in range(num_imgs)] # for each image, we compute valid flags of multi level anchors valid_flag_list = [] for img_id, img_meta in enumerate(img_metas): multi_level_flags = anchor_generator.valid_flags( features_shape, img_meta[0]['pad_shape']) valid_flag_list.append(multi_level_flags) # print(anchor_list, valid_flag_list) assert len(anchor_list) == len(valid_flag_list) == num_imgs # anchor number of multi levels num_level_anchors = [anchors.size(0) for anchors in anchor_list[0]] # concat all level anchors to a single tensor concat_anchor_list = [] concat_valid_flag_list = [] for i in range(num_imgs): assert len(anchor_list[i]) == len(valid_flag_list[i]) concat_anchor_list.append(torch.cat(anchor_list[i])) concat_valid_flag_list.append(torch.cat(valid_flag_list[i])) gt_bboxes_ignore_list= None # compute targets for each image if gt_bboxes_ignore_list is None: gt_bboxes_ignore_list = [None for _ in range(num_imgs)] inside_flags = anchor_inside_flags(concat_anchor_list[0], concat_valid_flag_list[0], img_metas[0][0]['img_shape'][:2], 0) if not inside_flags.any(): return (None, ) * 7 # assign gt and sample anchors anchors = concat_anchor_list[0][inside_flags, :] assign_result = assigner.assign( anchors.cpu(), gt_bboxes[0][0], gt_bboxes_ignore_list[0], None) print(assign_result.pos_gt_bboxes) pos_inds = torch.nonzero(assign_result.gt_inds > 0, as_tuple=False) labels = anchors.new_full((anchors.shape[0], ), -1, dtype=torch.long) labels[pos_inds] = 1 num_total_anchors = concat_anchor_list[0].size(0) labels = unmap( labels, num_total_anchors, inside_flags, fill=-1) # fill bg label match_results = images_to_levels([labels], num_level_anchors) # print(match_results) for idx, match_result in enumerate(match_results): num = torch.where(match_result==1)[0].numel() total_num_targets[idx] += num # print(total_num_targets) print(total_num_targets) print(total_num_targets)
def __init__(self, num_classes, in_channels, anchor_generator=dict(type='YOLOAnchorGenerator', base_sizes=[[(32, 32), (48, 48), (24, 32), (32, 48)], [(64, 64), (72, 72), (72, 96), (96, 96)], [(72, 96), (96, 96), (128, 128), (96, 128)]], strides=[16, 32, 64]), bbox_coder=dict(type='YOLOBBoxCoder'), featmap_strides=[16, 32, 64], one_hot_smoother=0., loss_cls=dict(type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0), loss_conf=dict(type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0), loss_xy=dict(type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0), loss_wh=dict(type='MSELoss', loss_weight=1.0), loss_iou=dict(type='CIoULoss', loss_weight=2.0), train_cfg=None, test_cfg=None): super(CZ_CoarseHead, self).__init__() # Check params assert (len(in_channels) == len(featmap_strides)) self.num_classes = num_classes self.in_channels = in_channels self.featmap_strides = featmap_strides self.train_cfg = train_cfg self.test_cfg = test_cfg self.sampling = False if self.train_cfg: self.assigner = build_assigner(self.train_cfg.assigner) if hasattr(self.train_cfg, 'sampler'): sampler_cfg = self.train_cfg.sampler else: sampler_cfg = dict(type='PseudoSampler') self.sampler = build_sampler(sampler_cfg, context=self) self.one_hot_smoother = one_hot_smoother self.bbox_coder = build_bbox_coder(bbox_coder) self.anchor_generator = build_anchor_generator(anchor_generator) self.loss_cls = build_loss(loss_cls) self.loss_conf = build_loss(loss_conf) self.loss_xy = build_loss(loss_xy) self.loss_wh = build_loss(loss_wh) self.loss_iou = build_loss(loss_iou) # usually the numbers of anchors for each level are the same # except SSD detectors self.num_anchors = self.anchor_generator.num_base_anchors[0] assert len( self.anchor_generator.num_base_anchors) == len(featmap_strides) self._init_layers()