def __init__(self, num_classes=80, in_channels=(512, 1024, 512, 256, 256, 256), anchor_generator=dict(type='SSDAnchorGenerator', scale_major=False, input_size=300, strides=[8, 16, 32, 64, 100, 300], ratios=([2], [2, 3], [2, 3], [2, 3], [2], [2]), basesize_ratio_range=(0.1, 0.9)), bbox_coder=dict( type='DeltaXYWHBBoxCoder', target_means=[.0, .0, .0, .0], target_stds=[1.0, 1.0, 1.0, 1.0], ), reg_decoded_bbox=False, train_cfg=None, test_cfg=None): super(AnchorHead, self).__init__() self.num_classes = num_classes self.in_channels = in_channels self.cls_out_channels = num_classes + 1 # add background class self.anchor_generator = build_anchor_generator(anchor_generator) num_anchors = self.anchor_generator.num_base_anchors reg_convs = [] cls_convs = [] for i in range(len(in_channels)): reg_convs.append( nn.Conv2d(in_channels[i], num_anchors[i] * 4, kernel_size=3, padding=1)) cls_convs.append( nn.Conv2d(in_channels[i], num_anchors[i] * (num_classes + 1), kernel_size=3, padding=1)) self.reg_convs = nn.ModuleList(reg_convs) self.cls_convs = nn.ModuleList(cls_convs) self.bbox_coder = build_bbox_coder(bbox_coder) self.reg_decoded_bbox = reg_decoded_bbox self.use_sigmoid_cls = False self.cls_focal_loss = False self.train_cfg = train_cfg self.test_cfg = test_cfg # set sampling=False for archor_target self.sampling = False self.debug = False if self.train_cfg: self.assigner = build_assigner(self.train_cfg.assigner) # SSD sampling=False so use PseudoSampler sampler_cfg = dict(type='PseudoSampler') self.sampler = build_sampler(sampler_cfg, context=self) self.debug = self.train_cfg.debug self.fp16_enabled = False
def __init__(self, with_avg_pool=False, with_cls=True, with_reg=True, roi_feat_size=7, in_channels=256, num_classes=80, bbox_coder=dict(type='DeltaXYWHBBoxCoder', target_means=[0., 0., 0., 0.], target_stds=[0.1, 0.1, 0.2, 0.2]), reg_class_agnostic=False, reg_decoded_bbox=False, loss_cls=dict(type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0)): super(BBoxHead, self).__init__() assert with_cls or with_reg self.with_avg_pool = with_avg_pool self.with_cls = with_cls self.with_reg = with_reg self.roi_feat_size = _pair(roi_feat_size) self.roi_feat_area = self.roi_feat_size[0] * self.roi_feat_size[1] self.in_channels = in_channels self.num_classes = num_classes self.reg_class_agnostic = reg_class_agnostic self.reg_decoded_bbox = reg_decoded_bbox self.fp16_enabled = False self.bbox_coder = build_bbox_coder(bbox_coder) self.loss_cls = build_loss(loss_cls) self.loss_bbox = build_loss(loss_bbox) in_channels = self.in_channels if self.with_avg_pool: self.avg_pool = nn.AvgPool2d(self.roi_feat_size) else: in_channels *= self.roi_feat_area if self.with_cls: # need to add background class self.fc_cls = nn.Linear(in_channels, num_classes + 1) if self.with_reg: out_dim_reg = 4 if reg_class_agnostic else 4 * num_classes self.fc_reg = nn.Linear(in_channels, out_dim_reg) self.debug_imgs = None
def __init__( self, num_classes, in_channels, feat_channels=256, approx_anchor_generator=dict( type='AnchorGenerator', octave_base_scale=8, scales_per_octave=3, ratios=[0.5, 1.0, 2.0], strides=[4, 8, 16, 32, 64]), square_anchor_generator=dict( type='AnchorGenerator', ratios=[1.0], scales=[8], strides=[4, 8, 16, 32, 64]), anchor_coder=dict( type='DeltaXYWHBBoxCoder', target_means=[.0, .0, .0, .0], target_stds=[1.0, 1.0, 1.0, 1.0] ), bbox_coder=dict( type='DeltaXYWHBBoxCoder', target_means=[.0, .0, .0, .0], target_stds=[1.0, 1.0, 1.0, 1.0] ), reg_decoded_bbox=False, deform_groups=4, loc_filter_thr=0.01, train_cfg=None, test_cfg=None, loss_loc=dict( type='FocalLoss', use_sigmoid=True, gamma=2.0, alpha=0.25, loss_weight=1.0), loss_shape=dict(type='BoundedIoULoss', beta=0.2, loss_weight=1.0), loss_cls=dict( type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0), loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0)): # yapf: disable super(AnchorHead, self).__init__() self.in_channels = in_channels self.num_classes = num_classes self.feat_channels = feat_channels self.deform_groups = deform_groups self.loc_filter_thr = loc_filter_thr # build approx_anchor_generator and square_anchor_generator assert (approx_anchor_generator['octave_base_scale'] == square_anchor_generator['scales'][0]) assert (approx_anchor_generator['strides'] == square_anchor_generator['strides']) self.approx_anchor_generator = build_anchor_generator( approx_anchor_generator) self.square_anchor_generator = build_anchor_generator( square_anchor_generator) self.approxs_per_octave = self.approx_anchor_generator \ .num_base_anchors[0] self.reg_decoded_bbox = reg_decoded_bbox # one anchor per location self.num_anchors = 1 self.use_sigmoid_cls = loss_cls.get('use_sigmoid', False) self.loc_focal_loss = loss_loc['type'] in ['FocalLoss'] self.sampling = loss_cls['type'] not in ['FocalLoss'] self.ga_sampling = train_cfg is not None and hasattr( train_cfg, 'ga_sampler') if self.use_sigmoid_cls: self.cls_out_channels = self.num_classes else: self.cls_out_channels = self.num_classes + 1 # build bbox_coder self.anchor_coder = build_bbox_coder(anchor_coder) self.bbox_coder = build_bbox_coder(bbox_coder) # build losses self.loss_loc = build_loss(loss_loc) self.loss_shape = build_loss(loss_shape) self.loss_cls = build_loss(loss_cls) self.loss_bbox = build_loss(loss_bbox) self.train_cfg = train_cfg self.test_cfg = test_cfg self.debug = False if self.train_cfg: self.assigner = build_assigner(self.train_cfg.assigner) # use PseudoSampler when sampling is False if self.sampling and hasattr(self.train_cfg, 'sampler'): sampler_cfg = self.train_cfg.sampler else: sampler_cfg = dict(type='PseudoSampler') self.sampler = build_sampler(sampler_cfg, context=self) self.ga_assigner = build_assigner(self.train_cfg.ga_assigner) if self.ga_sampling: ga_sampler_cfg = self.train_cfg.ga_sampler else: ga_sampler_cfg = dict(type='PseudoSampler') self.ga_sampler = build_sampler(ga_sampler_cfg, context=self) self.debug = self.train_cfg.debug self.fp16_enabled = False self._init_layers()
def __init__(self, num_classes, cls_in_channels=256, reg_in_channels=256, roi_feat_size=7, reg_feat_up_ratio=2, reg_pre_kernel=3, reg_post_kernel=3, reg_pre_num=2, reg_post_num=1, cls_out_channels=1024, reg_offset_out_channels=256, reg_cls_out_channels=256, num_cls_fcs=1, num_reg_fcs=0, reg_class_agnostic=True, norm_cfg=None, bbox_coder=dict(type='BucketingBBoxCoder', num_buckets=14, scale_factor=1.7), loss_cls=dict(type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), loss_bbox_cls=dict(type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0), loss_bbox_reg=dict(type='SmoothL1Loss', beta=0.1, loss_weight=1.0)): super(SABLHead, self).__init__() self.cls_in_channels = cls_in_channels self.reg_in_channels = reg_in_channels self.roi_feat_size = roi_feat_size self.reg_feat_up_ratio = int(reg_feat_up_ratio) self.num_buckets = bbox_coder['num_buckets'] assert self.reg_feat_up_ratio // 2 >= 1 self.up_reg_feat_size = roi_feat_size * self.reg_feat_up_ratio assert self.up_reg_feat_size == bbox_coder['num_buckets'] self.reg_pre_kernel = reg_pre_kernel self.reg_post_kernel = reg_post_kernel self.reg_pre_num = reg_pre_num self.reg_post_num = reg_post_num self.num_classes = num_classes self.cls_out_channels = cls_out_channels self.reg_offset_out_channels = reg_offset_out_channels self.reg_cls_out_channels = reg_cls_out_channels self.num_cls_fcs = num_cls_fcs self.num_reg_fcs = num_reg_fcs self.reg_class_agnostic = reg_class_agnostic assert self.reg_class_agnostic self.norm_cfg = norm_cfg self.bbox_coder = build_bbox_coder(bbox_coder) self.loss_cls = build_loss(loss_cls) self.loss_bbox_cls = build_loss(loss_bbox_cls) self.loss_bbox_reg = build_loss(loss_bbox_reg) self.cls_fcs = self._add_fc_branch(self.num_cls_fcs, self.cls_in_channels, self.roi_feat_size, self.cls_out_channels) self.side_num = int(np.ceil(self.num_buckets / 2)) if self.reg_feat_up_ratio > 1: self.upsample_x = nn.ConvTranspose1d(reg_in_channels, reg_in_channels, self.reg_feat_up_ratio, stride=self.reg_feat_up_ratio) self.upsample_y = nn.ConvTranspose1d(reg_in_channels, reg_in_channels, self.reg_feat_up_ratio, stride=self.reg_feat_up_ratio) self.reg_pre_convs = nn.ModuleList() for i in range(self.reg_pre_num): reg_pre_conv = ConvModule(reg_in_channels, reg_in_channels, kernel_size=reg_pre_kernel, padding=reg_pre_kernel // 2, norm_cfg=norm_cfg, act_cfg=dict(type='ReLU')) self.reg_pre_convs.append(reg_pre_conv) self.reg_post_conv_xs = nn.ModuleList() for i in range(self.reg_post_num): reg_post_conv_x = ConvModule(reg_in_channels, reg_in_channels, kernel_size=(1, reg_post_kernel), padding=(0, reg_post_kernel // 2), norm_cfg=norm_cfg, act_cfg=dict(type='ReLU')) self.reg_post_conv_xs.append(reg_post_conv_x) self.reg_post_conv_ys = nn.ModuleList() for i in range(self.reg_post_num): reg_post_conv_y = ConvModule(reg_in_channels, reg_in_channels, kernel_size=(reg_post_kernel, 1), padding=(reg_post_kernel // 2, 0), norm_cfg=norm_cfg, act_cfg=dict(type='ReLU')) self.reg_post_conv_ys.append(reg_post_conv_y) self.reg_conv_att_x = nn.Conv2d(reg_in_channels, 1, 1) self.reg_conv_att_y = nn.Conv2d(reg_in_channels, 1, 1) self.fc_cls = nn.Linear(self.cls_out_channels, self.num_classes + 1) self.relu = nn.ReLU(inplace=True) self.reg_cls_fcs = self._add_fc_branch(self.num_reg_fcs, self.reg_in_channels, 1, self.reg_cls_out_channels) self.reg_offset_fcs = self._add_fc_branch(self.num_reg_fcs, self.reg_in_channels, 1, self.reg_offset_out_channels) self.fc_reg_cls = nn.Linear(self.reg_cls_out_channels, 1) self.fc_reg_offset = nn.Linear(self.reg_offset_out_channels, 1)
def __init__(self, num_classes, in_channels, out_channels=(1024, 512, 256), anchor_generator=dict(type='YOLOAnchorGenerator', base_sizes=[[(116, 90), (156, 198), (373, 326)], [(30, 61), (62, 45), (59, 119)], [(10, 13), (16, 30), (33, 23)]], strides=[32, 16, 8]), bbox_coder=dict(type='YOLOBBoxCoder'), featmap_strides=[32, 16, 8], one_hot_smoother=0., conv_cfg=None, norm_cfg=dict(type='BN', requires_grad=True), act_cfg=dict(type='LeakyReLU', negative_slope=0.1), loss_cls=dict(type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0), loss_conf=dict(type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0), loss_xy=dict(type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0), loss_wh=dict(type='MSELoss', loss_weight=1.0), train_cfg=None, test_cfg=None): super(YOLOV3Head, self).__init__() # Check params assert (len(in_channels) == len(out_channels) == len(featmap_strides)) self.num_classes = num_classes self.in_channels = in_channels self.out_channels = out_channels self.featmap_strides = featmap_strides self.train_cfg = train_cfg self.test_cfg = test_cfg self.debug = False if self.train_cfg: self.assigner = build_assigner(self.train_cfg.assigner) if hasattr(self.train_cfg, 'sampler'): sampler_cfg = self.train_cfg.sampler else: sampler_cfg = dict(type='PseudoSampler') # yolo系列不需随机采样等操作 self.sampler = build_sampler(sampler_cfg, context=self) self.debug = self.train_cfg.debug self.one_hot_smoother = one_hot_smoother self.conv_cfg = conv_cfg self.norm_cfg = norm_cfg self.act_cfg = act_cfg self.bbox_coder = build_bbox_coder(bbox_coder) self.anchor_generator = build_anchor_generator(anchor_generator) self.loss_cls = build_loss(loss_cls) self.loss_conf = build_loss(loss_conf) self.loss_xy = build_loss(loss_xy) self.loss_wh = build_loss(loss_wh) # usually the numbers of anchors for each level are the same # except SSD detectors self.num_anchors = self.anchor_generator.num_base_anchors[0] assert len( self.anchor_generator.num_base_anchors) == len(featmap_strides) self._init_layers()
def __init__(self, num_classes, in_channels, feat_channels=256, anchor_generator=dict( type='AnchorGenerator', scales=[8, 16, 32], ratios=[0.5, 1.0, 2.0], strides=[4, 8, 16, 32, 64]), bbox_coder=dict( type='DeltaXYWHBBoxCoder', target_means=(.0, .0, .0, .0), target_stds=(1.0, 1.0, 1.0, 1.0)), reg_decoded_bbox=False, background_label=None, loss_cls=dict( type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0), loss_bbox=dict( type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=1.0), train_cfg=None, test_cfg=None): super(AnchorHead, self).__init__() self.in_channels = in_channels self.num_classes = num_classes self.feat_channels = feat_channels self.use_sigmoid_cls = loss_cls.get('use_sigmoid', False) # TODO better way to determine whether sample or not self.sampling = loss_cls['type'] not in [ 'FocalLoss', 'GHMC', 'QualityFocalLoss' ] if self.use_sigmoid_cls: self.cls_out_channels = num_classes else: self.cls_out_channels = num_classes + 1 if self.cls_out_channels <= 0: raise ValueError(f'num_classes={num_classes} is too small') self.reg_decoded_bbox = reg_decoded_bbox self.background_label = ( num_classes if background_label is None else background_label) # background_label should be either 0 or num_classes assert (self.background_label == 0 or self.background_label == num_classes) self.bbox_coder = build_bbox_coder(bbox_coder) self.loss_cls = build_loss(loss_cls) self.loss_bbox = build_loss(loss_bbox) self.train_cfg = train_cfg self.test_cfg = test_cfg self.debug = False if self.train_cfg: self.assigner = build_assigner(self.train_cfg.assigner) # use PseudoSampler when sampling is False if self.sampling and hasattr(self.train_cfg, 'sampler'): sampler_cfg = self.train_cfg.sampler else: sampler_cfg = dict(type='PseudoSampler') self.sampler = build_sampler(sampler_cfg, context=self) self.debug = self.train_cfg.debug self.fp16_enabled = False self.anchor_generator = build_anchor_generator(anchor_generator) # usually the numbers of anchors for each level are the same # except SSD detectors self.num_anchors = self.anchor_generator.num_base_anchors[0] self._init_layers()
def __init__(self, num_classes, in_channels, regress_ranges=((-1, 64), (64, 128), (128, 256), (256, 512), (512, INF)), center_sampling=False, center_sample_radius=1.5, sync_num_pos=True, gradient_mul=0.1, bbox_norm_type='reg_denom', loss_cls_fl=dict(type='FocalLoss', use_sigmoid=True, gamma=2.0, alpha=0.25, loss_weight=1.0), use_vfl=True, loss_cls=dict(type='VarifocalLoss', use_sigmoid=True, alpha=0.75, gamma=2.0, iou_weighted=True, loss_weight=1.0), loss_bbox=dict(type='GIoULoss', loss_weight=1.5), loss_bbox_refine=dict(type='GIoULoss', loss_weight=2.0), norm_cfg=dict(type='GN', num_groups=32, requires_grad=True), use_atss=True, bbox_coder=dict(type='DeltaXYWHBBoxCoder', target_means=[.0, .0, .0, .0], target_stds=[0.1, 0.1, 0.2, 0.2]), anchor_generator=dict(type='AnchorGenerator', ratios=[1.0], octave_base_scale=8, scales_per_octave=1, center_offset=0.0, strides=[8, 16, 32, 64, 128]), **kwargs): self.bbox_coder = build_bbox_coder(bbox_coder) # dcn base offsets, adapted from reppoints_head.py self.num_dconv_points = 9 self.dcn_kernel = int(np.sqrt(self.num_dconv_points)) self.dcn_pad = int((self.dcn_kernel - 1) / 2) dcn_base = np.arange(-self.dcn_pad, self.dcn_pad + 1).astype(np.float64) dcn_base_y = np.repeat(dcn_base, self.dcn_kernel) dcn_base_x = np.tile(dcn_base, self.dcn_kernel) dcn_base_offset = np.stack([dcn_base_y, dcn_base_x], axis=1).reshape( (-1)) # 3x3的位置offset self.dcn_base_offset = torch.tensor(dcn_base_offset).view(1, -1, 1, 1) super(FCOSHead, self).__init__(num_classes, in_channels, norm_cfg=norm_cfg, **kwargs) self.regress_ranges = regress_ranges self.reg_denoms = [ regress_range[-1] for regress_range in regress_ranges ] self.reg_denoms[-1] = self.reg_denoms[-2] * 2 self.center_sampling = center_sampling self.center_sample_radius = center_sample_radius self.sync_num_pos = sync_num_pos self.bbox_norm_type = bbox_norm_type self.gradient_mul = gradient_mul self.use_vfl = use_vfl if self.use_vfl: self.loss_cls = build_loss(loss_cls) else: self.loss_cls = build_loss(loss_cls_fl) self.loss_bbox = build_loss(loss_bbox) self.loss_bbox_refine = build_loss(loss_bbox_refine) # for getting ATSS targets self.use_atss = use_atss self.use_sigmoid_cls = loss_cls.get('use_sigmoid', False) self.anchor_generator = build_anchor_generator(anchor_generator) self.anchor_center_offset = anchor_generator['center_offset'] self.num_anchors = self.anchor_generator.num_base_anchors[0] self.sampling = False self.debug = False if self.train_cfg: self.assigner = build_assigner(self.train_cfg.assigner) sampler_cfg = dict(type='PseudoSampler') self.sampler = build_sampler(sampler_cfg, context=self) self.debug = self.train_cfg.debug
def __init__(self, num_classes, in_channels, stacked_convs=4, feat_channels=256, approx_anchor_generator=dict(type='AnchorGenerator', octave_base_scale=4, scales_per_octave=3, ratios=[0.5, 1.0, 2.0], strides=[8, 16, 32, 64, 128]), square_anchor_generator=dict(type='AnchorGenerator', ratios=[1.0], scales=[4], strides=[8, 16, 32, 64, 128]), conv_cfg=None, norm_cfg=None, bbox_coder=dict(type='BucketingBBoxCoder', num_buckets=14, scale_factor=3.0), reg_decoded_bbox=False, train_cfg=None, test_cfg=None, loss_cls=dict(type='FocalLoss', use_sigmoid=True, gamma=2.0, alpha=0.25, loss_weight=1.0), loss_bbox_cls=dict(type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.5), loss_bbox_reg=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=1.5)): super(SABLRetinaHead, self).__init__() self.in_channels = in_channels self.num_classes = num_classes self.feat_channels = feat_channels self.num_buckets = bbox_coder['num_buckets'] self.side_num = int(np.ceil(self.num_buckets / 2)) assert (approx_anchor_generator['octave_base_scale'] == square_anchor_generator['scales'][0]) assert (approx_anchor_generator['strides'] == square_anchor_generator['strides']) self.approx_anchor_generator = build_anchor_generator( approx_anchor_generator) self.square_anchor_generator = build_anchor_generator( square_anchor_generator) self.approxs_per_octave = ( self.approx_anchor_generator.num_base_anchors[0]) # one anchor per location self.num_anchors = 1 self.stacked_convs = stacked_convs self.conv_cfg = conv_cfg self.norm_cfg = norm_cfg self.reg_decoded_bbox = reg_decoded_bbox self.use_sigmoid_cls = loss_cls.get('use_sigmoid', False) self.sampling = loss_cls['type'] not in [ 'FocalLoss', 'GHMC', 'QualityFocalLoss' ] if self.use_sigmoid_cls: self.cls_out_channels = num_classes else: self.cls_out_channels = num_classes + 1 self.bbox_coder = build_bbox_coder(bbox_coder) self.loss_cls = build_loss(loss_cls) self.loss_bbox_cls = build_loss(loss_bbox_cls) self.loss_bbox_reg = build_loss(loss_bbox_reg) self.train_cfg = train_cfg self.test_cfg = test_cfg self.debug = False if self.train_cfg: self.assigner = build_assigner(self.train_cfg.assigner) # use PseudoSampler when sampling is False if self.sampling and hasattr(self.train_cfg, 'sampler'): sampler_cfg = self.train_cfg.sampler else: sampler_cfg = dict(type='PseudoSampler') self.sampler = build_sampler(sampler_cfg, context=self) self.debug = self.train_cfg.debug self.fp16_enabled = False self._init_layers()