def __init__(self, in_channels=[128], tasks=None, train_cfg=None, test_cfg=None, bbox_coder=None, common_heads=dict(), loss_cls=dict(type='GaussianFocalLoss', reduction='mean'), loss_bbox=dict(type='L1Loss', reduction='none', loss_weight=0.25), seperate_head=dict(type='SeparateHead', init_bias=-2.19, final_kernel=3), feature_channel=64, num_heatmap_convs=2, conv_cfg=dict(type='Conv2d'), norm_cfg=dict(type='BN2d'), bias='auto', norm_bbox=True, pred_mode=['voxel'], num_head_convs=2, num_classes=17, ignore_label=0, grid_size=[480, 360, 32]): super(SelectivesegHead, self).__init__() self.class_names = [t['class_names'] for t in tasks] self.train_cfg = train_cfg self.test_cfg = test_cfg self.in_channels = in_channels self.num_classes = num_classes self.norm_bbox = norm_bbox self.loss_cls = build_loss(loss_cls) self.loss_bbox = build_loss(loss_bbox) self.bbox_coder = build_bbox_coder(bbox_coder) self.fp16_enabled = False self.pred_mode = pred_mode assert ('voxel' in self.pred_mode) or ('point' in self.pred_mode) or ( 'pillar' in self.pred_mode) self.ignore_label = ignore_label self.grid_size = grid_size semantic_conv = [] for i in range(num_head_convs): if i > 0: in_channels = feature_channel conv = ConvModule(in_channels, feature_channel, kernel_size=1, conv_cfg=conv_cfg, norm_cfg=norm_cfg, bias=False) semantic_conv.append(conv) self.semantic_conv = nn.Sequential(*semantic_conv) self.semantic_cls = nn.Conv1d(feature_channel, self.num_classes, 1) self.semantic_CE_loss = torch.nn.CrossEntropyLoss( ignore_index=ignore_label)
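# Minimal sketch of the semantic tail built above (an illustration, not the
# head's actual forward pass, which is not shown here): a 1x1 Conv1d maps
# per-point features (B, feature_channel, N) to class logits, and the
# CrossEntropyLoss ignores points labelled with `ignore_label`. All tensor
# shapes below are illustrative assumptions.
import torch
import torch.nn as nn

feature_channel, num_classes, ignore_label = 64, 17, 0
feats = torch.randn(2, feature_channel, 1024)      # (batch, channels, num_points)
semantic_cls = nn.Conv1d(feature_channel, num_classes, 1)
logits = semantic_cls(feats)                        # (2, 17, 1024)
labels = torch.randint(0, num_classes, (2, 1024))   # dummy per-point labels
loss = nn.CrossEntropyLoss(ignore_index=ignore_label)(logits, labels)
print(logits.shape, loss.item())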
class MobileNetV2(nn.Module): """MobileNetV2 backbone. Args: widen_factor (float): Width multiplier, multiply number of channels in each layer by this amount. Default: 1.0. strides (Sequence[int], optional): Strides of the first block of each layer. If not specified, default config in ``arch_setting`` will be used. dilations (Sequence[int]): Dilation of each layer. out_indices (None or Sequence[int]): Output from which stages. Default: (7, ). frozen_stages (int): Stages to be frozen (all param fixed). Default: -1, which means not freezing any parameters. conv_cfg (dict): Config dict for convolution layer. Default: None, which means using conv2d. norm_cfg (dict): Config dict for normalization layer. Default: dict(type='BN'). act_cfg (dict): Config dict for activation layer. Default: dict(type='ReLU6'). norm_eval (bool): Whether to set norm layers to eval mode, namely, freeze running stats (mean and var). Note: Effect on Batch Norm and its variants only. Default: False. with_cp (bool): Use checkpoint or not. Using checkpoint will save some memory while slowing down the training speed. Default: False. """ # Parameters to build layers. 3 parameters are needed to construct a # layer, from left to right: expand_ratio, channel, num_blocks. arch_settings = [[1, 16, 1], [6, 24, 2], [6, 32, 3], [6, 64, 4], [6, 96, 3], [6, 160, 3], [6, 320, 1]] def __init__(self, widen_factor=1., strides=(1, 2, 2, 2, 1, 2, 1), dilations=(1, 1, 1, 1, 1, 1, 1), out_indices=(1, 2, 4, 6), frozen_stages=-1, conv_cfg=None, norm_cfg=dict(type='BN'), act_cfg=dict(type='ReLU6'), norm_eval=False, with_cp=False): super(MobileNetV2, self).__init__() self.widen_factor = widen_factor self.strides = strides self.dilations = dilations assert len(strides) == len(dilations) == len(self.arch_settings) self.out_indices = out_indices for index in out_indices: if index not in range(0, 7): raise ValueError('the item in out_indices must in ' f'range(0, 7). But received {index}') if frozen_stages not in range(-1, 7): raise ValueError('frozen_stages must be in range(-1, 7). ' f'But received {frozen_stages}') self.out_indices = out_indices self.frozen_stages = frozen_stages self.conv_cfg = conv_cfg self.norm_cfg = norm_cfg self.act_cfg = act_cfg self.norm_eval = norm_eval self.with_cp = with_cp self.in_channels = make_divisible(32 * widen_factor, 8) self.conv1 = ConvModule(in_channels=3, out_channels=self.in_channels, kernel_size=3, stride=2, padding=1, conv_cfg=self.conv_cfg, norm_cfg=self.norm_cfg, act_cfg=self.act_cfg) self.layers = [] for i, layer_cfg in enumerate(self.arch_settings): expand_ratio, channel, num_blocks = layer_cfg stride = self.strides[i] dilation = self.dilations[i] out_channels = make_divisible(channel * widen_factor, 8) inverted_res_layer = self.make_layer(out_channels=out_channels, num_blocks=num_blocks, stride=stride, dilation=dilation, expand_ratio=expand_ratio) layer_name = f'layer{i + 1}' self.add_module(layer_name, inverted_res_layer) self.layers.append(layer_name) def make_layer(self, out_channels, num_blocks, stride, dilation, expand_ratio): """Stack InvertedResidual blocks to build a layer for MobileNetV2. Args: out_channels (int): out_channels of block. num_blocks (int): Number of blocks. stride (int): Stride of the first block. dilation (int): Dilation of the first block. expand_ratio (int): Expand the number of channels of the hidden layer in InvertedResidual by this ratio. 
""" layers = [] for i in range(num_blocks): layers.append( InvertedResidual(self.in_channels, out_channels, stride if i == 0 else 1, expand_ratio=expand_ratio, dilation=dilation if i == 0 else 1, conv_cfg=self.conv_cfg, norm_cfg=self.norm_cfg, act_cfg=self.act_cfg, with_cp=self.with_cp)) self.in_channels = out_channels return nn.Sequential(*layers) def init_weights(self, pretrained=None): if isinstance(pretrained, str): logger = logging.getLogger() load_checkpoint(self, pretrained, strict=False, logger=logger) elif pretrained is None: for m in self.modules(): if isinstance(m, nn.Conv2d): kaiming_init(m) elif isinstance(m, (_BatchNorm, nn.GroupNorm)): constant_init(m, 1) else: raise TypeError('pretrained must be a str or None') def forward(self, x): x = self.conv1(x) outs = [] for i, layer_name in enumerate(self.layers): layer = getattr(self, layer_name) x = layer(x) if i in self.out_indices: outs.append(x) if len(outs) == 1: return outs[0] else: return tuple(outs) def _freeze_stages(self): if self.frozen_stages >= 0: for param in self.conv1.parameters(): param.requires_grad = False for i in range(1, self.frozen_stages + 1): layer = getattr(self, f'layer{i}') layer.eval() for param in layer.parameters(): param.requires_grad = False def train(self, mode=True): super(MobileNetV2, self).train(mode) self._freeze_stages() if mode and self.norm_eval: for m in self.modules(): if isinstance(m, _BatchNorm): m.eval()
def __init__(self, in_channels, out_channels, num_outs, start_level=0, end_level=-1, add_extra_convs=False, extra_convs_on_inputs=False, relu_before_extra_convs=False, no_norm_on_lateral=False, conv_cfg=None, norm_cfg=None, act_cfg=None, upsample_cfg=dict(mode='nearest')): super(FPN_involution, self).__init__() assert isinstance(in_channels, list) self.in_channels = in_channels self.out_channels = out_channels self.num_ins = len(in_channels) self.num_outs = num_outs self.relu_before_extra_convs = relu_before_extra_convs self.no_norm_on_lateral = no_norm_on_lateral self.fp16_enabled = False self.upsample_cfg = upsample_cfg.copy() if end_level == -1: self.backbone_end_level = self.num_ins assert num_outs >= self.num_ins - start_level else: # if end_level < inputs, no extra level is allowed self.backbone_end_level = end_level assert end_level <= len(in_channels) assert num_outs == end_level - start_level self.start_level = start_level self.end_level = end_level self.add_extra_convs = add_extra_convs assert isinstance(add_extra_convs, (str, bool)) if isinstance(add_extra_convs, str): # Extra_convs_source choices: 'on_input', 'on_lateral', 'on_output' assert add_extra_convs in ('on_input', 'on_lateral', 'on_output') elif add_extra_convs: # True if extra_convs_on_inputs: # For compatibility with previous release # TODO: deprecate `extra_convs_on_inputs` self.add_extra_convs = 'on_input' else: self.add_extra_convs = 'on_output' self.lateral_convs = nn.ModuleList() self.fpn_convs = nn.ModuleList() for i in range(self.start_level, self.backbone_end_level): l_conv = ConvModule( in_channels[i], out_channels, 1, conv_cfg=conv_cfg, norm_cfg=norm_cfg if not self.no_norm_on_lateral else None, act_cfg=act_cfg, inplace=False) fpn_conv = involution(out_channels, 7, 1) #ConvModule( #out_channels, #out_channels, #3, #padding=1, #conv_cfg=conv_cfg, #norm_cfg=norm_cfg, #act_cfg=act_cfg, #inplace=False) self.lateral_convs.append(l_conv) self.fpn_convs.append(fpn_conv) # add extra conv layers (e.g., RetinaNet) extra_levels = num_outs - self.backbone_end_level + self.start_level if self.add_extra_convs and extra_levels >= 1: for i in range(extra_levels): if i == 0 and self.add_extra_convs == 'on_input': in_channels = self.in_channels[self.backbone_end_level - 1] else: in_channels = out_channels extra_fpn_conv = ConvModule(in_channels, out_channels, 3, stride=2, padding=1, conv_cfg=conv_cfg, norm_cfg=norm_cfg, act_cfg=act_cfg, inplace=False) self.fpn_convs.append(extra_fpn_conv)
class X3D(nn.Module): """X3D backbone. https://arxiv.org/pdf/2004.04730.pdf. Args: gamma_w (float): Global channel width expansion factor. Default: 1. gamma_b (float): Bottleneck channel width expansion factor. Default: 1. gamma_d (float): Network depth expansion factor. Default: 1. pretrained (str | None): Name of pretrained model. Default: None. in_channels (int): Channel num of input features. Default: 3. num_stages (int): Resnet stages. Default: 4. spatial_strides (Sequence[int]): Spatial strides of residual blocks of each stage. Default: ``(1, 2, 2, 2)``. frozen_stages (int): Stages to be frozen (all param fixed). If set to -1, it means not freezing any parameters. Default: -1. se_style (str): The style of inserting SE modules into BlockX3D, 'half' denotes insert into half of the blocks, while 'all' denotes insert into all blocks. Default: 'half'. se_ratio (float | None): The reduction ratio of squeeze and excitation unit. If set as None, it means not using SE unit. Default: 1 / 16. use_swish (bool): Whether to use swish as the activation function before and after the 3x3x3 conv. Default: True. conv_cfg (dict): Config for conv layers. required keys are ``type`` Default: ``dict(type='Conv3d')``. norm_cfg (dict): Config for norm layers. required keys are ``type`` and ``requires_grad``. Default: ``dict(type='BN3d', requires_grad=True)``. act_cfg (dict): Config dict for activation layer. Default: ``dict(type='ReLU', inplace=True)``. norm_eval (bool): Whether to set BN layers to eval mode, namely, freeze running stats (mean and var). Default: False. with_cp (bool): Use checkpoint or not. Using checkpoint will save some memory while slowing down the training speed. Default: False. zero_init_residual (bool): Whether to use zero initialization for residual block, Default: True. kwargs (dict, optional): Key arguments for "make_res_layer". 
""" def __init__(self, gamma_w=1.0, gamma_b=1.0, gamma_d=1.0, pretrained=None, in_channels=3, num_stages=4, spatial_strides=(2, 2, 2, 2), frozen_stages=-1, se_style='half', se_ratio=1 / 16, use_swish=True, conv_cfg=dict(type='Conv3d'), norm_cfg=dict(type='BN3d', requires_grad=True), act_cfg=dict(type='ReLU', inplace=True), norm_eval=False, with_cp=False, zero_init_residual=True, **kwargs): super().__init__() self.gamma_w = gamma_w self.gamma_b = gamma_b self.gamma_d = gamma_d self.pretrained = pretrained self.in_channels = in_channels # Hard coded, can be changed by gamma_w self.base_channels = 24 self.stage_blocks = [1, 2, 5, 3] # apply parameters gamma_w and gamma_d self.base_channels = self._round_width(self.base_channels, self.gamma_w) self.stage_blocks = [ self._round_repeats(x, self.gamma_d) for x in self.stage_blocks ] self.num_stages = num_stages assert 1 <= num_stages <= 4 self.spatial_strides = spatial_strides assert len(spatial_strides) == num_stages self.frozen_stages = frozen_stages self.se_style = se_style assert self.se_style in ['all', 'half'] self.se_ratio = se_ratio assert (self.se_ratio is None) or (self.se_ratio > 0) self.use_swish = use_swish self.conv_cfg = conv_cfg self.norm_cfg = norm_cfg self.act_cfg = act_cfg self.norm_eval = norm_eval self.with_cp = with_cp self.zero_init_residual = zero_init_residual self.block = BlockX3D self.stage_blocks = self.stage_blocks[:num_stages] self.layer_inplanes = self.base_channels self._make_stem_layer() self.res_layers = [] for i, num_blocks in enumerate(self.stage_blocks): spatial_stride = spatial_strides[i] inplanes = self.base_channels * 2**i planes = int(inplanes * self.gamma_b) res_layer = self.make_res_layer(self.block, self.layer_inplanes, inplanes, planes, num_blocks, spatial_stride=spatial_stride, se_style=self.se_style, se_ratio=self.se_ratio, use_swish=self.use_swish, norm_cfg=self.norm_cfg, conv_cfg=self.conv_cfg, act_cfg=self.act_cfg, with_cp=with_cp, **kwargs) self.layer_inplanes = inplanes layer_name = f'layer{i + 1}' self.add_module(layer_name, res_layer) self.res_layers.append(layer_name) self.feat_dim = self.base_channels * 2**(len(self.stage_blocks) - 1) self.conv5 = ConvModule(self.feat_dim, int(self.feat_dim * self.gamma_b), kernel_size=1, stride=1, padding=0, bias=False, conv_cfg=self.conv_cfg, norm_cfg=self.norm_cfg, act_cfg=self.act_cfg) self.feat_dim = int(self.feat_dim * self.gamma_b) @staticmethod def _round_width(width, multiplier, min_depth=8, divisor=8): """Round width of filters based on width multiplier.""" if not multiplier: return width width *= multiplier min_depth = min_depth or divisor new_filters = max(min_depth, int(width + divisor / 2) // divisor * divisor) if new_filters < 0.9 * width: new_filters += divisor return int(new_filters) @staticmethod def _round_repeats(repeats, multiplier): """Round number of layers based on depth multiplier.""" if not multiplier: return repeats return int(math.ceil(multiplier * repeats)) # the module is parameterized with gamma_b # no temporal_stride def make_res_layer(self, block, layer_inplanes, inplanes, planes, blocks, spatial_stride=1, se_style='half', se_ratio=None, use_swish=True, norm_cfg=None, act_cfg=None, conv_cfg=None, with_cp=False, **kwargs): """Build residual layer for ResNet3D. Args: block (nn.Module): Residual module to be built. layer_inplanes (int): Number of channels for the input feature of the res layer. inplanes (int): Number of channels for the input feature in each block, which equals to base_channels * gamma_w. 
planes (int): Number of channels for the output feature in each block, which equals to base_channel * gamma_w * gamma_b. blocks (int): Number of residual blocks. spatial_stride (int): Spatial strides in residual and conv layers. Default: 1. se_style (str): The style of inserting SE modules into BlockX3D, 'half' denotes insert into half of the blocks, while 'all' denotes insert into all blocks. Default: 'half'. se_ratio (float | None): The reduction ratio of squeeze and excitation unit. If set as None, it means not using SE unit. Default: None. use_swish (bool): Whether to use swish as the activation function before and after the 3x3x3 conv. Default: True. conv_cfg (dict | None): Config for norm layers. Default: None. norm_cfg (dict | None): Config for norm layers. Default: None. act_cfg (dict | None): Config for activate layers. Default: None. with_cp (bool | None): Use checkpoint or not. Using checkpoint will save some memory while slowing down the training speed. Default: False. Returns: nn.Module: A residual layer for the given config. """ downsample = None if spatial_stride != 1 or layer_inplanes != inplanes: downsample = ConvModule(layer_inplanes, inplanes, kernel_size=1, stride=(1, spatial_stride, spatial_stride), padding=0, bias=False, conv_cfg=conv_cfg, norm_cfg=norm_cfg, act_cfg=None) use_se = [False] * blocks if self.se_style == 'all': use_se = [True] * blocks elif self.se_style == 'half': use_se = [i % 2 == 0 for i in range(blocks)] else: raise NotImplementedError layers = [] layers.append( block(layer_inplanes, planes, inplanes, spatial_stride=spatial_stride, downsample=downsample, se_ratio=se_ratio if use_se[0] else None, use_swish=use_swish, norm_cfg=norm_cfg, conv_cfg=conv_cfg, act_cfg=act_cfg, with_cp=with_cp, **kwargs)) for i in range(1, blocks): layers.append( block(inplanes, planes, inplanes, spatial_stride=1, se_ratio=se_ratio if use_se[i] else None, use_swish=use_swish, norm_cfg=norm_cfg, conv_cfg=conv_cfg, act_cfg=act_cfg, with_cp=with_cp, **kwargs)) return nn.Sequential(*layers) def _make_stem_layer(self): """Construct the stem layers consists of a conv+norm+act module and a pooling layer.""" self.conv1_s = ConvModule(self.in_channels, self.base_channels, kernel_size=(1, 3, 3), stride=(1, 2, 2), padding=(0, 1, 1), bias=False, conv_cfg=self.conv_cfg, norm_cfg=None, act_cfg=None) self.conv1_t = ConvModule(self.base_channels, self.base_channels, kernel_size=(5, 1, 1), stride=(1, 1, 1), padding=(2, 0, 0), groups=self.base_channels, bias=False, conv_cfg=self.conv_cfg, norm_cfg=self.norm_cfg, act_cfg=self.act_cfg) def _freeze_stages(self): """Prevent all the parameters from being optimized before ``self.frozen_stages``.""" if self.frozen_stages >= 0: self.conv1_s.eval() self.conv1_t.eval() for param in self.conv1_s.parameters(): param.requires_grad = False for param in self.conv1_t.parameters(): param.requires_grad = False for i in range(1, self.frozen_stages + 1): m = getattr(self, f'layer{i}') m.eval() for param in m.parameters(): param.requires_grad = False def init_weights(self): """Initiate the parameters either from existing checkpoint or from scratch.""" if isinstance(self.pretrained, str): logger = get_root_logger() logger.info(f'load model from: {self.pretrained}') load_checkpoint(self, self.pretrained, strict=False, logger=logger) elif self.pretrained is None: for m in self.modules(): if isinstance(m, nn.Conv3d): kaiming_init(m) elif isinstance(m, _BatchNorm): constant_init(m, 1) if self.zero_init_residual: for m in self.modules(): if isinstance(m, BlockX3D): 
constant_init(m.conv3.bn, 0) else: raise TypeError('pretrained must be a str or None') def forward(self, x): """Defines the computation performed at every call. Args: x (torch.Tensor): The input data. Returns: torch.Tensor: The feature of the input samples extracted by the backbone. """ x = self.conv1_s(x) x = self.conv1_t(x) for layer_name in self.res_layers: res_layer = getattr(self, layer_name) x = res_layer(x) x = self.conv5(x) return x def train(self, mode=True): """Set the optimization status when training.""" super().train(mode) self._freeze_stages() if mode and self.norm_eval: for m in self.modules(): if isinstance(m, _BatchNorm): m.eval()
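# Usage sketch for X3D above (assumes X3D and BlockX3D are importable). The
# backbone takes 5D clips (N, C, T, H, W); the stem and the four stages only
# stride spatially, so T is preserved while H and W shrink by 32 overall.
# The gamma values and clip size below are illustrative assumptions.
import torch

model = X3D(gamma_w=1.0, gamma_b=2.25, gamma_d=2.2)
model.init_weights()
clip = torch.randn(1, 3, 13, 160, 160)   # (batch, channels, frames, H, W)
feat = model(clip)
print(feat.shape)  # expected roughly (1, 432, 13, 5, 5) under these settings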
def make_res_layer(block, inplanes, planes, blocks, stride=1, dilation=1, style='pytorch', conv_cfg=None, norm_cfg=None, act_cfg=None, with_cp=False): """Build residual layer for ResNet. Args: block: (nn.Module): Residual module to be built. inplanes (int): Number of channels for the input feature in each block. planes (int): Number of channels for the output feature in each block. blocks (int): Number of residual blocks. stride (int): Stride in the conv layer. Default: 1. dilation (int): Spacing between kernel elements. Default: 1. style (str): `pytorch` or `caffe`. If set to "pytorch", the stride-two layer is the 3x3 conv layer, otherwise the stride-two layer is the first 1x1 conv layer. Default: 'pytorch'. conv_cfg (dict | None): Config for norm layers. Default: None. norm_cfg (dict | None): Config for norm layers. Default: None. act_cfg (dict | None): Config for activate layers. Default: None. with_cp (bool): Use checkpoint or not. Using checkpoint will save some memory while slowing down the training speed. Default: False. Returns: nn.Module: A residual layer for the given config. """ downsample = None if stride != 1 or inplanes != planes * block.expansion: downsample = ConvModule( inplanes, planes * block.expansion, kernel_size=1, stride=stride, bias=False, conv_cfg=conv_cfg, norm_cfg=norm_cfg, act_cfg=None) layers = [] layers.append( block( inplanes, planes, stride, dilation, downsample, style=style, conv_cfg=conv_cfg, norm_cfg=norm_cfg, act_cfg=act_cfg, with_cp=with_cp)) inplanes = planes * block.expansion for _ in range(1, blocks): layers.append( block( inplanes, planes, 1, dilation, style=style, conv_cfg=conv_cfg, norm_cfg=norm_cfg, act_cfg=act_cfg, with_cp=with_cp)) return nn.Sequential(*layers)
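# Worked example of the projection-shortcut rule in make_res_layer above (a
# plain-Python sketch, not a library call): a 1x1 downsample conv is created
# whenever the stride is not 1 or the input channels differ from
# planes * block.expansion.
def needs_downsample(inplanes, planes, stride, expansion):
    return stride != 1 or inplanes != planes * expansion

print(needs_downsample(64, 64, 1, 4))    # True: Bottleneck stage 1, 64 != 64 * 4
print(needs_downsample(64, 64, 1, 1))    # False: BasicBlock stage 1, identity is enough
print(needs_downsample(256, 128, 2, 4))  # True: stride-2 stage transition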
def __init__(self, img_channels, pts_channels, mid_channels, out_channels, img_levels=3, conv_cfg=None, norm_cfg=None, act_cfg=None, activate_out=True, fuse_out=False, dropout_ratio=0, aligned=True, align_corners=True, padding_mode='zeros', lateral_conv=True): super(PointFusion, self).__init__() if isinstance(img_levels, int): img_levels = [img_levels] if isinstance(img_channels, int): img_channels = [img_channels] * len(img_levels) assert isinstance(img_levels, list) assert isinstance(img_channels, list) assert len(img_channels) == len(img_levels) self.img_levels = img_levels self.act_cfg = act_cfg self.activate_out = activate_out self.fuse_out = fuse_out self.dropout_ratio = dropout_ratio self.img_channels = img_channels self.aligned = aligned self.align_corners = align_corners self.padding_mode = padding_mode self.lateral_convs = None if lateral_conv: self.lateral_convs = nn.ModuleList() for i in range(len(img_channels)): l_conv = ConvModule(img_channels[i], mid_channels, 3, padding=1, conv_cfg=conv_cfg, norm_cfg=norm_cfg, act_cfg=self.act_cfg, inplace=False) self.lateral_convs.append(l_conv) self.img_transform = nn.Sequential( nn.Linear(mid_channels * len(img_channels), out_channels), nn.BatchNorm1d(out_channels, eps=1e-3, momentum=0.01), ) else: self.img_transform = nn.Sequential( nn.Linear(sum(img_channels), out_channels), nn.BatchNorm1d(out_channels, eps=1e-3, momentum=0.01), ) self.pts_transform = nn.Sequential( nn.Linear(pts_channels, out_channels), nn.BatchNorm1d(out_channels, eps=1e-3, momentum=0.01), ) if self.fuse_out: self.fuse_conv = nn.Sequential( nn.Linear(mid_channels, out_channels), # For pts the BN is initialized differently by default # TODO: check whether this is necessary nn.BatchNorm1d(out_channels, eps=1e-3, momentum=0.01), nn.ReLU(inplace=False)) self.init_weights()
def __init__(self, in_channels, out_channels, num_outs, start_level=0, end_level=-1, add_extra_convs=False, extra_convs_on_inputs=True, relu_before_extra_convs=False, no_norm_on_lateral=False, conv_cfg=None, norm_cfg=None, act_cfg=None, upsample_cfg=dict(mode='nearest')): super(DyFPN_B_CNNGate, self).__init__(in_channels, out_channels, num_outs) assert isinstance(in_channels, list) self.in_channels = in_channels self.out_channels = out_channels self.num_ins = len(in_channels) self.num_outs = num_outs self.relu_before_extra_convs = relu_before_extra_convs self.no_norm_on_lateral = no_norm_on_lateral self.fp16_enabled = False self.upsample_cfg = upsample_cfg.copy() if end_level == -1: self.backbone_end_level = self.num_ins assert num_outs >= self.num_ins - start_level else: # if end_level < inputs, no extra level is allowed self.backbone_end_level = end_level assert end_level <= len(in_channels) assert num_outs == end_level - start_level self.start_level = start_level self.end_level = end_level self.add_extra_convs = add_extra_convs self.gate_0 = Attention_SEblock(channels=in_channels[0], reduction=4, temperature=1) self.gate_1 = Attention_SEblock(channels=in_channels[1], reduction=4, temperature=1) self.gate_2 = Attention_SEblock(channels=in_channels[2], reduction=4, temperature=1) self.gate_3 = Attention_SEblock(channels=in_channels[3], reduction=4, temperature=1) assert isinstance(add_extra_convs, (str, bool)) if isinstance(add_extra_convs, str): # Extra_convs_source choices: 'on_input', 'on_lateral', 'on_output' assert add_extra_convs in ('on_input', 'on_lateral', 'on_output') elif add_extra_convs: # True if extra_convs_on_inputs: # For compatibility with previous release # TODO: deprecate `extra_convs_on_inputs` self.add_extra_convs = 'on_input' else: self.add_extra_convs = 'on_output' self.lateral_convs = nn.ModuleList() self.fpn_convs = nn.ModuleList() self.skip_lateral_convs = nn.ModuleList() for i in range(self.start_level, self.backbone_end_level): l_conv_11 = ConvModule( in_channels[i], out_channels, 1, conv_cfg=conv_cfg, norm_cfg=norm_cfg if not self.no_norm_on_lateral else None, act_cfg=act_cfg, inplace=False) l_conv_33 = ConvModule( in_channels[i], out_channels, 3, padding=1, conv_cfg=conv_cfg, norm_cfg=norm_cfg if not self.no_norm_on_lateral else None, act_cfg=act_cfg, inplace=False) l_conv_33_dilation_2 = ConvModule( in_channels[i], out_channels, 3, padding=2, dilation=2, conv_cfg=conv_cfg, norm_cfg=norm_cfg if not self.no_norm_on_lateral else None, act_cfg=act_cfg, inplace=False) l_conv_33_dilation_3 = ConvModule( in_channels[i], out_channels, 3, padding=3, dilation=3, conv_cfg=conv_cfg, norm_cfg=norm_cfg if not self.no_norm_on_lateral else None, act_cfg=act_cfg, inplace=False) l_conv_55 = ConvModule( in_channels[i], out_channels, 5, padding=2, conv_cfg=conv_cfg, norm_cfg=norm_cfg if not self.no_norm_on_lateral else None, act_cfg=act_cfg, inplace=False) l_conv_55_dilation_2 = ConvModule( in_channels[i], out_channels, 5, padding=4, dilation=2, conv_cfg=conv_cfg, norm_cfg=norm_cfg if not self.no_norm_on_lateral else None, act_cfg=act_cfg, inplace=False) l_conv_55_dilation_3 = ConvModule( in_channels[i], out_channels, 5, padding=6, dilation=3, conv_cfg=conv_cfg, norm_cfg=norm_cfg if not self.no_norm_on_lateral else None, act_cfg=act_cfg, inplace=False) skip_l_conv_11 = ConvModule( in_channels[i], out_channels, 1, conv_cfg=conv_cfg, norm_cfg=norm_cfg if not self.no_norm_on_lateral else None, act_cfg=act_cfg, inplace=False) self.lateral_convs.append(l_conv_11) 
self.lateral_convs.append(l_conv_33) self.lateral_convs.append(l_conv_33_dilation_2) self.lateral_convs.append(l_conv_33_dilation_3) self.lateral_convs.append(l_conv_55) self.lateral_convs.append(l_conv_55_dilation_2) self.lateral_convs.append(l_conv_55_dilation_3) self.skip_lateral_convs.append(skip_l_conv_11) fpn_conv = ConvModule(out_channels, out_channels, 3, padding=1, conv_cfg=conv_cfg, norm_cfg=norm_cfg, act_cfg=act_cfg, inplace=False) self.fpn_convs.append(fpn_conv) # add extra conv layers (e.g., RetinaNet) extra_levels = num_outs - self.backbone_end_level + self.start_level if self.add_extra_convs and extra_levels >= 1: for i in range(extra_levels): if i == 0 and self.add_extra_convs == 'on_input': in_channels = self.in_channels[self.backbone_end_level - 1] else: in_channels = out_channels extra_fpn_conv = ConvModule(in_channels, out_channels, 3, stride=2, padding=1, conv_cfg=conv_cfg, norm_cfg=norm_cfg, act_cfg=act_cfg, inplace=False) self.fpn_convs.append(extra_fpn_conv)
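# Sanity check for the lateral branches above (plain torch, independent of
# ConvModule): each branch uses padding = dilation * (kernel_size - 1) // 2,
# so every one of the seven convolutions preserves the spatial size and their
# outputs can be gated and summed element-wise.
import torch
import torch.nn as nn

x = torch.randn(1, 8, 32, 32)
for k, d in [(1, 1), (3, 1), (3, 2), (3, 3), (5, 1), (5, 2), (5, 3)]:
    pad = d * (k - 1) // 2
    y = nn.Conv2d(8, 8, k, padding=pad, dilation=d)(x)
    assert y.shape[-2:] == x.shape[-2:], (k, d)
print('all lateral branches keep the 32x32 resolution')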
def __init__(self, num_dims, num_classes, primitive_mode, train_cfg=None, test_cfg=None, vote_moudule_cfg=None, vote_aggregation_cfg=None, feat_channels=(128, 128), upper_thresh=100.0, surface_thresh=0.5, conv_cfg=dict(type='Conv1d'), norm_cfg=dict(type='BN1d'), objectness_loss=None, center_loss=None, semantic_reg_loss=None, semantic_cls_loss=None): super(PrimitiveHead, self).__init__() assert primitive_mode in ['z', 'xy', 'line'] # The dimension of primitive semantic information. self.num_dims = num_dims self.num_classes = num_classes self.primitive_mode = primitive_mode self.train_cfg = train_cfg self.test_cfg = test_cfg self.gt_per_seed = vote_moudule_cfg['gt_per_seed'] self.num_proposal = vote_aggregation_cfg['num_point'] self.upper_thresh = upper_thresh self.surface_thresh = surface_thresh self.objectness_loss = build_loss(objectness_loss) self.center_loss = build_loss(center_loss) self.semantic_reg_loss = build_loss(semantic_reg_loss) self.semantic_cls_loss = build_loss(semantic_cls_loss) assert vote_aggregation_cfg['mlp_channels'][0] == vote_moudule_cfg[ 'in_channels'] # Primitive existence flag prediction self.flag_conv = ConvModule(vote_moudule_cfg['conv_channels'][-1], vote_moudule_cfg['conv_channels'][-1] // 2, 1, padding=0, conv_cfg=conv_cfg, norm_cfg=norm_cfg, bias=True, inplace=True) self.flag_pred = torch.nn.Conv1d( vote_moudule_cfg['conv_channels'][-1] // 2, 2, 1) self.vote_module = VoteModule(**vote_moudule_cfg) self.vote_aggregation = PointSAModule(**vote_aggregation_cfg) prev_channel = vote_aggregation_cfg['mlp_channels'][-1] conv_pred_list = list() for k in range(len(feat_channels)): conv_pred_list.append( ConvModule(prev_channel, feat_channels[k], 1, padding=0, conv_cfg=conv_cfg, norm_cfg=norm_cfg, bias=True, inplace=True)) prev_channel = feat_channels[k] self.conv_pred = nn.Sequential(*conv_pred_list) conv_out_channel = 3 + num_dims + num_classes self.conv_pred.add_module('conv_out', nn.Conv1d(prev_channel, conv_out_channel, 1))
def __init__(self, in_channels, stem_channels=1024, num_blocks=2, kernel_sizes=(3, 3, 3), dropout=0.25, causal=False, residual=True, use_stride_conv=False, conv_cfg=dict(type='Conv1d'), norm_cfg=dict(type='BN1d'), max_norm=None): # Protect mutable default arguments conv_cfg = copy.deepcopy(conv_cfg) norm_cfg = copy.deepcopy(norm_cfg) super().__init__() self.in_channels = in_channels self.stem_channels = stem_channels self.num_blocks = num_blocks self.kernel_sizes = kernel_sizes self.dropout = dropout self.causal = causal self.residual = residual self.use_stride_conv = use_stride_conv self.max_norm = max_norm assert num_blocks == len(kernel_sizes) - 1 for ks in kernel_sizes: assert ks % 2 == 1, 'Only odd filter widths are supported.' self.expand_conv = ConvModule( in_channels, stem_channels, kernel_size=kernel_sizes[0], stride=kernel_sizes[0] if use_stride_conv else 1, bias='auto', conv_cfg=conv_cfg, norm_cfg=norm_cfg) dilation = kernel_sizes[0] self.tcn_blocks = nn.ModuleList() for i in range(1, num_blocks + 1): self.tcn_blocks.append( BasicTemporalBlock(in_channels=stem_channels, out_channels=stem_channels, mid_channels=stem_channels, kernel_size=kernel_sizes[i], dilation=dilation, dropout=dropout, causal=causal, residual=residual, use_stride_conv=use_stride_conv, conv_cfg=conv_cfg, norm_cfg=norm_cfg)) dilation *= kernel_sizes[i] if self.max_norm is not None: # Apply weight norm clip to conv layers weight_clip = WeightNormClipHook(self.max_norm) for module in self.modules(): if isinstance(module, nn.modules.conv._ConvNd): weight_clip.register(module) self.dropout = nn.Dropout(dropout) if dropout > 0 else None
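# Receptive-field sketch for the temporal blocks above (assuming the plain
# dilated-convolution path, i.e. use_stride_conv=False). The dilation starts
# at kernel_sizes[0] and is multiplied by each subsequent kernel size, so with
# kernel_sizes=(3, 3, 3) the model sees 3 * 3 * 3 = 27 input frames per output.
kernel_sizes = (3, 3, 3)
receptive_field = kernel_sizes[0]
dilation = kernel_sizes[0]
for ks in kernel_sizes[1:]:
    receptive_field += (ks - 1) * dilation
    dilation *= ks
print(receptive_field)  # 27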
def __init__(self, num_classes, seg_in_channels, part_in_channels, seg_conv_channels=None, part_conv_channels=None, merge_conv_channels=None, down_conv_channels=None, shared_fc_channels=None, cls_channels=None, reg_channels=None, dropout_ratio=0.1, roi_feat_size=14, with_corner_loss=True, bbox_coder=dict(type='DeltaXYZWLHRBBoxCoder'), conv_cfg=dict(type='Conv1d'), norm_cfg=dict(type='BN1d', eps=1e-3, momentum=0.01), loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=2.0), loss_cls=dict(type='CrossEntropyLoss', use_sigmoid=True, reduction='none', loss_weight=1.0), init_cfg=None): super(PartA2BboxHead, self).__init__(init_cfg=init_cfg) self.num_classes = num_classes self.with_corner_loss = with_corner_loss self.bbox_coder = build_bbox_coder(bbox_coder) self.loss_bbox = build_loss(loss_bbox) self.loss_cls = build_loss(loss_cls) self.use_sigmoid_cls = loss_cls.get('use_sigmoid', False) assert down_conv_channels[-1] == shared_fc_channels[0] # init layers part_channel_last = part_in_channels part_conv = [] for i, channel in enumerate(part_conv_channels): part_conv.append( make_sparse_convmodule(part_channel_last, channel, 3, padding=1, norm_cfg=norm_cfg, indice_key=f'rcnn_part{i}', conv_type='SubMConv3d')) part_channel_last = channel self.part_conv = spconv.SparseSequential(*part_conv) seg_channel_last = seg_in_channels seg_conv = [] for i, channel in enumerate(seg_conv_channels): seg_conv.append( make_sparse_convmodule(seg_channel_last, channel, 3, padding=1, norm_cfg=norm_cfg, indice_key=f'rcnn_seg{i}', conv_type='SubMConv3d')) seg_channel_last = channel self.seg_conv = spconv.SparseSequential(*seg_conv) self.conv_down = spconv.SparseSequential() merge_conv_channel_last = part_channel_last + seg_channel_last merge_conv = [] for i, channel in enumerate(merge_conv_channels): merge_conv.append( make_sparse_convmodule(merge_conv_channel_last, channel, 3, padding=1, norm_cfg=norm_cfg, indice_key='rcnn_down0')) merge_conv_channel_last = channel down_conv_channel_last = merge_conv_channel_last conv_down = [] for i, channel in enumerate(down_conv_channels): conv_down.append( make_sparse_convmodule(down_conv_channel_last, channel, 3, padding=1, norm_cfg=norm_cfg, indice_key='rcnn_down1')) down_conv_channel_last = channel self.conv_down.add_module('merge_conv', spconv.SparseSequential(*merge_conv)) self.conv_down.add_module( 'max_pool3d', spconv.SparseMaxPool3d(kernel_size=2, stride=2)) self.conv_down.add_module('down_conv', spconv.SparseSequential(*conv_down)) shared_fc_list = [] pool_size = roi_feat_size // 2 pre_channel = shared_fc_channels[0] * pool_size**3 for k in range(1, len(shared_fc_channels)): shared_fc_list.append( ConvModule(pre_channel, shared_fc_channels[k], 1, padding=0, conv_cfg=conv_cfg, norm_cfg=norm_cfg, inplace=True)) pre_channel = shared_fc_channels[k] if k != len(shared_fc_channels) - 1 and dropout_ratio > 0: shared_fc_list.append(nn.Dropout(dropout_ratio)) self.shared_fc = nn.Sequential(*shared_fc_list) # Classification layer channel_in = shared_fc_channels[-1] cls_channel = 1 cls_layers = [] pre_channel = channel_in for k in range(0, len(cls_channels)): cls_layers.append( ConvModule(pre_channel, cls_channels[k], 1, padding=0, conv_cfg=conv_cfg, norm_cfg=norm_cfg, inplace=True)) pre_channel = cls_channels[k] cls_layers.append( ConvModule(pre_channel, cls_channel, 1, padding=0, conv_cfg=conv_cfg, act_cfg=None)) if dropout_ratio >= 0: cls_layers.insert(1, nn.Dropout(dropout_ratio)) self.conv_cls = nn.Sequential(*cls_layers) # Regression layer reg_layers = [] 
pre_channel = channel_in for k in range(0, len(reg_channels)): reg_layers.append( ConvModule(pre_channel, reg_channels[k], 1, padding=0, conv_cfg=conv_cfg, norm_cfg=norm_cfg, inplace=True)) pre_channel = reg_channels[k] reg_layers.append( ConvModule(pre_channel, self.bbox_coder.code_size, 1, padding=0, conv_cfg=conv_cfg, act_cfg=None)) if dropout_ratio >= 0: reg_layers.insert(1, nn.Dropout(dropout_ratio)) self.conv_reg = nn.Sequential(*reg_layers) if init_cfg is None: self.init_cfg = dict(type='Xavier', layer=['Conv2d', 'Conv1d'], distribution='uniform')
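# Worked example of the shared-FC input size above: the SparseMaxPool3d with
# kernel and stride 2 halves the RoI grid, so a roi_feat_size of 14 gives a
# 7x7x7 volume and the flattened per-RoI feature has
# shared_fc_channels[0] * 7**3 values. The channel numbers below are
# illustrative assumptions, not values from a config.
roi_feat_size = 14
shared_fc_channels = (256, 512, 512)
pool_size = roi_feat_size // 2
pre_channel = shared_fc_channels[0] * pool_size ** 3
print(pool_size, pre_channel)  # 7, 87808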
def __init__(self, out_channels=128, align_corners=False, conv_cfg=None, norm_cfg=dict(type='BN'), act_cfg=dict(type='ReLU'), init_cfg=None): super(BGALayer, self).__init__(init_cfg=init_cfg) self.out_channels = out_channels self.align_corners = align_corners self.detail_dwconv = nn.Sequential( DepthwiseSeparableConvModule( in_channels=self.out_channels, out_channels=self.out_channels, kernel_size=3, stride=1, padding=1, dw_norm_cfg=norm_cfg, dw_act_cfg=None, pw_norm_cfg=None, pw_act_cfg=None, )) self.detail_down = nn.Sequential( ConvModule( in_channels=self.out_channels, out_channels=self.out_channels, kernel_size=3, stride=2, padding=1, bias=False, conv_cfg=conv_cfg, norm_cfg=norm_cfg, act_cfg=None), nn.AvgPool2d(kernel_size=3, stride=2, padding=1, ceil_mode=False)) self.semantic_conv = nn.Sequential( ConvModule( in_channels=self.out_channels, out_channels=self.out_channels, kernel_size=3, stride=1, padding=1, bias=False, conv_cfg=conv_cfg, norm_cfg=norm_cfg, act_cfg=None)) self.semantic_dwconv = nn.Sequential( DepthwiseSeparableConvModule( in_channels=self.out_channels, out_channels=self.out_channels, kernel_size=3, stride=1, padding=1, dw_norm_cfg=norm_cfg, dw_act_cfg=None, pw_norm_cfg=None, pw_act_cfg=None, )) self.conv = ConvModule( in_channels=self.out_channels, out_channels=self.out_channels, kernel_size=3, stride=1, padding=1, inplace=True, conv_cfg=conv_cfg, norm_cfg=norm_cfg, act_cfg=act_cfg, )
def __init__(self, detail_channels=(64, 64, 128), in_channels=3, conv_cfg=None, norm_cfg=dict(type='BN'), act_cfg=dict(type='ReLU'), init_cfg=None): super(DetailBranch, self).__init__(init_cfg=init_cfg) detail_branch = [] for i in range(len(detail_channels)): if i == 0: detail_branch.append( nn.Sequential( ConvModule( in_channels=in_channels, out_channels=detail_channels[i], kernel_size=3, stride=2, padding=1, conv_cfg=conv_cfg, norm_cfg=norm_cfg, act_cfg=act_cfg), ConvModule( in_channels=detail_channels[i], out_channels=detail_channels[i], kernel_size=3, stride=1, padding=1, conv_cfg=conv_cfg, norm_cfg=norm_cfg, act_cfg=act_cfg))) else: detail_branch.append( nn.Sequential( ConvModule( in_channels=detail_channels[i - 1], out_channels=detail_channels[i], kernel_size=3, stride=2, padding=1, conv_cfg=conv_cfg, norm_cfg=norm_cfg, act_cfg=act_cfg), ConvModule( in_channels=detail_channels[i], out_channels=detail_channels[i], kernel_size=3, stride=1, padding=1, conv_cfg=conv_cfg, norm_cfg=norm_cfg, act_cfg=act_cfg), ConvModule( in_channels=detail_channels[i], out_channels=detail_channels[i], kernel_size=3, stride=1, padding=1, conv_cfg=conv_cfg, norm_cfg=norm_cfg, act_cfg=act_cfg))) self.detail_branch = nn.ModuleList(detail_branch)
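# Output-stride sketch for DetailBranch above: every stage starts with a
# stride-2 ConvModule, so with the default three stages the branch keeps
# high-resolution detail at 1/8 of the input size (e.g. 512x1024 -> 64x128,
# an illustrative input size).
detail_channels = (64, 64, 128)
overall_stride = 2 ** len(detail_channels)
h, w = 512, 1024
print(overall_stride, (h // overall_stride, w // overall_stride))  # 8 (64, 128)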
def __init__(self, in_channels, out_channels, exp_ratio=6, stride=1, conv_cfg=None, norm_cfg=dict(type='BN'), act_cfg=dict(type='ReLU'), init_cfg=None): super(GELayer, self).__init__(init_cfg=init_cfg) mid_channel = in_channels * exp_ratio self.conv1 = ConvModule( in_channels=in_channels, out_channels=in_channels, kernel_size=3, stride=1, padding=1, conv_cfg=conv_cfg, norm_cfg=norm_cfg, act_cfg=act_cfg) if stride == 1: self.dwconv = nn.Sequential( # ReLU in ConvModule not shown in paper ConvModule( in_channels=in_channels, out_channels=mid_channel, kernel_size=3, stride=stride, padding=1, groups=in_channels, conv_cfg=conv_cfg, norm_cfg=norm_cfg, act_cfg=act_cfg)) self.shortcut = None else: self.dwconv = nn.Sequential( ConvModule( in_channels=in_channels, out_channels=mid_channel, kernel_size=3, stride=stride, padding=1, groups=in_channels, bias=False, conv_cfg=conv_cfg, norm_cfg=norm_cfg, act_cfg=None), # ReLU in ConvModule not shown in paper ConvModule( in_channels=mid_channel, out_channels=mid_channel, kernel_size=3, stride=1, padding=1, groups=mid_channel, conv_cfg=conv_cfg, norm_cfg=norm_cfg, act_cfg=act_cfg), ) self.shortcut = nn.Sequential( DepthwiseSeparableConvModule( in_channels=in_channels, out_channels=out_channels, kernel_size=3, stride=stride, padding=1, dw_norm_cfg=norm_cfg, dw_act_cfg=None, pw_norm_cfg=norm_cfg, pw_act_cfg=None, )) self.conv2 = nn.Sequential( ConvModule( in_channels=mid_channel, out_channels=out_channels, kernel_size=1, stride=1, padding=0, bias=False, conv_cfg=conv_cfg, norm_cfg=norm_cfg, act_cfg=None, )) self.act = build_activation_layer(act_cfg)
def __init__(self, in_channels, out_channels, num_outs, start_level=0, end_level=-1, norm_cfg=None, act_cfg=None, order=('conv', 'norm', 'act'), upsample_cfg=dict(type='carafe', up_kernel=5, up_group=1, encoder_kernel=3, encoder_dilation=1)): super(FPN_CARAFE_3_3_pow_norm, self).__init__() assert isinstance(in_channels, list) self.in_channels = in_channels self.out_channels = out_channels self.num_ins = len(in_channels) self.num_outs = num_outs self.norm_cfg = norm_cfg self.act_cfg = act_cfg self.with_bias = norm_cfg is None self.upsample_cfg = upsample_cfg.copy() self.upsample = self.upsample_cfg.get('type') self.relu = nn.ReLU(inplace=False) self.order = order assert order in [('conv', 'norm', 'act'), ('act', 'conv', 'norm')] assert self.upsample in [ 'nearest', 'bilinear', 'deconv', 'pixel_shuffle', 'carafe', None ] if self.upsample in ['deconv', 'pixel_shuffle']: assert hasattr( self.upsample_cfg, 'upsample_kernel') and self.upsample_cfg.upsample_kernel > 0 self.upsample_kernel = self.upsample_cfg.pop('upsample_kernel') if end_level == -1: self.backbone_end_level = self.num_ins assert num_outs >= self.num_ins - start_level else: # if end_level < inputs, no extra level is allowed self.backbone_end_level = end_level assert end_level <= len(in_channels) assert num_outs == end_level - start_level self.start_level = start_level self.end_level = end_level self.lateral_convs = nn.ModuleList() self.fpn_convs = nn.ModuleList() self.upsample_modules = nn.ModuleList() for i in range(self.start_level, self.backbone_end_level): l_conv = ConvModule(in_channels[i], out_channels, 1, norm_cfg=norm_cfg, bias=self.with_bias, act_cfg=act_cfg, inplace=False, order=self.order) fpn_conv = ConvModule(out_channels, out_channels, 3, padding=1, norm_cfg=self.norm_cfg, bias=self.with_bias, act_cfg=act_cfg, inplace=False, order=self.order) if i != self.backbone_end_level - 1: upsample_cfg_ = self.upsample_cfg.copy() if self.upsample == 'deconv': upsample_cfg_.update( in_channels=out_channels, out_channels=out_channels, kernel_size=self.upsample_kernel, stride=2, padding=(self.upsample_kernel - 1) // 2, output_padding=(self.upsample_kernel - 1) // 2) elif self.upsample == 'pixel_shuffle': upsample_cfg_.update(in_channels=out_channels, out_channels=out_channels, scale_factor=2, upsample_kernel=self.upsample_kernel) elif self.upsample == 'carafe': upsample_cfg_.update(channels=out_channels, scale_factor=2) else: # suppress warnings align_corners = (None if self.upsample == 'nearest' else False) upsample_cfg_.update(scale_factor=2, mode=self.upsample, align_corners=align_corners) upsample_module = build_upsample_layer(upsample_cfg_) self.upsample_modules.append(upsample_module) self.lateral_convs.append(l_conv) self.fpn_convs.append(fpn_conv) # add extra conv layers (e.g., RetinaNet) extra_out_levels = (num_outs - self.backbone_end_level + self.start_level) if extra_out_levels >= 1: for i in range(extra_out_levels): in_channels = (self.in_channels[self.backbone_end_level - 1] if i == 0 else out_channels) extra_l_conv = ConvModule(in_channels, out_channels, 3, stride=2, padding=1, norm_cfg=norm_cfg, bias=self.with_bias, act_cfg=act_cfg, inplace=False, order=self.order) if self.upsample == 'deconv': upsampler_cfg_ = dict( in_channels=out_channels, out_channels=out_channels, kernel_size=self.upsample_kernel, stride=2, padding=(self.upsample_kernel - 1) // 2, output_padding=(self.upsample_kernel - 1) // 2) elif self.upsample == 'pixel_shuffle': upsampler_cfg_ = dict(in_channels=out_channels, out_channels=out_channels, 
scale_factor=2, upsample_kernel=self.upsample_kernel) elif self.upsample == 'carafe': upsampler_cfg_ = dict(channels=out_channels, scale_factor=2, **self.upsample_cfg) else: # suppress warnings align_corners = (None if self.upsample == 'nearest' else False) upsampler_cfg_ = dict(scale_factor=2, mode=self.upsample, align_corners=align_corners) upsampler_cfg_['type'] = self.upsample upsample_module = build_upsample_layer(upsampler_cfg_) extra_fpn_conv = ConvModule(out_channels, out_channels, 3, padding=1, norm_cfg=self.norm_cfg, bias=self.with_bias, act_cfg=act_cfg, inplace=False, order=self.order) self.upsample_modules.append(upsample_module) self.fpn_convs.append(extra_fpn_conv) self.lateral_convs.append(extra_l_conv)
def __init__(self, ind, num_units, in_channels, unit_channels=256, gen_skip=False, gen_cross_conv=False, norm_cfg=dict(type='BN'), out_channels=64): # Protect mutable default arguments norm_cfg = cp.deepcopy(norm_cfg) super().__init__() self.num_units = num_units self.norm_cfg = norm_cfg self.in_skip = ConvModule( in_channels, unit_channels, kernel_size=1, stride=1, padding=0, norm_cfg=self.norm_cfg, act_cfg=None, inplace=True) self.relu = nn.ReLU(inplace=True) self.ind = ind if self.ind > 0: self.up_conv = ConvModule( unit_channels, unit_channels, kernel_size=1, stride=1, padding=0, norm_cfg=self.norm_cfg, act_cfg=None, inplace=True) self.gen_skip = gen_skip if self.gen_skip: self.out_skip1 = ConvModule( in_channels, in_channels, kernel_size=1, stride=1, padding=0, norm_cfg=self.norm_cfg, inplace=True) self.out_skip2 = ConvModule( unit_channels, in_channels, kernel_size=1, stride=1, padding=0, norm_cfg=self.norm_cfg, inplace=True) self.gen_cross_conv = gen_cross_conv if self.ind == num_units - 1 and self.gen_cross_conv: self.cross_conv = ConvModule( unit_channels, out_channels, kernel_size=1, stride=1, padding=0, norm_cfg=self.norm_cfg, inplace=True)
def __init__(self, in_channels, out_channels, num_outs, stack_times, start_level=0, end_level=-1, add_extra_convs=False, norm_cfg=None): super(NASFPN, self).__init__() assert isinstance(in_channels, list) self.in_channels = in_channels self.out_channels = out_channels self.num_ins = len(in_channels) # num of input feature levels self.num_outs = num_outs # num of output feature levels self.stack_times = stack_times self.norm_cfg = norm_cfg if end_level == -1: self.backbone_end_level = self.num_ins assert num_outs >= self.num_ins - start_level else: # if end_level < inputs, no extra level is allowed self.backbone_end_level = end_level assert end_level <= len(in_channels) assert num_outs == end_level - start_level self.start_level = start_level self.end_level = end_level self.add_extra_convs = add_extra_convs # add lateral connections self.lateral_convs = nn.ModuleList() for i in range(self.start_level, self.backbone_end_level): l_conv = ConvModule(in_channels[i], out_channels, 1, norm_cfg=norm_cfg, act_cfg=None) self.lateral_convs.append(l_conv) # add extra downsample layers (stride-2 pooling or conv) extra_levels = num_outs - self.backbone_end_level + self.start_level self.extra_downsamples = nn.ModuleList() for i in range(extra_levels): extra_conv = ConvModule(out_channels, out_channels, 1, norm_cfg=norm_cfg, act_cfg=None) self.extra_downsamples.append( nn.Sequential(extra_conv, nn.MaxPool2d(2, 2))) # add NAS FPN connections self.fpn_stages = nn.ModuleList() for _ in range(self.stack_times): stage = nn.ModuleDict() # gp(p6, p4) -> p4_1 stage['gp_64_4'] = GlobalPoolingCell(in_channels=out_channels, out_channels=out_channels, out_norm_cfg=norm_cfg) # sum(p4_1, p4) -> p4_2 stage['sum_44_4'] = SumCell(in_channels=out_channels, out_channels=out_channels, out_norm_cfg=norm_cfg) # sum(p4_2, p3) -> p3_out stage['sum_43_3'] = SumCell(in_channels=out_channels, out_channels=out_channels, out_norm_cfg=norm_cfg) # sum(p3_out, p4_2) -> p4_out stage['sum_34_4'] = SumCell(in_channels=out_channels, out_channels=out_channels, out_norm_cfg=norm_cfg) # sum(p5, gp(p4_out, p3_out)) -> p5_out stage['gp_43_5'] = GlobalPoolingCell(with_out_conv=False) stage['sum_55_5'] = SumCell(in_channels=out_channels, out_channels=out_channels, out_norm_cfg=norm_cfg) # sum(p7, gp(p5_out, p4_2)) -> p7_out stage['gp_54_7'] = GlobalPoolingCell(with_out_conv=False) stage['sum_77_7'] = SumCell(in_channels=out_channels, out_channels=out_channels, out_norm_cfg=norm_cfg) # gp(p7_out, p5_out) -> p6_out stage['gp_75_6'] = GlobalPoolingCell(in_channels=out_channels, out_channels=out_channels, out_norm_cfg=norm_cfg) self.fpn_stages.append(stage)
def __init__(self, st_feat_channels, lt_feat_channels, latent_channels, num_st_feat, num_lt_feat, use_scale=True, pre_activate=True, pre_activate_with_ln=True, conv_cfg=None, norm_cfg=None, dropout_ratio=0.2, zero_init_out_conv=False): super().__init__() if conv_cfg is None: conv_cfg = dict(type='Conv3d') self.st_feat_channels = st_feat_channels self.lt_feat_channels = lt_feat_channels self.latent_channels = latent_channels self.num_st_feat = num_st_feat self.num_lt_feat = num_lt_feat self.use_scale = use_scale self.pre_activate = pre_activate self.pre_activate_with_ln = pre_activate_with_ln self.dropout_ratio = dropout_ratio self.zero_init_out_conv = zero_init_out_conv self.st_feat_conv = ConvModule(self.st_feat_channels, self.latent_channels, kernel_size=1, conv_cfg=conv_cfg, norm_cfg=norm_cfg, act_cfg=None) self.lt_feat_conv = ConvModule(self.lt_feat_channels, self.latent_channels, kernel_size=1, conv_cfg=conv_cfg, norm_cfg=norm_cfg, act_cfg=None) self.global_conv = ConvModule(self.lt_feat_channels, self.latent_channels, kernel_size=1, conv_cfg=conv_cfg, norm_cfg=norm_cfg, act_cfg=None) if pre_activate: self.ln = nn.LayerNorm([latent_channels, num_st_feat, 1, 1]) else: self.ln = nn.LayerNorm([st_feat_channels, num_st_feat, 1, 1]) self.relu = nn.ReLU() self.out_conv = ConvModule(self.latent_channels, self.st_feat_channels, kernel_size=1, conv_cfg=conv_cfg, norm_cfg=norm_cfg, act_cfg=None) if self.dropout_ratio > 0: self.dropout = nn.Dropout(self.dropout_ratio)
class ResNet(nn.Module): """ResNet backbone. Args: depth (int): Depth of resnet, from {18, 34, 50, 101, 152}. pretrained (str | None): Name of pretrained model. Default: None. in_channels (int): Channel num of input features. Default: 3. num_stages (int): Resnet stages. Default: 4. strides (Sequence[int]): Strides of the first block of each stage. out_indices (Sequence[int]): Indices of output feature. Default: (3, ). dilations (Sequence[int]): Dilation of each stage. style (str): ``pytorch`` or ``caffe``. If set to "pytorch", the stride-two layer is the 3x3 conv layer, otherwise the stride-two layer is the first 1x1 conv layer. Default: ``pytorch``. frozen_stages (int): Stages to be frozen (all param fixed). -1 means not freezing any parameters. Default: -1. conv_cfg (dict): Config for norm layers. Default: dict(type='Conv'). norm_cfg (dict): Config for norm layers. required keys are `type` and `requires_grad`. Default: dict(type='BN2d', requires_grad=True). act_cfg (dict): Config for activate layers. Default: dict(type='ReLU', inplace=True). norm_eval (bool): Whether to set BN layers to eval mode, namely, freeze running stats (mean and var). Default: False. partial_bn (bool): Whether to use partial bn. Default: False. with_cp (bool): Use checkpoint or not. Using checkpoint will save some memory while slowing down the training speed. Default: False. """ arch_settings = { 18: (BasicBlock, (2, 2, 2, 2)), 34: (BasicBlock, (3, 4, 6, 3)), 50: (Bottleneck, (3, 4, 6, 3)), 101: (Bottleneck, (3, 4, 23, 3)), 152: (Bottleneck, (3, 8, 36, 3)) } def __init__(self, depth, pretrained=None, torchvision_pretrain=True, in_channels=3, num_stages=4, out_indices=(3, ), strides=(1, 2, 2, 2), dilations=(1, 1, 1, 1), style='pytorch', frozen_stages=-1, conv_cfg=dict(type='Conv'), norm_cfg=dict(type='BN2d', requires_grad=True), act_cfg=dict(type='ReLU', inplace=True), norm_eval=False, partial_bn=False, with_cp=False): super().__init__() if depth not in self.arch_settings: raise KeyError(f'invalid depth {depth} for resnet') self.depth = depth self.in_channels = in_channels self.pretrained = pretrained self.torchvision_pretrain = torchvision_pretrain self.num_stages = num_stages assert 1 <= num_stages <= 4 self.out_indices = out_indices assert max(out_indices) < num_stages self.strides = strides self.dilations = dilations assert len(strides) == len(dilations) == num_stages self.style = style self.frozen_stages = frozen_stages self.conv_cfg = conv_cfg self.norm_cfg = norm_cfg self.act_cfg = act_cfg self.norm_eval = norm_eval self.partial_bn = partial_bn self.with_cp = with_cp self.block, stage_blocks = self.arch_settings[depth] self.stage_blocks = stage_blocks[:num_stages] self.inplanes = 64 self._make_stem_layer() self.res_layers = [] for i, num_blocks in enumerate(self.stage_blocks): stride = strides[i] dilation = dilations[i] planes = 64 * 2**i res_layer = make_res_layer( self.block, self.inplanes, planes, num_blocks, stride=stride, dilation=dilation, style=self.style, conv_cfg=conv_cfg, norm_cfg=norm_cfg, act_cfg=act_cfg, with_cp=with_cp) self.inplanes = planes * self.block.expansion layer_name = f'layer{i + 1}' self.add_module(layer_name, res_layer) self.res_layers.append(layer_name) self.feat_dim = self.block.expansion * 64 * 2**( len(self.stage_blocks) - 1) def _make_stem_layer(self): """Construct the stem layers consists of a conv+norm+act module and a pooling layer.""" self.conv1 = ConvModule( self.in_channels, 64, kernel_size=7, stride=2, padding=3, bias=False, conv_cfg=self.conv_cfg, 
            norm_cfg=self.norm_cfg,
            act_cfg=self.act_cfg)
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)

    @staticmethod
    def _load_conv_params(conv, state_dict_tv, module_name_tv,
                          loaded_param_names):
        """Load the conv parameters of resnet from torchvision.

        Args:
            conv (nn.Module): The destination conv module.
            state_dict_tv (OrderedDict): The state dict of pretrained
                torchvision model.
            module_name_tv (str): The name of corresponding conv module in the
                torchvision model.
            loaded_param_names (list[str]): List of parameters that have been
                loaded.
        """
        weight_tv_name = module_name_tv + '.weight'
        if conv.weight.data.shape == state_dict_tv[weight_tv_name].shape:
            conv.weight.data.copy_(state_dict_tv[weight_tv_name])
            loaded_param_names.append(weight_tv_name)

        if getattr(conv, 'bias') is not None:
            bias_tv_name = module_name_tv + '.bias'
            if conv.bias.data.shape == state_dict_tv[bias_tv_name].shape:
                conv.bias.data.copy_(state_dict_tv[bias_tv_name])
                loaded_param_names.append(bias_tv_name)

    @staticmethod
    def _load_bn_params(bn, state_dict_tv, module_name_tv,
                        loaded_param_names):
        """Load the bn parameters of resnet from torchvision.

        Args:
            bn (nn.Module): The destination bn module.
            state_dict_tv (OrderedDict): The state dict of pretrained
                torchvision model.
            module_name_tv (str): The name of corresponding bn module in the
                torchvision model.
            loaded_param_names (list[str]): List of parameters that have been
                loaded.
        """
        for param_name, param in bn.named_parameters():
            param_tv_name = f'{module_name_tv}.{param_name}'
            param_tv = state_dict_tv[param_tv_name]
            if param.data.shape == param_tv.shape:
                param.data.copy_(param_tv)
                loaded_param_names.append(param_tv_name)

        for param_name, param in bn.named_buffers():
            param_tv_name = f'{module_name_tv}.{param_name}'
            # some buffers like num_batches_tracked may not exist
            if param_tv_name in state_dict_tv:
                param_tv = state_dict_tv[param_tv_name]
                if param.data.shape == param_tv.shape:
                    param.data.copy_(param_tv)
                    loaded_param_names.append(param_tv_name)

    def _load_torchvision_checkpoint(self, logger=None):
        """Initiate the parameters from torchvision pretrained checkpoint."""
        state_dict_torchvision = _load_checkpoint(self.pretrained)
        if 'state_dict' in state_dict_torchvision:
            state_dict_torchvision = state_dict_torchvision['state_dict']

        loaded_param_names = []
        for name, module in self.named_modules():
            if isinstance(module, ConvModule):
                # we use a ConvModule to wrap conv+bn+relu layers, thus the
                # name mapping is needed
                if 'downsample' in name:
                    # layer{X}.{Y}.downsample.conv->layer{X}.{Y}.downsample.0
                    original_conv_name = name + '.0'
                    # layer{X}.{Y}.downsample.bn->layer{X}.{Y}.downsample.1
                    original_bn_name = name + '.1'
                else:
                    # layer{X}.{Y}.conv{n}.conv->layer{X}.{Y}.conv{n}
                    original_conv_name = name
                    # layer{X}.{Y}.conv{n}.bn->layer{X}.{Y}.bn{n}
                    original_bn_name = name.replace('conv', 'bn')
                self._load_conv_params(module.conv, state_dict_torchvision,
                                       original_conv_name, loaded_param_names)
                self._load_bn_params(module.bn, state_dict_torchvision,
                                     original_bn_name, loaded_param_names)

        # check if any parameters in the 2d checkpoint are not loaded
        remaining_names = set(
            state_dict_torchvision.keys()) - set(loaded_param_names)
        if remaining_names:
            logger.info(
                f'These parameters in pretrained checkpoint are not loaded'
                f': {remaining_names}')

    def init_weights(self):
        """Initiate the parameters either from existing checkpoint or from
        scratch."""
        if isinstance(self.pretrained, str):
            logger = get_root_logger()
            if self.torchvision_pretrain:
                # torchvision's
self._load_torchvision_checkpoint(logger) else: # ours load_checkpoint( self, self.pretrained, strict=False, logger=logger) elif self.pretrained is None: for m in self.modules(): if isinstance(m, nn.Conv2d): kaiming_init(m) elif isinstance(m, nn.BatchNorm2d): constant_init(m, 1) else: raise TypeError('pretrained must be a str or None') def forward(self, x): """Defines the computation performed at every call. Args: x (torch.Tensor): The input data. Returns: torch.Tensor: The feature of the input samples extracted by the backbone. """ x = self.conv1(x) x = self.maxpool(x) outs = [] for i, layer_name in enumerate(self.res_layers): res_layer = getattr(self, layer_name) x = res_layer(x) if i in self.out_indices: outs.append(x) if len(outs) == 1: return outs[0] return tuple(outs) def _freeze_stages(self): """Prevent all the parameters from being optimized before ``self.frozen_stages``.""" if self.frozen_stages >= 0: self.conv1.bn.eval() for m in self.conv1.modules(): for param in m.parameters(): param.requires_grad = False for i in range(1, self.frozen_stages + 1): m = getattr(self, f'layer{i}') m.eval() for param in m.parameters(): param.requires_grad = False def _partial_bn(self): logger = get_root_logger() logger.info('Freezing BatchNorm2D except the first one.') count_bn = 0 for m in self.modules(): if isinstance(m, nn.BatchNorm2d): count_bn += 1 if count_bn >= 2: m.eval() # shutdown update in frozen mode m.weight.requires_grad = False m.bias.requires_grad = False def train(self, mode=True): """Set the optimization status when training.""" super().train(mode) self._freeze_stages() if mode and self.norm_eval: for m in self.modules(): if isinstance(m, _BatchNorm): m.eval() if mode and self.partial_bn: self._partial_bn()
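# A minimal usage sketch for the ResNet backbone above. It assumes the class and
# its helpers (BasicBlock, Bottleneck, make_res_layer, ConvModule) are importable
# from this module; the input size and out_indices below are illustrative only.
import torch

backbone = ResNet(depth=50, pretrained=None, out_indices=(2, 3), norm_eval=True)
backbone.init_weights()   # random init because pretrained is None
backbone.eval()

x = torch.randn(2, 3, 224, 224)
with torch.no_grad():
    feats = backbone(x)   # a tuple, since len(out_indices) > 1
# For a 224x224 input the selected stages give
# feats[0]: (2, 1024, 14, 14) and feats[1]: (2, 2048, 7, 7).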
class ShuffleNetV1(BaseBackbone): """ShuffleNetV1 backbone. Args: groups (int): The number of groups to be used in grouped 1x1 convolutions in each ShuffleUnit. Default: 3. widen_factor (float): Width multiplier - adjusts the number of channels in each layer by this amount. Default: 1.0. out_indices (Sequence[int]): Output from which stages. Default: (2, ). frozen_stages (int): Stages to be frozen (all param fixed). Default: -1, which means not freezing any parameters. conv_cfg (dict, optional): Config dict for convolution layer. Default: None, which means using conv2d. norm_cfg (dict): Config dict for normalization layer. Default: dict(type='BN'). act_cfg (dict): Config dict for activation layer. Default: dict(type='ReLU'). norm_eval (bool): Whether to set norm layers to eval mode, namely, freeze running stats (mean and var). Note: Effect on Batch Norm and its variants only. Default: False. with_cp (bool): Use checkpoint or not. Using checkpoint will save some memory while slowing down the training speed. Default: False. """ def __init__(self, groups=3, widen_factor=1.0, out_indices=(2, ), frozen_stages=-1, conv_cfg=None, norm_cfg=dict(type='BN'), act_cfg=dict(type='ReLU'), norm_eval=False, with_cp=False): super(ShuffleNetV1, self).__init__() self.stage_blocks = [4, 8, 4] self.groups = groups for index in out_indices: if index not in range(0, 3): raise ValueError('the item in out_indices must be in ' f'range(0, 3). But received {index}') if frozen_stages not in range(-1, 3): raise ValueError('frozen_stages must be in range(-1, 3). ' f'But received {frozen_stages}') self.out_indices = out_indices self.frozen_stages = frozen_stages self.conv_cfg = conv_cfg self.norm_cfg = norm_cfg self.act_cfg = act_cfg self.norm_eval = norm_eval self.with_cp = with_cp if groups == 1: channels = (144, 288, 576) elif groups == 2: channels = (200, 400, 800) elif groups == 3: channels = (240, 480, 960) elif groups == 4: channels = (272, 544, 1088) elif groups == 8: channels = (384, 768, 1536) else: raise ValueError(f'{groups} groups is not supported for 1x1 ' 'Grouped Convolutions') channels = [make_divisible(ch * widen_factor, 8) for ch in channels] self.in_channels = int(24 * widen_factor) self.conv1 = ConvModule(in_channels=3, out_channels=self.in_channels, kernel_size=3, stride=2, padding=1, conv_cfg=conv_cfg, norm_cfg=norm_cfg, act_cfg=act_cfg) self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1) self.layers = nn.ModuleList() for i, num_blocks in enumerate(self.stage_blocks): first_block = True if i == 0 else False layer = self.make_layer(channels[i], num_blocks, first_block) self.layers.append(layer) def _freeze_stages(self): if self.frozen_stages >= 0: for param in self.conv1.parameters(): param.requires_grad = False for i in range(self.frozen_stages): layer = self.layers[i] layer.eval() for param in layer.parameters(): param.requires_grad = False def init_weights(self, pretrained=None): if isinstance(pretrained, str): logger = logging.getLogger() load_checkpoint(self, pretrained, strict=False, logger=logger) elif pretrained is None: for name, m in self.named_modules(): if isinstance(m, nn.Conv2d): if 'conv1' in name: normal_init(m, mean=0, std=0.01) else: normal_init(m, mean=0, std=1.0 / m.weight.shape[1]) elif isinstance(m, (_BatchNorm, nn.GroupNorm)): constant_init(m, val=1, bias=0.0001) if isinstance(m, _BatchNorm): if m.running_mean is not None: nn.init.constant_(m.running_mean, 0) else: raise TypeError('pretrained must be a str or None. But received ' f'{type(pretrained)}')
def make_layer(self, out_channels, num_blocks, first_block=False): """ Stack ShuffleUnit blocks to make a layer. Args: out_channels (int): out_channels of the block. num_blocks (int): Number of blocks. first_block (bool): Whether it is the first ShuffleUnit of a sequential stage. Default: False, which means using the grouped 1x1 convolution. """ layers = [] for i in range(num_blocks): first_block = first_block if i == 0 else False combine_mode = 'concat' if i == 0 else 'add' layers.append( ShuffleUnit(self.in_channels, out_channels, groups=self.groups, first_block=first_block, combine=combine_mode, conv_cfg=self.conv_cfg, norm_cfg=self.norm_cfg, act_cfg=self.act_cfg, with_cp=self.with_cp)) self.in_channels = out_channels return nn.Sequential(*layers) def forward(self, x): x = self.conv1(x) x = self.maxpool(x) outs = [] for i, layer in enumerate(self.layers): x = layer(x) if i in self.out_indices: outs.append(x) if len(outs) == 1: return outs[0] else: return tuple(outs) def train(self, mode=True): super(ShuffleNetV1, self).train(mode) self._freeze_stages() if mode and self.norm_eval: for m in self.modules(): if isinstance(m, _BatchNorm): m.eval()
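# A minimal usage sketch for the ShuffleNetV1 backbone above (assumes ShuffleUnit,
# ConvModule and make_divisible are importable; sizes are illustrative only).
import torch

model = ShuffleNetV1(groups=3, widen_factor=1.0, out_indices=(0, 1, 2))
model.init_weights(pretrained=None)
model.eval()

x = torch.randn(1, 3, 224, 224)
with torch.no_grad():
    c2, c3, c4 = model(x)
# With groups=3 the three stages use (240, 480, 960) channels at overall
# strides 8, 16 and 32, so the outputs are
# c2: (1, 240, 28, 28), c3: (1, 480, 14, 14), c4: (1, 960, 7, 7).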
def __init__(self, in_channels, max_channels, num_convs=5, fc_in_channels=None, fc_out_channels=1024, kernel_size=5, conv_cfg=None, norm_cfg=None, act_cfg=dict(type='ReLU'), out_act_cfg=dict(type='ReLU'), with_input_norm=True, with_out_convs=False, with_spectral_norm=False, **kwargs): super().__init__() if fc_in_channels is not None: assert fc_in_channels > 0 self.max_channels = max_channels self.with_fc = fc_in_channels is not None self.num_convs = num_convs self.with_out_act = out_act_cfg is not None self.with_out_convs = with_out_convs cur_channels = in_channels for i in range(num_convs): out_ch = min(64 * 2**i, max_channels) norm_cfg_ = norm_cfg act_cfg_ = act_cfg if i == 0 and not with_input_norm: norm_cfg_ = None elif (i == num_convs - 1 and not self.with_fc and not self.with_out_convs): norm_cfg_ = None act_cfg_ = out_act_cfg self.add_module( f'conv{i + 1}', ConvModule(cur_channels, out_ch, kernel_size=kernel_size, stride=2, padding=kernel_size // 2, norm_cfg=norm_cfg_, act_cfg=act_cfg_, with_spectral_norm=with_spectral_norm, **kwargs)) cur_channels = out_ch if self.with_out_convs: cur_channels = min(64 * 2**(num_convs - 1), max_channels) out_ch = min(64 * 2**num_convs, max_channels) self.add_module( f'conv{num_convs + 1}', ConvModule(cur_channels, out_ch, kernel_size, stride=1, padding=kernel_size // 2, norm_cfg=norm_cfg, act_cfg=act_cfg, with_spectral_norm=with_spectral_norm, **kwargs)) self.add_module( f'conv{num_convs + 2}', ConvModule(out_ch, 1, kernel_size, stride=1, padding=kernel_size // 2, act_cfg=None, with_spectral_norm=with_spectral_norm, **kwargs)) if self.with_fc: self.fc = LinearModule(fc_in_channels, fc_out_channels, bias=True, act_cfg=out_act_cfg, with_spectral_norm=with_spectral_norm)
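# The loop above doubles the width of each stride-2 conv until it saturates at
# ``max_channels``. A quick sketch of that schedule (the numbers are illustrative,
# not values taken from a specific config):
max_channels, num_convs = 256, 5
widths = [min(64 * 2 ** i, max_channels) for i in range(num_convs)]
print(widths)  # [64, 128, 256, 256, 256]; each conv also halves the spatial size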
def __init__(self, gamma_w=1.0, gamma_b=1.0, gamma_d=1.0, pretrained=None, in_channels=3, num_stages=4, spatial_strides=(2, 2, 2, 2), frozen_stages=-1, se_style='half', se_ratio=1 / 16, use_swish=True, conv_cfg=dict(type='Conv3d'), norm_cfg=dict(type='BN3d', requires_grad=True), act_cfg=dict(type='ReLU', inplace=True), norm_eval=False, with_cp=False, zero_init_residual=True, **kwargs): super().__init__() self.gamma_w = gamma_w self.gamma_b = gamma_b self.gamma_d = gamma_d self.pretrained = pretrained self.in_channels = in_channels # Hard coded, can be changed by gamma_w self.base_channels = 24 self.stage_blocks = [1, 2, 5, 3] # apply parameters gamma_w and gamma_d self.base_channels = self._round_width(self.base_channels, self.gamma_w) self.stage_blocks = [ self._round_repeats(x, self.gamma_d) for x in self.stage_blocks ] self.num_stages = num_stages assert 1 <= num_stages <= 4 self.spatial_strides = spatial_strides assert len(spatial_strides) == num_stages self.frozen_stages = frozen_stages self.se_style = se_style assert self.se_style in ['all', 'half'] self.se_ratio = se_ratio assert (self.se_ratio is None) or (self.se_ratio > 0) self.use_swish = use_swish self.conv_cfg = conv_cfg self.norm_cfg = norm_cfg self.act_cfg = act_cfg self.norm_eval = norm_eval self.with_cp = with_cp self.zero_init_residual = zero_init_residual self.block = BlockX3D self.stage_blocks = self.stage_blocks[:num_stages] self.layer_inplanes = self.base_channels self._make_stem_layer() self.res_layers = [] for i, num_blocks in enumerate(self.stage_blocks): spatial_stride = spatial_strides[i] inplanes = self.base_channels * 2**i planes = int(inplanes * self.gamma_b) res_layer = self.make_res_layer(self.block, self.layer_inplanes, inplanes, planes, num_blocks, spatial_stride=spatial_stride, se_style=self.se_style, se_ratio=self.se_ratio, use_swish=self.use_swish, norm_cfg=self.norm_cfg, conv_cfg=self.conv_cfg, act_cfg=self.act_cfg, with_cp=with_cp, **kwargs) self.layer_inplanes = inplanes layer_name = f'layer{i + 1}' self.add_module(layer_name, res_layer) self.res_layers.append(layer_name) self.feat_dim = self.base_channels * 2**(len(self.stage_blocks) - 1) self.conv5 = ConvModule(self.feat_dim, int(self.feat_dim * self.gamma_b), kernel_size=1, stride=1, padding=0, bias=False, conv_cfg=self.conv_cfg, norm_cfg=self.norm_cfg, act_cfg=self.act_cfg) self.feat_dim = int(self.feat_dim * self.gamma_b)
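# Sketch of how gamma_d expands the stage depths above, assuming _round_repeats
# applies a ceiling as in the reference X3D implementation (that rounding rule is
# an assumption, since the helper is not shown here):
import math

def _round_repeats_sketch(repeats, multiplier):
    # hypothetical stand-in for self._round_repeats
    return int(math.ceil(multiplier * repeats)) if multiplier else repeats

base_blocks = [1, 2, 5, 3]
print([_round_repeats_sketch(b, 2.2) for b in base_blocks])  # gamma_d=2.2 -> [3, 5, 11, 7]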
def __init__(self, in_channels, out_channels, groups=3, first_block=True, combine='add', conv_cfg=None, norm_cfg=dict(type='BN'), act_cfg=dict(type='ReLU'), with_cp=False): super(ShuffleUnit, self).__init__() self.in_channels = in_channels self.out_channels = out_channels self.first_block = first_block self.combine = combine self.groups = groups self.bottleneck_channels = self.out_channels // 4 self.with_cp = with_cp if self.combine == 'add': self.depthwise_stride = 1 self._combine_func = self._add assert in_channels == out_channels, ( 'in_channels must be equal to out_channels when combine ' 'is add') elif self.combine == 'concat': self.depthwise_stride = 2 self._combine_func = self._concat self.out_channels -= self.in_channels self.avgpool = nn.AvgPool2d(kernel_size=3, stride=2, padding=1) else: raise ValueError(f'Cannot combine tensors with {self.combine}. ' 'Only "add" and "concat" are supported') self.first_1x1_groups = 1 if first_block else self.groups self.g_conv_1x1_compress = ConvModule( in_channels=self.in_channels, out_channels=self.bottleneck_channels, kernel_size=1, groups=self.first_1x1_groups, conv_cfg=conv_cfg, norm_cfg=norm_cfg, act_cfg=act_cfg) self.depthwise_conv3x3_bn = ConvModule( in_channels=self.bottleneck_channels, out_channels=self.bottleneck_channels, kernel_size=3, stride=self.depthwise_stride, padding=1, groups=self.bottleneck_channels, conv_cfg=conv_cfg, norm_cfg=norm_cfg, act_cfg=None) self.g_conv_1x1_expand = ConvModule( in_channels=self.bottleneck_channels, out_channels=self.out_channels, kernel_size=1, groups=self.groups, conv_cfg=conv_cfg, norm_cfg=norm_cfg, act_cfg=None) self.act = build_activation_layer(act_cfg)
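# Channel bookkeeping implied by the 'concat' branch above (illustrative numbers):
in_channels, out_channels = 24, 240
bottleneck_channels = out_channels // 4        # 60-channel grouped 1x1 compress
branch_channels = out_channels - in_channels   # 216 channels from the residual branch
# Concatenating with the 24-channel avg-pooled identity restores 216 + 24 = 240,
# whereas the 'add' branch keeps stride 1 and requires in_channels == out_channels.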
def __init__(self, inplanes, planes, outplanes, spatial_stride=1, downsample=None, se_ratio=None, use_swish=True, conv_cfg=dict(type='Conv3d'), norm_cfg=dict(type='BN3d'), act_cfg=dict(type='ReLU'), with_cp=False): super().__init__() self.inplanes = inplanes self.planes = planes self.outplanes = outplanes self.spatial_stride = spatial_stride self.downsample = downsample self.se_ratio = se_ratio self.use_swish = use_swish self.conv_cfg = conv_cfg self.norm_cfg = norm_cfg self.act_cfg = act_cfg self.act_cfg_swish = dict(type='Swish') self.with_cp = with_cp self.conv1 = ConvModule(in_channels=inplanes, out_channels=planes, kernel_size=1, stride=1, padding=0, bias=False, conv_cfg=self.conv_cfg, norm_cfg=self.norm_cfg, act_cfg=self.act_cfg) # Here we use the channel-wise conv self.conv2 = ConvModule(in_channels=planes, out_channels=planes, kernel_size=3, stride=(1, self.spatial_stride, self.spatial_stride), padding=1, groups=planes, bias=False, conv_cfg=self.conv_cfg, norm_cfg=self.norm_cfg, act_cfg=None) self.swish = Swish() self.conv3 = ConvModule(in_channels=planes, out_channels=outplanes, kernel_size=1, stride=1, padding=0, bias=False, conv_cfg=self.conv_cfg, norm_cfg=self.norm_cfg, act_cfg=None) if self.se_ratio is not None: self.se_module = SEModule(planes, self.se_ratio) self.relu = build_activation_layer(self.act_cfg)
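# In the X3D backbone above, the depthwise width of this block is the stage width
# scaled by gamma_b, and the final 1x1 projects back to the stage width (outplanes).
# A quick sketch with illustrative stage widths:
gamma_b = 2.25
for stage_width in [24, 48, 96, 192]:
    planes = int(stage_width * gamma_b)
    print(f'stage width {stage_width}: expand to {planes}, project back to {stage_width}')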
def __init__(self, in_channels, out_channels, num_outs, start_level=0, end_level=-1, add_extra_convs=False, extra_convs_on_inputs=True, relu_before_extra_convs=False, no_norm_on_lateral=False, conv_cfg=None, norm_cfg=None, act_cfg=None, upsample_cfg=dict(mode='nearest')): super(NoFPN, self).__init__() assert isinstance(in_channels, list) self.add_extra_convs = add_extra_convs self.in_channels = in_channels self.out_channels = out_channels self.num_ins = len(in_channels) self.num_outs = num_outs self.fp16_enabled = False self.upsample_cfg = upsample_cfg.copy() if end_level == -1: self.backbone_end_level = self.num_ins assert num_outs >= self.num_ins - start_level else: # if end_level < inputs, no extra level is allowed self.backbone_end_level = end_level assert end_level <= len(in_channels) assert num_outs == end_level - start_level self.start_level = start_level self.end_level = end_level self.lateral_convs = nn.ModuleList() self.fpn_convs = nn.ModuleList() self.spatail_gates = nn.ModuleList() self.downsample_convs = nn.ModuleList() for i in range(self.start_level, self.backbone_end_level): d_conv = nn.ModuleList() for j in range(self.start_level, self.backbone_end_level): if i > j: d_conv.append( ConvModule(out_channels, out_channels, 1 + 2 * (i - j), stride=2**(i - j), padding=1, inplace=False)) self.spatail_gates.append( SpatialGate(levels=self.num_ins - start_level)) self.downsample_convs.append(d_conv) # 1, 4 for i in range(self.start_level, self.backbone_end_level): l_conv = ConvModule(in_channels[i], out_channels, 1, inplace=False) fpn_conv = ConvModule(out_channels, out_channels, 3, padding=1, inplace=False) self.lateral_convs.append(l_conv) self.fpn_convs.append(fpn_conv) # add extra conv layers (e.g., RetinaNet) extra_levels = num_outs - self.backbone_end_level + self.start_level if self.add_extra_convs and extra_levels >= 1: for i in range(extra_levels): if i == 0 and self.add_extra_convs == 'on_input': # 2048 in_channels = self.in_channels[self.backbone_end_level - 1] else: # 256 in_channels = out_channels extra_fpn_conv = ConvModule(in_channels, out_channels, 3, stride=2, padding=1, inplace=False) self.fpn_convs.append(extra_fpn_conv)
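# The nested loop above wires every finer level j into level i with a single
# strided conv. A sketch of the resulting kernel/stride pattern for four levels:
num_levels = 4
for i in range(num_levels):
    for j in range(num_levels):
        if i > j:
            print(f'level {j} -> level {i}: kernel={1 + 2 * (i - j)}, stride={2 ** (i - j)}')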
def __init__(self, in_channels, out_channels, num_outs, start_level=0, end_level=-1, add_extra_convs=False, extra_convs_on_inputs=True, relu_before_extra_convs=False, no_norm_on_lateral=False, conv_cfg=None, norm_cfg=None, act_cfg=None, upsample_cfg=dict(mode='nearest')): super().__init__() assert isinstance(in_channels, list) self.in_channels = in_channels self.out_channels = out_channels self.num_ins = len(in_channels) self.num_outs = num_outs self.relu_before_extra_convs = relu_before_extra_convs self.no_norm_on_lateral = no_norm_on_lateral self.fp16_enabled = False self.upsample_cfg = upsample_cfg.copy() if end_level == -1: self.backbone_end_level = self.num_ins assert num_outs >= self.num_ins - start_level else: # if end_level < inputs, no extra level is allowed self.backbone_end_level = end_level assert end_level <= len(in_channels) assert num_outs == end_level - start_level self.start_level = start_level self.end_level = end_level self.add_extra_convs = add_extra_convs assert isinstance(add_extra_convs, (str, bool)) if isinstance(add_extra_convs, str): # Extra_convs_source choices: 'on_input', 'on_lateral', 'on_output' assert add_extra_convs in ('on_input', 'on_lateral', 'on_output') elif add_extra_convs: # True if extra_convs_on_inputs: # TODO: deprecate `extra_convs_on_inputs` warnings.simplefilter('once') warnings.warn( '"extra_convs_on_inputs" will be deprecated in v2.9.0,' 'Please use "add_extra_convs"', DeprecationWarning) self.add_extra_convs = 'on_input' else: self.add_extra_convs = 'on_output' self.lateral_convs = nn.ModuleList() self.fpn_convs = nn.ModuleList() concat_in_channels = [] for i in range(self.start_level, self.backbone_end_level): if i == self.backbone_end_level - 1: concat_in_channels.append(in_channels[i]) else: concat_in_channels.append(in_channels[i] + out_channels[i + 1]) for i in range(self.start_level, self.backbone_end_level): l_conv = ConvModule( concat_in_channels[i], out_channels[i], 1, conv_cfg=conv_cfg, norm_cfg=norm_cfg if not self.no_norm_on_lateral else None, act_cfg=act_cfg, inplace=False) fpn_conv = ConvModule(out_channels[i], out_channels[i], 3, padding=1, conv_cfg=conv_cfg, norm_cfg=norm_cfg, act_cfg=act_cfg, inplace=False) self.lateral_convs.append(l_conv) self.fpn_convs.append(fpn_conv) # add extra conv layers (e.g., RetinaNet) extra_levels = num_outs - self.backbone_end_level + self.start_level if self.add_extra_convs and extra_levels >= 1: for i in range(extra_levels): if i == 0 and self.add_extra_convs == 'on_input': in_channels = self.in_channels[self.backbone_end_level - 1] else: in_channels = out_channels extra_fpn_conv = ConvModule(in_channels, out_channels, 3, stride=2, padding=1, conv_cfg=conv_cfg, norm_cfg=norm_cfg, act_cfg=act_cfg, inplace=False) self.fpn_convs.append(extra_fpn_conv)
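# How the lateral input widths above are computed: every level except the coarsest
# is concatenated with the (already reduced) next-coarser output before its 1x1
# lateral conv. The channel numbers below are illustrative only.
in_channels = [256, 512, 1024, 2048]
out_channels = [64, 96, 128, 160]
concat_in_channels = [
    c if i == len(in_channels) - 1 else c + out_channels[i + 1]
    for i, c in enumerate(in_channels)
]
print(concat_in_channels)  # [352, 640, 1184, 2048]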
def __init__( self, in_channels=[64, 128, 256, 512], out_channels=128, num_outs=4, start_level=0, end_level=-1, add_extra_convs=False, relu_before_extra_convs=False, no_norm_on_lateral=False, conv_cfg=None, norm_cfg=None, act_cfg=None, num_heads=[2, 2, 2, 2], mlp_ratios=[4, 4, 4, 4], sr_ratios=[8, 4, 2, 1], qkv_bias=True, qk_scale=None, drop_rate=0., attn_drop_rate=0., drop_path_rate=0., transformer_norm_cfg=dict(type='LN'), use_sr_conv=False, ): super().__init__() assert isinstance(in_channels, list) self.in_channels = in_channels self.out_channels = out_channels self.num_ins = len(in_channels) self.num_outs = num_outs self.no_norm_on_lateral = no_norm_on_lateral self.fp16_enabled = False self.norm_cfg = norm_cfg self.conv_cfg = conv_cfg self.act_cfg = act_cfg self.mlp_ratios = mlp_ratios if end_level == -1 or end_level == self.num_ins - 1: self.backbone_end_level = self.num_ins assert num_outs >= self.num_ins - start_level else: # if end_level is not the last level, no extra level is allowed self.backbone_end_level = end_level + 1 assert end_level < self.num_ins assert num_outs == end_level - start_level + 1 self.start_level = start_level self.end_level = end_level self.lateral_convs = nn.ModuleList() self.merge_blocks = nn.ModuleList() for i in range(self.start_level, self.backbone_end_level): l_conv = ConvModule( in_channels[i], out_channels, 1, conv_cfg=conv_cfg, norm_cfg=norm_cfg if not self.no_norm_on_lateral else None, act_cfg=act_cfg, inplace=False) self.lateral_convs.append(l_conv) for i in range(self.start_level, self.backbone_end_level - 1): merge_block = TCFormerDynamicBlock( dim=out_channels, num_heads=num_heads[i], mlp_ratio=mlp_ratios[i], qkv_bias=qkv_bias, qk_scale=qk_scale, drop=drop_rate, attn_drop=attn_drop_rate, drop_path=drop_path_rate, norm_cfg=transformer_norm_cfg, sr_ratio=sr_ratios[i], use_sr_conv=use_sr_conv) self.merge_blocks.append(merge_block) # add extra conv layers (e.g., RetinaNet) self.relu_before_extra_convs = relu_before_extra_convs self.add_extra_convs = add_extra_convs assert isinstance(add_extra_convs, (str, bool)) if isinstance(add_extra_convs, str): # Extra_convs_source choices: 'on_input', 'on_output' assert add_extra_convs in ('on_input', 'on_output') elif add_extra_convs: # True self.add_extra_convs = 'on_input' self.extra_convs = nn.ModuleList() extra_levels = num_outs - (self.end_level + 1 - self.start_level) if self.add_extra_convs and extra_levels >= 1: for i in range(extra_levels): if i == 0 and self.add_extra_convs == 'on_input': in_channels = self.in_channels[self.end_level] else: in_channels = out_channels extra_fpn_conv = ConvModule( in_channels, out_channels, 3, stride=2, padding=1, conv_cfg=conv_cfg, norm_cfg=norm_cfg, act_cfg=act_cfg, inplace=False) self.extra_convs.append(extra_fpn_conv)
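# An illustrative config for this token-merging neck in the usual mmcv config
# style. The registry name 'MTA' and every number below are assumptions for
# illustration, not values taken from this file.
neck = dict(
    type='MTA',
    in_channels=[64, 128, 320, 512],
    out_channels=128,
    start_level=0,
    num_outs=4,
    num_heads=[2, 2, 2, 2],
    mlp_ratios=[4, 4, 4, 4],
    sr_ratios=[8, 4, 2, 1],
    use_sr_conv=False)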
def __init__(self, num_point: int, radii: List[float], sample_nums: List[int], mlp_channels: List[List[int]], fps_mod: List[str] = ['D-FPS'], fps_sample_range_list: List[int] = [-1], dilated_group: bool = False, norm_cfg: dict = dict(type='BN2d'), use_xyz: bool = True, pool_mod='max', normalize_xyz: bool = False, bias='auto'): super().__init__() assert len(radii) == len(sample_nums) == len(mlp_channels) assert pool_mod in ['max', 'avg'] assert isinstance(fps_mod, list) or isinstance(fps_mod, tuple) assert isinstance(fps_sample_range_list, list) or isinstance( fps_sample_range_list, tuple) assert len(fps_mod) == len(fps_sample_range_list) if isinstance(mlp_channels, tuple): mlp_channels = list(map(list, mlp_channels)) if isinstance(num_point, int): self.num_point = [num_point] elif isinstance(num_point, list) or isinstance(num_point, tuple): self.num_point = num_point else: raise NotImplementedError('Error type of num_point!') self.pool_mod = pool_mod self.groupers = nn.ModuleList() self.mlps = nn.ModuleList() self.fps_mod_list = fps_mod self.fps_sample_range_list = fps_sample_range_list self.points_sampler = Points_Sampler(self.num_point, self.fps_mod_list, self.fps_sample_range_list) for i in range(len(radii)): radius = radii[i] sample_num = sample_nums[i] if num_point is not None: if dilated_group and i != 0: min_radius = radii[i - 1] else: min_radius = 0 grouper = QueryAndGroup( radius, sample_num, min_radius=min_radius, use_xyz=use_xyz, normalize_xyz=normalize_xyz) else: grouper = GroupAll(use_xyz) self.groupers.append(grouper) mlp_spec = mlp_channels[i] if use_xyz: mlp_spec[0] += 3 mlp = nn.Sequential() for i in range(len(mlp_spec) - 1): mlp.add_module( f'layer{i}', ConvModule( mlp_spec[i], mlp_spec[i + 1], kernel_size=(1, 1), stride=(1, 1), conv_cfg=dict(type='Conv2d'), norm_cfg=norm_cfg, bias=bias)) self.mlps.append(mlp)
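# An illustrative multi-scale grouping setup for the module above; every number is
# made up. Note that __init__ adds 3 to the first MLP width when use_xyz=True, so
# mlp_channels[i][0] should equal the raw per-point feature dimension.
sa_cfg = dict(
    num_point=512,
    radii=[0.2, 0.4],
    sample_nums=[16, 32],
    mlp_channels=[[16, 32, 64], [16, 64, 128]],
    fps_mod=['D-FPS'],
    fps_sample_range_list=[-1],
    use_xyz=True,
    pool_mod='max')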
def __init__(self, in_channels, out_channels, base_channels=64, norm_cfg=dict(type='IN'), use_dropout=False, num_blocks=9, padding_mode='reflect', init_cfg=dict(type='normal', gain=0.02)): super().__init__() assert num_blocks >= 0, ('Number of residual blocks must be ' f'non-negative, but got {num_blocks}.') assert isinstance(norm_cfg, dict), ("'norm_cfg' should be dict, but" f'got {type(norm_cfg)}') assert 'type' in norm_cfg, "'norm_cfg' must have key 'type'" # We use norm layers in the resnet generator. # Only for IN, use bias since it does not have affine parameters. use_bias = norm_cfg['type'] == 'IN' model = [] model += [ ConvModule(in_channels=in_channels, out_channels=base_channels, kernel_size=7, padding=3, bias=use_bias, norm_cfg=norm_cfg, padding_mode=padding_mode) ] num_down = 2 # add downsampling layers for i in range(num_down): multiple = 2**i model += [ ConvModule(in_channels=base_channels * multiple, out_channels=base_channels * multiple * 2, kernel_size=3, stride=2, padding=1, bias=use_bias, norm_cfg=norm_cfg) ] # add residual blocks multiple = 2**num_down for i in range(num_blocks): model += [ ResidualBlockWithDropout(base_channels * multiple, padding_mode=padding_mode, norm_cfg=norm_cfg, use_dropout=use_dropout) ] # add upsampling layers for i in range(num_down): multiple = 2**(num_down - i) model += [ ConvModule(in_channels=base_channels * multiple, out_channels=base_channels * multiple // 2, kernel_size=3, stride=2, padding=1, bias=use_bias, conv_cfg=dict(type='Deconv', output_padding=1), norm_cfg=norm_cfg) ] model += [ ConvModule(in_channels=base_channels, out_channels=out_channels, kernel_size=7, padding=3, bias=True, norm_cfg=None, act_cfg=dict(type='Tanh'), padding_mode=padding_mode) ] self.model = nn.Sequential(*model) self.init_type = 'normal' if init_cfg is None else init_cfg.get( 'type', 'normal') self.init_gain = 0.02 if init_cfg is None else init_cfg.get( 'gain', 0.02)
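# A minimal usage sketch for the generator above (assumes ConvModule and
# ResidualBlockWithDropout are importable; the 256x256 input is illustrative).
import torch

gen = ResnetGenerator(in_channels=3, out_channels=3, base_channels=64, num_blocks=9)
x = torch.randn(1, 3, 256, 256)
y = gen.model(x)   # self.model wraps the full encoder / resblock / decoder stack
print(y.shape)     # (1, 3, 256, 256): two stride-2 convs are undone by two deconvs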