def __init__(self):
    super(Discriminator, self).__init__()

    def block(in_features, out_features, normalization=True):
        'Discriminator block'
        layers = [
            nn.Conv(in_features, out_features, 3, stride=2, padding=1),
            nn.LeakyReLU(scale=0.2)
        ]
        if normalization:
            layers.append(nn.InstanceNorm2d(out_features, affine=None))
        return layers

    self.model = nn.Sequential(
        *block(opt.channels, 64, normalization=False),
        *block(64, 128),
        *block(128, 256),
        *block(256, 512),
        nn.Conv(512, 1, 3, stride=1, padding=1))
    for m in self.modules():
        weights_init_normal(m)
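# A standalone sketch (not from this repo) of the downsampling arithmetic the
# block() helper above relies on: four stride-2 convs shrink a 64x64 input to
# 4x4 before the final 1-channel conv produces the patch map.
import jittor as jt
from jittor import nn

x = jt.random((1, 3, 64, 64))
for c_in, c_out in [(3, 64), (64, 128), (128, 256), (256, 512)]:
    x = nn.Conv(c_in, c_out, 3, stride=2, padding=1)(x)
print(x.shape)  # [1, 512, 4, 4]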
def build_mlps(self, mlp_spec: List[int], use_xyz: bool = True,
               bn: bool = True) -> nn.Sequential:
    layers = []
    if use_xyz:
        mlp_spec[0] += 3  # xyz coordinates are appended to the input features
    for i in range(1, len(mlp_spec)):
        layers.append(
            nn.Conv(mlp_spec[i - 1], mlp_spec[i], kernel_size=1, bias=not bn))
        if bn:
            layers.append(nn.BatchNorm(mlp_spec[i]))
        layers.append(nn.ReLU())
    return nn.Sequential(*layers)
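# A standalone sketch of the shared-MLP idiom used by build_mlps: a
# kernel_size=1 conv acts as a fully connected layer applied independently at
# every (point, neighbor) position of a (B, C, npoint, nsample) feature map.
# The shapes here are illustrative assumptions, not values from this repo.
import jittor as jt
from jittor import nn

feats = jt.random((2, 6, 64, 16))  # 3-d features with xyz appended (3 + 3 = 6)
mlp = nn.Sequential(
    nn.Conv(6, 32, kernel_size=1, bias=False),
    nn.BatchNorm(32),
    nn.ReLU())
print(mlp(feats).shape)  # [2, 32, 64, 16]: channels mixed, points untouched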
def __init__(self, npoint, nsample, in_channel, mlp, bandwidth, group_all):
    super(PointConvDensitySetAbstraction, self).__init__()
    self.npoint = npoint
    self.nsample = nsample
    self.mlp_convs = nn.ModuleList()
    self.mlp_bns = nn.ModuleList()
    last_channel = in_channel
    for out_channel in mlp:
        self.mlp_convs.append(nn.Conv(last_channel, out_channel, 1))
        self.mlp_bns.append(nn.BatchNorm(out_channel))
        last_channel = out_channel
    self.weightnet = WeightNet(3, 16)
    self.densitynet = DensityNet()
    self.linear = nn.Linear(16 * mlp[-1], mlp[-1])
    self.bn_linear = nn.BatchNorm1d(mlp[-1])
    self.group_all = group_all
    self.bandwidth = bandwidth
    self.relu = nn.ReLU()
def __init__(self, block, nblocks, growth_rate=12, reduction=0.5, num_classes=10):
    super(DenseNet, self).__init__()
    self.growth_rate = growth_rate

    num_planes = 2 * growth_rate
    self.conv1 = nn.Conv(3, num_planes, kernel_size=3, padding=1, bias=False)

    self.dense1 = self._make_dense_layers(block, num_planes, nblocks[0])
    num_planes += nblocks[0] * growth_rate
    out_planes = int(math.floor(num_planes * reduction))
    self.trans1 = Transition(num_planes, out_planes)
    num_planes = out_planes

    self.dense2 = self._make_dense_layers(block, num_planes, nblocks[1])
    num_planes += nblocks[1] * growth_rate
    out_planes = int(math.floor(num_planes * reduction))
    self.trans2 = Transition(num_planes, out_planes)
    num_planes = out_planes

    self.dense3 = self._make_dense_layers(block, num_planes, nblocks[2])
    num_planes += nblocks[2] * growth_rate
    out_planes = int(math.floor(num_planes * reduction))
    self.trans3 = Transition(num_planes, out_planes)
    num_planes = out_planes

    self.dense4 = self._make_dense_layers(block, num_planes, nblocks[3])
    num_planes += nblocks[3] * growth_rate

    self.bn = nn.BatchNorm(num_planes)
    self.linear = nn.Linear(num_planes, num_classes)
    # DenseNet applies global *average* pooling before the classifier;
    # Jittor's nn.Pool defaults to op='maximum', so request 'mean' explicitly.
    self.pool = nn.Pool(4, op='mean')
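# A plain-Python sketch of the channel bookkeeping above, using a hypothetical
# DenseNet-121-style config (nblocks=[6, 12, 24, 16], growth_rate=12): each
# dense block adds nblocks[i]*growth_rate channels and each transition halves
# the count.
import math

growth_rate, reduction = 12, 0.5
num_planes = 2 * growth_rate
for i, n in enumerate([6, 12, 24, 16]):
    num_planes += n * growth_rate              # dense block i
    if i < 3:                                  # transitions follow blocks 1-3 only
        num_planes = int(math.floor(num_planes * reduction))
print(num_planes)  # 384 channels enter the final BatchNorm/Linear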
def _make_layer(self, cfg):
    """
    Each layer is a sequence of conv layers usually preceded by a max pooling.
    Adapted from torchvision.models.vgg.make_layers.
    """
    layers = []
    for v in cfg:
        # VGG in SSD requires some special layers, so allow layers to be
        # tuples of (<M or num_features>, kwdargs dict)
        args = None
        if isinstance(v, tuple):
            args = v[1]
            v = v[0]
        # v should be either M or a number
        if v == 'M':
            # Set default arguments
            if args is None:
                args = {'kernel_size': 2, 'stride': 2}
            layers.append(nn.Pool(**args, op='maximum'))
        else:
            # See the comment in __init__ for an explanation of this
            cur_layer_idx = self.total_layer_count + len(layers)
            self.state_dict_lookup[cur_layer_idx] = '%d.%d' % (
                len(self.layers), len(layers))
            # Set default arguments
            if args is None:
                args = {'kernel_size': 3, 'padding': 1}
            # Add the layers
            layers.append(nn.Conv(self.in_channels, v, **args))
            layers.append(nn.ReLU())
            self.in_channels = v
    self.total_layer_count += len(layers)
    self.channels.append(self.in_channels)
    self.layers.append(nn.Sequential(*layers))
def __init__(self, latent_dim, img_shape):
    super(Generator, self).__init__()
    (channels, self.h, self.w) = img_shape
    self.fc = nn.Linear(latent_dim, self.h * self.w)
    self.down1 = UNetDown(channels + 1, 64, normalize=False)
    self.down2 = UNetDown(64, 128)
    self.down3 = UNetDown(128, 256)
    self.down4 = UNetDown(256, 512)
    self.down5 = UNetDown(512, 512)
    self.down6 = UNetDown(512, 512)
    self.down7 = UNetDown(512, 512, normalize=False)
    self.up1 = UNetUp(512, 512)
    self.up2 = UNetUp(1024, 512)
    self.up3 = UNetUp(1024, 512)
    self.up4 = UNetUp(1024, 256)
    self.up5 = UNetUp(512, 128)
    self.up6 = UNetUp(256, 64)
    self.final = nn.Sequential(
        nn.Upsample(scale_factor=2),
        nn.Conv(128, channels, 3, stride=1, padding=1),
        nn.Tanh())
    for m in self.modules():
        weights_init_normal(m)
def __init__(self, in_features: int, out_features: int, drop_rate: float = 0,
             with_bn: bool = True, activation=nn.ReLU(), groups=1) -> None:
    """
    :param in_features: Length of input features (last dimension).
    :param out_features: Length of output features (last dimension).
    :param drop_rate: Drop rate to be applied after activation.
    :param with_bn: Whether or not to apply batch normalization.
    :param activation: Activation function.
    :param groups: Number of groups for the underlying 1x1 convolution.
    """
    super(Dense_Conv2d, self).__init__()
    self.linear = nn.Conv(in_features, out_features, 1, groups=groups)
    self.activation = activation
    self.with_bn = with_bn
    self.drop = nn.Dropout(drop_rate) if drop_rate > 0 else None
    self.bn = nn.BatchNorm(out_features) if with_bn else None
def __init__(self, in_channels: int, out_channels: int, kernel_size,
             with_bn=True, activation=nn.ReLU()) -> None:
    """
    :param in_channels: Length of input features (first dimension).
    :param out_channels: Length of output features (first dimension).
    :param kernel_size: Size of convolutional kernel.
    :param with_bn: Whether or not to apply batch normalization.
    :param activation: Activation function.
    """
    super(Conv, self).__init__()
    self.conv = nn.Conv(in_channels, out_channels, kernel_size, bias=not with_bn)
    self.activation = activation
    self.bn = nn.BatchNorm(out_channels, momentum=0.9) if with_bn else None
def __init__(self, input_shape):
    super(Discriminator, self).__init__()
    (channels, height, width) = input_shape
    # Four stride-2 blocks downsample by 2**4, giving a 1-channel patch map.
    self.output_shape = (1, height // (2**4), width // (2**4))

    def discriminator_block(in_filters, out_filters, normalize=True):
        'Returns downsampling layers of each discriminator block'
        layers = [nn.Conv(in_filters, out_filters, 4, stride=2, padding=1)]
        if normalize:
            layers.append(nn.InstanceNorm2d(out_filters, affine=None))
        layers.append(nn.LeakyReLU(scale=0.2))
        return layers

    self.model = nn.Sequential(
        *discriminator_block(channels, 64, normalize=False),
        *discriminator_block(64, 128),
        *discriminator_block(128, 256),
        *discriminator_block(256, 512),
        nn.ZeroPad2d((1, 0, 1, 0)),
        nn.Conv(512, 1, 4, padding=1))
    for m in self.modules():
        weights_init_normal(m)
def __init__(self, norm_layer, image_size, output_nc, latent_dim=512):
    super(DecoderGenerator_feature_Res, self).__init__()
    # start from B*1024
    latent_size = int(image_size / 32)
    self.latent_size = latent_size
    longsize = 512 * latent_size * latent_size

    activation = nn.ReLU()
    padding_type = 'reflect'
    # Note: the norm_layer argument is shadowed here; nn.BatchNorm is always used.
    norm_layer = nn.BatchNorm

    self.fc = nn.Sequential(nn.Linear(in_features=latent_dim, out_features=longsize))

    # Each DecoderBlock doubles the spatial size; the trailing comments track
    # it for a 352x352 output (latent 11 -> 22 -> 44 -> 88 -> 176 -> 352).
    layers_list = []
    layers_list.append(ResnetBlock(512, padding_type=padding_type, activation=activation, norm_layer=norm_layer))
    layers_list.append(DecoderBlock(channel_in=512, channel_out=256, kernel_size=4, padding=1, stride=2, output_padding=0))  # 22 x 22
    layers_list.append(ResnetBlock(256, padding_type=padding_type, activation=activation, norm_layer=norm_layer))
    layers_list.append(DecoderBlock(channel_in=256, channel_out=256, kernel_size=4, padding=1, stride=2, output_padding=0))  # 44 x 44
    layers_list.append(ResnetBlock(256, padding_type=padding_type, activation=activation, norm_layer=norm_layer))
    layers_list.append(DecoderBlock(channel_in=256, channel_out=128, kernel_size=4, padding=1, stride=2, output_padding=0))  # 88 x 88
    layers_list.append(ResnetBlock(128, padding_type=padding_type, activation=activation, norm_layer=norm_layer))
    layers_list.append(DecoderBlock(channel_in=128, channel_out=64, kernel_size=4, padding=1, stride=2, output_padding=0))  # 176 x 176
    layers_list.append(ResnetBlock(64, padding_type=padding_type, activation=activation, norm_layer=norm_layer))
    layers_list.append(DecoderBlock(channel_in=64, channel_out=64, kernel_size=4, padding=1, stride=2, output_padding=0))  # 352 x 352
    layers_list.append(ResnetBlock(64, padding_type=padding_type, activation=activation, norm_layer=norm_layer))
    layers_list.append(nn.ReflectionPad2d(2))
    layers_list.append(nn.Conv(64, output_nc, kernel_size=5, padding=0))

    self.conv = nn.Sequential(*layers_list)
    for m in self.modules():
        weights_init_normal(m)
def conv3x3(in_channels, out_channels, module_name, postfix,
            stride=1, groups=1, kernel_size=3, padding=1):
    """3x3 convolution with padding"""
    return [
        (f'{module_name}_{postfix}/conv',
         nn.Conv(in_channels, out_channels, kernel_size=kernel_size,
                 stride=stride, padding=padding, groups=groups, bias=False)),
        (f'{module_name}_{postfix}/norm',
         group_norm(out_channels) if _GN else FrozenBatchNorm2d(out_channels)),
        (f'{module_name}_{postfix}/relu', nn.ReLU())
    ]
def _make_MG_unit(self, block, planes, blocks, stride=1, dilation=1):
    # `blocks` holds the multi-grid dilation multipliers, e.g. [1, 2, 4].
    downsample = None
    if stride != 1 or self.inplanes != planes * block.expansion:
        downsample = nn.Sequential(
            nn.Pool(kernel_size=stride, stride=stride, ceil_mode=True, op='mean'),
            nn.Conv(self.inplanes, planes * block.expansion,
                    kernel_size=1, stride=1, bias=False),
            nn.BatchNorm(planes * block.expansion),
        )

    layers = []
    layers.append(
        block(self.inplanes, planes, stride,
              dilation=blocks[0] * dilation,
              downsample=downsample, stype='stage',
              baseWidth=self.baseWidth, scale=self.scale))
    self.inplanes = planes * block.expansion
    for i in range(1, len(blocks)):
        layers.append(
            block(self.inplanes, planes, stride=1,
                  dilation=blocks[i] * dilation,
                  baseWidth=self.baseWidth, scale=self.scale))
    return nn.Sequential(*layers)
def __init__(self, cin, cout, nf=64, activation=nn.Tanh):
    super(Encoder, self).__init__()
    network = [
        nn.Conv(cin, nf, 4, stride=2, padding=1, bias=False),
        nn.ReLU(),
        nn.Conv(nf, nf * 2, 4, stride=2, padding=1, bias=False),
        nn.ReLU(),
        nn.Conv(nf * 2, nf * 4, 4, stride=2, padding=1, bias=False),
        nn.ReLU(),
        nn.Conv(nf * 4, nf * 8, 4, stride=2, padding=1, bias=False),
        nn.ReLU(),
        nn.Conv(nf * 8, nf * 8, 4, stride=1, padding=0, bias=False),
        nn.ReLU(),
        nn.Conv(nf * 8, cout, 1, stride=1, padding=0, bias=False)
    ]
    if activation is not None:
        network += [activation()]
    self.network = nn.Sequential(*network)
def __init__(self, c1, c2, k=(1, 3), s=1, equal_ch=True):
    super(MixConv2d, self).__init__()
    groups = len(k)
    if equal_ch:  # equal c_ per group
        # note: numpy uses .astype, not .as_type
        i = jt.array(
            np.linspace(0, groups - 1E-6, c2).astype(np.float32)).floor()  # c2 indices
        c_ = [(i == g).sum() for g in range(groups)]  # intermediate channels
    else:  # equal weight.numel() per group
        b = [c2] + [0] * groups
        a = np.eye(groups + 1, groups, k=-1)
        a -= np.roll(a, 1, axis=1)
        a *= np.array(k)**2
        a[0] = 1
        c_ = np.linalg.lstsq(a, b, rcond=None)[0].round()  # solve for equal weight indices, ax = b

    self.m = nn.ModuleList([
        nn.Conv(c1, int(c_[g]), k[g], s, k[g] // 2, bias=False)
        for g in range(groups)
    ])
    self.bn = nn.BatchNorm(c2)
    self.act = nn.LeakyReLU(0.1)
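# A standalone check of the equal-channel split above (assumed example values):
# with c2=64 output channels over kernel sizes (1, 3), the floor-of-linspace
# trick assigns 32 channels to each kernel size.
import numpy as np

groups, c2 = 2, 64
i = np.floor(np.linspace(0, groups - 1E-6, c2))
print([int((i == g).sum()) for g in range(groups)])  # [32, 32]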
def __init__(self, in_channels=3, out_channels=3):
    super(GeneratorUNet, self).__init__()
    self.down1 = UNetDown(in_channels, 64, normalize=False)
    self.down2 = UNetDown(64, 128)
    self.down3 = UNetDown(128, 256)
    self.down4 = UNetDown(256, 512, dropout=0.5)
    self.down5 = UNetDown(512, 512, dropout=0.5)
    self.down6 = UNetDown(512, 512, dropout=0.5)
    self.down7 = UNetDown(512, 512, dropout=0.5)
    self.down8 = UNetDown(512, 512, normalize=False, dropout=0.5)
    self.up1 = UNetUp(512, 512, dropout=0.5)
    self.up2 = UNetUp(1024, 512, dropout=0.5)
    self.up3 = UNetUp(1024, 512, dropout=0.5)
    self.up4 = UNetUp(1024, 512, dropout=0.5)
    self.up5 = UNetUp(1024, 256)
    self.up6 = UNetUp(512, 128)
    self.up7 = UNetUp(256, 64)
    self.final = nn.Sequential(
        nn.Upsample(scale_factor=2),
        nn.ZeroPad2d((1, 0, 1, 0)),
        nn.Conv(128, out_channels, 4, padding=1),
        nn.Tanh())
    for m in self.modules():
        weights_init_normal(m)
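# A standalone sketch of why up2..up7 above take doubled input channels,
# assuming UNetUp concatenates its upsampled output with the matching encoder
# skip tensor (as in the pix2pix reference implementation).
import jittor as jt

up_out = jt.random((1, 512, 2, 2))  # decoder feature after up1
skip = jt.random((1, 512, 2, 2))    # matching encoder feature
merged = jt.concat([up_out, skip], dim=1)
print(merged.shape)  # [1, 1024, 2, 2] -> the 1024-channel input to up2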
def __init__(self, block, layers, num_classes=1000, zero_init_residual=False,
             groups=1, width_per_group=64, replace_stride_with_dilation=None,
             norm_layer=None):
    super(ResNet, self).__init__()
    if norm_layer is None:
        norm_layer = nn.BatchNorm
    self._norm_layer = norm_layer
    self.inplanes = 64
    self.dilation = 1
    if replace_stride_with_dilation is None:
        replace_stride_with_dilation = [False, False, False]
    if len(replace_stride_with_dilation) != 3:
        raise ValueError('replace_stride_with_dilation should be None or a '
                         '3-element tuple, got {}'.format(replace_stride_with_dilation))
    self.groups = groups
    self.base_width = width_per_group
    self.conv1 = nn.Conv(3, self.inplanes, kernel_size=7, stride=2, padding=3,
                         bias=False)
    jt.init.relu_invariant_gauss_(self.conv1.weight, mode="fan_out")
    self.bn1 = norm_layer(self.inplanes)
    self.relu = nn.Relu()
    self.maxpool = nn.Pool(kernel_size=3, stride=2, padding=1, op='maximum')
    self.layer1 = self._make_layer(block, 64, layers[0])
    self.layer2 = self._make_layer(block, 128, layers[1], stride=2,
                                   dilate=replace_stride_with_dilation[0])
    self.layer3 = self._make_layer(block, 256, layers[2], stride=2,
                                   dilate=replace_stride_with_dilation[1])
    self.layer4 = self._make_layer(block, 512, layers[3], stride=2,
                                   dilate=replace_stride_with_dilation[2])
    self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
    self.fc = nn.Linear(512 * block.expansion, num_classes)
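# A standalone check of the stem above: the 7x7 stride-2 conv followed by the
# 3x3 stride-2 max pool takes a 224x224 image to 56x56 before layer1.
import jittor as jt
from jittor import nn

x = jt.random((1, 3, 224, 224))
x = nn.Conv(3, 64, kernel_size=7, stride=2, padding=3, bias=False)(x)
x = nn.Pool(kernel_size=3, stride=2, padding=1, op='maximum')(x)
print(x.shape)  # [1, 64, 56, 56]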
def __init__(self, in_channels, out_channels):
    super(OutConv, self).__init__()
    self.conv = nn.Conv(in_channels, out_channels, 1)
def conv3x3(in_planes, out_planes, stride=1, groups=1, dilation=1):
    """3x3 convolution with padding; weights use He (relu-invariant) init."""
    conv = nn.Conv(in_planes, out_planes, kernel_size=3, stride=stride,
                   padding=dilation, groups=groups, bias=False,
                   dilation=dilation)
    jt.init.relu_invariant_gauss_(conv.weight, mode="fan_out")
    return conv
def conv1x1(in_planes, out_planes, stride=1):
    """1x1 convolution; weights use He (relu-invariant) init."""
    conv = nn.Conv(in_planes, out_planes, kernel_size=1, stride=stride,
                   bias=False)
    jt.init.relu_invariant_gauss_(conv.weight, mode="fan_out")
    return conv
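# A standalone shape check for the two helpers above: with padding=dilation a
# 3x3 conv preserves spatial size at any dilation rate, and the 1x1 conv only
# remaps channels. Sizes here are illustrative.
import jittor as jt
from jittor import nn

x = jt.random((1, 64, 56, 56))
y = nn.Conv(64, 64, kernel_size=3, padding=2, dilation=2, bias=False)(x)
z = nn.Conv(64, 128, kernel_size=1, bias=False)(x)
print(y.shape, z.shape)  # [1,64,56,56] [1,128,56,56]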
def __init__(self):
    super(VGGBase, self).__init__()
    self.conv1_1 = nn.Conv(3, 64, kernel_size=3, padding=1)
    self.conv1_2 = nn.Conv(64, 64, kernel_size=3, padding=1)
    self.pool1 = nn.Pool(kernel_size=2, stride=2, op='maximum')

    self.conv2_1 = nn.Conv(64, 128, kernel_size=3, padding=1)
    self.conv2_2 = nn.Conv(128, 128, kernel_size=3, padding=1)
    self.pool2 = nn.Pool(kernel_size=2, stride=2, op='maximum')

    self.conv3_1 = nn.Conv(128, 256, kernel_size=3, padding=1)
    self.conv3_2 = nn.Conv(256, 256, kernel_size=3, padding=1)
    self.conv3_3 = nn.Conv(256, 256, kernel_size=3, padding=1)
    self.pool3 = nn.Pool(kernel_size=2, stride=2, ceil_mode=True, op='maximum')

    self.conv4_1 = nn.Conv(256, 512, kernel_size=3, padding=1)
    self.conv4_2 = nn.Conv(512, 512, kernel_size=3, padding=1)
    self.conv4_3 = nn.Conv(512, 512, kernel_size=3, padding=1)
    self.pool4 = nn.Pool(kernel_size=2, stride=2, op='maximum')

    self.conv5_1 = nn.Conv(512, 512, kernel_size=3, padding=1)
    self.conv5_2 = nn.Conv(512, 512, kernel_size=3, padding=1)
    self.conv5_3 = nn.Conv(512, 512, kernel_size=3, padding=1)
    self.pool5 = nn.Pool(kernel_size=3, stride=1, padding=1, op='maximum')

    self.conv6 = nn.Conv(512, 1024, kernel_size=3, padding=6, dilation=6)
    self.conv7 = nn.Conv(1024, 1024, kernel_size=1)
def __init__(self, in_channels=3, out_channels=1):
    super(Discriminator, self).__init__()

    def discriminator_block(in_filters, out_filters, stride=2, normalization=True):
        'Returns downsampling layers of each discriminator block'
        layers = [nn.Conv(in_filters, out_filters, 4, stride=stride, padding=1)]
        if normalization:
            layers.append(nn.BatchNorm2d(out_filters))
        layers.append(nn.LeakyReLU(scale=0.2))
        return layers

    self.model = nn.Sequential(
        *discriminator_block(in_channels + out_channels, 64, normalization=False),
        *discriminator_block(64, 128),
        *discriminator_block(128, 256),
        *discriminator_block(256, 512, stride=1),
        nn.Conv(512, 1, 4, stride=1, padding=1),
        nn.Sigmoid())
    for m in self.modules():
        weights_init_normal(m)
def discriminator_block(in_filters, out_filters, bn=True):
    block = [nn.Conv(in_filters, out_filters, 3, stride=2, padding=1)]
    if bn:
        block.append(nn.BatchNorm(out_filters, eps=0.8))
    block.extend([nn.LeakyReLU(0.2), nn.Dropout(p=0.25)])
    return block
def __init__(self, n_classes):
    """
    Args:
        n_classes: number of different types of objects
    """
    super(PredictionConvolutions, self).__init__()
    self.n_classes = n_classes

    # Number of prior boxes per feature-map position for each source layer.
    n_boxes = {
        'conv4_3': 4,
        'conv7': 6,
        'conv8_2': 6,
        'conv9_2': 6,
        'conv10_2': 4,
        'conv11_2': 4,
    }

    # Localization heads (4 box offsets per prior).
    self.loc_conv4_3 = nn.Conv(512, n_boxes['conv4_3'] * 4, kernel_size=3, padding=1)
    self.loc_conv7 = nn.Conv(1024, n_boxes['conv7'] * 4, kernel_size=3, padding=1)
    self.loc_conv8_2 = nn.Conv(512, n_boxes['conv8_2'] * 4, kernel_size=3, padding=1)
    self.loc_conv9_2 = nn.Conv(256, n_boxes['conv9_2'] * 4, kernel_size=3, padding=1)
    self.loc_conv10_2 = nn.Conv(256, n_boxes['conv10_2'] * 4, kernel_size=3, padding=1)
    self.loc_conv11_2 = nn.Conv(256, n_boxes['conv11_2'] * 4, kernel_size=3, padding=1)

    # Class-score heads (n_classes scores per prior).
    self.cl_conv4_3 = nn.Conv(512, n_boxes['conv4_3'] * n_classes, kernel_size=3, padding=1)
    self.cl_conv7 = nn.Conv(1024, n_boxes['conv7'] * n_classes, kernel_size=3, padding=1)
    self.cl_conv8_2 = nn.Conv(512, n_boxes['conv8_2'] * n_classes, kernel_size=3, padding=1)
    self.cl_conv9_2 = nn.Conv(256, n_boxes['conv9_2'] * n_classes, kernel_size=3, padding=1)
    self.cl_conv10_2 = nn.Conv(256, n_boxes['conv10_2'] * n_classes, kernel_size=3, padding=1)
    self.cl_conv11_2 = nn.Conv(256, n_boxes['conv11_2'] * n_classes, kernel_size=3, padding=1)

    self.init_conv2d()
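# A standalone sketch of how one localization head's output is typically
# consumed (shapes assumed, not taken from this repo): a (B, n_boxes*4, H, W)
# map is permuted and flattened into (B, H*W*n_boxes, 4) per-prior box offsets.
import jittor as jt

n_boxes = 4
loc = jt.random((2, n_boxes * 4, 38, 38))       # e.g. the conv4_3 head output
loc = loc.permute(0, 2, 3, 1).reshape(2, -1, 4)
print(loc.shape)  # [2, 5776, 4] -> 38*38*4 priors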
def __init__(self, in_size, out_size):
    super(UNetUp, self).__init__()
    self.model = nn.Sequential(
        nn.Upsample(scale_factor=2),
        nn.Conv(in_size, out_size, 3, stride=1, padding=1, bias=False),
        nn.BatchNorm(out_size, eps=0.8),  # 0.8 mirrors the upstream PyTorch-GAN value
        nn.Relu())
def __init__(self, in_channels, out_channels=1024, aspect_ratios=[[1]],
             scales=[1], parent=None, index=0):
    super().__init__()
    self.num_classes = cfg.num_classes
    self.mask_dim = cfg.mask_dim  # Defined by Yolact
    self.num_priors = sum(len(x) * len(scales) for x in aspect_ratios)
    self.parent = [parent]  # Don't include this in the state dict
    self.index = index
    self.num_heads = cfg.num_heads  # Defined by Yolact

    if cfg.mask_proto_split_prototypes_by_head and cfg.mask_type == mask_type.lincomb:
        self.mask_dim = self.mask_dim // self.num_heads

    if cfg.mask_proto_prototypes_as_features:
        in_channels += self.mask_dim

    if parent is None:
        if cfg.extra_head_net is None:
            out_channels = in_channels
        else:
            self.upfeature, out_channels = make_net(in_channels, cfg.extra_head_net)

        if cfg.use_prediction_module:
            self.block = Bottleneck(out_channels, out_channels // 4)
            self.conv = nn.Conv(out_channels, out_channels, kernel_size=1, bias=True)
            self.bn = nn.BatchNorm(out_channels)

        self.bbox_layer = nn.Conv(out_channels, self.num_priors * 4,
                                  **cfg.head_layer_params)
        self.conf_layer = nn.Conv(out_channels, self.num_priors * self.num_classes,
                                  **cfg.head_layer_params)
        self.mask_layer = nn.Conv(out_channels, self.num_priors * self.mask_dim,
                                  **cfg.head_layer_params)

        if cfg.use_mask_scoring:
            self.score_layer = nn.Conv(out_channels, self.num_priors,
                                       **cfg.head_layer_params)

        if cfg.use_instance_coeff:
            self.inst_layer = nn.Conv(out_channels,
                                      self.num_priors * cfg.num_instance_coeffs,
                                      **cfg.head_layer_params)

        # What is this ugly lambda doing in the middle of all this clean prediction module code?
        def make_extra(num_layers):
            if num_layers == 0:
                return lambda x: x
            else:
                # Looks more complicated than it is. This just creates an array of num_layers alternating conv-relu
                return nn.Sequential(*sum([[
                    nn.Conv(out_channels, out_channels, kernel_size=3, padding=1),
                    nn.ReLU()
                ] for _ in range(num_layers)], []))

        self.bbox_extra, self.conf_extra, self.mask_extra = [
            make_extra(x) for x in cfg.extra_layers
        ]

        if cfg.mask_type == mask_type.lincomb and cfg.mask_proto_coeff_gate:
            self.gate_layer = nn.Conv(out_channels, self.num_priors * self.mask_dim,
                                      kernel_size=3, padding=1)

    self.aspect_ratios = aspect_ratios
    self.scales = scales

    self.priors = None
    self.last_conv_size = None
    self.last_img_size = None
def init_weights(self, backbone_path):
    """ Initialize weights for training. """
    # Initialize the backbone with the pretrained weights.
    self.backbone.init_backbone(backbone_path)

    # Quick lambda to test if one list contains the other
    def all_in(x, y):
        for _x in x:
            if _x not in y:
                return False
        return True

    # Initialize the rest of the conv layers with xavier
    for name, module in self.named_modules():
        # See issue #127 for why we need such a complicated condition if the module is a WeakScriptModuleProxy
        # Broke in 1.3 (see issue #175), WeakScriptModuleProxy was turned into just ScriptModule.
        # Broke in 1.4 (see issue #292), where RecursiveScriptModule is the new star of the show.
        # Note that this might break with future Jittor updates, so let me know if it does
        is_script_conv = False
        if 'Script' in type(module).__name__:
            # 1.4 workaround: now there's an original_name member so just use that
            if hasattr(module, 'original_name'):
                is_script_conv = 'Conv' in module.original_name
            # 1.3 workaround: check if this has the same constants as a conv module
            else:
                conv_constants = getattr(nn.Conv(1, 1, 1), '__constants__')
                is_script_conv = (
                    all_in(module.__dict__['_constants_set'], conv_constants)
                    and all_in(conv_constants, module.__dict__['_constants_set']))

        is_conv_layer = isinstance(module, nn.Conv) or is_script_conv

        if is_conv_layer and module not in self.backbone.backbone_modules:
            jt.init.xavier_uniform_(module.weight)

            if module.bias is not None:
                if cfg.use_focal_loss and 'conf_layer' in name:
                    if not cfg.use_sigmoid_focal_loss:
                        # Initialize the last layer as in the focal loss paper.
                        # Because we use softmax and not sigmoid, I had to derive an alternate expression
                        # on a notecard. Define pi to be the probability of outputting a foreground detection.
                        # Then let z = sum(exp(x)) - exp(x_0). Finally let c be the number of foreground classes.
                        # Chugging through the math, this gives us
                        #   x_0 = log(z * (1 - pi) / pi)    where 0 is the background class
                        #   x_i = log(z / c)                for all i > 0
                        # For simplicity (and because we have a degree of freedom here), set z = 1. Then we have
                        #   x_0 =  log((1 - pi) / pi)       note: don't split up the log for numerical stability
                        #   x_i = -log(c)                   for all i > 0
                        module.bias.data[0] = np.log(
                            (1 - cfg.focal_loss_init_pi) / cfg.focal_loss_init_pi)
                        module.bias.data[1:] = -np.log(module.bias.shape[0] - 1)
                    else:
                        module.bias.data[0] = -np.log(
                            cfg.focal_loss_init_pi / (1 - cfg.focal_loss_init_pi))
                        module.bias.data[1:] = -np.log(
                            (1 - cfg.focal_loss_init_pi) / cfg.focal_loss_init_pi)
                else:
                    jt.init.constant_(module.bias, 0.0)
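# A numeric check of the softmax focal-loss bias init derived in the comment
# above: with x_0 = log((1-pi)/pi) and x_i = -log(c), the softmax probability
# of the background class comes out to exactly 1 - pi. pi and c are assumed
# example values.
import numpy as np

pi, c = 0.01, 80                    # foreground prior, number of fg classes
x = np.full(c + 1, -np.log(c))      # x_i = -log(c) for all i > 0
x[0] = np.log((1 - pi) / pi)        # background logit
p = np.exp(x) / np.exp(x).sum()
print(p[0])  # 0.99 == 1 - pi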
def __init__(self):
    super().__init__()

    self.backbone = construct_backbone(cfg.backbone)

    if cfg.freeze_bn:
        self.freeze_bn()

    # Compute mask_dim here and add it back to the config. Make sure Yolact's constructor is called early!
    if cfg.mask_type == mask_type.direct:
        cfg.mask_dim = cfg.mask_size**2
    elif cfg.mask_type == mask_type.lincomb:
        if cfg.mask_proto_use_grid:
            self.grid = jt.array(np.load(cfg.mask_proto_grid_file))
            self.num_grids = self.grid.shape[0]
        else:
            self.num_grids = 0

        self.proto_src = cfg.mask_proto_src

        if self.proto_src is None:
            in_channels = 3
        elif cfg.fpn is not None:
            in_channels = cfg.fpn.num_features
        else:
            in_channels = self.backbone.channels[self.proto_src]
        in_channels += self.num_grids

        # The include_last_relu=false here is because we might want to change it to another function
        self.proto_net, cfg.mask_dim = make_net(in_channels, cfg.mask_proto_net,
                                                include_last_relu=False)

        if cfg.mask_proto_bias:
            cfg.mask_dim += 1

    self.selected_layers = cfg.backbone.selected_layers
    src_channels = self.backbone.channels

    if cfg.use_maskiou:
        self.maskiou_net = FastMaskIoUNet()

    if cfg.fpn is not None:
        # Some hacky rewiring to accomodate the FPN
        self.fpn = FPN([src_channels[i] for i in self.selected_layers])
        self.selected_layers = list(
            range(len(self.selected_layers) + cfg.fpn.num_downsample))
        src_channels = [cfg.fpn.num_features] * len(self.selected_layers)

    self.prediction_layers = nn.ModuleList()
    cfg.num_heads = len(self.selected_layers)

    for idx, layer_idx in enumerate(self.selected_layers):
        # If we're sharing prediction module weights, have every module's parent be the first one
        parent = None
        if cfg.share_prediction_module and idx > 0:
            parent = self.prediction_layers[0]

        pred = PredictionModule(
            src_channels[layer_idx], src_channels[layer_idx],
            aspect_ratios=cfg.backbone.pred_aspect_ratios[idx],
            scales=cfg.backbone.pred_scales[idx],
            parent=parent, index=idx)
        self.prediction_layers.append(pred)

    # Extra parameters for the extra losses
    if cfg.use_class_existence_loss:
        # This comes from the smallest layer selected
        # Also note that cfg.num_classes includes background
        self.class_existence_fc = nn.Linear(src_channels[-1], cfg.num_classes - 1)

    if cfg.use_semantic_segmentation_loss:
        self.semantic_seg_conv = nn.Conv(src_channels[0], cfg.num_classes - 1,
                                         kernel_size=1)

    # For use in evaluation
    self.detect = Detect(cfg.num_classes, bkg_label=0, top_k=cfg.nms_top_k,
                         conf_thresh=cfg.nms_conf_thresh, nms_thresh=cfg.nms_thresh)
def depthwise_conv(i, o, kernel_size, stride=1, padding=0, bias=False):
    # groups=i gives one filter per input channel, i.e. a depthwise convolution
    return nn.Conv(i, o, kernel_size, stride, padding, bias=bias, groups=i)
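# A standalone parameter-count sketch for the helper above: groups=64 gives one
# 3x3 filter per input channel, so a 64->64 depthwise layer holds 64*1*3*3 = 576
# weights versus 64*64*3*3 = 36864 for a dense conv.
import numpy as np
from jittor import nn

dw = nn.Conv(64, 64, 3, 1, 1, bias=False, groups=64)
dense = nn.Conv(64, 64, 3, 1, 1, bias=False)
print(int(np.prod(dw.weight.shape)), int(np.prod(dense.weight.shape)))  # 576 36864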
def __init__(self, extra, norm_eval=True, zero_init_residual=False, frozen_stages=-1):
    super(HighResolutionNet, self).__init__()
    self.norm_eval = norm_eval
    self.frozen_stages = frozen_stages
    self.zero_init_residual = zero_init_residual
    self.extra = extra

    # stem net
    self.conv1 = nn.Conv(3, 64, kernel_size=3, stride=2, padding=1, bias=False)
    self.bn1 = BatchNorm2d(64, momentum=BN_MOMENTUM)
    self.conv2 = nn.Conv(64, 64, kernel_size=3, stride=2, padding=1, bias=False)
    self.bn2 = BatchNorm2d(64, momentum=BN_MOMENTUM)
    self.relu = nn.ReLU()  # Jittor's ReLU takes no inplace argument

    # stage 1
    self.stage1_cfg = self.extra['stage1']
    num_channels = self.stage1_cfg['num_channels'][0]
    block_type = self.stage1_cfg['block']
    num_blocks = self.stage1_cfg['num_blocks'][0]

    block = blocks_dict[block_type]
    stage1_out_channels = num_channels * block.expansion
    self.layer1 = self._make_layer(block, 64, num_channels, num_blocks)

    # stage 2
    self.stage2_cfg = self.extra['stage2']
    num_channels = self.stage2_cfg['num_channels']
    block_type = self.stage2_cfg['block']

    block = blocks_dict[block_type]
    num_channels = [
        num_channels[i] * block.expansion for i in range(len(num_channels))
    ]
    self.transition1 = self._make_transition_layer([stage1_out_channels], num_channels)
    # num_modules, num_branches, num_blocks, num_channels, block, fuse_method, num_inchannels
    self.stage2, pre_stage_channels = self._make_stage(self.stage2_cfg, num_channels)

    # stage 3
    self.stage3_cfg = self.extra['stage3']
    num_channels = self.stage3_cfg['num_channels']
    block_type = self.stage3_cfg['block']

    block = blocks_dict[block_type]
    num_channels = [
        num_channels[i] * block.expansion for i in range(len(num_channels))
    ]
    self.transition2 = self._make_transition_layer(pre_stage_channels, num_channels)
    self.stage3, pre_stage_channels = self._make_stage(self.stage3_cfg, num_channels)

    # stage 4
    self.stage4_cfg = self.extra['stage4']
    num_channels = self.stage4_cfg['num_channels']
    block_type = self.stage4_cfg['block']

    block = blocks_dict[block_type]
    num_channels = [
        num_channels[i] * block.expansion for i in range(len(num_channels))
    ]
    self.transition3 = self._make_transition_layer(pre_stage_channels, num_channels)
    self.stage4, pre_stage_channels = self._make_stage(self.stage4_cfg, num_channels)
def __init__(self, channel, reduction=4):
    super(eSEModule, self).__init__()
    # eSE deliberately keeps the full channel width (no reduction), unlike SE;
    # the reduction argument is accepted for interface compatibility but unused.
    self.avg_pool = nn.AdaptiveAvgPool2d(1)
    self.fc = nn.Conv(channel, channel, kernel_size=1, padding=0)
    self.hsigmoid = Hsigmoid()
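# A standalone forward sketch of the module above, with a hand-rolled hard
# sigmoid standing in for Hsigmoid (an assumption; the real class is defined
# elsewhere in this repo): per-channel statistics gate the input feature map.
import jittor as jt
from jittor import nn

x = jt.random((2, 128, 14, 14))
w = nn.AdaptiveAvgPool2d(1)(x)                       # (2, 128, 1, 1) channel stats
w = nn.Conv(128, 128, kernel_size=1, padding=0)(w)   # channel re-weighting
w = jt.minimum(jt.maximum(w + 3.0, 0.0), 6.0) / 6.0  # hard sigmoid
print((x * w).shape)  # [2, 128, 14, 14]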