Пример #1
0
    def __init__(self,
                 output_blocks=(DEFAULT_BLOCK_INDEX,),
                 resize_input=True,
                 normalize_input=True,
                 requires_grad=False):
        """Build pretrained InceptionV3

        Parameters
        ----------
        output_blocks : iterable of int
            Indices of blocks to return features of. Must contain at least
            one index. Possible values are:
                - 0: corresponds to output of first max pooling
                - 1: corresponds to output of second max pooling
                - 2: corresponds to output which is fed to aux classifier
                - 3: corresponds to output of final average pooling
        resize_input : bool
            If true, bilinearly resizes input to width and height 299 before
            feeding input to model. As the network without fully connected
            layers is fully convolutional, it should be able to handle inputs
            of arbitrary size, so resizing might not be strictly needed
        normalize_input : bool
            If true, normalizes the input to the statistics the pretrained
            Inception network expects
        requires_grad : bool
            If true, parameters of the model require gradient. Possibly useful
            for finetuning the network

        Raises
        ------
        ValueError
            If ``output_blocks`` is empty.
        AssertionError
            If the largest requested block index is greater than 3.
        """
        super(InceptionV3, self).__init__()

        self.resize_input = resize_input
        self.normalize_input = normalize_input

        # Materialise the indices exactly once so that one-shot iterables
        # (generators, map objects) are not consumed twice.
        self.output_blocks = sorted(output_blocks)
        if not self.output_blocks:
            raise ValueError(
                'output_blocks must contain at least one block index')
        # The list is sorted, so its last element is the maximum.
        self.last_needed_block = self.output_blocks[-1]

        # Kept as an assert so callers see the same exception type as before.
        assert self.last_needed_block <= 3, \
            'Last possible output block index is 3'

        self.blocks = nn.ModuleList()

        inception = models.inception_v3(pretrained=True)

        # Block 0: input to maxpool1 (always built)
        block0 = [
            inception.Conv2d_1a_3x3,
            inception.Conv2d_2a_3x3,
            inception.Conv2d_2b_3x3,
            nn.MaxPool2d(kernel_size=3, stride=2)
        ]
        self.blocks.append(nn.Sequential(*block0))

        # Later blocks are only assembled when a requested output needs them.

        # Block 1: maxpool1 to maxpool2
        if self.last_needed_block >= 1:
            block1 = [
                inception.Conv2d_3b_1x1,
                inception.Conv2d_4a_3x3,
                nn.MaxPool2d(kernel_size=3, stride=2)
            ]
            self.blocks.append(nn.Sequential(*block1))

        # Block 2: maxpool2 to aux classifier
        if self.last_needed_block >= 2:
            block2 = [
                inception.Mixed_5b,
                inception.Mixed_5c,
                inception.Mixed_5d,
                inception.Mixed_6a,
                inception.Mixed_6b,
                inception.Mixed_6c,
                inception.Mixed_6d,
                inception.Mixed_6e,
            ]
            self.blocks.append(nn.Sequential(*block2))

        # Block 3: aux classifier to final avgpool
        if self.last_needed_block >= 3:
            block3 = [
                inception.Mixed_7a,
                inception.Mixed_7b,
                inception.Mixed_7c,
                nn.AdaptiveAvgPool2d(output_size=(1, 1))
            ]
            self.blocks.append(nn.Sequential(*block3))

        # Apply the requested gradient setting to every registered parameter.
        for param in self.parameters():
            param.requires_grad = requires_grad
Пример #2
0
 def __init__(self):
     """Assemble ``self.net``: conv -> max-pool -> Flatten -> linear."""
     super(TestNet, self).__init__()
     conv = nn.Conv2d(1, 16, kernel_size=3, stride=1, padding=1)
     pool = nn.MaxPool2d(2, 2)
     flatten = Flatten()
     # NOTE(review): the convolution emits 16 channels, so unless Flatten
     # drops channels an in_features of 1 * 14 * 14 looks inconsistent with
     # the pooled feature map -- confirm against Flatten and the input size.
     classifier = nn.Linear(1 * 14 * 14, 10)
     self.net = nn.Sequential(conv, pool, flatten, classifier)
Пример #3
0
 def __init__(self, in_channels, out_channels):
     """Build the stage: a size-2 max-pool followed by a DoubleConv.

     ``in_channels`` and ``out_channels`` are forwarded to ``DoubleConv``.
     """
     super().__init__()
     pool = nn.MaxPool2d(2)
     conv = DoubleConv(in_channels, out_channels)
     self.maxpool_conv = nn.Sequential(pool, conv)
Пример #4
0
    def __init__(self, in_dim, out_dim, args, mean_std=None):
        """Set up normalisation statistics, front-ends and sub-model layers.

        ``in_dim``, ``out_dim``, ``args`` and ``mean_std`` are forwarded to
        ``self.prepare_mean_std`` to obtain the input/output mean and std.
        The protocol file path and the waveform sampling rate are read from
        the module-level ``prj_conf`` object.

        One set of (front-end, CNN stack, output layers, angle layer) is
        created per entry of the per-sub-model configuration lists and the
        four collections are finally wrapped in ``ModuleList`` containers.
        """
        super(Model, self).__init__()

        ##### required part, no need to change #####

        # mean std of input and output
        in_m, in_s, out_m, out_s = self.prepare_mean_std(in_dim,out_dim,\
                                                         args, mean_std)
        # stored as Parameters with requires_grad=False
        self.input_mean = torch_nn.Parameter(in_m, requires_grad=False)
        self.input_std = torch_nn.Parameter(in_s, requires_grad=False)
        self.output_mean = torch_nn.Parameter(out_m, requires_grad=False)
        self.output_std = torch_nn.Parameter(out_s, requires_grad=False)
        
        # a flag for debugging (by default False)
        self.model_debug = False
        self.validation = False
        #####


        # target data: the protocol file is the first optional argument of
        # the project configuration
        protocol_file = prj_conf.optional_argument[0]
        self.protocol_parser = protocol_parse(protocol_file)
        
        # working sampling rate, torchaudio is used to change sampling rate
        self.m_target_sr = 16000
                
        # re-sampling (optional): from the configured rate to the working rate
        self.m_resampler = torchaudio.transforms.Resample(
            prj_conf.wav_samp_rate, self.m_target_sr)

        # vad (optional)
        self.m_vad = torchaudio.transforms.Vad(sample_rate = self.m_target_sr)
        
        # flag for balanced class (temporary use)
        self.v_flag = 1
    
        # The following lists hold one entry per sub-model.
        # frame shift (number of points)
        self.frame_hops = [160]
        # frame length
        self.frame_lens = [320]
        # FFT length
        self.fft_n = [512]

        # LFCC dim (base component)
        self.lfcc_dim = [20]
        # when True the feature dimension is tripled in the loop below
        self.lfcc_with_delta = True

        # window type
        self.win = torch.hann_window
        # floor in log-spectrum-amplitude calculating
        self.amp_floor = 0.00001
        
        # truncation length per sub-model: 10 * 16 * 750 = 120000 divided by
        # the frame shift, i.e. 750 for a shift of 160.
        # NOTE(review): the original comment said "first 600 frames", which
        # does not match this arithmetic -- confirm the intended length.
        self.v_truncate_lens = [10 * 16 * 750 // x for x in self.frame_hops]

        # number of sub-models
        self.v_submodels = len(self.frame_lens)        

        # dimension of embedding vectors
        self.v_emd_dim = 64

        # output class
        self.v_out_class = 2

        # plain lists while building; converted to ModuleList after the loop
        self.m_transform = []
        self.m_output_act = []
        self.m_frontend = []
        self.m_angle = []

        for idx, (trunc_len, fft_n, lfcc_dim) in enumerate(zip(
                self.v_truncate_lens, self.fft_n, self.lfcc_dim)):
            
            # number of FFT bins (computed here but not used below)
            fft_n_bins = fft_n // 2 + 1
            if self.lfcc_with_delta:
                lfcc_dim = lfcc_dim * 3
            
            # CNN stack. The input channel count of each layer following a
            # MaxFeatureMap2D is half the preceding conv's output, so
            # MaxFeatureMap2D presumably halves the channels -- TODO confirm.
            self.m_transform.append(
                torch_nn.Sequential(
                    torch_nn.Conv2d(1, 64, [5, 5], 1, padding=[2, 2]),
                    nii_nn.MaxFeatureMap2D(),
                    torch.nn.MaxPool2d([2, 2], [2, 2]),

                    torch_nn.Conv2d(32, 64, [1, 1], 1, padding=[0, 0]),
                    nii_nn.MaxFeatureMap2D(),
                    torch_nn.BatchNorm2d(32, affine=False),
                    torch_nn.Conv2d(32, 96, [3, 3], 1, padding=[1, 1]),
                    nii_nn.MaxFeatureMap2D(),

                    torch.nn.MaxPool2d([2, 2], [2, 2]),
                    torch_nn.BatchNorm2d(48, affine=False),

                    torch_nn.Conv2d(48, 96, [1, 1], 1, padding=[0, 0]),
                    nii_nn.MaxFeatureMap2D(),
                    torch_nn.BatchNorm2d(48, affine=False),
                    torch_nn.Conv2d(48, 128, [3, 3], 1, padding=[1, 1]),
                    nii_nn.MaxFeatureMap2D(),

                    torch.nn.MaxPool2d([2, 2], [2, 2]),

                    torch_nn.Conv2d(64, 128, [1, 1], 1, padding=[0, 0]),
                    nii_nn.MaxFeatureMap2D(),
                    torch_nn.BatchNorm2d(64, affine=False),
                    torch_nn.Conv2d(64, 64, [3, 3], 1, padding=[1, 1]),
                    nii_nn.MaxFeatureMap2D(),
                    torch_nn.BatchNorm2d(32, affine=False),

                    torch_nn.Conv2d(32, 64, [1, 1], 1, padding=[0, 0]),
                    nii_nn.MaxFeatureMap2D(),
                    torch_nn.BatchNorm2d(32, affine=False),
                    torch_nn.Conv2d(32, 64, [3, 3], 1, padding=[1, 1]),
                    nii_nn.MaxFeatureMap2D(),
                    torch_nn.MaxPool2d([2, 2], [2, 2])
                )
            )
            # Output layers. The four 2x2 max-pools above divide both
            # feature-map axes by 16; the factor 32 presumably is the channel
            # count left after the last MaxFeatureMap2D -- TODO confirm.
            self.m_output_act.append(
                torch_nn.Sequential(
                    torch_nn.Dropout(0.7),
                    torch_nn.Linear((trunc_len // 16) * 
                                    (lfcc_dim // 16) * 32, 160),
                    nii_nn.MaxFeatureMap2D(),
                    torch_nn.Linear(80, self.v_emd_dim)
                )
            )
            
            # LFCC front-end configured with this sub-model's frame length,
            # frame shift, FFT size and base LFCC dimension
            self.m_frontend.append(
                nii_front_end.LFCC(self.frame_lens[idx],
                                   self.frame_hops[idx],
                                   self.fft_n[idx],
                                   self.m_target_sr,
                                   self.lfcc_dim[idx],
                                   with_energy=True)
            )

            # activation layer from embedding dimension to output classes
            self.m_angle.append(
                nii_p2sgrad.P2SActivationLayer(self.v_emd_dim, self.v_out_class)
            )

        # wrap the lists so that the sub-modules are registered on this module
        self.m_transform = torch_nn.ModuleList(self.m_transform)
        self.m_output_act = torch_nn.ModuleList(self.m_output_act)
        self.m_frontend = torch_nn.ModuleList(self.m_frontend)
        self.m_angle = torch_nn.ModuleList(self.m_angle)
        
        # output 

        # done
        return
Пример #5
0
 def __init__(self,
              block,
              layers,
              groups,
              reduction,
              dropout_p=0.2,
              inplanes=128,
              input_3x3=True,
              downsample_kernel_size=3,
              downsample_padding=1,
              num_classes=1000,
              last_stride=2):
     """Assemble the stem (layer0), four residual stages and the head.

     Parameters
     ----------
     block (nn.Module): Bottleneck class used for every stage.
         - SENet154: SEBottleneck
         - SE-ResNet models: SEResNetBottleneck
         - SE-ResNeXt models: SEResNeXtBottleneck
     layers (list of ints): Residual block count for layer1...layer4.
     groups (int): Group count of the 3x3 convolution in each bottleneck.
         - SENet154: 64
         - SE-ResNet models: 1
         - SE-ResNeXt models: 32
     reduction (int): Squeeze-and-Excitation reduction ratio.
         - All models: 16
     dropout_p (float or None): Dropout probability; with `None` no
         Dropout layer is created (``self.dropout`` is `None`).
         - SENet154: 0.2
         - SE-ResNet / SE-ResNeXt models: None
     inplanes (int): Number of input channels of layer1.
         - SENet154: 128
         - SE-ResNet / SE-ResNeXt models: 64
     input_3x3 (bool): `True` builds layer0 from three 3x3 convolutions,
         `False` from a single 7x7 convolution.
         - SENet154: True
         - SE-ResNet / SE-ResNeXt models: False
     downsample_kernel_size (int): Kernel size of the downsampling
         convolutions in layer2, layer3 and layer4.
         - SENet154: 3
         - SE-ResNet / SE-ResNeXt models: 1
     downsample_padding (int): Padding of the downsampling convolutions
         in layer2, layer3 and layer4.
         - SENet154: 1
         - SE-ResNet / SE-ResNeXt models: 0
     num_classes (int): Output size of the `last_linear` layer.
         - All models: 1000
     last_stride (int): Stride passed to `_make_layer` for layer4.
     """
     super(SENet, self).__init__()
     self.inplanes = inplanes

     # --- stem -----------------------------------------------------------
     if input_3x3:
         stem = [
             ('conv1', nn.Conv2d(3, 64, 3, stride=2, padding=1, bias=False)),
             ('bn1', nn.BatchNorm2d(64)),
             ('relu1', nn.ReLU(inplace=True)),
             ('conv2', nn.Conv2d(64, 64, 3, stride=1, padding=1, bias=False)),
             ('bn2', nn.BatchNorm2d(64)),
             ('relu2', nn.ReLU(inplace=True)),
             ('conv3', nn.Conv2d(64, inplanes, 3, stride=1, padding=1,
                                 bias=False)),
             ('bn3', nn.BatchNorm2d(inplanes)),
             ('relu3', nn.ReLU(inplace=True)),
         ]
     else:
         stem = [
             ('conv1', nn.Conv2d(3, inplanes, kernel_size=7, stride=2,
                                 padding=3, bias=False)),
             ('bn1', nn.BatchNorm2d(inplanes)),
             ('relu1', nn.ReLU(inplace=True)),
         ]
     # `ceil_mode=True` replaces `padding=1` to stay compatible with the
     # Caffe weights.
     stem.append(('pool', nn.MaxPool2d(3, stride=2, ceil_mode=True)))
     self.layer0 = nn.Sequential(OrderedDict(stem))

     # --- residual stages ------------------------------------------------
     # layer1 always uses a 1x1, unpadded downsampling convolution.
     self.layer1 = self._make_layer(block,
                                    planes=64,
                                    blocks=layers[0],
                                    groups=groups,
                                    reduction=reduction,
                                    downsample_kernel_size=1,
                                    downsample_padding=0)

     # Settings shared by layer2, layer3 and layer4.
     stage_kwargs = dict(groups=groups,
                         reduction=reduction,
                         downsample_kernel_size=downsample_kernel_size,
                         downsample_padding=downsample_padding)
     self.layer2 = self._make_layer(block, planes=128, blocks=layers[1],
                                    stride=2, **stage_kwargs)
     self.layer3 = self._make_layer(block, planes=256, blocks=layers[2],
                                    stride=2, **stage_kwargs)
     self.layer4 = self._make_layer(block, planes=512, blocks=layers[3],
                                    stride=last_stride, **stage_kwargs)

     # --- head -----------------------------------------------------------
     self.avg_pool = nn.AvgPool2d(7, stride=1)
     if dropout_p is None:
         self.dropout = None
     else:
         self.dropout = nn.Dropout(dropout_p)
     self.last_linear = nn.Linear(512 * block.expansion, num_classes)
Пример #6
0
 def __init__(self, in_ch, out_ch):
     """Chain a size-2 max-pool and ``double_conv(in_ch, out_ch)``."""
     super(down, self).__init__()
     stages = [nn.MaxPool2d(2), double_conv(in_ch, out_ch)]
     self.mpconv = nn.Sequential(*stages)
    def __init__(self, block, layers, num_classes=1000, zero_init_residual=False,
                 groups=1, width_per_group=64, replace_stride_with_dilation=None,
                 norm_layer=None):
        """Build the stem, four residual stages and classifier, then init weights.

        ``block`` is the residual block class and ``layers`` gives the block
        count of each of the four stages. ``norm_layer`` defaults to
        ``nn.BatchNorm2d``. ``replace_stride_with_dilation`` must be ``None``
        or have exactly three entries (one per stage 2-4), otherwise a
        ``ValueError`` is raised. With ``zero_init_residual`` the last norm
        weight of every Bottleneck/BasicBlock is set to zero.
        """
        super(ResNet, self).__init__()

        norm_layer = nn.BatchNorm2d if norm_layer is None else norm_layer
        self._norm_layer = norm_layer

        self.inplanes = 64  # channel count produced by the stem
        self.dilation = 1

        # One flag per strided stage: True means "dilate instead of stride".
        dilate_flags = replace_stride_with_dilation
        if dilate_flags is None:
            dilate_flags = [False, False, False]
        if len(dilate_flags) != 3:
            raise ValueError("replace_stride_with_dilation should be None "
                             "or a 3-element tuple, got {}".format(dilate_flags))

        self.groups = groups
        self.base_width = width_per_group

        # Stem. The shape notes assume a 3x64x64 input, as in the original
        # author's comments.
        # conv1: floor((64 + 2*3 - 7) / 2) + 1 = 32  ->  64x32x32
        self.conv1 = nn.Conv2d(3, self.inplanes, kernel_size=7, stride=2,
                               padding=3, bias=False)
        self.bn1 = norm_layer(self.inplanes)
        self.relu = nn.ReLU(inplace=True)
        # maxpool: floor((32 + 2*1 - 3) / 2) + 1 = 16  ->  64x16x16
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)

        # Residual stages (the original notes mention 3/4/6/3 blocks).
        self.layer1 = self._make_layer(block, 64, layers[0])
        self.layer2 = self._make_layer(block, 128, layers[1], stride=2,
                                       dilate=dilate_flags[0])
        self.layer3 = self._make_layer(block, 256, layers[2], stride=2,
                                       dilate=dilate_flags[1])
        self.layer4 = self._make_layer(block, 512, layers[3], stride=2,
                                       dilate=dilate_flags[2])

        # Classifier head.
        self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
        self.fc = nn.Linear(512 * block.expansion, num_classes)

        # Weight initialisation: Kaiming-normal for convolutions, unit scale
        # and zero shift for the normalisation layers.
        for module in self.modules():
            if isinstance(module, nn.Conv2d):
                nn.init.kaiming_normal_(module.weight, mode='fan_out',
                                        nonlinearity='relu')
                continue
            if isinstance(module, (nn.BatchNorm2d, nn.GroupNorm)):
                nn.init.constant_(module.weight, 1)
                nn.init.constant_(module.bias, 0)

        if not zero_init_residual:
            return

        # Zero the last BN of every residual branch so that each block starts
        # out as an identity mapping; reported to improve accuracy by
        # 0.2~0.3% in https://arxiv.org/abs/1706.02677
        for module in self.modules():
            if isinstance(module, Bottleneck):
                nn.init.constant_(module.bn3.weight, 0)
            elif isinstance(module, BasicBlock):
                nn.init.constant_(module.bn2.weight, 0)
Пример #8
0
 def __init__(self, config_channels, prefix, bn=True, ratio=1):
     """Build two convolutional branches and one max-pooling branch.

     ``config_channels`` is a stateful object: its ``channels`` attribute
     is read as the input width of every convolution and it is called with
     a requested width and a weight name to obtain the output width.
     Presumably that call also updates ``config_channels.channels`` so the
     next convolution chains onto it -- TODO confirm against its
     definition. ``prefix`` is used to build the weight names, ``bn`` is
     forwarded to every ``Conv2d`` and ``ratio`` scales the requested
     widths.
     """
     nn.Module.__init__(self)
     # branch0
     # remember the block's input width; it is restored before branch1 and
     # reused for the output width below
     channels = config_channels.channels
     branch = []
     # len(branch) in the weight name is the index the layer will get
     branch.append(
         Conv2d(config_channels.channels,
                config_channels(
                    int(192 * ratio),
                    '%s.branch0.%d.conv.weight' % (prefix, len(branch))),
                kernel_size=1,
                stride=1,
                bn=bn))
     branch.append(
         Conv2d(config_channels.channels,
                config_channels(
                    int(192 * ratio),
                    '%s.branch0.%d.conv.weight' % (prefix, len(branch))),
                kernel_size=3,
                stride=2,
                bn=bn))
     self.branch0 = nn.Sequential(*branch)
     # branch1
     # start again from the block's input width
     config_channels.channels = channels
     branch = []
     branch.append(
         Conv2d(config_channels.channels,
                config_channels(
                    int(256 * ratio),
                    '%s.branch1.%d.conv.weight' % (prefix, len(branch))),
                kernel_size=1,
                stride=1,
                bn=bn))
     # 1x7 followed by 7x1 convolution, each padded along its long axis
     branch.append(
         Conv2d(config_channels.channels,
                config_channels(
                    int(256 * ratio),
                    '%s.branch1.%d.conv.weight' % (prefix, len(branch))),
                kernel_size=(1, 7),
                stride=1,
                padding=(0, 3),
                bn=bn))
     branch.append(
         Conv2d(config_channels.channels,
                config_channels(
                    int(320 * ratio),
                    '%s.branch1.%d.conv.weight' % (prefix, len(branch))),
                kernel_size=(7, 1),
                stride=1,
                padding=(3, 0),
                bn=bn))
     branch.append(
         Conv2d(config_channels.channels,
                config_channels(
                    int(320 * ratio),
                    '%s.branch1.%d.conv.weight' % (prefix, len(branch))),
                kernel_size=3,
                stride=2,
                bn=bn))
     self.branch1 = nn.Sequential(*branch)
     # branch2: max-pooling only, so it has no weights
     self.branch2 = nn.MaxPool2d(3, stride=2)
     # output
     # new width = output widths of the last conv in branch0 and branch1
     # plus the input width (the pooling branch keeps it); presumably the
     # three branches are concatenated in forward -- TODO confirm
     config_channels.channels = self.branch0[-1].conv.weight.size(
         0) + self.branch1[-1].conv.weight.size(0) + channels
 def __init__(self, in_size, out_size):
     """Create two conv2DBatchNormRelu layers and an index-returning max-pool.

     The first layer maps ``in_size`` to ``out_size``; the second keeps
     ``out_size``. The pooling layer is built with ``return_indices=True``.
     """
     super(segnetDown2, self).__init__()
     first_conv = conv2DBatchNormRelu(in_size, out_size, 3, 1, 1)
     second_conv = conv2DBatchNormRelu(out_size, out_size, 3, 1, 1)
     self.conv1 = first_conv
     self.conv2 = second_conv
     self.maxpool_with_argmax = nn.MaxPool2d(kernel_size=2,
                                             stride=2,
                                             return_indices=True)
Пример #10
0
    def __init__(self, in_channels=1, out_channels=1):
        """Create the six U-Net blocks and run the weight initialisation.

        ``in_channels`` is the channel count of the network input and
        ``out_channels`` that of the final convolution.
        """
        super(UNet, self).__init__()

        def conv_relu(n_in, n_out):
            # 3x3 convolution (stride 1, padding 1) followed by in-place ReLU.
            return [nn.Conv2d(n_in, n_out, 3, stride=1, padding=1),
                    nn.ReLU(inplace=True)]

        def upsample(n_ch):
            # Stride-2 transposed convolution that keeps the channel count.
            return [nn.ConvTranspose2d(n_ch, n_ch, 3, stride=2, padding=1,
                                       output_padding=1)]

        def pool():
            return [nn.MaxPool2d(2)]

        # enc_conv0, enc_conv1, pool1
        self._block1 = nn.Sequential(
            *(conv_relu(in_channels, 48) + conv_relu(48, 48) + pool()))

        # enc_conv(i), pool(i); i=2..5
        self._block2 = nn.Sequential(*(conv_relu(48, 48) + pool()))

        # enc_conv6, upsample5
        self._block3 = nn.Sequential(*(conv_relu(48, 48) + upsample(48)))

        # dec_conv5a, dec_conv5b, upsample4
        self._block4 = nn.Sequential(
            *(conv_relu(96, 96) + conv_relu(96, 96) + upsample(96)))

        # dec_deconv(i)a, dec_deconv(i)b, upsample(i-1); i=4..2
        self._block5 = nn.Sequential(
            *(conv_relu(144, 96) + conv_relu(96, 96) + upsample(96)))

        # dec_conv1a, dec_conv1b, dec_conv1c, then a Tanh on the output
        final_layers = [
            nn.Conv2d(32, out_channels, 3, stride=1, padding=1),
            nn.Tanh(),
        ]
        self._block6 = nn.Sequential(
            *(conv_relu(96 + in_channels, 64) + conv_relu(64, 32)
              + final_layers))

        # Initialize weights
        self._init_weights()
 def __init__(self, num_filters, channels_in, stride):
     """Store a strided size-1 max-pool and the channel difference.

     ``num_zeros`` is ``num_filters - channels_in``.
     """
     super(IdentityExpansion, self).__init__()
     # A max-pool with kernel size 1 passes values through unchanged, so it
     # acts as an identity mapping that only applies the stride.
     strided_identity = nn.MaxPool2d(kernel_size=1, stride=stride)
     self.identity = strided_identity
     self.num_zeros = num_filters - channels_in
Пример #12
0
    def __init__(
        self,
        block: Type[Union[MDL_BasicBlock]],
        layers: List[int],
        in_channels = 12,
        num_classes: int = 1000,
        zero_init_residual: bool = False,
        groups: int = 1,
        width_per_group: int = 64,
        replace_stride_with_dilation: Optional[List[bool]] = None,
        norm_layer: Optional[Callable[..., nn.Module]] = None,
        **kwargs
    ) -> None:
        """Build the stem, four residual stages and classifier, then init weights.

        ``project_mode`` is read from ``kwargs`` (default ``'1111'``); its
        characters are passed as ``is_proj`` to the stem convolution and to
        the four stages. ``replace_stride_with_dilation`` must be ``None`` or
        have exactly three entries, otherwise ``ValueError`` is raised.
        """
        super(MDL_ResNet, self).__init__()

        norm_layer = nn.BatchNorm2d if norm_layer is None else norm_layer
        self._norm_layer = norm_layer

        self.inplanes = 64
        self.dilation = 1
        self.project_mode = kwargs.get('project_mode', '1111')

        # One flag per strided stage: True means "dilate instead of stride".
        dilate_flags = replace_stride_with_dilation
        if dilate_flags is None:
            dilate_flags = [False, False, False]
        if len(dilate_flags) != 3:
            raise ValueError("replace_stride_with_dilation should be None "
                             "or a 3-element tuple, got {}".format(dilate_flags))

        self.groups = groups
        self.base_width = width_per_group

        proj = self.project_mode

        # Stem. The stem convolution shares the first project-mode flag with
        # layer1.
        # NOTE(review): `pedding` looks like a typo for `padding`; it is kept
        # as-is because conv_task's signature is not visible here -- confirm.
        self.conv1 = conv_task(in_channels, self.inplanes, kernel_size=7,
                               stride=2, pedding=3, is_proj=proj[0])
        self.bn1 = norm_layer(self.inplanes)
        self.relu = nn.ReLU(inplace=True)
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)

        # Residual stages.
        self.layer1 = self._make_layer(block, 64, layers[0], is_proj=proj[0])
        self.layer2 = self._make_layer(block, 128, layers[1], stride=2,
                                       dilate=dilate_flags[0],
                                       is_proj=proj[1])
        self.layer3 = self._make_layer(block, 256, layers[2], stride=2,
                                       dilate=dilate_flags[1],
                                       is_proj=proj[2])
        self.layer4 = self._make_layer(block, 512, layers[3], stride=2,
                                       dilate=dilate_flags[2],
                                       is_proj=proj[3])

        # Classifier head.
        self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
        self.fc = nn.Linear(512 * block.expansion, num_classes)

        # Weight initialisation: Kaiming-normal for nn.Conv2d instances, unit
        # scale and zero shift for the normalisation layers.
        for module in self.modules():
            if isinstance(module, nn.Conv2d):
                nn.init.kaiming_normal_(module.weight, mode='fan_out',
                                        nonlinearity='relu')
                continue
            if isinstance(module, (nn.BatchNorm2d, nn.GroupNorm)):
                nn.init.constant_(module.weight, 1)
                nn.init.constant_(module.bias, 0)

        if not zero_init_residual:
            return

        # Zero the last BN of every residual branch so that each block starts
        # out as an identity mapping; reported to improve accuracy by
        # 0.2~0.3% in https://arxiv.org/abs/1706.02677
        for module in self.modules():
            if isinstance(module, MDL_BasicBlock):
                nn.init.constant_(module.bn2.weight, 0)  # type: ignore[arg-type]
Пример #13
0
 def __init__(self,
              levels,
              block,
              in_channels,
              out_channels,
              stride=1,
              level_root=False,
              root_dim=0,
              root_kernel_size=1,
              dilation=1,
              root_residual=False):
     """Build a tree of ``block`` modules, recursing while ``levels`` > 1.

     With ``levels == 1`` the two children are plain ``block`` instances
     and a ``Root`` is created; otherwise both children are ``Tree``
     instances one level shallower. A ``root_dim`` of 0 is replaced by
     ``2 * out_channels`` and ``level_root`` adds ``in_channels`` to it.
     ``downsample`` is a max-pool when ``stride > 1`` and ``project`` a
     1x1 conv + batch norm when the channel counts differ; both are
     ``None`` otherwise.
     """
     super(Tree, self).__init__()

     if root_dim == 0:
         root_dim = 2 * out_channels
     if level_root:
         root_dim += in_channels

     if levels == 1:
         # Leaf level: two blocks plus the root created right after them.
         self.tree1 = block(in_channels, out_channels, stride,
                            dilation=dilation)
         self.tree2 = block(out_channels, out_channels, 1,
                            dilation=dilation)
         self.root = Root(root_dim, out_channels, root_kernel_size,
                          root_residual)
     else:
         # Inner level: recurse with settings shared by both subtrees.
         subtree_kwargs = dict(root_kernel_size=root_kernel_size,
                               dilation=dilation,
                               root_residual=root_residual)
         self.tree1 = Tree(levels - 1, block, in_channels, out_channels,
                           stride, root_dim=0, **subtree_kwargs)
         self.tree2 = Tree(levels - 1, block, out_channels, out_channels,
                           root_dim=root_dim + out_channels,
                           **subtree_kwargs)

     self.level_root = level_root
     self.root_dim = root_dim
     self.levels = levels

     # Optional spatial reduction of the input.
     if stride > 1:
         self.downsample = nn.MaxPool2d(stride, stride=stride)
     else:
         self.downsample = None

     # Optional 1x1 projection when the channel counts differ.
     if in_channels != out_channels:
         self.project = nn.Sequential(
             nn.Conv2d(in_channels, out_channels, kernel_size=1, stride=1,
                       bias=False),
             nn.BatchNorm2d(out_channels, momentum=BN_MOMENTUM))
     else:
         self.project = None
Пример #14
0
    def __init__(self,
                 num_classes,
                 loss,
                 block_rgb,
                 layers_rgb,
                 block_contour,
                 layers_contour,
                 zero_init_residual=False,
                 groups=1,
                 width_per_group=64,
                 replace_stride_with_dilation=None,
                 norm_layer=None,
                 last_stride=2,
                 fc_dims=None,
                 dropout_p=None,
                 part_num=3,
                 part_weight=1.0,
                 **kwargs):
        super(MyModel, self).__init__()
        self.cnt = 0

        if norm_layer is None:
            norm_layer = nn.BatchNorm2d
        self._norm_layer = norm_layer
        self.loss = loss
        self.feature_dim_base = 512
        self.feature_dim = self.feature_dim_base * block_rgb.expansion
        self.inplanes = 64
        self.dilation = 1
        self.part_num = part_num
        self.part_weight = part_weight
        self.reduced_dim = 256
        if replace_stride_with_dilation is None:
            # each element in the tuple indicates if we should replace
            # the 2x2 stride with a dilated convolution instead
            replace_stride_with_dilation = [False, False, False]
        if len(replace_stride_with_dilation) != 3:
            raise ValueError("replace_stride_with_dilation should be None "
                             "or a 3-element tuple, got {}".format(
                                 replace_stride_with_dilation))
        self.groups = groups
        self.base_width = width_per_group

        # Backbone network for appearance feature extraction
        self.conv1 = nn.Conv2d(3,
                               self.inplanes,
                               kernel_size=7,
                               stride=2,
                               padding=3,
                               bias=False)
        self.bn1 = norm_layer(self.inplanes)
        self.relu = nn.ReLU(inplace=True)
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
        self.layer1 = self._make_layer(block_rgb, 64, layers_rgb[0])
        self.layer2 = self._make_layer(block_rgb,
                                       128,
                                       layers_rgb[1],
                                       stride=2,
                                       dilate=replace_stride_with_dilation[0])
        self.layer3 = self._make_layer(block_rgb,
                                       256,
                                       layers_rgb[2],
                                       stride=2,
                                       dilate=replace_stride_with_dilation[1])
        self.layer4 = self._make_layer(block_rgb,
                                       self.feature_dim_base,
                                       layers_rgb[3],
                                       stride=last_stride,
                                       dilate=replace_stride_with_dilation[2])
        self.inplanes = 256 * block_rgb.expansion
        # self.layer4_part = self._make_layer(block_rgb, self.feature_dim_base, layers_rgb[3], stride=last_stride,
        #                                dilate=replace_stride_with_dilation[2])
        self.global_avgpool = nn.AdaptiveAvgPool2d((1, 1))
        # self.global_maxpool = nn.AdaptiveMaxPool2d((1, 1))
        # self.global_avgpool = GeneralizedMeanPoolingP()
        self.parts_avgpool = nn.AdaptiveAvgPool2d((self.part_num, 1))
        self.conv5 = DimReduceLayer(self.feature_dim_base *
                                    block_rgb.expansion,
                                    self.reduced_dim,
                                    nonlinear='relu')

        # fc layers definition
        if fc_dims is None:
            self.fc = None
        else:
            self.fc = self._construct_fc_layer(fc_dims,
                                               512 * block_rgb.expansion,
                                               dropout_p)

        # Backbone network for contour feature extraction
        self.inplanes = 64
        self.conv1_contour = nn.Conv2d(1,
                                       64,
                                       kernel_size=7,
                                       stride=2,
                                       padding=3,
                                       bias=False)
        self.bn1_contour = nn.BatchNorm2d(64)
        self.layer1_contour = self._make_layer(block_contour, 64,
                                               layers_contour[0])
        self.layer2_contour = self._make_layer(block_contour,
                                               128,
                                               layers_contour[1],
                                               stride=2)
        self.layer3_contour = self._make_layer(block_contour,
                                               256,
                                               layers_contour[2],
                                               stride=2)
        self.layer4_contour = self._make_layer(block_contour,
                                               self.feature_dim_base,
                                               layers_contour[3],
                                               stride=last_stride)
        self.conv5_contour = DimReduceLayer(self.feature_dim_base *
                                            block_contour.expansion,
                                            self.reduced_dim,
                                            nonlinear='relu')

        # Sub-networks for contour graph modeling
        self.parts_avgpool_contour = nn.AdaptiveAvgPool2d((self.part_num, 3))
        # self.parts_avgpool_contour = nn.AdaptiveAvgPool2d((self.part_num, 1))
        self.feature_dim_gnn = self.feature_dim_base * block_contour.expansion
        self.gnns = nn.ModuleList([
            GraphConvolution(self.feature_dim_gnn,
                             self.feature_dim_gnn,
                             bias=True) for _ in range(self.part_num + 1)
        ])
        # self.bns_gnn = nn.ModuleList([nn.BatchNorm1d(self.feature_dim_gnn) for _ in range(self.part_num + 1)])

        # Bnneck layers
        self.bnneck_rgb = nn.BatchNorm1d(self.feature_dim)
        self.bnneck_rgb_part = nn.ModuleList(
            [nn.BatchNorm1d(self.reduced_dim) for _ in range(self.part_num)])
        self.bnneck_contour = nn.BatchNorm1d(self.feature_dim_base *
                                             block_contour.expansion)
        self.bnneck_contour_part = nn.ModuleList(
            [nn.BatchNorm1d(self.reduced_dim) for _ in range(self.part_num)])

        # Classifiers
        self.classifier = nn.Linear(self.feature_dim, num_classes, bias=False)
        self.classifier_contour = nn.Linear(self.feature_dim_base *
                                            block_contour.expansion,
                                            num_classes,
                                            bias=False)
        # self.classifiers_part = nn.ModuleList([nn.Linear(self.reduced_dim, num_classes) for _ in range(self.part_num)])
        # self.classifiers_contour_part = nn.ModuleList(
        #     [nn.Linear(self.reduced_dim, num_classes) for _ in range(self.part_num)])

        self._init_params()

        # Zero-initialize the last BN in each residual branch,
        # so that the residual branch starts with zeros, and each residual block behaves like an identity.
        # This improves the model by 0.2~0.3% according to https://arxiv.org/abs/1706.02677
        if zero_init_residual:
            for m in self.modules():
                if isinstance(m, Bottleneck):
                    nn.init.constant_(m.bn3.weight, 0)
                elif isinstance(m, BasicBlock):
                    nn.init.constant_(m.bn2.weight, 0)
Пример #15
0
    def __init__(self,
                 groups=3,
                 widen_factor=1.0,
                 out_indices=(2, ),
                 frozen_stages=-1,
                 conv_cfg=None,
                 norm_cfg=dict(type='BN'),
                 act_cfg=dict(type='ReLU'),
                 norm_eval=False,
                 with_cp=False,
                 init_cfg=None):
        """Validate the configuration and build the stem and the three stages.

        Args:
            groups: Group count used to pick the per-stage channel widths.
                Only 1, 2, 3, 4 and 8 are accepted.
            widen_factor: Multiplier applied to the stem width (24) and to
                every stage width.
            out_indices: Iterable of stage indices; each item must lie in
                ``range(0, 3)``. Stored on the instance.
            frozen_stages: Must lie in ``range(-1, 3)``. Stored on the
                instance.
            conv_cfg: Forwarded to the stem ``ConvModule`` and stored.
            norm_cfg: Forwarded to the stem ``ConvModule`` and stored.
            act_cfg: Forwarded to the stem ``ConvModule`` and stored.
            norm_eval: Stored on the instance; not used in the constructor.
            with_cp: Stored on the instance; not used in the constructor.
            init_cfg: Passed to the parent constructor and stored.

        Raises:
            ValueError: If an item of ``out_indices``, ``frozen_stages`` or
                ``groups`` is outside its accepted set.
        """
        super(ShuffleNetV1, self).__init__(init_cfg)
        self.init_cfg = init_cfg
        self.stage_blocks = [4, 8, 4]
        self.groups = groups

        # Reject the first out-of-range stage index encountered.
        valid_stage_ids = range(0, 3)
        for index in out_indices:
            if index in valid_stage_ids:
                continue
            raise ValueError('the item in out_indices must in '
                             f'range(0, 3). But received {index}')

        valid_frozen = range(-1, 3)
        if frozen_stages not in valid_frozen:
            raise ValueError('frozen_stages must be in range(-1, 3). '
                             f'But received {frozen_stages}')

        self.out_indices = out_indices
        self.frozen_stages = frozen_stages
        self.conv_cfg = conv_cfg
        self.norm_cfg = norm_cfg
        self.act_cfg = act_cfg
        self.norm_eval = norm_eval
        self.with_cp = with_cp

        # Base stage widths for each supported group count.
        width_table = (
            (1, (144, 288, 576)),
            (2, (200, 400, 800)),
            (3, (240, 480, 960)),
            (4, (272, 544, 1088)),
            (8, (384, 768, 1536)),
        )
        for supported_groups, base_widths in width_table:
            if groups == supported_groups:
                break
        else:
            raise ValueError(f'{groups} groups is not supported for 1x1 '
                             'Grouped Convolutions')

        # Scale the widths and round them with make_divisible(..., 8).
        channels = [
            make_divisible(width * widen_factor, 8) for width in base_widths
        ]

        self.in_channels = int(24 * widen_factor)

        # Stem: strided 3x3 conv followed by a strided 3x3 max-pool.
        self.conv1 = ConvModule(
            in_channels=3,
            out_channels=self.in_channels,
            kernel_size=3,
            stride=2,
            padding=1,
            conv_cfg=conv_cfg,
            norm_cfg=norm_cfg,
            act_cfg=act_cfg)
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)

        # One layer per stage; only the first stage is flagged as such.
        self.layers = nn.ModuleList()
        for stage_id, depth in enumerate(self.stage_blocks):
            stage = self.make_layer(channels[stage_id], depth, stage_id == 0)
            self.layers.append(stage)
Пример #16
0
 def __init__(self, in_size, out_size, norm_layer, need_bias, pad,
              dilation):
     """Create the conv stage and the 2x2 max-pool of a down block.

     All six arguments are forwarded, in order, to ``unetConv2``.
     """
     super(unetDown, self).__init__()
     conv_args = (in_size, out_size, norm_layer, need_bias, pad, dilation)
     self.conv = unetConv2(*conv_args)
     # Kernel 2, stride 2.
     self.down = nn.MaxPool2d(kernel_size=2, stride=2)
Пример #17
0
	def __init__(self, conv_body_func, fpn_level_info, P2only = False):
		"""Build the FPN modules and then instantiate the backbone.

		Args:
			conv_body_func: Zero-argument callable; its return value is stored
				as ``self.conv_body`` after ``_init_weights`` has run.
			fpn_level_info: Object exposing ``blobs``, ``dims`` and
				``spatial_scales`` sequences, indexed from the coarsest
				backbone level.
			P2only: If true, ``self.spatial_scale`` is reduced to its last
				element (a scalar, not a list) once all levels are built.
		"""
		super().__init__()
		self.fpn_level_info = fpn_level_info
		self.P2only = P2only

		self.dim_out = fpn_dim = cfg.FPN.DIM
		min_level, max_level = get_min_max_levels()
		self.num_backbone_stages = len(fpn_level_info.blobs) - (min_level - LOWEST_BACKBONE_LVL)
		fpn_dim_lateral = fpn_level_info.dims
		self.spatial_scale = []  # a list of scales for FPN outputs

		#
		# Step 1: recursively build down starting from the coarsest backbone level
		#
		# For the coarsest backbone level: 1x1 conv only seeds recursion
		if cfg.FPN.USE_GN:
			# The conv bias is dropped when GroupNorm follows.
			self.conv_top = nn.Sequential(
				nn.Conv2d(fpn_dim_lateral[0], fpn_dim, 1, 1, 0, bias = False),
				nn.GroupNorm(net_utils.get_group_gn(fpn_dim), fpn_dim,
				             eps = cfg.GROUP_NORM.EPSILON)
			)
		else:
			self.conv_top = nn.Conv2d(fpn_dim_lateral[0], fpn_dim, 1, 1, 0)
		self.topdown_lateral_modules = nn.ModuleList()
		self.posthoc_modules = nn.ModuleList()

		# For other levels add top-down and lateral connections
		for i in range(self.num_backbone_stages - 1):
			self.topdown_lateral_modules.append(
				topdown_lateral_module(fpn_dim, fpn_dim_lateral[i + 1])
			)

		# Post-hoc scale-specific 3x3 convs
		for i in range(self.num_backbone_stages):
			if cfg.FPN.USE_GN:
				self.posthoc_modules.append(nn.Sequential(
					nn.Conv2d(fpn_dim, fpn_dim, 3, 1, 1, bias = False),
					nn.GroupNorm(net_utils.get_group_gn(fpn_dim), fpn_dim,
					             eps = cfg.GROUP_NORM.EPSILON)
				))
			else:
				self.posthoc_modules.append(
					nn.Conv2d(fpn_dim, fpn_dim, 3, 1, 1)
				)

			self.spatial_scale.append(fpn_level_info.spatial_scales[i])

		#
		# Step 2: build up starting from the coarsest backbone level
		#
		# Check if we need the P6 feature map
		if not cfg.FPN.EXTRA_CONV_LEVELS and max_level == HIGHEST_BACKBONE_LVL + 1:
			# Original FPN P6 level implementation from our CVPR'17 FPN paper
			# Use max pooling to simulate stride 2 subsampling
			self.maxpool_p6 = nn.MaxPool2d(kernel_size = 1, stride = 2, padding = 0)
			self.spatial_scale.insert(0, self.spatial_scale[0] * 0.5)

		# Coarser FPN levels introduced for RetinaNet
		if cfg.FPN.EXTRA_CONV_LEVELS and max_level > HIGHEST_BACKBONE_LVL:
			self.extra_pyramid_modules = nn.ModuleList()
			dim_in = fpn_level_info.dims[0]
			for i in range(HIGHEST_BACKBONE_LVL + 1, max_level + 1):
				# Register the conv in the list; calling the ModuleList itself
				# would invoke its forward and raise.
				self.extra_pyramid_modules.append(
					nn.Conv2d(dim_in, fpn_dim, 3, 2, 1)
				)
				dim_in = fpn_dim
				# Each extra level halves the scale of the current coarsest one.
				self.spatial_scale.insert(0, self.spatial_scale[0] * 0.5)

		if self.P2only:
			# use only the finest level
			self.spatial_scale = self.spatial_scale[-1]

		self._init_weights()

		# Deliberately add conv_body after _init_weights.
		# conv_body has its own _init_weights function
		self.conv_body = conv_body_func()  # e.g resnet
Пример #18
0
    def __init__(self, k, stages):
        """Create every layer of the six-stage network.

        Args:
            k: The final conv of each stage outputs ``k + 1`` channels, and
                the first refinement conv of stages 2-6 takes
                ``32 + k + 2`` input channels.
            stages: Stored on the instance. The constructor always builds
                six stages regardless of this value.
        """
        super(CPM, self).__init__()
        self.k = k
        self.stages = stages

        def build_trunk(stage):
            # conv1..conv3 (9x9), each followed by a stride-2 pool, then a
            # 5x5 conv reducing to 32 channels.
            trunk_in = 3
            for idx in (1, 2, 3):
                setattr(self, 'conv{}_stage{}'.format(idx, stage),
                        nn.Conv2d(trunk_in, 128, kernel_size=9, padding=4))
                setattr(self, 'pool{}_stage{}'.format(idx, stage),
                        nn.MaxPool2d(kernel_size=3, stride=2, padding=1))
                trunk_in = 128
            setattr(self, 'conv4_stage{}'.format(stage),
                    nn.Conv2d(128, 32, kernel_size=5, padding=2))

        def build_refinement(stage):
            # Mconv1..Mconv3 are 11x11; Mconv4 and Mconv5 are 1x1.
            layer_specs = (
                (32 + self.k + 2, 128, 11, 5),
                (128, 128, 11, 5),
                (128, 128, 11, 5),
                (128, 128, 1, 0),
                (128, self.k + 1, 1, 0),
            )
            for idx, (c_in, c_out, ksize, pad) in enumerate(layer_specs, 1):
                setattr(self, 'Mconv{}_stage{}'.format(idx, stage),
                        nn.Conv2d(c_in, c_out, kernel_size=ksize,
                                  padding=pad))

        self.pool_center = nn.AvgPool2d(kernel_size=9, stride=8, padding=1)

        # Stage 1: trunk plus its own prediction head.
        build_trunk(1)
        self.conv5_stage1 = nn.Conv2d(32, 512, kernel_size=9, padding=4)
        self.conv6_stage1 = nn.Conv2d(512, 512, kernel_size=1)
        self.conv7_stage1 = nn.Conv2d(512, self.k + 1, kernel_size=1)

        # Stage 2: a second trunk followed by the refinement convs.
        build_trunk(2)
        build_refinement(2)

        # Stages 3-6: one 5x5 conv (128 -> 32) and the same refinement convs.
        for stage in (3, 4, 5, 6):
            setattr(self, 'conv1_stage{}'.format(stage),
                    nn.Conv2d(128, 32, kernel_size=5, padding=2))
            build_refinement(stage)
Пример #19
0
    def __init__(self,
                 in_channels,
                 out_channels,
                 first_block=False,
                 epsilon=1e-4,
                 attention=True,
                 freeze_params=False):
        """
        Args:
            in_channels: list of input channel counts; indices 0..3 are used
                        for the P2..P5 projections when first_block is True
            out_channels: channel count of every BiFPN feature map
            first_block: whether the input comes directly from the efficientnet,
                        if True, downchannel it first, and downsample P5 to generate P6
            epsilon: epsilon of fast weighted attention sum of BiFPN, not the BN's epsilon
            attention: stored on the instance; not used in the constructor
            freeze_params: if True, the fusion weights are created with
                        requires_grad=False, the flag is forwarded to every
                        SeparableConvBlock, and (for a first block) the
                        projection layers are frozen as well
        """
        super(SingleBiFPN, self).__init__()
        assert isinstance(in_channels, list)
        self.first_block = first_block
        self.epsilon = epsilon
        self.attention = attention
        self.freeze_params = freeze_params

        def down_channel(src_channels):
            # 1x1 conv + BN projecting a backbone level to out_channels.
            return nn.Sequential(
                Conv2d(src_channels, out_channels, 1),
                nn.BatchNorm2d(out_channels, momentum=0.01, eps=1e-3),
            )

        if self.first_block:
            self.p2_down_channel = down_channel(in_channels[0])
            self.p3_down_channel = down_channel(in_channels[1])
            self.p4_down_channel = down_channel(in_channels[2])
            self.p5_down_channel = down_channel(in_channels[3])

            self.p5_to_p6 = nn.Sequential(
                Conv2d(in_channels[3], out_channels, 1),
                nn.BatchNorm2d(out_channels, momentum=0.01, eps=1e-3),
                nn.MaxPool2d(3, 2, padding=1))

            self.p3_down_channel_2 = down_channel(in_channels[1])
            self.p4_down_channel_2 = down_channel(in_channels[2])
            self.p5_down_channel_2 = down_channel(in_channels[3])

        # Conv layers
        self.conv5_up = SeparableConvBlock(out_channels,
                                           freeze_params=self.freeze_params)
        self.conv4_up = SeparableConvBlock(out_channels,
                                           freeze_params=self.freeze_params)
        self.conv3_up = SeparableConvBlock(out_channels,
                                           freeze_params=self.freeze_params)
        self.conv2_up = SeparableConvBlock(out_channels,
                                           freeze_params=self.freeze_params)
        self.conv3_down = SeparableConvBlock(out_channels,
                                             freeze_params=self.freeze_params)
        self.conv4_down = SeparableConvBlock(out_channels,
                                             freeze_params=self.freeze_params)
        self.conv5_down = SeparableConvBlock(out_channels,
                                             freeze_params=self.freeze_params)
        self.conv6_down = SeparableConvBlock(out_channels,
                                             freeze_params=self.freeze_params)

        # top-down (upsample to target phase's by nearest interpolation)
        self.p5_upsample = nn.Upsample(scale_factor=2, mode='nearest')
        self.p4_upsample = nn.Upsample(scale_factor=2, mode='nearest')
        self.p3_upsample = nn.Upsample(scale_factor=2, mode='nearest')
        self.p2_upsample = nn.Upsample(scale_factor=2, mode='nearest')

        # bottom-up (downsample to target phase's by pooling)
        self.p3_downsample = nn.MaxPool2d(3, 2, padding=1)
        self.p4_downsample = nn.MaxPool2d(3, 2, padding=1)
        self.p5_downsample = nn.MaxPool2d(3, 2, padding=1)
        self.p6_downsample = nn.MaxPool2d(3, 2, padding=1)

        self.relu = nn.ReLU()

        # Fusion weights: trainable unless the block is frozen.
        trainable = not self.freeze_params

        def fusion_weight(num_inputs):
            # One scalar weight per fused input, initialised to 1.
            return nn.Parameter(torch.ones(num_inputs, dtype=torch.float32),
                                requires_grad=trainable)

        self.p5_w1 = fusion_weight(2)
        self.p5_w1_relu = nn.ReLU()
        self.p4_w1 = fusion_weight(2)
        self.p4_w1_relu = nn.ReLU()
        self.p3_w1 = fusion_weight(2)
        self.p3_w1_relu = nn.ReLU()
        self.p2_w1 = fusion_weight(2)
        self.p2_w1_relu = nn.ReLU()

        self.p3_w2 = fusion_weight(3)
        self.p3_w2_relu = nn.ReLU()
        self.p4_w2 = fusion_weight(3)
        self.p4_w2_relu = nn.ReLU()
        self.p5_w2 = fusion_weight(3)
        self.p5_w2_relu = nn.ReLU()
        self.p6_w2 = fusion_weight(2)
        self.p6_w2_relu = nn.ReLU()

        # The projection layers only exist on a first block, so freezing
        # them must be skipped otherwise (it used to raise AttributeError).
        if self.freeze_params and self.first_block:
            for m in [
                    self.p2_down_channel, self.p3_down_channel,
                    self.p4_down_channel, self.p5_down_channel, self.p5_to_p6,
                    self.p3_down_channel_2, self.p4_down_channel_2,
                    self.p5_down_channel_2
            ]:
                for param in m.parameters():
                    param.requires_grad = False
Пример #20
0
    def __init__(self,
                 in_channels=3,
                 n_classes=1,
                 feature_scale=4,
                 is_deconv=True,
                 is_batchnorm=True):
        """Create the encoder, the four full-scale decoder stages and the heads.

        Args:
            in_channels: Input channel count of the first encoder block.
            n_classes: Output channel count of the five ``outconv`` heads.
            feature_scale: Stored on the instance; not used in the constructor.
            is_deconv: Stored on the instance; not used in the constructor.
            is_batchnorm: Forwarded to every ``unetConv2`` encoder block.
        """
        super(UNet_3Plus_DeepSup_CGM, self).__init__()
        self.is_deconv = is_deconv
        self.in_channels = in_channels
        self.is_batchnorm = is_batchnorm
        self.feature_scale = feature_scale

        filters = [64, 128, 256, 512, 1024]
        num_levels = len(filters)

        ## -------------Encoder--------------
        # conv1..conv5, with a 2x2 max-pool after each of the first four.
        prev_width = self.in_channels
        for level, width in enumerate(filters, start=1):
            setattr(self, 'conv{}'.format(level),
                    unetConv2(prev_width, width, self.is_batchnorm))
            if level < num_levels:
                setattr(self, 'maxpool{}'.format(level),
                        nn.MaxPool2d(kernel_size=2))
            prev_width = width

        ## -------------Decoder--------------
        self.CatChannels = filters[0]
        self.CatBlocks = 5
        self.UpChannels = self.CatChannels * self.CatBlocks

        def add_branch(prefix, resampler, src_channels):
            # Optional resampler, then 3x3 conv -> BN -> ReLU reducing the
            # source to CatChannels.
            if resampler is not None:
                setattr(self, prefix, resampler)
            setattr(self, prefix + '_conv',
                    nn.Conv2d(src_channels, self.CatChannels, 3, padding=1))
            setattr(self, prefix + '_bn', nn.BatchNorm2d(self.CatChannels))
            setattr(self, prefix + '_relu', nn.ReLU(inplace=True))

        # Decoder stages hd4, hd3, hd2, hd1. Each one fuses five sources:
        #   shallower encoder maps  -> max-pooled         (h{s}_PT_hd{d})
        #   same-level encoder map  -> used as is         (h{s}_Cat_hd{d})
        #   deeper maps             -> bilinear upsampled (hd{s}_UT_hd{d})
        for target in range(num_levels - 1, 0, -1):
            for source in range(1, num_levels + 1):
                gap = source - target
                if gap < 0:
                    ratio = 2 ** (-gap)
                    add_branch('h{}_PT_hd{}'.format(source, target),
                               nn.MaxPool2d(ratio, ratio, ceil_mode=True),
                               filters[source - 1])
                elif gap == 0:
                    add_branch('h{}_Cat_hd{}'.format(source, target),
                               None,
                               filters[source - 1])
                else:
                    # The deepest source is the raw encoder output; the
                    # others are earlier decoder outputs (UpChannels wide).
                    if source == num_levels:
                        src_channels = filters[-1]
                    else:
                        src_channels = self.UpChannels
                    add_branch('hd{}_UT_hd{}'.format(source, target),
                               nn.Upsample(scale_factor=2 ** gap,
                                           mode='bilinear'),
                               src_channels)

            # Fusion of the five concatenated branches.
            setattr(self, 'conv{}d_1'.format(target),
                    nn.Conv2d(self.UpChannels, self.UpChannels, 3, padding=1))
            setattr(self, 'bn{}d_1'.format(target),
                    nn.BatchNorm2d(self.UpChannels))
            setattr(self, 'relu{}d_1'.format(target), nn.ReLU(inplace=True))

        # -------------Bilinear Upsampling--------------
        # upscore6..upscore2 with scale factors 32, 16, 8, 4, 2.
        for idx in range(6, 1, -1):
            setattr(self, 'upscore{}'.format(idx),
                    nn.Upsample(scale_factor=2 ** (idx - 1), mode='bilinear'))

        # DeepSup
        # outconv1..4 read decoder outputs; outconv5 reads the raw encoder
        # output.
        for idx in range(1, num_levels + 1):
            if idx == num_levels:
                head_in = filters[-1]
            else:
                head_in = self.UpChannels
            setattr(self, 'outconv{}'.format(idx),
                    nn.Conv2d(head_in, n_classes, 3, padding=1))

        self.cls = nn.Sequential(nn.Dropout(p=0.5),
                                 nn.Conv2d(filters[4], 2, 1),
                                 nn.AdaptiveMaxPool2d(1), nn.Sigmoid())

        # initialise weights
        for m in self.modules():
            if isinstance(m, (nn.Conv2d, nn.BatchNorm2d)):
                init_weights(m, init_type='kaiming')
Пример #21
0
    def __init__(self):
        """Build a VGG16-style network with batch normalisation.

        Five convolutional stages (``layer1`` .. ``layer5``) are each made of
        3x3 convolutions with padding 1, followed by BatchNorm2d + ReLU, and
        closed by a 2x2 max pooling with stride 2. Three fully connected
        stages (``layer6`` .. ``layer8``) follow; the last one produces two
        outputs that go through BatchNorm1d and a softmax.

        NOTE(review): ``layer6`` expects 512*7*7 input features, which
        presumably corresponds to a 224x224 input image flattened after the
        five poolings - confirm against ``forward``.
        """
        super(VGG16, self).__init__()
        self.layer1 = nn.Sequential(

            # 1-1 conv layer
            nn.Conv2d(3, 64, kernel_size=3, padding=1),
            # Use the same ``nn`` alias as every other layer in this block.
            nn.BatchNorm2d(64),
            nn.ReLU(),

            # 1-2 conv layer
            nn.Conv2d(64, 64, kernel_size=3, padding=1),
            nn.BatchNorm2d(64),
            nn.ReLU(),

            # 1 Pooling layer
            nn.MaxPool2d(kernel_size=2, stride=2))

        self.layer2 = nn.Sequential(

            # 2-1 conv layer
            nn.Conv2d(64, 128, kernel_size=3, padding=1),
            nn.BatchNorm2d(128),
            nn.ReLU(),

            # 2-2 conv layer
            nn.Conv2d(128, 128, kernel_size=3, padding=1),
            nn.BatchNorm2d(128),
            nn.ReLU(),

            # 2 Pooling layer
            nn.MaxPool2d(kernel_size=2, stride=2))

        self.layer3 = nn.Sequential(

            # 3-1 conv layer
            nn.Conv2d(128, 256, kernel_size=3, padding=1),
            nn.BatchNorm2d(256),
            nn.ReLU(),

            # 3-2 conv layer
            nn.Conv2d(256, 256, kernel_size=3, padding=1),
            nn.BatchNorm2d(256),
            nn.ReLU(),

            # 3-3 conv layer
            nn.Conv2d(256, 256, kernel_size=3, padding=1),
            nn.BatchNorm2d(256),
            nn.ReLU(),

            # 3 Pooling layer
            nn.MaxPool2d(kernel_size=2, stride=2))

        self.layer4 = nn.Sequential(

            # 4-1 conv layer
            nn.Conv2d(256, 512, kernel_size=3, padding=1),
            nn.BatchNorm2d(512),
            nn.ReLU(),

            # 4-2 conv layer
            nn.Conv2d(512, 512, kernel_size=3, padding=1),
            nn.BatchNorm2d(512),
            nn.ReLU(),

            # 4-3 conv layer
            nn.Conv2d(512, 512, kernel_size=3, padding=1),
            nn.BatchNorm2d(512),
            nn.ReLU(),

            # 4 Pooling layer
            nn.MaxPool2d(kernel_size=2, stride=2))

        self.layer5 = nn.Sequential(

            # 5-1 conv layer
            nn.Conv2d(512, 512, kernel_size=3, padding=1),
            nn.BatchNorm2d(512),
            nn.ReLU(),

            # 5-2 conv layer
            nn.Conv2d(512, 512, kernel_size=3, padding=1),
            nn.BatchNorm2d(512),
            nn.ReLU(),

            # 5-3 conv layer
            nn.Conv2d(512, 512, kernel_size=3, padding=1),
            nn.BatchNorm2d(512),
            nn.ReLU(),

            # 5 Pooling layer
            nn.MaxPool2d(kernel_size=2, stride=2))

        self.layer6 = nn.Sequential(

            # 6 Fully connected layer
            # Dropout layer omitted since batch normalization is used.
            nn.Linear(512 * 7 * 7, 4096),
            nn.BatchNorm1d(4096),
            nn.ReLU())

        self.layer7 = nn.Sequential(

            # 7 Fully connected layer
            # Dropout layer omitted since batch normalization is used.
            nn.Linear(4096, 4096),
            nn.BatchNorm1d(4096),
            nn.ReLU())

        self.layer8 = nn.Sequential(

            # 8 output layer
            nn.Linear(4096, 2),
            nn.BatchNorm1d(2),
            # Explicit class dimension: for the (batch, 2) tensor produced by
            # the Linear/BatchNorm1d pair this is what the deprecated
            # implicit-dim form selected, without the deprecation warning.
            nn.Softmax(dim=1))
Пример #22
0
    def __init__(self, n_classes=21, learned_billinear=False):
        """Assemble the FCN-16s layers on a VGG-style convolutional trunk.

        Parameters
        ----------
        n_classes : int
            Number of output channels of the final classifier convolution
            and of the ``score_pool4`` 1x1 convolution.
        learned_billinear : bool
            Learned upsampling is not supported by this model; a truthy
            value makes the constructor raise ``NotImplementedError`` after
            the layers have been created.
        """
        super(fcn16s, self).__init__()
        self.learned_billinear = learned_billinear
        self.n_classes = n_classes
        self.loss = functools.partial(cross_entropy2d, size_average=False)

        def _stage(in_ch, out_ch, n_convs, first_pad=1):
            # n_convs times (3x3 conv + in-place ReLU), then a ceil-mode
            # 2x2 max pooling with stride 2.
            layers = []
            for k in range(n_convs):
                src = in_ch if k == 0 else out_ch
                pad = first_pad if k == 0 else 1
                layers.append(nn.Conv2d(src, out_ch, 3, padding=pad))
                layers.append(nn.ReLU(inplace=True))
            layers.append(nn.MaxPool2d(2, stride=2, ceil_mode=True))
            return nn.Sequential(*layers)

        # Only the very first convolution uses the large padding of 100.
        self.conv_block1 = _stage(3, 64, 2, first_pad=100)
        self.conv_block2 = _stage(64, 128, 2)
        self.conv_block3 = _stage(128, 256, 3)
        self.conv_block4 = _stage(256, 512, 3)
        self.conv_block5 = _stage(512, 512, 3)

        # Fully convolutional replacement of the dense classifier head.
        head = [
            nn.Conv2d(512, 4096, 7),
            nn.ReLU(inplace=True),
            nn.Dropout2d(),
            nn.Conv2d(4096, 4096, 1),
            nn.ReLU(inplace=True),
            nn.Dropout2d(),
            nn.Conv2d(4096, self.n_classes, 1),
        ]
        self.classifier = nn.Sequential(*head)

        # 1x1 scoring convolution over a 512-channel feature map.
        self.score_pool4 = nn.Conv2d(512, self.n_classes, 1)

        # TODO: Add support for learned upsampling
        if self.learned_billinear:
            raise NotImplementedError
Пример #23
0
 def __init__(self, num_classes=1001):
     """Create the layers of the Inception-ResNet-v2 network.

     Parameters
     ----------
     num_classes : int
         Number of output features of the final linear layer ``classif``.
     """
     super(InceptionResnetV2, self).__init__()

     # Stem: plain convolutions interleaved with two max poolings.
     self.conv2d_1a = BasicConv2d(3, 32, kernel_size=3, stride=2)
     self.conv2d_2a = BasicConv2d(32, 32, kernel_size=3, stride=1)
     self.conv2d_2b = BasicConv2d(32, 64, kernel_size=3, stride=1, padding=1)
     self.maxpool_3a = nn.MaxPool2d(3, stride=2)
     self.conv2d_3b = BasicConv2d(64, 80, kernel_size=1, stride=1)
     self.conv2d_4a = BasicConv2d(80, 192, kernel_size=3, stride=1)
     self.maxpool_5a = nn.MaxPool2d(3, stride=2)

     # Ten Block35 units with scale 0.17.
     self.mixed_5b = Mixed_5b()
     self.repeat = nn.Sequential(
         *[Block35(scale=0.17) for _ in range(10)]
     )

     # Twenty Block17 units with scale 0.10.
     self.mixed_6a = Mixed_6a()
     self.repeat_1 = nn.Sequential(
         *[Block17(scale=0.10) for _ in range(20)]
     )

     # Nine Block8 units with scale 0.20, then one more built with noReLU.
     self.mixed_7a = Mixed_7a()
     self.repeat_2 = nn.Sequential(
         *[Block8(scale=0.20) for _ in range(9)]
     )
     self.block8 = Block8(noReLU=True)

     # Head: 1x1 convolution, global average pooling, linear classifier.
     self.conv2d_7b = BasicConv2d(2080, 1536, kernel_size=1, stride=1)
     self.avgpool_1a = nn.AdaptiveAvgPool2d((1, 1))
     self.classif = nn.Linear(1536, num_classes)
Пример #24
0
    def __init__(self, n_classes=21, learned_billinear=True):
        """Assemble the FCN-8s layers on a VGG-style convolutional trunk.

        Parameters
        ----------
        n_classes : int
            Number of output channels of the classifier, of the two 1x1
            scoring convolutions and of the transposed convolutions.
        learned_billinear : bool
            When truthy, three bias-free transposed convolutions
            (``upscore2``, ``upscore4``, ``upscore8``) are created and their
            weights are filled from ``get_upsampling_weight``.
        """
        super(fcn8s, self).__init__()
        self.learned_billinear = learned_billinear
        self.n_classes = n_classes
        self.loss = functools.partial(cross_entropy2d, size_average=False)

        def _stage(in_ch, out_ch, n_convs, first_pad=1):
            # n_convs times (3x3 conv + in-place ReLU), then a ceil-mode
            # 2x2 max pooling with stride 2.
            layers = []
            for k in range(n_convs):
                src = in_ch if k == 0 else out_ch
                pad = first_pad if k == 0 else 1
                layers.append(nn.Conv2d(src, out_ch, 3, padding=pad))
                layers.append(nn.ReLU(inplace=True))
            layers.append(nn.MaxPool2d(2, stride=2, ceil_mode=True))
            return nn.Sequential(*layers)

        # Only the very first convolution uses the large padding of 100.
        self.conv_block1 = _stage(3, 64, 2, first_pad=100)
        self.conv_block2 = _stage(64, 128, 2)
        self.conv_block3 = _stage(128, 256, 3)
        self.conv_block4 = _stage(256, 512, 3)
        self.conv_block5 = _stage(512, 512, 3)

        # Fully convolutional replacement of the dense classifier head.
        head = [
            nn.Conv2d(512, 4096, 7),
            nn.ReLU(inplace=True),
            nn.Dropout2d(),
            nn.Conv2d(4096, 4096, 1),
            nn.ReLU(inplace=True),
            nn.Dropout2d(),
            nn.Conv2d(4096, self.n_classes, 1),
        ]
        self.classifier = nn.Sequential(*head)

        # 1x1 scoring convolutions over 512- and 256-channel feature maps.
        self.score_pool4 = nn.Conv2d(512, self.n_classes, 1)
        self.score_pool3 = nn.Conv2d(256, self.n_classes, 1)

        if self.learned_billinear:
            # (attribute name, kernel size, stride) in creation order.
            deconv_specs = (
                ("upscore2", 4, 2),
                ("upscore4", 4, 2),
                ("upscore8", 16, 8),
            )
            for attr, kernel, step in deconv_specs:
                setattr(
                    self, attr,
                    nn.ConvTranspose2d(self.n_classes,
                                       self.n_classes,
                                       kernel,
                                       stride=step,
                                       bias=False))

        # Overwrite the weights of every transposed convolution with the
        # kernel returned by get_upsampling_weight.
        deconvs = [
            layer for layer in self.modules()
            if isinstance(layer, nn.ConvTranspose2d)
        ]
        for layer in deconvs:
            kernel = get_upsampling_weight(layer.in_channels,
                                           layer.out_channels,
                                           layer.kernel_size[0])
            layer.weight.data.copy_(kernel)
Пример #25
0
 def __init__(self):
     """Create a 5x5 convolution, a 2x2 max pooling and a linear layer.

     The linear layer maps 2304 input features to 10 outputs.
     """
     super(Model, self).__init__()
     self.conv = nn.Conv2d(in_channels=1, out_channels=16, kernel_size=5)
     self.pool = nn.MaxPool2d(kernel_size=2, stride=2)
     self.fc = nn.Linear(in_features=2304, out_features=10)
Пример #26
0
def create_modules(module_defs):
    """
    Constructs module list of layer blocks from module configuration in module_defs

    The first entry of ``module_defs`` holds the network hyperparameters. It
    is removed from the list *in place* (the caller's list is one element
    shorter afterwards) and returned as the first result.

    Args:
        module_defs (list[dict]): hyperparameter dict followed by one dict
            per layer. Each layer dict has a ``"type"`` key; all values are
            read as strings and converted with ``int``.

    Returns:
        tuple: ``(hyperparams, module_list)`` where ``module_list`` is an
        ``nn.ModuleList`` holding one ``nn.Sequential`` per layer dict. A
        layer with an unrecognised type yields an empty ``nn.Sequential``.

    Raises:
        IndexError: if ``module_defs`` is empty.
        KeyError: if a layer dict lacks a key required by its type.
    """
    hyperparams = module_defs.pop(0)
    # output_filters[0] is the input channel count; output_filters[k + 1] is
    # the output channel count of layer k.
    output_filters = [int(hyperparams["channels"])]
    module_list = nn.ModuleList()
    for module_i, module_def in enumerate(module_defs):
        modules = nn.Sequential()
        # Layers that do not set their own channel count (maxpool, upsample,
        # yolo, unknown) pass the incoming count through. Without this
        # default a config starting with such a layer hit an unbound local.
        filters = output_filters[-1]
        layer_type = module_def["type"]

        if layer_type == "convolutional":
            bn = int(module_def["batch_normalize"])
            filters = int(module_def["filters"])
            kernel_size = int(module_def["size"])
            pad = (kernel_size - 1) // 2
            modules.add_module(
                f"conv_{module_i}",
                nn.Conv2d(
                    in_channels=output_filters[-1],
                    out_channels=filters,
                    kernel_size=kernel_size,
                    stride=int(module_def["stride"]),
                    padding=pad,
                    # Convolution bias is dropped when batch norm follows.
                    bias=not bn,
                ),
            )
            if bn:
                # NOTE(review): PyTorch's momentum is the weight given to the
                # new batch statistic; confirm that 0.9 is intended here.
                modules.add_module(f"batch_norm_{module_i}", nn.BatchNorm2d(filters, momentum=0.9, eps=1e-5))
            if module_def["activation"] == "leaky":
                modules.add_module(f"leaky_{module_i}", nn.LeakyReLU(0.1))

        elif layer_type == "maxpool":
            kernel_size = int(module_def["size"])
            stride = int(module_def["stride"])
            if kernel_size == 2 and stride == 1:
                # Pad one pixel on the right and bottom before the pooling.
                modules.add_module(f"_debug_padding_{module_i}", nn.ZeroPad2d((0, 1, 0, 1)))
            maxpool = nn.MaxPool2d(kernel_size=kernel_size, stride=stride, padding=int((kernel_size - 1) // 2))
            modules.add_module(f"maxpool_{module_i}", maxpool)

        elif layer_type == "upsample":
            upsample = Upsample(scale_factor=int(module_def["stride"]), mode="nearest")
            modules.add_module(f"upsample_{module_i}", upsample)

        elif layer_type == "route":
            layers = [int(x) for x in module_def["layers"].split(",")]
            # Skip the input entry so that index k addresses layer k.
            layer_outputs = output_filters[1:]
            filters = sum(layer_outputs[i] for i in layers)
            modules.add_module(f"route_{module_i}", EmptyLayer())

        elif layer_type == "shortcut":
            filters = output_filters[1:][int(module_def["from"])]
            modules.add_module(f"shortcut_{module_i}", EmptyLayer())

        elif layer_type == "yolo":
            anchor_idxs = [int(x) for x in module_def["mask"].split(",")]
            # Extract anchors: flat "w,h,w,h,..." list -> (w, h) pairs, then
            # keep only the pairs selected by the mask.
            anchors = [int(x) for x in module_def["anchors"].split(",")]
            anchors = [(anchors[i], anchors[i + 1]) for i in range(0, len(anchors), 2)]
            anchors = [anchors[i] for i in anchor_idxs]
            num_classes = int(module_def["classes"])
            img_size = int(hyperparams["height"])
            # Define detection layer
            yolo_layer = YOLOLayer(anchors, num_classes, img_size)
            modules.add_module(f"yolo_{module_i}", yolo_layer)

        # Register module list and number of output filters
        module_list.append(modules)
        output_filters.append(filters)

    return hyperparams, module_list
Пример #27
0
 def __init__(self, inp=10, out=16, kernel_size=3, bias=True):
     """Create one convolution followed by a fixed 3x3 max pooling.

     Parameters
     ----------
     inp : int
         Input channels of the convolution.
     out : int
         Output channels of the convolution.
     kernel_size : int
         Kernel size of the convolution only; the pooling window is
         always 3 with padding 1.
     bias : bool
         Whether the convolution has a bias term.
     """
     super(MaxPool, self).__init__()
     self.conv2d = nn.Conv2d(in_channels=inp,
                             out_channels=out,
                             kernel_size=kernel_size,
                             bias=bias)
     self.pool = nn.MaxPool2d(3, padding=1)
Пример #28
0
def create_modules(module_defs):  # builds the whole network pipeline
    """
    Constructs module list of layer blocks from module configuration in module_defs

    The first entry of ``module_defs`` is removed from the list in place and
    returned as the hyperparameter dict.

    Args:
        module_defs (List): list holding the network hyperparameters (first
            entry) followed by one dict per layer.

    Returns:
        dict: hyperparams, the network hyperparameters
        torch.nn.ModuleList: module_list, one ``nn.Sequential`` per layer

    Raises:
        IndexError: if ``module_defs`` is empty.
        KeyError: if a layer dict lacks a key required by its type.
    """
    # Take the first dict, which stores the network hyperparameters and
    # provides the initial input settings.
    hyperparams = module_defs.pop(0)
    # Output channel count of every layer so far; route and shortcut layers
    # use it to compute their own output channel count.
    output_filters = [int(hyperparams["channels"])]
    # Holds every layer; returned at the end for use in the forward pass.
    module_list = nn.ModuleList()
    # The running index is used to name the layers.
    for module_i, module_def in enumerate(module_defs):
        # Every layer is wrapped in its own Sequential.
        modules = nn.Sequential()
        # Layers that do not set their own channel count (maxpool, upsample,
        # yolo, unknown) pass the incoming count through. Without this
        # default a config starting with such a layer hit an unbound local.
        filters = output_filters[-1]
        layer_type = module_def["type"]

        if layer_type == "convolutional":
            bn = int(module_def["batch_normalize"])
            filters = int(module_def["filters"])
            kernel_size = int(module_def["size"])
            pad = (kernel_size - 1) // 2
            modules.add_module(
                f"conv_{module_i}",
                nn.Conv2d(
                    in_channels=output_filters[-1],
                    out_channels=filters,
                    kernel_size=kernel_size,
                    stride=int(module_def["stride"]),
                    padding=pad,
                    # Convolution bias is dropped when batch norm follows.
                    bias=not bn,
                ),
            )
            if bn:
                # The convolution is followed by batch normalisation.
                # NOTE(review): PyTorch's momentum is the weight given to the
                # new batch statistic; confirm that 0.9 is intended here.
                modules.add_module(f"batch_norm_{module_i}", nn.BatchNorm2d(filters, momentum=0.9, eps=1e-5))
            if module_def["activation"] == "leaky":
                # Append the Leaky ReLU activation.
                modules.add_module(f"leaky_{module_i}", nn.LeakyReLU(0.1))

        elif layer_type == "maxpool":
            kernel_size = int(module_def["size"])
            stride = int(module_def["stride"])
            if kernel_size == 2 and stride == 1:
                # Pad one pixel on the right and bottom before the pooling.
                modules.add_module(f"_debug_padding_{module_i}", nn.ZeroPad2d((0, 1, 0, 1)))
            maxpool = nn.MaxPool2d(kernel_size=kernel_size, stride=stride, padding=int((kernel_size - 1) // 2))
            modules.add_module(f"maxpool_{module_i}", maxpool)

        elif layer_type == "upsample":
            # Upsampling layer configured with the scale factor and the
            # interpolation mode.
            upsample = Upsample(scale_factor=int(module_def["stride"]), mode="nearest")
            modules.add_module(f"upsample_{module_i}", upsample)

        elif layer_type == "route":
            # Indices of the layers being routed; their channel counts add up.
            layers = [int(x) for x in module_def["layers"].split(",")]
            # Skip entry 0 (the network input channels) so that index k
            # addresses the output of layer k.
            layer_outputs = output_filters[1:]
            filters = sum(layer_outputs[i] for i in layers)
            modules.add_module(f"route_{module_i}", EmptyLayer())

        elif layer_type == "shortcut":
            # The output channel count is taken from the referenced layer.
            filters = output_filters[1:][int(module_def["from"])]
            modules.add_module(f"shortcut_{module_i}", EmptyLayer())

        elif layer_type == "yolo":
            # Indices of the anchors this detection layer uses; each
            # prediction layer selects its own anchors.
            anchor_idxs = [int(x) for x in module_def["mask"].split(",")]
            # Extract anchors
            anchors = [int(x) for x in module_def["anchors"].split(",")]
            # Pair up the flat list, e.g. [(10, 13), (16, 30), (33, 23), ...]
            anchors = [(anchors[i], anchors[i + 1]) for i in range(0, len(anchors), 2)]
            # Keep only the anchors selected for this prediction layer.
            anchors = [anchors[i] for i in anchor_idxs]
            # Number of classes predicted by this layer.
            num_classes = int(module_def["classes"])
            # Input image size of the network, passed on to the YOLO layer.
            img_size = int(hyperparams["height"])
            # Define detection layer
            yolo_layer = YOLOLayer(anchors, num_classes, img_size)
            modules.add_module(f"yolo_{module_i}", yolo_layer)

        # Register module list and number of output filters
        module_list.append(modules)
        output_filters.append(filters)

    return hyperparams, module_list
Пример #29
0
    def __init__(self,
                 image_dim=128,
                 memory_dim=128,
                 instr_dim=128,
                 num_embeddings=3,
                 num_rnn_layers=1,
                 vocabulary=None,
                 max_tau=0.2,
                 greedy=True,
                 corr_length=2,
                 var_len=False,
                 script=False,
                 obs_space=None):
        """Set up the vocabulary and every sub-module of the model.

        Parameters
        ----------
        image_dim : int
            Output features of the last FiLM controller and input size of
            the memory LSTM cell.
        memory_dim : int
            Hidden size of the memory LSTM cell and of the decoder GRU.
        instr_dim : int
            Embedding size for instructions and correction words.
        num_embeddings : int
            Number of correction words. Incremented by one when ``var_len``
            is set and no ``vocabulary`` is given (room for ``'<eos>'``).
        num_rnn_layers : int
            Number of layers of the decoder GRU.
        vocabulary : object or None
            External vocabulary providing ``idx2word`` and item lookup of
            ``'<S>'``. When None an own vocabulary ``w0, w1, ...`` is built.
        max_tau : float
            Stored as ``self.max_tau``.
        greedy : bool
            Stored as ``self.greedy``.
        corr_length : int
            Maximum length of a correction message.
        var_len : bool
            Enables variable correction lengths.
        script : bool
            Stored as ``self.script``.
        obs_space : mapping
            Its ``"instr"`` entry gives the size of the instruction
            embedding table.

        NOTE(review): ``obs_space`` defaults to None but is subscripted
        unconditionally, so the default cannot actually be used. Also,
        ``eos_id`` and ``vocab_word2idx`` are only set when ``vocabulary``
        is None - confirm callers never need them otherwise.
        """
        super().__init__()

        self.image_dim = image_dim
        self.memory_dim = memory_dim
        self.instr_dim = instr_dim

        self.num_embeddings = num_embeddings
        self.num_rnn_layers = num_rnn_layers

        self.obs_space = obs_space

        self.var_len = var_len  # variable correction lengths

        if vocabulary is None:
            # The model uses its own vocabulary (standard case).
            idx_to_word = {}
            for idx in range(self.num_embeddings):
                idx_to_word[idx] = 'w' + str(idx)
            self.vocab_idx2word = idx_to_word
            self.sos_id = 0
            if self.var_len:
                # Reserve one extra index for the end-of-sequence symbol.
                eos_index = self.num_embeddings
                self.vocab_idx2word[eos_index] = '<eos>'
                self.eos_id = eos_index
                self.num_embeddings = eos_index + 1
            self.vocab_word2idx = {
                word: idx
                for idx, word in self.vocab_idx2word.items()
            }
        else:
            # Vocabulary object handed in by the caller.
            self.vocab = vocabulary
            self.vocab_idx2word = self.vocab.idx2word
            # Index of the start-of-sequence symbol in that vocabulary.
            self.sos_id = self.vocab['<S>']

        # Instruction encoder.
        self.instr_embedding = nn.Embedding(obs_space["instr"], self.instr_dim)
        self.instr_rnn = nn.GRU(self.instr_dim,
                                self.instr_dim,
                                batch_first=True)

        # Image encoder: two conv/batch-norm/ReLU/max-pool groups.
        conv_layers = [
            nn.Conv2d(in_channels=3,
                      out_channels=128,
                      kernel_size=(2, 2),
                      padding=1),
            nn.BatchNorm2d(128),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=(2, 2), stride=2),
            nn.Conv2d(in_channels=128,
                      out_channels=128,
                      kernel_size=(3, 3),
                      padding=1),
            nn.BatchNorm2d(128),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=(2, 2), stride=2),
        ]
        self.image_conv = nn.Sequential(*conv_layers)
        self.film_pool = nn.MaxPool2d(kernel_size=(2, 2), stride=2)

        # FiLM controllers: all but the last emit 128 features, the last one
        # emits image_dim features.
        film_stages = 2
        self.controllers = []
        for ni in range(film_stages):
            is_last = ni >= film_stages - 1
            mod = ExpertControllerFiLM(
                in_features=self.instr_dim,
                out_features=self.image_dim if is_last else 128,
                in_channels=128,
                imm_channels=128)
            self.controllers.append(mod)
            # The plain list above is not tracked by nn.Module, so each
            # controller is also registered explicitly.
            self.add_module('FiLM_Controler_' + str(ni), mod)

        self.memory_rnn = nn.LSTMCell(self.image_dim, self.memory_dim)

        # Decoder that produces the correction message.
        self.word_embedding_corrector = nn.Embedding(
            num_embeddings=self.num_embeddings, embedding_dim=self.instr_dim)

        self.decoder_rnn = nn.GRU(input_size=self.instr_dim,
                                  hidden_size=self.memory_dim,
                                  num_layers=self.num_rnn_layers,
                                  batch_first=True)

        self.out = nn.Linear(self.memory_dim, self.num_embeddings)

        # Learned Gumbel-softmax temperature tau
        # (following https://arxiv.org/pdf/1701.08718.pdf).
        self.tau_layer = nn.Sequential(nn.Linear(self.memory_dim, 1),
                                       nn.Softplus())
        self.max_tau = max_tau

        # Maximum length of a correction message (always this length when
        # variable length is disabled).
        self.corr_length = corr_length
        self.greedy = greedy

        self.random_corrector = False

        if self.var_len:
            self.correction_loss = nn.CrossEntropyLoss()

        self.script = script

        self.apply(initialize_parameters)
        self.device = torch.device(
            "cuda" if torch.cuda.is_available() else "cpu")
def create_modules(module_defs):
    """
    Constructs module list of layer blocks from module configuration in module_defs

    The first entry of ``module_defs`` holds the network hyperparameters. It
    is removed from the list *in place* and returned as the first result.

    Args:
        module_defs (list[dict]): hyperparameter dict followed by one dict
            per layer, each with a ``'type'`` key.

    Returns:
        tuple: ``(hyperparams, module_list)`` where ``module_list`` is an
        ``nn.ModuleList`` holding one ``nn.Sequential`` per layer dict. A
        layer with an unrecognised type yields an empty ``nn.Sequential``.

    Raises:
        IndexError: if ``module_defs`` is empty.
        KeyError: if a layer dict lacks a key required by its type.
    """
    hyperparams = module_defs.pop(0)
    # output_filters[0] is the input channel count; output_filters[k + 1] is
    # the output channel count of layer k.
    output_filters = [int(hyperparams['channels'])]
    module_list = nn.ModuleList()
    for i, module_def in enumerate(module_defs):
        modules = nn.Sequential()
        # Layers that do not set their own channel count (maxpool, upsample,
        # yolo, unknown) pass the incoming count through. Without this
        # default a config starting with such a layer hit an unbound local.
        filters = output_filters[-1]
        layer_type = module_def['type']

        if layer_type == 'convolutional':
            bn = int(module_def['batch_normalize'])
            filters = int(module_def['filters'])
            kernel_size = int(module_def['size'])
            # "Same"-style padding only when the config asks for padding.
            pad = (kernel_size - 1) // 2 if int(module_def['pad']) else 0
            modules.add_module(
                'conv_%d' % i,
                nn.Conv2d(in_channels=output_filters[-1],
                          out_channels=filters,
                          kernel_size=kernel_size,
                          stride=int(module_def['stride']),
                          padding=pad,
                          # Bias is dropped when batch norm follows.
                          bias=not bn))
            if bn:
                modules.add_module('batch_norm_%d' % i,
                                   nn.BatchNorm2d(filters))
            if module_def['activation'] == 'leaky':
                modules.add_module('leaky_%d' % i, nn.LeakyReLU(0.1))

        elif layer_type == 'maxpool':
            kernel_size = int(module_def['size'])
            stride = int(module_def['stride'])
            if kernel_size == 2 and stride == 1:
                # Pad one pixel on the right and bottom before the pooling.
                modules.add_module('_debug_padding_%d' % i,
                                   nn.ZeroPad2d((0, 1, 0, 1)))
            maxpool = nn.MaxPool2d(kernel_size=kernel_size,
                                   stride=stride,
                                   padding=int((kernel_size - 1) // 2))
            modules.add_module('maxpool_%d' % i, maxpool)

        elif layer_type == 'upsample':
            upsample = Upsample(scale_factor=int(module_def['stride']),
                                mode='nearest')
            modules.add_module('upsample_%d' % i, upsample)

        elif layer_type == 'route':
            layers = [int(x) for x in module_def['layers'].split(',')]
            # Non-negative entries are absolute layer indices, whose output
            # sits one slot further because slot 0 is the network input;
            # negative entries index from the end. Index 0 must be shifted
            # too, otherwise it would read the input channel count.
            filters = sum(
                output_filters[idx + 1 if idx >= 0 else idx]
                for idx in layers)
            modules.add_module('route_%d' % i, EmptyLayer())

        elif layer_type == 'shortcut':
            filters = output_filters[int(module_def['from'])]
            modules.add_module('shortcut_%d' % i, EmptyLayer())

        elif layer_type == 'yolo':
            anchor_idxs = [int(x) for x in module_def['mask'].split(',')]
            # Extract anchors: flat "w,h,w,h,..." list -> (w, h) pairs, then
            # keep only the pairs selected by the mask.
            anchors = [float(x) for x in module_def['anchors'].split(',')]
            anchors = [(anchors[k], anchors[k + 1])
                       for k in range(0, len(anchors), 2)]
            anchors = [anchors[k] for k in anchor_idxs]
            num_classes = int(module_def['classes'])
            img_height = int(hyperparams['height'])
            # Define detection layer
            yolo_layer = YOLOLayer(anchors,
                                   num_classes,
                                   img_height,
                                   anchor_idxs,
                                   cfg=hyperparams['cfg'])
            modules.add_module('yolo_%d' % i, yolo_layer)

        # Register module list and number of output filters
        module_list.append(modules)
        output_filters.append(filters)

    return hyperparams, module_list