Exemplo n.º 1
0
    def __init__(self,
                 codeword_multiplier,
                 sparsity_multiplier,
                 centers,
                 intercept,
                 block,
                 nblocks,
                 growth_rate=12,
                 reduction=0.5,
                 nb_class=10):
        """Assemble the DenseNet trunk plus three GlobalLocalLabel layers.

        Args:
            codeword_multiplier: Factor applied to each tapped channel count
                to form the second argument of ``Layers.GlobalLocalLabel``.
            sparsity_multiplier: Combined with ``codeword_multiplier`` and the
                tapped channel count, then truncated with ``int``, to form
                the third argument of ``Layers.GlobalLocalLabel``.
            centers: Indexable with entries 0..2, forwarded one per sparse
                layer as ``centers=``; ``None`` means "three ``None``s".
            intercept: Same convention as ``centers``, forwarded as
                ``intercept=``.
            block: Forwarded unchanged to ``self._make_dense_layers``.
            nblocks: Indexable with four entries (0..3), one per dense stage.
            growth_rate: Channels added per block of a stage; also stored on
                ``self.growth_rate``.
            reduction: Ratio applied (then floored) to get the output width
                of each ``Transition``.
            nb_class: Output features of the final linear layer.
        """
        super(GlobalLocalLabelDenseNet, self).__init__()

        # Stored before any stage is built (the stage builder is not visible
        # here; presumably it reads this attribute).
        self.growth_rate = growth_rate

        def _compress(width):
            # Output width of a transition fed with ``width`` channels.
            return int(math.floor(width * reduction))

        stem_width = 2 * growth_rate
        self.conv1 = nn.Conv2d(3, stem_width, kernel_size=3, padding=1)

        # Stage 1: dense blocks, then a compressing transition (first tap).
        self.dense1 = self._make_dense_layers(block, stem_width, nblocks[0])
        grown = stem_width + nblocks[0] * growth_rate
        tap1_width = _compress(grown)
        self.trans1 = Transition(grown, tap1_width)

        # Stage 2: same pattern (second tap).
        self.dense2 = self._make_dense_layers(block, tap1_width, nblocks[1])
        grown = tap1_width + nblocks[1] * growth_rate
        tap2_width = _compress(grown)
        self.trans2 = Transition(grown, tap2_width)

        # Stage 3: same pattern; its transition output is not tapped.
        self.dense3 = self._make_dense_layers(block, tap2_width, nblocks[2])
        grown = tap2_width + nblocks[2] * growth_rate
        stage3_width = _compress(grown)
        self.trans3 = Transition(grown, stage3_width)

        # Stage 4 has no transition; its full width feeds the head (third tap).
        self.dense4 = self._make_dense_layers(block, stage3_width, nblocks[3])
        head_width = stage3_width + nblocks[3] * growth_rate

        self.bn = nn.BatchNorm2d(head_width)
        self.linear = nn.Linear(head_width, nb_class)

        centers = [None, None, None] if centers is None else centers
        intercept = [None, None, None] if intercept is None else intercept

        scaled_sparsity = sparsity_multiplier * codeword_multiplier
        self.sparse_layers = nn.ModuleList()
        for idx, width in enumerate((tap1_width, tap2_width, head_width)):
            sparse = Layers.GlobalLocalLabel(width,
                                             width * codeword_multiplier,
                                             int(width * scaled_sparsity),
                                             centers=centers[idx],
                                             intercept=intercept[idx])
            self.sparse_layers.append(sparse)

        self.initialize()
Exemplo n.º 2
0
    def __init__(self,
                 codeword_multiplier,
                 sparsity_multiplier,
                 block,
                 layers,
                 num_classes=1000,
                 zero_init_residual=False,
                 groups=1,
                 width_per_group=64,
                 replace_stride_with_dilation=None,
                 norm_layer=None):
        """Build the ResNet trunk and one GlobalLocalLabel layer per tap.

        Args:
            codeword_multiplier: Factor applied to each tapped channel count
                to form the second argument of ``Layers.GlobalLocalLabel``.
            sparsity_multiplier: Combined with ``codeword_multiplier`` and the
                tapped channel count, then truncated with ``int``, to form
                the third argument of ``Layers.GlobalLocalLabel``.
            block: Residual block class forwarded to ``self._make_layer``;
                must expose an ``expansion`` attribute.
            layers: Indexable with four entries (0..3), one per stage.
            num_classes: Output features of the final ``fc`` layer.
            zero_init_residual: If true, set the weight of the last norm
                layer in every ``Bottleneck`` (``bn3``) / ``BasicBlock``
                (``bn2``) to zero.
            groups: Stored on ``self.groups``.
            width_per_group: Stored on ``self.base_width``.
            replace_stride_with_dilation: ``None`` or a 3-element sequence of
                flags, forwarded as ``dilate=`` to stages 2-4.
            norm_layer: Callable building a norm layer from a channel count;
                defaults to ``nn.BatchNorm2d``.

        Raises:
            ValueError: If ``replace_stride_with_dilation`` is given and does
                not have exactly three elements.
        """
        super(GlobalLocalLabelResNet, self).__init__()
        if norm_layer is None:
            norm_layer = nn.BatchNorm2d
        self._norm_layer = norm_layer

        # Channel counts at the points where a sparse layer is attached.
        in_dims = []
        self.inplanes = 64
        self.dilation = 1
        if replace_stride_with_dilation is None:
            # each element in the tuple indicates if we should replace
            # the 2x2 stride with a dilated convolution instead
            replace_stride_with_dilation = [False, False, False]
        if len(replace_stride_with_dilation) != 3:
            raise ValueError("replace_stride_with_dilation should be None "
                             "or a 3-element tuple, got {}".format(
                                 replace_stride_with_dilation))
        self.groups = groups
        self.base_width = width_per_group
        self.conv1 = nn.Conv2d(3,
                               self.inplanes,
                               kernel_size=7,
                               stride=2,
                               padding=3,
                               bias=False)

        self.bn1 = norm_layer(self.inplanes)
        self.relu = nn.ReLU(inplace=True)
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)

        # First tap: the stem width, recorded before layer1 is built.
        in_dims.append(self.inplanes)
        self.layer1 = self._make_layer(block, 64, layers[0])

        self.layer2 = self._make_layer(block,
                                       128,
                                       layers[1],
                                       stride=2,
                                       dilate=replace_stride_with_dilation[0])

        # NOTE(review): self.inplanes is presumably advanced by _make_layer
        # (not visible here), so this records the width after layer2.
        in_dims.append(self.inplanes)

        self.layer3 = self._make_layer(block,
                                       256,
                                       layers[2],
                                       stride=2,
                                       dilate=replace_stride_with_dilation[1])

        in_dims.append(self.inplanes)

        self.layer4 = self._make_layer(block,
                                       512,
                                       layers[3],
                                       stride=2,
                                       dilate=replace_stride_with_dilation[2])

        # Recorded twice on purpose, giving five taps in total.
        # NOTE(review): presumably one for the layer4 output and one for the
        # pooled features -- confirm against forward().
        in_dims.append(self.inplanes)
        in_dims.append(self.inplanes)

        self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
        self.fc = nn.Linear(512 * block.expansion, num_classes)

        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.kaiming_normal_(m.weight,
                                        mode='fan_out',
                                        nonlinearity='relu')
            elif isinstance(m, (nn.BatchNorm2d, nn.GroupNorm)):
                nn.init.constant_(m.weight, 1)
                nn.init.constant_(m.bias, 0)

        # Zero-initialize the last BN in each residual branch,
        # so that the residual branch starts with zeros, and each residual block behaves like an identity.
        # This improves the model by 0.2~0.3% according to https://arxiv.org/abs/1706.02677
        if zero_init_residual:
            for m in self.modules():
                if isinstance(m, Bottleneck):
                    nn.init.constant_(m.bn3.weight, 0)
                elif isinstance(m, BasicBlock):
                    nn.init.constant_(m.bn2.weight, 0)

        # The sparse layers are created after the init loops above, so those
        # loops never visit them.
        sparsity_multiplier *= codeword_multiplier
        self.sparse_layers = nn.ModuleList()

        for input_dim in in_dims:
            # Truncate to int, matching the DenseNet variant of this model;
            # a fractional multiplier would otherwise pass a float here.
            self.sparse_layers.append(
                Layers.GlobalLocalLabel(input_dim,
                                        input_dim * codeword_multiplier,
                                        int(input_dim * sparsity_multiplier)))

        self.in_dims = in_dims