# Example no. 1 (score: 0)
    def _make_blocks(self, blocks, in_channels, channels):
        """Build one stage of `blocks` searchable blocks.

        The first block of the stage downsamples (stride 2) and maps
        `in_channels` to `channels`; every later block keeps stride 1 and
        `channels` in/out. Each position is a LayerChoice over ShuffleNet
        blocks with kernel sizes 3/5/7 plus a Xception-style variant, and
        the pre-computed FLOPs of every candidate are recorded under the
        choice's key.
        """
        stage = []
        for idx in range(blocks):
            first = idx == 0
            stride = 2 if first else 1
            inp = in_channels if first else channels
            oup = channels
            mid_channels = int(channels // 2)  # prepare for scale

            candidates = [
                ShuffleNetBlock(inp, oup, mid_channels=mid_channels,
                                ksize=k, stride=stride, affine=self._affine)
                for k in (3, 5, 7)
            ]
            candidates.append(
                ShuffleXceptionBlock(inp, oup, mid_channels=mid_channels,
                                     stride=stride, affine=self._affine))
            choice_block = mutables.LayerChoice(candidates)
            stage.append(choice_block)

            # look up the FLOPs of each candidate op at this position
            flop_key = (inp, oup, mid_channels, self._feature_map_size,
                        self._feature_map_size, stride)
            self._parsed_flops[choice_block.key] = [
                self._op_flops_dict["{}_stride_{}".format(k, stride)][flop_key]
                for k in self.block_keys
            ]
            if stride == 2:
                self._feature_map_size //= 2
        return stage
# Example no. 2 (score: 0)
    def _make_blocks(self, blocks, in_channels, channels):
        """Build one stage of `blocks` searchable blocks.

        The first block of the stage downsamples (stride 2) and maps
        `in_channels` to `channels`; the remaining blocks keep stride 1.
        Each position is a LayerChoice over ShuffleNet blocks with kernel
        sizes 3/5/7 plus a Xception-style block, labelled with a running
        per-model counter.
        """
        stage = []
        for idx in range(blocks):
            first = idx == 0
            stride = 2 if first else 1
            inp = in_channels if first else channels
            oup = channels
            mid_channels = int(channels // 2)  # prepare for scale

            self._layerchoice_count += 1
            candidates = [
                ShuffleNetBlock(inp, oup, mid_channels=mid_channels,
                                ksize=k, stride=stride, affine=self._affine)
                for k in (3, 5, 7)
            ]
            candidates.append(
                ShuffleXceptionBlock(inp, oup, mid_channels=mid_channels,
                                     stride=stride, affine=self._affine))
            stage.append(
                LayerChoice(candidates,
                            label="LayerChoice" +
                            str(self._layerchoice_count)))

            if stride == 2:
                self._feature_map_size //= 2
        return stage
    def __init__(self,
                 input_size=224,
                 n_class=1000,
                 architecture=None,
                 channels_scales=None,
                 use_all_blocks=False,
                 bn=nn.BatchNorm2d,
                 use_se=False,
                 last_conv_after_pooling=False,
                 shuffle_method=ShuffleChannels,
                 stage_out_channels=None,
                 candidate_scales=None):
        """Build a ShuffleNetV2 one-shot supernet or a fixed sub-network.

        When both `architecture` and `channels_scales` are supplied, the
        exact network they describe is assembled out of plain
        ``ShuffleNetBlock``s; when both are ``None``, a searchable
        supernet of ``ShuffleNasBlock``s is built instead. Any other
        combination raises ``ValueError``.

        Parameters
        ----------
        input_size : int
            Input resolution; must be a multiple of 32.
        n_class : int
            Number of classifier outputs.
        architecture : list[int] or None
            One choice index per block: 0/1/2 select a ShuffleNetV2
            block with kernel size 3/5/7, 3 selects ShuffleXception.
        channels_scales : list[float] or None
            One mid-channel width multiplier per block; must match
            `architecture` in length.
        use_all_blocks : bool
            Forwarded to ``ShuffleNasBlock`` (supernet mode only).
        bn : callable
            Normalization-layer factory (default ``nn.BatchNorm2d``).
        use_se : bool
            ShuffleNetV2+ mode: hard-swish activations from stage 1 and
            squeeze-excite from stage 2 onward.
        last_conv_after_pooling : bool
            Place the last 1x1 conv after global pooling instead of
            before it.
        shuffle_method : type
            Channel-shuffle implementation used by the fixed blocks.
        stage_out_channels : list[int] or None
            Per-stage output channels; defaults to the standard table.
        candidate_scales : list[float] or None
            Channel-scale search space; defaults to 0.2 .. 2.0.

        Example layout::

            scale_cand_ids = [6, 5, 3, 5, 2, 6, 3, 4,
                2, 5, 7, 5, 4, 6, 7, 4, 4, 5, 4, 3]
            scale_candidate_list = [0.2, 0.4, 0.6,
                0.8, 1.0, 1.2, 1.4, 1.6, 1.8, 2.0]
            stage_repeats = [4, 4, 8, 4]
            len(scale_cand_ids) == sum(stage_repeats) == # feature blocks == 20
        """
        super(ShuffleNetV2_OneShot, self).__init__()

        assert input_size % 32 == 0

        self.stage_repeats = [4, 4, 8, 4]
        self.stage_out_channels = [
            -1, 16, 64, 160, 320, 640, 1024
        ] if stage_out_channels is None else stage_out_channels
        self.candidate_scales = [0.2, 0.4, 0.6, 0.8, 1.0, 1.2, 1.4, 1.6, 1.8, 2.0] \
            if candidate_scales is None else candidate_scales
        self.use_all_blocks = use_all_blocks
        self.use_se = use_se
        self.last_conv_after_pooling = last_conv_after_pooling

        # Either both search results are given (fixed network) or
        # neither is (searchable supernet); anything else is an error.
        if (architecture is None) != (channels_scales is None):
            raise ValueError(
                "architecture and channels_scales should be both None or not None.")
        fix_arch = architecture is not None
        if fix_arch:
            assert len(architecture) == len(channels_scales)
        self.fix_arch = fix_arch
        assert len(self.stage_repeats) == len(self.stage_out_channels) - 3

        # Activation used by the stem, the last conv and the classifier.
        act_type = 'hswish' if self.use_se else 'relu'

        # building first layer
        input_channel = self.stage_out_channels[1]
        self.first_conv = nn.Sequential(
            nn.Conv2d(3, input_channel, 3, 2, 1, bias=False),
            bn(input_channel),
            Activation(act_type),
        )

        # blockIndex -> (kernel size, block mode) for fixed architectures.
        block_choices = {
            0: (3, 'ShuffleNetV2'),     # Shuffle3x3
            1: (5, 'ShuffleNetV2'),     # Shuffle5x5
            2: (7, 'ShuffleNetV2'),     # Shuffle7x7
            3: (3, 'ShuffleXception'),  # Xception
        }

        self.features = []
        archIndex = 0
        for idxstage in range(len(self.stage_repeats)):
            numrepeat = self.stage_repeats[idxstage]
            output_channel = self.stage_out_channels[idxstage + 2]

            if self.use_se:
                # ShuffleNetV2+: hswish from stage 1, SE from stage 2 on.
                act_name = 'hswish' if idxstage >= 1 else 'relu'
                block_use_se = idxstage >= 2
            else:
                act_name = 'relu'
                block_use_se = False

            for i in range(numrepeat):
                # The first block of a stage downsamples; later blocks
                # see half the channels because of the channel split.
                if i == 0:
                    inp, outp, stride = input_channel, output_channel, 2
                else:
                    inp, outp, stride = input_channel // 2, output_channel, 1

                if fix_arch:
                    blockIndex = architecture[archIndex]
                    base_mid_channels = outp // 2
                    mid_channels = make_divisible(
                        int(base_mid_channels * channels_scales[archIndex]))
                    archIndex += 1
                    if blockIndex not in block_choices:
                        raise NotImplementedError
                    ksize, block_mode = block_choices[blockIndex]
                    self.features.append(
                        ShuffleNetBlock(inp,
                                        outp,
                                        mid_channels=mid_channels,
                                        bn=bn,
                                        ksize=ksize,
                                        stride=stride,
                                        block_mode=block_mode,
                                        use_se=block_use_se,
                                        act_name=act_name,
                                        shuffle_method=shuffle_method))
                else:
                    archIndex += 1
                    self.features.append(
                        ShuffleNasBlock(
                            input_channel,
                            output_channel,
                            stride=stride,
                            bn=bn,
                            max_channel_scale=self.candidate_scales[-1],
                            use_all_blocks=self.use_all_blocks,
                            use_se=block_use_se,
                            act_name=act_name))
                # update input_channel for next block
                input_channel = output_channel

        if fix_arch:
            self.features = nn.Sequential(*self.features)
        else:
            self.features = NasHybridSequential(self.features)

        last_channel = self.stage_out_channels[-1]
        if self.last_conv_after_pooling:
            # MobileNetV3 style: pool first, then the last 1x1 conv.
            self.conv_last = nn.Sequential(
                nn.AdaptiveAvgPool2d(1),
                nn.Conv2d(input_channel, last_channel, 1, 1, 0, bias=False),
                Activation(act_type),
            )
        elif self.use_se:
            # ShuffleNetV2+ approach: narrow conv -> pool -> SE -> expand.
            se_channel = make_divisible(last_channel * 0.75)
            self.conv_last = nn.Sequential(
                nn.Conv2d(input_channel, se_channel, 1, 1, 0, bias=False),
                bn(se_channel),
                nn.AdaptiveAvgPool2d(1),
                SE(se_channel),
                nn.Conv2d(se_channel, last_channel, 1, 1, 0, bias=False),
                Activation(act_type),
            )
        else:
            # Origin Oneshot NAS approach: conv + BN, then a fixed 7x7
            # average pool (assumes a 224 input -> 7x7 final feature map).
            self.conv_last = nn.Sequential(
                nn.Conv2d(input_channel, last_channel, 1, 1, 0, bias=False),
                bn(last_channel),
                Activation(act_type),
                nn.AvgPool2d(7),
            )
        self.dropout = nn.Dropout(0.2 if self.use_se else 0.1)
        self.classifier = nn.Sequential(
            nn.Linear(last_channel, n_class, bias=False),
            Activation(act_type),
        )
        self._initialize_weights()