Example #1
def __init__(self):
    super().__init__()
    # searchable hyperparameters: output channels and kernel size
    ch1 = nn.ValueChoice([16, 32])
    kernel = nn.ValueChoice([3, 5])
    self.conv1 = nn.Conv2d(1, ch1, kernel, padding=kernel // 2)
    self.batch_norm = nn.BatchNorm2d(ch1)
    self.conv2 = nn.Conv2d(ch1, 64, 3, padding=1)
    # choose one of three dropout rates
    self.dropout1 = nn.LayerChoice(
        [nn.Dropout(.25), nn.Dropout(.5), nn.Dropout(.75)])
    self.fc = nn.Linear(64, 10)
    # repeat the final linear block between 1 and 4 times
    self.rpfc = nn.Repeat(nn.Linear(10, 10), (1, 4))
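These snippets assume NNI's Retiarii namespace, where `nn` is `nni.retiarii.nn.pytorch` and the top-level module is decorated with `model_wrapper`. A minimal sketch of that surrounding context (assumed, not shown in the example):

# Assumed class context for the snippets on this page (a sketch):
import nni.retiarii.nn.pytorch as nn
from nni.retiarii import model_wrapper

@model_wrapper  # the outermost module of a search space must be wrapped
class Net(nn.Module):
    def __init__(self):
        super().__init__()
        ...  # layers as in Example #1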
Example #2
    def _make_stage(self, stage_idx, inp, oup, se, stride, act):
        # Create these up front because their labels depend on the running layer_count.
        exp, ks, se_blocks = [], [], []
        for _ in range(4):
            exp.append(
                nn.ValueChoice(list(self.expand_ratios),
                               label=f'exp_{self.layer_count}'))
            ks.append(nn.ValueChoice([3, 5, 7],
                                     label=f'ks_{self.layer_count}'))
            if se:
                # If SE is enabled, let each block choose between identity and SE.
                # Bind layer_count through a default argument: a plain closure
                # would late-bind and stamp every block with the final layer_count.
                se_blocks.append(lambda hidden_ch, lc=self.layer_count: nn.LayerChoice(
                    [nn.Identity(), SELayer(hidden_ch)],
                    label=f'se_{lc}'))
            else:
                se_blocks.append(None)
            self.layer_count += 1

        blocks = [
            # first block applies the given stride (2 when the stage downsamples)
            InvertedResidual(inp,
                             oup,
                             exp[0],
                             ks[0],
                             stride,
                             squeeze_and_excite=se_blocks[0],
                             activation_layer=act),
            # stride = 1 (default), so the residual connection is enabled automatically
            InvertedResidual(oup,
                             oup,
                             exp[1],
                             ks[1],
                             squeeze_and_excite=se_blocks[1],
                             activation_layer=act),
            InvertedResidual(oup,
                             oup,
                             exp[2],
                             ks[2],
                             squeeze_and_excite=se_blocks[2],
                             activation_layer=act),
            InvertedResidual(oup,
                             oup,
                             exp[3],
                             ks[3],
                             squeeze_and_excite=se_blocks[3],
                             activation_layer=act)
        ]

        # mutable depth
        return nn.Repeat(blocks, depth=(1, 4), label=f'depth_{stage_idx}')
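When `nn.Repeat` receives a list of modules together with a depth range, a sampled depth d keeps the first d entries, which is why the stride-2 block above sits at index 0. A toy illustration of this semantics (hypothetical layer sizes):

# Toy sketch of nn.Repeat depth semantics:
blocks = [nn.Linear(8, 8) for _ in range(4)]
rep = nn.Repeat(blocks, depth=(1, 4), label='toy_depth')
# a sampled depth d in [1, 4] keeps blocks[0:d], so the list must be ordered
# shallow-to-deep, with any downsampling block first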
Example #3
    def __init__(self, num_labels: int = 1000,
                 base_widths: Tuple[int, ...] = (32, 16, 32, 40, 80, 96, 192, 320, 1280),
                 dropout_rate: float = 0.,
                 width_mult: float = 1.0,
                 bn_eps: float = 1e-3,
                 bn_momentum: float = 0.1):

        super().__init__()

        assert len(base_widths) == 9
        # widths include the final stage as well
        widths = [make_divisible(width * width_mult, 8) for width in base_widths]
        downsamples = [True, False, True, True, True, False, True, False]

        self.num_labels = num_labels
        self.dropout_rate = dropout_rate
        self.bn_eps = bn_eps
        self.bn_momentum = bn_momentum

        self.stem = ConvBNReLU(3, widths[0], stride=2, norm_layer=nn.BatchNorm2d)

        blocks: List[nn.Module] = [
            # first stage is fixed
            DepthwiseSeparableConv(widths[0], widths[1], kernel_size=3, stride=1)
        ]

        # https://github.com/ultmaster/AceNAS/blob/46c8895fd8a05ffbc61a6b44f1e813f64b4f66b7/searchspace/proxylessnas/__init__.py#L21
        for stage in range(2, 8):
            # Rather than returning a fixed module here, we return a builder
            # that dynamically creates a module for each `repeat_idx`.
            builder = inverted_residual_choice_builder(
                [3, 6], [3, 5, 7], downsamples[stage], widths[stage - 1], widths[stage], f's{stage}')
            if stage < 7:
                blocks.append(nn.Repeat(builder, (1, 4), label=f's{stage}_depth'))
            else:
                # No depth mutation in the last stage:
                # call the builder directly to instantiate a single block.
                blocks.append(builder(0))

        self.blocks = nn.Sequential(*blocks)

        # final layers
        self.feature_mix_layer = ConvBNReLU(widths[7], widths[8], kernel_size=1, norm_layer=nn.BatchNorm2d)
        self.global_avg_pooling = nn.AdaptiveAvgPool2d(1)
        self.dropout_layer = nn.Dropout(dropout_rate)
        self.classifier = nn.Linear(widths[-1], num_labels)

        reset_parameters(self, bn_momentum=bn_momentum, bn_eps=bn_eps)
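The builder passed to `nn.Repeat` above is simply a callable that takes the repeat index and returns a fresh module. A reduced sketch with illustrative names (not the actual `inverted_residual_choice_builder`):

# Hypothetical builder: downsample only in the first repeated block
def toy_builder(repeat_idx: int) -> nn.Module:
    stride = 2 if repeat_idx == 0 else 1
    return nn.Conv2d(16, 16, 3, stride=stride, padding=1)

blocks = nn.Repeat(toy_builder, (1, 4), label='toy_stage_depth')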
Example #4
def __init__(self):
    super().__init__()
    self.stem = nn.Conv2d(1, 5, 7, stride=4)
    # Each op factory is called with three arguments (node index, op index,
    # input index). An input index below 1 selects the cell's own input:
    # 5 channels from the stem for the first cell, 3 * 4 channels (loose-end
    # concat of up to 3 nodes x 4 channels) for later cells; internal nodes
    # always carry 4 channels.
    self.cells = nn.Repeat(
        lambda index: nn.Cell(
            {
                'conv1': lambda _, __, inp: nn.Conv2d(
                    (5 if index == 0 else 3 * 4)
                    if inp is not None and inp < 1 else 4, 4, 1),
                'conv2': lambda _, __, inp: nn.Conv2d(
                    (5 if index == 0 else 3 * 4)
                    if inp is not None and inp < 1 else 4, 4, 3, padding=1),
            },
            3,
            merge_op='loose_end'), (1, 3))
    self.fc = nn.Linear(3 * 4, 10)
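With merge_op='loose_end', only nodes whose outputs no other node consumes are concatenated into the cell output, so the classifier above is sized for the maximum of 3 nodes x 4 channels. A reduced sketch of the same cell, assuming uniform 4-channel inputs so plain modules can stand in for the factories:

# Reduced nn.Cell sketch (assumed usage; uniform 4-channel inputs throughout):
cell = nn.Cell(
    {
        'conv': nn.Conv2d(4, 4, 1),
        'identity': nn.Identity(),
    },
    num_nodes=3,
    merge_op='loose_end')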
Example #5
def __init__(self):
    super().__init__()
    # stack AddOne between 3 and 5 times (the module is replicated per position)
    self.block = nn.Repeat(AddOne(), (3, 5))
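`AddOne` is not defined in these snippets; a minimal stand-in consistent with how it is used (an elementwise module that can be stacked):

# Hypothetical definition of AddOne, inferred from its usage:
class AddOne(nn.Module):
    def forward(self, x):
        return x + 1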
Example #6
    def __init__(self,
                 op_candidates: List[str],
                 merge_op: Literal['all', 'loose_end'] = 'all',
                 num_nodes_per_cell: int = 4,
                 width: Union[Tuple[int, ...], int] = 16,
                 num_cells: Union[Tuple[int, ...], int] = 20,
                 dataset: Literal['cifar', 'imagenet'] = 'imagenet',
                 auxiliary_loss: bool = False):
        super().__init__()

        self.dataset = dataset
        self.num_labels = 10 if dataset == 'cifar' else 1000
        self.auxiliary_loss = auxiliary_loss

        # preprocess the specified width and depth
        if isinstance(width, Iterable):
            C = nn.ValueChoice(list(width), label='width')
        else:
            C = width

        self.num_cells: nn.MaybeChoice[int] = cast(int, num_cells)
        if isinstance(num_cells, Iterable):
            self.num_cells = nn.ValueChoice(list(num_cells), label='depth')
        num_cells_per_stage = [
            (i + 1) * self.num_cells // 3 - i * self.num_cells // 3
            for i in range(3)
        ]

        # the auxiliary head differs between networks targeted at different datasets
        if dataset == 'imagenet':
            self.stem0 = nn.Sequential(
                nn.Conv2d(3,
                          cast(int, C // 2),
                          kernel_size=3,
                          stride=2,
                          padding=1,
                          bias=False),
                nn.BatchNorm2d(cast(int, C // 2)),
                nn.ReLU(inplace=True),
                nn.Conv2d(cast(int, C // 2),
                          cast(int, C),
                          3,
                          stride=2,
                          padding=1,
                          bias=False),
                nn.BatchNorm2d(C),
            )
            self.stem1 = nn.Sequential(
                nn.ReLU(inplace=True),
                nn.Conv2d(cast(int, C),
                          cast(int, C),
                          3,
                          stride=2,
                          padding=1,
                          bias=False),
                nn.BatchNorm2d(C),
            )
            C_pprev = C_prev = C_curr = C
            last_cell_reduce = True
        elif dataset == 'cifar':
            self.stem = nn.Sequential(
                nn.Conv2d(3, cast(int, 3 * C), 3, padding=1, bias=False),
                nn.BatchNorm2d(cast(int, 3 * C)))
            C_pprev = C_prev = 3 * C
            C_curr = C
            last_cell_reduce = False

        self.stages = nn.ModuleList()
        for stage_idx in range(3):
            if stage_idx > 0:
                C_curr *= 2
            # For a stage, we get C_in, C_curr, and C_out.
            # C_in is only used in the first cell.
            # C_curr is number of channels for each operator in current stage.
            # C_out is usually `C * num_nodes_per_cell` because of concat operator.
            cell_builder = CellBuilder(op_candidates, C_pprev, C_prev, C_curr,
                                       num_nodes_per_cell, merge_op,
                                       stage_idx > 0, last_cell_reduce)
            stage = nn.Repeat(cell_builder, num_cells_per_stage[stage_idx])
            self.stages.append(stage)

            # C_pprev is the output channel count of the second-to-last cell built so far.
            if len(stage) > 1:
                # Contains more than one cell
                C_pprev = len(cast(nn.Cell,
                                   stage[-2]).output_node_indices) * C_curr
            else:
                # Fall back to the output channel count of the previous stage.
                C_pprev = C_prev

            # Originally this was C_prev = num_nodes_per_cell * C_curr,
            # but with loose-end merge it becomes:
            C_prev = len(cast(nn.Cell, stage[-1]).output_node_indices) * C_curr

            # Useful in aligning the pprev and prev cell.
            last_cell_reduce = cell_builder.last_cell_reduce

            if stage_idx == 2:
                C_to_auxiliary = C_prev

        if auxiliary_loss:
            assert isinstance(
                self.stages[2], nn.Sequential
            ), 'Auxiliary loss can only be enabled in retrain mode.'
            self.stages[2] = SequentialBreakdown(
                cast(nn.Sequential, self.stages[2]))
            self.auxiliary_head = AuxiliaryHead(
                C_to_auxiliary, self.num_labels,
                dataset=self.dataset)  # type: ignore

        self.global_pooling = nn.AdaptiveAvgPool2d((1, 1))
        self.classifier = nn.Linear(cast(int, C_prev), self.num_labels)
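An instantiation sketch for the space above. The enclosing class name and the op vocabulary are assumptions here (standard DARTS primitives), not taken from the snippet:

# Hypothetical instantiation; `NDS` stands in for the enclosing class.
model_space = NDS(
    op_candidates=['skip_connect', 'sep_conv_3x3', 'sep_conv_5x5', 'max_pool_3x3'],
    width=(16, 24, 32),            # tuple -> ValueChoice labeled 'width'
    num_cells=(4, 8, 12, 16, 20),  # tuple -> ValueChoice labeled 'depth'
    dataset='cifar',
)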
Example #7
def __init__(self):
    super().__init__()
    # the lambda builds a fresh LayerChoice per position, so every repeated
    # block chooses independently between AddOne and Identity
    self.block = nn.Repeat(
        lambda index: nn.LayerChoice([AddOne(), nn.Identity()]),
        (2, 3), label='rep')
Example #8
def __init__(self):
    super().__init__()
    # a single labeled LayerChoice is replicated, so all repeated blocks
    # follow the same 'lc' decision
    self.block = nn.Repeat(
        nn.LayerChoice([AddOne(), nn.Identity()], label='lc'),
        (3, 5), label='rep')
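Examples #7 and #8 differ only in where the choice lives, which controls sharing. A side-by-side sketch with explicit (hypothetical) labels:

# per-position choices: each repeat makes its own decision
independent = nn.Repeat(
    lambda index: nn.LayerChoice([AddOne(), nn.Identity()], label=f'lc_{index}'),
    (2, 3), label='rep_independent')
# one shared choice: every repeat follows the single 'lc' decision
shared = nn.Repeat(
    nn.LayerChoice([AddOne(), nn.Identity()], label='lc'),
    (3, 5), label='rep_shared')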
Example #9
    def __init__(
            self,
            search_embed_dim: Tuple[int, ...] = (192, 216, 240),
            search_mlp_ratio: Tuple[float, ...] = (3.5, 4.0),
            search_num_heads: Tuple[int, ...] = (3, 4),
            search_depth: Tuple[int, ...] = (12, 13, 14),
            img_size: int = 224,
            patch_size: int = 16,
            in_chans: int = 3,
            num_classes: int = 1000,
            qkv_bias: bool = False,
            drop_rate: float = 0.,
            attn_drop_rate: float = 0.,
            drop_path_rate: float = 0.,
            pre_norm: bool = True,
            global_pool: bool = False,
            abs_pos: bool = True,
            qk_scale: Optional[float] = None,
            rpe: bool = True,
    ):
        super().__init__()

        embed_dim = nn.ValueChoice(list(search_embed_dim), label="embed_dim")
        # ModelParameterChoice reuses the "embed_dim" label so that parameter
        # shapes (cls_token, pos_embed) follow the same sampled dimension.
        fixed_embed_dim = nn.ModelParameterChoice(
            list(search_embed_dim), label="embed_dim")
        depth = nn.ValueChoice(list(search_depth), label="depth")
        self.patch_embed = nn.Conv2d(
            in_chans,
            cast(int, embed_dim),
            kernel_size=patch_size,
            stride=patch_size)
        self.patches_num = int((img_size // patch_size) ** 2)
        self.global_pool = global_pool
        self.cls_token = nn.Parameter(torch.zeros(1, 1, cast(int, fixed_embed_dim)))
        trunc_normal_(self.cls_token, std=.02)

        dpr = [
            x.item() for x in torch.linspace(
                0,
                drop_path_rate,
                max(search_depth))]  # stochastic depth decay rule

        self.abs_pos = abs_pos
        if self.abs_pos:
            self.pos_embed = nn.Parameter(torch.zeros(
                1, self.patches_num + 1, cast(int, fixed_embed_dim)))
            trunc_normal_(self.pos_embed, std=.02)

        # Each of the `depth` layers picks its own (mlp_ratio, num_heads)
        # combination through a per-layer LayerChoice labeled f'layer{index}'.
        self.blocks = nn.Repeat(lambda index: nn.LayerChoice([
            TransformerEncoderLayer(embed_dim=embed_dim,
                                    fixed_embed_dim=fixed_embed_dim,
                                    num_heads=num_heads, mlp_ratio=mlp_ratio,
                                    qkv_bias=qkv_bias, drop_rate=drop_rate,
                                    attn_drop=attn_drop_rate,
                                    drop_path=dpr[index],
                                    rpe_length=img_size // patch_size,
                                    qk_scale=qk_scale, rpe=rpe,
                                    pre_norm=pre_norm,)
            for mlp_ratio, num_heads in itertools.product(search_mlp_ratio, search_num_heads)
        ], label=f'layer{index}'), depth)
        self.pre_norm = pre_norm
        if self.pre_norm:
            self.norm = nn.LayerNorm(cast(int, embed_dim))
        self.head = nn.Linear(
            cast(int, embed_dim),
            num_classes) if num_classes > 0 else nn.Identity()
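Note that `nn.Repeat` above takes the `depth` ValueChoice directly as its repetition count, tying the number of transformer layers to the sampled 'depth' decision. A minimal sketch of the same pattern:

# nn.Repeat accepts a ValueChoice as depth (same pattern as self.blocks above):
depth = nn.ValueChoice([2, 3, 4], label='depth')
stack = nn.Repeat(nn.Linear(16, 16), depth)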