def __init__(self, in_channels):
    super().__init__()
    self.conv1 = nn.Conv2d(in_channels, 10, 3)
    self.conv2 = nn.LayerChoice([nn.Conv2d(10, 10, 3), nn.MaxPool2d(3)])
    self.conv3 = nn.LayerChoice([nn.Identity(), nn.Conv2d(10, 10, 1)])
    self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
    self.fc = nn.Linear(10, 1)
def __init__(self):
    super().__init__()
    self.net1 = nn.LayerChoice([nn.Linear(10, 10), nn.Linear(10, 10, bias=False)])
    self.net2 = nn.LayerChoice([nn.Linear(10, 10), nn.Linear(10, 10, bias=False)])
def __init__(self):
    super().__init__()
    self.module = nn.LayerChoice([
        nn.LayerChoice([
            nn.Conv2d(3, 3, kernel_size=1),
            nn.Conv2d(3, 4, kernel_size=1),
            nn.Conv2d(3, 5, kernel_size=1)
        ]),
        nn.Conv2d(3, 1, kernel_size=1)
    ])
def __init__(self, shared=True):
    super().__init__()
    labels = ['x', 'x'] if shared else [None, None]
    self.module1 = nn.LayerChoice([
        nn.Conv2d(3, 3, kernel_size=1),
        nn.Conv2d(3, 5, kernel_size=1)
    ], label=labels[0])
    self.module2 = nn.LayerChoice([
        nn.Conv2d(3, 3, kernel_size=1),
        nn.Conv2d(3, 5, kernel_size=1)
    ], label=labels[1])
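# Hedged note on the snippet above: when shared=True, module1 and module2 are created
# with the same label 'x', so a search strategy makes a single decision that applies to
# both LayerChoices (they always resolve to the same candidate). When shared=False, the
# labels are auto-generated and the two choices are mutated independently.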
def __init__(self, hidden_size=32):
    super(Net, self).__init__()
    self.conv1 = nn.Conv2d(1, 20, 5, 1)
    self.conv2 = nn.Conv2d(20, 50, 5, 1)
    self.fc1 = nn.LayerChoice([
        nn.Linear(4 * 4 * 50, hidden_size, bias=True),
        nn.Linear(4 * 4 * 50, hidden_size, bias=False)
    ])
    self.fc2 = nn.LayerChoice([
        nn.Linear(hidden_size, 10, bias=False),
        nn.Linear(hidden_size, 10, bias=True)
    ])
def __init__(self, hidden_size=32, diff_size=False):
    super(Net, self).__init__()
    self.conv1 = nn.Conv2d(1, 20, 5, 1)
    self.conv2 = nn.Conv2d(20, 50, 5, 1)
    self.fc1 = nn.LayerChoice([
        nn.Linear(4 * 4 * 50, hidden_size, bias=True),
        nn.Linear(4 * 4 * 50, hidden_size, bias=False)
    ], label='fc1')
    self.fc2 = nn.LayerChoice([
        nn.Linear(hidden_size, 10, bias=False),
        nn.Linear(hidden_size, 10, bias=True)
    ] + ([] if not diff_size else [nn.Linear(hidden_size, 10, bias=False)]), label='fc2')
def __init__(self):
    super().__init__()
    channels = nn.ValueChoice([4, 6, 8])
    self.conv1 = nn.Conv2d(1, channels, 5)
    self.pool1 = nn.LayerChoice([
        nn.MaxPool2d((2, 2)),
        nn.AvgPool2d((2, 2))
    ])
    self.conv2 = nn.Conv2d(channels, 16, 5)
    self.pool2 = nn.LayerChoice([
        nn.MaxPool2d(2),
        nn.AvgPool2d(2),
        nn.Conv2d(16, 16, 2, 2)
    ])
    self.fc1 = nn.Linear(16 * 5 * 5, 120)  # 5*5 from image dimension
    self.fc2 = nn.Linear(120, 84)
    self.fcplus = nn.Linear(84, 84)
    self.shortcut = nn.InputChoice(2, 1)
    self.fc3 = nn.Linear(84, 10)
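# Hedged sketch: a plausible forward() for the LeNet-style space above. The original
# forward is not part of this collection; this version assumes a 32x32 single-channel
# input (so the flattened size is 16 * 5 * 5) and that `torch` and
# `torch.nn.functional as F` are imported. InputChoice(2, 1) picks exactly one of the
# two candidate tensors, i.e. with or without the extra fcplus layer.
def forward(self, x):
    x = self.pool1(F.relu(self.conv1(x)))
    x = self.pool2(F.relu(self.conv2(x)))
    x = torch.flatten(x, 1)
    x = F.relu(self.fc1(x))
    x = F.relu(self.fc2(x))
    x = self.shortcut([x, F.relu(self.fcplus(x))])  # choose one of the two inputs
    return self.fc3(x)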
def __init__(self):
    super().__init__()
    self.fc1 = ModelInner()
    self.fc2 = nn.LayerChoice([nn.Linear(10, 10), nn.Linear(10, 10, bias=False)])
    self.fc3 = ModelInner()
def __init__(self):
    super().__init__()
    self.conv = nn.LayerChoice([
        nn.Conv2d(3, 1, 3),
        nn.Conv2d(3, 1, 5, padding=1),
    ])
    self.pool = nn.MaxPool2d(kernel_size=2)
def __init__(self, hidden_size):
    super().__init__()
    self.conv1 = nn.Conv2d(1, 20, 5, 1)
    self.conv2 = nn.Conv2d(20, 50, 5, 1)
    self.fc1 = nn.LayerChoice([
        nn.Linear(4 * 4 * 50, hidden_size),
        nn.Linear(4 * 4 * 50, hidden_size, bias=False)
    ], label='fc1_choice')
    self.fc2 = nn.Linear(hidden_size, 10)
def __init__(self):
    super().__init__()
    self.conv1 = nn.Conv2d(1, 32, 3, 1)
    self.conv2 = nn.LayerChoice([nn.Conv2d(32, 64, 3, 1), DepthwiseSeparableConv(32, 64)])
    self.dropout1 = nn.Dropout(nn.ValueChoice([0.25, 0.5, 0.75]))
    self.dropout2 = nn.Dropout(0.5)
    feature = nn.ValueChoice([64, 128, 256])
    self.fc1 = nn.Linear(9216, feature)
    self.fc2 = nn.Linear(feature, 10)
def _make_stage(self, stage_idx, inp, oup, se, stride, act):
    # initialize them first because they are related to layer_count.
    exp, ks, se_blocks = [], [], []
    for _ in range(4):
        exp.append(nn.ValueChoice(list(self.expand_ratios), label=f'exp_{self.layer_count}'))
        ks.append(nn.ValueChoice([3, 5, 7], label=f'ks_{self.layer_count}'))
        if se:
            # if SE is true, assign a layer choice to SE
            se_blocks.append(lambda hidden_ch: nn.LayerChoice(
                [nn.Identity(), SELayer(hidden_ch)], label=f'se_{self.layer_count}'))
        else:
            se_blocks.append(None)
        self.layer_count += 1

    blocks = [
        # stride = 2
        InvertedResidual(inp, oup, exp[0], ks[0], stride, squeeze_and_excite=se_blocks[0], activation_layer=act),
        # stride = 1, residual connection should be automatically enabled
        InvertedResidual(oup, oup, exp[1], ks[1], squeeze_and_excite=se_blocks[1], activation_layer=act),
        InvertedResidual(oup, oup, exp[2], ks[2], squeeze_and_excite=se_blocks[2], activation_layer=act),
        InvertedResidual(oup, oup, exp[3], ks[3], squeeze_and_excite=se_blocks[3], activation_layer=act)
    ]

    # mutable depth
    return nn.Repeat(blocks, depth=(1, 4), label=f'depth_{stage_idx}')
def __init__(self):
    super().__init__()
    self.conv1 = nn.Conv2d(1, 32, 3, 1)
    # LayerChoice is used to select a layer between Conv2d and DwConv.
    self.conv2 = nn.LayerChoice([nn.Conv2d(32, 64, 3, 1), DepthwiseSeparableConv(32, 64)])
    # ValueChoice is used to select a dropout rate.
    # ValueChoice can be used as a parameter of modules wrapped in `nni.retiarii.nn.pytorch`
    # or of customized modules wrapped with `@basic_unit`.
    self.dropout1 = nn.Dropout(nn.ValueChoice([0.25, 0.5, 0.75]))  # choose dropout rate from 0.25, 0.5 and 0.75
    self.dropout2 = nn.Dropout(0.5)
    feature = nn.ValueChoice([64, 128, 256])
    self.fc1 = nn.Linear(9216, feature)
    self.fc2 = nn.Linear(feature, 10)
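# Hedged sketch: a plausible forward() for the MNIST space above (the original forward
# is not included here). It assumes a 1x28x28 input, so the flattened feature size is
# 64 * 12 * 12 = 9216, and that `torch` and `torch.nn.functional as F` are imported.
def forward(self, x):
    x = F.relu(self.conv1(x))
    x = F.relu(self.conv2(x))   # either Conv2d or DepthwiseSeparableConv, per the LayerChoice
    x = F.max_pool2d(x, 2)
    x = self.dropout1(x)        # dropout rate is a ValueChoice
    x = torch.flatten(x, 1)
    x = F.relu(self.fc1(x))
    x = self.dropout2(x)
    x = self.fc2(x)
    return F.log_softmax(x, dim=1)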
def __init__(self, node_id, num_prev_nodes, channels, num_downsample_connect):
    super().__init__()
    self.ops = nn.ModuleList()
    choice_keys = []
    for i in range(num_prev_nodes):
        stride = 2 if i < num_downsample_connect else 1
        choice_keys.append("{}_p{}".format(node_id, i))
        self.ops.append(
            nn.LayerChoice([
                ops.PoolBN('max', channels, 3, stride, 1, affine=False),
                ops.PoolBN('avg', channels, 3, stride, 1, affine=False),
                nn.Identity() if stride == 1 else ops.FactorizedReduce(channels, channels, affine=False),
                ops.SepConv(channels, channels, 3, stride, 1, affine=False),
                ops.SepConv(channels, channels, 5, stride, 2, affine=False),
                ops.DilConv(channels, channels, 3, stride, 2, 2, affine=False),
                ops.DilConv(channels, channels, 5, stride, 4, 2, affine=False)
            ]))
    self.drop_path = ops.DropPath()
    self.input_switch = nn.InputChoice(n_chosen=2)
def builder(index):
    stride = 1
    inp = stage_output_width
    if index == 0:
        # first layer in stage
        # do downsample and width reshape
        inp = stage_input_width
        if downsample:
            stride = 2
    oup = stage_output_width

    op_choices = {}
    for exp_ratio in expand_ratios:
        for kernel_size in kernel_sizes:
            op_choices[f'k{kernel_size}e{exp_ratio}'] = InvertedResidual(inp, oup, exp_ratio, kernel_size, stride)

    # It can be implemented with ValueChoice, but we use LayerChoice here
    # to be aligned with the intention of the original ProxylessNAS.
    return nn.LayerChoice(op_choices, label=f'{label}_i{index}')
def __init__(self):
    super().__init__()
    self.module = nn.LayerChoice([nn.Conv2d(3, i, kernel_size=1) for i in range(1, 11)])
def __init__(self):
    super().__init__()
    self.linear = nn.LayerChoice([
        nn.Linear(3, nn.ValueChoice([10, 20])),
        nn.Linear(3, nn.ValueChoice([30, 40]))
    ])
def __init__(self, num_labels: int = 1000, channel_search: bool = False, affine: bool = False):
    super().__init__()
    self.num_labels = num_labels
    self.channel_search = channel_search
    self.affine = affine

    # the block number in each stage. 4 stages in total. 20 blocks in total.
    self.stage_repeats = [4, 4, 8, 4]

    # output channels for all stages, including the very first layer and the very last layer
    self.stage_out_channels = [-1, 16, 64, 160, 320, 640, 1024]

    # building first layer
    out_channels = self.stage_out_channels[1]
    self.first_conv = nn.Sequential(
        nn.Conv2d(3, out_channels, 3, 2, 1, bias=False),
        nn.BatchNorm2d(out_channels),
        nn.ReLU(inplace=True),
    )

    self.features = []
    global_block_idx = 0
    for stage_idx, num_repeat in enumerate(self.stage_repeats):
        for block_idx in range(num_repeat):
            # count global index to give names to choices
            global_block_idx += 1

            # get ready for input and output
            in_channels = out_channels
            out_channels = self.stage_out_channels[stage_idx + 2]
            stride = 2 if block_idx == 0 else 1

            # mid channels can be searched
            base_mid_channels = out_channels // 2
            if self.channel_search:
                k_choice_list = [int(base_mid_channels * (.2 * k)) for k in range(1, 9)]
                mid_channels = nn.ValueChoice(k_choice_list, label=f'channel_{global_block_idx}')
            else:
                mid_channels = int(base_mid_channels)

            choice_block = nn.LayerChoice([
                ShuffleNetBlock(in_channels, out_channels, mid_channels=mid_channels, kernel_size=3, stride=stride, affine=affine),
                ShuffleNetBlock(in_channels, out_channels, mid_channels=mid_channels, kernel_size=5, stride=stride, affine=affine),
                ShuffleNetBlock(in_channels, out_channels, mid_channels=mid_channels, kernel_size=7, stride=stride, affine=affine),
                ShuffleXceptionBlock(in_channels, out_channels, mid_channels=mid_channels, stride=stride, affine=affine)
            ], label=f'layer_{global_block_idx}')
            self.features.append(choice_block)

    self.features = nn.Sequential(*self.features)

    # final layers
    last_conv_channels = self.stage_out_channels[-1]
    self.conv_last = nn.Sequential(
        nn.Conv2d(out_channels, last_conv_channels, 1, 1, 0, bias=False),
        nn.BatchNorm2d(last_conv_channels, affine=affine),
        nn.ReLU(inplace=True),
    )
    self.globalpool = nn.AdaptiveAvgPool2d((1, 1))
    self.dropout = nn.Dropout(0.1)
    self.classifier = nn.Sequential(nn.Linear(last_conv_channels, num_labels, bias=False))

    self._initialize_weights()
def __init__(self):
    super().__init__()
    self.block = nn.Repeat(
        nn.LayerChoice([AddOne(), nn.Identity()], label='lc'),
        (3, 5), label='rep')
def __init__(
    self,
    search_embed_dim: Tuple[int, ...] = (192, 216, 240),
    search_mlp_ratio: Tuple[float, ...] = (3.5, 4.0),
    search_num_heads: Tuple[int, ...] = (3, 4),
    search_depth: Tuple[int, ...] = (12, 13, 14),
    img_size: int = 224,
    patch_size: int = 16,
    in_chans: int = 3,
    num_classes: int = 1000,
    qkv_bias: bool = False,
    drop_rate: float = 0.,
    attn_drop_rate: float = 0.,
    drop_path_rate: float = 0.,
    pre_norm: bool = True,
    global_pool: bool = False,
    abs_pos: bool = True,
    qk_scale: Optional[float] = None,
    rpe: bool = True,
):
    super().__init__()

    embed_dim = nn.ValueChoice(list(search_embed_dim), label="embed_dim")
    fixed_embed_dim = nn.ModelParameterChoice(list(search_embed_dim), label="embed_dim")
    depth = nn.ValueChoice(list(search_depth), label="depth")

    self.patch_embed = nn.Conv2d(in_chans, cast(int, embed_dim), kernel_size=patch_size, stride=patch_size)
    self.patches_num = int((img_size // patch_size) ** 2)
    self.global_pool = global_pool

    self.cls_token = nn.Parameter(torch.zeros(1, 1, cast(int, fixed_embed_dim)))
    trunc_normal_(self.cls_token, std=.02)

    dpr = [x.item() for x in torch.linspace(0, drop_path_rate, max(search_depth))]  # stochastic depth decay rule

    self.abs_pos = abs_pos
    if self.abs_pos:
        self.pos_embed = nn.Parameter(torch.zeros(1, self.patches_num + 1, cast(int, fixed_embed_dim)))
        trunc_normal_(self.pos_embed, std=.02)

    self.blocks = nn.Repeat(lambda index: nn.LayerChoice([
        TransformerEncoderLayer(
            embed_dim=embed_dim, fixed_embed_dim=fixed_embed_dim,
            num_heads=num_heads, mlp_ratio=mlp_ratio, qkv_bias=qkv_bias,
            drop_rate=drop_rate, attn_drop=attn_drop_rate, drop_path=dpr[index],
            rpe_length=img_size // patch_size, qk_scale=qk_scale, rpe=rpe, pre_norm=pre_norm)
        for mlp_ratio, num_heads in itertools.product(search_mlp_ratio, search_num_heads)
    ], label=f'layer{index}'), depth)

    self.pre_norm = pre_norm
    if self.pre_norm:
        self.norm = nn.LayerNorm(cast(int, embed_dim))
    self.head = nn.Linear(cast(int, embed_dim), num_classes) if num_classes > 0 else nn.Identity()
def __init__(self):
    super().__init__()
    self.block = nn.Repeat(
        lambda index: nn.LayerChoice([AddOne(), nn.Identity()]),
        (2, 3), label='rep')
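# Hedged note on the two Repeat patterns above: when a module instance containing a
# LayerChoice with a fixed label ('lc') is repeated, every repetition refers to the
# same decision, so all repeated blocks resolve to the same candidate. When a per-index
# builder (the lambda here) creates a fresh LayerChoice without an explicit label, each
# repetition receives an auto-generated label and is searched independently.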