def _build(self, s_in: Shape, c_out: int) -> Shape:
    c_in = s_in.num_features()
    c_inner = int(c_out * self.expansion)
    self.block = self._build_block(c_in, c_inner, c_out, self.has_first_act)
    if self.shortcut_type in [None, 'None']:
        pass
    elif self.shortcut_type == 'id':
        self.shortcut = nn.Identity()
    elif self.shortcut_type == 'conv1x1':
        self.shortcut = nn.Sequential(*[
            nn.Conv2d(c_in, c_out, 1, self.stride, 0, bias=False),
            nn.BatchNorm2d(c_out, affine=self.bn_affine),
        ])
    elif self.shortcut_type == 'avg_conv':
        self.shortcut = nn.Sequential(*[
            nn.AvgPool2d(kernel_size=2, stride=self.stride, padding=0),
            nn.Conv2d(c_in, c_out, 1, 1, 0, bias=False),
        ])
    else:
        raise NotImplementedError('shortcut type "%s" is not implemented' % self.shortcut_type)
    self.has_shortcut = isinstance(self.shortcut, nn.Module)
    if self.has_shortcut:
        self.block = DropPathModule(self.block)
    return self.probe_outputs(s_in)
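# Hedged standalone sketch (not part of the framework above): a minimal residual block in plain
# PyTorch that illustrates the same shortcut choices ('id', 'conv1x1', 'avg_conv'). All names and
# defaults here are illustrative assumptions, not the framework's implementation.
import torch
import torch.nn as nn


class TinyResidualBlock(nn.Module):
    def __init__(self, c_in: int, c_out: int, stride: int = 1, shortcut_type: str = 'conv1x1'):
        super().__init__()
        self.body = nn.Sequential(
            nn.Conv2d(c_in, c_out, 3, stride, 1, bias=False),
            nn.BatchNorm2d(c_out),
            nn.ReLU(inplace=True),
            nn.Conv2d(c_out, c_out, 3, 1, 1, bias=False),
            nn.BatchNorm2d(c_out),
        )
        if shortcut_type == 'id':
            # only valid when c_in == c_out and stride == 1
            self.shortcut = nn.Identity()
        elif shortcut_type == 'conv1x1':
            self.shortcut = nn.Sequential(
                nn.Conv2d(c_in, c_out, 1, stride, 0, bias=False),
                nn.BatchNorm2d(c_out),
            )
        elif shortcut_type == 'avg_conv':
            # assumes stride == 2 and an even input size, so both paths shrink identically
            self.shortcut = nn.Sequential(
                nn.AvgPool2d(kernel_size=2, stride=stride, padding=0),
                nn.Conv2d(c_in, c_out, 1, 1, 0, bias=False),
            )
        else:
            self.shortcut = None

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        y = self.body(x)
        return y if self.shortcut is None else y + self.shortcut(x)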
def _build(self, s_in: Shape, c_out: int) -> Shape:
    c_in = s_in.num_features()
    c_mid = make_divisible(int(c_in * self.expansion), divisible=8)
    self.has_skip = self.stride == 1 and c_in == c_out
    ops = []
    conv_kwargs = dict(dilation=self.dilation, padding=self.padding)
    if self.expansion > 1:
        # pw
        ops.extend([
            get_conv2d(c_in, c_mid, k_size=self.k_size_in, groups=1, **conv_kwargs),
            nn.BatchNorm2d(c_mid, affine=self.bn_affine),
            Register.act_funs.get(self.act_fun)(inplace=self.act_inplace),
        ])
    # dw
    ops.extend([
        get_conv2d(c_mid, c_mid, k_size=self.k_size, stride=self.stride, groups=-1, **conv_kwargs),
        nn.BatchNorm2d(c_mid, affine=self.bn_affine),
        Register.act_funs.get(self.act_fun)(inplace=self.act_inplace),
    ])
    # optional squeeze+excitation module
    if isinstance(self.att_dict, dict):
        ops.append(AbstractAttentionModule.module_from_dict(c_mid, c_substitute=c_in, att_dict=self.att_dict))
    # pw
    ops.extend([
        get_conv2d(c_mid, c_out, k_size=self.k_size_out, groups=1, **conv_kwargs),
        nn.BatchNorm2d(c_out, affine=self.bn_affine),
    ])
    self.block = nn.Sequential(*ops)
    if self.has_skip:
        self.block = DropPathModule(self.block)
    return self.probe_outputs(s_in)
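# Hedged standalone sketch (independent of the framework classes above): a plain-PyTorch mobile
# inverted bottleneck (MBConv) with the same pw -> dw -> pw structure, and a residual skip that is
# only used when stride == 1 and c_in == c_out. All names here are illustrative.
import torch
import torch.nn as nn


class TinyMBConv(nn.Module):
    def __init__(self, c_in: int, c_out: int, k_size: int = 3, stride: int = 1, expansion: int = 6):
        super().__init__()
        c_mid = c_in * expansion
        self.has_skip = stride == 1 and c_in == c_out
        layers = []
        if expansion > 1:
            # pointwise expansion
            layers += [nn.Conv2d(c_in, c_mid, 1, 1, 0, bias=False),
                       nn.BatchNorm2d(c_mid), nn.ReLU6(inplace=True)]
        # depthwise spatial convolution
        layers += [nn.Conv2d(c_mid, c_mid, k_size, stride, k_size // 2, groups=c_mid, bias=False),
                   nn.BatchNorm2d(c_mid), nn.ReLU6(inplace=True)]
        # pointwise projection, no activation
        layers += [nn.Conv2d(c_mid, c_out, 1, 1, 0, bias=False), nn.BatchNorm2d(c_out)]
        self.block = nn.Sequential(*layers)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        return x + self.block(x) if self.has_skip else self.block(x)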
def __init__(self, ops: nn.ModuleList, **stored_kwargs):
    super().__init__(**stored_kwargs)
    ops = nn.ModuleList([
        DropPathModule(op, op.is_layer(SkipLayer), drop_p=0.0, drop_ids=True)
        for op in ops
    ])
    self._add_to_submodule_lists(ops=ops)
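# Hedged sketch of the stochastic-depth idea behind a DropPathModule wrapper (this is not the
# framework's implementation): during training, the wrapped branch is dropped for a random subset
# of the samples in a batch and the surviving samples are rescaled, so the expected output is unchanged.
import torch
import torch.nn as nn


class TinyDropPath(nn.Module):
    def __init__(self, module: nn.Module, drop_p: float = 0.0):
        super().__init__()
        self.module = module
        self.drop_p = drop_p

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        y = self.module(x)
        if not self.training or self.drop_p <= 0.0:
            return y
        keep_p = 1.0 - self.drop_p
        # one keep/drop decision per sample, broadcast over channels and spatial dimensions
        mask = torch.rand(y.size(0), 1, 1, 1, device=y.device).lt(keep_p).to(y.dtype)
        return y * mask / keep_p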
def _build(self, s_in: Shape, c_out: int) -> Shape:
    assert not (c_out <= s_in.num_features() and self.stride > 1), "must increase num features when stride is >1"
    assert s_in.num_features() % 4 == 0 and c_out % 2 == 0, "num features must be divisible by 4"
    padding = get_padding(self.padding, self.k_size, self.stride, self.dilation)
    padding2 = get_padding(self.padding, self.k_size, 1, self.dilation)
    if self.stride >= 2:
        c_side = c_main_in = s_in.num_features()
        self.branch_proj = nn.Sequential(*[
            # dw
            nn.Conv2d(c_side, c_side, self.k_size, self.stride, padding, groups=c_side, bias=False),
            nn.BatchNorm2d(c_side, affine=self.bn_affine),
            # pw
            nn.Conv2d(c_side, c_side, 1, 1, 0, bias=False),
            nn.BatchNorm2d(c_side, affine=self.bn_affine),
            Register.act_funs.get(self.act_fun)(inplace=self.act_inplace),
        ])
    else:
        c_side = c_main_in = s_in.num_features() // 2
    c_main_out = c_out - c_side
    c_main_mid = int(c_out // 2 * self.expansion)
    bm = [
        # dw 1
        nn.Conv2d(c_main_in, c_main_in, self.k_size, self.stride, padding, groups=c_main_in, bias=False),
        nn.BatchNorm2d(c_main_in, affine=self.bn_affine),
        # pw 1
        nn.Conv2d(c_main_in, c_main_mid, 1, 1, 0, bias=False),
        nn.BatchNorm2d(c_main_mid, affine=self.bn_affine),
        Register.act_funs.get(self.act_fun)(inplace=self.act_inplace),
        # dw 2
        nn.Conv2d(c_main_mid, c_main_mid, self.k_size, 1, padding2, groups=c_main_mid, bias=False),
        nn.BatchNorm2d(c_main_mid, affine=self.bn_affine),
        # pw 2
        nn.Conv2d(c_main_mid, c_main_mid, 1, 1, 0, bias=False),
        nn.BatchNorm2d(c_main_mid, affine=self.bn_affine),
        Register.act_funs.get(self.act_fun)(inplace=self.act_inplace),
        # dw 3
        nn.Conv2d(c_main_mid, c_main_mid, self.k_size, 1, padding2, groups=c_main_mid, bias=False),
        nn.BatchNorm2d(c_main_mid, affine=self.bn_affine),
        # pw 3
        nn.Conv2d(c_main_mid, c_main_out, 1, 1, 0, bias=False),
        nn.BatchNorm2d(c_main_out, affine=self.bn_affine),
        Register.act_funs.get(self.act_fun)(inplace=self.act_inplace),
    ]
    # optional attention module
    if isinstance(self.att_dict, dict):
        bm.append(AbstractAttentionModule.module_from_dict(c_main_out, c_substitute=c_main_in, att_dict=self.att_dict))
    # self.branch_main = nn.Sequential(*bm)
    self.branch_main = DropPathModule(nn.Sequential(*bm))
    return self.probe_outputs(s_in)
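# Hedged standalone sketch of the ShuffleNetV2 data flow that the block above is built for
# (channel split, two branches, concatenation, channel shuffle). This is an illustration of the
# technique, not the framework's forward code; branch_main/branch_proj stand for the modules built above.
import torch


def channel_shuffle(x: torch.Tensor, groups: int = 2) -> torch.Tensor:
    # interleave the channels of the two concatenated branches
    n, c, h, w = x.shape
    return x.view(n, groups, c // groups, h, w).transpose(1, 2).reshape(n, c, h, w)


def shufflenet_v2_unit(x: torch.Tensor, branch_main, branch_proj=None, stride: int = 1) -> torch.Tensor:
    if stride == 1:
        # split channels: one half passes through unchanged, the other through the main branch
        x_side, x_main = x.chunk(2, dim=1)
        out = torch.cat([x_side, branch_main(x_main)], dim=1)
    else:
        # strided unit: both branches see the full input, their outputs are concatenated
        out = torch.cat([branch_proj(x), branch_main(x)], dim=1)
    return channel_shuffle(out, groups=2)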
def __init__(self, name: str, strategy_name='default', skip_op: str = None, k_size_in=1, k_size_out=1,
             k_sizes=(3, 5, 7), stride=1, padding='same', expansions=(3, 6), dilation=1,
             bn_affine=True, act_fun='relu6', act_inplace=True, att_dict: dict = None):
    """
    A fused layer for several kernel sizes and expansion sizes, sharing the 1x1 conv weights.
    Currently designed only for a single kernel+expansion per forward pass and for the final config.

    :param name: name under which to register architecture weights
    :param strategy_name: name of the strategy for architecture weights
    :param skip_op: optional layer name, adds an op that enables skipping the entire block, e.g. "SkipLayer"
    :param k_size_in: kernel size(s) for the first conv kernel (expanding)
    :param k_size_out: kernel size(s) for the last conv kernel (projecting)
    :param k_sizes: kernel sizes for the spatial kernel
    :param stride: stride for the spatial kernel
    :param padding: 'same' or number
    :param expansions: multipliers for inner channels, based on input channels
    :param dilation: dilation for the spatial kernel
    :param bn_affine: affine batch norm
    :param act_fun: activation function
    :param act_inplace: whether to use the activation function in-place if possible (e.g. ReLU)
    :param att_dict: None to disable attention modules, otherwise a dict with respective kwargs
    """
    super().__init__()
    self._add_to_kwargs(name=name, strategy_name=strategy_name, skip_op=skip_op,
                        k_size_in=k_size_in, k_size_out=k_size_out, k_sizes=k_sizes,
                        stride=stride, expansions=expansions, padding=padding, dilation=dilation,
                        bn_affine=bn_affine, act_fun=act_fun, act_inplace=act_inplace, att_dict=att_dict)
    self._add_to_print_kwargs(has_skip=False, has_att=isinstance(self.att_dict, dict))
    self.ws = None
    self.skip = None
    self.pw_in = nn.ModuleList([])
    self.dw_conv = nn.ModuleList([])
    self.dw_att = nn.ModuleList([])
    self.pw_out = nn.ModuleList([])
    self.drop_path = DropPathModule()
    self._choices_by_idx = []
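# Hedged sketch of the weight sharing described in the docstring above (one set of 1x1 conv weights
# reused across several expansion ratios). This is a standalone illustration under that assumption,
# not the fused layer itself; the slicing scheme is illustrative.
import torch
import torch.nn as nn
import torch.nn.functional as F


class SharedPointwise(nn.Module):
    def __init__(self, c_in: int, expansions=(3, 6)):
        super().__init__()
        self.c_in = c_in
        self.expansions = sorted(expansions)
        c_max = c_in * self.expansions[-1]
        # one weight tensor, sized for the largest expansion ratio
        self.weight = nn.Parameter(torch.randn(c_max, c_in, 1, 1) * 0.01)

    def forward(self, x: torch.Tensor, expansion: int) -> torch.Tensor:
        # a smaller expansion reuses the leading slice of the shared output channels
        c_sel = self.c_in * expansion
        return F.conv2d(x, self.weight[:c_sel], bias=None, stride=1, padding=0)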
def _build(self, s_in: Shape, c_out: int) -> Shape:
    c_in = s_in.num_features()
    c_mid = int(c_in * max(self.expansions))
    self.has_skip = self.stride == 1 and c_in == c_out
    max_exp = max(self.expansions)
    exp_mults = [e / max_exp for e in self.expansions]
    ops = []
    if max_exp > 1:
        # pw
        ops.extend([
            nn.Conv2d(c_in, c_mid, 1, 1, 0, groups=1, bias=False),
            nn.BatchNorm2d(c_mid, affine=self.bn_affine),
            Register.act_funs.get(self.act_fun)(inplace=self.act_inplace),
        ])
    # dw
    self.conv = SuperKernelThresholdConv(c_mid, c_mid, self.k_sizes, exp_mults, self.dilation,
                                         self.stride, self.padding, -1, bias=False)
    ops.extend([
        self.conv,
        nn.BatchNorm2d(c_mid, affine=self.bn_affine),
        Register.act_funs.get(self.act_fun)(inplace=self.act_inplace),
    ])
    # optional squeeze+excitation module with searchable width
    if isinstance(self.sse_dict, dict):
        self.learned_se = SuperSqueezeExcitationChannelThresholdModule(c_mid, c_substitute=c_in, **self.sse_dict)
        ops.append(self.learned_se)
    else:
        self.learned_se = None
    # pw
    ops.extend([
        nn.Conv2d(c_mid, c_out, 1, 1, 0, groups=1, bias=False),
        nn.BatchNorm2d(c_out, affine=self.bn_affine),
    ])
    self.block = nn.Sequential(*ops)
    if self.has_skip:
        self.block = DropPathModule(self.block)
    return self.probe_outputs(s_in)
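# Hedged sketch of a "searchable width" channel gate, in the spirit of the threshold modules used
# above. The real SuperKernelThresholdConv / SuperSqueezeExcitationChannelThresholdModule are more
# involved; this only illustrates the idea: each channel gets a learned score, the sigmoid-activated
# score softly scales the channel, and channels below a threshold can be pruned at finalization.
import torch
import torch.nn as nn


class TinyChannelThresholdGate(nn.Module):
    def __init__(self, channels: int, threshold: float = 0.5):
        super().__init__()
        self.scores = nn.Parameter(torch.zeros(channels))
        self.threshold = threshold

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        # soft per-channel mask during the search; a finalized network would drop channels
        # whose gate value stays below self.threshold
        gate = torch.sigmoid(self.scores).view(1, -1, 1, 1)
        return x * gate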
class SuperMobileInvertedConvLayer(AbstractLayer):

    def __init__(self, name: str, strategy_name='default', k_sizes=(3, 5, 7), stride=1, padding='same',
                 expansions=(3, 6), dilation=1, bn_affine=True, act_fun='relu6', act_inplace=True,
                 att_dict: dict = None):
        """
        A super kernel layer for several kernel sizes and expansion sizes, to share as many weights as possible.

        :param name: name under which to register architecture weights
        :param strategy_name: name of the strategy for architecture weights
        :param k_sizes: kernel sizes for the spatial kernel
        :param stride: stride for the spatial kernel
        :param padding: 'same' or number
        :param expansions: multipliers for inner channels, based on input channels
        :param dilation: dilation for the spatial kernel
        :param bn_affine: affine batch norm
        :param act_fun: activation function
        :param act_inplace: whether to use the activation function in-place if possible (e.g. ReLU)
        :param att_dict: None to disable attention modules, otherwise a dict with respective kwargs
        """
        super().__init__()
        self._add_to_kwargs(name=name, strategy_name=strategy_name, k_sizes=k_sizes, stride=stride,
                            expansions=sorted(expansions), padding=padding, dilation=dilation,
                            bn_affine=bn_affine, act_fun=act_fun, act_inplace=act_inplace, att_dict=att_dict)
        self._add_to_print_kwargs(has_skip=False)
        self.conv = None
        self.block = None

    def _build(self, s_in: Shape, c_out: int) -> Shape:
        c_in = s_in.num_features()
        max_exp = max(self.expansions)
        exp_mults = [e / max_exp for e in self.expansions]
        c_mid = make_divisible(int(c_in * max_exp), divisible=8)
        self.has_skip = self.stride == 1 and c_in == c_out
        ops = []
        self.conv = SuperKernelConv(c_mid, c_mid, self.name, self.strategy_name, self.k_sizes, exp_mults,
                                    self.dilation, self.stride, self.padding, c_mid, bias=False)
        if max_exp > 1:
            # pw
            ops.extend([
                nn.Conv2d(c_in, c_mid, 1, 1, 0, bias=False),
                nn.BatchNorm2d(c_mid, affine=self.bn_affine),
                Register.act_funs.get(self.act_fun)(inplace=self.act_inplace),
            ])
        # dw
        ops.extend([
            self.conv,
            nn.BatchNorm2d(c_mid, affine=self.bn_affine),
            Register.act_funs.get(self.act_fun)(inplace=self.act_inplace),
        ])
        # optional attention module
        if isinstance(self.att_dict, dict):
            ops.append(AbstractAttentionModule.module_from_dict(c_mid, c_substitute=c_in, **self.att_dict))
        # pw
        ops.extend([
            nn.Conv2d(c_mid, c_out, 1, 1, 0, bias=False),
            nn.BatchNorm2d(c_out, affine=self.bn_affine),
        ])
        self.block = nn.Sequential(*ops)
        if self.has_skip:
            self.block = DropPathModule(self.block)
        return self.probe_outputs(s_in)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        if self.has_skip:
            return x + self.block.forward(x)
        return self.block.forward(x)

    def config(self, finalize=False, **__):
        cfg = super().config(finalize=finalize, **__)
        if finalize:
            # finalize into a regular MobileInvertedConvLayer with the selected kernel size and expansion
            cfg['name'] = MobileInvertedConvLayer.__name__
            kwargs = cfg['kwargs']
            kwargs.pop('name')
            kwargs.pop('strategy_name')
            kwargs.pop('k_sizes')
            kwargs.pop('expansions')
            ks = self.conv.get_finalized_kernel()
            es = self.conv.get_finalized_channel_mult()
            kwargs['k_size'] = ks[1]
            kwargs['expansion'] = self.expansions[es[0]]
            cfg['kwargs'] = kwargs
        return cfg
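# Hedged sketch of the "super kernel" weight sharing that SuperKernelConv is named after (a standalone
# illustration, not the framework class): one depthwise weight tensor is allocated for the largest
# kernel size, smaller kernel sizes reuse its spatial center, and smaller channel multipliers reuse
# the leading subset of channels. The slicing scheme here is an assumption for illustration.
import torch
import torch.nn as nn
import torch.nn.functional as F


class TinySuperKernelDWConv(nn.Module):
    def __init__(self, channels: int, k_sizes=(3, 5, 7), stride: int = 1):
        super().__init__()
        self.k_max = max(k_sizes)
        self.stride = stride
        # shared depthwise weight, sized for the largest kernel
        self.weight = nn.Parameter(torch.randn(channels, 1, self.k_max, self.k_max) * 0.01)

    def forward(self, x: torch.Tensor, k_size: int, channel_mult: float = 1.0) -> torch.Tensor:
        # select a leading subset of channels for smaller expansion multipliers
        c_sel = max(1, int(round(x.size(1) * channel_mult)))
        # a center crop of the shared weight emulates the smaller kernel size
        off = (self.k_max - k_size) // 2
        w = self.weight[:c_sel, :, off:off + k_size, off:off + k_size]
        return F.conv2d(x[:, :c_sel], w, stride=self.stride, padding=k_size // 2, groups=c_sel)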