def __init__(self, in_channels, out_channels, kernel_size, stride,
             expand_ratio, se_ratio=0.25, drop_rate=0.2):
    super().__init__()
    channels = in_channels * expand_ratio
    use_se = se_ratio is not None and 0 < se_ratio < 1
    self.use_res_connect = stride == 1 and in_channels == out_channels
    layers = nn.Sequential()
    if expand_ratio != 1:
        layers.add_module(
            "expand", Conv2d(in_channels, channels, kernel_size=1,
                             norm='default', act='swish'))
    layers.add_module(
        "dwconv", Conv2d(channels, channels, kernel_size, stride,
                         groups=channels, norm='default', act='swish'))
    if use_se:
        layers.add_module(
            "se", SEModule(channels, int(in_channels * se_ratio)))
    layers.add_module(
        "project", Conv2d(channels, out_channels, kernel_size=1,
                          norm='default'))
    if self.use_res_connect and drop_rate:
        layers.add_module(
            "drop_path", DropPath(drop_rate))
    self.layers = layers
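# A minimal forward sketch for the block above (an assumption, not part of
# the original source): the residual connection is applied only when input
# and output shapes match, mirroring self.use_res_connect.
def forward(self, x):
    out = self.layers(x)
    if self.use_res_connect:
        # DropPath is already the last module in self.layers, so the
        # branch output can be added to the identity directly.
        out = out + x
    return out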
def __init__(self, num_classes=10, width_mult=1.0):
    super().__init__()
    block = InvertedResidual
    in_channels = 16
    last_channels = 1280
    inverted_residual_setting = [
        # k, exp, c, se, nl, s
        [3, 16, 16, False, 'relu6', 1],
        [3, 64, 24, False, 'relu6', 1],
        [3, 72, 24, False, 'relu6', 1],
        [5, 72, 40, True, 'relu6', 1],
        [5, 120, 40, True, 'relu6', 1],
        [5, 120, 40, True, 'relu6', 1],
        [3, 240, 80, False, 'hswish', 2],
        [3, 200, 80, False, 'hswish', 1],
        [3, 184, 80, False, 'hswish', 1],
        [3, 184, 80, False, 'hswish', 1],
        [3, 480, 112, True, 'hswish', 1],
        [3, 672, 112, True, 'hswish', 1],
        [5, 672, 160, True, 'hswish', 2],
        [5, 960, 160, True, 'hswish', 1],
        [5, 960, 160, True, 'hswish', 1],
    ]
    last_channels = _make_divisible(
        last_channels * width_mult) if width_mult > 1.0 else last_channels

    # building first layer
    features = [
        Conv2d(3, in_channels, kernel_size=3, stride=1,
               norm='default', act='hswish')
    ]

    # building inverted residual blocks
    for k, exp, c, se, nl, s in inverted_residual_setting:
        out_channels = _make_divisible(c * width_mult)
        exp_channels = _make_divisible(exp * width_mult)
        features.append(
            block(in_channels, exp_channels, out_channels, k, s, nl, se))
        in_channels = out_channels

    # building last several layers (exp_channels is the expansion size
    # of the final inverted residual block)
    features.append(
        Conv2d(in_channels, exp_channels, kernel_size=1,
               norm='default', act='hswish'))
    in_channels = exp_channels

    # make it nn.Sequential
    self.features = nn.Sequential(*features)

    # building classifier
    self.classifier = nn.Sequential(
        nn.AdaptiveAvgPool2d(1),
        Conv2d(in_channels, last_channels, kernel_size=1, act='hswish'),
        Conv2d(last_channels, num_classes, kernel_size=1),
    )
def __init__(self, in_channels, out_channels, stride, groups, use_se):
    super().__init__()
    self.use_se = use_se
    self.conv1 = Conv2d(in_channels, out_channels, kernel_size=1,
                        norm='default', act='default')
    self.conv2 = Conv2d(out_channels, out_channels, kernel_size=3,
                        stride=stride, groups=groups,
                        norm='default', act='default')
    if self.use_se:
        self.se = SE(out_channels, 4)
    self.conv3 = Conv2d(out_channels, out_channels, kernel_size=1,
                        norm='default')
    if stride != 1 or in_channels != out_channels:
        layers = []
        if stride != 1:
            layers.append(nn.AvgPool2d(kernel_size=(2, 2), stride=2))
        layers.extend([
            Conv2d(in_channels, out_channels, kernel_size=1, bias=False),
            Norm(out_channels),
        ])
        self.shortcut = nn.Sequential(*layers)
    else:
        self.shortcut = nn.Identity()
    self.relu = Act('default')
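# A plausible forward pass for this bottleneck (a sketch; the SE module is
# applied between conv2 and conv3, following the order in which the layers
# are declared above, which is an assumption about the intended wiring).
def forward(self, x):
    out = self.conv1(x)
    out = self.conv2(out)
    if self.use_se:
        out = self.se(out)
    out = self.conv3(out)
    out = out + self.shortcut(x)
    return self.relu(out)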
def __init__(self, in_channels, channels, out_channels, kernel_size, stride,
             activation='relu6', with_se=True):
    super().__init__()
    self.with_se = with_se
    if in_channels != channels:
        self.expand = Conv2d(in_channels, channels, kernel_size=1,
                             norm='default', act=activation)
    else:
        self.expand = nn.Identity()
    self.dwconv = Conv2d(channels, channels, kernel_size, stride,
                         groups=channels, norm='default', act=activation)
    if self.with_se:
        self.se = SELayerM(channels, 4)
    self.project = Conv2d(channels, out_channels, kernel_size=1,
                          norm='default')
    self.use_res_connect = stride == 1 and in_channels == out_channels
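# Sketch of the corresponding forward (an assumption based on the standard
# MobileNetV3 inverted-residual pattern: expand -> depthwise -> SE ->
# project, with an identity skip when shapes allow it):
def forward(self, x):
    out = self.expand(x)
    out = self.dwconv(out)
    if self.with_se:
        out = self.se(out)
    out = self.project(out)
    if self.use_res_connect:
        out = out + x
    return out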
def __init__(self, in_channels, channels, stride, group_channels, cardinality,
             start_block=False, end_block=False, exclude_bn0=False):
    super().__init__()
    out_channels = channels * self.expansion
    width = group_channels * cardinality
    if not start_block and not exclude_bn0:
        self.bn0 = Norm(in_channels)
    if not start_block:
        self.act0 = Act()
    self.conv1 = Conv2d(in_channels, width, kernel_size=1)
    self.bn1 = Norm(width)
    self.act1 = Act()
    self.conv2 = Conv2d(width, width, kernel_size=3, stride=stride,
                        groups=cardinality, norm='def', act='def')
    self.conv3 = Conv2d(width, out_channels, kernel_size=1)
    if start_block:
        self.bn3 = Norm(out_channels)
    if end_block:
        self.bn3 = Norm(out_channels)
        self.act3 = Act()
    if stride != 1 or in_channels != out_channels:
        shortcut = []
        if stride != 1:
            shortcut.append(Pool2d(2, 2, type='avg'))
        shortcut.append(
            Conv2d(in_channels, out_channels, kernel_size=1, norm='def'))
        self.shortcut = Sequential(shortcut)
    else:
        self.shortcut = Identity()
    self.start_block = start_block
    self.end_block = end_block
    self.exclude_bn0 = exclude_bn0
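# A hedged sketch of the forward implied by the start/end/exclude_bn0 flags
# (the iResNet-style pre-activation layout is an assumption): the first block
# of a stage skips the pre-activation, and the last block re-normalizes after
# the residual addition.
def forward(self, x):
    if self.start_block:
        out = x
    elif self.exclude_bn0:
        out = self.act0(x)
    else:
        out = self.act0(self.bn0(x))
    out = self.conv1(out)
    out = self.act1(self.bn1(out))
    out = self.conv2(out)
    out = self.conv3(out)
    if self.start_block:
        out = self.bn3(out)
    out = out + self.shortcut(x)
    if self.end_block:
        out = self.act3(self.bn3(out))
    return out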
def __init__(self, in_channels, out_channels, stride=1, depthwise=True):
    super().__init__()
    self.layers = nn.Sequential(
        Norm(in_channels),
        Act(),
        Conv2d(in_channels, out_channels, kernel_size=1),
        Norm(out_channels),
        Act(),
        Conv2d(out_channels, out_channels, kernel_size=3, stride=stride,
               groups=out_channels if depthwise else 1),
        Norm(out_channels),
        Act(),
        Conv2d(out_channels, out_channels, kernel_size=1),
    )
    if in_channels != out_channels or stride != 1:
        self.shortcut = Conv2d(in_channels, out_channels,
                               kernel_size=1, stride=stride)
    else:
        self.shortcut = nn.Identity()
def __init__(self, in_channels, channels, stride, cardinality, base_width):
    super().__init__()
    out_channels = channels * self.expansion
    D = math.floor(channels * (base_width / 64))
    C = cardinality
    self.conv1 = Conv2d(in_channels, D * C, kernel_size=1,
                        norm='def', act='def')
    self.conv2 = Conv2d(D * C, D * C, kernel_size=3, stride=stride,
                        groups=cardinality, norm='def', act='def')
    self.conv3 = Conv2d(D * C, out_channels, kernel_size=1, norm='def')
    # The projection shortcut must also kick in when stride != 1, otherwise
    # the identity branch would have a mismatched spatial size.
    self.shortcut = Conv2d(
        in_channels, out_channels, kernel_size=1, stride=stride,
        norm='def') if stride != 1 or in_channels != out_channels else Identity()
    self.act = Act()
def __init__(self, C_in, C_out):
    super().__init__()
    assert C_out % 2 == 0
    self.act = Act('relu', inplace=False)
    self.conv_1 = Conv2d(C_in, C_out // 2, 1, stride=2, bias=False)
    self.conv_2 = Conv2d(C_in, C_out // 2, 1, stride=2, bias=False)
    self.bn = Norm(C_out)
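# Likely forward for this factorized reduce (a sketch following the DARTS
# convention: the second path is offset by one pixel so the two stride-2
# convolutions together cover all spatial positions):
def forward(self, x):
    x = self.act(x)
    out = torch.cat([self.conv_1(x), self.conv_2(x[:, :, 1:, 1:])], dim=1)
    return self.bn(out)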
def __init__(self, stem_channels, channels_per_stage, units_per_stage,
             final_channels, num_classes=10, use_se=True):
    super().__init__()
    self.stem = Conv2d(3, stem_channels, kernel_size=3,
                       act='def', norm='def')
    # block = ResUnit if residual else BasicUnit
    block = BasicUnit
    self.stage1 = _make_layer(block, units_per_stage[0], stem_channels,
                              channels_per_stage[0], 1, use_se)
    self.stage2 = _make_layer(block, units_per_stage[1], channels_per_stage[0],
                              channels_per_stage[1], 2, use_se)
    self.stage3 = _make_layer(block, units_per_stage[2], channels_per_stage[1],
                              channels_per_stage[2], 2, use_se)
    self.final_block = Conv2d(channels_per_stage[2], final_channels,
                              kernel_size=1, act='def', norm='def')
    self.final_pool = nn.AdaptiveAvgPool2d(1)
    self.fc = nn.Linear(final_channels, num_classes)
def __init__(self, in_channels, out_channels, stride=1, dropout=0, use_se=False):
    super().__init__()
    self.use_se = use_se
    self._dropout = dropout
    self.norm1 = Norm(in_channels)
    self.act1 = Act()
    self.conv1 = Conv2d(in_channels, out_channels, kernel_size=3, stride=stride)
    self.norm2 = Norm(out_channels)
    self.act2 = Act()
    if self._dropout:
        self.dropout = nn.Dropout(dropout)
    self.conv2 = Conv2d(out_channels, out_channels, kernel_size=3)
    if self.use_se:
        self.se = SEModule(out_channels, reduction=8)
    self.shortcut = Conv2d(in_channels, out_channels,
                           kernel_size=1, stride=stride)
def __init__(self, in_channels, channels, stride, erase_relu):
    super().__init__()
    out_channels = channels * self.expansion
    self.conv1 = Conv2d(in_channels, out_channels, kernel_size=3,
                        stride=stride, norm='def', act='def')
    self.conv2 = Conv2d(out_channels, out_channels, kernel_size=3, norm='def')
    if stride != 1 or in_channels != out_channels:
        shortcut = []
        if stride != 1:
            shortcut.append(Pool2d(2, 2, type='avg'))
        shortcut.append(
            Conv2d(in_channels, out_channels, kernel_size=1, norm='def'))
        self.shortcut = Sequential(shortcut)
    else:
        self.shortcut = Identity()
    self.act = Act() if not erase_relu else Identity()
def __init__(self, in_channels, reduction=8):
    super().__init__()
    channels = in_channels // reduction
    self.conv_theta = Conv2d(in_channels, channels, kernel_size=1)
    self.conv_phi = Conv2d(in_channels, channels, kernel_size=1)
    self.conv_g = Conv2d(in_channels, channels, kernel_size=1)
    self.conv_attn = Conv2d(channels, in_channels, kernel_size=1)
    self.sigma = nn.Parameter(torch.zeros(1), requires_grad=True)
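# A hedged sketch of the non-local / self-attention forward implied by the
# theta/phi/g naming (the exact reshape and transpose order is an assumption):
def forward(self, x):
    b, c, h, w = x.shape
    theta = self.conv_theta(x).view(b, -1, h * w)   # (B, C', N)
    phi = self.conv_phi(x).view(b, -1, h * w)       # (B, C', N)
    g = self.conv_g(x).view(b, -1, h * w)           # (B, C', N)
    # pairwise similarity over all N = h * w positions
    attn = torch.softmax(torch.bmm(theta.transpose(1, 2), phi), dim=-1)
    out = torch.bmm(g, attn.transpose(1, 2)).view(b, -1, h, w)
    # sigma is a learned gate, initialized to zero so the block starts out
    # as an identity mapping.
    return x + self.sigma * self.conv_attn(out)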
def __init__(self, in_channels, reduction=4):
    super().__init__()
    channels = in_channels // reduction
    self.layers = nn.Sequential(
        nn.AdaptiveAvgPool2d(1),
        Conv2d(in_channels, channels, kernel_size=1, norm='bn', act='relu'),
        Conv2d(channels, in_channels, kernel_size=1, bias=False),
        HardSigmoid(True),
    )
def __init__(self, channels, reduction):
    super().__init__()
    c = channels // reduction
    self.avg_pool = nn.AdaptiveAvgPool2d((1, 1))
    self.f_ex = nn.Sequential(
        Conv2d(channels, c, 1),
        Act(),
        Conv2d(c, channels, 1),
        nn.Sigmoid(),
    )
def __init__(self):
    super().__init__(
        Conv2d(1, 6, kernel_size=5, norm='default', act='default'),
        nn.AvgPool2d(kernel_size=2, stride=2),
        Conv2d(6, 16, kernel_size=5, norm='default', act='default'),
        nn.AvgPool2d(kernel_size=2, stride=2),
        Flatten(),
        nn.Linear(8 * 8 * 16, 120),
        nn.Linear(120, 84),
        nn.Linear(84, 10),
    )
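# Usage sketch (assumes this library's Conv2d pads to keep spatial size, so a
# 1x32x32 input shrinks only in the two pooling layers: 32 -> 16 -> 8, giving
# the 8 * 8 * 16 features expected by the first Linear; the class name below
# is hypothetical):
#
#     model = LeNet5()
#     logits = model(torch.randn(1, 1, 32, 32))  # -> shape (1, 10)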
def __init__(self, in_channels, channels, stride, dropout, drop_path,
             start_block=False, end_block=False, exclude_bn0=False):
    super().__init__()
    # For torch.jit.script
    self.bn0 = Identity()
    self.act0 = Identity()
    self.act2 = Identity()
    self.bn2 = Identity()

    out_channels = channels * self.expansion
    if not start_block and not exclude_bn0:
        self.bn0 = Norm(in_channels)
    if not start_block:
        self.act0 = Act()
    self.conv1 = Conv2d(in_channels, out_channels, kernel_size=3, stride=stride)
    self.bn1 = Norm(out_channels)
    self.act1 = Act()
    self.dropout = Dropout(dropout) if dropout else Identity()
    self.conv2 = Conv2d(out_channels, out_channels, kernel_size=3)
    if start_block:
        self.bn2 = Norm(out_channels)
    self.drop_path = DropPath(drop_path) if drop_path else Identity()
    if end_block:
        self.bn2 = Norm(out_channels)
        self.act2 = Act()
    if stride != 1 or in_channels != out_channels:
        shortcut = []
        if stride != 1:
            shortcut.append(Pool2d(2, 2, type='avg'))
        shortcut.append(
            Conv2d(in_channels, out_channels, kernel_size=1, norm='def'))
        self.shortcut = Sequential(shortcut)
    else:
        self.shortcut = Identity()
    self.start_block = start_block
    self.end_block = end_block
    self.exclude_bn0 = exclude_bn0
def __init__(self, in_channels):
    super().__init__()
    self.conv_theta = Conv2d(in_channels, in_channels // 8, kernel_size=1)
    self.conv_phi = Conv2d(in_channels, in_channels // 8, kernel_size=1)
    self.pool_phi = nn.MaxPool2d(kernel_size=2, stride=2)
    self.conv_g = Conv2d(in_channels, in_channels // 2, kernel_size=1)
    self.pool_g = Pool2d(kernel_size=2, stride=2, type='max')
    self.conv_attn = Conv2d(in_channels // 2, in_channels, kernel_size=1)
    self.sigma = nn.Parameter(torch.zeros(1), requires_grad=True)
def __init__(self, C, num_classes):
    """Assuming input size 8x8."""
    super().__init__()
    self.features = nn.Sequential(
        Act('relu', inplace=True),
        Pool2d(5, stride=3, padding=0, type='avg'),
        Conv2d(C, 128, 1, norm='def', act='relu'),
        Conv2d(128, 768, 2, norm='def', act='relu', padding=0),
    )
    self.classifier = nn.Sequential(
        GlobalAvgPool(),
        Linear(768, num_classes),
    )
def __init__(self, num_classes=10, width_mult=1.0, depth_coef=1.0,
             dropout=0.2, drop_path=0.3):
    super().__init__()
    in_channels = 32
    last_channels = 1280
    setting = [
        # r, k, s, e, i, o, se
        [1, 3, 1, 1, 32, 16, 0.25],
        [2, 3, 1, 6, 16, 24, 0.25],
        [2, 5, 1, 6, 24, 40, 0.25],
        [3, 3, 2, 6, 40, 80, 0.25],
        [3, 5, 1, 6, 80, 112, 0.25],
        [4, 5, 2, 6, 112, 192, 0.25],
        [1, 3, 1, 6, 192, 320, 0.25],
    ]
    in_channels = round_channels(in_channels, width_mult)
    last_channels = round_channels(last_channels, width_mult)

    # building stem
    self.features = nn.Sequential()
    self.features.init_block = Conv2d(3, in_channels, kernel_size=3, stride=1,
                                      norm='default', act='swish')

    si = 1
    j = 1
    stage = nn.Sequential()
    # building inverted residual blocks
    for idx, (r, k, s, e, i, o, se) in enumerate(setting):
        drop_rate = drop_path * (float(idx) / len(setting))
        if s == 2:
            self.features.add_module("stage%d" % si, stage)
            si += 1
            j = 1
            stage = nn.Sequential()
        in_channels = round_channels(i, width_mult)
        out_channels = round_channels(o, width_mult)
        stage.add_module("unit%d" % j, MBConv(
            in_channels, out_channels, k, s, e, se, drop_rate=drop_rate))
        j += 1
        for _ in range(round_repeats(r, depth_coef) - 1):
            stage.add_module("unit%d" % j, MBConv(
                out_channels, out_channels, k, 1, e, se, drop_rate=drop_rate))
            j += 1
    self.features.add_module("stage%d" % si, stage)
    self.features.add_module(
        "final_block", Conv2d(out_channels, last_channels, kernel_size=1,
                              norm='default', act='swish'))
    self.classifier = nn.Sequential(
        nn.AdaptiveAvgPool2d(1),
        nn.Dropout(dropout),
        Conv2d(last_channels, num_classes, kernel_size=1),
    )
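# Plausible forward for this network (an assumption; the classifier is fully
# convolutional, so the 1x1 output map is flattened into logits at the end):
def forward(self, x):
    x = self.features(x)
    x = self.classifier(x)
    return x.flatten(1)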
def __init__(self, in_channels, out_channels, dropout, use_se, drop_path):
    super().__init__()
    self.norm1 = Norm(in_channels)
    self.act1 = Act()
    self.conv1 = Conv2d(in_channels, out_channels, kernel_size=3)
    self.norm2 = Norm(out_channels)
    self.act2 = Act()
    if dropout:
        self.dropout = nn.Dropout(dropout)
    self.conv2 = Conv2d(out_channels, out_channels, kernel_size=3)
    if use_se:
        self.se = SEModule(out_channels, reduction=8)
    if drop_path:
        self.drop_path = DropPath(drop_path)
def __init__(self, C_in, C_out, kernel_size, stride, dilation):
    super().__init__()
    self.op = nn.Sequential(
        Act('relu', inplace=False),
        Conv2d(C_in, C_in, kernel_size=kernel_size, stride=stride,
               dilation=dilation, groups=C_in, bias=False),
        Conv2d(C_in, C_out, kernel_size=1, bias=False),
        Norm(C_out),
    )
def __init__(self, in_channels, out_channels, stride=1):
    super().__init__()
    self.conv = nn.Sequential(
        Norm(in_channels),
        Conv2d(in_channels, out_channels, kernel_size=3,
               stride=stride, bias=False),
        Norm(out_channels),
        Act(),
        Conv2d(out_channels, out_channels, kernel_size=3, bias=False),
        Norm(out_channels),
    )
    self.shortcut = Shortcut(in_channels, out_channels, stride)
def __init__(self, C_in, C_out, kernel_size, stride=1):
    super().__init__()
    self.op = nn.Sequential(
        Act('relu', inplace=False),
        Conv2d(C_in, C_out, kernel_size, bias=False, stride=stride),
        Norm(C_out),
    )
def __init__(self, in_channels, out_channels, kernel_size, activation, use_se):
    super().__init__()
    assert kernel_size in [3, 5, 7]
    mid_channels = out_channels // 2
    output = out_channels - in_channels
    branch_main = [
        Conv2d(in_channels, mid_channels, kernel_size=1,
               norm='default', act=activation),
        DWConv2d(mid_channels, output, kernel_size=kernel_size, stride=2,
                 norm='default', act=activation),
    ]
    if use_se:
        branch_main.append(SELayer(output, reduction=2))
    self.branch_main = nn.Sequential(*branch_main)
    self.branch_proj = DWConv2d(in_channels, in_channels,
                                kernel_size=kernel_size, stride=2,
                                norm='default', act=activation)
def __init__(self, in_channels, out_channels, use_se):
    super().__init__()
    assert out_channels % 2 == 0
    channels = out_channels // 2
    self.branch1 = DWConv2d(in_channels, channels, kernel_size=3, stride=2,
                            norm='def', act='def')
    branch2 = [
        Conv2d(in_channels, channels, kernel_size=1, act='def', norm='def'),
        DWConv2d(channels, channels, kernel_size=3, stride=2,
                 norm='def', act='def'),
    ]
    if use_se:
        branch2.append(SELayer(channels, reduction=2))
    self.branch2 = nn.Sequential(*branch2)
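# Sketch of the forward implied by the two branches (ShuffleNetV2-style
# downsampling; a channel_shuffle helper is assumed to exist in this
# codebase, as in the reference ShuffleNetV2 implementation):
def forward(self, x):
    out = torch.cat([self.branch1(x), self.branch2(x)], dim=1)
    # interleave the two halves so information flows across branches
    return channel_shuffle(out, 2)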
def __init__(self, depth, k, num_classes=10, depthwise=True):
    super().__init__()
    num_blocks = (depth - 4) // 6
    self.stem = Conv2d(3, self.stages[0], kernel_size=3)
    self.layer1 = self._make_layer(self.stages[0] * 1, self.stages[1] * k,
                                   num_blocks, stride=1, depthwise=depthwise)
    self.layer2 = self._make_layer(self.stages[1] * k, self.stages[2] * k,
                                   num_blocks, stride=2, depthwise=depthwise)
    self.layer3 = self._make_layer(self.stages[2] * k, self.stages[3] * k,
                                   num_blocks, stride=2, depthwise=depthwise)
    self.norm = Norm(self.stages[3] * k)
    self.act = Act()
    self.avgpool = GlobalAvgPool()
    self.fc = Linear(self.stages[3] * k, num_classes)
def __init__(self, start_channels, num_classes, block, widening_factor, depth):
    super().__init__()
    if block == 'basic':
        block = BasicBlock
        num_layers = [(depth - 2) // 6] * 3
    elif block == 'bottleneck':
        block = Bottleneck
        num_layers = [(depth - 2) // 9] * 3
    else:
        raise ValueError("invalid block type: %s" % block)
    strides = [1, 2, 2]

    self.add_channel = widening_factor / sum(num_layers)
    self.in_channels = start_channels
    self.channels = start_channels

    layers = [Conv2d(3, start_channels, kernel_size=3, norm='default')]
    for n, s in zip(num_layers, strides):
        layers.append(self._make_layer(block, n, stride=s))
    self.features = nn.Sequential(*layers)
    assert (start_channels + widening_factor) * block.expansion == self.in_channels

    self.post_activ = nn.Sequential(
        Norm(self.in_channels),
        Act('default'),
    )
    self.final_pool = nn.AdaptiveAvgPool2d(1)
    self.output = nn.Linear(self.in_channels, num_classes)
def __init__(self, depth, group_channels=(64, 128, 256), cardinality=(8, 8, 8),
             num_classes=100, stages=(64, 64, 128, 256)):
    super().__init__()
    self.stages = stages
    block = Bottleneck
    layers = [(depth - 2) // 9] * 3
    if isinstance(group_channels, int):
        group_channels = [group_channels] * 3
    if isinstance(cardinality, int):
        cardinality = [cardinality] * 3

    self.stem = Conv2d(3, self.stages[0], kernel_size=3,
                       norm='def', act='def')
    self.in_channels = self.stages[0]

    self.layer1 = self._make_layer(
        block, self.stages[1], layers[0], stride=1,
        group_channels=group_channels[0], cardinality=cardinality[0])
    self.layer2 = self._make_layer(
        block, self.stages[2], layers[1], stride=2,
        group_channels=group_channels[1], cardinality=cardinality[1])
    self.layer3 = self._make_layer(
        block, self.stages[3], layers[2], stride=2,
        group_channels=group_channels[2], cardinality=cardinality[2])

    self.avgpool = GlobalAvgPool()
    self.fc = Linear(self.in_channels, num_classes)
def __init__(self, C, layers, auxiliary, drop_path, num_classes, genotype):
    super().__init__()
    self._num_layers = layers
    self._auxiliary = auxiliary
    self._drop_path = drop_path

    stem_multiplier = 3
    C_curr = stem_multiplier * C
    self.stem = Conv2d(3, C_curr, 3, norm='def')

    C_prev_prev, C_prev, C_curr = C_curr, C_curr, C
    self.cells = nn.ModuleList()
    reduction_prev = False
    for i in range(layers):
        if i in [layers // 3, 2 * layers // 3]:
            C_curr *= 2
            reduction = True
        else:
            reduction = False
        cell = Cell(genotype, C_prev_prev, C_prev, C_curr,
                    reduction, reduction_prev, drop_path)
        reduction_prev = reduction
        self.cells.append(cell)
        C_prev_prev, C_prev = C_prev, cell.multiplier * C_curr
        if auxiliary and i == 2 * layers // 3:
            C_to_auxiliary = C_prev

    if auxiliary:
        self.auxiliary_head = AuxiliaryHeadCIFAR(C_to_auxiliary, num_classes)
    self.classifier = nn.Sequential(
        GlobalAvgPool(),
        Linear(C_prev, num_classes),
    )
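# A hedged forward sketch following the standard DARTS network layout (the
# cell call signature and the auxiliary-head position at 2/3 depth are
# assumptions based on the constructor above):
def forward(self, x):
    logits_aux = None
    s0 = s1 = self.stem(x)
    for i, cell in enumerate(self.cells):
        s0, s1 = s1, cell(s0, s1)
        if self._auxiliary and self.training and i == 2 * self._num_layers // 3:
            logits_aux = self.auxiliary_head(s1)
    logits = self.classifier(s1)
    return logits, logits_aux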
def _make_layer(block, num_units, in_channels, out_channels, stride, use_se):
    units = nn.Sequential()
    units.add_module(
        "unit1",
        ReduceUnit(in_channels, out_channels) if stride == 2
        else Conv2d(in_channels, out_channels, kernel_size=3,
                    norm='default', act='default'))
    for i in range(1, num_units):
        units.add_module(f"unit{i + 1}", block(out_channels, use_se))
    return units