def _build(self, s_in: Shape, s_out: Shape) -> Shape:
    before, after, squeeze = [], [], [nn.AdaptiveAvgPool2d(1), SqueezeModule()]
    if self.gap_first:
        after = [
            nn.Linear(s_in.num_features(), self.features, bias=True),  # no affine bn -> use bias
            Register.act_funs.get(self.act_fun)(inplace=True)
        ]
        self.cached['shape_inner'] = Shape([self.features])
    else:
        before = [
            nn.Conv2d(s_in.num_features(), self.features, 1, 1, 0, bias=False),
            nn.BatchNorm2d(self.features, affine=True),
            Register.act_funs.get(self.act_fun)(inplace=True)
        ]
        self.cached['shape_inner'] = Shape([self.features, s_in.shape[1], s_in.shape[2]])
    ops = before + squeeze + after + [
        nn.Dropout(p=self.dropout),
        nn.Linear(self.features, s_out.num_features(), bias=self.bias)
    ]
    self.head_module = nn.Sequential(*ops)
    return self.probe_outputs(s_in)
def _build(self, s_in: Shape, c_out: int, weight_functions=()) -> Shape:
    steps = []
    for s in self.order.split('_'):
        if s == 'bn' and self.use_bn and self.batchnorm_fun is not None:
            bn = self._get_bn(s_in.num_features(), c_out)
            if bn is not None:
                steps.append(bn)
        if s == 'w':
            if (self.dropout_rate > 0 or self.dropout_keep) and self.dropout_fun is not None:
                steps.append(self.dropout_fun(self.dropout_rate, inplace=self.dropout_inplace))
            else:
                self.dropout_rate = 0.0
            steps.extend(weight_functions)
        if s == 'act':
            act = Register.act_funs.get(self.act_fun)(inplace=self.act_inplace)
            if act is not None:
                steps.append(act)
    if (c_out > s_in.num_features()) and not self.changes_c:
        steps.append(PaddingToValueModule(c_out, dim=1))
    self.steps = nn.ModuleList(steps)
    return self.probe_outputs(s_in, multiple_outputs=False)
def get_mobilenet_v3_small100(s_in=Shape([3, 224, 224]), s_out=Shape([1000])) -> nn.Module:
    stem = get_stem_instance(MobileNetV2Stem, features=16, features1=16, act_fun='hswish',
                             act_fun1='relu', stride1=2, se_cmul1=0.5)
    head = get_head_instance(FeatureMixClassificationHead, features=1024, act_fun='hswish',
                             gap_first=True, bias=True)
    defaults = dict(padding='same', dilation=1, bn_affine=True, act_inplace=True)
    se = dict(att_cls='SqueezeExcitationChannelModule', use_c_substitute=False, c_mul=0.25,
              squeeze_act='relu', excite_act='sigmoid', divisible=8,
              squeeze_bias=True, excite_bias=True, squeeze_bn=False)
    cell_partials, cell_order = get_passthrough_partials([
        (24, MobileInvertedConvLayer, defaults, dict(stride=2, k_size=3, expansion=4.5, act_fun='relu')),
        (24, MobileInvertedConvLayer, defaults, dict(stride=1, k_size=3, expansion=3.5, act_fun='relu')),
        (40, MobileInvertedConvLayer, defaults, dict(stride=2, k_size=5, expansion=4, act_fun='hswish', att_dict=se)),
        (40, MobileInvertedConvLayer, defaults, dict(stride=1, k_size=5, expansion=6, act_fun='hswish', att_dict=se)),
        (40, MobileInvertedConvLayer, defaults, dict(stride=1, k_size=5, expansion=6, act_fun='hswish', att_dict=se)),
        (48, MobileInvertedConvLayer, defaults, dict(stride=1, k_size=5, expansion=3, act_fun='hswish', att_dict=se)),
        (48, MobileInvertedConvLayer, defaults, dict(stride=1, k_size=5, expansion=3, act_fun='hswish', att_dict=se)),
        (96, MobileInvertedConvLayer, defaults, dict(stride=2, k_size=5, expansion=6, act_fun='hswish', att_dict=se)),
        (96, MobileInvertedConvLayer, defaults, dict(stride=1, k_size=5, expansion=6, act_fun='hswish', att_dict=se)),
        (96, MobileInvertedConvLayer, defaults, dict(stride=1, k_size=5, expansion=6, act_fun='hswish', att_dict=se)),
        (576, ConvLayer, dict(), dict(k_size=1, bias=False, act_fun='hswish', act_inplace=True,
                                      order='w_bn_act', use_bn=True, bn_affine=True)),
    ])
    return get_network(StackedCellsNetworkBody, stem, head, cell_partials, cell_order, s_in, s_out)
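# Hedged usage sketch: build the network above and run a forward pass on random
# input. Whether the network returns a single tensor or a list with one entry
# per head depends on the network body, so both cases are handled here.
def _demo_mobilenet_v3_small100():
    import torch
    net = get_mobilenet_v3_small100()
    with torch.no_grad():
        out = net(torch.randn(2, 3, 224, 224))  # batch of 2 RGB images
    logits = out[-1] if isinstance(out, (list, tuple)) else out
    print(logits.shape)  # expected: torch.Size([2, 1000])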
def _build(self, s_in: Shape, s_out: Shape) -> ShapeList:
    """ build the network, count params, log, maybe load pretrained weights """
    assert isinstance(s_out, Shape), "Attempting to build a network with an output that is not a Shape!"
    s_out_copy = s_out.copy(copy_id=True)
    self.shape_in = s_in.copy(copy_id=True)
    s_out_net = self._build2(s_in, s_out)
    LoggerManager().get_logger().info('Network built, it has %d parameters!' % self.get_num_parameters())
    # validate output shape sizes
    assert isinstance(s_out_net, ShapeList), "The network must output a list of Shapes, one shape per head! (ShapeList)"
    for shape in s_out_net.shapes:
        if not s_out_copy == shape:
            text = "One or more output shapes mismatch: %s, expected: %s" % (s_out_net, s_out_copy)
            if self.assert_output_match:
                raise ValueError(text)
            else:
                LoggerManager().get_logger().warning(text)
            break
    # load weights?
    if len(self.checkpoint_path) > 0:
        path = CheckpointCallback.find_pretrained_weights_path(self.checkpoint_path, self.model_name,
                                                               raise_missing=len(self.checkpoint_path) > 0)
        num_replacements = 1 if self.is_external() else 999
        self.loaded_weights(CheckpointCallback.load_network(path, self.get_network(), num_replacements))
    self.shape_out = s_out_net.shapes[0].copy(copy_id=True)
    self.shape_in_list = self.shape_in.shape
    self.shape_out_list = self.shape_out.shape
    return s_out_net
def get_mobilenet_v2(s_in=Shape([3, 224, 224]), s_out=Shape([1000])) -> nn.Module:
    stem = get_stem_instance(MobileNetV2Stem, features=32, features1=16, act_fun='relu6', act_fun1='relu6')
    head = get_head_instance(FeatureMixClassificationHead, features=1280, act_fun='relu6')
    defaults = dict(k_size=3, stride=1, padding='same', expansion=6, dilation=1,
                    bn_affine=True, act_fun='relu6', act_inplace=True, att_dict=None)
    cell_partials, cell_order = get_passthrough_partials([
        (24, MobileInvertedConvLayer, defaults, dict(stride=2)),
        (24, MobileInvertedConvLayer, defaults, dict(stride=1)),
        (32, MobileInvertedConvLayer, defaults, dict(stride=2)),
        (32, MobileInvertedConvLayer, defaults, dict(stride=1)),
        (32, MobileInvertedConvLayer, defaults, dict(stride=1)),
        (64, MobileInvertedConvLayer, defaults, dict(stride=2)),
        (64, MobileInvertedConvLayer, defaults, dict(stride=1)),
        (64, MobileInvertedConvLayer, defaults, dict(stride=1)),
        (64, MobileInvertedConvLayer, defaults, dict(stride=1)),
        (96, MobileInvertedConvLayer, defaults, dict(stride=1)),
        (96, MobileInvertedConvLayer, defaults, dict(stride=1)),
        (96, MobileInvertedConvLayer, defaults, dict(stride=1)),
        (160, MobileInvertedConvLayer, defaults, dict(stride=2)),
        (160, MobileInvertedConvLayer, defaults, dict(stride=1)),
        (160, MobileInvertedConvLayer, defaults, dict(stride=1)),
        (320, MobileInvertedConvLayer, defaults, dict(stride=1)),
    ])
    return get_network(StackedCellsNetworkBody, stem, head, cell_partials, cell_order, s_in, s_out)
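# Hedged sanity check: the reference MobileNetV2 has roughly 3.5 million
# parameters, so a quick count is an easy way to catch wiring mistakes in the
# cell list (the exact number may differ slightly in this implementation).
def _count_mobilenet_v2_params() -> int:
    net = get_mobilenet_v2()
    return sum(p.numel() for p in net.parameters())  # expected: ~3.5e6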
def from_args(cls, args: Namespace, index: int = None) -> AbstractDataSet:
    # set class attributes
    data_shape, target_shape = cls._parsed_arguments(['data_shape', 'target_shape'], args, index=index)
    cls.data_raw_shape = Shape(split(data_shape, int))
    cls.label_shape = Shape(split(target_shape, int))
    # default generation now
    return super().from_args(args, index)
def _build(self, s_in: Shape, s_out: Shape) -> ShapeList:
    LoggerManager().get_logger().info('Building %s:' % self.__class__.__name__)
    rows = [('cell index', 'name', 'class', 'input shapes', '', 'output shapes', '#params')]

    def get_row(idx, name: str, obj: AbstractModule) -> tuple:
        s_in_str = obj.get_shape_in().str()
        s_inner = obj.get_cached('shape_inner')
        s_inner_str = '' if s_inner is None else s_inner.str()
        s_out_str = obj.get_shape_out().str()
        return str(idx), name, obj.__class__.__name__, s_in_str, s_inner_str, s_out_str, count_parameters(obj)

    s_out_data = s_out.copy()
    out_shapes = self.stem.build(s_in)
    final_out_shapes = []
    rows.append(get_row('', '-', self.stem))
    # cells and (aux) heads
    updated_cell_order = []
    for i, cell_name in enumerate(self.cell_order):
        strategy_name, cell = self._get_cell(name=cell_name, cell_index=i)
        assert self.stem.num_outputs() == cell.num_inputs() == cell.num_outputs(), 'Cell does not fit the network!'
        updated_cell_order.append(cell.name)
        s_ins = out_shapes[-cell.num_inputs():]
        with StrategyManagerDefault(strategy_name):
            s_out = cell.build(s_ins.copy(),
                               features_mul=self.features_mul,
                               features_fixed=self.features_first_cell if i == 0 else -1)
        out_shapes.extend(s_out)
        rows.append(get_row(i, cell_name, cell))
        self.cells.append(cell)
        # optional (aux) head after every cell
        head = self._head_positions.get(i, None)
        if head is not None:
            if head.weight > 0:
                final_out_shapes.append(head.build(s_out[-1], s_out_data))
                rows.append(get_row('', '-', head))
            else:
                LoggerManager().get_logger().info('not adding head after cell %d, weight <= 0' % i)
                del self._head_positions[i]
        else:
            assert i != len(self.cell_order) - 1, "Must have a head after the final cell"
    # remove heads that are impossible to add
    for i in self._head_positions.keys():
        if i >= len(self.cells):
            LoggerManager().get_logger().warning('Can not add a head after cell %d which does not exist, deleting the head!' % i)
            head = self._head_positions.get(i)
            for j, head2 in enumerate(self.heads):
                if head is head2:
                    self.heads.__delitem__(j)
                    break
    s_out = ShapeList(final_out_shapes)
    rows.append(('complete network', '', '', self.get_shape_in().str(), '', s_out.str(), count_parameters(self)))
    log_in_columns(LoggerManager().get_logger(), rows, start_space=4)
    self.set(cell_order=updated_cell_order)
    return s_out
def example_export_network(path: str) -> AbstractUninasNetwork:
    """ create a new network and export it, does not require onnx to be installed """
    network = get_network("FairNasC", Shape([3, 224, 224]), Shape([1000]), weights_path=None)
    network = network.cuda()
    network.export_onnx(path, export_params=True)
    return network
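# Hedged follow-up sketch: verify the exported file with onnxruntime, an extra
# dependency that the export itself does not need.
def _check_exported_onnx(path: str):
    import numpy as np
    import onnxruntime as ort
    sess = ort.InferenceSession(path)
    x = np.random.randn(1, 3, 224, 224).astype(np.float32)
    outputs = sess.run(None, {sess.get_inputs()[0].name: x})
    assert outputs[0].shape == (1, 1000)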
def _build(self, s_in: Shape, c_out: int) -> Shape:
    assert c_out - s_in.num_features() >= 0
    self.conv = nn.Conv2d(s_in.num_features(), c_out, kernel_size=1, stride=1, padding=0, bias=False)
    return self.probe_outputs(s_in)
def _build2(self, s_in: Shape, s_out: Shape) -> ShapeList:
    """ build the network """
    assert s_in.num_dims() == s_out.num_dims() == 1
    s_cur = self.stem.build(s_in, c_out=self._layer_widths[0])
    for i in range(len(self._layer_widths) - 1):
        s_cur = self.cells[i].build(s_cur, c_out=self._layer_widths[i + 1])
    s_heads = [h.build(s_cur, c_out=s_out.num_features()) for h in self.heads]
    return ShapeList(s_heads)
def _build(self, s_in: Shape, s_out: Shape) -> Shape:
    self.head_module = nn.Sequential(*[
        nn.BatchNorm2d(s_in.num_features()),
        nn.ReLU(inplace=True),
        nn.AdaptiveAvgPool2d(1),
        SqueezeModule(),
        nn.Dropout(p=0.0),
        nn.Linear(s_in.num_features(), s_out.num_features(), bias=True)
    ])
    return self.probe_outputs(s_in)
def _build(self, s_in: Shape, c_out: int) -> Shape:
    assert not (c_out <= s_in.num_features() and self.stride > 1), "must increase num features when stride is >1"
    assert s_in.num_features() % 4 == 0 and c_out % 2 == 0, "input features must be divisible by 4, output features by 2"
    padding = get_padding(self.padding, self.k_size, self.stride, self.dilation)
    padding2 = get_padding(self.padding, self.k_size, 1, self.dilation)
    if self.stride >= 2:
        c_side = c_main_in = s_in.num_features()
        self.branch_proj = nn.Sequential(*[
            # dw
            nn.Conv2d(c_side, c_side, self.k_size, self.stride, padding, groups=c_side, bias=False),
            nn.BatchNorm2d(c_side, affine=self.bn_affine),
            # pw
            nn.Conv2d(c_side, c_side, 1, 1, 0, bias=False),
            nn.BatchNorm2d(c_side, affine=self.bn_affine),
            Register.act_funs.get(self.act_fun)(inplace=self.act_inplace),
        ])
    else:
        c_side = c_main_in = s_in.num_features() // 2
    c_main_out = c_out - c_side
    c_main_mid = int(c_out // 2 * self.expansion)
    bm = [
        # dw 1
        nn.Conv2d(c_main_in, c_main_in, self.k_size, self.stride, padding, groups=c_main_in, bias=False),
        nn.BatchNorm2d(c_main_in, affine=self.bn_affine),
        # pw 1
        nn.Conv2d(c_main_in, c_main_mid, 1, 1, 0, bias=False),
        nn.BatchNorm2d(c_main_mid, affine=self.bn_affine),
        Register.act_funs.get(self.act_fun)(inplace=self.act_inplace),
        # dw 2
        nn.Conv2d(c_main_mid, c_main_mid, self.k_size, 1, padding2, groups=c_main_mid, bias=False),
        nn.BatchNorm2d(c_main_mid, affine=self.bn_affine),
        # pw 2
        nn.Conv2d(c_main_mid, c_main_mid, 1, 1, 0, bias=False),
        nn.BatchNorm2d(c_main_mid, affine=self.bn_affine),
        Register.act_funs.get(self.act_fun)(inplace=self.act_inplace),
        # dw 3
        nn.Conv2d(c_main_mid, c_main_mid, self.k_size, 1, padding2, groups=c_main_mid, bias=False),
        nn.BatchNorm2d(c_main_mid, affine=self.bn_affine),
        # pw 3
        nn.Conv2d(c_main_mid, c_main_out, 1, 1, 0, bias=False),
        nn.BatchNorm2d(c_main_out, affine=self.bn_affine),
        Register.act_funs.get(self.act_fun)(inplace=self.act_inplace),
    ]
    # optional attention module
    if isinstance(self.att_dict, dict):
        bm.append(AbstractAttentionModule.module_from_dict(c_main_out, c_substitute=c_main_in, att_dict=self.att_dict))
    self.branch_main = DropPathModule(nn.Sequential(*bm))
    return self.probe_outputs(s_in)
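# Not shown above: ShuffleNetV2 splits the channels into a side branch and a
# main branch, concatenates them after the block, and shuffles the channels so
# the branches mix. A minimal sketch of the canonical channel shuffle (the
# repo's forward pass implements its own variant):
import torch

def channel_shuffle(x: torch.Tensor, groups: int = 2) -> torch.Tensor:
    n, c, h, w = x.shape
    return (x.view(n, groups, c // groups, h, w)  # split channels into groups
             .transpose(1, 2).contiguous()        # interleave the groups
             .view(n, c, h, w))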
def _build(self, s_in: Shape, c_out: int, weight_functions=()) -> Shape:
    padding = get_padding(self.padding, self.k_size, self.stride, self.dilation)
    conv = nn.Conv2d(s_in.num_features(), c_out, kernel_size=self.k_size, stride=self.stride,
                     padding=padding, dilation=self.dilation,
                     groups=get_number(self.groups, s_in.num_features()), bias=self.bias)
    wf = list(weight_functions) + [conv]
    return super()._build(s_in, c_out, weight_functions=wf)
def probe_outputs(self, s_in: ShapeOrList, module: nn.Module = None, multiple_outputs=False) -> ShapeOrList:
    """ return the output shape(s) of one forward pass, using a random probe tensor """
    with torch.no_grad():
        if module is None:
            module = self
        x = s_in.random_tensor(batch_size=2)
        s = module(x)
        if multiple_outputs:
            return ShapeList([Shape(list(sx.shape)[1:]) for sx in s])
        return Shape(list(s.shape)[1:])
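# The same idea in plain PyTorch, independent of the Shape classes: run a tiny
# random batch through the module and read the per-sample output shape.
def _probe_shape_demo():
    import torch
    import torch.nn as nn
    module = nn.Conv2d(3, 8, kernel_size=3, padding=1)
    with torch.no_grad():
        y = module(torch.randn(2, 3, 32, 32))  # batch of 2 probe inputs
    return list(y.shape)[1:]  # -> [8, 32, 32], dropping the batch dimension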
def _build(self, s_in: Shape, c_out: int, weight_functions=()) -> Shape:
    c_in = s_in.num_features()
    self.conv = SuperKernelConv(c_in, c_in, self.name, self.strategy_name, self.k_sizes, (1.0,),
                                self.dilation, self.stride, self.padding, self.groups, self.bias)
    point_conv = nn.Conv2d(c_in, c_out, kernel_size=1,
                           groups=get_number(self.groups, s_in.num_features()), bias=self.bias)
    wf = list(weight_functions) + [self.conv, point_conv]
    return super()._build(s_in, c_out, weight_functions=wf)
def _build(self, s_in: Shape, s_out: Shape) -> ShapeList:
    """ build the network, count params, log, maybe load pretrained weights """
    s_in_net = s_in.copy(copy_id=True)
    super()._build(s_in, s_out)
    rows = [('cell index', 'input shapes', 'output shapes', '#params'),
            ('stem', s_in.str(), self.get_stem_output_shape(), count_parameters(self.get_stem()))]
    LoggerManager().get_logger().info('%s (%s):' % (self.__class__.__name__, self.model_name))
    for i, (s_in, s_out, cell) in enumerate(zip(self.get_cell_input_shapes(flatten=False),
                                                self.get_cell_output_shapes(flatten=False),
                                                self.get_cells())):
        rows.append((i, s_in.str(), s_out.str(), count_parameters(cell)))
    rows.append(('head(s)', self.get_heads_input_shapes(), self.get_network_output_shapes(flatten=False),
                 count_parameters(self.get_heads())))
    rows.append(("complete network", s_in_net.str(), self.get_network_output_shapes(flatten=False),
                 count_parameters(self)))
    log_in_columns(LoggerManager().get_logger(), rows, start_space=4)
    return self.get_network_output_shapes(flatten=False)
class SubImagenet100Data(Imagenet1000Data):
    """
    Subset of the ImageNet data set with fewer classes and fewer images per class
    http://image-net.org/
    https://github.com/microsoft/Cream/blob/main/tools/generate_subImageNet.py
    """
    length = (25000, 0, 5000)              # training, valid, test
    data_raw_shape = Shape([3, 300, 300])  # channel, height, width; the raw image sizes actually vary
    label_shape = Shape([100])
    data_mean = (0.485, 0.456, 0.406)      # not recomputed for the subset
    data_std = (0.229, 0.224, 0.225)       # not recomputed for the subset
    can_download = False
def _build(self, s_in: Shape, c_out: int) -> Shape:
    c_in = s_in.num_features()
    c_inner = int(c_out * self.expansion)
    self.block = self._build_block(c_in, c_inner, c_out, self.has_first_act)
    if self.shortcut_type in [None, 'None']:
        pass
    elif self.shortcut_type == 'id':
        self.shortcut = nn.Identity()
    elif self.shortcut_type == 'conv1x1':
        self.shortcut = nn.Sequential(*[
            nn.Conv2d(c_in, c_out, 1, self.stride, 0, bias=False),
            nn.BatchNorm2d(c_out, affine=self.bn_affine),
        ])
    elif self.shortcut_type == 'avg_conv':
        self.shortcut = nn.Sequential(*[
            nn.AvgPool2d(kernel_size=2, stride=self.stride, padding=0),
            nn.Conv2d(c_in, c_out, 1, 1, 0, bias=False),
        ])
    else:
        raise NotImplementedError('shortcut type "%s" is not implemented' % self.shortcut_type)
    self.has_shortcut = isinstance(self.shortcut, nn.Module)
    if self.has_shortcut:
        self.block = DropPathModule(self.block)
    return self.probe_outputs(s_in)
def _build(self, s_in: Shape, c_out: int, weight_functions=()) -> Shape:
    self.conv = SuperKernelConv(s_in.num_features(), c_out, self.name, self.strategy_name,
                                self.k_sizes, (1.0,), self.dilation, self.stride,
                                self.padding, self.groups, self.bias)
    wf = list(weight_functions) + [self.conv]
    return super()._build(s_in, c_out, weight_functions=wf)
def _build(self, s_in: Shape, c_out: int) -> Shape:
    c_in = s_in.num_features()
    c_mid = make_divisible(int(c_in * self.expansion), divisible=8)
    self.has_skip = self.stride == 1 and c_in == c_out
    ops = []
    conv_kwargs = dict(dilation=self.dilation, padding=self.padding)
    if self.expansion > 1:
        # pw
        ops.extend([
            get_conv2d(c_in, c_mid, k_size=self.k_size_in, groups=1, **conv_kwargs),
            nn.BatchNorm2d(c_mid, affine=self.bn_affine),
            Register.act_funs.get(self.act_fun)(inplace=self.act_inplace),
        ])
    # dw
    ops.extend([
        get_conv2d(c_mid, c_mid, k_size=self.k_size, stride=self.stride, groups=-1, **conv_kwargs),
        nn.BatchNorm2d(c_mid, affine=self.bn_affine),
        Register.act_funs.get(self.act_fun)(inplace=self.act_inplace),
    ])
    # optional squeeze+excitation module
    if isinstance(self.att_dict, dict):
        ops.append(AbstractAttentionModule.module_from_dict(c_mid, c_substitute=c_in, att_dict=self.att_dict))
    # pw
    ops.extend([
        get_conv2d(c_mid, c_out, k_size=self.k_size_out, groups=1, **conv_kwargs),
        nn.BatchNorm2d(c_out, affine=self.bn_affine),
    ])
    self.block = nn.Sequential(*ops)
    if self.has_skip:
        self.block = DropPathModule(self.block)
    return self.probe_outputs(s_in)
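# For reference: the usual make_divisible helper as popularized by the MobileNet
# reference code. The repo ships its own implementation; this sketch just shows
# the rounding behavior used for c_mid above.
def _make_divisible_sketch(v: int, divisible: int = 8) -> int:
    new_v = max(divisible, int(v + divisible / 2) // divisible * divisible)
    if new_v < 0.9 * v:  # never round down by more than ~10%
        new_v += divisible
    return new_v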
def get_network(net_cls: Type[AbstractNetworkBody], stem: AbstractModule, head: AbstractModule,
                cell_partials: dict, cell_order: [str],
                s_in=Shape([3, 224, 224]), s_out=Shape([1000])) -> nn.Module:
    net_kwargs = net_cls.parsed_argument_defaults()
    net_kwargs.update(dict(cell_configs={}, cell_partials=cell_partials, cell_order=cell_order))
    # instantiate the requested class rather than hard-coding StackedCellsNetworkBody,
    # otherwise the net_cls parameter is silently ignored
    network = net_cls(stem=stem, heads=nn.ModuleList([head]), **net_kwargs)
    network.build(s_in=s_in, s_out=s_out)
    return network
def get_shufflenet_v2plus_medium(s_in=Shape([3, 224, 224]), s_out=Shape([1000])) -> nn.Module:
    stem = get_stem_instance(ConvStem, k_size=3, features=16, act_fun='hswish', stride=2,
                             use_bn=True, bn_affine=True, order='w_bn_act')
    head = get_head_instance(SeFeatureMixClassificationHead, se_cmul=0.25, se_act_fun='relu',
                             se_squeeze_bias=True, se_bn=True, se_excite_bias=False,
                             features=1280, act_fun='hswish', bias0=False, dropout=0.0, bias1=False)
    defaults = dict(padding='same', dilation=1, bn_affine=True, act_inplace=False, expansion=1)
    att = dict(att_cls='SqueezeExcitationChannelModule', use_c_substitute=False, c_mul=0.25,
               squeeze_act='relu', excite_act='relu6', divisible=8, squeeze_bias=False,
               excite_bias=False, squeeze_bn=True, squeeze_bn_affine=True)
    cell_partials, cell_order = get_passthrough_partials([
        (48, ShuffleNetV2Layer, defaults, dict(stride=2, k_size=3, act_fun='relu')),
        (48, ShuffleNetV2Layer, defaults, dict(stride=1, k_size=3, act_fun='relu')),
        (48, ShuffleNetV2XceptionLayer, defaults, dict(stride=1, k_size=3, act_fun='relu')),
        (48, ShuffleNetV2Layer, defaults, dict(stride=1, k_size=5, act_fun='relu')),
        (128, ShuffleNetV2Layer, defaults, dict(stride=2, k_size=5, act_fun='hswish')),
        (128, ShuffleNetV2Layer, defaults, dict(stride=1, k_size=5, act_fun='hswish')),
        (128, ShuffleNetV2Layer, defaults, dict(stride=1, k_size=3, act_fun='hswish')),
        (128, ShuffleNetV2Layer, defaults, dict(stride=1, k_size=3, act_fun='hswish')),
        (256, ShuffleNetV2Layer, defaults, dict(stride=2, k_size=7, act_fun='hswish', att_dict=att)),
        (256, ShuffleNetV2Layer, defaults, dict(stride=1, k_size=3, act_fun='hswish', att_dict=att)),
        (256, ShuffleNetV2Layer, defaults, dict(stride=1, k_size=7, act_fun='hswish', att_dict=att)),
        (256, ShuffleNetV2Layer, defaults, dict(stride=1, k_size=5, act_fun='hswish', att_dict=att)),
        (256, ShuffleNetV2Layer, defaults, dict(stride=1, k_size=5, act_fun='hswish', att_dict=att)),
        (256, ShuffleNetV2Layer, defaults, dict(stride=1, k_size=3, act_fun='hswish', att_dict=att)),
        (256, ShuffleNetV2Layer, defaults, dict(stride=1, k_size=7, act_fun='hswish', att_dict=att)),
        (256, ShuffleNetV2Layer, defaults, dict(stride=1, k_size=3, act_fun='hswish', att_dict=att)),
        (512, ShuffleNetV2Layer, defaults, dict(stride=2, k_size=7, act_fun='hswish', att_dict=att)),
        (512, ShuffleNetV2Layer, defaults, dict(stride=1, k_size=5, act_fun='hswish', att_dict=att)),
        (512, ShuffleNetV2XceptionLayer, defaults, dict(stride=1, k_size=3, act_fun='hswish', att_dict=att)),
        (512, ShuffleNetV2Layer, defaults, dict(stride=1, k_size=7, act_fun='hswish', att_dict=att)),
        (1280, ConvLayer, dict(), dict(k_size=1, bias=False, act_fun='hswish', act_inplace=True,
                                       order='w_bn_act', use_bn=True, bn_affine=True)),
    ])
    return get_network(StackedCellsNetworkBody, stem, head, cell_partials, cell_order, s_in, s_out)
def _build(self, s_in: Shape, c_out: int, weight_functions=()) -> Shape:
    padding = get_padding(self.padding, self.k_size, self.stride, 1)
    pool = (nn.AvgPool2d if self.pool_type == 'avg' else nn.MaxPool2d)(self.k_size, self.stride, padding)
    conv = nn.Conv2d(s_in.num_features(), c_out, kernel_size=1, stride=1, padding=0, bias=self.bias)
    wf = list(weight_functions) + [pool, conv]
    return super()._build(s_in, c_out, weight_functions=wf)
class AbstractCNNClassificationDataSet(AbstractDataSet):
    length = (0, 0, 0)                    # training, valid, test
    data_raw_shape = Shape([-1, -1, -1])  # channel, height, width
    label_shape = Shape([-1])
    data_mean = (-1, -1, -1)
    data_std = (-1, -1, -1)

    def _get_train_data(self, used_transforms: transforms.Compose) -> torch.utils.data.Dataset:
        raise NotImplementedError

    def _get_test_data(self, used_transforms: transforms.Compose) -> torch.utils.data.Dataset:
        raise NotImplementedError

    def _get_fake_train_data(self, used_transforms: transforms.Compose) -> torch.utils.data.Dataset:
        return FakeData(self.length[0], self.data_raw_shape.shape, self.num_classes(), used_transforms)

    def _get_fake_valid_data(self, used_transforms: transforms.Compose) -> torch.utils.data.Dataset:
        return FakeData(self.length[1], self.data_raw_shape.shape, self.num_classes(), used_transforms)

    def _get_fake_test_data(self, used_transforms: transforms.Compose) -> torch.utils.data.Dataset:
        return FakeData(self.length[2], self.data_raw_shape.shape, self.num_classes(), used_transforms)
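# A standalone smoke test with plain torchvision (not the repo's FakeData
# wrapper): fake data with the right shapes lets a pipeline run end to end
# without downloading anything.
def _fake_data_demo():
    from torchvision import datasets as tv_datasets, transforms as tv_transforms
    fake = tv_datasets.FakeData(size=64, image_size=(3, 32, 32), num_classes=10,
                                transform=tv_transforms.ToTensor())
    img, label = fake[0]
    return img.shape, label  # -> torch.Size([3, 32, 32]) and a class index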
class FashionMnistData(AbstractCNNClassificationDataSet):
    """
    The Fashion-MNIST data set, a drop-in replacement for MNIST
    https://github.com/zalandoresearch/fashion-mnist
    """
    length = (60000, 0, 10000)           # training, valid, test
    data_raw_shape = Shape([1, 28, 28])  # channel, height, width
    label_shape = Shape([10])
    data_mean = (0.2860,)
    data_std = (0.3530,)

    def _get_train_data(self, used_transforms: transforms.Compose):
        return datasets.FashionMNIST(root=self.dir, train=True, download=self.download, transform=used_transforms)

    def _get_test_data(self, used_transforms: transforms.Compose):
        return datasets.FashionMNIST(root=self.dir, train=False, download=self.download, transform=used_transforms)
def _resnet(block: Type[AbstractResNetLayer], stages=(2, 2, 2, 2), inner_channels=(64, 128, 256, 512),
            expansion=1, s_in=Shape([3, 224, 224]), s_out=Shape([1000])) -> nn.Module:
    stem = get_stem_instance(ConvStem, features=inner_channels[0], stride=2, k_size=7, act_fun='relu')
    head = get_head_instance(ClassificationHead, bias=True, dropout=0.0)
    layers = [(inner_channels[0], PoolingLayer,
               dict(pool_type='max', k_size=3, padding='same', order='w', dropout_rate=0),
               dict(stride=2))]
    channels = [int(c * expansion) for c in inner_channels]
    defaults = dict(k_size=3, stride=1, padding='same', dilation=1, bn_affine=True,
                    act_fun='relu', act_inplace=True, expansion=1 / expansion, has_first_act=False)
    for s, (num, cx) in enumerate(zip(stages, channels)):
        for i in range(num):
            if s > 0 and i == 0:
                layers.append((cx, block, defaults, dict(stride=2, shortcut_type='conv1x1')))
            elif i == 0 and expansion > 1:
                layers.append((cx, block, defaults, dict(stride=1, shortcut_type='conv1x1')))
            else:
                layers.append((cx, block, defaults, dict(stride=1, shortcut_type='id')))
    cell_partials, cell_order = get_passthrough_partials(layers)
    return get_network(StackedCellsNetworkBody, stem, head, cell_partials, cell_order, s_in, s_out)
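# Hedged sketch of how the helper above maps to the standard variants. The
# block class is left as a parameter: pass the repo's concrete
# AbstractResNetLayer subclass (basic block for ResNet-18/34, bottleneck block
# with expansion 4 for ResNet-50).
def get_resnet18_sketch(block_cls: Type[AbstractResNetLayer]) -> nn.Module:
    return _resnet(block_cls, stages=(2, 2, 2, 2), inner_channels=(64, 128, 256, 512), expansion=1)

def get_resnet50_sketch(block_cls: Type[AbstractResNetLayer]) -> nn.Module:
    return _resnet(block_cls, stages=(3, 4, 6, 3), inner_channels=(64, 128, 256, 512), expansion=4)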
def _build(self, s_in: Shape, s_out: Shape) -> Shape:
    ops = [nn.AdaptiveAvgPool2d(1)]
    if self.se_cmul > 0:
        ops.append(SqueezeExcitationChannelModule(
            s_in.num_features(), c_mul=self.se_cmul, squeeze_act=self.se_act_fun,
            squeeze_bias=self.se_squeeze_bias and not self.se_bn,
            excite_bias=self.se_excite_bias, squeeze_bn=self.se_bn,
            squeeze_bn_affine=self.se_squeeze_bias))
    ops.extend([
        SqueezeModule(),
        nn.Linear(s_in.num_features(), self.features, bias=self.bias0),
        Register.act_funs.get(self.act_fun)(inplace=True),
        nn.Dropout(p=self.dropout),
        nn.Linear(self.features, s_out.num_features(), bias=self.bias1)
    ])
    self.head_module = nn.Sequential(*ops)
    self.cached['shape_inner'] = Shape([self.features])
    return self.probe_outputs(s_in)
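# Minimal squeeze-and-excitation sketch in plain PyTorch, for context; the
# repo's SqueezeExcitationChannelModule adds more options (BN, divisibility,
# configurable activations), this shows only the core gating mechanism.
import torch.nn as nn

class SimpleSE(nn.Module):
    def __init__(self, c: int, c_mul: float = 0.25):
        super().__init__()
        c_mid = max(1, int(c * c_mul))
        self.op = nn.Sequential(
            nn.AdaptiveAvgPool2d(1),                # squeeze: global context per channel
            nn.Conv2d(c, c_mid, 1), nn.ReLU(inplace=True),
            nn.Conv2d(c_mid, c, 1), nn.Sigmoid(),   # excite: per-channel gates in [0, 1]
        )

    def forward(self, x):
        return x * self.op(x)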
class Cifar10Data(AbstractCNNClassificationDataSet):
    """
    The popular CIFAR-10 data set
    https://www.cs.toronto.edu/~kriz/cifar.html
    """
    length = (50000, 0, 10000)           # training, valid, test
    data_raw_shape = Shape([3, 32, 32])  # channel, height, width
    label_shape = Shape([10])
    data_mean = (0.49139968, 0.48215827, 0.44653124)
    data_std = (0.24703233, 0.24348505, 0.26158768)

    def _get_train_data(self, used_transforms: transforms.Compose):
        return datasets.CIFAR10(root=self.dir, train=True, download=self.download, transform=used_transforms)

    def _get_test_data(self, used_transforms: transforms.Compose):
        return datasets.CIFAR10(root=self.dir, train=False, download=self.download, transform=used_transforms)
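# How the per-channel statistics above are typically consumed: a plain
# torchvision transform pipeline, independent of the repo's transform handling.
def _cifar10_train_transforms() -> transforms.Compose:
    return transforms.Compose([
        transforms.RandomCrop(32, padding=4),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize(Cifar10Data.data_mean, Cifar10Data.data_std),
    ])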
class Cinic10Data(AbstractCNNClassificationDataSet):
    """
    CINIC-10: CINIC-10 Is Not Imagenet or CIFAR-10
    https://github.com/BayesWatch/cinic-10
    """
    length = (90000, 90000, 90000)       # training, valid, test
    data_raw_shape = Shape([3, 32, 32])  # channel, height, width
    label_shape = Shape([10])
    data_mean = (0.47889522, 0.47227842, 0.43047404)
    data_std = (0.24205776, 0.23828046, 0.25874835)

    def _get_train_data(self, used_transforms: transforms.Compose):
        return datasets.ImageFolder(os.path.join(self.dir, 'train'), transform=used_transforms)

    def _get_valid_data(self, used_transforms: transforms.Compose):
        return datasets.ImageFolder(os.path.join(self.dir, 'valid'), transform=used_transforms)

    def _get_test_data(self, used_transforms: transforms.Compose):
        return datasets.ImageFolder(os.path.join(self.dir, 'test'), transform=used_transforms)
class Imagenet1000Data(AbstractCNNClassificationDataSet):
    """
    The ImageNet data set
    http://image-net.org/
    """
    length = (1281167, 0, 50000)           # training, valid, test
    data_raw_shape = Shape([3, 300, 300])  # channel, height, width; the raw image sizes actually vary
    label_shape = Shape([1000])
    data_mean = (0.485, 0.456, 0.406)
    data_std = (0.229, 0.224, 0.225)
    can_download = False

    def _get_train_data(self, used_transforms: transforms.Compose):
        return datasets.ImageFolder(os.path.join(self.dir, 'train'), transform=used_transforms)

    def _get_test_data(self, used_transforms: transforms.Compose):
        return datasets.ImageFolder(os.path.join(self.dir, 'val'), transform=used_transforms)