def __init__(self, cfgs, mode='small', input_channel=3, feat_channels=16, special_stride=1,
             num_classes=10, width_mult=1., block=InvertedResidualSE, momentum=0.1,
             is_prune_mode=False, **kwargs):
    """Init MobileNetV3.

    :param cfgs: cfgs for mobilenetv3
    :type cfgs: list
    :param special_stride: the stride of the first InvertedResidualSE block
    :type special_stride: int (1 for cifar10, 2 for imagenet)
    """
    super(MobileNetV3, self).__init__()
    self.cfgs = cfgs
    # building first layer
    if not is_prune_mode:
        feat_channels = _make_divisible(feat_channels * width_mult, 8)
    else:
        feat_channels = int(feat_channels * width_mult)
    layers = [
        ConvBnAct(input_channel, feat_channels, kernel_size=3, momentum=momentum,
                  stride=special_stride, padding=1, activation='hswish')
    ]
    # building blocks
    # each cfg entry: kernel_size, expand_ratio, output_channels, use_se, use_hs, stride
    for k, t, c, use_se, use_hs, s in self.cfgs:
        output_channel = _make_divisible(c * width_mult, 8) if not is_prune_mode else int(c * width_mult)
        hidden_dim = _make_divisible(t, 8) if not is_prune_mode else t
        layers.append(block(feat_channels, hidden_dim, output_channel, k, s, use_se, use_hs, momentum))
        feat_channels = output_channel
    self.features = Sequential(*layers)
    # building last linear layer
    self.avgpool = ops.AdaptiveAvgPool2d((1, 1))
    chn = 1280 if mode == 'large' else 1024
    self.classifier = Sequential(ops.View(),
                                 ops.Linear(feat_channels, chn),
                                 ops.Hswish(),
                                 ops.Dropout(0.2),
                                 ops.Linear(chn, num_classes))
    self._initialize_weights()
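# A minimal usage sketch (assumption, not from the source): each cfgs entry follows the
# "kernel_size, expand_ratio, output_channels, use_se, use_hs, stride" layout documented
# above. The rows below are illustrative, not the official mobilenetv3-small table; note
# that t is consumed directly as the hidden dimension, not multiplied as a ratio.
example_cfgs = [
    # k,  t,  c, use_se, use_hs, s
    [3, 16, 16, 1, 0, 1],
    [3, 72, 24, 0, 0, 2],
    [5, 96, 40, 1, 1, 2],
]
model = MobileNetV3(example_cfgs, mode='small', num_classes=10, special_stride=1)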
def __init__(self, channel, reduction=4):
    """Init SELayer."""
    super(SELayer, self).__init__()
    self.avg_pool = ops.AdaptiveAvgPool2d(1)
    hidden_dim = _make_divisible(channel // reduction, 8)
    self.fc = Sequential(ops.Linear(channel, hidden_dim, use_bias=False),
                         ops.Relu(inplace=True),
                         ops.Linear(hidden_dim, channel, use_bias=False),
                         ops.Hsigmoid())
def _reset_classifier_model(self):
    if vega.is_torch_backend():
        # num_classes = ModelConfig.model_desc.backbone.n_class
        num_classes = ModelConfig.num_classes
        model = self.trainer.model
        out_features = num_classes
        # fix layers
        # for param in model.parameters():
        #     param.requires_grad = False
        # change head
        if "torch_vision_model" in ModelConfig.model_desc["modules"]:
            # torchvision
            import torch.nn as nn
            in_features = model.fc.in_features
            model.fc = nn.Linear(in_features, out_features).cuda()
        else:
            # vega
            in_features = model.fc.in_features
            from vega.modules.operators import ops
            model.fc = ops.Linear(in_features=in_features, out_features=out_features).cuda()
        # TODO n_class
        ModelConfig.model_desc.backbone.n_class = num_classes
        logging.info("Model fine tuned successfully.")
def __init__(self, config):
    super(BertSelfAttention, self).__init__()
    if config.hidden_size % config.num_attention_heads != 0:
        raise ValueError(
            "The hidden size (%d) is not a multiple of the number of attention "
            "heads (%d)" % (config.hidden_size, config.num_attention_heads))
    self.num_attention_heads = config.num_attention_heads
    self.attention_head_size = int(config.hidden_size / config.num_attention_heads)
    self.all_head_size = self.num_attention_heads * self.attention_head_size
    self.query = ops.Linear(config.hidden_size, self.all_head_size)
    self.key = ops.Linear(config.hidden_size, self.all_head_size)
    self.value = ops.Linear(config.hidden_size, self.all_head_size)
    self.dropout = ops.Dropout(config.attention_probs_dropout_prob)
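# Worked example of the head-size arithmetic above (illustrative BERT-base-like numbers,
# not from the source):
hidden_size, num_attention_heads = 768, 12
attention_head_size = hidden_size // num_attention_heads   # 64 dims per head
all_head_size = num_attention_heads * attention_head_size  # 768, so Q/K/V map 768 -> 768
assert hidden_size % num_attention_heads == 0              # e.g. hidden_size=770 would raise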
def __init__(self, config):
    super(BertIntermediate, self).__init__()
    self.dense = ops.Linear(config.hidden_size, config.intermediate_size)
    if isinstance(config.hidden_act, str):
        active_fn = {
            "gelu": ops.gelu,
            "relu": ops.relu,
            "swish": ops.swish
        }
        self.intermediate_act_fn = active_fn[config.hidden_act]
    else:
        self.intermediate_act_fn = config.hidden_act
def __init__(self, base_channel, num_classes):
    """Create layers.

    :param base_channel: base_channel
    :type base_channel: int
    :param num_classes: number of classes
    :type num_classes: int
    """
    super(LinearClassificationHead, self).__init__()
    self.avgpool = ops.AdaptiveAvgPool2d(output_size=(1, 1))
    self.view = ops.View()
    self.linear = ops.Linear(in_features=base_channel, out_features=num_classes)
def _transform_op(init_layer):
    """Transform a MindSpore op to the corresponding Vega op."""
    if isinstance(init_layer, nn.Conv2d):
        in_channels = init_layer.in_channels
        out_channels = init_layer.out_channels
        kernel_size = init_layer.kernel_size[0]
        stride = init_layer.stride
        padding = init_layer.padding
        # bias = init_layer.bias
        new_layer = ops.Conv2d(in_channels=in_channels, out_channels=out_channels,
                               kernel_size=kernel_size, stride=stride,
                               padding=padding, bias=False)
    elif isinstance(init_layer, nn.BatchNorm2d):
        num_features = init_layer.num_features
        new_layer = ops.BatchNorm2d(num_features=num_features)
    elif isinstance(init_layer, nn.ReLU):
        new_layer = ops.Relu()
    elif isinstance(init_layer, nn.MaxPool2d):
        kernel_size = init_layer.kernel_size
        stride = init_layer.stride
        # padding = init_layer.padding
        new_layer = ops.MaxPool2d(kernel_size=kernel_size, stride=stride)
    elif isinstance(init_layer, nn.AvgPool2d):
        kernel_size = init_layer.kernel_size
        stride = init_layer.stride
        padding = init_layer.padding
        new_layer = ops.AvgPool2d(kernel_size=kernel_size, stride=stride, padding=padding)
    elif isinstance(init_layer, P.ReduceMean):
        new_layer = ops.AdaptiveAvgPool2d()
    elif isinstance(init_layer, nn.Dense):
        in_features = init_layer.in_channels
        out_features = init_layer.out_channels
        # use_bias = init_layer.bias
        new_layer = ops.Linear(in_features=in_features, out_features=out_features)
    elif isinstance(init_layer, nn.Dropout):
        prob = init_layer.p
        inplace = init_layer.inplace
        new_layer = ops.Dropout(prob=prob, inplace=inplace)
    elif isinstance(init_layer, nn.Flatten):
        new_layer = ops.View()
    else:
        raise ValueError("The op {} is not supported.".format(type(init_layer)))
    return new_layer
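# A minimal usage sketch, assuming `nn` is mindspore.nn and `P` is
# mindspore.ops.operations here (nn.Dense and P.ReduceMean are MindSpore APIs, not torch):
import mindspore.nn as nn

dense = nn.Dense(512, 10)        # MindSpore fully-connected layer
vega_fc = _transform_op(dense)   # -> ops.Linear(in_features=512, out_features=10)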
def __init__(self, **desc):
    """Initialize."""
    super(SimpleCnn, self).__init__()
    desc = Config(**desc)
    self.num_class = desc.num_class
    self.fp16 = desc.get('fp16', False)
    self.channels = desc.channels
    self.conv1 = ops.Conv2d(3, 32, padding=1, kernel_size=3)
    self.pool1 = ops.MaxPool2d(2, stride=2)
    self.blocks = self._blocks(self.channels, desc.blocks)
    self.pool2 = ops.MaxPool2d(2, stride=2)
    self.conv2 = ops.Conv2d(self.channels, 64, padding=1, kernel_size=3)
    self.global_conv = ops.Conv2d(64, 64, kernel_size=8, padding=0)
    self.view = ops.View()
    self.fc = ops.Linear(64, self.num_class)
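# Shape walkthrough (assuming 32x32 inputs, e.g. CIFAR-10; the input size is not stated
# in the source): 32x32 -> pool1 -> 16x16 -> pool2 -> 8x8, so the 8x8 global_conv kernel
# collapses the feature map to 1x1x64, which View() flattens for Linear(64, num_class).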
def __init__(self, encoding, n_class=1000):
    super(DNet, self).__init__()
    op_names = ["conv3", "conv1", "conv3_grp2", "conv3_grp4",
                "conv3_base1", "conv3_base32", "conv3_sep"]
    block_str, num_channel, macro_str = encoding.split('_')
    curr_channel, index = int(num_channel), 0
    _big_model = "*" in block_str
    if _big_model:
        block_encoding_list = block_str.split('*')
    # stem
    self.layers = Sequential(
        create_op('conv3', 3, curr_channel // 2, stride=2),
        ops.Relu(),
        create_op('conv3', curr_channel // 2, curr_channel // 2),
        ops.Relu(),
        create_op('conv3', curr_channel // 2, curr_channel, stride=2),
        ops.Relu()
    )
    # body
    if not _big_model:
        while index < len(macro_str):
            stride = 1
            if macro_str[index] == '-':
                stride = 2
                index += 1
            channel_increase = int(macro_str[index])
            block = EncodedBlock(block_str, curr_channel, op_names, stride, channel_increase)
            self.layers.append(block)
            curr_channel *= channel_increase
            index += 1
    else:
        block_encoding_index = 0
        while index < len(macro_str):
            stride = 1
            if macro_str[index] == '-':
                stride = 2
                index += 1
                block_encoding_index += 1
            channel_increase = int(macro_str[index])
            block_encoding = block_encoding_list[block_encoding_index]
            block = EncodedBlock(block_encoding, curr_channel, op_names, stride, channel_increase)
            self.layers.append(block)
            curr_channel *= channel_increase
            index += 1
    self.layers.append(ops.AdaptiveAvgPool2d((1, 1)))
    self.view = ops.View()
    self.fc = ops.Linear(in_features=curr_channel, out_features=n_class)
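# A minimal parsing sketch with a hypothetical encoding (the block_str format consumed by
# EncodedBlock is not shown here): the encoding is "block_str _ num_channel _ macro_str".
# Each digit in macro_str is a channel multiplier for one block; a '-' sets stride 2 for
# the next block (and, for '*'-separated big models, advances to the next block encoding).
encoding = "BLOCK_32_1-21"   # hypothetical: start at 32 channels
block_str, num_channel, macro_str = encoding.split('_')
# macro "1-21": block(x1, s=1) -> block(x2, s=2) -> block(x1, s=1)
# channels evolve 32 -> 32 -> 64 -> 64, so the final fc sees 64 input features.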
def __init__(self, C, num_classes, input_size):
    """Init AuxiliaryHead."""
    super(AuxiliaryHead, self).__init__()
    stride = input_size - 5
    self.relu1 = ops.Relu(inplace=True)
    self.avgpool1 = ops.AvgPool2d(5, stride=stride, padding=0, count_include_pad=False)
    self.conv1 = ops.Conv2d(C, 128, 1, bias=False)
    self.batchnorm1 = ops.BatchNorm2d(128)
    self.relu2 = ops.Relu(inplace=True)
    self.conv2 = ops.Conv2d(128, 768, 2, bias=False)
    self.batchnorm2 = ops.BatchNorm2d(768)
    self.relu3 = ops.Relu(inplace=True)
    self.view = ops.View()
    self.classifier = ops.Linear(768, num_classes)
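# Spatial-size check (assuming input_size=8, as in DARTS-style CIFAR auxiliary heads;
# not stated in the source): AvgPool2d(5, stride=8-5=3) maps 8x8 -> floor((8-5)/3)+1 = 2x2,
# and conv2's 2x2 kernel reduces that to 1x1, so View() hands exactly 768 features to the
# final classifier.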
def __init__(self, config):
    super(Pooler, self).__init__()
    self.dense = ops.Linear(config.hidden_size, config.hidden_size)
    self.activation = ops.Tanh()
def __init__(self, encoding, n_class=1000):
    super().__init__()
    self.backbone = DNetBackbone(encoding)
    self.view = ops.View()
    out_plane = self.backbone.out_channels
    self.fc = ops.Linear(in_features=out_plane, out_features=n_class)
def __init__(self, config, fit_size=768):
    super(TinyBertForPreTraining, self).__init__(config)
    self.bert = BertModel(self.config)
    self.apply(self.init_bert_weights)
    self.fit_dense = ops.Linear(self.config.hidden_size, fit_size)
def __init__(self, config):
    super(BertClassification, self).__init__(config)
    self.bert = BertModel(self.config)
    self.dropout = ops.Dropout(self.config.hidden_dropout_prob)
    self.classifier = ops.Linear(self.config.hidden_size, self.config.num_labels)
def __init__(self, config):
    super(BertSelfOutput, self).__init__()
    self.dense = ops.Linear(config.hidden_size, config.hidden_size)
    self.LayerNorm = ops.LayerNorm(config.hidden_size, eps=1e-12)
    self.dropout = ops.Dropout(config.hidden_dropout_prob)
def __init__(self, hidden_size, num_labels, hidden_dropout_prob=0.1):
    super(BertClassificationHeader, self).__init__()
    self.dropout = ops.Dropout(hidden_dropout_prob)
    self.classifier = ops.Linear(hidden_size, num_labels)
def __init__(self, in_channels=1, embed_dim=8, kernel_num=16, num_class=2):
    super(TextCNN, self).__init__()
    self.cells = TextCells(in_channels, embed_dim, kernel_num)
    self.head = ops.Linear(self.cells.out_channels, num_class, activation='softmax')