def __init__(self, config):
    """Create the embedding tables, layer norm and dropout of the block.

    :params config: object exposing ``vocab_size``, ``hidden_size``,
        ``max_position_embeddings``, ``type_vocab_size`` and
        ``hidden_dropout_prob``.
    """
    super(BertEmbeddings, self).__init__()
    # All three embedding tables and the layer norm share this width.
    hidden = config.hidden_size

    # Word table is created with padding_idx=0.
    self.word_embeddings = ops.Embedding(config.vocab_size, hidden, padding_idx=0)
    self.position_embeddings = ops.Embedding(config.max_position_embeddings, hidden)
    self.token_type_embeddings = ops.Embedding(config.type_vocab_size, hidden)

    # Attribute name "LayerNorm" is kept verbatim; it is part of the
    # module's visible attribute set.
    self.LayerNorm = ops.LayerNorm(hidden, eps=1e-12)
    self.dropout = ops.Dropout(config.hidden_dropout_prob)
def __init__(self, cfgs, mode='small', input_channel=3, feat_channels=16, special_stride=1, num_classes=10,
             width_mult=1., block=InvertedResidualSE, momentum=0.1, is_prune_mode=False, **kwargs):
    """Init MobileNetV3.

    :params cfgs: cfgs for mobilenetv3; each entry is unpacked into six
        fields (kernel size, a value used as the block's hidden dimension --
        labelled "expand_ratio" in the original comment, output channels,
        use_se, use_hs, stride).
    :type cfgs: list
    :params mode: ``'large'`` selects a 1280-wide classifier hidden layer,
        any other value selects 1024.
    :params input_channel: input channels of the first convolution.
    :params feat_channels: base width of the first convolution, scaled by
        ``width_mult``.
    :params special_stride: the stride of the first convolution layer.
    :type special_stride: int (1 for cifar10, 2 for imagenet)
    :params num_classes: output size of the final linear layer.
    :params width_mult: multiplier applied to the stem and block output widths.
    :params block: callable used to build every entry of ``cfgs``.
    :params momentum: forwarded to the stem convolution and to every block.
    :params is_prune_mode: when true, widths are truncated with ``int`` and
        the hidden dimension is used as given; otherwise both go through
        ``_make_divisible(..., 8)``.
    :params kwargs: accepted but not used by this constructor.
    """
    super(MobileNetV3, self).__init__()
    self.cfgs = cfgs

    # Stem: one 3x3 conv + bn + hswish.
    if is_prune_mode:
        stem_width = int(feat_channels * width_mult)
    else:
        stem_width = _make_divisible(feat_channels * width_mult, 8)
    stem = ConvBnAct(input_channel, stem_width, kernel_size=3, momentum=momentum,
                     stride=special_stride, padding=1, activation='hswish')

    # Building blocks; each block consumes the previous block's output width.
    stages = [stem]
    in_width = stem_width
    for kernel, hidden, out, use_se, use_hs, stride in self.cfgs:
        if is_prune_mode:
            out_width = int(out * width_mult)
            hidden_width = hidden
        else:
            out_width = _make_divisible(out * width_mult, 8)
            hidden_width = _make_divisible(hidden, 8)
        stages.append(
            block(in_width, hidden_width, out_width, kernel, stride, use_se, use_hs, momentum))
        in_width = out_width
    self.features = Sequential(*stages)

    # Classification head: pool -> flatten -> linear -> hswish -> dropout -> linear.
    self.avgpool = ops.AdaptiveAvgPool2d((1, 1))
    if mode == 'large':
        head_width = 1280
    else:
        head_width = 1024
    self.classifier = Sequential(
        ops.View(),
        ops.Linear(in_width, head_width),
        ops.Hswish(),
        ops.Dropout(0.2),
        ops.Linear(head_width, num_classes),
    )
    self._initialize_weights()
def _transform_op(init_layer):
    """Return the Vega ``ops`` layer that corresponds to ``init_layer``.

    The layer type is matched with ``isinstance`` in a fixed order and a new,
    freshly constructed Vega op is returned. Only the hyper-parameters listed
    in each branch are carried over.

    :params init_layer: the source layer to convert.
    :return: the newly built Vega op.
    :raises ValueError: if the type of ``init_layer`` matches no branch.
    """
    if isinstance(init_layer, nn.Conv2d):
        # Only the first element of kernel_size is forwarded, and the new
        # convolution is always created with bias=False.
        return ops.Conv2d(in_channels=init_layer.in_channels,
                          out_channels=init_layer.out_channels,
                          kernel_size=init_layer.kernel_size[0],
                          stride=init_layer.stride,
                          padding=init_layer.padding,
                          bias=False)

    if isinstance(init_layer, nn.BatchNorm2d):
        return ops.BatchNorm2d(num_features=init_layer.num_features)

    if isinstance(init_layer, nn.ReLU):
        return ops.Relu()

    if isinstance(init_layer, nn.MaxPool2d):
        # Padding of the source layer is not forwarded here.
        return ops.MaxPool2d(kernel_size=init_layer.kernel_size,
                             stride=init_layer.stride)

    if isinstance(init_layer, nn.AvgPool2d):
        return ops.AvgPool2d(kernel_size=init_layer.kernel_size,
                             stride=init_layer.stride,
                             padding=init_layer.padding)

    if isinstance(init_layer, P.ReduceMean):
        return ops.AdaptiveAvgPool2d()

    if isinstance(init_layer, nn.Dense):
        # The source layer's bias setting is not forwarded here.
        return ops.Linear(in_features=init_layer.in_channels,
                          out_features=init_layer.out_channels)

    if isinstance(init_layer, nn.Dropout):
        return ops.Dropout(prob=init_layer.p, inplace=init_layer.inplace)

    if isinstance(init_layer, nn.Flatten):
        return ops.View()

    raise ValueError("The op {} is not supported.".format(
        type(init_layer)))
def __init__(self, config):
    """Create the query/key/value projections and the attention dropout.

    :params config: object exposing ``hidden_size``,
        ``num_attention_heads`` and ``attention_probs_dropout_prob``.
    :raises ValueError: if ``hidden_size`` is not a multiple of
        ``num_attention_heads``.
    """
    super(BertSelfAttention, self).__init__()
    hidden = config.hidden_size
    heads = config.num_attention_heads

    # The hidden width must split evenly across the heads.
    if hidden % heads != 0:
        raise ValueError(
            "The hidden size (%d) is not a multiple of the number of attention "
            "heads (%d)" % (hidden, heads))

    self.num_attention_heads = heads
    self.attention_head_size = int(hidden / heads)
    self.all_head_size = self.num_attention_heads * self.attention_head_size

    # Three projections of identical shape: hidden -> all_head_size.
    self.query = ops.Linear(hidden, self.all_head_size)
    self.key = ops.Linear(hidden, self.all_head_size)
    self.value = ops.Linear(hidden, self.all_head_size)

    self.dropout = ops.Dropout(config.attention_probs_dropout_prob)
def __init__(self, config):
    """Create the dense layer, layer norm and dropout of the block.

    :params config: object exposing ``hidden_size`` and
        ``hidden_dropout_prob``.
    """
    super(BertSelfOutput, self).__init__()
    # The dense layer keeps the width unchanged (hidden -> hidden).
    hidden = config.hidden_size

    self.dense = ops.Linear(hidden, hidden)
    self.LayerNorm = ops.LayerNorm(hidden, eps=1e-12)
    self.dropout = ops.Dropout(config.hidden_dropout_prob)
def __init__(self, hidden_size, num_labels, hidden_dropout_prob=0.1):
    """Create the dropout and linear classifier of the header.

    :params hidden_size: input width of the linear classifier.
    :params num_labels: output width of the linear classifier.
    :params hidden_dropout_prob: value handed to ``ops.Dropout``.
    """
    super(BertClassificationHeader, self).__init__()

    # Sub-modules are created in the order: dropout, then classifier.
    self.dropout = ops.Dropout(
        hidden_dropout_prob,
    )
    self.classifier = ops.Linear(
        hidden_size,
        num_labels,
    )
def __init__(self, config):
    """Create the BERT model, dropout and linear classifier.

    :params config: forwarded to the parent constructor; the sub-modules are
        then built from ``self.config``, which must expose
        ``hidden_dropout_prob``, ``hidden_size`` and ``num_labels``.
    """
    super(BertClassification, self).__init__(config)
    # Read the configuration back from the instance rather than from the
    # argument, exactly as the sub-module construction below requires.
    cfg = self.config

    self.bert = BertModel(cfg)
    self.dropout = ops.Dropout(cfg.hidden_dropout_prob)
    self.classifier = ops.Linear(cfg.hidden_size, cfg.num_labels)