def decoder(nz, outSize, channel=3, nf=128):
    init_dim = 8
    layers = int(np.log2(outSize) - 3)
    decoder_seq = nn.HybridSequential(prefix='decoder')
    # Note: with kernel_size=3, strides=3 and padding equal to the current spatial
    # size H, output size is (H + 2H - 3) // 3 + 1 = H, so these convolutions
    # preserve the feature-map resolution.
    decoder_seq.add(
        nn.Dense(nf * init_dim ** 2, in_units=nz),
        nn.HybridLambda(lambda F, x: F.reshape(x, shape=(-1, nf, init_dim, init_dim))),
        nn.Conv2D(nf, kernel_size=3, strides=3, padding=init_dim),
        nn.ELU(),
        nn.Conv2D(nf, kernel_size=3, strides=3, padding=init_dim),
        nn.ELU(),
    )
    current_dim = init_dim
    for i in range(layers):
        current_dim *= 2
        decoder_seq.add(
            nn.HybridLambda(lambda F, x: F.UpSampling(x, scale=2, sample_type='nearest')),
            nn.Conv2D(nf, kernel_size=3, strides=3, padding=current_dim),
            nn.ELU(),
            nn.Conv2D(nf, kernel_size=3, strides=3, padding=current_dim),
            nn.ELU(),
        )
    decoder_seq.add(
        nn.Conv2D(channel, kernel_size=3, strides=3, padding=current_dim),
        nn.ELU(),
    )
    return decoder_seq
def __init__(self, in_channels, out_channels, bn_use_global_stats=False, **kwargs):
    super(LwopEncoderFinalBlock, self).__init__(**kwargs)
    with self.name_scope():
        self.pre_conv = conv1x1_block(
            in_channels=in_channels,
            out_channels=out_channels,
            use_bias=True,
            use_bn=False,
            bn_use_global_stats=bn_use_global_stats)
        self.body = nn.HybridSequential(prefix="")
        for i in range(3):
            self.body.add(dwsconv3x3_block(
                in_channels=out_channels,
                out_channels=out_channels,
                dw_use_bn=False,
                pw_use_bn=False,
                bn_use_global_stats=bn_use_global_stats,
                dw_activation=(lambda: nn.ELU()),
                pw_activation=(lambda: nn.ELU())))
        self.post_conv = conv3x3_block(
            in_channels=out_channels,
            out_channels=out_channels,
            use_bias=True,
            use_bn=False,
            bn_use_global_stats=bn_use_global_stats)
def encoder(nz, inSize, channel=3, nf=128):
    init_dim = 8
    layers = int(np.log2(inSize) - 2)
    encoder_seq = nn.HybridSequential(prefix='encoder')
    # As in the decoder above, kernel_size=3 / strides=3 / padding=current size
    # keeps the spatial resolution unchanged; only the strides=2 convolutions
    # halve it.
    encoder_seq.add(
        nn.Conv2D(channel, kernel_size=3, strides=3, padding=inSize),
        nn.ELU(),
    )
    current_dim = inSize
    for i in range(1, layers):
        encoder_seq.add(
            nn.Conv2D(i * nf, kernel_size=3, strides=3, padding=current_dim),
            nn.ELU(),
            # nn.Conv2D(i * nf, kernel_size=3, strides=3, padding=current_dim),
            # nn.ELU(),
            nn.Conv2D(i * nf, kernel_size=3, strides=2, padding=1),
            nn.ELU(),
        )
        current_dim //= 2
    encoder_seq.add(
        nn.Conv2D(layers * nf, kernel_size=3, strides=3, padding=current_dim),
        nn.ELU(),
        nn.Conv2D(layers * nf, kernel_size=3, strides=3, padding=current_dim),
        nn.ELU(),
    )
    encoder_seq.add(
        nn.HybridLambda(lambda F, x: F.reshape(x, shape=(-1, layers * nf * init_dim ** 2))),
        nn.Dense(nz)
    )
    return encoder_seq
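A minimal usage sketch, not taken from the original repository: it wires the decoder and encoder builders above into a 64x64 autoencoder. The latent size nz=64 and the batch size are illustrative assumptions.

# Hedged usage sketch for the encoder()/decoder() builders above.
# nz=64, inSize=outSize=64 and the batch size are my own illustrative choices.
import mxnet as mx
import numpy as np
from mxnet.gluon import nn

enc = encoder(nz=64, inSize=64)
dec = decoder(nz=64, outSize=64)
enc.initialize()
dec.initialize()

x = mx.nd.random.uniform(shape=(4, 3, 64, 64))   # NCHW batch of RGB images
z = enc(x)                                       # (4, 64) latent codes
x_rec = dec(z)                                   # (4, 3, 64, 64) reconstruction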
def __init__(self, act_func: str, **kwargs):
    super(Activation, self).__init__()
    with self.name_scope():
        if act_func in ('relu', 'sigmoid', 'softrelu', 'softsign', 'tanh'):
            self.act = nn.Activation(act_func)
        elif act_func == 'leaky':
            self.act = nn.LeakyReLU(**kwargs)
        elif act_func == 'prelu':
            self.act = nn.PReLU(**kwargs)
        elif act_func == 'selu':
            self.act = nn.SELU()
        elif act_func == 'elu':
            self.act = nn.ELU(**kwargs)
        elif act_func == 'gelu':
            self.act = nn.GELU()
        elif act_func == 'relu6':
            self.act = ReLU6()
        elif act_func == 'hard_sigmoid':
            self.act = HardSigmoid()
        elif act_func == 'swish':
            self.act = nn.Swish()
        elif act_func == 'hard_swish':
            self.act = HardSwish()
        elif act_func == 'mish':
            self.act = Mish()
        else:
            raise NotImplementedError(
                f"Not implemented activation: {act_func}")
def __init__(self, dec_units=[10, 1], latent_dim=1, npar=1, act="softrelu"):
    super(VAEDecoder, self).__init__()
    # Output dimension
    self.output_dim = dec_units[-1]
    # Latent dimension
    self.latent_dim = latent_dim
    # How many parameters to output
    # Related to the conditional distribution p(x|theta)
    self.npar = npar
    # From latent z to theta
    # HybridSequential here is used as a list
    self.latent_to_dec = nn.HybridSequential()
    self.dec = nn.HybridSequential()
    for i in range(npar):
        self.latent_to_dec.add(nn.Dense(dec_units[0], in_units=latent_dim))
        self.dec.add(MLPBuilder(dec_units, act))
    # Activation function used
    if act == "elu":
        self.act = nn.ELU()
    else:
        self.act = nn.Activation(act)
def get_activation(activation: str, **kwargs) -> nn.HybridBlock:
    """
    Parameters
    ----------
    activation
        Activation type

    Returns
    -------
    mxnet.gluon.HybridBlock
        Activation object
    """
    if activation in ["relu", "sigmoid", "softrelu", "softsign", "tanh"]:
        return nn.Activation(activation=activation, **kwargs)
    if activation == "lrelu":
        return nn.LeakyReLU(alpha=0.2, **kwargs)
    if activation == "elu":
        return nn.ELU(**kwargs)
    if activation == "swish":
        return nn.Swish(**kwargs)
    if activation == "lipswish":
        return LipSwish(**kwargs)
    raise NotImplementedError(activation)
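A short, hedged illustration (my own, not from the source library) of how the factory above is called: passing "elu" returns an nn.ELU block, and extra keyword arguments such as alpha are forwarded to it.

# Hedged usage sketch for get_activation(); the alpha value is illustrative.
import mxnet as mx

elu = get_activation("elu", alpha=0.5)
y = elu(mx.nd.array([-2.0, 0.0, 2.0]))
print(y)   # negative inputs saturate towards -alpha, positive inputs pass through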
def __init__(self, inp, output, kernel_size, depth_multiplier=1, with_bn=True, activation='elu'):
    super(SepCONV, self).__init__()
    self.net = nn.HybridSequential()
    self.net.add(
        nn.Conv2D(channels=int(inp * depth_multiplier), groups=int(inp),
                  kernel_size=kernel_size, strides=(1, 1), use_bias=True),
        nn.Conv2D(channels=output, kernel_size=(1, 1), strides=(1, 1),
                  use_bias=False if with_bn else True))
    self.act = activation
    self.with_bn = with_bn
    if activation is not None:
        self.elu = nn.ELU()
    if with_bn:
        self.bn = nn.BatchNorm(axis=1, use_global_stats=False)
def __init__(
    self,
    d_hidden: int,
    d_input: Optional[int] = None,
    d_output: Optional[int] = None,
    d_static: Optional[int] = None,
    dropout: float = 0.0,
    **kwargs,
):
    super(GatedResidualNetwork, self).__init__(**kwargs)
    self.d_hidden = d_hidden
    self.d_input = d_input or d_hidden
    self.d_static = d_static or 0
    if d_output is None:
        self.d_output = self.d_input
        self.add_skip = False
    else:
        self.d_output = d_output
        if d_output != self.d_input:
            self.add_skip = True
            with self.name_scope():
                self.skip_proj = nn.Dense(
                    units=self.d_output,
                    in_units=self.d_input,
                    flatten=False,
                    weight_initializer=init.Xavier(),
                )
        else:
            self.add_skip = False
    with self.name_scope():
        self.mlp = nn.HybridSequential(prefix="mlp_")
        self.mlp.add(
            nn.Dense(
                units=self.d_hidden,
                in_units=self.d_input + self.d_static,
                flatten=False,
                weight_initializer=init.Xavier(),
            ))
        self.mlp.add(nn.ELU())
        self.mlp.add(
            nn.Dense(
                units=self.d_hidden,
                in_units=self.d_hidden,
                flatten=False,
                weight_initializer=init.Xavier(),
            ))
        self.mlp.add(nn.Dropout(dropout))
        self.mlp.add(
            nn.Dense(
                units=self.d_output * 2,
                in_units=self.d_hidden,
                flatten=False,
                weight_initializer=init.Xavier(),
            ))
        self.mlp.add(GatedLinearUnit(
            axis=-1,
            nonlinear=False,
        ))
        self.lnorm = nn.LayerNorm(axis=-1, in_channels=self.d_output)
def __init__(self, output, drop_rate=0, with_bn=True, activation='elu'):
    super(DENSE, self).__init__()
    self.net = nn.Dense(units=output, flatten=False, use_bias=True)
    self.act = activation
    self.with_bn = with_bn
    self.drop_rate = drop_rate
    if activation is not None:
        self.elu = nn.ELU()
    # if with_bn:
    #     self.bn = nn.BatchNorm(axis=1, use_global_stats=False)
    if drop_rate > 0:
        self.drop = nn.Dropout(drop_rate)
def __init__(self, output, kernel_size, with_bn=True, activation='elu'):
    super(CONV, self).__init__()
    self.net = nn.Conv2D(channels=output, kernel_size=kernel_size,
                         strides=(1, 1), use_bias=False if with_bn else True)
    self.act = activation
    self.with_bn = with_bn
    if activation is not None:
        self.elu = nn.ELU()
    if with_bn:
        self.bn = nn.BatchNorm(axis=1, use_global_stats=False)
def __init__(self, act_type, r, skernel, dilation, channels, useReLU, useGlobal,
             asBackbone, stride, downsample=False, in_channels=0,
             norm_layer=BatchNorm, norm_kwargs=None, **kwargs):
    super(ResBlockV2ATAC, self).__init__(**kwargs)
    self.bn1 = norm_layer(**({} if norm_kwargs is None else norm_kwargs))
    self.conv1 = _conv3x3(channels, stride, in_channels)
    self.bn2 = norm_layer(**({} if norm_kwargs is None else norm_kwargs))
    self.conv2 = _conv3x3(channels, 1, channels)
    if downsample:
        self.downsample = nn.Conv2D(channels, 1, stride, use_bias=False,
                                    in_channels=in_channels)
    else:
        self.downsample = None

    if act_type == 'relu':
        self.msAA1 = nn.Activation('relu')
        self.msAA2 = nn.Activation('relu')
    elif act_type == 'prelu':
        self.msAA1 = nn.PReLU()
        self.msAA2 = nn.PReLU()
    elif act_type == 'elu':
        self.msAA1 = nn.ELU()
        self.msAA2 = nn.ELU()
    elif act_type == 'selu':
        self.msAA1 = nn.SELU()
        self.msAA2 = nn.SELU()
    elif act_type == 'gelu':
        self.msAA1 = nn.GELU()
        self.msAA2 = nn.GELU()
    elif act_type == 'swish':
        self.msAA1 = nn.Swish()
        self.msAA2 = nn.Swish()
    elif act_type == 'ChaATAC':
        self.msAA1 = ChaATAC(channels=in_channels, r=r, useReLU=useReLU,
                             useGlobal=useGlobal)
        self.msAA2 = ChaATAC(channels=channels, r=r, useReLU=useReLU,
                             useGlobal=useGlobal)
    else:
        raise ValueError("Unknown act_type in ResBlockV2ATAC")
def __init__(self, **kwargs):
    super(Encoder_hybrid, self).__init__(**kwargs)
    vgg19 = model_zoo.vision.vgg19(pretrained=True)
    self.vgg = nn.HybridSequential()
    for i in vgg19.features[:27]:
        self.vgg.add(i)
    self.layer1 = nn.HybridSequential()
    self.layer1.add(
        nn.Conv2D(512, kernel_size=3),
        nn.BatchNorm(),
        nn.ELU())
    self.layer2 = nn.HybridSequential()
    self.layer2.add(
        nn.Conv2D(512, kernel_size=3),
        nn.BatchNorm(),
        nn.ELU(),
        nn.MaxPool2D(pool_size=3, strides=3))
    self.layer3 = nn.HybridSequential()
    self.layer3.add(
        nn.Conv2D(256, kernel_size=1),
        nn.BatchNorm(),
        nn.ELU())
    for i in self.vgg:
        i.grad_req = 'null'
def __init__(self, units=[1, 20], act="softrelu"):
    super(MLPBuilder, self).__init__()
    self.in_size = units[0]
    self.out_size = units[-1]
    self.net = nn.HybridSequential()
    if act == "elu":
        self.act = nn.ELU()
    else:
        self.act = nn.Activation(act)
    # Add linear layers
    nlayer = len(units)
    with self.net.name_scope():
        for i in range(1, nlayer - 1):
            self.net.add(nn.Dense(units[i], in_units=units[i - 1]))
            self.net.add(self.act)
        # Do not apply activation function in the last layer
        self.net.add(nn.Dense(units[-1], in_units=units[-2]))
def __init__(self, ctx=mx.cpu(), warmup=10, runs=50, inputs=None):
    # Set the default inputs.
    # Default data is (32, 3, 256, 256) to mimic an input of batch_size=32
    # and a sample image of size 3*256*256.
    default_parameters = {"data": (32, 3, 256, 256),
                          "data_initializer": nd.normal,
                          "run_backward": True,
                          "dtype": "float32"}

    super().__init__(ctx=ctx, warmup=warmup, runs=runs,
                     default_parameters=default_parameters,
                     custom_parameters=inputs)

    self.data = get_mx_ndarray(ctx=self.ctx, in_tensor=self.inputs["data"],
                               dtype=self.inputs["dtype"],
                               initializer=self.inputs["data_initializer"],
                               attach_grad=self.inputs["run_backward"])
    self.block = nn.ELU()
    self.block.initialize(ctx=self.ctx)
def __init__(self, nfilter=32, latent_dim=10, act="elu"):
    super(MNISTDecoderOnePar, self).__init__()
    self.conv_input_shape = (nfilter, 3, 3)
    self.output_dim = 28 * 28
    self.latent_dim = latent_dim
    if act == "elu":
        self.act = nn.ELU()
    else:
        self.act = nn.Activation(act)
    # latent_dim => 1024
    self.latent_to_fc = nn.Dense(1024, in_units=latent_dim)
    # 1024 => [3 x 3]
    self.fc_to_dec = nn.Dense(nfilter * 3 * 3, in_units=1024)
    # [3 x 3] => [7 x 7]
    self.conv1 = nn.Conv2DTranspose(nfilter, in_channels=nfilter,
                                    kernel_size=3, strides=2, padding=(0, 0))
    self.bn1 = nn.BatchNorm(in_channels=nfilter)
    # [7 x 7] => [14 x 14]
    self.conv2 = nn.Conv2DTranspose(nfilter, in_channels=nfilter,
                                    kernel_size=4, strides=2, padding=(1, 1))
    self.bn2 = nn.BatchNorm(in_channels=nfilter)
    # [14 x 14] => [28 x 28]
    self.conv3 = nn.Conv2DTranspose(nfilter, in_channels=nfilter,
                                    kernel_size=4, strides=2, padding=(1, 1))
    self.bn3 = nn.BatchNorm(in_channels=nfilter)
    # [28 x 28] => [28 x 28]
    self.conv4 = nn.Conv2D(1, in_channels=nfilter, kernel_size=3, padding=1)
def __init__(self, enc_units=[1, 10], latent_dim=1, act="softrelu"):
    super(VAEEncoder, self).__init__()
    # Input dimension
    self.input_dim = enc_units[0]
    # Latent dimension (dimension of mu and logvar)
    self.latent_dim = latent_dim
    # From data to a common hidden state
    self.enc_common = MLPBuilder(enc_units, act)
    # From common hidden state to mu and logvar
    self.enc_mu = nn.Dense(latent_dim, in_units=self.enc_common.output_size())
    self.enc_logvar = nn.Dense(latent_dim, in_units=self.enc_common.output_size())
    # Activation function used
    if act == "elu":
        self.act = nn.ELU()
    else:
        self.act = nn.Activation(act)
def __init__(self, nfilter=32, latent_dim=10, act="elu"):
    super(MNISTEncoder, self).__init__()
    self.conv_input_shape = (1, 28, 28)
    self.input_dim = 28 * 28
    self.latent_dim = latent_dim
    if act == "elu":
        self.act = nn.ELU()
    else:
        self.act = nn.Activation(act)
    # [28 x 28] => [14 x 14]
    self.conv1 = nn.Conv2D(nfilter, in_channels=1, kernel_size=4, strides=2, padding=1)
    self.bn1 = nn.BatchNorm(in_channels=nfilter)
    # [14 x 14] => [7 x 7]
    self.conv2 = nn.Conv2D(nfilter, in_channels=nfilter, kernel_size=4, strides=2, padding=1)
    self.bn2 = nn.BatchNorm(in_channels=nfilter)
    # [7 x 7] => [3 x 3]
    self.conv3 = nn.Conv2D(nfilter, in_channels=nfilter, kernel_size=4, strides=2, padding=1)
    self.bn3 = nn.BatchNorm(in_channels=nfilter)
    # [3 x 3] => 1024
    self.fc = nn.Dense(1024, in_units=nfilter * 3 * 3)
    # To mu and logvar
    self.enc_mu = nn.Dense(latent_dim, in_units=1024)
    self.enc_logvar = nn.Dense(latent_dim, in_units=1024)
def test_activations_elu():
    act_layer = nn.ELU(1.0)
    out = act_layer(mx.np.random.uniform(size=(10,)))
    out.asnumpy()
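For reference, a small hedged sanity check (my own addition, not part of the test suite above, written against the 1.x NDArray API) of what nn.ELU computes: ELU(x) = x for x > 0 and alpha * (exp(x) - 1) otherwise.

# Hedged numeric check of the ELU definition used by nn.ELU.
import numpy as np
import mxnet as mx
from mxnet.gluon import nn

x = mx.nd.array([-2.0, -0.5, 0.0, 1.5])
y = nn.ELU(alpha=1.0)(x).asnumpy()
ref = np.where(x.asnumpy() > 0, x.asnumpy(), np.exp(x.asnumpy()) - 1.0)
assert np.allclose(y, ref, atol=1e-5)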
def __init__(self, in_channels, out_channels):
    super(ConvBlock, self).__init__()
    with self.name_scope():
        self.conv = Conv3x3(in_channels, out_channels)
        self.nonlin = nn.ELU()
def __init__(
    self,
    bin_values: mx.nd.NDArray,
    n_residue: int,
    n_skip: int,
    dilation_depth: int,
    n_stacks: int,
    act_type: str,
    cardinality: List[int],
    embedding_dimension: int,
    pred_length: int,
    **kwargs,
):
    super().__init__(**kwargs)

    self.dilation_depth = dilation_depth
    self.pred_length = pred_length
    self.mu = len(bin_values)
    self.dilations = WaveNet._get_dilations(
        dilation_depth=dilation_depth, n_stacks=n_stacks
    )
    self.receptive_field = WaveNet.get_receptive_field(
        dilation_depth=dilation_depth, n_stacks=n_stacks
    )
    self.trim_lengths = [
        sum(self.dilations) - sum(self.dilations[: i + 1])
        for i, _ in enumerate(self.dilations)
    ]

    with self.name_scope():
        self.feature_embedder = FeatureEmbedder(
            cardinalities=cardinality,
            embedding_dims=[embedding_dimension for _ in cardinality],
        )
        self.post_transform = LookupValues(bin_values)
        self.target_embed = nn.Embedding(
            input_dim=self.mu, output_dim=n_residue
        )
        self.residuals = nn.HybridSequential()
        for i, d in enumerate(self.dilations):
            is_not_last = i + 1 < len(self.dilations)
            self.residuals.add(
                CausalDilatedResidue(
                    n_residue=n_residue,
                    n_skip=n_skip,
                    dilation=d,
                    return_dense_out=is_not_last,
                    kernel_size=2,
                )
            )

        # heuristic assuming ~5 features
        std = 1.0 / math.sqrt(n_residue + 5)
        self.conv_project = nn.Conv1D(
            channels=n_residue,
            kernel_size=1,
            use_bias=True,
            weight_initializer=mx.init.Uniform(std),
            bias_initializer="zero",
        )
        self.conv1 = conv1d(
            in_channels=n_skip, channels=n_skip, kernel_size=1
        )
        self.conv2 = conv1d(
            in_channels=n_skip, channels=self.mu, kernel_size=1
        )
        self.output_act = (
            nn.ELU()
            if act_type == "elu"
            else nn.Activation(act_type=act_type)
        )
        self.cross_entropy_loss = gluon.loss.SoftmaxCrossEntropyLoss()
ctx = mx.cpu()
train_x = (-5, 5)
test_x = np.arange(-20, 20, 0.1)
n_model = 20
n_batch = 5000
batch_size = 64

activations = {
    'ReLU': mx.nd.relu,
    'Sigmoid': mx.nd.sigmoid,
    'Tanh': mx.nd.tanh,
    'Relu6': lambda x: mx.nd.clip(mx.nd.relu(x), 0, 6),
    'LeakyRelu': mx.nd.LeakyReLU,
    'ELU': nn.ELU(),
    'SELU': nn.SELU(),
    'PReLU': nn.PReLU(),
    'Swish': nn.Swish(),
}
legends = []
for act in activations:
    test_err = np.zeros_like(test_x)
    for i in range(n_model):
        print("Train: %s %d/%d" % (act, i + 1, n_model))
        net = Net(act=activations[act])
        net.collect_params().initialize(mx.init.Xavier(), ctx=ctx)
        train(net, train_x[0], train_x[1], batch_size, n_batch)
def __init__(self, layers, channels, classes, act_type, r, skernel, dilation,
             useReLU, useGlobal, act_layers, replace_act, act_order,
             asBackbone, norm_layer=BatchNorm, norm_kwargs=None, **kwargs):
    super(ResNet20V2ATAC, self).__init__(**kwargs)
    assert len(layers) == len(channels) - 1
    with self.name_scope():
        self.features = nn.HybridSequential(prefix='')
        self.features.add(norm_layer(scale=False, center=False,
                                     **({} if norm_kwargs is None else norm_kwargs)))
        self.features.add(nn.Conv2D(channels[0], 3, 1, 1, use_bias=False))

        in_channels = channels[0]
        for i, num_layer in enumerate(layers):
            stride = 1 if i == 0 else 2
            if act_order == 'bac':
                if i + act_layers < len(channels):
                    tmp_act_type = replace_act
                else:
                    tmp_act_type = act_type
            elif act_order == 'pre':
                if i + 1 > act_layers:
                    tmp_act_type = replace_act
                else:
                    tmp_act_type = act_type
            else:
                raise ValueError('Unknown act_order')
            self.features.add(self._make_layer(
                layers=num_layer, channels=channels[i + 1],
                in_channels=in_channels, stride=stride, stage_index=i + 1,
                act_type=tmp_act_type, r=r, skernel=skernel, dilation=dilation,
                useReLU=useReLU, useGlobal=useGlobal, asBackbone=asBackbone,
                norm_layer=norm_layer, norm_kwargs=norm_kwargs
            ))
            in_channels = channels[i + 1]

        self.features.add(norm_layer(**({} if norm_kwargs is None else norm_kwargs)))

        if act_order == 'bac':
            if act_layers <= 0:
                tmp_act_type = replace_act
            else:
                tmp_act_type = act_type
        elif act_order == 'pre':
            if act_layers >= 4:
                tmp_act_type = act_type
            else:
                tmp_act_type = replace_act
        else:
            raise ValueError('Unknown act_order')

        if tmp_act_type == 'relu':
            self.features.add(nn.Activation('relu'))
        elif tmp_act_type == 'prelu':
            self.features.add(nn.PReLU())
        elif tmp_act_type == 'elu':
            self.features.add(nn.ELU())
        elif tmp_act_type == 'selu':
            self.features.add(nn.SELU())
        elif tmp_act_type == 'gelu':
            self.features.add(nn.GELU())
        elif tmp_act_type == 'swish':
            self.features.add(nn.Swish())
        elif tmp_act_type == 'ChaATAC':
            self.features.add(ChaATAC(channels=in_channels, r=r,
                                      useReLU=useReLU, useGlobal=useGlobal))
        else:
            raise ValueError("Unknown act_type in ResBlockV2ATAC")

        self.features.add(nn.GlobalAvgPool2D())
        self.features.add(nn.Flatten())

        self.output = nn.Dense(classes, in_units=in_channels)
def __init__(self, **kwargs):
    super(STE, self).__init__(**kwargs)
    # self.verbose = verbose
    # self.in_planes = 64
    with self.name_scope():
        self.layer1 = nn.Conv2D(64, kernel_size=4, strides=2, padding=1)
        self.lc1 = LC(64)
        self.conv1 = nn.Conv2D(64, kernel_size=1)
        # self.bn1 = nn.BatchNorm()
        # self.relu_conv1 = nn.Activation(activation='relu')
        self.a1 = nn.MaxPool2D(pool_size=2, strides=2)
        self.a2 = Residual(64)

        self.layer2 = Residual(64)
        self.lc2 = LC(64)
        self.conv2 = nn.Conv2D(64, kernel_size=1)
        # self.bn2 = nn.BatchNorm()
        # self.relu_conv2 = nn.Activation(activation='relu')
        self.b1 = Residual(128, same_shape=False)

        self.layer3 = Residual(128)
        self.lc3 = LC(128)
        self.conv3 = nn.Conv2D(128, kernel_size=1)
        # self.bn3 = nn.BatchNorm()
        # self.relu_conv3 = nn.Activation(activation='relu')
        self.c1 = Residual(256, same_shape=False)

        self.layer4 = Residual(256)
        self.lc4 = LC(256)
        self.conv4 = nn.Conv2D(256, kernel_size=1)
        # self.bn4 = nn.BatchNorm()
        # self.relu_conv4 = nn.Activation(activation='relu')
        self.d1 = Residual(512, same_shape=False)

        self.layer5 = Residual(512)
        # block 6
        # b6 = nn.Sequential()
        # b6.add(
        #     nn.AvgPool2D(pool_size=3),
        #     nn.Dense(num_classes)
        # )
        self.layer6 = nn.Conv2D(2, kernel_size=1)

        self.delayer1 = nn.Conv2DTranspose(256, kernel_size=4, padding=1, strides=2)
        # self.debn1 = nn.BatchNorm()
        self.relu1 = nn.ELU(alpha=1.0)
        # self.relu1 = nn.ELU(alpha=0.2)
        self.relu11 = nn.ELU(alpha=1.0)
        # mxnet.ndarray.add(lhs, rhs)

        self.delayer2 = nn.Conv2DTranspose(128, kernel_size=4, padding=1, strides=2)
        # self.debn2 = nn.BatchNorm()
        self.relu2 = nn.ELU(alpha=1.0)
        self.relu22 = nn.ELU(alpha=1.0)

        self.delayer3 = nn.Conv2DTranspose(64, kernel_size=4, padding=1, strides=2)
        self.convs_1 = Conv2D(channels=3, kernel_size=1, strides=1, padding=0, use_bias=False)
        # self.debn3 = nn.BatchNorm()
        self.relu3 = nn.ELU(alpha=1.0)
        self.relu33 = nn.ELU(alpha=1.0)

        self.delayer4 = nn.Conv2DTranspose(64, kernel_size=4, padding=1, strides=2)
        self.convs_2 = Conv2D(channels=3, kernel_size=1, strides=1, padding=0, use_bias=False)
        # self.debn4 = nn.BatchNorm()
        self.relu4 = nn.ELU(alpha=1.0)
        self.relu44 = nn.ELU(alpha=1.0)

        self.delayer5 = nn.Conv2DTranspose(3, kernel_size=4, padding=1, strides=2)
        # self.debn5 = nn.BatchNorm()
        self.relu5 = nn.ELU(alpha=1.0)