def __init__(self, opts):
    super(BasicBlock, self).__init__()
    self.bblock = HybridSequential()
    if opts.bottle_neck:
        # Compare strings with `==`; `is` tests identity and is unreliable here.
        if opts.norm_type == 'batch':
            self.bblock.add(NormLayer())
        elif opts.norm_type == 'group':
            self.bblock.add(GroupNorm())
        elif opts.norm_type == 'instance':
            self.bblock.add(InstanceNorm())
        if opts.activation in ['leaky']:
            self.bblock.add(LeakyReLU(alpha=opts.alpha))
        else:
            self.bblock.add(Activation(opts.activation))
        self.bblock.add(Conv3D(channels=int(opts.growth_rate * 4),
                               kernel_size=(opts.zKernelSize, 1, 1),
                               strides=(opts.zStride, 1, 1),
                               use_bias=opts.use_bias,
                               padding=(opts.zPad, 0, 0)))
        if opts.drop_out > 0:
            self.bblock.add(Dropout(opts.drop_out))
    if opts.norm_type == 'batch':
        self.bblock.add(NormLayer())
    elif opts.norm_type == 'group':
        self.bblock.add(GroupNorm(in_channels=int(opts.growth_rate * 4)))
    elif opts.norm_type == 'instance':
        self.bblock.add(InstanceNorm())
    if opts.activation in ['leaky']:
        self.bblock.add(LeakyReLU(opts.alpha))
    else:
        self.bblock.add(Activation(opts.activation))
    self.bblock.add(Conv3D(channels=int(opts.growth_rate),
                           kernel_size=(opts.zKernelSize, 3, 3),
                           strides=(opts.zStride, 1, 1),
                           use_bias=opts.use_bias,
                           padding=(opts.zPad, 1, 1)))
    if opts.drop_out > 0:
        self.bblock.add(Dropout(opts.drop_out))
def __init__(self, layers, filters, classes=1000, batch_norm=False, **kwargs):
    super(VGG, self).__init__(**kwargs)
    assert len(layers) == len(filters)
    with self.name_scope():
        self.features = self._make_features(layers, filters, batch_norm)
        self.features.add(Dense(4096, activation='relu',
                                weight_initializer='normal',
                                bias_initializer='zeros'))
        self.features.add(Dropout(rate=0.5))
        self.features.add(Dense(4096, activation='relu',
                                weight_initializer='normal',
                                bias_initializer='zeros'))
        self.features.add(Dropout(rate=0.5))
        self.output = Dense(classes, weight_initializer='normal',
                            bias_initializer='zeros')
def __init__(self, opts):
    super(BasicBlock, self).__init__()
    self.bblock = HybridSequential()
    if opts.bottle_neck:
        self.bblock.add(BatchNorm(momentum=opts.bn_mom, epsilon=opts.bn_eps))
        if not opts.trans_block:
            self.bblock.add(LeakyReLU(alpha=.2))
        else:
            self.bblock.add(Activation(opts.activation))
        self.bblock.add(Conv2D(channels=int(opts.growth_rate * 4),
                               kernel_size=(1, 1), strides=(1, 1),
                               use_bias=opts.use_bias, padding=(0, 0)))
        if opts.drop_out > 0:
            self.bblock.add(Dropout(opts.drop_out))
    self.bblock.add(BatchNorm(momentum=opts.bn_mom, epsilon=opts.bn_eps))
    self.bblock.add(Activation(activation=opts.activation))
    self.bblock.add(Conv2D(channels=int(opts.growth_rate),
                           kernel_size=(3, 3), strides=(1, 1),
                           use_bias=opts.use_bias, padding=(1, 1)))
    if opts.drop_out > 0:
        self.bblock.add(Dropout(opts.drop_out))
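# Sketch (assumption, not in the source): with a `growth_rate` option this
# reads as a DenseNet-style block, whose forward usually concatenates the
# input with the new feature maps along the channel axis.
def hybrid_forward(self, F, x):
    out = self.bblock(x)
    return F.concat(x, out, dim=1)  # dense connectivity across channels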
def __init__(self, training=False, **kwargs):
    super(MLP, self).__init__(**kwargs)
    self.layer1 = HybridSequential()
    self.layer1.add(Dense(1024, in_units=25 * 25, activation="relu"),
                    Dropout(0.1 if training else 0.0))
    self.layer2 = HybridSequential()
    self.layer2.add(Dense(512, activation="relu"),
                    Dropout(0.1 if training else 0.0))
    self.layer3 = Dense(256, activation="relu")
    self.output = Dense(1)
    self.hybridize()
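# Sketch (assumption, not in the source): the forward pass is not shown, so
# this chains the layers defined above in the obvious order.
def hybrid_forward(self, F, x):
    x = self.layer1(x)
    x = self.layer2(x)
    x = self.layer3(x)
    return self.output(x)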
def __init__(self, **kwargs):
    super(Model, self).__init__(**kwargs)
    with self.name_scope():
        self.conv1 = Conv2D(32, (3, 3))
        self.conv2 = Conv2D(64, (3, 3))
        self.pool = MaxPool2D(pool_size=(2, 2))
        self.dropout1 = Dropout(0.25)
        self.flatten = Flatten()
        self.dense1 = Dense(128)
        self.dropout2 = Dropout(0.5)
        self.dense2 = Dense(NUM_CLASSES)
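# Sketch (assumption, not in the source): the layer names mirror the classic
# Keras MNIST example, so this forward follows that ordering; the relu
# activations are a guess, since none are declared in __init__.
def hybrid_forward(self, F, x):
    x = F.relu(self.conv1(x))
    x = F.relu(self.conv2(x))
    x = self.dropout1(self.pool(x))
    x = F.relu(self.dense1(self.flatten(x)))
    return self.dense2(self.dropout2(x))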
def __init__(self, training=False, **kwargs):
    super(CNN, self).__init__(**kwargs)
    self.cnn = HybridSequential()
    self.cnn.add(
        # We don't need pooling, since local information matters
        Conv2D(channels=384, kernel_size=3, padding=1, activation='relu'),  # sees 3x3
        Conv2D(channels=256, kernel_size=3, padding=1, activation='relu'),  # sees 5x5
        Dense(units=1024, activation='relu'),
        Dropout(0.2 if training else 0.0),
        Dense(units=512, activation='relu'),
        Dropout(0.2 if training else 0.0),
        Dense(units=256, activation='relu'),
        Dense(1))
    self.cnn.hybridize()
def __init__(self, count: int, depth: int, frac=100) -> None:
    super(Network, self).__init__()
    self._count = count
    self._depth = depth
    with self.name_scope():
        self.add(Dense(int(1475 / frac)))
        self.add(LeakyReLU(alpha=0.2))
        layer = Identity(512, 512)
        layer = Skip(int(2949 / frac), int(5898 / frac), layer)
        layer.block.add(Dropout(0.5))
        # 48 x 48 x 64  = 147456
        # 24 x 24 x 512 = 294912
        # 48 x 48 x 512 = 1179648
        layer = Skip(int(589824 / frac), int(1179648 / frac), layer)
        layer = Skip(int(147456 / frac), int(294912 / frac), layer)
        layer = Skip(int(36864 / frac), int(73728 / frac), layer)
        self.add(layer)
        self.add(Dense(27648))
        self.add(Activation("sigmoid"))
def __init__(self, inner_channels, outer_channels, inner_block=None,
             innermost=False, outermost=False, use_dropout=False,
             use_bias=False, final_out=3):
    super(UnetSkipUnit, self).__init__()
    with self.name_scope():
        self.outermost = outermost
        en_conv = Conv2D(channels=inner_channels, kernel_size=4, strides=2,
                         padding=1, in_channels=outer_channels,
                         use_bias=use_bias)
        en_relu = LeakyReLU(alpha=0.2)
        en_norm = BatchNorm(momentum=0.1, in_channels=inner_channels)
        de_relu = Activation(activation='relu')
        de_norm = BatchNorm(momentum=0.1, in_channels=outer_channels)
        if innermost:
            de_conv = Conv2DTranspose(channels=outer_channels, kernel_size=4,
                                      strides=2, padding=1,
                                      in_channels=inner_channels,
                                      use_bias=use_bias)
            encoder = [en_relu, en_conv]
            decoder = [de_relu, de_conv, de_norm]
            model = encoder + decoder
        elif outermost:
            de_conv = Conv2DTranspose(channels=final_out, kernel_size=4,
                                      strides=2, padding=1,
                                      in_channels=inner_channels * 2)
            encoder = [en_conv]
            decoder = [de_relu, de_conv, Activation(activation='tanh')]
            model = encoder + [inner_block] + decoder
        else:
            de_conv = Conv2DTranspose(channels=outer_channels, kernel_size=4,
                                      strides=2, padding=1,
                                      in_channels=inner_channels * 2,
                                      use_bias=use_bias)
            encoder = [en_relu, en_conv, en_norm]
            decoder = [de_relu, de_conv, de_norm]
            model = encoder + [inner_block] + decoder
        if use_dropout:
            model += [Dropout(rate=0.5)]
        self.model = HybridSequential()
        with self.model.name_scope():
            for block in model:
                self.model.add(block)
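# Sketch (assumption, not in the source): the standard pix2pix-style skip
# wiring that matches the `self.outermost` flag above; inner units
# concatenate their input with their output along the channel axis.
def hybrid_forward(self, F, x):
    if self.outermost:
        return self.model(x)
    return F.concat(x, self.model(x), dim=1)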
def __init__(self, count: int, depth: int) -> None:
    super(Network, self).__init__()
    self._count = count
    self._depth = depth
    with self.name_scope():
        self.add(Conv2D(64, 4, 2, 1, in_channels=depth))
        self.add(LeakyReLU(alpha=0.2))
        layer = Identity(512, 512)
        layer = Skip(512, 512, layer)
        for _ in range(0):  # extra inner skips, currently disabled
            layer = Skip(512, 512, layer)
        layer.block.add(Dropout(0.5))
        layer = Skip(256, 256, layer)
        layer = Skip(128, 128, layer)
        layer = Skip(64, 64, layer)
        self.add(layer)
        self.add(Conv2DTranspose(count, 4, 2, 1, in_channels=128))
        self.add(Activation("sigmoid"))
    for param in self.collect_params().values():
        param.initialize()
        if "bias" in param.name:
            param.set_data(zeros(param.data().shape))
        elif "gamma" in param.name:
            param.set_data(random_normal(1, 0.02, param.data().shape))
        elif "weight" in param.name:
            param.set_data(random_normal(0, 0.02, param.data().shape))
def __init__(self, in_channels, ndf=64, n_layers=3, use_bias=False,
             istest=False, latent=256, usetanh=False):
    super(Decoder, self).__init__()
    self.model = HybridSequential()
    kernel_size = 5
    padding = 0
    nf_mult = 2 ** n_layers
    # Use integer division so channel counts stay ints; `/` yields floats
    # in Python 3 and breaks the layer constructors.
    self.model.add(Conv2DTranspose(channels=ndf * nf_mult // 2,
                                   kernel_size=kernel_size, strides=2,
                                   padding=padding, in_channels=latent,
                                   use_bias=use_bias))
    self.model.add(BatchNorm(momentum=0.1, in_channels=ndf * nf_mult // 2,
                             use_global_stats=istest))
    # self.model.add(LeakyReLU(alpha=0.2))
    self.model.add(Activation(activation='relu'))
    for n in range(1, n_layers):
        nf_mult = nf_mult // 2
        self.model.add(Conv2DTranspose(channels=ndf * nf_mult // 2,
                                       kernel_size=kernel_size, strides=2,
                                       padding=padding,
                                       in_channels=ndf * nf_mult,
                                       use_bias=use_bias))
        self.model.add(BatchNorm(momentum=0.1,
                                 in_channels=ndf * nf_mult // 2,
                                 use_global_stats=istest))
        # self.model.add(LeakyReLU(alpha=0.2))
        if n == 2:
            self.model.add(Dropout(rate=0.5))
        self.model.add(Activation(activation='relu'))
    self.model.add(Conv2DTranspose(channels=in_channels,
                                   kernel_size=kernel_size, strides=2,
                                   padding=padding, in_channels=ndf))
    # self.model.add(LeakyReLU(alpha=0.2))
    self.model.add(Activation(activation='tanh'))
def __init__(self, in_channels, ndf=64, n_layers=3, use_bias=False,
             istest=False, usetanh=False):
    super(CEGeneratorP, self).__init__()
    with self.name_scope():
        self.model = HybridSequential()
        kernel_size = 5
        padding = 0  # int(np.ceil((kernel_size - 1) / 2))
        # Encoder
        self.model.add(Conv2D(channels=ndf, kernel_size=kernel_size,
                              strides=2, padding=padding,
                              in_channels=in_channels))
        self.model.add(LeakyReLU(alpha=0.2))
        nf_mult = 1  # a dead `nf_mult = 2` assignment was removed here
        nf_mult_prev = 1
        for n in range(1, n_layers):
            nf_mult_prev = nf_mult
            nf_mult = 2 ** n
            self.model.add(Conv2D(channels=ndf * nf_mult,
                                  kernel_size=kernel_size, strides=2,
                                  padding=padding,
                                  in_channels=ndf * nf_mult_prev,
                                  use_bias=use_bias))
            self.model.add(BatchNorm(momentum=0.1,
                                     in_channels=ndf * nf_mult,
                                     use_global_stats=istest))
            self.model.add(LeakyReLU(alpha=0.2))
        nf_mult_prev = nf_mult
        nf_mult = 2 ** n_layers
        self.model.add(Conv2D(channels=4096, kernel_size=kernel_size,
                              strides=2, padding=padding,
                              in_channels=ndf * nf_mult_prev,
                              use_bias=use_bias))
        # self.model.add(BatchNorm(momentum=0.1, in_channels=128, use_global_stats=istest))
        if usetanh:
            self.model.add(Activation(activation='tanh'))
        else:
            self.model.add(LeakyReLU(alpha=0.2))
        # Decoder (integer division keeps channel counts ints)
        self.model.add(Conv2DTranspose(channels=ndf * nf_mult // 2,
                                       kernel_size=kernel_size, strides=2,
                                       padding=padding, in_channels=4096,
                                       use_bias=use_bias))
        self.model.add(BatchNorm(momentum=0.1,
                                 in_channels=ndf * nf_mult // 2,
                                 use_global_stats=istest))
        # self.model.add(LeakyReLU(alpha=0.2))
        self.model.add(Activation(activation='relu'))
        for n in range(1, n_layers):
            nf_mult = nf_mult // 2
            self.model.add(Conv2DTranspose(channels=ndf * nf_mult // 2,
                                           kernel_size=kernel_size,
                                           strides=2, padding=padding,
                                           in_channels=ndf * nf_mult,
                                           use_bias=use_bias))
            self.model.add(BatchNorm(momentum=0.1,
                                     in_channels=ndf * nf_mult // 2,
                                     use_global_stats=istest))
            # self.model.add(LeakyReLU(alpha=0.2))
            if n == 2:
                self.model.add(Dropout(rate=0.5))
            self.model.add(Activation(activation='relu'))
        self.model.add(Conv2DTranspose(channels=in_channels,
                                       kernel_size=kernel_size, strides=2,
                                       padding=padding, in_channels=ndf))
        # self.model.add(LeakyReLU(alpha=0.2))
        self.model.add(Activation(activation='tanh'))
def __init__(self, opts, num_filters, pool_type='avg'):
    super(TransitionBlock, self).__init__()
    self.pool_type = pool_type
    self.tblock = HybridSequential()
    self.tblock.add(BatchNorm(momentum=opts.bn_mom, epsilon=opts.bn_eps))
    self.tblock.add(Activation(opts.activation))
    self.tblock.add(Conv2D(channels=int(num_filters * opts.reduction),
                           kernel_size=(1, 1), strides=(1, 1),
                           use_bias=opts.use_bias, padding=(0, 0)))
    if opts.drop_out > 0:
        self.tblock.add(Dropout(opts.drop_out))
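# Sketch (assumption, not in the source): a DenseNet transition typically
# ends with stride-2 pooling selected by `pool_type`; this forward is a
# guess at how the stored flag is consumed.
def hybrid_forward(self, F, x):
    out = self.tblock(x)
    return F.Pooling(out, kernel=(2, 2), stride=(2, 2),
                     pool_type=self.pool_type)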
def create_model(self) -> Sequential:
    embedding_size = 100
    model = Sequential()
    with model.name_scope():
        # input shape is (batch_size,); output shape is
        # (batch_size, embedding_size)
        model.add(Embedding(input_dim=self.vocab_size,
                            output_dim=embedding_size))
        model.add(Dropout(0.2))
        # layout : str, default 'TNC'
        #     The format of input and output tensors. T, N and C stand for
        #     sequence length, batch size, and feature dimensions
        #     respectively. Use 'NTC' so the input shape can be
        #     (batch_size, sequence_length, embedding_size).
        model.add(LSTM(hidden_size=64, layout='NTC', bidirectional=True))
        model.add(Dense(len(self.labels)))
    return model
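# Usage sketch (assumption, not in the source): exercising the returned
# model with a dummy batch. `classifier` and all sizes are hypothetical.
import mxnet as mx
model = classifier.create_model()   # hypothetical owner instance
model.initialize()
token_ids = mx.nd.zeros((4, 20))    # (batch_size, sequence_length)
logits = model(token_ids)           # one row of label scores per sequence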
def __init__(self, opts, num_filters, pool_type='avg'):
    super(TransitionBlock, self).__init__()
    self.pool_type = pool_type
    self.tblock = HybridSequential()
    # Compare strings with `==`; `is` tests identity and is unreliable here.
    if opts.norm_type == 'batch':
        self.tblock.add(NormLayer())
    elif opts.norm_type == 'group':
        self.tblock.add(GroupNorm())
    elif opts.norm_type == 'instance':
        self.tblock.add(InstanceNorm())
    if opts.activation in ['leaky']:
        self.tblock.add(LeakyReLU(opts.alpha))
    else:
        self.tblock.add(Activation(opts.activation))
    self.tblock.add(Conv3D(channels=int(num_filters * opts.reduction),
                           kernel_size=(opts.zKernelSize, 1, 1),
                           strides=(opts.zStride, 1, 1),
                           use_bias=opts.use_bias,
                           padding=(opts.zPad, 0, 0)))
    if opts.drop_out > 0:
        self.tblock.add(Dropout(opts.drop_out))
def get_model(vocab_size, embedding_size, hidden_size, dropout_rate,
              classes=3):
    net = HybridSequential()
    with net.name_scope():
        net.add(Embedding(vocab_size, embedding_size))
        # Use the `dropout_rate` parameter, not the global `args.dropout`
        # the original referenced.
        net.add(Dropout(dropout_rate))
        net.add(LSTM(hidden_size=hidden_size // 2, num_layers=1,
                     layout='NTC', bidirectional=True,
                     dropout=dropout_rate))
        net.add(Dense(units=classes, flatten=False))
    return net
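# Usage sketch (assumption, not in the source): build and initialize the
# network with made-up hyperparameters, then hybridize for graph execution.
net = get_model(vocab_size=5000, embedding_size=128, hidden_size=256,
                dropout_rate=0.3)
net.initialize()
net.hybridize()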
def __init__(self, inner_channels, outer_channels, inner_block=None,
             innermost=False, outermost=False, use_dropout=False,
             use_bias=False, **kwargs):
    super(UnetSkipUnit, self).__init__()
    with self.name_scope():
        self.outermost = outermost
        downsample = MaxPool3D(pool_size=2, strides=2)
        upsample = Conv3DTranspose(channels=outer_channels, kernel_size=2,
                                   padding=0, strides=2, use_bias=use_bias)
        head = Conv3D(channels=outer_channels, kernel_size=1)
        self.model = HybridSequential()
        if not outermost:
            self.model.add(downsample)
        self.model.add(conv_block(inner_channels, use_bias=use_bias, **kwargs))
        if not innermost:
            self.model.add(inner_block)
            self.model.add(conv_block(inner_channels, use_bias=use_bias,
                                      **kwargs))
        if not outermost:
            self.model.add(upsample)
        if outermost:
            if use_dropout:
                self.model.add(Dropout(rate=0.1))
            self.model.add(head)
def __init__(self, inner_channels, outer_channels, inner_block=None,
             innermost=False, outermost=False, use_dropout=False,
             use_bias=False):
    super(UnetSkipUnit, self).__init__()
    # First define the basic components.
    self.outermost = outermost
    en_conv = Conv2D(channels=inner_channels, kernel_size=4, strides=2,
                     padding=1, in_channels=outer_channels,
                     use_bias=use_bias)
    en_relu = LeakyReLU(alpha=0.2)
    en_bn = BatchNorm(momentum=0.1, in_channels=inner_channels)
    deconv_innermost = Conv2DTranspose(channels=outer_channels,
                                       kernel_size=4, strides=2, padding=1,
                                       in_channels=inner_channels,
                                       use_bias=use_bias)
    deconv_output = Conv2DTranspose(channels=outer_channels, kernel_size=4,
                                    strides=2, padding=1,
                                    in_channels=2 * inner_channels,
                                    use_bias=True)
    deconv_common = Conv2DTranspose(channels=outer_channels, kernel_size=4,
                                    strides=2, padding=1,
                                    in_channels=2 * inner_channels,
                                    use_bias=use_bias)
    de_relu = Activation('relu')
    de_bn = BatchNorm(momentum=0.1, in_channels=outer_channels)
    # U-Net blocks come in three kinds: innermost, middle, and outermost.
    if innermost:
        encoder = [en_relu, en_conv]
        decoder = [de_relu, deconv_innermost, de_bn]
        model = encoder + decoder
    elif outermost:
        encoder = [en_conv]
        decoder = [de_relu, deconv_output]
        model = encoder + [inner_block] + decoder
        model += [Activation('tanh')]
    else:
        encoder = [en_relu, en_conv, en_bn]
        decoder = [de_relu, deconv_common, de_bn]
        model = encoder + [inner_block] + decoder
        if use_dropout:
            model += [Dropout(0.5)]
    self.model = HybridSequential()
    with self.model.name_scope():
        for block in model:
            self.model.add(block)
import d2lzh as d2l
from mxnet import gluon, init, nd
from mxnet.gluon.nn import Sequential, Conv2D, Dense, MaxPool2D, Dropout

net = Sequential()
net.add(Conv2D(channels=6, kernel_size=5, activation='sigmoid'),
        MaxPool2D(pool_size=2, strides=2),
        Conv2D(channels=16, kernel_size=5, activation='sigmoid'),
        MaxPool2D(pool_size=2, strides=2),
        Dense(120, activation='sigmoid'),
        Dropout(0.05),
        Dense(84, activation='sigmoid'),
        Dropout(0.05),
        Dense(10))

batch_size = 256
train_iter, test_iter = d2l.load_data_mnist(batch_size=batch_size)

lr, num_epochs = 0.9, 20
ctx = d2l.try_gpu()
net.initialize(force_reinit=True, ctx=ctx, init=init.Xavier())
X = nd.random.uniform(shape=(1, 1, 28, 28))  # sample input; unused below
trainer = gluon.Trainer(net.collect_params(), 'sgd', {'learning_rate': lr})
d2l.train_ch5(net, train_iter, test_iter, batch_size, trainer, ctx,
              num_epochs)
def train(args: argparse.Namespace) -> HybridBlock:
    session = boto3.session.Session()
    client = session.client(service_name="secretsmanager",
                            region_name="us-east-1")
    mlflow_secret = client.get_secret_value(SecretId=args.mlflow_secret)
    mlflowdb_conf = json.loads(mlflow_secret["SecretString"])
    converters.encoders[np.float64] = converters.escape_float
    converters.conversions = converters.encoders.copy()
    converters.conversions.update(converters.decoders)
    mlflow.set_tracking_uri(
        f"mysql+pymysql://{mlflowdb_conf['username']}:"
        f"{mlflowdb_conf['password']}@{mlflowdb_conf['host']}/mlflow")
    if mlflow.get_experiment_by_name(args.mlflow_experiment) is None:
        mlflow.create_experiment(args.mlflow_experiment,
                                 args.mlflow_artifacts_location)
    mlflow.set_experiment(args.mlflow_experiment)
    col_names = ["target"] + [f"kinematic_{i}" for i in range(1, 22)]
    train_df = pd.read_csv(f"{args.train_channel}/train.csv.gz",
                           header=None, names=col_names)
    val_df = pd.read_csv(f"{args.validation_channel}/val.csv.gz",
                         header=None, names=col_names)
    train_X = train_df.drop("target", axis=1)
    train_y = train_df["target"]
    train_dataset = ArrayDataset(train_X.to_numpy(dtype="float32"),
                                 train_y.to_numpy(dtype="float32"))
    train = DataLoader(train_dataset, batch_size=args.batch_size)
    val_X = val_df.drop("target", axis=1)
    val_y = val_df["target"]
    val_dataset = ArrayDataset(val_X.to_numpy(dtype="float32"),
                               val_y.to_numpy(dtype="float32"))
    validation = DataLoader(val_dataset, batch_size=args.batch_size)
    ctx = [gpu(i) for i in range(args.gpus)] if args.gpus > 0 else cpu()
    mlflow.gluon.autolog()
    with mlflow.start_run():
        net = HybridSequential()
        with net.name_scope():
            net.add(Dense(256))
            net.add(Dropout(.2))
            net.add(Dense(64))
            net.add(Dropout(.1))
            net.add(Dense(16))
            net.add(Dense(2))
        net.initialize(Xavier(magnitude=2.24), ctx=ctx)
        net.hybridize()
        trainer = Trainer(net.collect_params(), "sgd",
                          {"learning_rate": args.learning_rate})
        est = estimator.Estimator(net=net,
                                  loss=SoftmaxCrossEntropyLoss(),
                                  trainer=trainer,
                                  train_metrics=Accuracy(),
                                  context=ctx)
        est.fit(train, epochs=args.epochs, val_data=validation)
    return net
def __init__(self, inner_channels, outer_channels, inner_block=None,
             innermost=False, outermost=False, use_dropout=False,
             use_bias=False, use_attention=True, use_resblock=True,
             use_p_at=False, use_c_at=False, save_att=False):
    super(UnetSkipUnit, self).__init__()
    with self.name_scope():
        self.save_att = save_att
        self.outermost = outermost
        self.innermost = innermost
        self.use_attention = use_attention
        # Default the residual slots so the assignments at the end of this
        # method never hit an unbound name when use_resblock is False (the
        # original left them undefined on those paths).
        res1 = res2 = res3 = res4 = None
        if not self.outermost:
            res_block_1 = Res_Block(outer_channels=outer_channels)
            res_block_2 = Res_Block(outer_channels=inner_channels)
        en_conv = Conv2D(channels=inner_channels, kernel_size=4, strides=2,
                         padding=1, in_channels=outer_channels,
                         use_bias=use_bias)
        en_relu = LeakyReLU(alpha=0.2)
        en_norm = BatchNorm(momentum=0.1, in_channels=inner_channels)
        de_relu = Activation(activation='relu')
        de_norm = BatchNorm(momentum=0.1, in_channels=outer_channels)
        if innermost:
            de_conv = Conv2DTranspose(channels=outer_channels, kernel_size=4,
                                      strides=2, padding=1,
                                      in_channels=inner_channels,
                                      use_bias=use_bias)
            self.p_at = CA_M2(in_channel=inner_channels) if use_p_at else CA_M3()
            self.c_at = CA_M1() if use_c_at else CA_M3()
            res_block_3 = Res_Block(outer_channels=inner_channels)
            res_block_4 = Res_Block(outer_channels=outer_channels)
            if use_resblock:
                res1 = res_block_1
                encoder = [en_conv, en_norm, en_relu]
                res2 = res_block_2
                res3 = res_block_3
                decoder = [de_conv, de_norm, de_relu]
                res4 = res_block_4
            else:
                encoder = [en_relu, en_conv]
                decoder = [de_relu, de_conv, de_norm]
        elif outermost:
            de_conv = Conv2DTranspose(channels=outer_channels, kernel_size=4,
                                      strides=2, padding=1,
                                      in_channels=inner_channels)
            channel_trans = Conv2D(channels=1, in_channels=outer_channels,
                                   kernel_size=1, prefix='')
            if use_resblock:
                encoder = [en_conv, en_norm, en_relu]
                decoder = [de_conv, de_norm, de_relu, channel_trans]
            else:
                encoder = [en_conv]
                decoder = [de_relu, de_conv, de_norm, channel_trans]
            self.p_at = CA_M2(in_channel=inner_channels) if use_p_at else CA_M3()
            self.c_at = CA_M1() if use_c_at else CA_M3()
        else:
            de_conv = Conv2DTranspose(channels=outer_channels, kernel_size=4,
                                      strides=2, padding=1,
                                      in_channels=inner_channels,
                                      use_bias=use_bias)
            self.p_at = CA_M2(in_channel=inner_channels) if use_p_at else CA_M3()
            self.c_at = CA_M1() if use_c_at else CA_M3()
            res_block_3 = Res_Block(outer_channels=inner_channels)
            res_block_4 = Res_Block(outer_channels=outer_channels)
            if use_resblock:
                res1 = res_block_1
                encoder = [en_conv, en_norm, en_relu]
                res2 = res_block_2
                res3 = res_block_3
                decoder = [de_conv, de_norm, de_relu]
                res4 = res_block_4
            else:
                encoder = [en_relu, en_conv, en_norm]
                decoder = [de_relu, de_conv, de_norm]
        if use_dropout:
            decoder += [Dropout(rate=0.5)]
        self.encoder = HybridSequential()
        with self.encoder.name_scope():
            for block in encoder:
                self.encoder.add(block)
        self.inner_block = inner_block
        self.res1 = res1
        self.res2 = res2
        self.res3 = res3
        self.res4 = res4
        self.decoder = HybridSequential()
        with self.decoder.name_scope():
            for block in decoder:
                self.decoder.add(block)
def __init__(self, inner_channels, outer_channels, inner_block=None,
             innermost=False, outermost=False, use_dropout=False,
             use_bias=False, use_attention=True, use_resblock=True):
    super(UnetSkipUnit, self).__init__()
    with self.name_scope():
        self.outermost = outermost
        self.innermost = innermost
        self.use_attention = use_attention
        res_block = Res_Block(outer_channels=outer_channels)
        en_conv = Conv2D(channels=inner_channels, kernel_size=4, strides=2,
                         padding=1, in_channels=outer_channels,
                         use_bias=use_bias)
        en_relu = LeakyReLU(alpha=0.2)
        en_norm = BatchNorm(momentum=0.1, in_channels=inner_channels)
        de_relu = Activation(activation='relu')
        de_norm = BatchNorm(momentum=0.1, in_channels=outer_channels)
        if innermost:
            de_conv = Conv2DTranspose(channels=outer_channels, kernel_size=4,
                                      strides=2, padding=1,
                                      in_channels=inner_channels,
                                      use_bias=use_bias)
            res_block1 = Res_Block(outer_channels=inner_channels)
            if use_resblock:
                encoder = [res_block, en_relu, en_conv]
                decoder = [de_relu, res_block1, de_conv, de_norm]
            else:
                encoder = [en_relu, en_conv]
                decoder = [de_relu, de_conv, de_norm]
            attention = self_attention_block(in_channel=inner_channels)
            model = encoder
        elif outermost:
            de_conv = Conv2DTranspose(channels=outer_channels, kernel_size=4,
                                      strides=2, padding=1,
                                      in_channels=inner_channels * 2)
            res_block1 = Res_Block(outer_channels=inner_channels * 2)
            if use_resblock:
                encoder = [res_block, en_conv]
                decoder = [de_relu, res_block1, de_conv,
                           Activation(activation='tanh')]
            else:
                encoder = [en_conv]
                decoder = [de_relu, de_conv, Activation(activation='tanh')]
            attention = self_attention_block(in_channel=inner_channels * 2)
            model = encoder + [inner_block]
        else:
            de_conv = Conv2DTranspose(channels=outer_channels, kernel_size=4,
                                      strides=2, padding=1,
                                      in_channels=inner_channels * 2,
                                      use_bias=use_bias)
            res_block1 = Res_Block(outer_channels=inner_channels * 2)
            if use_resblock:
                encoder = [res_block, en_relu, en_conv, en_norm]
                decoder = [de_relu, res_block1, de_conv, de_norm]
            else:
                encoder = [en_relu, en_conv, en_norm]
                decoder = [de_relu, de_conv, de_norm]
            attention = self_attention_block(in_channel=inner_channels * 2)
            model = encoder + [inner_block]
        if use_dropout:
            decoder += [Dropout(rate=0.5)]
        self.encoder = HybridSequential()
        with self.encoder.name_scope():
            for block in model:
                self.encoder.add(block)
        self.inner_block = inner_block
        self.attention = attention
        self.decoder = HybridSequential()
        with self.decoder.name_scope():
            for block in decoder:
                # The original called `self.decoder.add()` with no argument,
                # which raises at build time; pass each block through.
                self.decoder.add(block)
def train(hyperparameters, channel_input_dirs, num_gpus, hosts):
    batch_size = hyperparameters.get("batch_size", 64)
    epochs = hyperparameters.get("epochs", 3)
    mx.random.seed(42)
    training_dir = channel_input_dirs['training']
    with open("{}/train/data.p".format(training_dir), "rb") as pickle:
        train_nd = load(pickle)
    with open("{}/validation/data.p".format(training_dir), "rb") as pickle:
        validation_nd = load(pickle)
    train_data = gluon.data.DataLoader(train_nd, batch_size, shuffle=True)
    validation_data = gluon.data.DataLoader(validation_nd, batch_size,
                                            shuffle=True)
    net = Sequential()
    # See http://gluon.mxnet.io/chapter03_deep-neural-networks/plumbing.html
    # "What's the deal with name_scope()?"
    with net.name_scope():
        net.add(Conv2D(channels=32, kernel_size=(3, 3), padding=0,
                       activation="relu"))
        net.add(Conv2D(channels=32, kernel_size=(3, 3), padding=0,
                       activation="relu"))
        net.add(MaxPool2D(pool_size=(2, 2)))
        net.add(Dropout(.25))
        net.add(Flatten())
        net.add(Dense(8))
    ctx = mx.gpu() if num_gpus > 0 else mx.cpu()
    # Xavier initialization, also known as Glorot.
    net.collect_params().initialize(Xavier(magnitude=2.24), ctx=ctx)
    loss = SoftmaxCrossEntropyLoss()
    # kvstore type for multi-gpu and distributed training.
    if len(hosts) == 1:
        kvstore = "device" if num_gpus > 0 else "local"
    else:
        # The original had a stray quote: "dist_device_sync'".
        kvstore = "dist_device_sync" if num_gpus > 0 else "dist_sync"
    trainer = Trainer(net.collect_params(), optimizer="adam",
                      kvstore=kvstore)
    smoothing_constant = .01
    for e in range(epochs):
        moving_loss = 0
        for i, (data, label) in enumerate(train_data):
            data = data.as_in_context(ctx)
            label = label.as_in_context(ctx)
            with autograd.record():
                output = net(data)
                loss_result = loss(output, label)
            loss_result.backward()
            trainer.step(batch_size)
            # Exponential moving average of the batch loss.
            curr_loss = nd.mean(loss_result).asscalar()
            moving_loss = (curr_loss if ((i == 0) and (e == 0)) else
                           (1 - smoothing_constant) * moving_loss +
                           smoothing_constant * curr_loss)
        validation_accuracy = measure_performance(net, ctx, validation_data)
        train_accuracy = measure_performance(net, ctx, train_data)
        print("Epoch %s. Loss: %s, Train_acc %s, Test_acc %s" %
              (e, moving_loss, train_accuracy, validation_accuracy))
    return net
    # layer 3
    Conv2D(channels=64, kernel_size=(3, 3), padding=(3 // 2, 3 // 2),
           activation='relu'),
    BatchNorm(axis=1, momentum=0.9),
    # layer 4
    Conv2D(channels=64, kernel_size=(3, 3), padding=(3 // 2, 3 // 2),
           activation='relu'),
    BatchNorm(axis=1, momentum=0.9),
    MaxPool2D(pool_size=(2, 2), strides=(2, 2)),
    # layer 5
    Flatten(),
    Dropout(0.3),
    Dense(128, activation='relu'),
    # layer 6
    Dense(10))

# %%
# -- Initialize parameters
net.initialize(init=init.Xavier(), ctx=mx_ctx)
for name, param in net.collect_params().items():
    print(name)

# %%
# -- Define loss function and optimizer
net = HybridSequential()
with net.name_scope():
    net.add(
        # block 1
        Conv2D(channels=32, kernel_size=(5, 5), strides=(1, 1),
               padding=(2, 2)),
        Activation("relu"),
        BatchNorm(axis=1, momentum=0.9, epsilon=1e-5),
        Conv2D(channels=32, kernel_size=(5, 5), strides=(1, 1),
               padding=(2, 2)),
        Activation("relu"),
        BatchNorm(axis=1, momentum=0.9, epsilon=1e-5),
        MaxPool2D(pool_size=(2, 2), strides=(2, 2)),
        BatchNorm(axis=1, momentum=0.9, epsilon=1e-5),
        Dropout(0.5),
        # block 2
        Conv2D(channels=64, kernel_size=(3, 3), strides=(1, 1),
               padding=(1, 1)),
        Activation("relu"),
        BatchNorm(axis=1, momentum=0.9, epsilon=1e-5),
        Conv2D(channels=128, kernel_size=(3, 3), strides=(2, 2),
               padding=(1, 1)),
        Activation("relu"),
        BatchNorm(axis=1, momentum=0.9, epsilon=1e-5),
        # MaxPool2D(pool_size=(2, 2), strides=(2, 2)),
        # BatchNorm(axis=1, momentum=0.9, epsilon=1e-5),
        Dropout(0.5),
        # block 3