def __init__(self, name, channels=2, n_labels=4992, bn_mom=0.9, act_type="relu",
             select_policy_from_plane=False):
    """
    Definition of the policy head proposed by the AlphaZero authors
    :param name: name prefix for all blocks
    :param channels: Number of channels for 1st conv operation in branch 0
    :param n_labels: Number of policy output labels
    :param bn_mom: Batch normalization momentum parameter
    :param act_type: Activation type to use
    :param select_policy_from_plane: If True, the policy is read directly from the convolutional
                                     feature planes instead of a final Dense layer
    """
    super(_PolicyHeadAlphaZero, self).__init__(prefix=name + "_")

    self.body = HybridSequential(prefix="")
    self.select_policy_from_plane = select_policy_from_plane

    with self.name_scope():
        if self.select_policy_from_plane:
            self.body.add(Conv2D(channels=256, padding=1, kernel_size=(3, 3), use_bias=False))
            self.body.add(BatchNorm(momentum=bn_mom))
            self.body.add(get_act(act_type))
            self.body.add(Conv2D(channels=channels, padding=1, kernel_size=(3, 3), use_bias=False))
            self.body.add(Flatten())
        else:
            self.body.add(Conv2D(channels=channels, kernel_size=(1, 1), use_bias=False))
            self.body.add(BatchNorm(momentum=bn_mom))
            # if not self.select_policy_from_plane:
            self.body.add(get_act(act_type))
            self.body.add(Flatten())
            self.body.add(Dense(units=n_labels))
def preNeuralNet(fs, T, ctx, template_block, margin, learning_rate=0.003):
    net = gluon.nn.Sequential()
    with net.name_scope():  # Used to disambiguate saving and loading net parameters
        net.add(MatchedFilteringLayer(mod=fs * T,
                                      fs=fs,
                                      template_H1=template_block[:, :1],    # .as_in_context(ctx)
                                      template_L1=template_block[:, -1:]))  # .as_in_context(ctx)
        net.add(CutHybridLayer(margin=margin))
        net.add(Conv2D(channels=16, kernel_size=(1, 3), activation='relu'))
        net.add(MaxPool2D(pool_size=(1, 4), strides=2))
        net.add(Conv2D(channels=32, kernel_size=(1, 3), activation='relu'))
        net.add(MaxPool2D(pool_size=(1, 4), strides=2))
        net.add(Flatten())
        net.add(Dense(32))
        net.add(Activation('relu'))
        net.add(Dense(2))

    # Initialize parameters of all layers
    net.initialize(mx.init.Xavier(magnitude=2.24), ctx=ctx[-1], force_reinit=True)
    net.summary(nd.random.randn(1, 2, 2, 1, fs * T, ctx=ctx[-1]))
    # Initialize parameters of all layers
    net.initialize(mx.init.Xavier(magnitude=2.24), ctx=ctx, force_reinit=True)

    # Cross-entropy loss function
    # loss = gloss.SoftmaxCrossEntropyLoss()
    # The cross-entropy loss for binary classification.
    bloss = gluon.loss.SigmoidBinaryCrossEntropyLoss()
    trainer = gluon.Trainer(net.collect_params(), 'adam', {'learning_rate': learning_rate})
    return net, bloss, trainer
def __init__(self, name, channels=1, fc0=256, bn_mom=0.9, act_type="relu"):
    """
    Definition of the value head proposed by the AlphaZero authors
    :param name: name prefix for all blocks
    :param channels: Number of channels for 1st conv operation in branch 0
    :param fc0: Number of units in Dense/Fully-Connected layer
    :param bn_mom: Batch normalization momentum parameter
    :param act_type: Activation type to use
    """
    super(_ValueHeadAlphaZero, self).__init__(prefix=name + "_")

    self.body = HybridSequential(prefix="")

    with self.name_scope():
        self.body.add(Conv2D(channels=channels, kernel_size=(1, 1), use_bias=False))
        self.body.add(BatchNorm(momentum=bn_mom))
        self.body.add(get_act(act_type))
        self.body.add(Flatten())
        self.body.add(Dense(units=fc0))
        self.body.add(get_act(act_type))
        self.body.add(Dense(units=1))
        self.body.add(get_act("tanh"))
def __init__(self, name, channels=1, fc0=256, bn_mom=0.9, act_type="relu"):
    """
    Definition of the value head. Same as the AlphaZero value head, but with the order of
    Batch-Norm and ReLU changed.
    :param name: name prefix for all blocks
    :param channels: Number of channels for 1st conv operation in branch 0
    :param fc0: Number of units in Dense/Fully-Connected layer
    :param bn_mom: Batch normalization momentum parameter
    :param act_type: Activation type to use
    """
    super(_ValueHeadRise, self).__init__(prefix=name + "_")

    self.body = HybridSequential(prefix="")

    with self.name_scope():
        self.body.add(Conv2D(channels=channels, kernel_size=(1, 1), use_bias=False))
        self.body.add(BatchNorm(momentum=bn_mom))
        self.body.add(get_act(act_type))
        self.body.add(Flatten())
        self.body.add(Dense(units=fc0))
        self.body.add(get_act(act_type))
        self.body.add(Dense(units=1))
        self.body.add(get_act("tanh"))
def __init__(self, **kwargs):
    super(Model, self).__init__(**kwargs)
    with self.name_scope():
        self.conv1 = Conv2D(32, (3, 3))
        self.conv2 = Conv2D(64, (3, 3))
        self.pool = MaxPool2D(pool_size=(2, 2))
        self.dropout1 = Dropout(0.25)
        self.flatten = Flatten()
        self.dense1 = Dense(128)
        self.dropout2 = Dropout(0.5)
        self.dense2 = Dense(NUM_CLASSES)
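# The constructor above only registers the layers; the snippet does not show how they are wired
# together. Below is a minimal sketch of a matching forward pass, assuming Model is a HybridBlock.
# The exact wiring (ReLU after each conv, pooling and dropout before the dense layers) is an
# illustrative assumption, not taken from the original source.
def hybrid_forward(self, F, x):
    x = F.relu(self.conv1(x))
    x = F.relu(self.conv2(x))
    x = self.pool(x)
    x = self.dropout1(x)
    x = self.flatten(x)
    x = F.relu(self.dense1(x))
    x = self.dropout2(x)
    return self.dense2(x)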
def __init__(self, name, channels=1, fc0=256, bn_mom=0.9, act_type="relu", se_type=None):
    """
    Definition of the value head proposed by the AlphaZero authors
    :param name: name prefix for all blocks
    :param channels: Number of channels for 1st conv operation in branch 0
    :param fc0: Number of units in Dense/Fully-Connected layer
    :param bn_mom: Batch normalization momentum parameter
    :param act_type: Activation type to use
    :param se_type: SqueezeExcitation type, choose one of [None, "cSE", "sSE", "csSE"] for no
                    squeeze excitation, channel-wise squeeze excitation, spatial squeeze
                    excitation, or channel-spatial squeeze excitation, respectively
    """
    super(_ValueHeadAlphaZero, self).__init__(prefix=name + "_")

    self.body = HybridSequential(prefix="")

    with self.name_scope():
        self.body.add(Conv2D(channels=channels, kernel_size=(1, 1), use_bias=False))
        self.body.add(BatchNorm(momentum=bn_mom))

        if se_type:
            if se_type == "cSE":
                # apply channel-wise squeeze excitation
                self.body.add(_ChannelSqueezeExcitation("se0", channels, 16, act_type))
            elif se_type == "sSE":
                self.body.add(_SpatialSqueezeExcitation("se0"))
            elif se_type == "csSE":
                self.body.add(_SpatialChannelSqueezeExcitation("se0", channels, 1, act_type))
            else:
                raise Exception(
                    'Unsupported Squeeze Excitation Module: Choose one of [None, "cSE", "sSE", "csSE"]')

        self.body.add(get_act(act_type))
        self.body.add(Flatten())
        self.body.add(Dense(units=fc0))
        self.body.add(get_act(act_type))
        self.body.add(Dense(units=1))
        self.body.add(get_act("tanh"))
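# A minimal usage sketch for the value head above, assuming the surrounding module that defines
# _ValueHeadAlphaZero and get_act is importable. The 256x8x8 feature-map shape and the "value"
# prefix are illustrative assumptions; se_type=None avoids the optional squeeze-excitation blocks.
import mxnet as mx

value_head = _ValueHeadAlphaZero("value", channels=1, fc0=256, act_type="relu", se_type=None)
value_head.initialize(mx.init.Xavier())
features = mx.nd.random.uniform(shape=(1, 256, 8, 8))  # dummy backbone output
value = value_head.body(features)                       # call the body directly; forward is not shown here
print(value.shape)                                      # (1, 1), value squashed to [-1, 1] by tanh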
def __init__(self, n_dims=128, **kwargs):
    PersistentBlock.__init__(self, **kwargs)
    if n_dims < 16:
        raise ValueError('`n_dims` must be at least 16 (given: %d)' % n_dims)
    self.encoder = Sequential()
    self.encoder.add(BatchNorm(),
                     Conv2D(int(n_dims / 16), 6, (4, 3)),
                     Activation('relu'),
                     Conv2D(int(n_dims / 8), 3),
                     Activation('relu'),
                     Conv2D(int(n_dims / 2), 3),
                     BatchNorm(),
                     MaxPool2D(),
                     Activation('relu'),
                     Conv2D(int(n_dims), 3),
                     MaxPool2D(),
                     Activation('relu'),
                     Conv2D(int(n_dims), 3),
                     MaxPool2D(),
                     Activation('relu'),
                     Flatten())
def __init__(self, n_dims=16, n_hidden_units=16, n_hidden_layers=2, activation='relu',
             transform=(lambda x: x), **kwargs):
    PersistentBlock.__init__(self, **kwargs)
    self.transform = transform
    self.seq = Sequential()
    self.seq.add(Flatten(),
                 *[Dense(n_hidden_units, activation=activation) for _ in range(n_hidden_layers)],
                 Dense(n_dims))
def __init__(self, **kwargs):
    super().__init__(**kwargs)
    with self.name_scope():
        self.conv1 = Conv2D(64, kernel_size=(7, 7), padding=(3, 3))
        self.conv2 = Conv2D(64, kernel_size=(5, 5), padding=(2, 2))
        self.conv3 = Conv2D(64, kernel_size=(5, 5), padding=(2, 2))
        self.conv4 = Conv2D(64, kernel_size=(5, 5), padding=(2, 2))
        self.conv5 = Conv2D(48, kernel_size=(5, 5), padding=(2, 2))
        self.conv6 = Conv2D(48, kernel_size=(5, 5), padding=(2, 2))
        self.conv7 = Conv2D(48, kernel_size=(5, 5), padding=(2, 2))
        self.conv8 = Conv2D(32, kernel_size=(5, 5), padding=(2, 2))
        self.conv9 = Conv2D(32, kernel_size=(5, 5), padding=(2, 2))
        self.conv10 = Conv2D(32, kernel_size=(5, 5), padding=(2, 2))
        self.flatten = Flatten()
        self.dense1 = Dense(1024)
        self.dense2 = Dense(19 * 19)
def __init__(self, name, channels=2, n_labels=4992, bn_mom=0.9, act_type='relu'):
    """
    Definition of the policy head proposed by the AlphaZero authors
    :param name: name prefix for all blocks
    :param channels: Number of channels for 1st conv operation in branch 0
    :param n_labels: Number of policy output labels
    :param bn_mom: Batch normalization momentum parameter
    :param act_type: Activation type to use
    """
    super(_PolicyHeadAlphaZero, self).__init__(prefix=name + '_')

    self.body = HybridSequential(prefix='')

    with self.name_scope():
        self.body.add(Conv2D(channels=channels, kernel_size=(1, 1), use_bias=False))
        self.body.add(BatchNorm(momentum=bn_mom))
        self.body.add(get_act(act_type))
        self.body.add(Flatten())
        self.body.add(Dense(units=n_labels))
def __init__(self, channels, in_channels, downsample=False, use_se=True, se_divide=2,
             strides=None, **kwargs):
    super(ResidualBlock, self).__init__(**kwargs)
    self.channels = channels
    self.in_channels = in_channels
    self.use_se = use_se
    self.se_divide = se_divide

    if strides:
        s1 = strides[0]
        s2 = strides[1]
    else:
        s1 = 1
        s2 = 1

    with self.name_scope():
        self.body = HybridSequential(prefix='body-')
        with self.body.name_scope():
            self.body.add(
                Conv2D(channels, kernel_size=3, strides=s1, padding=1,
                       in_channels=in_channels, use_bias=False),
                BatchNorm(axis=1, in_channels=channels),
                LeakyReLU(0.2),
                Conv2D(channels, kernel_size=3, strides=s2, padding=1,
                       in_channels=channels, use_bias=False),
                BatchNorm(axis=1, in_channels=channels))

        if downsample:
            self.downsample = HybridSequential(prefix='downsample-')
            with self.downsample.name_scope():
                self.downsample.add(
                    Conv2D(channels, kernel_size=3, strides=2, padding=1,
                           in_channels=in_channels, use_bias=False),
                    BatchNorm(axis=1, in_channels=channels))
        else:
            self.downsample = None

        self.out_act = HybridSequential(prefix='outact-')
        with self.out_act.name_scope():
            self.out_act.add(
                BatchNorm(axis=1),
                LeakyReLU(0.2))

        if self.use_se:
            self.se_control = HybridSequential(prefix='se-')
            with self.se_control.name_scope():
                self.se_control.add(
                    GlobalAvgPool2D(),
                    Flatten(),
                    Dense(channels // self.se_divide, activation='relu', prefix='squeeze-'),
                    Dense(channels, activation='sigmoid', prefix='excitation-'))
    MaxPool2D(pool_size=(2, 2), strides=(2, 2)),
    # layer 3
    Conv2D(channels=64, kernel_size=(3, 3), padding=(3 // 2, 3 // 2), activation='relu'),
    BatchNorm(axis=1, momentum=0.9),
    # layer 4
    Conv2D(channels=64, kernel_size=(3, 3), padding=(3 // 2, 3 // 2), activation='relu'),
    BatchNorm(axis=1, momentum=0.9),
    MaxPool2D(pool_size=(2, 2), strides=(2, 2)),
    # layer 5
    Flatten(),
    Dropout(0.3),
    Dense(128, activation='relu'),
    # layer 6
    Dense(10))

# %%
# -- Initialize parameters
net.initialize(init=init.Xavier(), ctx=mx_ctx)

for name, param in net.collect_params().items():
    print(name)

# %%
# -- Define loss function and optimizer
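# The script above is cut off at the "Define loss function and optimizer" marker. A minimal sketch
# of what such a cell might contain follows; the softmax cross-entropy loss, the 'sgd' optimizer
# and the 0.1 learning rate are illustrative assumptions, not taken from the original script.
from mxnet import gluon

loss_fn = gluon.loss.SoftmaxCrossEntropyLoss()
trainer = gluon.Trainer(net.collect_params(), 'sgd', {'learning_rate': 0.1})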
def train(hyperparameters, channel_input_dirs, num_gpus, hosts):
    batch_size = hyperparameters.get("batch_size", 64)
    epochs = hyperparameters.get("epochs", 3)
    mx.random.seed(42)

    training_dir = channel_input_dirs['training']
    with open("{}/train/data.p".format(training_dir), "rb") as pickle_file:
        train_nd = load(pickle_file)
    with open("{}/validation/data.p".format(training_dir), "rb") as pickle_file:
        validation_nd = load(pickle_file)

    train_data = gluon.data.DataLoader(train_nd, batch_size, shuffle=True)
    validation_data = gluon.data.DataLoader(validation_nd, batch_size, shuffle=True)

    net = Sequential()
    # http://gluon.mxnet.io/chapter03_deep-neural-networks/plumbing.html#What's-the-deal-with-name_scope()?
    with net.name_scope():
        net.add(Conv2D(channels=32, kernel_size=(3, 3), padding=0, activation="relu"))
        net.add(Conv2D(channels=32, kernel_size=(3, 3), padding=0, activation="relu"))
        net.add(MaxPool2D(pool_size=(2, 2)))
        net.add(Dropout(.25))
        net.add(Flatten())
        net.add(Dense(8))

    ctx = mx.gpu() if num_gpus > 0 else mx.cpu()

    # Xavier initialization, also known as Glorot
    net.collect_params().initialize(Xavier(magnitude=2.24), ctx=ctx)
    loss = SoftmaxCrossEntropyLoss()

    # kvstore type for multi-gpu and distributed training
    if len(hosts) == 1:
        kvstore = "device" if num_gpus > 0 else "local"
    else:
        kvstore = "dist_device_sync" if num_gpus > 0 else "dist_sync"

    trainer = Trainer(net.collect_params(), optimizer="adam", kvstore=kvstore)
    smoothing_constant = .01

    for e in range(epochs):
        moving_loss = 0
        for i, (data, label) in enumerate(train_data):
            data = data.as_in_context(ctx)
            label = label.as_in_context(ctx)
            with autograd.record():
                output = net(data)
                loss_result = loss(output, label)
            loss_result.backward()
            trainer.step(batch_size)

            curr_loss = nd.mean(loss_result).asscalar()
            moving_loss = (curr_loss if ((i == 0) and (e == 0))
                           else (1 - smoothing_constant) * moving_loss + smoothing_constant * curr_loss)

        validation_accuracy = measure_performance(net, ctx, validation_data)
        train_accuracy = measure_performance(net, ctx, train_data)
        print("Epoch %s. Loss: %s, Train_acc %s, Test_acc %s" %
              (e, moving_loss, train_accuracy, validation_accuracy))

    return net
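# measure_performance is referenced above but not defined in this snippet. A minimal sketch of what
# such a helper might look like is given below; computing accuracy with mx.metric.Accuracy over the
# data loader is an illustrative assumption, not the original author's implementation.
def measure_performance(net, ctx, data_iter):
    acc = mx.metric.Accuracy()
    for data, label in data_iter:
        data = data.as_in_context(ctx)
        label = label.as_in_context(ctx)
        predictions = nd.argmax(net(data), axis=1)
        acc.update(preds=predictions, labels=label)
    return acc.get()[1]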
(x_train, y_train), (x_test, y_test), min_pixel_value, max_pixel_value = load_mnist()

# Step 1a: Swap axes to MXNet's NCHW format
x_train = np.swapaxes(x_train, 1, 3)
x_test = np.swapaxes(x_test, 1, 3)

# Step 2: Create the model
model = mxnet.gluon.nn.Sequential()
with model.name_scope():
    model.add(Conv2D(channels=4, kernel_size=5, activation="relu"))
    model.add(MaxPool2D(pool_size=2, strides=1))
    model.add(Conv2D(channels=10, kernel_size=5, activation="relu"))
    model.add(MaxPool2D(pool_size=2, strides=1))
    model.add(Flatten())
    model.add(Dense(100, activation="relu"))
    model.add(Dense(10))

model.initialize()

loss = mxnet.gluon.loss.SoftmaxCrossEntropyLoss()
trainer = mxnet.gluon.Trainer(model.collect_params(), "adam", {"learning_rate": 0.01})

# Step 3: Create the ART classifier
classifier = MXClassifier(
    model=model,
    clip_values=(min_pixel_value, max_pixel_value),
    loss=loss,
    input_shape=(28, 28, 1),
    nb_classes=10,
batch_size = 256
train_data = gluon.data.DataLoader(mnist_train, batch_size=batch_size, shuffle=True, num_workers=4)

mnist_valid = gluon.data.vision.FashionMNIST(train=False)
valid_data = gluon.data.DataLoader(mnist_valid.transform_first(transformer),
                                   batch_size=batch_size, num_workers=4)

# Only hybrid-based networks can be exported
net = HybridSequential()
net.add(Conv2D(channels=6, kernel_size=5, activation="relu"),
        MaxPool2D(pool_size=2, strides=2),
        Conv2D(channels=16, kernel_size=3, activation="relu"),
        MaxPool2D(pool_size=2, strides=2),
        Flatten(),
        Dense(120, activation="relu"),
        Dense(84, activation="relu"),
        Dense(10))
net.initialize(init=init.Xavier())

# Only after hybridization can a model be exported with its architecture included
net.hybridize()

trainer = Trainer(net.collect_params(), "sgd", {"learning_rate": 0.1})

est = estimator.Estimator(net=net,
                          loss=SoftmaxCrossEntropyLoss(),
                          metrics=Accuracy(),
                          trainer=trainer)
est.fit(train_data=train_data, epochs=2, val_data=valid_data)
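# The comments above point out that only hybridized networks can be exported together with their
# architecture. A minimal sketch of that export step after training is shown below; the "lenet"
# file prefix and the epoch number are illustrative assumptions.
net.export("lenet", epoch=2)  # writes lenet-symbol.json and lenet-0002.params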