def __init__(self, level, num_stacks, dims, num_blocks, heads,
             pre=None, conv_dim=256,
             make_conv_layer=make_conv_layer,
             make_heat_layer=make_keypoint_layer,
             make_tag_layer=make_keypoint_layer,
             make_regress_layer=make_keypoint_layer,
             make_up_layer=make_repeat_layers,
             make_low_layer=make_repeat_layers,
             make_hg_layer=make_repeat_layers,
             make_hg_layer_reverse=make_repeat_layers_reverse,
             make_pool_layer=make_pool_layer,
             make_unpool_layer=make_unpool_layer,
             make_merge_layer=make_merge_layer,
             make_inter_layer=make_inter_layer,
             kp_layer=residual):
    super(stacked_hourglass, self).__init__()

    self.num_stacks = num_stacks
    self.heads = heads
    curr_dim = dims[0]

    # stem: downsample the input before the hourglass stacks
    if pre is None:
        self.pre = nn.Sequential()
        with self.name_scope():
            self.pre.add(convolution(7, 128, 3, strides=2),
                         residual(3, 256, 128, stride=2))
    else:
        self.pre = pre

    # one hourglass module per stack
    self.kpts = nn.Sequential()
    with self.name_scope():
        for _ in range(num_stacks):
            self.kpts.add(
                keypoint_struct(
                    level, dims, num_blocks,
                    make_up_layer=make_up_layer,
                    make_low_layer=make_low_layer,
                    make_hg_layer=make_hg_layer,
                    make_hg_layer_reverse=make_hg_layer_reverse,
                    make_pool_layer=make_pool_layer,
                    make_unpool_layer=make_unpool_layer,
                    make_merge_layer=make_merge_layer))

    self.convs = nn.Sequential()
    with self.name_scope():
        for _ in range(num_stacks):
            self.convs.add(make_conv_layer(conv_dim, curr_dim))

    self.inters = nn.Sequential()
    with self.name_scope():
        for _ in range(num_stacks):
            self.inters.add(make_inter_layer(curr_dim))

    # 1x1 projections between consecutive stacks
    self.inters_ = nn.Sequential()
    with self.name_scope():
        for _ in range(num_stacks - 1):
            seq = nn.Sequential()
            seq.add(nn.Conv2D(curr_dim, (1, 1), use_bias=False,
                              in_channels=conv_dim),
                    nn.BatchNorm())
            self.inters_.add(seq)

    self.convs_ = nn.Sequential()
    with self.name_scope():
        for _ in range(num_stacks - 1):
            seq = nn.Sequential()
            seq.add(nn.Conv2D(curr_dim, (1, 1), use_bias=False,
                              in_channels=conv_dim),
                    nn.BatchNorm())
            self.convs_.add(seq)

    # prediction heads: keypoint heatmaps and regression outputs
    for head in heads.keys():
        if "hm" in head:
            module = nn.Sequential()
            with self.name_scope():
                for _ in range(num_stacks):
                    module.add(
                        make_heat_layer(channels_out=heads[head],
                                        channels_intermediate=curr_dim,
                                        channels_in=conv_dim))
            self.__setattr__(head, module)
            # Disabled: initialise the final heatmap bias to -2.19
            # (the usual focal-loss prior):
            # for heat in self.__getattribute__(head):
            #     heat[-1].bias.data = -2.19
        else:
            module = nn.Sequential()
            with self.name_scope():
                for _ in range(num_stacks):
                    module.add(
                        make_regress_layer(channels_out=heads[head],
                                           channels_intermediate=curr_dim,
                                           channels_in=conv_dim))
            self.__setattr__(head, module)
def base_net():
    blk = nn.Sequential()
    for num_filters in [16, 32, 64]:
        blk.add(down_sample_blk(num_filters))
    return blk
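# Hedged usage sketch for base_net(): down_sample_blk is assumed to be defined
# elsewhere in this codebase and to halve the spatial size per block, so three
# blocks reduce height/width by 8 while growing channels 16 -> 32 -> 64.
# Illustrative only:
#
#   from mxnet import nd
#   net = base_net()
#   net.initialize()
#   print(net(nd.random.uniform(shape=(1, 3, 256, 256))).shape)
#   # (1, 64, 32, 32) if each block halves height and width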
def vgg_stack(architecture):
    out = nn.Sequential()
    for (num_convs, in_channels, channels) in architecture:
        out.add(vgg_block(num_convs, in_channels, channels))
    return out
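# Hedged usage sketch for vgg_stack(): vgg_block is assumed to accept
# (num_convs, in_channels, channels) as unpacked above. The architecture
# values are illustrative, not taken from the source:
#
#   arch = ((1, 3, 64), (1, 64, 128), (2, 128, 256))
#   net = vgg_stack(arch)
#   net.initialize()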
def get_net(pretrained_net, content_layers, style_layers):
    net = nn.Sequential()
    for i in range(max(content_layers + style_layers) + 1):
        net.add(pretrained_net.features[i])
    return net
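# Hedged usage sketch for get_net(): the pretrained network and layer indices
# below are illustrative values in the style of the classic neural style
# transfer setup, not taken from this source.
from mxnet.gluon.model_zoo import vision

pretrained_net = vision.vgg19(pretrained=True)
content_layers = [25]              # assumed example index
style_layers = [0, 5, 10, 19, 28]  # assumed example indices
net = get_net(pretrained_net, content_layers, style_layers)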
#!/usr/bin/env python
# -*- coding: utf-8 -*-
from mxnet import nd
from mxnet.gluon import nn

# nn.Sequential
net = nn.Sequential()
with net.name_scope():
    net.add(nn.Dense(256))
    net.add(nn.Dense(10))
net.initialize()

# nn.Block
class MLP(nn.Block):
    def __init__(self, **kwargs):
        super(MLP, self).__init__(**kwargs)
        with self.name_scope():
            self.dense0 = nn.Dense(256)
            self.dense1 = nn.Dense(10)

    def forward(self, x):
        return self.dense1(nd.relu(self.dense0(x)))

net2 = MLP()
# print(net2)
net2.initialize()
x = nd.random.uniform(shape=(4, 20))
y2 = net2(x)
def define_network():
    net = nn.Sequential()
    with net.name_scope():
        net.add(nn.Dense(64, activation="relu"))
        net.add(nn.Dense(1))
    return net
output = network(data)
loss = softmax_cross_entropy(output, label)
loss.backward()
trainer.step(data.shape[0])

# Your first model should be a sequential network with 3 layers. Your first
# layer should have 16 hidden units, the second should have 8 hidden units,
# and the last layer should have the correct number of output units for the
# classification task at hand. You should add ReLU activations on all hidden
# layers, but not on the output layer. You should define `network` in the
# cell below.
#
# **Hint**: You'll find classes in the `mxnet.gluon.nn` subpackage useful
# for this task.

# In[47]:

# YOUR CODE HERE
from mxnet.gluon import nn

network = nn.Sequential()
network.add(
    nn.Dense(16, activation='relu'),
    nn.Dense(8, activation='relu'),
    nn.Dense(10)
)
#raise NotImplementedError()

# In[48]:

assert isinstance(network, mx.gluon.nn.Sequential)
assert len(network) == 3
assert isinstance(network[0], mx.gluon.nn.Dense)
assert network[0].act.name.endswith('relu')
def _get_test_network(net=None):
    # Avoid a mutable default argument: a shared nn.Sequential() default would
    # accumulate layers across repeated calls.
    if net is None:
        net = nn.Sequential()
    net.add(nn.Dense(128, activation='relu', flatten=False),
            nn.Dense(64, activation='relu'),
            nn.Dense(10, activation='relu'))
    return net
def __init__(self, **kwargs):
    super(NestMLP, self).__init__(**kwargs)
    self.net = nn.Sequential()
    self.net.add(nn.Dense(64, activation="relu"),
                 nn.Dense(32, activation="relu"))
    self.dense = nn.Dense(16, activation="relu")
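# Hedged sketch (assumption, not shown in the source): the forward pass that
# typically accompanies this NestMLP feeds the nested Sequential into the
# outer Dense layer. Self-contained version for a quick shape check:
from mxnet import nd
from mxnet.gluon import nn

class NestMLPSketch(nn.Block):
    def __init__(self, **kwargs):
        super(NestMLPSketch, self).__init__(**kwargs)
        self.net = nn.Sequential()
        self.net.add(nn.Dense(64, activation="relu"),
                     nn.Dense(32, activation="relu"))
        self.dense = nn.Dense(16, activation="relu")

    def forward(self, x):
        return self.dense(self.net(x))

_m = NestMLPSketch()
_m.initialize()
print(_m(nd.random.uniform(shape=(2, 20))).shape)  # (2, 16)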
Y_test = nd.array(y_test, context)

print("X_train: " + str(X_train))
print("X_test: " + str(X_test))
print("Y_train: " + str(Y_train))
print("Y_test: " + str(Y_test))

## define network
num_classes = 2
num_hidden = 200
learning_rate = 0.01
epochs = 200
batch_size = 20

model = nn.Sequential()
with model.name_scope():
    # attribute assignment registers the embedding as the first child of the
    # Sequential, so it runs before the LSTM
    model.embed = nn.Embedding(voca_size, num_embed)
    model.add(rnn.LSTM(num_hidden, layout='NTC', dropout=0.7,
                       bidirectional=False))
    model.add(nn.Dense(num_classes))

def eval_accuracy(x, y, batch_size):
    accuracy = mx.metric.Accuracy()
    for i in range(x.shape[0] // batch_size):
        data = x[i * batch_size:(i * batch_size + batch_size), ]
        target = y[i * batch_size:(i * batch_size + batch_size), ]
        output = model(data)
activation = nd.relu

data = load_data(args)
g = data.graph
g.remove_edges_from(g.selfloop_edges())
g.add_edges_from(zip(g.nodes, g.nodes))
g = dgl.DGLGraph(g)
print(g)

features = nd.array(data.features)
num_hidden = args.num_hidden
input_dim = features.shape[1]
hidden_dim = args.hidden_dim
num_classes = data.num_labels

layers = nn.Sequential()
layers.add(conv.GraphConv(input_dim, hidden_dim, activation=activation))
for _ in range(num_hidden):
    layers.add(conv.GraphConv(hidden_dim, hidden_dim, activation=activation))
layers.add(conv.GraphConv(hidden_dim, num_classes, activation=activation))
layers.initialize(ctx=mx.cpu(0))

print('testing inference time cost')
tot_time = 0
for i in range(100):
    tic = time.time()
    feat = features
    for layer in layers:
        feat = layer(feat, g)
    toc = time.time()
    tot_time += toc - tic
def __init__(self,
             num_scenes,
             num_actions,
             model_depth,
             final_spatial_kernel=7,
             final_temporal_kernel=2,
             with_bias=False):
    super(R2Plus2D_MT, self).__init__()
    self.comp_count = 0

    self.base = nn.Sequential(prefix='base_')
    with self.base.name_scope():
        self.base.add(
            nn.Conv3D(channels=45, kernel_size=(1, 7, 7),
                      strides=(1, 2, 2), padding=(0, 3, 3),
                      use_bias=with_bias),
            nn.BatchNorm(),
            nn.Activation(activation='relu'),
            nn.Conv3D(channels=64, kernel_size=(3, 1, 1),
                      strides=(1, 1, 1), padding=(1, 0, 0),
                      use_bias=with_bias),
            nn.BatchNorm(),
            nn.Activation(activation='relu'))
    self.base_name = self.set_base_name()

    (n2, n3, n4, n5) = BLOCK_CONFIG[model_depth]

    self.conv2_name = []
    self.conv2 = nn.Sequential(prefix='conv2_')
    with self.conv2.name_scope():
        for _ in range(n2):
            self.conv2_name.extend(
                self.add_comp_count_index(change_channels=False,
                                          comp_index=self.comp_count,
                                          prefix=self.conv2.prefix))
            self.conv2.add(
                R3DBlock(input_filter=64, num_filter=64,
                         comp_index=self.comp_count, use_bias=with_bias))
            self.comp_count += 1

    # self.conv3
    self.conv3_name = []
    self.conv3 = nn.Sequential(prefix='conv3_')
    with self.conv3.name_scope():
        print("conv3 comp_count is ", self.comp_count)
        self.conv3_name.extend(
            self.add_comp_count_index(change_channels=True,
                                      downsampling=True,
                                      comp_index=self.comp_count))
        self.conv3.add(
            R3DBlock(input_filter=64, num_filter=128,
                     comp_index=self.comp_count, downsampling=True,
                     use_bias=with_bias))
        self.comp_count += 1
        for _ in range(n3 - 1):
            self.conv3_name.extend(
                self.add_comp_count_index(change_channels=False,
                                          downsampling=False,
                                          comp_index=self.comp_count))
            self.conv3.add(
                R3DBlock(input_filter=128, num_filter=128,
                         comp_index=self.comp_count, use_bias=with_bias))
            self.comp_count += 1

    # self.conv4
    self.conv4_name = []
    self.conv4 = nn.Sequential(prefix='conv4_')
    with self.conv4.name_scope():
        self.conv4_name.extend(
            self.add_comp_count_index(change_channels=True,
                                      downsampling=True,
                                      comp_index=self.comp_count))
        self.conv4.add(
            R3DBlock(128, 256, comp_index=self.comp_count,
                     downsampling=True, use_bias=with_bias))
        self.comp_count += 1
        for _ in range(n4 - 1):
            self.conv4_name.extend(
                self.add_comp_count_index(change_channels=False,
                                          downsampling=False,
                                          comp_index=self.comp_count))
            self.conv4.add(
                R3DBlock(256, 256, comp_index=self.comp_count,
                         use_bias=with_bias))
            self.comp_count += 1

    # conv5
    self.conv5_name = []
    self.conv5 = nn.Sequential(prefix='conv5_')
    with self.conv5.name_scope():
        self.conv5_name.extend(
            self.add_comp_count_index(change_channels=True,
                                      downsampling=True,
                                      comp_index=self.comp_count))
        self.conv5.add(
            R3DBlock(256, 512, comp_index=self.comp_count,
                     downsampling=True, use_bias=with_bias))
        self.comp_count += 1
        for _ in range(n5 - 1):
            self.conv5_name.extend(
                self.add_comp_count_index(comp_index=self.comp_count))
            self.conv5.add(
                R3DBlock(512, 512, self.comp_count, use_bias=with_bias))
            self.comp_count += 1

    # final output of conv5 is [512, t/8, 7, 7] (512 x 1 x 7 x 7)

    # for static scene tagging
    self.scene_conv = nn.Sequential()
    self.scene_conv.add(
        nn.Conv3D(256, kernel_size=(1, 3, 3), strides=(1, 2, 2)),
        nn.BatchNorm(),
        nn.Activation('relu'))
    # shape 256 x 1 x 2 x 2, reshaped to 1024
    self.scene_drop = nn.Dropout(rate=0.3)
    self.scene_output = nn.Dense(num_scenes)

    # for action classification
    self.action_conv = nn.Sequential()
    self.action_conv.add(
        nn.Conv3D(512, kernel_size=(1, 3, 3), strides=(1, 1, 1),
                  padding=(0, 1, 1)),
        nn.BatchNorm(),
        nn.Activation('relu'))
    self.action_avg = nn.AvgPool3D(
        pool_size=(final_temporal_kernel, final_spatial_kernel,
                   final_spatial_kernel),
        strides=(1, 1, 1), padding=(0, 0, 0))
    self.action_output = nn.Dense(units=num_actions)
    self.dense0_name = ['final_fc_weight', 'final_fc_bias']
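# Hedged usage sketch: model_depth must be a key of BLOCK_CONFIG (defined
# elsewhere in this file); the values below are assumed examples only:
#
#   net = R2Plus2D_MT(num_scenes=10, num_actions=101, model_depth=18)
#   net.initialize(ctx=mx.cpu())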
def get_net():
    net = nn.Sequential()
    net.add(nn.Dense(64))
    net.add(nn.Dense(1))
    net.initialize()
    return net
def get_net():
    net = nn.Sequential()
    net.add(nn.Dense(64, activation="relu"),
            nn.Dense(32, activation="relu"),
            nn.Dense(1))
    return net
def train_text_classification(args: dict, reporter: StatusReporter,
                              task_id: int, resources=None) -> None:
    # Set hyper-params
    def _init_hparams():
        batch_size = args.data.batch_size * max(args.num_gpus, 1)
        ctx = [mx.gpu(i) for i in range(args.num_gpus)] \
            if args.num_gpus > 0 else [mx.cpu()]
        return batch_size, ctx

    batch_size, ctx = _init_hparams()
    vars(args).update({'task_id': task_id})
    logger.info('Task ID : {0}, args : {1}, resources:{2}, pid:{3}'.format(
        task_id, args, resources, os.getpid()))

    ps_p = psutil.Process(os.getpid())
    ps_p.cpu_affinity(resources.cpu_ids)

    if 'bert' in args.model:
        net, vocab = _get_bert_pre_trained_model(args, ctx)
    elif 'lstm_lm' in args.model:
        # Get LM-specific model attributes
        net, vocab = _get_lm_pre_trained_model(args, ctx)
        net.classifier = nn.Sequential()
        with net.classifier.name_scope():
            net.classifier.add(nn.Dropout(args.dropout))
            net.classifier.add(nn.Dense(args.data.num_classes))
    else:
        raise ValueError(
            'Unsupported pre-trained model type. {} will be supported in the '
            'future.'.format(args.model))

    if not args.pretrained:
        net.collect_params().initialize(mx.init.Xavier(magnitude=2.24),
                                        ctx=ctx)
    else:
        net.classifier.initialize(mx.init.Xavier(magnitude=2.24), ctx=ctx)

    net.collect_params().reset_ctx(ctx)
    net.hybridize(static_alloc=True)

    # do not apply weight decay on LayerNorm and bias terms
    for _, v in net.collect_params('.*beta|.*gamma|.*bias').items():
        v.wd_mult = 0.0

    def _get_dataloader():
        def _init_dataset(dataset, transform_fn):
            return transform(dataset, transform_fn, args.data.num_workers)

        class_labels = args.data.class_labels if args.data.class_labels \
            else list(args.data._label_set)
        train_dataset = _init_dataset(
            args.data.train,
            get_transform_train_fn(args.model, vocab,
                                   args.max_sequence_length,
                                   args.data.pair, class_labels))
        val_dataset = _init_dataset(
            args.data.val,
            get_transform_val_fn(args.model, vocab,
                                 args.max_sequence_length,
                                 args.data.pair, class_labels))
        train_data = gluon.data.DataLoader(
            dataset=train_dataset,
            num_workers=args.data.num_workers,
            batch_sampler=get_batch_sampler(args.model, train_dataset,
                                            batch_size,
                                            args.data.num_workers),
            batchify_fn=get_batchify_fn(args.model))
        val_data = gluon.data.DataLoader(
            dataset=val_dataset,
            batch_size=batch_size,
            batchify_fn=get_batchify_fn(args.model),
            num_workers=args.data.num_workers,
            shuffle=False)
        return train_data, val_data

    train_data, val_data = _get_dataloader()

    # fine_tune_lm(pre_trained_network)  # TODO

    def _get_optimizer_params():
        # TODO: Add more optimizer params based on the chosen optimizer
        optimizer_params = {'learning_rate': args.lr}
        return optimizer_params

    optimizer_params = _get_optimizer_params()
    trainer = gluon.Trainer(net.collect_params(), args.optimizer,
                            optimizer_params)  # TODO: Update with search space

    loss = get_loss_instance(args.loss)
    metric = get_metric_instance(args.metric)

    estimator: Estimator = Estimator(net=net, loss=loss, metrics=[metric],
                                     trainer=trainer, context=ctx)

    early_stopping_handler = EarlyStoppingHandler(
        monitor=estimator.train_metrics[0], mode='max')
    lr_handler = LRHandler(warmup_ratio=0.1,
                           batch_size=batch_size,
                           num_epochs=args.epochs,
                           train_length=len(args.data.train))
    event_handlers = [early_stopping_handler, lr_handler,
                      TextDataLoaderHandler(args.model),
                      ReporterHandler(reporter)]

    estimator.fit(train_data=train_data,
                  val_data=val_data,
                  epochs=args.epochs,
                  event_handlers=event_handlers)
nz = int(opt.nz)
ngf = int(opt.ngf)
ndf = int(opt.ndf)
nc = 3
ctx = mx.gpu(0)
check_point = bool(opt.check_point)
outf = opt.outf

if not os.path.exists(outf):
    os.makedirs(outf)

if opt.dataset == 'cifar10':
    train_iter, val_iter = cifar10_iterator(opt.batchSize, (3, 64, 64), 64)

# build the generator
netG = nn.Sequential()
with netG.name_scope():
    # input is Z, going into a convolution
    netG.add(nn.Conv2DTranspose(ngf * 8, 4, 1, 0, use_bias=False))
    netG.add(nn.BatchNorm())
    netG.add(nn.Activation('relu'))
    # state size. (ngf*8) x 4 x 4
    netG.add(nn.Conv2DTranspose(ngf * 4, 4, 2, 1, use_bias=False))
    netG.add(nn.BatchNorm())
    netG.add(nn.Activation('relu'))
    # state size. (ngf*4) x 8 x 8
    netG.add(nn.Conv2DTranspose(ngf * 2, 4, 2, 1, use_bias=False))
    netG.add(nn.BatchNorm())
    netG.add(nn.Activation('relu'))
    # state size. (ngf*2) x 16 x 16
    netG.add(nn.Conv2DTranspose(ngf, 4, 2, 1, use_bias=False))
def __init__(self, growth_rate, layers, **kwargs):
    super(DenseBlock, self).__init__(**kwargs)
    self.net = nn.Sequential()
    for _ in range(layers):
        self.net.add(conv_block(growth_rate))
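# Hedged sketch (assumption, not shown in the source): the forward pass that
# usually accompanies a DenseBlock concatenates each conv_block's output with
# its input along the channel axis, so channels grow by growth_rate per layer:
#
#   def forward(self, x):
#       for blk in self.net:
#           y = blk(x)
#           x = nd.concat(x, y, dim=1)  # concatenate on the channel dim
#       return x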
        np.uint8))
    plt.axis('off')

for i in range(4):
    plt.subplot(1, 4, i + 1)
    visualize(img_list[i + 10][0])
plt.show()

#%%
# Example of upsampling: produce a random vector of shape
# (1, latent_z_size, 1, 1); the order is (batch size, channels, height, width).
random_z = mx.nd.random_normal(0, 1, shape=(1, latent_z_size, 1, 1),
                               ctx=context)

netTest = nn.Sequential()
with netTest.name_scope():
    # Input is Z going into a convolution (batch size ignored); the
    # upsampling applies only to the spatial dimensions.
    # 48 filters, 4x4 kernel, stride 1, padding 0 -> output is 48 x 4 x 4
    netTest.add(nn.Conv2DTranspose(48, 4, 1, 0, use_bias=False))
    # 48 filters, 4x4 kernel, stride 2, padding 1 -> doubles height and width
    netTest.add(nn.Conv2DTranspose(48, 4, 2, 1, use_bias=False))
    netTest.add(nn.Conv2DTranspose(48, 4, 2, 1, use_bias=False))

netTest.initialize(mx.init.Normal(0.02), ctx=context)
abc = netTest(random_z)
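# Sanity check on the chain above: with kernel 4 and (stride, padding) of
# (1, 0) then (2, 1) twice, the spatial size goes 1x1 -> 4x4 -> 8x8 -> 16x16.
print(abc.shape)  # expected (1, 48, 16, 16)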
def transition_block(channels):
    out = nn.Sequential()
    out.add(nn.BatchNorm(),
            nn.Activation('relu'),
            nn.Conv2D(channels, kernel_size=1),
            nn.AvgPool2D(pool_size=2, strides=2))
    return out
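# Quick shape check for transition_block (input sizes are illustrative): the
# 1x1 convolution changes the channel count and the average pooling halves
# the spatial dimensions.
from mxnet import nd

blk = transition_block(10)
blk.initialize()
x = nd.random.uniform(shape=(4, 23, 8, 8))
print(blk(x).shape)  # expected (4, 10, 4, 4)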
def __init__(self, **kwargs):
    super(MLP, self).__init__(**kwargs)
    self.blk = nn.Sequential()
    self.blk.add(nn.Dense(1024, activation='relu'),
                 nn.Dense(512, activation='relu'),
                 nn.Dense(256, activation='relu'),
                 nn.Dense(10))
def conv_block(channels):
    out = nn.Sequential()
    out.add(nn.BatchNorm(),
            nn.Activation('relu'),
            nn.Conv2D(channels, kernel_size=3, padding=1))
    return out
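# Quick shape check for conv_block (input sizes are illustrative): a 3x3
# kernel with padding=1 preserves height and width; only channels change.
from mxnet import nd

blk = conv_block(10)
blk.initialize()
x = nd.random.uniform(shape=(4, 3, 8, 8))
print(blk(x).shape)  # expected (4, 10, 8, 8)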
def get_net():
    net = nn.Sequential()
    net.add(nn.Dense(128, activation='relu'))
    net.add(nn.Dense(64, activation='relu'))
    net.add(nn.Dense(10))
    return net
def __init__(self, **kwargs):
    super(Model3, self).__init__(**kwargs)
    with self.name_scope():
        self.layers = nn.Sequential()
        # (i + 1) * 10 so the first layer has 10 units; i * 10 would create
        # nn.Dense(0) for i == 0, which is invalid.
        self.layers.add(*[nn.Dense((i + 1) * 10) for i in range(6)])
# use the utilities to easily manage the data.
prep = MxnetDataCreator(X_train, X_test, Y_train, Y_test)
dl = MxnetDataLoader()
train_data, test_data = prep.create_mx_dataset()

X, y = train_data[0]
print('X shape: ', X.shape, 'X dtype', X.dtype, 'y:', y)

train_data, test_data = dl.create_loader(train_data, test_data,
                                         batch_size=256, transformer=None)

for data, label in train_data:
    print(data.shape, label.shape)
    break

# create a cnn.
net = nn.Sequential()
net.add(nn.Conv2D(channels=6, kernel_size=5, activation='relu'),
        nn.MaxPool2D(pool_size=2, strides=2),
        nn.Conv2D(channels=16, kernel_size=3, activation='relu'),
        nn.MaxPool2D(pool_size=2, strides=2),
        nn.Flatten(),
        nn.Dense(120, activation="relu"),
        nn.Dense(84, activation="relu"),
        nn.Dense(10))
net.initialize(init=init.Xavier())

softmax_cross_entropy = gluon.loss.SoftmaxCrossEntropyLoss()
trainer = gluon.Trainer(net.collect_params(), 'sgd', {'learning_rate': 0.1})
batch_size = 256

def acc(output, label):
    # fraction of predictions whose argmax matches the label
    return (output.argmax(axis=1) ==
            label.astype('float32')).mean().asscalar()
def newNet():
    net = nn.Sequential()
    net.add(nn.Dense(1))
    net.initialize()
    return net