def load_network(self, load_filename):
    try:
        self.qnet.load_params(filename=load_filename + '_qnet', ctx=CTX)
        self.target.load_params(filename=load_filename + '_target', ctx=CTX)
        self.trainer.step(1, ignore_stale_grad=True)
        self.trainer.load_states(fname=load_filename + '_trainer')
        print('Successfully loaded:', load_filename)
    except Exception:
        try:
            init_policy_name = self.init_policy.replace('*', str(self.seed))
            print('Could not find old network weights ({}), trying self.init_policy ({})'.format(
                load_filename, init_policy_name))
            # Copy every parameter except the local value-bias terms into a fresh
            # ParameterDict, then restore it from the init-policy checkpoint.
            need_dict = gl.ParameterDict()
            for key, value in self.qnet.collect_params().items():
                if not key.endswith('_value_bias_local'):
                    need_dict._params[key] = value
            need_dict.load(filename=init_policy_name + '_qnet', ctx=CTX,
                           ignore_extra=True, restore_prefix='qnet_')
            need_dict = gl.ParameterDict()
            for key, value in self.target.collect_params().items():
                if not key.endswith('_value_bias_local'):
                    need_dict._params[key] = value
            need_dict.load(filename=init_policy_name + '_target', ctx=CTX,
                           ignore_extra=True, restore_prefix='target_')
            print('Successfully loaded:', self.init_policy)
        except Exception:
            print('No init policy found, or it could not be loaded.')
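# A hedged sketch of the matching save path that load_network implies; the
# method name save_network and the use of save_params/save_states simply mirror
# the load calls above and are assumptions, not code from the original source.
def save_network(self, save_filename):
    self.qnet.save_params(save_filename + '_qnet')
    self.target.save_params(save_filename + '_target')
    self.trainer.save_states(save_filename + '_trainer')
    print('Successfully saved:', save_filename)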
def check_trainer_sparse_kv(kv, stype, grad_stype, update_on_kv, expected):
    params = gluon.ParameterDict()
    x = params.get('x', shape=(10, 1), lr_mult=1.0, stype=stype, grad_stype=grad_stype)
    params.initialize(ctx=[mx.cpu(0), mx.cpu(1)], init='zeros')
    trainer = gluon.Trainer(params, 'sgd', {'learning_rate': 0.1},
                            kvstore=kv, update_on_kvstore=update_on_kv)
    all_rows = mx.nd.arange(0, 10, ctx=mx.cpu(0))
    try:
        ws = x.list_data() if stype == 'default' else x.list_row_sparse_data(all_rows)
        with mx.autograd.record():
            for w in ws:
                y = w + 1
                y.backward()
        trainer.step(1)
        assert trainer._kvstore.type == kv
        assert trainer._kv_initialized
        assert trainer._update_on_kvstore is expected
        # one sgd step with lr 0.1 and gradients summed over the two contexts
        # moves the zero-initialized weight to -0.2
        mx.nd.waitall()
        updated_w = x.data(mx.cpu(0)) if stype == 'default' else x.row_sparse_data(all_rows)
        assert (updated_w == -0.2).asnumpy().all(), updated_w
    except Exception as err:
        assert isinstance(err, expected)
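# A minimal driver sketch for check_trainer_sparse_kv. The exact combinations
# below are assumptions modeled on typical MXNet trainer tests: dense storage
# works with either update mode, while asking for local (non-kvstore) updates
# of a row_sparse parameter is expected to raise a ValueError.
def test_trainer_sparse_kv():
    for kv in ('local', 'device'):
        check_trainer_sparse_kv(kv, 'default', 'default', True, True)
        check_trainer_sparse_kv(kv, 'default', 'default', False, False)
        check_trainer_sparse_kv(kv, 'row_sparse', 'row_sparse', False, ValueError)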
def test_paramdict():
    params = gluon.ParameterDict('net_')
    params.get('weight', shape=(10, 10))
    assert list(params.keys()) == ['net_weight']
    params.initialize(ctx=mx.cpu())
    params.save('test.params')
    params.load('test.params', mx.cpu())
def check_trainer_reset_kv(kv):
    params = gluon.ParameterDict()
    x = params.get('x', shape=(10,), lr_mult=1.0)
    params.initialize(ctx=[mx.cpu(0), mx.cpu(1)], init='zeros')
    trainer = gluon.Trainer(params, 'sgd', {'learning_rate': 0.1}, kvstore=kv)
    params.save('test_trainer_reset_kv.params')
    with mx.autograd.record():
        for w in x.list_data():
            y = w + 1
            y.backward()
    trainer.step(1)
    assert trainer._kvstore.type == kv
    # load would reset kvstore
    mx.nd.waitall()
    params.load('test_trainer_reset_kv.params')
    if trainer._update_on_kvstore:
        # drop kvstore state if new parameters are loaded
        assert trainer._kvstore is None
        assert trainer._kv_initialized is False
    with mx.autograd.record():
        for w in x.list_data():
            y = w + 1
            y.backward()
    trainer.step(1)
    # the updated parameter should be based on the loaded checkpoint
    assert (x.data(mx.cpu()) == -0.2).asnumpy().all()
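# A short driver for check_trainer_reset_kv; which kvstore types to exercise is
# an assumption, but any single-machine store should hit the reset path above.
def test_trainer_reset_kv():
    for kv in ('local', 'device'):
        check_trainer_reset_kv(kv)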
def model_fn(model_dir):
    symbol = mx.sym.load('%s/model.json' % model_dir)
    outputs = mx.symbol.softmax(data=symbol, name='softmax_label')
    inputs = mx.sym.var('data')
    param_dict = gluon.ParameterDict('model_')
    net = gluon.SymbolBlock(outputs, inputs, param_dict)
    net.load_params('%s/model.params' % model_dir, ctx=mx.cpu())
    return net
def test_sparse_hybrid_block():
    params = gluon.ParameterDict('net_')
    params.get('weight', shape=(5, 5), stype='row_sparse', dtype='float32')
    params.get('bias', shape=(5,), dtype='float32')
    net = gluon.nn.Dense(5, params=params)
    net.initialize()
    x = mx.nd.ones((2, 5))
    # an exception is expected when forwarding a HybridBlock w/ sparse param
    y = net(x)
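# To make the expected failure explicit rather than implicit, the forward pass
# can be wrapped in a try/except. That a RuntimeError is the exception raised
# for a dense Dense block holding a row_sparse weight is an assumption and may
# vary across MXNet versions.
def test_sparse_hybrid_block_raises():
    params = gluon.ParameterDict('net_')
    params.get('weight', shape=(5, 5), stype='row_sparse', dtype='float32')
    params.get('bias', shape=(5,), dtype='float32')
    net = gluon.nn.Dense(5, params=params)
    net.initialize()
    x = mx.nd.ones((2, 5))
    try:
        net(x)
    except RuntimeError:
        pass  # expected: the sparse weight cannot be read as a dense array
    else:
        raise AssertionError('expected an exception for the sparse parameter')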
def model_fn(model_dir):
    with open("{}/model.json".format(model_dir), "r") as model_file:
        model_json = model_file.read()
    outputs = mx.sym.load_json(model_json)
    inputs = mx.sym.var("data")
    param_dict = gluon.ParameterDict("model_")
    net = gluon.SymbolBlock(outputs, inputs, param_dict)
    # We will serve the model on CPU
    net.load_params("{}/model.params".format(model_dir), ctx=mx.cpu())
    return net
def test_paramdict():
    ctx = mx.cpu(1)
    params0 = gluon.ParameterDict('net_')
    params0.get('w0', shape=(10, 10))
    params0.get('w1', shape=(10, 10), stype='row_sparse')
    all_row_ids = mx.nd.arange(0, 10, ctx=ctx)
    # check param names
    assert list(params0.keys()) == ['net_w0', 'net_w1']
    params0.initialize(ctx=ctx)
    trainer0 = mx.gluon.Trainer(params0, 'sgd')
    prev_w0 = params0.get('w0').data(ctx)
    prev_w1 = params0.get('w1').row_sparse_data(all_row_ids)
    # save params
    params0.save('test_paramdict.params')
    # load params
    params1 = gluon.ParameterDict('net_')
    params1.get('w0', shape=(10, 10))
    params1.get('w1', shape=(10, 10), stype='row_sparse')
    params1.load('test_paramdict.params', ctx)
    trainer1 = mx.gluon.Trainer(params1, 'sgd')
    # compare the values before and after save/load
    cur_w0 = params1.get('w0').data(ctx)
    cur_w1 = params1.get('w1').row_sparse_data(all_row_ids)
    mx.test_utils.assert_almost_equal(prev_w0.asnumpy(), cur_w0.asnumpy())
    mx.test_utils.assert_almost_equal(prev_w1.asnumpy(), cur_w1.asnumpy())
    # create a new param dict with dense params, and load from the checkpoint
    # of sparse & dense params
    params2 = gluon.ParameterDict('net_')
    params2.get('w0', shape=(10, 10))
    params2.get('w1', shape=(10, 10))
    params2.load('test_paramdict.params', ctx)
    # compare the values before and after save/load
    cur_w0 = params2.get('w0').data(ctx)
    cur_w1 = params2.get('w1').data(ctx)
    mx.test_utils.assert_almost_equal(prev_w0.asnumpy(), cur_w0.asnumpy())
    mx.test_utils.assert_almost_equal(prev_w1.asnumpy(), cur_w1.asnumpy())
def model_fn(model_dir): """ Load the gluon model. Called once when hosting service starts. :param: model_dir The directory where model files are stored. :return: a model (in this case a Gluon network) """ symbol = mx.sym.load('%s/model.json' % model_dir) outputs = mx.symbol.softmax(data=symbol, name='softmax_label') inputs = mx.sym.var('data') param_dict = gluon.ParameterDict('model_') net = gluon.SymbolBlock(outputs, inputs, param_dict) net.load_params('%s/model.params' % model_dir, ctx=mx.cpu()) return net
def pretrain_stack(train_data, encoder, loss_encoder, decoder, loss_decoder,
                   model_ctx, num_epochs, learning_rate):
    epochs = num_epochs
    smoothing_constant = .01
    start_time = time.time()
    for layer_id, layer_encoder in enumerate(encoder):
        print(layer_id)
        print(len(decoder))
        # pair each encoder layer with its mirrored decoder layer
        layer_decoder = decoder[len(decoder) - layer_id - 1]
        print('layer_encoder', layer_encoder.name)
        print('layer_decoder', layer_decoder.name)
        if layer_decoder.name.find('lambda') != -1:
            continue
        # optimize the encoder/decoder pair jointly
        cur_params = gluon.ParameterDict('my_params')
        cur_params.update(layer_encoder.collect_params())
        cur_params.update(layer_decoder.collect_params())
        trainer = gluon.Trainer(cur_params, 'sgd', {'learning_rate': learning_rate})
        for e in range(epochs):
            train_data_shuffle = gluon.data.DataLoader(train_data, batch_size=1, shuffle=True)
            for i, (data, label) in enumerate(train_data_shuffle):
                data = data.as_in_context(model_ctx)
                label = label.as_in_context(model_ctx)
                # run the input through the already-trained lower layers
                encoded_input = data
                for j in range(0, layer_id):
                    encoded_input = encoder[j](encoded_input)
                with autograd.record():
                    encoded_layer = layer_encoder(encoded_input)
                    decoded_input = layer_decoder(encoded_layer)
                    loss = loss_decoder(decoded_input, encoded_input)
                loss.backward()
                trainer.step(data.shape[0])
                if i % 50000 == 0:
                    print('Data id = ', i, ' Time: ', time.time() - start_time)
                    sys.stdout.flush()
                ##########################
                # Keep a moving average of the losses
                ##########################
                curr_loss = mx.nd.mean(loss).asscalar()
                moving_loss = (curr_loss if ((i == 0) and (e == 0))
                               else (1 - smoothing_constant) * moving_loss
                               + smoothing_constant * curr_loss)
    return encoder, decoder
def model_fn(model_dir): """Loads the Gluon model. Called once when hosting service starts. Args: model_dir (str): The directory where model files are stored. Returns: mxnet.gluon.block.Block: a Gluon network. """ symbol = mx.sym.load('%s/model.json' % model_dir) vocab = vocab_from_json('%s/vocab.json' % model_dir) outputs = mx.symbol.softmax(data=symbol, name='softmax_label') inputs = mx.sym.var('data') param_dict = gluon.ParameterDict('model_') net = gluon.SymbolBlock(outputs, inputs, param_dict) net.load_params('%s/model.params' % model_dir, ctx=mx.cpu()) return net, vocab
def model_fn(model_dir): """Load the gluon model. Called once when hosting service starts. Args: model_dir: The directory where model files are stored. Returns: a model (in this case a Gluon network) """ symbol = mx.sym.load("%s/model.json" % model_dir) outputs = mx.symbol.softmax(data=symbol, name="softmax_label") inputs = mx.sym.var("data") param_dict = gluon.ParameterDict("model_") net = gluon.SymbolBlock(outputs, inputs, param_dict) net.load_params("%s/model.params" % model_dir, ctx=mx.cpu()) return net
def model_fn(model_dir): """ Load the Gluon model for hosting. Arguments: model_dir -- SageMaker model directory. Retuns: Gluon model """ # Load the saved Gluon model symbol = mx.sym.load('%s/model.json' % model_dir) outputs = mx.sym.sigmoid(data=symbol, name='sigmoid_label') inputs = mx.sym.var('data') param_dict = gluon.ParameterDict('model_') net = gluon.SymbolBlock(outputs, inputs, param_dict) net.load_params('%s/model.params' % model_dir, ctx=mx.cpu()) return net
def train(train_data, test_data, encoder, loss_encoder, decoder, loss_decoder,
          model_ctx, num_epochs, learning_rate):
    cur_params = gluon.ParameterDict('my_params')
    cur_params.update(encoder.collect_params())
    trainer = gluon.Trainer(cur_params, 'sgd', {'learning_rate': learning_rate})
    epochs = num_epochs
    smoothing_constant = .01
    start_time = time.time()
    for e in range(epochs):
        train_data_shuffle = gluon.data.DataLoader(train_data, batch_size=1, shuffle=True)
        for i, (data, label) in enumerate(train_data_shuffle):
            data = data.as_in_context(model_ctx)
            label = label.as_in_context(model_ctx)
            with autograd.record():
                output = encoder(data)
                loss = loss_encoder(output, label)
            loss.backward()
            trainer.step(data.shape[0])
            if i % 50000 == 0:
                print('Data id = ', i, ' Time: ', time.time() - start_time)
                sys.stdout.flush()
            ##########################
            # Keep a moving average of the losses
            ##########################
            curr_loss = mx.nd.mean(loss).asscalar()
            moving_loss = (curr_loss if ((i == 0) and (e == 0))
                           else (1 - smoothing_constant) * moving_loss
                           + smoothing_constant * curr_loss)
        test_accuracy = evaluate_accuracy(test_data, encoder, start_time, model_ctx)
        train_accuracy = evaluate_accuracy(train_data, encoder, start_time, model_ctx)
        print("Epoch %s. Loss: %s, Train_acc %s, Test_acc %s"
              % (e, moving_loss, train_accuracy, test_accuracy))
    return encoder, decoder
def __init__(self, network, outputs, num_filters, use_1x1_transition=True,
             use_bn=True, reduce_ratio=1.0, min_depth=128, global_pool=False,
             pretrained=False, ctx=mx.cpu(), inputs=('data',)):
    self.IsolatedParams = gluon.ParameterDict()
    inputs, outputs, params = _parse_network(network, outputs, inputs, pretrained, ctx)
    # append new layers after the last output of the parsed network
    y = outputs[-1]
    # weight initializer
    weight_init = mx.init.Xavier(rnd_type='gaussian', factor_type='out', magnitude=2)
    for i, f in enumerate(num_filters):
        if use_1x1_transition:
            num_trans = max(min_depth, int(round(f * reduce_ratio)))
            # symbolic equivalent:
            # y = mx.sym.Convolution(
            #     y, num_filter=num_trans, kernel=(1, 1), no_bias=use_bn,
            #     name='expand_trans_conv{}'.format(i), attr={'__init__': weight_init})
            Conv2D_1 = nn.Conv2D(channels=num_trans, kernel_size=(1, 1),
                                 use_bias=not use_bn,
                                 weight_initializer=weight_init,
                                 prefix='expand_trans_conv{}_'.format(i))
            y = Conv2D_1(y)
            self.IsolatedParams.update(Conv2D_1.collect_params())
            if use_bn:
                y = mx.sym.BatchNorm(y, name='expand_trans_bn{}'.format(i))
            y = mx.sym.Activation(y, act_type='relu', name='expand_trans_relu{}'.format(i))
        # symbolic equivalent:
        # y = mx.sym.Convolution(
        #     y, num_filter=f, kernel=(3, 3), pad=(1, 1), stride=(2, 2),
        #     no_bias=use_bn, name='expand_conv{}'.format(i), attr={'__init__': weight_init})
        Conv2D_2 = nn.Conv2D(channels=f, kernel_size=(3, 3), padding=(1, 1),
                             strides=(2, 2), use_bias=not use_bn,
                             weight_initializer=weight_init,
                             prefix='expand_conv{}_'.format(i))
        y = Conv2D_2(y)
        self.IsolatedParams.update(Conv2D_2.collect_params())
        if use_bn:
            y = mx.sym.BatchNorm(y, name='expand_bn{}'.format(i))
        y = mx.sym.Activation(y, act_type='relu', name='expand_reu{}'.format(i))
        outputs.append(y)
    if global_pool:
        outputs.append(
            mx.sym.Pooling(y, pool_type='avg', global_pool=True, kernel=(1, 1)))
    super(FeatureExpander_IsolatedParams, self).__init__(outputs, inputs, params)
net.add(CenteredLayer())
net.initialize()
y = net(nd.random.uniform(shape=(4, 8)))
y.mean()

'''-------------------------------------------------------'''
# Custom layers with model parameters
from mxnet import gluon

# Create a 3x3 parameter named exciting_parameter_yay, then initialize it.
my_params = gluon.Parameter('exciting_parameter_yay', shape=(3, 3))
my_params.initialize()

# Alternatively, use the Block's built-in ParameterDict member `params`.
# The parameter obtained below is named block1_exciting_parameter_yay.
pd = gluon.ParameterDict(prefix='block1_')
pd.get('exciting_parameter_yay', shape=(3, 3))
pd.get('exciting_parameter_yay').initialize()
pd['block1_exciting_parameter_yay'].data()

# A custom Dense layer
class MyDense(nn.Block):
    def __init__(self, units, in_units, **kwargs):
        super(MyDense, self).__init__(**kwargs)
        with self.name_scope():
            self.weight = self.params.get('weight', shape=(in_units, units))
            self.bias = self.params.get('bias', shape=(units,))

    def forward(self, x):
        linear = nd.dot(x, self.weight.data()) + self.bias.data()
        return nd.relu(linear)
    return x - x.mean()

layer = CenteredLayer()
print(layer(nd.array([1, 2, 3, 4, 5])))

net = nn.Sequential()
with net.name_scope():
    net.add(nn.Dense(128))
    net.add(nn.Dense(10))
    net.add(CenteredLayer())
net.initialize()
y = net(nd.random.uniform(shape=(4, 8)))
print(y.mean())

params = gluon.ParameterDict(prefix='block1_')
params.get("param2", shape=(2, 3))
print(params)

class MyDense(nn.Block):
    def __init__(self, units, in_units, prefix=None, params=None):
        super().__init__(prefix, params)
        with self.name_scope():
            self.weight = self.params.get('weight', shape=(in_units, units))
            self.bias = self.params.get('bias', shape=(units,))

    def forward(self, x):
        linear = nd.dot(x, self.weight.data()) + self.bias.data()
        return nd.relu(linear)

dense = MyDense(5, in_units=10, prefix='o_my_dense_')
# Build a more complex model
net = nn.Sequential()
net.add(nn.Dense(128), CenteredLayer())
# Print the mean of the custom layer's outputs. Because the mean is a
# floating-point number, its value is a number very close to 0.
net.initialize()
y = net(nd.random.uniform(shape=(4, 8)))
print(y.mean().asscalar())

# 4.4.2. Custom layers with model parameters
'''
The Parameter class and the ParameterDict class were introduced above.
When defining a custom layer with model parameters, we can use the Block class's
built-in ParameterDict member `params`. It is a dictionary that maps string-typed
parameter names to model parameters of the Parameter type. We can create a
Parameter instance from a ParameterDict via the get function.
'''
params = gluon.ParameterDict()
params.get('param2', shape=(2, 3))
print(params)

# Implement a fully connected layer with both weight and bias parameters.
# It uses the ReLU function as its activation function. in_units and units
# are the numbers of inputs and outputs, respectively.
class MyDense(nn.Block):
    # units: number of outputs of this layer; in_units: number of inputs
    def __init__(self, units, in_units, **kwargs):
        super(MyDense, self).__init__(**kwargs)
        self.weight = self.params.get('weight', shape=(in_units, units))
        self.bias = self.params.get('bias', shape=(units,))

    def forward(self, x):
        linear = nd.dot(x, self.weight.data()) + self.bias.data()
        return nd.relu(linear)
output = net2(data[0:1])
print(output)
nd.mean(output)

my_param = gluon.Parameter("exciting_parameter_yay", grad_req='write', shape=(5, 5))
print(my_param)
my_param.initialize(mx.init.Xavier(magnitude=2.24), ctx=ctx)
print(my_param.data())

# my_param = gluon.Parameter("exciting_parameter_yay", grad_req='write', shape=(5,5))
# my_param.initialize(mx.init.Xavier(magnitude=2.24), ctx=[mx.gpu(0), mx.gpu(1)])
# print(my_param.data(mx.gpu(0)), my_param.data(mx.gpu(1)))

pd = gluon.ParameterDict(prefix="block1_")
pd.get("exciting_parameter_yay", grad_req='write', shape=(5, 5))
pd["block1_exciting_parameter_yay"]

def relu(X):
    return nd.maximum(X, 0)

class MyDense(Block):
    ####################
    # We add arguments to our constructor (__init__)
    # to indicate the number of input units (``in_units``)
    # and output units (``units``)
    ####################
    def __init__(self, units, in_units=0, **kwargs):
def ParameterDictTest():
    params = gluon.ParameterDict()
    params.get('params', shape=(2, 3))
    print(params)
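# A small extension sketch of ParameterDictTest: get() returns the same
# Parameter object for an existing name, and the parameter can be initialized
# and read back. Shapes and names here are illustrative only.
def ParameterDictGetTest():
    params = gluon.ParameterDict()
    p = params.get('params', shape=(2, 3))
    assert p is params.get('params')  # the same Parameter instance is returned
    params.initialize(ctx=mx.cpu())
    print(p.data())  # a (2, 3) array once initialized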
# x_recon_batch: the collection of reconstructions of the images
# x_recon_loss: the corresponding loss for the reconstruction of each image,
#               used for finding the best reconstruction
x_recon_batch = nd.zeros((batch_size, 3, 64, 64))
x_recon_loss = nd.ones((batch_size,)) * 100000

# Use different initializations of z
for restart in range(num_random_restarts):
    tic = time.time()
    train_last_loss = 2.
    train_curr_loss = 0.1
    # Put z into the dict of parameters to be optimized.
    # Only z will be updated in this algorithm.
    paramdict = gluon.ParameterDict('noise')
    paramdict.get('z', shape=(batch_size, n_z, 1, 1), init=init.Normal(1))  # default sigma is 0.01
    paramdict.initialize(ctx=ctx)
    z = paramdict.get('z').data()
    trainer = gluon.Trainer(paramdict, 'Adam', {'learning_rate': learn_rate})

    # Define losses
    recon_loss = dcgan.Recon_Loss()
    z_loss = dcgan.Z_Loss()

    ## Optimization process: find the best z
    for epoch in range(total_epoch):
        if abs(train_last_loss - train_curr_loss) / train_last_loss < 1e-3:
            break
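# A hedged sketch of the per-epoch update that the truncated loop above leads
# into: generate from z, score the reconstruction, and step only on z. The
# generator handle netG and the target batch x_batch are assumptions based on
# the surrounding DCGAN code, not part of the original snippet.
with autograd.record():
    x_recon = netG(z)
    loss = recon_loss(x_recon, x_batch)
loss.backward()
trainer.step(batch_size)
train_last_loss, train_curr_loss = train_curr_loss, nd.mean(loss).asscalar()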