Example #1
    def load_network(self, load_filename):
        try:
            self.qnet.load_params(filename=load_filename + '_qnet', ctx=CTX)
            self.target.load_params(filename=load_filename + '_target', ctx=CTX)
            self.trainer.step(1, ignore_stale_grad=True)
            self.trainer.load_states(fname=load_filename + '_trainer')
            print("Successfully loaded:", load_filename)
        except Exception:
            try:
                init_policy_name = self.init_policy.replace('*', str(self.seed))
                print("Could not find old network weights ({}), trying "
                      "self.init_policy ({})".format(load_filename,
                                                     init_policy_name))
                # Collect only the parameters that are not local value biases
                # (gl is assumed to be an alias for mxnet.gluon; _params is a
                # private member, mirrored from the original code).
                need_dict = gl.ParameterDict()
                for key, value in self.qnet.collect_params().items():
                    if not key.endswith('_value_bias_local'):
                        need_dict._params[key] = value
                need_dict.load(filename=init_policy_name + '_qnet', ctx=CTX,
                               ignore_extra=True, restore_prefix='qnet_')

                need_dict = gl.ParameterDict()
                for key, value in self.target.collect_params().items():
                    if not key.endswith('_value_bias_local'):
                        need_dict._params[key] = value
                need_dict.load(filename=init_policy_name + '_target', ctx=CTX,
                               ignore_extra=True, restore_prefix='target_')
                print("Successfully loaded:", self.init_policy)
            except Exception:
                print('No init policy found, or it could not be loaded.')
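For reference, a sketch of a hypothetical save_network counterpart that writes the three files load_network expects (assuming the MXNet 1.x save_params/Trainer.save_states APIs and the same filename convention; this method is not in the original source):

    def save_network(self, save_filename):
        # Hypothetical counterpart to load_network (assumed naming convention).
        self.qnet.save_params(save_filename + '_qnet')
        self.target.save_params(save_filename + '_target')
        self.trainer.save_states(fname=save_filename + '_trainer')
        print("Saved:", save_filename)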
Example #2
def check_trainer_sparse_kv(kv, stype, grad_stype, update_on_kv, expected):
    params = gluon.ParameterDict()
    x = params.get('x', shape=(10, 1), lr_mult=1.0,
                   stype=stype, grad_stype=grad_stype)
    params.initialize(ctx=[mx.cpu(0), mx.cpu(1)], init='zeros')
    trainer = gluon.Trainer(params, 'sgd', {'learning_rate': 0.1},
                            kvstore=kv, update_on_kvstore=update_on_kv)
    all_rows = mx.nd.arange(0, 10, ctx=mx.cpu(0))
    try:
        ws = x.list_data() if stype == 'default' \
            else x.list_row_sparse_data(all_rows)
        with mx.autograd.record():
            for w in ws:
                y = w + 1
                y.backward()
        trainer.step(1)
        assert trainer._kvstore.type == kv
        assert trainer._kv_initialized
        assert trainer._update_on_kvstore is expected
        # one step from zeros with a gradient of 1 on each of the two device
        # copies and lr 0.1 gives w = 0 - 0.1 * 2 = -0.2
        mx.nd.waitall()
        updated_w = x.data(mx.cpu(0)) if stype == 'default' \
            else x.row_sparse_data(all_rows)
        assert (updated_w == -0.2).asnumpy().all(), updated_w
    except Exception as err:
        # for invalid configurations, `expected` is the expected exception type
        assert isinstance(err, expected)
Example #3
def test_paramdict():
    params = gluon.ParameterDict('net_')
    params.get('weight', shape=(10, 10))
    assert list(params.keys()) == ['net_weight']
    params.initialize(ctx=mx.cpu())
    params.save('test.params')
    params.load('test.params', mx.cpu())
Example #4
def check_trainer_reset_kv(kv):
    params = gluon.ParameterDict()
    x = params.get('x', shape=(10,), lr_mult=1.0)
    params.initialize(ctx=[mx.cpu(0), mx.cpu(1)], init='zeros')
    trainer = gluon.Trainer(params, 'sgd', {'learning_rate': 0.1},
                            kvstore=kv)
    params.save('test_trainer_reset_kv.params')
    with mx.autograd.record():
        for w in x.list_data():
            y = w + 1
            y.backward()
    trainer.step(1)
    assert trainer._kvstore.type == kv
    # loading parameters resets the kvstore
    mx.nd.waitall()
    params.load('test_trainer_reset_kv.params')
    if trainer._update_on_kvstore:
        # the kvstore state is dropped when new parameters are loaded
        assert trainer._kvstore is None
        assert trainer._kv_initialized is False
    with mx.autograd.record():
        for w in x.list_data():
            y = w + 1
            y.backward()
    trainer.step(1)
    # the updated parameter should be based on the loaded checkpoint
    assert (x.data(mx.cpu()) == -0.2).asnumpy().all()
Example #5
def model_fn(model_dir):
    symbol = mx.sym.load('%s/model.json' % model_dir)
    outputs = mx.symbol.softmax(data=symbol, name='softmax_label')
    inputs = mx.sym.var('data')
    param_dict = gluon.ParameterDict('model_')
    net = gluon.SymbolBlock(outputs, inputs, param_dict)
    net.load_params('%s/model.params' % model_dir, ctx=mx.cpu())
    return net
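This model_fn follows the legacy SageMaker MXNet serving convention, a pattern that recurs in several examples below. As a companion, a minimal sketch of a matching transform_fn request handler (the signature follows that convention, but the JSON handling here is illustrative, not from the original source):

import json

import mxnet as mx


def transform_fn(net, data, input_content_type, output_content_type):
    # Illustrative handler: decode a JSON-encoded batch, run the network
    # returned by model_fn, and return the predictions as JSON.
    parsed = json.loads(data)
    output = net(mx.nd.array(parsed))
    response_body = json.dumps(output.asnumpy().tolist())
    return response_body, output_content_type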
Example #6
def test_sparse_hybrid_block():
    params = gluon.ParameterDict('net_')
    params.get('weight', shape=(5,5), stype='row_sparse', dtype='float32')
    params.get('bias', shape=(5,), dtype='float32')
    net = gluon.nn.Dense(5, params=params)
    net.initialize()
    x = mx.nd.ones((2,5))
    # an exception is expected when forwarding a HybridBlock w/ sparse param
    y = net(x)
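To make that expectation explicit, a sketch of a wrapper that asserts the failure (the exact exception type raised for sparse parameters may vary across MXNet versions, so it catches Exception broadly):

def test_sparse_hybrid_block_raises():
    try:
        test_sparse_hybrid_block()
    except Exception:
        pass  # expected: a dense forward cannot consume row_sparse weights
    else:
        raise AssertionError('expected an exception for sparse parameters')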
Example #7
def model_fn(model_dir):
    with open("{}/model.json".format(model_dir), "r") as model_file:
        model_json = model_file.read()
    outputs = mx.sym.load_json(model_json)
    inputs = mx.sym.var("data")
    param_dict = gluon.ParameterDict("model_")
    net = gluon.SymbolBlock(outputs, inputs, param_dict)
    # We will serve the model on CPU
    net.load_params("{}/model.params".format(model_dir), ctx=mx.cpu())
    return net
Example #8
def test_paramdict():
    ctx = mx.cpu(1)
    params0 = gluon.ParameterDict('net_')
    params0.get('w0', shape=(10, 10))
    params0.get('w1', shape=(10, 10), stype='row_sparse')
    all_row_ids = mx.nd.arange(0, 10, ctx=ctx)
    # check param names
    assert list(params0.keys()) == ['net_w0', 'net_w1']
    params0.initialize(ctx=ctx)
    trainer0 = mx.gluon.Trainer(params0, 'sgd')
    prev_w0 = params0.get('w0').data(ctx)
    prev_w1 = params0.get('w1').row_sparse_data(all_row_ids)
    # save params
    params0.save('test_paramdict.params')

    # load params
    params1 = gluon.ParameterDict('net_')
    params1.get('w0', shape=(10, 10))
    params1.get('w1', shape=(10, 10), stype='row_sparse')
    params1.load('test_paramdict.params', ctx)
    trainer1 = mx.gluon.Trainer(params1, 'sgd')

    # compare the values before and after save/load
    cur_w0 = params1.get('w0').data(ctx)
    cur_w1 = params1.get('w1').row_sparse_data(all_row_ids)
    mx.test_utils.assert_almost_equal(prev_w0.asnumpy(), cur_w0.asnumpy())
    mx.test_utils.assert_almost_equal(prev_w1.asnumpy(), cur_w1.asnumpy())

    # create a new param dict with dense params, and load from the checkpoint
    # of sparse & dense params
    params2 = gluon.ParameterDict('net_')
    params2.get('w0', shape=(10, 10))
    params2.get('w1', shape=(10, 10))
    params2.load('test_paramdict.params', ctx)

    # compare the values before and after save/load
    cur_w0 = params2.get('w0').data(ctx)
    cur_w1 = params2.get('w1').data(ctx)
    mx.test_utils.assert_almost_equal(prev_w0.asnumpy(), cur_w0.asnumpy())
    mx.test_utils.assert_almost_equal(prev_w1.asnumpy(), cur_w1.asnumpy())
Example #9
def model_fn(model_dir):
    """
    Load the gluon model. Called once when hosting service starts.

    :param model_dir: The directory where model files are stored.
    :return: a model (in this case a Gluon network)
    """
    symbol = mx.sym.load('%s/model.json' % model_dir)
    outputs = mx.symbol.softmax(data=symbol, name='softmax_label')
    inputs = mx.sym.var('data')
    param_dict = gluon.ParameterDict('model_')
    net = gluon.SymbolBlock(outputs, inputs, param_dict)
    net.load_params('%s/model.params' % model_dir, ctx=mx.cpu())
    return net
Example #10
def pretrain_stack(train_data, encoder, loss_encoder, decoder, loss_decoder,
                   model_ctx, num_epochs, learning_rate):
    epochs = num_epochs
    smoothing_constant = .01

    start_time = time.time()
    for layer_id, layer_encoder in enumerate(encoder):
        print(layer_id)
        print(len(decoder))
        # pair each encoder layer with its mirrored decoder layer
        layer_decoder = decoder[len(decoder) - layer_id - 1]
        print('layer_encoder', layer_encoder.__dict__['_name'])
        print('layer_decoder', layer_decoder.__dict__['_name'])
        # skip parameter-free (lambda) decoder layers
        if layer_decoder.__dict__['_name'].find('lambda') != -1:
            continue
        cur_params = gluon.ParameterDict('my_params')
        cur_params.update(layer_encoder.collect_params())
        cur_params.update(layer_decoder.collect_params())

        trainer = gluon.Trainer(cur_params, 'sgd', {'learning_rate': .01})
        for e in range(epochs):
            train_data_shuffle = gluon.data.DataLoader(train_data,
                                                       batch_size=1,
                                                       shuffle=True)
            for i, (data, label) in enumerate(train_data_shuffle):
                data = data.as_in_context(model_ctx)
                label = label.as_in_context(model_ctx)
                encoded_input = data
                for j in range(0, layer_id):
                    encoded_input = encoder[j](encoded_input)

                with autograd.record():
                    encoded_layer = layer_encoder(encoded_input)
                    decoded_input = layer_decoder(encoded_layer)
                    loss = loss_decoder(decoded_input, encoded_input)

                loss.backward()
                trainer.step(data.shape[0])
                if i % 50000 == 0:
                    print('Data id = ', i, ' Time: ', time.time() - start_time)
                    sys.stdout.flush()
                ##########################
                #  Keep a moving average of the losses
                ##########################
                curr_loss = mx.nd.mean(loss).asscalar()
                moving_loss = (curr_loss if ((i == 0) and (e == 0)) else
                               (1 - smoothing_constant) * moving_loss +
                               smoothing_constant * curr_loss)

    return encoder, decoder
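A hedged usage sketch of pretrain_stack (the dataset and the mirrored encoder/decoder stacks below are hypothetical stand-ins; the original calling code is not shown):

import mxnet as mx
from mxnet import gluon

# Hypothetical stand-ins: a tiny dataset plus mirrored encoder/decoder stacks.
train_data = gluon.data.ArrayDataset(mx.nd.random.uniform(shape=(100, 784)),
                                     mx.nd.zeros((100,)))
encoder = [gluon.nn.Dense(64, activation='relu'),
           gluon.nn.Dense(32, activation='relu')]
decoder = [gluon.nn.Dense(64, activation='relu'),
           gluon.nn.Dense(784)]
for layer in encoder + decoder:
    layer.initialize(ctx=mx.cpu())

encoder, decoder = pretrain_stack(train_data, encoder, loss_encoder=None,
                                  decoder=decoder,
                                  loss_decoder=gluon.loss.L2Loss(),
                                  model_ctx=mx.cpu(), num_epochs=1,
                                  learning_rate=0.01)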
Example #11
def model_fn(model_dir):
    """Loads the Gluon model. Called once when hosting service starts.
    Args:
        model_dir (str): The directory where model files are stored.
    Returns:
        mxnet.gluon.block.Block: a Gluon network.
    """
    symbol = mx.sym.load('%s/model.json' % model_dir)
    vocab = vocab_from_json('%s/vocab.json' % model_dir)
    outputs = mx.symbol.softmax(data=symbol, name='softmax_label')
    inputs = mx.sym.var('data')
    param_dict = gluon.ParameterDict('model_')
    net = gluon.SymbolBlock(outputs, inputs, param_dict)
    net.load_params('%s/model.params' % model_dir, ctx=mx.cpu())
    return net, vocab
Example #12
def model_fn(model_dir):
    """Load the gluon model. Called once when hosting service starts.

    Args:
        model_dir: The directory where model files are stored.

    Returns:
        a model (in this case a Gluon network)
    """
    symbol = mx.sym.load("%s/model.json" % model_dir)
    outputs = mx.symbol.softmax(data=symbol, name="softmax_label")
    inputs = mx.sym.var("data")
    param_dict = gluon.ParameterDict("model_")
    net = gluon.SymbolBlock(outputs, inputs, param_dict)
    net.load_params("%s/model.params" % model_dir, ctx=mx.cpu())
    return net
Example #13
def model_fn(model_dir):
    """
    Load the Gluon model for hosting.

    Arguments:
    model_dir -- SageMaker model directory.

    Returns:
    Gluon model
    """
    # Load the saved Gluon model
    symbol = mx.sym.load('%s/model.json' % model_dir)
    outputs = mx.sym.sigmoid(data=symbol, name='sigmoid_label')
    inputs = mx.sym.var('data')
    param_dict = gluon.ParameterDict('model_')
    net = gluon.SymbolBlock(outputs, inputs, param_dict)
    net.load_params('%s/model.params' % model_dir, ctx=mx.cpu())
    return net
Example #14
def train(train_data, test_data, encoder, loss_encoder, decoder, loss_decoder,
          model_ctx, num_epochs, learning_rate):
    cur_params = gluon.ParameterDict('my_params')
    cur_params.update(encoder.collect_params())

    trainer = gluon.Trainer(cur_params, 'sgd',
                            {'learning_rate': learning_rate})

    epochs = num_epochs
    smoothing_constant = .01

    start_time = time.time()
    for e in range(epochs):
        train_data_shuffle = gluon.data.DataLoader(train_data,
                                                   batch_size=1,
                                                   shuffle=True)
        for i, (data, label) in enumerate(train_data_shuffle):
            data = data.as_in_context(model_ctx)
            label = label.as_in_context(model_ctx)
            with autograd.record():
                output = encoder(data)
                loss = loss_encoder(output, label)
            loss.backward()
            trainer.step(data.shape[0])
            if i % 50000 == 0:
                print('Data id = ', i, ' Time: ', time.time() - start_time)
                sys.stdout.flush()
            ##########################
            #  Keep a moving average of the losses
            ##########################
            curr_loss = mx.nd.mean(loss).asscalar()
            moving_loss = (curr_loss if ((i == 0) and (e == 0)) else
                           (1 - smoothing_constant) * moving_loss +
                           smoothing_constant * curr_loss)

        test_accuracy = evaluate_accuracy(test_data, encoder, start_time,
                                          model_ctx)
        train_accuracy = evaluate_accuracy(train_data, encoder, start_time,
                                           model_ctx)
        print("Epoch %s. Loss: %s, Train_acc %s, Test_acc %s" %
              (e, moving_loss, train_accuracy, test_accuracy))

    return encoder, decoder
Example #15
    def __init__(self,
                 network,
                 outputs,
                 num_filters,
                 use_1x1_transition=True,
                 use_bn=True,
                 reduce_ratio=1.0,
                 min_depth=128,
                 global_pool=False,
                 pretrained=False,
                 ctx=mx.cpu(),
                 inputs=('data', )):

        self.IsolatedParams = gluon.ParameterDict()

        inputs, outputs, params = _parse_network(network, outputs, inputs,
                                                 pretrained, ctx)
        # append more layers on top of the last output of the derived network
        y = outputs[-1]
        # weight initializer
        weight_init = mx.init.Xavier(rnd_type='gaussian',
                                     factor_type='out',
                                     magnitude=2)
        for i, f in enumerate(num_filters):
            if use_1x1_transition:
                num_trans = max(min_depth, int(round(f * reduce_ratio)))
                #                y = mx.sym.Convolution(
                #                    y, num_filter=num_trans, kernel=(1, 1), no_bias=use_bn,
                #                    name='expand_trans_conv{}'.format(i), attr={'__init__': weight_init})

                Conv2D_1 = nn.Conv2D(channels=num_trans,
                                     kernel_size=(1, 1),
                                     use_bias=not use_bn,
                                     weight_initializer=weight_init,
                                     prefix='expand_trans_conv{}_'.format(i))
                y = Conv2D_1(y)
                self.IsolatedParams.update(Conv2D_1.collect_params())
                if use_bn:
                    y = mx.sym.BatchNorm(y, name='expand_trans_bn{}'.format(i))
                y = mx.sym.Activation(y,
                                      act_type='relu',
                                      name='expand_trans_relu{}'.format(i))

#            y = mx.sym.Convolution(
#                y, num_filter=f, kernel=(3, 3), pad=(1, 1), stride=(2, 2),
#                no_bias=use_bn, name='expand_conv{}'.format(i), attr={'__init__': weight_init})

            Conv2D_2 = nn.Conv2D(channels=f,
                                 kernel_size=(3, 3),
                                 padding=(1, 1),
                                 strides=(2, 2),
                                 use_bias=not use_bn,
                                 weight_initializer=weight_init,
                                 prefix='expand_conv{}_'.format(i))
            y = Conv2D_2(y)
            self.IsolatedParams.update(Conv2D_2.collect_params())

            if use_bn:
                y = mx.sym.BatchNorm(y, name='expand_bn{}'.format(i))
            y = mx.sym.Activation(y,
                                  act_type='relu',
                                  name='expand_relu{}'.format(i))
            outputs.append(y)
        if global_pool:
            outputs.append(
                mx.sym.Pooling(y,
                               pool_type='avg',
                               global_pool=True,
                               kernel=(1, 1)))
        super(FeatureExpander_IsolatedParams,
              self).__init__(outputs, inputs, params)
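A possible instantiation, mirroring how GluonCV's stock FeatureExpander is typically used (the backbone name and output-layer names below are illustrative assumptions, not from the original source):

# Illustrative: expand two ResNet stages with extra 256/128-channel layers.
expander = FeatureExpander_IsolatedParams(
    network='resnet18_v1',
    outputs=['stage3_activation1', 'stage4_activation1'],
    num_filters=[256, 128],
    pretrained=True,
    ctx=mx.cpu())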
Example #16
net = nn.Sequential()
with net.name_scope():
    net.add(nn.Dense(128))
    net.add(nn.Dense(10))
    net.add(CenteredLayer())

net.initialize()
y = net(nd.random.uniform(shape=(4,8)))
y.mean()

'''-------------------------------------------------------'''
# custom layer with model parameters
from mxnet import gluon

# create a 3x3 parameter named exciting_parameter_yay, then initialize it
my_params = gluon.Parameter('exciting_parameter_yay', shape=(3,3))
my_params.initialize()

# Or use a Block's built-in ParameterDict member `params`; the parameter
# obtained below is named block1_exciting_parameter_yay. Then initialize it.
pd = gluon.ParameterDict(prefix='block1_')
pd.get('exciting_parameter_yay', shape=(3,3))
pd.get('exciting_parameter_yay').initialize()
pd['block1_exciting_parameter_yay'].data()


# custom Dense layer
class MyDense(nn.Block):
    def __init__(self, units, in_units, **kwargs):
        super(MyDense, self).__init__(**kwargs)
        with self.name_scope():
            self.weight = self.params.get('weight', shape=(in_units, units))
            self.bias = self.params.get('bias', shape=(units,))

    def forward(self, x):
        linear = nd.dot(x, self.weight.data()) + self.bias.data()
        return nd.relu(linear)
Example #17
class CenteredLayer(nn.Block):
    def __init__(self, **kwargs):
        super().__init__(**kwargs)

    def forward(self, x):
        return x - x.mean()

layer = CenteredLayer()
print(layer(nd.array([1, 2, 3, 4, 5])))

net = nn.Sequential()
with net.name_scope():
    net.add(nn.Dense(128))
    net.add(nn.Dense(10))
    net.add(CenteredLayer())

net.initialize()
y = net(nd.random.uniform(shape=(4, 8)))
print(y.mean())

params = gluon.ParameterDict(prefix='block1_')
params.get("param2", shape=(2, 3))
print(params)

class MyDense(nn.Block):
    def __init__(self, units, in_units, prefix=None, params=None):
        super().__init__(prefix, params)
        with self.name_scope():
            self.weight = self.params.get('weight', shape=(in_units, units))
            self.bias = self.params.get('bias', shape=(units,))

    def forward(self, x):
        linear = nd.dot(x, self.weight.data()) + self.bias.data()
        return nd.relu(linear)

dense = MyDense(5, in_units=10, prefix='o_my_dense_')
Example #18
# build a more complex model
net = nn.Sequential()
net.add(nn.Dense(128), CenteredLayer())

# Print the mean of the custom layer's output. Because the mean is a
# floating-point number, its value is very close to 0 rather than exactly 0.
net.initialize()
y = net(nd.random.uniform(shape=(4, 8)))
print(y.mean().asscalar())

# 4.4.2. Custom layers with model parameters
'''
The Parameter and ParameterDict classes were introduced earlier.
When defining a custom layer with model parameters, we can use the Block
class's built-in member variable params, of type ParameterDict: a dictionary
mapping string parameter names to Parameter instances. We can create a
Parameter from a ParameterDict via the get function.
'''
params = gluon.ParameterDict()
params.get('param2', shape=(2, 3))
print(params)


# Implement a fully connected layer with weight and bias parameters,
# using ReLU as the activation function; in_units and units are the
# numbers of inputs and outputs, respectively.
class MyDense(nn.Block):
    # units: number of outputs of this layer; in_units: number of inputs
    def __init__(self, units, in_units, **kwargs):
        super(MyDense, self).__init__(**kwargs)
        self.weight = self.params.get('weight', shape=(in_units, units))
        self.bias = self.params.get('bias', shape=(units, ))

    def forward(self, x):
        linear = nd.dot(x, self.weight.data()) + self.bias.data()
        return nd.relu(linear)
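A quick usage sketch of this layer; with Gluon's default naming, the parameters come out as mydense0_weight and mydense0_bias:

dense = MyDense(units=3, in_units=5)
dense.initialize()
print(dense.params)
print(dense(nd.random.uniform(shape=(2, 5))))  # output shape: (2, 3)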
Example #19
output = net2(data[0:1])
print(output)

nd.mean(output)

my_param = gluon.Parameter("exciting_parameter_yay", grad_req='write', shape=(5,5))
print(my_param)

my_param.initialize(mx.init.Xavier(magnitude=2.24), ctx=ctx)
print(my_param.data())

# my_param = gluon.Parameter("exciting_parameter_yay", grad_req='write', shape=(5,5))
# my_param.initialize(mx.init.Xavier(magnitude=2.24), ctx=[mx.gpu(0), mx.gpu(1)])
# print(my_param.data(mx.gpu(0)), my_param.data(mx.gpu(1)))

pd = gluon.ParameterDict(prefix="block1_")

pd.get("exciting_parameter_yay", grad_req='write', shape=(5,5))

pd["block1_exciting_parameter_yay"]

def relu(X):
    return nd.maximum(X, 0)

class MyDense(Block):
    ####################
    # We add arguments to our constructor (__init__)
    # to indicate the number of input units (``in_units``)
    # and output units (``units``)
    ####################
    def __init__(self, units, in_units=0, **kwargs):
Example #20
def ParameterDictTest():
    params = gluon.ParameterDict()
    params.get('params', shape=(2, 3))
    print(params)
Example #21
    # x_recon_batch: the collection of reconstruction of the images
    # x_recon_loss: the corresponding loss for the reconstruction of each image
    # which is used for finding the best reconstruction
    x_recon_batch = nd.zeros((batch_size, 3, 64, 64))
    x_recon_loss = nd.ones((batch_size, )) * 100000

    # Use different initialization of z
    for restart in range(num_random_restarts):
        tic = time.time()
        #
        train_last_loss = 2.
        train_curr_loss = 0.1

        # Put z into the dict of parameters to be optimized
        # Only z will be updated in this algorithm
        paramdict = gluon.ParameterDict('noise')
        paramdict.get('z', shape=(batch_size, n_z, 1, 1),
                      init=init.Normal(1))  # the default sigma is 0.01
        paramdict.initialize(ctx=ctx)
        z = paramdict.get('z').data()
        trainer = gluon.Trainer(paramdict, 'Adam',
                                {'learning_rate': learn_rate})

        # Define Loss
        recon_loss = dcgan.Recon_Loss()
        z_loss = dcgan.Z_Loss()

        ## Optimization process: find the best z
        for epoch in range(total_epoch):
            if abs(train_last_loss - train_curr_loss) / train_last_loss < 1e-3:
                break