def test_readcell(self):
    """A ReadCell step must emit a (b, m) read vector and (b, n) read weights."""
    inputs = nd.normal(shape=(b, m))
    weights = nd.normal(shape=(b, n))
    mem = nd.normal(shape=(n, m))
    cell = ReadCell([m, 1, 1, 3, 1], n)
    cell.initialize()
    cell.hybridize()
    content, read_weight = cell(inputs, mem, weights)
    self.assertEqual(content.shape, (b, m), msg='')
    self.assertEqual(read_weight.shape, (b, n), msg='')
def test_writecell(self):
    """A WriteCell step must return an (n, m) memory and (b, n) write weights."""
    inputs = nd.normal(shape=(b, m))
    weights = nd.normal(shape=(b, n))
    mem = nd.normal(shape=(n, m))
    cell = WriteCell([m, 1, 1, 3, 1, m, m], n)
    cell.initialize()
    cell.hybridize()
    mem, write_weight = cell(inputs, mem, weights)
    self.assertEqual(mem.shape, (n, m), msg='')
    self.assertEqual(write_weight.shape, (b, n), msg='')
def forward(self, original_idx, paraphrase_idx): ''' forward pass of the whole model, original/paraphrase_idx are both of layout NT, to be added "C" by embedding layer ''' # ENCODER part mean, logv, last_state = self.encoder(original_idx, paraphrase_idx) # sample from Gaussian distribution N(0, 1), of the shape (batch_size, hidden_size) z = nd.normal(loc=0, scale=1, shape=(original_idx.shape[0], self.latent_size), ctx=model_ctx) latent_input = mean + z * nd.exp( 0.5 * logv) # exp() is to make the std dev positive # DECODER part # the KL Div should be calculated between the sample from N(0, 1), and the distribution after # Parameterization Trick, negation since we want it to be small kl_loss = -self.kl_div(mean, logv) # first paraphrase_input should be the <bos> token last_idx = paraphrase_idx[:, 0:1] ce_loss = 0 # decode the sample for pos in range(paraphrase_idx.shape[-1] - 1): vocab_output, last_state = self.decoder(last_state, last_idx, latent_input) # only compare the label we predict, note the first is bos and will be ignored ce_loss = ce_loss + self.ce_loss( vocab_output, paraphrase_idx[:, pos + 1:pos + 2]) last_idx = vocab_output.argmax(axis=-1, keepdims=True) return kl_loss, ce_loss
def choose_action_train(self, state):
    """Exploration policy: actor output plus Gaussian exploration noise
    (deliberately unclipped), then squashed into the valid action range."""
    batched = nd.array([state], ctx=self.ctx)
    raw_action = self.main_actor_network(batched)
    # no noise clip during environment interaction
    exploration = nd.normal(loc=0, scale=self.explore_noise,
                            shape=raw_action.shape, ctx=self.ctx)
    noisy_action = raw_action + exploration
    return self.action_clip(noisy_action).squeeze()
def load_data_polynomial(true_w, true_b, num_train=5000, num_test=1000):
    """Generate a synthetic polynomial-regression dataset.

    Labels follow y = sum_i true_w[i] * x**(i+1) + true_b + noise, where
    x is drawn from N(0, 1) and the noise is N(0, 0.1) per sample.

    Parameters
    ----------
    true_w : NDArray
        Ground-truth coefficients; len(true_w) fixes the polynomial degree.
    true_b : float
        Ground-truth bias.
    num_train, num_test : int
        Number of training / test samples (generated together).

    Returns
    -------
    (features, poly_features, labels)
    """
    features = nd.normal(shape=(num_train + num_test, 1))
    poly_features = [nd.power(features, i) for i in range(1, len(true_w) + 1)]
    poly_features = nd.concat(*poly_features)
    labels = nd.dot(poly_features, true_w) + true_b
    # BUG FIX: the noise must be drawn per sample. Without shape=labels.shape
    # a single draw was broadcast, adding the *same* offset to every label.
    labels += nd.random.normal(scale=0.1, shape=labels.shape)
    return features, poly_features, labels
def normal():
    """Log a 2x2 sample from N(0, 1), then show that `a.norm()` equals the
    manual L2 norm nd.sqrt(nd.power(a, 2).sum()) for a small vector.
    :return: None
    """
    sample = nd.normal(0, 1, shape=(2, 2))
    logger.info(sample)
    vec = nd.array([1, 2, 3, 4])
    print(vec.norm())
    print(nd.sqrt(nd.power(vec, 2).sum()))
def __init__(self, **kwargs):
    """NCF-style two-tower model: user and photo embeddings feeding a small
    dense head.

    The `emb_u` / `emb_p` tables are seeded with fixed Gaussian weights and
    frozen (grad_req='null'); only `emb_u2` and the dense layers train.
    """
    super(ncf, self).__init__(**kwargs)
    with self.name_scope():
        # Frozen user embedding with fixed random weights.
        self.emb_u = nn.Embedding(nb_users, EMBEDDING_DIM_USER)
        self.emb_u.weight.set_data(
            nd.normal(shape=(nb_users, EMBEDDING_DIM_USER)))
        self.emb_u.grad_req = 'null'
        # Second, trainable user embedding table.
        self.emb_u2 = nn.Embedding(nb_users, EMBEDDING_DIM_USER)
        # Frozen photo embedding with fixed random weights.
        self.emb_p = nn.Embedding(nb_photos, EMBEDDING_DIM_PHOTO)
        # BUG FIX: the photo table was seeded with EMBEDDING_DIM_USER columns;
        # that mismatches the layer shape whenever the two dims differ.
        self.emb_p.weight.set_data(
            nd.normal(shape=(nb_photos, EMBEDDING_DIM_PHOTO)))
        self.emb_p.grad_req = 'null'
        self.bn = nn.BatchNorm()
        self.flatten = nn.Flatten()
        self.dropout = nn.Dropout(rate=0.25)
        self.dense1 = nn.Dense(units=128, activation='relu')
        self.dense2 = nn.Dense(units=1, activation='sigmoid')
def update(self):
    """One TD3-style training step: twin-critic TD update every call, and a
    delayed actor + target-network update every `policy_update` calls."""
    self.total_train_steps += 1
    # Sample a minibatch of transitions from the replay buffer.
    state_batch, action_batch, reward_batch, next_state_batch, done_batch = \
        self.memory_buffer.sample(self.batch_size)
    # --------------optimize the critic network--------------------
    with autograd.record():
        # Target policy smoothing: next action from the target actor plus
        # clipped Gaussian noise.
        next_action_batch = self.target_actor_network(next_state_batch)
        noise = nd.normal(loc=0, scale=self.policy_noise,
                          shape=next_action_batch.shape, ctx=self.ctx)
        # with noise clip
        noise = nd.clip(noise, a_min=-self.noise_clip, a_max=self.noise_clip)
        next_action_batch = next_action_batch + noise
        clipped_action = self.action_clip(next_action_batch)
        # Clipped double-Q: take the minimum of the two target critics.
        target_q_value1 = self.target_critic_network1(next_state_batch, clipped_action)
        target_q_value2 = self.target_critic_network2(next_state_batch, clipped_action)
        target_q_value = nd.minimum(target_q_value1, target_q_value2).squeeze()
        # Bootstrapped TD target; (1 - done) zeroes the tail at episode end.
        target_q_value = reward_batch + (1.0 - done_batch) * (self.gamma * target_q_value)
        # Current Q estimates from both online critics.
        current_q_value1 = self.main_critic_network1(state_batch, action_batch)
        current_q_value2 = self.main_critic_network2(state_batch, action_batch)
        loss = gloss.L2Loss()
        # detach() keeps gradients from flowing into the target computation.
        value_loss1 = loss(current_q_value1, target_q_value.detach())
        value_loss2 = loss(current_q_value2, target_q_value.detach())
    self.main_critic_network1.collect_params().zero_grad()
    value_loss1.backward()
    self.critic1_optimizer.step(self.batch_size)
    self.main_critic_network2.collect_params().zero_grad()
    value_loss2.backward()
    self.critic2_optimizer.step(self.batch_size)
    # ---------------optimize the actor network-------------------------
    # Delayed policy update: actor and targets move only every
    # `policy_update` critic steps.
    if self.total_train_steps % self.policy_update == 0:
        with autograd.record():
            pred_action_batch = self.main_actor_network(state_batch)
            # Deterministic policy gradient: maximize Q1, i.e. minimize -Q1.
            actor_loss = -nd.mean(self.main_critic_network1(state_batch, pred_action_batch))
        self.main_actor_network.collect_params().zero_grad()
        actor_loss.backward()
        self.actor_optimizer.step(1)
        # Polyak-average the online networks into the target networks.
        self.soft_update(self.target_actor_network,
                         self.main_actor_network)
        self.soft_update(self.target_critic_network1,
                         self.main_critic_network1)
        self.soft_update(self.target_critic_network2,
                         self.main_critic_network2)
def test_ntmcell(self):
    """One NTMCell step must keep every output and state tensor at its
    expected shape."""
    rand = nd.normal
    state_in = ([rand(shape=(b, n)), rand(shape=(b, n))],  # read weights
                rand(shape=(b, n)),                        # write weight
                [rand(shape=(b, m)), rand(shape=(b, m))],  # read contents
                rand(shape=(b, m)))                        # controller state
    inputs = rand(shape=(b, m))
    mem = rand(shape=(n, m))
    cell = NTMCell()
    cell.initialize()
    output, mem, (wr, ww, content, state) = cell(inputs, mem, state_in)
    self.assertEqual(output.shape, (b, 125))
    self.assertEqual(mem.shape, (n, m))
    for weight in wr:
        self.assertEqual(weight.shape, (b, n))
    self.assertEqual(ww.shape, (b, n))
    for chunk in content:
        self.assertEqual(chunk.shape, (b, m))
    self.assertEqual(state.shape, (b, 125))
def get_random_data(size, ctx):
    """Synthetic binary-classification batch: 10-d standard-normal features
    with label = (sum of features > 3)."""
    features = nd.normal(0, 1, shape=(size, 10), ctx=ctx)
    targets = features.sum(axis=1) > 3
    return features, targets
# paraphrase_idx = nd.array(paraphrase_idx, ctx=model_ctx).expand_dims(axis=0) pred = model.predict(original_idx, sample, bos=vocab['<bos>'], eos=vocab['<eos>']) return ' '.join(vocab.to_tokens(pred)) if __name__ == '__main__': if args.gen: with open('data/' + args.dataset + '/vocab.json', 'r') as f: vocab = nlp.Vocab.from_json(json.load(f)) model = VAE_LSTM(emb_size=300, vocab_size=len(vocab)) model.load_parameters(args.param, ctx=model_ctx) sample = nd.normal(loc=0, scale=1, shape=(1, 64), ctx=model_ctx) print('\033[33mOriginal: \033[34m%s\033[0m' % args.org_sts) print('\033[31mResult: \033[35m%s\033[0m' % generate(model, args.org_sts, \ sample, vocab, ctx=model_ctx)) else: # load train, valid dataset train_dataset_str, valid_dataset_str = get_dataset_str(folder=args.dataset, \ length=args.nsample) # start from existing parameters if args.param: with open('data/' + args.dataset + '/vocab.json', 'r') as f: vocab = nlp.Vocab.from_json(json.load(f)) # use this loaded vocab train_ld, valid_ld = get_dataloader(train_dataset_str, valid_dataset_str, \ clip_length=args.seq_len, vocab=vocab, \ batch_size=args.batch_size)
import d2lzh as d2l
from mxnet import autograd, nd, init, gluon
from mxnet.gluon import loss as gloss, data as gdata, nn

# Build the synthetic linear-regression dataset:
# y = 0.01 * sum(x) + 0.5 + N(0, 0.01) noise. Few training samples (20)
# against many features (200) makes overfitting easy — the usual
# weight-decay demonstration setup.
n_train, n_test, n_feature = 20, 100, 200
true_w, true_b = nd.ones((n_feature, 1)) * 0.01, 0.5
# Generate labels y
features = nd.random.normal(scale=1, shape=(n_train + n_test, n_feature))
labels = nd.dot(features, true_w) + true_b
labels += nd.normal(scale=0.01, shape=labels.shape)

# Batch the training data
batch_size = 1
train_features, test_features = features[:n_train, :], features[n_train:, :]
train_labels, test_labels = labels[:n_train], labels[n_train:]
train_iter = gdata.DataLoader(gdata.ArrayDataset(train_features, train_labels),
                              batch_size, shuffle=True)


# Initialize the model parameters
def init_params():
    # Fresh gradient-enabled weight/bias tensors for each experiment run.
    w = nd.random.normal(scale=1, shape=(n_feature, 1))
    b = nd.zeros(shape=(1,))
    w.attach_grad()
    b.attach_grad()
    return [w, b]


# Define the L2 penalty — only the model weights are penalized, not the bias.
def l2_penalty(w):
    return (w ** 2).sum() / 2
from mxnet import nd

# Scratch demo of basic NDArray operations: creation, reshape, broadcasting,
# elementwise math and matrix multiplication.
x = nd.arange(12)
#print(x)
x = x.reshape(3, 4)
#print(x)
y = nd.zeros((2, 3, 4))
#print(y)
z = nd.ones((2, 8, 9))
#print(z)
z *= 3
#print(z)
k = nd.normal(0, 1, shape=(2, 3, 4))
#print(k)
k = k + y
#print(k)
#k = k/y
#print(k.exp())
a = nd.array([[1, 2, 3], [4, 5, 6], [7, 8, 9], [10, 11, 12]])
#b = nd.array([1, 2, 3, 4], [ 5, 6, 7, 8], [9, 10, 11, 12])
#a = nd.dot(b, a.T)
b = nd.arange(12)
b = b.reshape((4, 3))
#print(b.shape)
#print(a.shape)
#print(b)
e = b.T
#print(b)
#print(b.shape)
# Matrix multiplication with a transpose: (4, 3) x (3, 4) -> (4, 4)
c = nd.dot(a, e)
# NOTE(review): this chunk starts mid-definition — the line below is the tail
# of a model function (presumably `net(X, w, b)`, called in the training loop
# further down) whose header lies above this view.
    return softmax(nd.dot(X.reshape(-1, feature_nums), w) + b)


def cross_entropy(y_hat, y):
    # Per-sample cross entropy: pick the predicted probability of the true
    # class and take its negative log.
    return -nd.pick(y_hat, y).log()


def sgd(params, lr, batch_size):
    # Vanilla minibatch SGD; param[:] updates in place so attached gradients
    # stay valid.
    for param in params:
        param[:] = param - lr * param.grad / batch_size


def eval_accuracy(y_hat, y):
    # Number (not fraction) of correct predictions in the batch.
    return (y_hat.argmax(axis=1) == y.astype('float32')).sum().asscalar()


# Train a softmax classifier from scratch.
epoch = 5
lr = 0.1
train_weights = nd.normal(scale=0.01, shape=(feature_nums, output_nums))
train_bias = nd.normal(scale=0.01, shape=(output_nums,))
train_weights.attach_grad()
train_bias.attach_grad()
for i in range(1, epoch + 1):
    loss_total = 0
    accuracy_total = 0
    samples = 0
    for X, y in train_iter:
        with autograd.record():
            loss_val = cross_entropy(net(X, train_weights, train_bias), y).sum()
        loss_val.backward()
        sgd([train_weights, train_bias], lr, batch_size)
        loss_total += loss_val.asscalar()
        accuracy_total += eval_accuracy(net(X, train_weights, train_bias), y)
        samples += batch_size
from mxnet import nd

# Scratch demo: elementwise ops, matrix product, broadcasting, and the
# NDArray -> numpy round trip. (`nor` is sampled but never used below.)
x = nd.ones(shape=(3, 4))
y = nd.ones(shape=(3, 4))
nor = nd.normal(0, 1, shape=(3, 4))
print(x + y)
print(x * y)
print(nd.dot(x, y.T))
# Broadcasting: (1, 3) + (2, 1) -> (2, 3)
x = nd.arange(3).reshape((1, 3))
y = nd.arange(2).reshape((2, 1))
print(x.shape)
print(y.shape)
print(x + y)
print(x)
print(x.asnumpy())
# NOTE(review): chunk starts mid-statement — the first two lines are the
# trailing arguments of a `g_trainer = gluon.Trainer(` call begun above this
# view.
    g_net.collect_params(), 'Adam',
    {'learning_rate': LEARNING_RATE, 'beta1': BETA, 'clip_gradient': CLIP_GRADIENT})
d_trainer = gluon.Trainer(
    d_net.collect_params(), 'Adam',
    {'learning_rate': LEARNING_RATE, 'beta1': BETA, 'clip_gradient': CLIP_GRADIENT})
g_net.collect_params().zero_grad()
d_net.collect_params().zero_grad()
# define evaluation metric — discriminator accuracy (facc defined elsewhere)
metric = mx.metric.CustomMetric(facc)
# initialize labels: discriminator targets for real vs generated images
real_label = nd.ones(BATCH_SIZE, CTX)
fake_label = nd.zeros(BATCH_SIZE, CTX)
for epoch in range(NUM_EPOCHS):
    for i, (d, _) in enumerate(train_data):
        # update D on one real batch and one generated batch
        data = d.as_in_context(CTX)
        noise = nd.normal(loc=0, scale=1, shape=(
            BATCH_SIZE, Z_DIM, 1, 1), ctx=CTX)
        with autograd.record():
            # train with real image
            output = d_net(data).reshape((-1, 1))
            errD_real = loss(output, real_label)
            metric.update([real_label, ], [output, ])
            # train with fake image; detach() blocks gradients from reaching
            # the generator during the discriminator step
            fake_image = g_net(noise)
            output = d_net(fake_image.detach()).reshape((-1, 1))
            errD_fake = loss(output, fake_label)
            errD = errD_real + errD_fake
            errD.backward()
            metric.update([fake_label, ], [output, ])
        d_trainer.step(BATCH_SIZE)
import numpy as np
import mxnet as mx

# NOTE(review): `logging` and `nd` are used below but not imported in this
# view — presumably imported above this chunk.
logging.basicConfig()
logging.getLogger().setLevel(logging.DEBUG)

batch_size = 128

# define how many of each feature type to generate
user_n_cont_features = 5
user_n_cat_features = 1
item_n_cat_features = 1
item_n_cont_features = 100

# Generate synthetic data: standard-normal continuous features and integer
# category ids in [0, 10000) for the embedding inputs.
user_cont_features = nd.normal(shape=(batch_size, user_n_cont_features))
user_embed_features = nd.array(
    np.random.randint(low=0, high=10000, size=(batch_size, user_n_cat_features)))
item_cont_features = nd.normal(shape=(batch_size, item_n_cont_features))
item_embed_features = nd.array(
    np.random.randint(low=0, high=10000, size=(batch_size, item_n_cat_features)))

# Count the distinct category ids actually generated on each side.
user_cats = np.unique(user_embed_features.asnumpy()).shape[0]
item_cats = np.unique(item_embed_features.asnumpy()).shape[0]
# NOTE(review): chunk is truncated mid-statement below.
logging.debug("{} total user categories & {} total item categories".format(
# define dropout function
def dropout(X, drop_prob):
    """Inverted dropout: zero each element of X with probability drop_prob
    and scale the survivors by 1/keep_prob so E[output] == X.

    Parameters
    ----------
    X : NDArray
        Input activations.
    drop_prob : float
        Probability in [0, 1] of dropping each element.
    """
    assert 0 <= drop_prob <= 1
    keep_prob = 1 - drop_prob
    if keep_prob == 0:
        # drop_prob == 1: everything is dropped
        return X.zeros_like()
    mask = nd.random.uniform(0, 1, X.shape) < keep_prob
    # BUG FIX: rescale by 1/keep_prob (inverted dropout). Without the rescale
    # the layer's expected activation shrinks by keep_prob during training,
    # biasing every downstream layer.
    return mask * X / keep_prob


# define model parameters for a 784-256-256-10 MLP
num_inputs, num_outputs, num_hiddens1, num_hiddens2 = 784, 10, 256, 256
W1 = nd.normal(scale=0.01, shape=(num_inputs, num_hiddens1))
b1 = nd.zeros(shape=(num_hiddens1,))
W2 = nd.normal(scale=0.01, shape=(num_hiddens1, num_hiddens2))
b2 = nd.zeros(shape=(num_hiddens2,))
W3 = nd.normal(scale=0.01, shape=(num_hiddens2, num_outputs))
b3 = nd.zeros(shape=(num_outputs,))
params = [W1, b1, W2, b2, W3, b3]
for param in params:
    param.attach_grad()

# define dropout probability for hidden layers 1 and 2
drop_prob1, drop_prob2 = 0.2, 0.5