def update(self):
    """Run one DQN optimisation step on a mini-batch from the replay buffer.

    Samples ``self.batch_size`` transitions, regresses Q(s, a) of the main
    network onto the one-step target ``r + (1 - done) * gamma * max_a' Q'(s', a')``
    computed with the target network, and applies one optimizer step.
    """
    (states, actions, rewards,
     next_states, dones) = self.replay_buffer.sample(self.batch_size)

    criterion = gloss.L2Loss()
    with autograd.record():
        # Q(s, a) for the actions that were actually taken.
        q_taken = nd.pick(self.main_network(states), actions)

        # max_a' Q_target(s', a'); detached so that only the main network
        # receives gradients.
        next_q = self.target_network(next_states).detach()
        best_next_q = nd.max(next_q, axis=1)

        # Bootstrapped target; the (1 - done) factor removes the bootstrap
        # term for transitions flagged as done.
        td_target = rewards + (1 - dones) * self.gamma * best_next_q

        value_loss = criterion(td_target, q_taken)

    self.main_network.collect_params().zero_grad()
    value_loss.backward()
    self.optimizer.step(batch_size=self.batch_size)
def training2(features, labels, position, fire_point, title, batch_size=11):
    """Fit a single-output linear model with SGD and plot the fitted line.

    Trains ``nn.Dense(1)`` on ``(features, labels)`` for 1000 epochs, prints
    progress and the final parameters, then draws the data and the fitted
    line into subplot ``position`` of a 1x3 grid.

    Args:
        features: NDArray of inputs.
        labels: NDArray of targets.
        position: 1-based subplot index passed to ``plt.subplot(1, 3, ...)``.
        fire_point: the fitted line is drawn for x in ``[0, 1 / fire_point)``.
        title: subplot title.
        batch_size: mini-batch size for the data loader and ``trainer.step``.
    """
    dataset = gdata.ArrayDataset(features, labels)
    data_iter = gdata.DataLoader(dataset, batch_size, shuffle=True)

    net = nn.Sequential()
    net.add(nn.Dense(1))
    net.initialize()
    loss = gloss.L2Loss()
    trainer = gluon.Trainer(net.collect_params(), 'sgd',
                            {'learning_rate': 0.01})

    num_epochs = 1000
    m = 100  # last logged mean loss; placeholder until the first log
    for epoch in range(1, num_epochs + 1):
        for X, y in data_iter:
            with autograd.record():
                l = loss(net(X), y)
            l.backward()
            trainer.step(batch_size)
        l = loss(net(features), labels)
        if epoch % 100 == 0 or epoch == 1:
            print('epcoh: %d loss: %s' % (epoch, l.mean().asscalar()))
            m = l.mean().asscalar()

    print("epoch:" + str(epoch))
    print("m: " + str(m))
    print("w: " + str(net[0].weight.data().asnumpy()))
    print("b: " + str(net[0].bias.data().asnumpy()))
    # Reshape with -1 rather than batch_size: the residual is computed over
    # the whole dataset, whose length need not equal the mini-batch size.
    residual = net(features).reshape((-1, 1)) - labels.reshape((-1, 1))
    print("error square: " + str((residual**2).sum().asscalar()))

    p_test = nd.arange(0, 1 / fire_point, 0.1)
    q_test = net(p_test)
    plt.subplot(1, 3, position)
    plt.scatter(features.asnumpy(), labels.asnumpy(), color='#FF4700')
    plt.plot(p_test.asnumpy(), q_test.asnumpy(), color='b')
    plt.xlabel("1/p (1/kPa)")
    plt.ylabel("1/q (g carbon/mmol)")
    plt.title(title)
def optimize(batch_size, trainer, num_epochs, decay_epoch, log_interval,
             features, labels, net):
    """Train ``net`` with ``trainer`` and plot the loss curve on a log axis.

    The mean L2 loss over the full dataset is recorded before training and
    then whenever ``batch_i * batch_size`` is a multiple of ``log_interval``.
    If ``decay_epoch`` is truthy, the learning rate is multiplied by 0.1 at
    the start of every epoch after ``decay_epoch``.
    """
    criterion = gloss.L2Loss()

    def full_loss():
        # Mean loss over the whole dataset, as a numpy array.
        return criterion(net(features), labels).mean().asnumpy()

    loader = gdata.DataLoader(gdata.ArrayDataset(features, labels),
                              batch_size, shuffle=True)
    history = [full_loss()]

    for epoch in range(1, num_epochs + 1):
        if decay_epoch and epoch > decay_epoch:
            trainer.set_learning_rate(trainer.learning_rate * 0.1)
        for step, (X, y) in enumerate(loader):
            with autograd.record():
                batch_loss = criterion(net(X), y)
            batch_loss.backward()
            trainer.step(batch_size)
            if step * batch_size % log_interval == 0:
                history.append(full_loss())

    print('w:', net[0].weight.data(), '\nb:', net[0].bias.data(), '\n')
    epoch_axis = np.linspace(0, num_epochs, len(history), endpoint=True)
    semilogy(epoch_axis, history, 'epoch', 'loss')
def train_gluon_ch7(trainer_name, trainer_hyperparams, features, labels,
                    batch_size=10, num_epochs=2):
    """Train a one-unit linear model with a named Gluon optimizer and plot loss.

    The mean L2 loss on the full dataset is recorded once before training and
    then each time the number of processed examples in an epoch reaches a
    multiple of 100.
    """
    # Model and loss.
    net = nn.Sequential()
    net.add(nn.Dense(1))
    net.initialize(init.Normal(sigma=0.01))
    loss = gloss.L2Loss()

    def eval_loss():
        return loss(net(features), labels).mean().asscalar()

    history = [eval_loss()]
    dataset = gdata.ArrayDataset(features, labels)
    data_iter = gdata.DataLoader(dataset, batch_size, shuffle=True)
    trainer = gluon.Trainer(net.collect_params(), trainer_name,
                            trainer_hyperparams)

    for _ in range(num_epochs):
        start = time.time()  # restarted every epoch
        for batch_i, (X, y) in enumerate(data_iter):
            with autograd.record():
                batch_loss = loss(net(X), y)
            batch_loss.backward()
            trainer.step(batch_size)  # step() is given the batch size
            seen = (batch_i + 1) * batch_size
            if seen % 100 == 0:
                history.append(eval_loss())

    # Report the last recorded loss and the time since the last epoch began.
    elapsed = time.time() - start
    print('loss: %f, %f sec per epoch' % (history[-1], elapsed))
    d2l.set_figsize()
    d2l.plt.plot(np.linspace(0, num_epochs, len(history)), history)
    d2l.plt.xlabel('epoch')
    d2l.plt.ylabel('loss')
def house_prise_gulon():
    """Build a toy house-price regression with Gluon and log the result.

    Fits ``nn.Dense(1)`` with SGD (learning rate 0.03, 3 epochs) on three
    hard-coded samples of two features each, logging the mean loss after
    each epoch and finally the learned weight, bias and predictions.

    :return: None
    """
    # A single nd.array call is enough; wrapping it in a second nd.array
    # only made an extra copy of the same data.
    features = nd.array([[120, 2], [100, 1], [130, 3]])
    labels = nd.array([1200000, 1000000, 1300000])
    logger.info(features)
    logger.info(labels)

    batch_size = 10
    # Pair features with labels, then read shuffled mini-batches.
    dataset = gdata.ArrayDataset(features, labels)
    data_iter = gdata.DataLoader(dataset, batch_size, shuffle=True)

    net = nn.Sequential()
    net.add(nn.Dense(1))
    net.initialize(init.Normal(sigma=0.01))
    loss = gloss.L2Loss()
    trainer = gluon.Trainer(net.collect_params(), 'sgd',
                            {'learning_rate': 0.03})

    num_epochs = 3
    for epoch in range(1, num_epochs + 1):
        for X, y in data_iter:
            with autograd.record():
                l = loss(net(X), y)
            l.backward()
            trainer.step(batch_size)
        l = loss(net(features), labels)
        logger.info('epoch %d, loss: %f' % (epoch, l.mean().asnumpy()))

    dense = net[0]
    logger.info("预测数据")
    logger.info(dense.weight.data())
    logger.info(dense.bias.data())
    logger.info(net(features))
def update(self):
    """Run one Double-DQN optimisation step on a sampled mini-batch.

    Double DQN decouples action selection from action evaluation: the main
    network picks the greedy action for the *next* state, and the target
    network supplies the value of that action:

        target = r + gamma * Q_target(s', argmax_a Q_main(s', a))

    The replay buffer used here yields no terminal flag, so no done mask is
    applied to the bootstrap term.
    """
    state_batch, action_batch, reward_batch, next_state_batch = \
        self.replay_buffer.sample(self.batch_size)

    with autograd.record():
        # Q(s, a) for the actions that were actually taken.
        all_current_q_value = self.main_network(state_batch)
        main_q_value = nd.pick(all_current_q_value, action_batch)

        # Different from DQN: select the next action with the main network
        # evaluated on the NEXT state (previously the argmax was taken over
        # the current-state Q values, which picks an action for the wrong
        # state), then read its value from the target network.
        next_q_main = self.main_network(next_state_batch).detach()
        max_action = nd.argmax(next_q_main, axis=1)
        # Detached: only the main network's Q(s, a) receives gradients.
        all_next_q_value = self.target_network(next_state_batch).detach()
        target_q_value = nd.pick(all_next_q_value, max_action).detach()
        target_q_value = reward_batch + self.gamma * target_q_value

        loss = gloss.L2Loss()
        value_loss = loss(target_q_value, main_q_value)

    self.main_network.collect_params().zero_grad()
    value_loss.backward()
    self.optimizer.step(batch_size=self.batch_size)
def optimize(batch_size, trainer, num_epochs, decay_epoch, log_interval,
             features, labels, net):
    """Train ``net``, count the optimizer steps, and plot the loss history.

    Besides the learned weight and bias, the total number of mini-batch
    updates performed is printed at the end.
    """
    loss = gloss.L2Loss()
    data_iter = gdata.DataLoader(gdata.ArrayDataset(features, labels),
                                 batch_size, shuffle=True)

    def snapshot():
        # Mean loss over the whole dataset.
        return loss(net(features), labels).mean().asnumpy()

    ls = [snapshot()]
    n_updates = 0
    for epoch in range(1, num_epochs + 1):
        # Learning-rate decay: shrink by 10x on every epoch past decay_epoch.
        if decay_epoch and epoch > decay_epoch:
            trainer.set_learning_rate(trainer.learning_rate * 0.1)
        for batch_i, (X, y) in enumerate(data_iter):
            with autograd.record():
                batch_loss = loss(net(X), y)
            batch_loss.backward()
            n_updates += 1
            trainer.step(batch_size)
            if batch_i * batch_size % log_interval == 0:
                ls.append(snapshot())

    print('i:', n_updates, 'w:', net[0].weight.data(), '\nb:',
          net[0].bias.data(), '\n')
    es = np.linspace(0, num_epochs, len(ls), endpoint=True)
    gb.semilogy(es, ls, 'epoch', 'loss')
def train_gluon(trainer_name, hyperparams, batch_size):
    """Train a linear model on the module-level data and show its loss curve.

    Reads ``features`` and ``labels`` from the enclosing module scope, trains
    for two epochs with the optimizer named ``trainer_name``, and records the
    full-dataset mean loss whenever the examples processed in the current
    epoch reach a multiple of 100.
    """
    # Read data.
    dataset = gdata.ArrayDataset(features, labels)
    data_iter = gdata.DataLoader(dataset, batch_size, shuffle=True)

    # Define and initialise the model.
    net = nn.Sequential()
    net.add(nn.Dense(1))
    net.initialize(init=init.Normal(sigma=0.01))

    # Loss function and optimisation algorithm.
    loss = gloss.L2Loss()
    trainer = gluon.Trainer(net.collect_params(), trainer_name, hyperparams)

    # Train.
    num_epochs = 2
    start = 0
    history = []
    for _ in range(num_epochs):
        start = time.time()
        for batch_idx, (X, y) in enumerate(data_iter):
            with autograd.record():
                batch_loss = loss(net(X), y)
            batch_loss.backward()
            trainer.step(batch_size)
            if (batch_idx + 1) * batch_size % 100 == 0:
                history.append(loss(net(features), labels).mean().asscalar())

    elapsed = time.time() - start
    print('loss %f,%f sec per epoch' % (history[-1], elapsed / len(history)))
    plt.plot(np.linspace(0, num_epochs, len(history)), history)
    plt.xlabel('epoch')
    plt.ylabel('loss')
    plt.show()
def optimize(batch_size, trainer, num_epochs, decay_epoch, log_interval, X, y,
             net):
    """Train ``net`` on ``(X, y)`` and plot the recorded loss on a log axis.

    The full-dataset mean L2 loss is sampled before training and every time
    ``batch_i * batch_size`` is a multiple of ``log_interval``.
    """
    square_loss = gloss.L2Loss()
    data_iter = gdata.DataLoader(gdata.ArrayDataset(X, y), batch_size,
                                 shuffle=True)

    def dataset_loss():
        return square_loss(net(X), y).mean().asnumpy()

    y_vals = [dataset_loss()]
    for epoch in range(1, num_epochs + 1):
        if decay_epoch and epoch > decay_epoch:
            # Past the decay epoch: cut the learning rate by 10x each epoch.
            trainer.set_learning_rate(trainer.learning_rate * 0.1)
        for batch_i, (batch_x, batch_y) in enumerate(data_iter):
            with autograd.record():
                batch_loss = square_loss(net(batch_x), batch_y)
            batch_loss.backward()
            trainer.step(batch_size)
            if batch_i * batch_size % log_interval == 0:
                y_vals.append(dataset_loss())

    print('w:', net[0].weight.data(), '\nb:', net[0].bias.data(), '\n')
    x_vals = np.linspace(0, num_epochs, len(y_vals), endpoint=True)
    utils.semilogy(x_vals, y_vals, 'epoch', 'loss')
def optimize_gluon(trainer, features, labels, net, decay_epoch=None,
                   batch_size=10, log_interval=10, num_epochs=3):
    """Minimise the L2 loss of ``net`` with a Gluon trainer and plot progress.

    Prints the first two weights and the bias of ``net[0]`` after training,
    so the first layer is expected to expose at least two input weights.
    """
    loss = gloss.L2Loss()
    data_iter = gdata.DataLoader(gdata.ArrayDataset(features, labels),
                                 batch_size, shuffle=True)

    def mean_loss():
        return loss(net(features), labels).mean().asnumpy()

    ls = [mean_loss()]
    for epoch in range(1, num_epochs + 1):
        # Decay the learning rate on every epoch after decay_epoch.
        if decay_epoch and epoch > decay_epoch:
            trainer.set_learning_rate(trainer.learning_rate * 0.1)
        for batch_i, (X, y) in enumerate(data_iter):
            with autograd.record():
                batch_loss = loss(net(X), y)
            batch_loss.backward()
            trainer.step(batch_size)
            if batch_i * batch_size % log_interval == 0:
                ls.append(mean_loss())

    w0 = net[0].weight.data()[0][0].asscalar()
    w1 = net[0].weight.data()[0][1].asscalar()
    b = net[0].bias.data().asscalar()
    print('w[0]=%.2f, w[1]=%.2f, b=%.2f' % (w0, w1, b))
    es = np.linspace(0, num_epochs, len(ls), endpoint=True)
    semilogy(es, ls, 'epoch', 'loss')
def update(self):
    """Run the PPO update over the transitions currently in ``self.buffer``.

    Computes discounted returns for the buffered transitions, then for
    ``self.ppo_update_times`` rounds draws ``self.batch_size`` distinct
    indices and, for each sampled transition, performs one clipped-surrogate
    actor step followed by one L2 critic step. The buffer is emptied at the
    end.

    Raises:
        AssertionError: if the buffer holds fewer than ``self.batch_size``
            transitions.
    """
    state = nd.array([t.state for t in self.buffer], ctx=self.ctx)
    action = nd.array([t.action for t in self.buffer], ctx=self.ctx)
    reward = [t.reward for t in self.buffer]
    # next_state = nd.array([t.next_state for t in self.buffer], ctx=self.ctx)
    old_action_log_prob = nd.array([t.a_log_prob for t in self.buffer],
                                   ctx=self.ctx)

    # Discounted return G_t, accumulated backwards over the buffer.
    # NOTE(review): the whole buffer is treated as one trajectory (R is never
    # reset) — confirm the buffer holds a single episode.
    R = 0
    Gt = []
    for r in reward[::-1]:
        R = r + self.gamma * R
        Gt.insert(0, R)
    Gt = nd.array(Gt, ctx=self.ctx)

    # sample 'ppo_update_time' times
    # sample 'batch_size' samples every time
    for i in range(self.ppo_update_times):
        assert len(self.buffer) >= self.batch_size
        sample_index = random.sample(range(len(self.buffer)), self.batch_size)
        for index in sample_index:
            # optimize the actor network
            with autograd.record():
                Gt_index = Gt[index]
                # Detached: the advantage is a constant for the actor loss.
                V = self.critic_network(state[index].reshape(1, -1)).detach()
                advantage = (Gt_index - V)

                all_action_prob = self.actor_network(state[index].reshape(
                    1, -1))
                action_prob = nd.pick(all_action_prob, action[index])

                # NOTE(review): the denominator is named "log_prob" but is
                # used as a plain probability here — confirm what the buffer
                # stores in `a_log_prob`.
                ratio = action_prob / old_action_log_prob[index]
                surr1 = ratio * advantage
                surr2 = nd.clip(ratio, 1 - self.clip_param,
                                1 + self.clip_param) * advantage
                action_loss = -nd.mean(nd.minimum(surr1, surr2))  # attention
            self.actor_network.collect_params().zero_grad()
            action_loss.backward()
            actor_network_params = [
                p.data() for p in self.actor_network.collect_params().values()
            ]
            # NOTE(review): the gradient-clipping threshold reuses the PPO
            # ratio clip parameter — confirm this is intended.
            gb.grad_clipping(actor_network_params,
                             theta=self.clip_param,
                             ctx=self.ctx)
            self.actor_optimizer.step(1)

            # optimize the critic network
            with autograd.record():
                Gt_index = Gt[index]
                V = self.critic_network(state[index].reshape(1, -1))
                loss = gloss.L2Loss()
                value_loss = nd.mean(loss(Gt_index, V))
            self.critic_network.collect_params().zero_grad()
            value_loss.backward()
            critic_network_params = [
                p.data() for p in self.critic_network.collect_params().values()
            ]
            gb.grad_clipping(critic_network_params,
                             theta=self.clip_param,
                             ctx=self.ctx)
            self.critic_optimizer.step(1)
            self.training_step += 1
    # clear buffer
    del self.buffer[:]
def __init__(self, net, ctx=mx.cpu()):
    """Set up a regression task around ``net``.

    Uses an L2 loss for training and Pearson correlation as the metric.
    ``net`` and ``ctx`` are forwarded unchanged to the base class.
    """
    super(Regression, self).__init__(net=net, ctx=ctx)
    self.metric = mx.metric.PearsonCorrelation()
    self.loss_fun = gloss.L2Loss()
def update(self):
    """Run one TD3 training step on a mini-batch from the memory buffer.

    Both critics are updated on every call against a shared target built
    from the element-wise minimum of the two target critics. The actor and
    the three target networks are updated only when
    ``self.total_train_steps`` is a multiple of ``self.policy_update``.
    """
    self.total_train_steps += 1
    state_batch, action_batch, reward_batch, next_state_batch, done_batch = self.memory_buffer.sample(
        self.batch_size)

    # --------------optimize the critic network--------------------
    with autograd.record():
        # choose next action according to target policy network
        next_action_batch = self.target_actor_network(next_state_batch)
        noise = nd.normal(loc=0,
                          scale=self.policy_noise,
                          shape=next_action_batch.shape,
                          ctx=self.ctx)
        # with noise clip
        noise = nd.clip(noise, a_min=-self.noise_clip, a_max=self.noise_clip)
        next_action_batch = next_action_batch + noise
        clipped_action = self.action_clip(next_action_batch)

        # get target q value
        target_q_value1 = self.target_critic_network1(
            next_state_batch, clipped_action)
        target_q_value2 = self.target_critic_network2(
            next_state_batch, clipped_action)
        # Take the smaller of the two target estimates.
        target_q_value = nd.minimum(target_q_value1, target_q_value2).squeeze()
        # (1 - done) drops the bootstrap term for finished transitions.
        target_q_value = reward_batch + (1.0 - done_batch) * (
            self.gamma * target_q_value)

        # get current q value
        current_q_value1 = self.main_critic_network1(
            state_batch, action_batch)
        current_q_value2 = self.main_critic_network2(
            state_batch, action_batch)
        loss = gloss.L2Loss()
        # The target is detached so no gradient flows into the target
        # networks.
        value_loss1 = loss(current_q_value1, target_q_value.detach())
        value_loss2 = loss(current_q_value2, target_q_value.detach())

    self.main_critic_network1.collect_params().zero_grad()
    value_loss1.backward()
    self.critic1_optimizer.step(self.batch_size)

    self.main_critic_network2.collect_params().zero_grad()
    value_loss2.backward()
    self.critic2_optimizer.step(self.batch_size)

    # ---------------optimize the actor network-------------------------
    # Delayed policy update: only every `policy_update`-th training step.
    if self.total_train_steps % self.policy_update == 0:
        with autograd.record():
            pred_action_batch = self.main_actor_network(state_batch)
            # Maximise critic 1's value of the actor's own actions.
            actor_loss = -nd.mean(
                self.main_critic_network1(state_batch, pred_action_batch))

        self.main_actor_network.collect_params().zero_grad()
        actor_loss.backward()
        # step(1): the loss is already a mean over the batch.
        self.actor_optimizer.step(1)

        self.soft_update(self.target_actor_network,
                         self.main_actor_network)
        self.soft_update(self.target_critic_network1,
                         self.main_critic_network1)
        self.soft_update(self.target_critic_network2,
                         self.main_critic_network2)
from mxnet.gluon import loss

# Classification loss. sparse_label=False means labels are supplied in dense
# form (same shape as the predictions) rather than as class indices — see the
# Gluon loss API documentation.
loss_softmax = loss.SoftmaxCrossEntropyLoss(sparse_label=False)
# Regression loss (squared error).
loss_mse = loss.L2Loss()
# Synthetic targets: a linear function of the two feature columns plus
# small Gaussian noise. `features`, `true_w` and `true_b` are defined
# earlier in the script.
labels = true_w[0] * features[:, 0] + true_w[1] * features[:, 1] + true_b
labels += nd.random.normal(scale=0.01, shape=labels.shape)

batch_size = 10
dataset = gdata.ArrayDataset(features, labels)
# Read random mini-batches.
data_iter = gdata.DataLoader(dataset, batch_size, shuffle=True)

net = nn.Sequential()
net.add(nn.Dense(1))  # Dense is a fully connected layer; here with 1 output.

# Initialise model parameters: each weight element is sampled at
# initialisation from a normal distribution with mean 0 and std 0.01.
net.initialize(init.Normal(sigma=0.01))

# Define the loss function.
loss = gloss.L2Loss()  # Squared loss is also known as L2-norm loss.

# Define the optimisation algorithm: mini-batch SGD with learning rate 0.03.
# The parameters to optimise are obtained through collect_params().
# (Original author's note: the learning rate is usually set to about
# 1/batch_size — a rule of thumb, not something this script enforces.)
trainer = gluon.Trainer(net.collect_params(), 'sgd', {'learning_rate': 0.03})

# Train the model.
num_epochs = 3
for epoch in range(1, num_epochs + 1):
    for X, y in data_iter:
        with autograd.record():
            l = loss(net(X), y)
        l.backward()
        # Update the parameters; passing the batch size lets the trainer
        # average the gradient over the samples in the batch.
        trainer.step(batch_size)
    l = loss(net(features), labels)
    print('epoch %d, loss: %f' % (epoch, l.mean().asnumpy()))
# Mini-batch linear regression with Gluon. `features`, `labels`, `gdata` and
# `autograd` are defined earlier in the script.
batch_size = 10
dataset = gdata.ArrayDataset(features, labels)
data_iter = gdata.DataLoader(dataset, batch_size, shuffle=True)

from mxnet.gluon import nn
net = nn.Sequential()  # create a container for the layers
net.add(nn.Dense(1))  # the number of inputs is inferred automatically

from mxnet import init
net.initialize(init.Normal(sigma=0.01))  # initial weights ~ N(0, 0.01^2)

from mxnet.gluon import loss as gloss
loss = gloss.L2Loss()  # L2 loss

from mxnet import gluon
trainer = gluon.Trainer(net.collect_params(), 'sgd', {'learning_rate': 0.03})

num_epoch = 3
for epoch in range(1, num_epoch + 1):
    for X, y in data_iter:
        with autograd.record():
            l = loss(net(X), y)
        l.backward()
        trainer.step(batch_size)
    # Report the mean loss over the full dataset after each epoch.
    print('epoch {0} , loss {1}'.format(
        epoch,
        loss(net(features), labels).mean().asnumpy()))
def __init__(self, **kwargs):
    """Initialise the polynomial regression model.

    All keyword arguments are forwarded to the base class. The optional
    ``regularization`` keyword is also stored on the instance (``None`` when
    absent). The network itself is created later; ``self.net`` starts empty.
    """
    super(PolynomialRegressionGluon, self).__init__(**kwargs)
    regularization = kwargs.get("regularization")
    self.net = None
    self.loss = gloss.L2Loss()
    self.regularization = regularization
def __init__(self, feature_dict, args, ctx, task, **kwargs):
    """Build the xDeepFM sub-layers and register them as children.

    Args:
        feature_dict: ``{"sparse": [SingleFeat], "dense": [SingleFeat]}``.
            Each sparse feature gets an ``nn.Embedding(feat_num,
            embedding_size)``; each dense feature gets an
            ``nn.Dense(embedding_size)``.
        args: configuration object. Reads ``SAVE_PARAMS_PATH_PREFIX``,
            ``FIELD_NUM``, ``TASK``, ``CONFIG_NAME``, ``LOSS``,
            ``CONV1D_LAYER`` and the ``FINISH_*`` / ``LIKE_*`` settings.
        ctx: device context, stored on the instance.
        task: ``'finish'`` selects the FINISH_* layer and dropout settings;
            anything else selects LIKE_*.
        **kwargs: forwarded to the base class.

    Note:
        Embedding and batch sizes are chosen from ``args.TASK``, whereas the
        layer list and dropout are chosen from the ``task`` argument.
    """
    super(xDeepFM, self).__init__(**kwargs)
    util.mkdir_if_not_exist(args.SAVE_PARAMS_PATH_PREFIX)
    self.field_size = args.FIELD_NUM
    self.feature_dict = feature_dict
    print('field_size:')
    print(self.field_size)
    if args.TASK == 'finish':
        self.embedding_size = args.FINISH_EMBEDDING_SIZE
        self.batch_size = args.FINISH_BATCH_SIZE
    else:
        self.embedding_size = args.LIKE_EMBEDDING_SIZE
        self.batch_size = args.LIKE_BATCH_SIZE
    self.config_name = args.CONFIG_NAME
    self.task = task
    if args.LOSS == 'l2loss':
        self.loss = gloss.L2Loss()
    else:
        self.loss = gloss.SigmoidBinaryCrossEntropyLoss()
    self.ctx = ctx
    self.embedding_dict = OrderedDict()
    self.dense_dict = OrderedDict()
    with self.name_scope():
        # Builtin int() replaces np.int, which was only an alias for it and
        # has been removed from recent NumPy releases.
        if self.task == 'finish':
            self.layer_list = [int(x) for x in args.FINISH_LAYER]
            self.dropout = args.FINISH_DROPOUT_PROB
        else:
            self.layer_list = [int(x) for x in args.LIKE_LAYER]
            self.dropout = args.LIKE_DROPOUT_PROB

        self.dnn_out = nn.Dense(1, use_bias=False)
        self.register_child(self.dnn_out)

        # Per-feature input layers.
        for feat in feature_dict['sparse']:
            self.embedding_dict[feat.feat_name] = nn.Embedding(
                feat.feat_num, self.embedding_size)
        for feat in feature_dict['dense']:
            self.dense_dict[feat.feat_name] = nn.Dense(self.embedding_size)
        # Layers held in plain dicts/lists must be registered explicitly.
        for emb_k, emb_v in self.embedding_dict.items():
            self.register_child(emb_v)
        for den_k, den_v in self.dense_dict.items():
            self.register_child(den_v)

        # Linear part.
        self.linear_logit_dense = nn.Dense(1, use_bias=False)
        self.register_child(self.linear_logit_dense)
        self.linear_logit_embedding_bn = nn.BatchNorm()
        self.register_child(self.linear_logit_embedding_bn)

        # DNN tower: one Dense / Dropout / BatchNorm / ReLU group per entry
        # of layer_list.
        self.dense_list = []
        self.dropout_list = []
        self.bn_list = []
        self.activation_list = []
        for i in range(len(self.layer_list)):
            self.dense_list.append(nn.Dense(self.layer_list[i]))
            self.dropout_list.append(nn.Dropout(self.dropout))
            self.bn_list.append(nn.BatchNorm())
            self.activation_list.append(nn.Activation('relu'))
            self.register_child(self.dense_list[i])
            self.register_child(self.dropout_list[i])
            self.register_child(self.bn_list[i])
            self.register_child(self.activation_list[i])

        print('true')
        # CIN part: a stack of 1x1 Conv1D layers. Every layer takes
        # field_size * (previous layer's channel count) input channels.
        self.layer_size = [int(x) for x in args.CONV1D_LAYER]
        self.cin_dense = nn.Dense(1)
        self.register_child(self.cin_dense)
        self.cin_bn = nn.BatchNorm()
        self.register_child(self.cin_bn)
        self.field_nums = [self.field_size]
        self.conv_list = []
        for idx, size in enumerate(self.layer_size):
            self.conv_list.append(
                nn.Conv1D(channels=size,
                          kernel_size=1,
                          strides=1,
                          padding=0,
                          activation='relu',
                          in_channels=self.field_nums[0] *
                          self.field_nums[-1],
                          weight_initializer=init.Uniform()))
            self.field_nums.append(size)
            self.register_child(self.conv_list[idx])
# 1. prepare training data set n_train, n_test, true_w, true_b = 100, 100, [1.2, -3.4, 5.6], 5 features = nd.random.normal(shape=(n_train + n_test, 1)) poly_features = nd.concat(features, nd.power(features, 2), nd.power(features, 3)) labels = (true_w[0] * poly_features[:, 0] + true_w[1] * poly_features[:, 1] + true_w[2] * poly_features[:, 2] + true_b) labels += nd.random.normal(loc=0, scale=0.01, shape=labels.shape) print(features[:2], poly_features[:2], labels[:2]) print("features.shape={}, poly_features.shape={} ".format( features.shape, poly_features.shape)) print("labels.shape={}".format(labels.shape)) num_epochs, loss = 100, gloss.L2Loss() # 2. training model def fit_and_plot(train_features, test_features, train_labels, test_labels): net = nn.Sequential() net.add(nn.Dense(1)) net.initialize() batch_size = min(10, train_labels.shape[0]) train_iter = gdata.DataLoader(gdata.ArrayDataset(train_features, train_labels), batch_size, shuffle=True) trainer = gluon.Trainer(net.collect_params(), 'sgd', {'learning_rate': 0.01}) train_ls, test_ls = [], []
modelname = 'semi_pi_simple2' basemodel_zoo = 'simple2' net = symbols.get_model('simple2') net.initialize(mx.init.Xavier(magnitude=2.24)) #net.load_parameters(os.path.join('symbols','para','%s.params'%(modelname))) # g(x) : stochastic input augmentation function def g(x): return x + nd.random.normal(0, stochastic_ratio, shape=x.shape) # loss function l_logistic = gloss.SoftmaxCrossEntropyLoss() l_l2loss = gloss.L2Loss() metric = mx.metric.Accuracy() # train def test(): metric = mx.metric.Accuracy() for data, label in val_data: X = data.reshape((-1, 1, 28, 28)) #img = nd.concat(X,X,X,dim=1) output = net(X) metric.update([label], [output]) return metric.get() def train(epochs, alpha):
def log_rmse(net, features, labels):
    """Return the RMSE between log-predictions and log-labels.

    Predictions are clipped to ``[1, inf)`` before the logarithm is taken.
    """
    preds = nd.clip(net(features), 1, float('inf'))
    criterion = gloss.L2Loss()
    mean_l2 = criterion(preds.log(), labels.log()).mean()
    # The factor 2 compensates for the 1/2 that Gluon's L2Loss applies to
    # the squared error (per the Gluon loss API documentation).
    return nd.sqrt(2 * mean_l2)
def __init__(self):
    """Initialise the Gluon linear regression model.

    The network is left unset. If the base class left ``self.loss`` falsy,
    an L2 loss is installed as the default.
    """
    super(LinRegGluon, self).__init__()
    self.net = None
    if self.loss:
        return
    self.loss = g_loss.L2Loss()
# Small demonstration of three Gluon loss functions on hand-made inputs.
# The loss calls are bare expressions: their values are only visible in an
# interactive session or notebook.
import mxnet
from mxnet.gluon import nn, loss as gloss
import mxnet as mx
import mxnet.ndarray as nd
from mxnet import nd, autograd, gluon  # rebinds `nd` (same module as above)
from mxnet.gluon.data.vision import transforms

# L2 Loss
loss2 = gloss.L2Loss()
# sample data: predictions of 1 against targets of 2
x = nd.ones((2, ))
y = nd.ones((2, )) * 2
loss2(x, y)

# Huber loss
loss_huber = gloss.HuberLoss(rho=0.85)  # threshold rho

# Softmax cross-entropy: two samples with two class scores each, and one
# class label per sample.
loss = gloss.SoftmaxCrossEntropyLoss()
x = nd.array([[1, 10], [8, 2]])
y = nd.array([0, 1])
loss(x, y)
# 全连接层 net.add(nn.Dense(1)) # 【初始化模型参数】 # 默认初始化的是权重参数,采用标准差为0.01的正态分布 # 偏差参数默认初始化为0 # 这时net已经有了w和b,后续操作中只需要将输入输入层即可 # 这个net(X)的操作就是求出预测值的过程! net.initialize(init.Normal(sigma=0.01)) # 【定义损失函数】 loss = loss.L2Loss() # 平方损失又称为L2范数损失 # 【定义优化算法】 # 定义一个模型参数优化算法一共需要4个参数,w,b,learning_rate,batch_size # 其中batch_size这个参数在最后的step函数中给出! trainer = gluon.Trainer(net.collect_params(), 'sgd', {'learning_rate': 0.03}) # 【训练模型】 num_epochs = 3 for epoch in range(num_epochs): for X, y in data_iter: with autograd.record(): l = loss(net(X), y)
def build_model(self):
    # Builds everything needed for training and stores it on `self`: data
    # loaders for domains A and B, two generators, four discriminators, the
    # loss objects, parameter initialisation, two Adam trainers and the rho
    # clipper.

    # DataLoader
    # Training images: random flip, resize to img_size + 30, then a random
    # resized crop back to img_size, normalised with mean 0.5 and std 0.5
    # per channel.
    train_transform = transforms.Compose([
        transforms.RandomFlipLeftRight(),
        transforms.Resize((self.img_size + 30, self.img_size + 30)),
        transforms.RandomResizedCrop(self.img_size),
        transforms.ToTensor(),
        transforms.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5))
    ])
    # Test images: deterministic resize only, same normalisation.
    test_transform = transforms.Compose([
        transforms.Resize((self.img_size, self.img_size)),
        transforms.ToTensor(),
        transforms.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5))
    ])

    # Datasets live under dataset/<name>/{trainA,trainB,testA,testB}.
    self.trainA = ImageFolder(
        os.path.join('dataset', self.dataset, 'trainA'), train_transform)
    self.trainB = ImageFolder(
        os.path.join('dataset', self.dataset, 'trainB'), train_transform)
    self.testA = ImageFolder(
        os.path.join('dataset', self.dataset, 'testA'), test_transform)
    self.testB = ImageFolder(
        os.path.join('dataset', self.dataset, 'testB'), test_transform)
    self.trainA_loader = DataLoader(self.trainA,
                                    batch_size=self.batch_size,
                                    shuffle=True,
                                    num_workers=self.num_workers)
    self.trainB_loader = DataLoader(self.trainB,
                                    batch_size=self.batch_size,
                                    shuffle=True,
                                    num_workers=self.num_workers)
    # Test loaders yield one image at a time, in order.
    self.testA_loader = DataLoader(self.testA, batch_size=1, shuffle=False)
    self.testB_loader = DataLoader(self.testB, batch_size=1, shuffle=False)
    """ Define Generator, Discriminator """
    self.genA2B = ResnetGenerator(input_nc=3,
                                  output_nc=3,
                                  ngf=self.ch,
                                  n_blocks=self.n_res,
                                  img_size=self.img_size,
                                  light=self.light)
    self.genB2A = ResnetGenerator(input_nc=3,
                                  output_nc=3,
                                  ngf=self.ch,
                                  n_blocks=self.n_res,
                                  img_size=self.img_size,
                                  light=self.light)
    # Two discriminators per domain, with 7 and 5 layers respectively.
    self.disGA = Discriminator(input_nc=3, ndf=self.ch, n_layers=7)
    self.disGB = Discriminator(input_nc=3, ndf=self.ch, n_layers=7)
    self.disLA = Discriminator(input_nc=3, ndf=self.ch, n_layers=5)
    self.disLB = Discriminator(input_nc=3, ndf=self.ch, n_layers=5)
    # One container over all six networks, so they can be hybridized and
    # have their parameters collected together.
    self.whole_model = nn.HybridSequential()
    self.whole_model.add(*[
        self.genA2B, self.genB2A, self.disGA, self.disGB, self.disLA,
        self.disLB
    ])
    self.whole_model.hybridize(static_alloc=False, static_shape=False)
    """ Define Loss """
    self.L1_loss = gloss.L1Loss()
    # weight=2 presumably cancels the 1/2 factor of Gluon's L2Loss so this
    # behaves as a plain squared error — confirm against the Gluon docs.
    self.MSE_loss = gloss.L2Loss(weight=2)
    self.BCE_loss = gloss.SigmoidBCELoss()
    """ Initialize Parameters"""
    params = self.whole_model.collect_params()
    block = self.whole_model
    if not self.debug:
        # Weights and biases get explicit initialisers; the rho / gamma /
        # beta / state parameters use their default initialisation.
        force_init(block.collect_params('.*?_weight'), KaimingUniform())
        force_init(block.collect_params('.*?_bias'), BiasInitializer(params))
        block.collect_params('.*?_rho').initialize()
        block.collect_params('.*?_gamma').initialize()
        block.collect_params('.*?_beta').initialize()
        block.collect_params('.*?_state_.*?').initialize()
    else:
        # Debug mode: no initialisation is performed here.
        pass
    block.collect_params().reset_ctx(self.dev)
    """ Trainer """
    # One Adam trainer over both generators ...
    self.G_params = param_dicts_merge(
        self.genA2B.collect_params(),
        self.genB2A.collect_params(),
    )
    self.G_optim = gluon.Trainer(
        self.G_params,
        'adam',
        dict(learning_rate=self.lr,
             beta1=0.5,
             beta2=0.999,
             wd=self.weight_decay),
    )
    # ... and one over all four discriminators, with the same settings.
    self.D_params = param_dicts_merge(self.disGA.collect_params(),
                                      self.disGB.collect_params(),
                                      self.disLA.collect_params(),
                                      self.disLB.collect_params())
    self.D_optim = gluon.Trainer(
        self.D_params,
        'adam',
        dict(learning_rate=self.lr,
             beta1=0.5,
             beta2=0.999,
             wd=self.weight_decay),
    )
    """ Define Rho clipper to constraint the value of rho in AdaILN and ILN"""
    self.Rho_clipper = RhoClipper(0, 1)
break from mxnet.gluon import nn net = nn.Sequential() net.add(nn.Dense(1)) from mxnet import init net.initialize(init.Normal(sigma=0.01)) from mxnet.gluon import loss as gloss loss = gloss.L2Loss() # 平方损失又称 L2 范数损失。 from mxnet import gluon trainer = gluon.Trainer(net.collect_params(), 'sgd', {'learning_rate': 0.03}) num_epochs = 3 for epoch in range(1, num_epochs + 1): for X, y in data_iter: with autograd.record(): l = loss(net(X), y) l.backward() trainer.step(batch_size) l = loss(net(features), labels) print('epoch %d, loss: %f' % (epoch, l.mean().asnumpy()))
def train_GAS_ch9(model, data_utils, batch_size, lr, num_epochs, ctx):
    """Train ``model`` on a joint classification + regression objective.

    Each batch's model output is split by ``class_and_score_forward`` into
    class outputs and score outputs. Class outputs are trained against the
    first three label columns with sigmoid binary cross-entropy, score
    outputs against the remaining columns with L2 loss. The two sums are
    each divided by the batch size and added.

    Args:
        model: Gluon block called as ``model(X, valid_len)``.
        data_utils: object whose ``get_batch_train(batch_size)`` yields
            ``(X, Y)`` batches.
        batch_size: mini-batch size requested from ``data_utils``.
        lr: Adam learning rate.
        num_epochs: number of epochs.
        ctx: device context for the parameters.
    """
    model.initialize(init.Xavier(), force_reinit=True, ctx=ctx)
    trainer = gluon.Trainer(model.collect_params(), 'adam',
                            {'learning_rate': lr})
    # NOTE(review): `loss` is only referenced by a disabled masked-softmax
    # training path; it is unused in the live code below.
    loss = d2l.MaskedSoftmaxCELoss()
    loss1 = gloss.SigmoidBinaryCrossEntropyLoss(from_sigmoid=True)
    loss2 = gloss.L2Loss()
    animator = d2l.Animator(xlabel='epoch',
                            ylabel='loss',
                            xlim=[1, num_epochs],
                            ylim=[0, 0.25])
    for epoch in range(1, num_epochs + 1):
        timer = d2l.Timer()
        metric = d2l.Accumulator(2)  # loss_sum, num_tokens
        # NOTE(review): apart from `n`, these accumulators are never read.
        l_sum, l_class_sum, l_score_sum, n, acc_sum = 0.0, 0.0, 0.0, 0, 0.0
        rmse_sum = nd.array([0, 0, 0])
        data_iter = data_utils.get_batch_train(batch_size)
        for i, (X, Y) in enumerate(data_iter):
            # Shapes noted by the original author: X=(128, 10, 16),
            # Y=(128, 5).
            X, Y = nd.array(X).as_np_ndarray(), nd.array(Y).as_np_ndarray()
            # Every sequence is treated as full length: valid_len repeats
            # X.shape[1] once per sample.
            valid_len = np.repeat(np.array([X.shape[1]]), X.shape[0])
            with autograd.record():
                # Shape noted by the original author: (128, 10, 5).
                dec_output = model(X, valid_len)
                output = dec_output.as_nd_ndarray()
                cl_res, score_res = class_and_score_forward(output)
                # All-ones sample weights, i.e. no re-weighting.
                cl_weight, conc_weight = nd.ones_like(cl_res), nd.ones_like(
                    score_res)
                l_class = loss1(cl_res.as_np_ndarray(), Y[:, :3],
                                cl_weight.as_np_ndarray()).sum()
                l_conc = loss2(score_res.as_np_ndarray(), Y[:, 3:],
                               conc_weight.as_np_ndarray()).sum()
                n = Y.shape[0]
                l = (l_class / n) + (l_conc / n)
            l.backward()
            d2l.grad_clipping(model, 1)
            num_tokens = n
            # NOTE(review): `l` is already divided by n and step(n) rescales
            # the gradient by the batch size again — confirm this double
            # normalisation is intended.
            trainer.step(num_tokens)
            metric.add(l.sum(), num_tokens)
        if epoch % 10 == 0:
            animator.add(epoch, (metric[0] / metric[1], ))
    print(f'loss {metric[0] / metric[1]:.3f}, {metric[1] / timer.stop():.1f} '
          f'tokens/sec on {str(ctx)}')
return gluon.data.DataLoader(dataset,batch_size,shuffle=is_train) if (__name__=='__main__'): true_w=nd.array([2,-3.4]) true_b=4.2 features,labels=synthetic_data(true_w,true_b,1000) batch_size=10 data_iter=load_array((features,labels),batch_size) net=nn.Sequential() net.add(nn.Dense(1)) net.initialize(init.Normal(sigma=0.01)) loss=gloss.L2Loss() #The squared loss is known as the L2 norm loss trainer=gluon.Trainer(net.collect_params(),'sgd',{'learning_rate':0.03}) num_epochs=3 for epoch in range(1,num_epochs+1): for X,y in data_iter: with autograd.record(): l=loss(net(X),y) l.backward() trainer.step(batch_size) l=loss(net(features),labels) print('epoch %d,loss: %f'%(epoch,l.mean().asnumpy())) print('After regression, w is ',net[0].weight.data()) print('After regression, b is ',net[0].bias.data())
y2_vals=None, legend=None, figsize=(3.5, 2.5)): gb.plt.rcParams['figure.figsize'] = figsize set_matplotlib_formats('retina') gb.plt.xlabel(x_label) gb.plt.ylabel(y_label) gb.plt.semilogy(x_vals, y_vals) if x2_vals and y2_vals: gb.plt.semilogy(x2_vals, y2_vals) gb.plt.legend(legend) gb.plt.show() num_epochs = 100 loss = gloss.L2Loss() def fit_and_plot(train_features, test_features, train_labels, test_labels): net = nn.Sequential() net.add(nn.Dense(1)) net.initialize() batch_size = min(100, train_labels.shape[0]) train_iter = gdata.DataLoader(gdata.ArrayDataset(train_features, train_labels), batch_size, shuffle=True) trainer = gluon.Trainer(net.collect_params(), 'sgd', {'learning_rate': 0.01}) train_ls, test_ls = [], []
def main() -> None:
    """
    Train a linear regression model with Gluon on synthetic data and report
    how far the learned parameters are from the true ones.
    """
    # Ground-truth parameters, as in the earlier linear regression model.
    true_weights = np.array([2, -3.4])
    true_bias = 4.2
    batch_size = 10
    num_epochs = 3

    features, targets = d2l.synthetic_data(true_weights, true_bias, 1000)
    data_iterator = load_array((features, targets), batch_size, True)

    # A sequential network with a single one-unit output layer; the input
    # shape is inferred by Gluon the first time data passes through it.
    net = nn.Sequential()
    net.add(nn.Dense(1))

    # Weights are drawn from a normal distribution with mean 0 and standard
    # deviation 0.01. Initialisation is deferred until the first forward
    # pass.
    net.initialize(init.Normal(sigma=0.01))

    # Squared loss, also known as L2-norm loss, optimised with plain SGD.
    l2_loss = loss.L2Loss()
    trainer = gluon.Trainer(net.collect_params(), "sgd",
                            {"learning_rate": 0.03})

    for epoch in range(1, num_epochs + 1):
        for feature_batch, target_batch in data_iterator:
            with autograd.record():
                batch_loss = l2_loss(net(feature_batch), target_batch)
            # The trainer attached gradients to the parameters already, so
            # backward() is all that is needed here.
            batch_loss.backward()
            # The batch size is passed to step() along with the update.
            trainer.step(batch_size)

        # Loss over the full dataset at the end of the epoch.
        epoch_loss = l2_loss(net(features), targets)
        print(f"epoch {epoch}, loss: {epoch_loss.mean().asnumpy()}")

    # Learned parameters of the first (and only) layer.
    first_layer_weights = net[0].weight.data()
    first_layer_bias = net[0].bias.data()
    print(
        f"Error in estimating the weights: {true_weights.reshape(first_layer_weights.shape) - first_layer_weights}"
    )
    print(f"Error in estimating the bias: {true_bias - first_layer_bias}")