def step_HMC(exe, exe_params, exe_grads, label_key, noise_precision, prior_precision, L=10, eps=1E-6):
    """One Hamiltonian Monte Carlo transition over the executor's parameters.

    Runs L leapfrog steps of size eps from the current parameters, then does a
    Metropolis accept/reject on the total energy (potential + kinetic).

    Returns (params_dict, accepted_flag) where accepted_flag is 1 on accept,
    0 on reject; the executor is left holding the returned parameters.
    """
    # Snapshot the starting parameters; end_params is the working copy that
    # the leapfrog integrator mutates.
    init_params = {k: v.copyto(v.context) for k, v in exe_params.items()}
    end_params = {k: v.copyto(v.context) for k, v in exe_params.items()}
    # Fresh standard-normal momentum for every parameter tensor.
    init_momentums = {k: mx.random.normal(0, 1, v.shape) for k, v in init_params.items()}
    end_momentums = {k: v.copyto(v.context) for k, v in init_momentums.items()}
    init_potential = calc_potential(exe, init_params, label_key, noise_precision, prior_precision)
    # 0. Calculate Initial Energy and Kinetic
    init_kinetic = sum([nd.sum(nd.square(momentum)) / 2.0
                        for momentum in init_momentums.values()]).asscalar()
    # 1. Make a half step for momentum at the beginning
    exe.copy_params_from(end_params)
    exe.forward(is_train=True)
    exe.backward()
    # Force async gradient computation to finish before reading the grads.
    for k, v in exe_grads.items():
        v.wait_to_read()
    for k, momentum in end_momentums.items():
        momentum[:] = momentum - (eps / 2) * exe_grads[k]
    # 2. Alternate full steps for position and momentum
    for i in range(L):
        # 2.1 Full step for position
        for k, param in exe_params.items():
            param[:] = param + eps * end_momentums[k]
        # 2.2 Full step for the momentum, except at the end of trajectory we
        #     perform a half step
        exe.forward(is_train=True)
        exe.backward()
        for v in exe_grads.values():
            v.wait_to_read()
        if i != L - 1:
            for k, momentum in end_momentums.items():
                momentum[:] = momentum - eps * exe_grads[k]
        else:
            for k, momentum in end_momentums.items():
                # We should reverse the sign of the momentum at the end
                # (makes the proposal symmetric; kinetic energy is unaffected).
                momentum[:] = -(momentum - eps / 2.0 * exe_grads[k])
    # NOTE(review): elsewhere this function uses exe.copy_params_from(...);
    # copy_param is presumably an equivalent helper — confirm.
    copy_param(exe, end_params)
    # 3. Calculate acceptance ratio and accept/reject the move
    end_potential = calc_potential(exe, end_params, label_key, noise_precision, prior_precision)
    end_kinetic = sum([nd.sum(nd.square(momentum)) / 2.0
                       for momentum in end_momentums.values()]).asscalar()
    # print init_potential, init_kinetic, end_potential, end_kinetic
    r = numpy.random.rand(1)
    # Metropolis criterion: accept with probability exp(-dH).
    if r < numpy.exp(-(end_potential + end_kinetic) + (init_potential + init_kinetic)):
        exe.copy_params_from(end_params)
        return end_params, 1
    else:
        exe.copy_params_from(init_params)
        return init_params, 0
def newgradfun(g):
    """Gradient of a broadcast op: sum over expanded axes, then restore x's shape."""
    grad = gradfun(g)
    # Any axis where the padded shape differs from g's shape was broadcast;
    # collapse it back by summing (keepdims so axis numbering stays stable).
    for axis, (g_dim, padded_dim) in enumerate(zip(g.shape, padded_shape)):
        if g_dim != padded_dim:
            grad = ndarray.sum(grad, axis=axis, keepdims=True)
    if grad.shape != x.shape:
        grad = grad.reshape(x.shape)
    return grad
def saturation_aug(self, src, x):
    """Randomly perturb saturation by blending `src` with its luma image in place."""
    alpha = 1.0 + random.uniform(-x, x)
    luma_w = nd.array([[[0.299, 0.587, 0.114]]])
    # Per-pixel grayscale value via the ITU-R 601 luma weights.
    gray = nd.sum(src * luma_w, axis=2, keepdims=True)
    gray *= (1.0 - alpha)
    src *= alpha
    src += gray
    return src
def f(a):
    """Double `a` until its L2 norm reaches 1000; scale by 100 if the sum is non-positive."""
    b = a * 2
    while nd.norm(b).asscalar() < 1000:
        b = b * 2
    return b if nd.sum(b).asscalar() > 0 else 100 * b
def grad_clipping(params, theta, ctx):
    """Rescale all gradients in place so their global L2 norm is at most `theta`."""
    if theta is None:
        return
    total = nd.array([0.0], ctx)
    for p in params:
        total += nd.sum(p.grad * p.grad)
    global_norm = nd.sqrt(total).asscalar()
    if global_norm > theta:
        scale = theta / global_norm
        for p in params:
            p.grad[:] *= scale
def contrast_aug(self, src, x):
    """Randomly perturb contrast around the mean gray level; result clipped to [0, 255]."""
    alpha = 1.0 + random.uniform(-x, x)
    luma_w = np.array([[[0.299, 0.587, 0.114]]])
    weighted = src * luma_w
    # Scalar offset: mean gray level times (1 - alpha); the factor 3 undoes
    # averaging over the three channels in weighted.size.
    offset = (3.0 * (1.0 - alpha) / weighted.size) * nd.sum(weighted)
    src *= alpha
    src += offset
    return nd.clip(src, 0, 255)
def _get_gaussian_initialization(num_features, neighborhood_size, num_data):
    """Initializes permutohedral filter as Gaussian kernel."""
    path = ('./experiments/gaussian_initializations/'
            'gaussian_filter_neighborhood{}_features{}'
            '.npy').format(neighborhood_size, num_features)
    kernel = nd.array(np.load(path))
    # Normalize filter for better initialization.
    kernel = kernel / nd.sum(kernel)
    return kernel.repeat(repeats=num_data, axis=0)
def grad_clipping(params, clipping_norm, ctx):
    """Gradient clipping: cap the joint L2 norm of all grads at `clipping_norm`."""
    if clipping_norm is None:
        return
    sq_sum = nd.array([0.0], ctx)
    for p in params:
        sq_sum += nd.sum(p.grad ** 2)
    total_norm = nd.sqrt(sq_sum).asscalar()
    if total_norm > clipping_norm:
        ratio = clipping_norm / total_norm
        for p in params:
            p.grad[:] *= ratio
def train(self, s_batch, a_batch_one_hot, V_trace, advantage):
    """One actor-critic gradient step with Pop-Art style return statistics.

    Updates the running return moments (self.presigma, self.mu, self.sigma),
    computes actor + critic losses on the batch, backprops, and returns the
    collected gradients (batchnorm params excluded) plus the batch size.
    """
    batch_size = s_batch.shape[0]
    # Defensive copies so in-place conversion below cannot mutate caller data.
    s_batch = copy.deepcopy(s_batch)
    a_batch_one_hot = copy.deepcopy(a_batch_one_hot)
    V_trace_batch = copy.deepcopy(V_trace)
    advantage_batch = copy.deepcopy(advantage)
    sigma_prime = copy.deepcopy(self.sigma)
    mu_prime = copy.deepcopy(self.mu)
    # Exponential moving averages of the V-trace targets.
    # NOTE(review): presigma tracks the FIRST moment and mu the SECOND moment
    # here, yet sigma is computed as sqrt(presigma - mu**2); the names (or the
    # operands) look swapped relative to var = E[x^2] - E[x]^2 — verify.
    self.presigma = (1-self.beta)*self.presigma + self.beta*np.sum(np.array(V_trace))/(np.array(V_trace).shape[0])
    self.mu = (1-self.beta)*self.mu + self.beta*np.sum((np.array(V_trace))**2)/(np.array(V_trace).shape[0])
    self.sigma = math.sqrt(self.presigma-self.mu**2)
    pop_art_hyper = self.sigma, sigma_prime, self.mu, mu_prime
    s_batch = nd.array(s_batch, ctx=CTX)
    a_batch_one_hot = nd.array(a_batch_one_hot, ctx=CTX)
    V_trace_batch = nd.array(V_trace_batch, ctx=CTX)
    advantage_batch = nd.array(advantage_batch, ctx=CTX)
    self.reset_noise()
    self.actorcritic.collect_params().zero_grad()
    with mx.autograd.record():
        loss_vec = []
        probs, values = self.actorcritic.forward(s_batch, pop_art_hyper, loss_vec)
        # Auxiliary losses appended by the network during forward.
        loss = 0.
        for element in loss_vec:
            loss = loss + element
        # print 'loss_dropout:', loss
        # Log-probability of the action actually taken.
        logprob = nd.log(nd.sum(data=probs * a_batch_one_hot, axis=1))
        # NOTE(review): entropyloss is computed but unused, and the auxiliary
        # `loss` accumulated above is overwritten just below — confirm intent.
        entropyloss = -nd.sum(nd.sum(data=probs*nd.log(probs), axis=1), axis=0)
        actorloss = -nd.sum(logprob*advantage_batch, axis=0)
        criticloss = nd.sum(nd.square(values-V_trace_batch), axis=0)
        loss = actorloss + criticloss
    loss.backward()
    grads_list = []
    # Skip batchnorm parameters; their grads are not shipped to the learner.
    for name, value in self.actorcritic.collect_params().items():
        if name.find('batchnorm') < 0:
            # grads_list.append(mx.nd.array(value.grad().asnumpy()))
            grads_list.append(value.grad())
    return grads_list, batch_size
def test_on_LFW(model,ctx=mx.gpu()):
    """Evaluate a face-embedding model on the 6000 LFW verification pairs.

    Computes cosine similarity per pair, then 10-fold cross-validates the
    decision threshold. Returns a summary string with accuracy/threshold/timing.
    """
    with open('/home1/LFW/pairs.txt', 'rt') as f:
        pairs_lines = f.readlines()[1:]
    sims = []
    model.get_feature=True
    normalize = transforms.Normalize(mean=0.5, std=0.25)
    transform = transforms.Compose([
        transforms.Resize((96, 112)),
        transforms.ToTensor(),
        normalize,
        # mTransform,
    ])
    start = time.time()
    forward_time = 0
    for i in range(6000):
        p = pairs_lines[i].replace('\n', '').split('\t')
        # 3 fields => same-identity pair, 4 fields => different identities
        # (standard LFW pairs.txt format).
        if 3 == len(p):
            sameflag = 1
            name1 = p[0] + '/' + p[0] + '_' + '{:04}.jpg'.format(int(p[1]))
            name2 = p[0] + '/' + p[0] + '_' + '{:04}.jpg'.format(int(p[2]))
        if 4 == len(p):
            sameflag = 0
            name1 = p[0] + '/' + p[0] + '_' + '{:04}.jpg'.format(int(p[1]))
            name2 = p[2] + '/' + p[2] + '_' + '{:04}.jpg'.format(int(p[3]))
        img1 = nd.array(Image.open('/home1/LFW/aligned_lfw-112X96/' + name1))
        img2 = nd.array(Image.open('/home1/LFW/aligned_lfw-112X96/' + name2))
        img1 = transform(img1)
        img2 = transform(img2)
        # Batch the two images so one forward pass embeds both.
        img = nd.stack(img1, img2)
        img = img.as_in_context(ctx)
        fstart = time.time()
        output = model(img)
        forward_time += time.time() - fstart
        f1, f2 = output[0], output[1]
        cosdistance = nd.sum(f1 * f2) / (f1.norm() * f2.norm() + 1e-5)
        sims.append('{}\t{}\t{}\t{}\n'.format(name1, name2, cosdistance.asscalar(), sameflag))
    accuracy = []
    thd = []
    folds = KFold(n=6000, n_folds=10, shuffle=False)
    thresholds = np.arange(0, 1.0, 0.005)
    # NOTE(review): np.array(map(...)) only yields a 2-D array on Python 2;
    # on Python 3 map returns an iterator and this breaks — confirm runtime.
    predicts = np.array(map(lambda line: line.strip('\n').split(), sims))
    for idx, (train, test) in enumerate(folds):
        best_thresh = find_best_threshold(thresholds, predicts[train])
        accuracy.append(eval_acc(best_thresh, predicts[test]))
        thd.append(best_thresh)
    # print time.time() - start-cost
    # single 1080Ti about 100s
    msg = 'LFWACC={:.4f} std={:.4f} thd={:.4f}, model forward test time:{:.4f}, total time: {:.4f}'.format(
        np.mean(accuracy), np.std(accuracy),np.mean(thd),forward_time, time.time()-start)
    return msg
def cal_my_acc(test_files, target_files):
    '''
    this method is deprecated
    Nearest-neighbour face identification: embeds every target file, then for
    each test file picks the target with the highest cosine similarity and
    counts a hit when the identity prefixes match.
    :param test_files: paths of probe images; filename encodes the identity
    :param target_files: paths of gallery images; filename encodes the identity
    :return: (accuracy, test_embeddings_dict, target_embeddings_dict)
    '''
    mTransform = MTransform()
    normalize = transforms.Normalize(mean=0.5, std=0.5)
    transform = transforms.Compose([
        # transforms.Resize((96, 112)),
        transforms.ToTensor(),
        normalize,
        # mTransform,
    ])
    model = sphere_net.SphereNet20()
    model.load_params("log_bn_dy/spherenet.model", ctx=mx.gpu())
    correct = 0
    total = 0
    # Embed the gallery once, keyed by filename-derived label.
    target_emb = {}
    for target_file in target_files:
        target_image = transform(nd.array(
            Image.open(target_file))).as_in_context(mx.gpu())
        target_image = nd.expand_dims(target_image, axis=0)
        target_label = ''.join(target_file.split('/')[-1].split('.')[:-1])
        target_out = model(target_image)
        target_emb[target_label] = target_out
    test_emb = {}
    for test_file in test_files:
        test_image = Image.open(test_file)
        test_image = nd.expand_dims(transform(nd.array(test_image)),
                                    axis=0).as_in_context(mx.gpu())
        test_label = ''.join(test_file.split('/')[-1].split('.')[:-1])
        test_out = model(test_image)
        # Scan the gallery for the most similar embedding.
        max_s = mx.nd.zeros(1, ctx=mx.gpu())
        max_label = ''
        sims = {}
        for target_label, target_out in target_emb.items():
            similarity = nd.sum(test_out * target_out) / \
                (nd.norm(test_out) * nd.norm(target_out))
            sims[target_label] = similarity.asscalar()
            if max_s < similarity:
                max_s = similarity
                max_label = target_label
        # Compare identity prefixes (filename without trailing index).
        if ''.join(max_label.split('_')[:-1]) == ''.join(
                test_label.split('_')[:-1]):
            correct += 1
        else:
            # NOTE: Python 2 print statement; this module targets Python 2.
            print test_label, max_s.asscalar(), max_label
        total += 1
        test_emb[test_label] = test_out
    # print correct, total, float(correct)/total
    return float(correct) / total, test_emb, target_emb
def accuracy(data):
    """Fraction of samples in `data` whose argmax prediction matches the label."""
    correct = 0.0
    seen = 0
    for X, Y in data:
        feats = X.as_in_context(model_ctx)
        labels = Y.as_in_context(model_ctx).reshape(Y.size, -1)
        preds = nd.argmax(net(feats), axis=1).reshape(Y.size, -1)
        correct += nd.sum(preds == labels).asscalar()
        seen += len(X)
    return correct / seen
def edge_func(self, edges):
    """ComplEx-style bilinear edge score, summed over the embedding dimension."""
    hr, hi = nd.split(edges.src['emb'], num_outputs=2, axis=-1)
    tr, ti = nd.split(edges.dst['emb'], num_outputs=2, axis=-1)
    rr, ri = nd.split(edges.data['emb'], num_outputs=2, axis=-1)
    score = hr * tr * rr + hi * ti * rr + hr * ti * ri - hi * tr * ri
    # TODO: check if there exists minus sign and if gamma should be used here(jin)
    return {'score': nd.sum(score, -1)}
def infer(self, head_emb, rel_emb, tail_emb):
    """Score every (head, relation, tail) combination with the ComplEx form.

    Each embedding batch is split into real/imaginary halves; the expand_dims
    chains broadcast the three batches against each other — the result is
    presumably shaped (num_head, num_rel, num_tail) after the final sum over
    the embedding axis (TODO confirm against callers).
    """
    real_head, img_head = nd.split(head_emb, num_outputs=2, axis=-1)
    real_tail, img_tail = nd.split(tail_emb, num_outputs=2, axis=-1)
    real_rel, img_rel = nd.split(rel_emb, num_outputs=2, axis=-1)
    # Same four-term Re(<h, r, conj(t)>) expansion as edge_func, but with
    # broadcasting across all head/rel/tail combinations.
    score = (real_head.expand_dims(axis=1) * real_rel.expand_dims(axis=0)).expand_dims(axis=2) * real_tail.expand_dims(axis=0).expand_dims(axis=0) \
        + (img_head.expand_dims(axis=1) * real_rel.expand_dims(axis=0)).expand_dims(axis=2) * img_tail.expand_dims(axis=0).expand_dims(axis=0) \
        + (real_head.expand_dims(axis=1) * img_rel.expand_dims(axis=0)).expand_dims(axis=2) * img_tail.expand_dims(axis=0).expand_dims(axis=0) \
        - (img_head.expand_dims(axis=1) * img_rel.expand_dims(axis=0)).expand_dims(axis=2) * real_tail.expand_dims(axis=0).expand_dims(axis=0)
    return nd.sum(score, -1)
def hybrid_forward(self, F, data1, data2, **kwargs):
    """Bilinear basis decoder: one scalar per basis weight, mixed by rate_out."""
    # Each basis i contributes sum_j (data1 W_i)_j * data2_j per row.
    basis_outs = [
        F.sum(F.dot(data1, kwargs["weight{}".format(i)]) * data2,
              axis=1, keepdims=True)
        for i in range(self._num_basis_functions)
    ]
    stacked = F.concat(*basis_outs, dim=1)
    return self.rate_out(stacked)
def gradient_clipping(parameters, threshold, ctx):
    """Scale every gradient in place when the joint L2 norm exceeds `threshold`."""
    if threshold is None:
        return
    accum = nd.array([0.0], ctx)
    for parameter in parameters:
        accum = accum + nd.sum(parameter.grad ** 2)
    joint_norm = nd.sqrt(accum).asscalar()
    if joint_norm > threshold:
        for parameter in parameters:
            parameter.grad[:] *= (threshold / joint_norm)
def f(a):
    """Keep doubling `a` until the scalar L2 norm reaches 1000."""
    result = a * 2
    # scalar L2 norm of result
    while nd.norm(result).asscalar() < 1000:
        result = result * 2
    # branch on the sign of the elementwise sum
    if nd.sum(result).asscalar() > 0:
        return result
    return 100 * result
def _evaluate_accuracy(self, data_iterator, net, layer_params):
    """Accuracy of `net` with `layer_params` swapped in, over `data_iterator`."""
    correct = 0.
    seen = 0.
    for data, label in data_iterator:
        data = data.as_in_context(self._context_bnn).reshape((-1, data.shape[1]))
        label = label.as_in_context(self._context_bnn)
        # Install the sampled Bayesian weights before the forward pass.
        replace_params_net(layer_params, net, self._context_bnn)
        preds = nd.argmax(net(data), axis=1)
        correct += nd.sum(preds == label)
        seen += data.shape[0]
    return (correct / seen).asscalar()
def f(a):
    """Doubling loop with debug prints of the input and intermediate norms."""
    b = a * 2
    print('a', a)
    print('nd.norm(a).asscalar()', nd.norm(a).asscalar())
    print('nd.norm(b).asscalar()', nd.norm(b).asscalar())
    while nd.norm(b).asscalar() < 1000:
        b = b * 2
    return b if nd.sum(b).asscalar() > 0 else 100 * b
def grad_clipping(params, theta, ctx):
    """Clip gradients in place to a global L2 norm of at most `theta`."""
    if theta is None:
        return
    sq = nd.array([0.0], ctx)
    for p in params:
        sq += nd.sum(p.grad ** 2)
    norm = nd.sqrt(sq).asscalar()
    if norm <= theta:
        return
    for p in params:
        p.grad[:] *= theta / norm
def check_acc(self, data_iterator):
    """Classification accuracy over `data_iterator`, with a progress line on stdout."""
    correct = 0.
    seen = 0.
    for batch_i, (data, label) in enumerate(data_iterator):
        # self.loss returns (loss, logits); only the logits matter here.
        _, output = self.loss(data, label, train=False)
        preds = nd.argmax(output, axis=1).as_in_context(ctx)
        correct += nd.sum(preds == label.as_in_context(ctx))
        seen += data.shape[0]
        print('Evaluating accuracy. (complete percent: {:.2f}/100)'.format(
            1.0 * batch_i / (self.train_size / self.batch_size) * 100 / 2) + ' ' * 20, end='')
        sys.stdout.write("\r")
    return (correct / seen).asscalar()
def evaluate_accuracy(data_iterator, net, ctx=[mx.cpu()]):
    """Mean accuracy of `net` over an MXNet data iterator (multi-slice batches)."""
    hits = nd.array([0])
    seen = 0.
    data_iterator.reset()
    for batch in data_iterator:
        for X, y in zip(batch.data, batch.label):
            y = y.astype('float32')
            hits += nd.sum(net(X).argmax(axis=1) == y).copyto(mx.cpu())
            seen += y.size
        hits.wait_to_read()  # don't push too many operators into backend
    return hits.asscalar() / seen
def forward(self, is_train, req, in_data, out_data, aux):
    """Center-loss forward: half the mean squared distance to each class center.

    Outputs the scalar loss and the per-sample residuals (used by backward).
    """
    features = in_data[0]
    batch_size = features.shape[0]
    labels = in_data[1]
    centers = in_data[2]
    # Map raw labels to center rows, then gather the matching centers.
    rows = self.class_index[labels]
    matched = centers[rows]
    residual = features - matched
    loss = nd.sum(nd.square(residual)) / batch_size / 2
    self.assign(out_data[0], req[0], loss)
    self.assign(out_data[1], req[0], residual)
def forward(self, is_train, req, in_data, out_data, aux):
    """Standardize the input feature map to zero mean and unit std (whole-tensor).

    The original comment said "Do nothing!", but the body clearly attempts a
    normalization and contained three defects, fixed here:
      * `nd.squre(nd.fea - mean)` — `squre` is a typo (AttributeError at
        runtime) and `nd.fea` should be the local `fea`;
      * the square root was missing, so `std` was actually the variance;
      * the final expression was `fea/std - mean`; standardization is
        `(fea - mean)/std`.
    """
    fea = in_data[0]
    N, C, H, W = fea.shape
    count = N * C * H * W
    # Global mean and standard deviation over the whole (N, C, H, W) tensor.
    mean = nd.sum(fea) / count
    std = nd.sqrt(nd.sum(nd.square(fea - mean)) / count)
    fea = (fea - mean) / std
    self.assign(out_data[0], req[0], fea)
def rbf_kernels(self, x: NDArray, y: NDArray):
    """
    Computes exp(-c ||x - y||^2) using the expansion
    ||x - y||^2 = x . x + y . y - 2 x . y, term by term.
    x are the original features, y the features used for similarity.
    """
    xy = nd.dot(x, y)
    xx = nd.broadcast_axis(nd.sum(sqr(x), axis=1, keepdims=True),
                           axis=1, size=y.shape[1])
    yy = nd.broadcast_axis(nd.sum(sqr(y), axis=0, keepdims=True),
                           axis=0, size=x.shape[0])
    sq_dists = xx + yy - 2 * xy
    print(nd.mean(xx), nd.mean(yy), nd.mean(xy))
    print(nd.mean(sq_dists))
    kernels = nd.exp(-0.05 * sq_dists)
    print(kernels.shape)
    return kernels
def f(a):
    """Doubling loop that counts and prints each iteration."""
    b = a * 2
    i = 0
    while nd.norm(b).asscalar() < 1000:
        i += 1
        print(i)
        b = b * 2
    if nd.sum(b).asscalar() > 0:
        return b
    print('100')
    return 100 * b
def train_model(self):
    """Run five epochs of SGD over the data iterator, printing summed loss per epoch."""
    self.__epochs = 5
    for epoch in range(self.__epochs):
        running_loss = 0
        for self.__batch_X, self.__batch_y in self.__data_iter:
            with autograd.record():
                self.__batch_y_hat = self.__net(self.__batch_X)
                loss = self.__loss_function(self.__batch_y_hat, self.__batch_y)
            loss.backward()
            self.__trainer.step(self.__batch_size)
            running_loss += nd.sum(loss).asscalar()
        print("Epoch %d, average loss: %f" % (epoch, running_loss))
def edge_func(self, edges):
    """ComplEx-style edge score over DGL edge batches, summed along the last axis."""
    head_re, head_im = nd.split(edges.src["emb"], num_outputs=2, axis=-1)
    tail_re, tail_im = nd.split(edges.dst["emb"], num_outputs=2, axis=-1)
    rel_re, rel_im = nd.split(edges.data["emb"], num_outputs=2, axis=-1)
    score = (
        head_re * tail_re * rel_re
        + head_im * tail_im * rel_re
        + head_re * tail_im * rel_im
        - head_im * tail_re * rel_im
    )
    # TODO: check if there exists minus sign and if gamma should be used here(jin)
    return {"score": nd.sum(score, -1)}
def train(self, epoch_cnt, learning_method, learning_params, verbose, model,
          is_random=True, progress=False):
    """Train `model` on the rating dataset for `epoch_cnt` epochs.

    Weight decay is pulled out of the optimizer settings and applied manually
    via the model's own regularisation term `reg`. After `verbose` epochs,
    the model is evaluated with self.test after every epoch.
    """
    # Build the trainer (weight decay handled manually, so zero it here).
    wd = learning_params['wd']
    learning_params['wd'] = 0
    trainer = gluon.Trainer(model.collect_params(), learning_method, learning_params)
    # dense_trainer = gluon.Trainer(model.collect_params(select='.*_(mlp[0-9]|y)'),
    #                               learning_method, learning_params)
    # svd_trainer = gluon.Trainer(model.collect_params(select='.*_(q|p|alpha|b)(_|$)'),
    #                             learning_method, learning_params)
    # alpha = nd.array([alpha]).reshape((1, 1))
    # Training loop
    for epoch in range(epoch_cnt):
        total_loss = 0
        trained_cnt = 0
        data = gdata.DataLoader(self.train_dataset, batch_size=self.batch_size,
                                shuffle=is_random)
        # Iterate over every rating record
        if progress is True:
            data = tqdm(data)
        for u, R_u, i, t, bint, dev, r in data:
            trained_cnt += self.batch_size
            # Forward pass: squared error plus manual weight-decay term.
            with mxnet.autograd.record():
                r_hat, reg = model(u, i, t, R_u, dev, bint)
                loss = (r_hat - r)**2 + wd * reg
            loss.backward()
            # Parameter update
            # dense_trainer.step(self.batch_size)
            # svd_trainer.step(1)
            trainer.step(self.batch_size)
            total_loss += nd.sum(loss).asscalar()
            cur_loss = total_loss / trained_cnt
            if progress is True:
                data.set_description('MSE=%.6f' % cur_loss)
        # # Report result
        # print('Epoch', epoch, 'finished, Loss =',
        #       total_loss[0].asscalar() / self.rating_cnt)
        # Evaluate on the test set once warm-up epochs are done.
        if epoch >= verbose:
            self.test(progress, model)
def train(self, s_batch, a_batch_one_hot, V_trace, advantage):
    """One actor-critic gradient step with per-action balancing weights.

    Rare actions get larger backprop weight (action_bp_rate) so frequent
    actions do not dominate the update. Returns the gradients (batchnorm
    params excluded) and the batch size.
    """
    batch_size = s_batch.shape[0]
    # Frequency of each sample's action within the batch; the squared
    # complement gives higher weight to rarer actions, softmax-normalised.
    action_indx = np.argmax(a_batch_one_hot,axis=1).tolist()
    action_stats = [action_indx.count(action_indx[i]) for i in range(batch_size)]
    action_bp_rate = (1 - np.array(action_stats)/float(batch_size))**2
    # Defensive copies before device transfer.
    s_batch = copy.deepcopy(s_batch)
    a_batch_one_hot = copy.deepcopy(a_batch_one_hot)
    V_trace_batch = copy.deepcopy(V_trace)
    advantage_batch = copy.deepcopy(advantage)
    s_batch = nd.array(s_batch, ctx=CTX)
    a_batch_one_hot = nd.array(a_batch_one_hot, ctx=CTX)
    V_trace_batch = nd.array(V_trace_batch, ctx=CTX)
    advantage_batch = nd.array(advantage_batch, ctx=CTX)
    action_bp_rate = nd.softmax(nd.array(action_bp_rate, ctx=CTX))
    self.actorcritic.collect_params().zero_grad()
    self.reset_noise()
    with mx.autograd.record():
        loss_vec = []
        probs, values, top_decisions = self.actorcritic.forward(s_batch, loss_vec)
        # Auxiliary losses appended by the network during forward.
        loss = 0.
        for element in loss_vec:
            loss = loss + element
        # print 'loss_dropout:', loss
        # 1e-5 guards the logs against zero probabilities.
        logprob = nd.log(nd.sum(data=probs * a_batch_one_hot, axis=1)+1e-5)
        entropy = -nd.sum(nd.sum(data=probs*nd.log(probs+1e-5), axis=1), axis=0)
        top_decision_entropy = -nd.sum(nd.sum(data=top_decisions*nd.log(top_decisions+1e-5), axis=1), axis=0)
        entropy_loss = - entropy
        top_decision_entropy_loss = - top_decision_entropy
        actorloss = -nd.sum(action_bp_rate*(logprob*advantage_batch), axis=0)
        criticloss = nd.sum(action_bp_rate*nd.square(values-V_trace_batch), axis=0)
        # actorloss = -nd.sum(logprob*advantage_batch, axis=0)
        # criticloss = nd.sum(nd.square(values-V_trace_batch), axis=0)
        # NOTE(review): the auxiliary `loss` accumulated above is overwritten
        # here, and top_decision_entropy_loss is currently unused — confirm.
        loss = actorloss + 0.3*criticloss + 0.001*entropy_loss
        # loss = actorloss + 0.3*criticloss + 0.0001*top_decision_entropy_loss
    loss.backward()
    # CTname = threading.currentThread().getName()
    # print(CTname + ' actorloss : '+str(actorloss))
    # print(CTname + ' criticloss : '+str(criticloss))
    # print(CTname + ' entropy_loss : '+str(entropy_loss))
    grads_list = []
    # Skip batchnorm parameters when collecting gradients for the learner.
    for name, value in self.actorcritic.collect_params().items():
        if name.find('batchnorm') < 0:
            # grads_list.append(mx.nd.array(value.grad().asnumpy()))
            grads_list.append(value.grad())
    return grads_list, batch_size
def train(self, s_batch, a_batch_one_hot, V_trace, advantage):
    """Actor-critic update with separate backward passes for actor and critic.

    Runs the network twice: once to backprop the policy-gradient loss, once
    for the value loss. Returns the accumulated gradients (batchnorm params
    excluded) and the batch size.
    """
    batch_size = s_batch.shape[0]
    # Defensive copies before device transfer.
    s_batch = copy.deepcopy(s_batch)
    a_batch_one_hot = copy.deepcopy(a_batch_one_hot)
    V_trace_batch = copy.deepcopy(V_trace)
    advantage_batch = copy.deepcopy(advantage)
    s_batch = nd.array(s_batch, ctx=CTX)
    a_batch_one_hot = nd.array(a_batch_one_hot, ctx=CTX)
    V_trace_batch = nd.array(V_trace_batch, ctx=CTX)
    advantage_batch = nd.array(advantage_batch, ctx=CTX)
    self.actorcritic.collect_params().zero_grad()
    # Pass 1: actor (policy-gradient) loss.
    with mx.autograd.record():
        loss_vec = []
        probs, _ = self.actorcritic(s_batch, loss_vec)
        logprob = nd.log(nd.sum(data=probs * a_batch_one_hot, axis=1))
        actorloss = -nd.sum(logprob*advantage_batch, axis=0)
    actorloss.backward()
    # self.actortrainer.step(batch_size=batch_size, ignore_stale_grad=True)
    # Pass 2: critic (value regression) loss; grads accumulate on top of
    # the actor gradients since zero_grad is not called in between.
    with mx.autograd.record():
        loss_vec = []
        _, values = self.actorcritic(s_batch, loss_vec)
        criticloss = nd.sum(nd.square(values-V_trace_batch), axis=0)
        # print loss
    criticloss.backward()
    # self.critictrainer.step(batch_size=batch_size, ignore_stale_grad=True)
    grads_list = []
    for name, value in self.actorcritic.collect_params().items():
        if name.find('batchnorm') < 0:
            # grads_list.append(mx.nd.array(value.grad().asnumpy()))
            grads_list.append(value.grad())
    return grads_list, batch_size
def check_status(self, input, epoch):
    """Print RBM training diagnostics for the given batch on one stdout line.

    Reports reconstruction error, cross-entropy against the Gibbs
    reconstruction, free energy, and (when configured) KL metrics.
    """
    n_sample = input.shape[0]
    # One Gibbs step v -> h -> v to obtain a reconstruction.
    ph_prob, ph_sample = self.sample_h_given_v(input)
    nv_prob, nv_sample, nh_prob, nh_sample = self.gibbs_hvh(ph_sample)
    # Mean squared reconstruction error per sample.
    error = nd.sum((input - nv_sample)**2) / n_sample
    #use logsoftmax if nan
    cross = -nd.mean(nd.sum(input * nd.log(nv_prob), axis=1))
    freeE = self.get_free_energy(input)
    sys.stdout.write("Training: ")
    sys.stdout.write("epoch= %d " % epoch)
    sys.stdout.write("cross= %f " % cross.asnumpy()[0])
    sys.stdout.write("error= %f " % error.asnumpy()[0])
    sys.stdout.write("freeE= %f " % freeE.asnumpy()[0])
    # Optional diagnostics, only when the exact state enumeration /
    # renormalization-group probabilities were provided.
    if self.enum_states is not None:
        sys.stdout.write("KL= %f " % self.check_KL())
    if self.prob_RGs is not None:
        sys.stdout.write("rgKL= %f " % self.check_rgKL(nv_sample))
    sys.stdout.write("\n")
    return
def hybrid_forward(self, F, pred, label, sample_weight=None):
    """Softmax cross-entropy with optional ignore-label masking and size averaging.

    BUGFIX: the original referenced `valid_label_map` in the final
    `if self._size_average:` branch, but that variable is only defined when
    `self._sparse_label` is also true — dense labels with size averaging
    raised NameError. The size-average renormalisation (scaling by the ratio
    of total to non-ignored labels) only makes sense for the sparse path, so
    it is now applied only there; the dense path falls through to the plain
    batch mean.
    """
    if not self._from_logits:
        pred = F.log_softmax(pred, axis=self._axis)
    valid_label_map = None
    if self._sparse_label:
        if self._size_average:
            # Mask out positions carrying the ignore label.
            valid_label_map = (label != self._ignore_label).astype('float32')
            loss = -(F.pick(pred, label, axis=self._axis, keepdims=True)
                     * valid_label_map)
        else:
            loss = -F.pick(pred, label, axis=self._axis, keepdims=True)
        # Zero the loss wherever the label equals the ignore label.
        loss = F.where(
            label.expand_dims(axis=self._axis) == self._ignore_label,
            F.zeros_like(loss), loss)
    else:
        label = _reshape_like(F, label, pred)
        loss = -F.sum(pred * label, axis=self._axis, keepdims=True)
    loss = _apply_weighting(F, loss, self._weight, sample_weight)
    if self._size_average and valid_label_map is not None:
        # Renormalise so the mean is taken over non-ignored positions only.
        return F.mean(loss, axis=self._batch_axis, exclude=True) * \
            valid_label_map.size / F.sum(valid_label_map)
    return F.mean(loss, axis=self._batch_axis, exclude=True)
def evaluate_accuracy(data_iterator, net, ctx=[mx.cpu()]):
    """Accuracy of `net` over `data_iterator`, splitting each batch across `ctx`."""
    if isinstance(ctx, mx.Context):
        ctx = [ctx]
    correct = nd.array([0])
    total = 0.
    # MXDataIter instances must be rewound before re-iteration.
    if isinstance(data_iterator, mx.io.MXDataIter):
        data_iterator.reset()
    for batch in data_iterator:
        data, label, batch_size = _get_batch(batch, ctx)
        for X, y in zip(data, label):
            correct += nd.sum(net(X).argmax(axis=1) == y).copyto(mx.cpu())
            total += y.size
        correct.wait_to_read()  # don't push too many operators into backend
    return correct.asscalar() / total
def log_sum_exp(vec):
    """Numerically stable log(sum(exp(vec))): shift by the max before exponentiating."""
    shift = nd.max(vec).asscalar()
    return nd.log(nd.sum(nd.exp(vec - shift))) + shift
trainer = gluon.Trainer(net.collect_params(), 'sgd', {'learning_rate': 0.1}) ### 训练 epochs = 5##训练迭代数据次数 batch_size = 10##每次训练输入的样例个数 #learning_rate = .001##学习率 for e in range(epochs): total_loss = 0 for data, label in data_iter: with autograd.record():##自动微分 output = net(data) loss = square_loss(output, label) loss.backward()## 反向传播 #SGD(params, learning_rate)##求解梯度 trainer.step(batch_size) total_loss += nd.sum(loss).asscalar() print("Epoch %d, average loss: %f" % (e, total_loss/num_examples)) ## 查看训练结果 dense = net[0]#我们先从net拿到需要的层,然后访问其权重和位移 print true_w, dense.weight.data() print true_b, dense.bias.data()
def get_rmse_log(net, X_train, y_train):
    """Gets root mse between the logarithms of the prediction and the truth."""
    num_train = X_train.shape[0]
    # Clip predictions at 1 so the log is well-defined.
    clipped = nd.clip(net(X_train), 1, float('inf'))
    total = nd.sum(square_loss(nd.log(clipped), nd.log(y_train))).asscalar()
    return np.sqrt(2 * total / num_train)
def train_and_predict_rnn(rnn, is_random_iter, epochs, num_steps, hidden_dim,
                          learning_rate, clipping_theta, batch_size,
                          pred_period, pred_len, seqs, get_params, get_inputs,
                          ctx, corpus_indices, idx_to_char, char_to_idx,
                          is_lstm=False):
    """Train a character-level RNN from scratch and periodically sample from it.

    Supports random or consecutive batch sampling, optional LSTM cell state,
    gradient clipping, and prints perplexity plus sample continuations every
    `pred_period` epochs. (NOTE: contains a Python 2 print statement.)
    """
    if is_random_iter:
        data_iter = data_iter_random
    else:
        data_iter = data_iter_consecutive
    params = get_params()
    softmax_cross_entropy = gluon.loss.SoftmaxCrossEntropyLoss()
    for e in range(1, epochs + 1):
        # With consecutive sampling, the hidden state only needs to be
        # initialised once at the start of each epoch.
        if not is_random_iter:
            state_h = nd.zeros(shape=(batch_size, hidden_dim), ctx=ctx)
            if is_lstm:
                # Only used when the RNN is an LSTM; ignore otherwise.
                state_c = nd.zeros(shape=(batch_size, hidden_dim), ctx=ctx)
        train_loss, num_examples = 0, 0
        for data, label in data_iter(corpus_indices, batch_size, num_steps, ctx):
            # With random sampling, the hidden state must be re-initialised
            # before every minibatch.
            if is_random_iter:
                state_h = nd.zeros(shape=(batch_size, hidden_dim), ctx=ctx)
                if is_lstm:
                    # Only used when the RNN is an LSTM; ignore otherwise.
                    state_c = nd.zeros(shape=(batch_size, hidden_dim), ctx=ctx)
            with autograd.record():
                # outputs shape: (batch_size, vocab_size)
                if is_lstm:
                    # Only used when the RNN is an LSTM; ignore otherwise.
                    outputs, state_h, state_c = rnn(get_inputs(data), state_h,
                                                    state_c, *params)
                else:
                    outputs, state_h = rnn(get_inputs(data), state_h, *params)
                # Let t_ib_j denote element j of the time-i batch:
                # label shape: (batch_size * num_steps)
                # label = [t_0b_0, t_0b_1, ..., t_1b_0, t_1b_1, ..., ]
                label = label.T.reshape((-1,))
                # Concatenate outputs, shape: (batch_size * num_steps, vocab_size).
                outputs = nd.concat(*outputs, dim=0)
                # After the above, outputs and label are aligned.
                loss = softmax_cross_entropy(outputs, label)
            loss.backward()
            grad_clipping(params, clipping_theta, ctx)
            utils.SGD(params, learning_rate)
            train_loss += nd.sum(loss).asscalar()
            num_examples += loss.size
        if e % pred_period == 0:
            print("Epoch %d. Perplexity %f" % (e, exp(train_loss/num_examples)))
            for seq in seqs:
                print' - ', predict_rnn(rnn, seq, pred_len, params, hidden_dim,
                                        ctx, idx_to_char, char_to_idx,
                                        get_inputs, is_lstm)
            print()
def main():
    """Train a Nature-DQN agent on an Atari ROM with epsilon-greedy exploration.

    Parses CLI options, builds the game environment, Q-network and frozen
    target network, then runs epochs of play interleaved with replay-memory
    updates, logging per-episode statistics and saving parameters per epoch.
    """
    parser = argparse.ArgumentParser(description='Script to test the trained network on a game.')
    parser.add_argument('-r', '--rom', required=False, type=str,
                        default=os.path.join('roms', 'breakout.bin'),
                        help='Path of the ROM File.')
    parser.add_argument('-v', '--visualization', action='store_true',
                        help='Visualize the runs.')
    parser.add_argument('--lr', required=False, type=float, default=0.01,
                        help='Learning rate of the AdaGrad optimizer')
    parser.add_argument('--eps', required=False, type=float, default=0.01,
                        help='Eps of the AdaGrad optimizer')
    parser.add_argument('--clip-gradient', required=False, type=float, default=None,
                        help='Clip threshold of the AdaGrad optimizer')
    parser.add_argument('--double-q', action='store_true',
                        help='Use Double DQN only if specified')
    parser.add_argument('--wd', required=False, type=float, default=0.0,
                        help='Weight of the L2 Regularizer')
    parser.add_argument('-c', '--ctx', required=False, type=str, default='gpu',
                        help='Running Context. E.g `-c gpu` or `-c gpu1` or `-c cpu`')
    parser.add_argument('-d', '--dir-path', required=False, type=str, default='',
                        help='Saving directory of model files.')
    parser.add_argument('--start-eps', required=False, type=float, default=1.0,
                        help='Eps of the epsilon-greedy policy at the beginning')
    parser.add_argument('--replay-start-size', required=False, type=int, default=50000,
                        help='The step that the training starts')
    parser.add_argument('--kvstore-update-period', required=False, type=int, default=1,
                        help='The period that the worker updates the parameters from the sever')
    parser.add_argument('--kv-type', required=False, type=str, default=None,
                        help='type of kvstore, default will not use kvstore, could also be dist_async')
    parser.add_argument('--optimizer', required=False, type=str, default="adagrad",
                        help='type of optimizer')
    args = parser.parse_args()
    # Derive a default save directory from the ROM name and learning rate.
    if args.dir_path == '':
        rom_name = os.path.splitext(os.path.basename(args.rom))[0]
        args.dir_path = 'dqn-%s-lr%g' % (rom_name, args.lr)
    replay_start_size = args.replay_start_size
    max_start_nullops = 30
    replay_memory_size = 1000000
    history_length = 4
    rows = 84
    cols = 84
    ctx = parse_ctx(args.ctx)
    q_ctx = mx.Context(*ctx[0])
    game = AtariGame(rom_path=args.rom, resize_mode='scale', replay_start_size=replay_start_size,
                     resized_rows=rows, resized_cols=cols, max_null_op=max_start_nullops,
                     replay_memory_size=replay_memory_size, display_screen=args.visualization,
                     history_length=history_length)
    ##RUN NATURE — hyper-parameters from the Nature DQN paper.
    freeze_interval = 10000
    epoch_num = 200
    steps_per_epoch = 250000
    update_interval = 4
    discount = 0.99
    eps_start = args.start_eps
    eps_min = 0.1
    eps_decay = (eps_start - eps_min) / 1000000
    eps_curr = eps_start
    # Expressed in update steps rather than environment steps.
    freeze_interval /= update_interval
    minibatch_size = 32
    action_num = len(game.action_set)
    data_shapes = {'data': (minibatch_size, history_length) + (rows, cols),
                   'dqn_action': (minibatch_size,), 'dqn_reward': (minibatch_size,)}
    dqn_sym = dqn_sym_nature(action_num)
    qnet = Base(data_shapes=data_shapes, sym_gen=dqn_sym, name='QNet',
                initializer=DQNInitializer(factor_type="in"), ctx=q_ctx)
    target_qnet = qnet.copy(name="TargetQNet", ctx=q_ctx)
    use_easgd = False
    optimizer = mx.optimizer.create(name=args.optimizer, learning_rate=args.lr, eps=args.eps,
                                    clip_gradient=args.clip_gradient,
                                    rescale_grad=1.0, wd=args.wd)
    updater = mx.optimizer.get_updater(optimizer)
    qnet.print_stat()
    target_qnet.print_stat()
    # Begin Playing Game
    training_steps = 0
    total_steps = 0
    for epoch in range(epoch_num):
        # Run Epoch
        steps_left = steps_per_epoch
        episode = 0
        epoch_reward = 0
        start = time.time()
        game.start()
        while steps_left > 0:
            # Running New Episode
            episode += 1
            episode_loss = 0.0
            episode_q_value = 0.0
            episode_update_step = 0
            episode_action_step = 0
            time_episode_start = time.time()
            game.begin_episode(steps_left)
            while not game.episode_terminate:
                # 1. We need to choose a new action based on the current game status
                if game.state_enabled and game.replay_memory.sample_enabled:
                    do_exploration = (npy_rng.rand() < eps_curr)
                    eps_curr = max(eps_curr - eps_decay, eps_min)
                    if do_exploration:
                        action = npy_rng.randint(action_num)
                    else:
                        # TODO Here we can in fact play multiple gaming instances simultaneously and make actions for each
                        # We can simply stack the current_state() of gaming instances and give prediction for all of them
                        # We need to wait after calling calc_score(.), which makes the program slow
                        # TODO Profiling the speed of this part!
                        current_state = game.current_state()
                        state = nd.array(current_state.reshape((1,) + current_state.shape),
                                         ctx=q_ctx) / float(255.0)
                        qval_npy = qnet.forward(is_train=False, data=state)[0].asnumpy()
                        action = numpy.argmax(qval_npy)
                        episode_q_value += qval_npy[0, action]
                        episode_action_step += 1
                else:
                    # Replay memory not warm yet: act uniformly at random.
                    action = npy_rng.randint(action_num)
                # 2. Play the game for a single mega-step (Inside the game, the action may be repeated for several times)
                game.play(action)
                total_steps += 1
                # 3. Update our Q network if we can start sampling from the replay memory
                #    Also, we update every `update_interval`
                if total_steps % update_interval == 0 and game.replay_memory.sample_enabled:
                    # 3.1 Draw sample from the replay_memory
                    training_steps += 1
                    episode_update_step += 1
                    states, actions, rewards, next_states, terminate_flags \
                        = game.replay_memory.sample(batch_size=minibatch_size)
                    states = nd.array(states, ctx=q_ctx) / float(255.0)
                    next_states = nd.array(next_states, ctx=q_ctx) / float(255.0)
                    actions = nd.array(actions, ctx=q_ctx)
                    rewards = nd.array(rewards, ctx=q_ctx)
                    terminate_flags = nd.array(terminate_flags, ctx=q_ctx)
                    # 3.2 Use the target network to compute the scores and
                    #     get the corresponding target rewards
                    if not args.double_q:
                        target_qval = target_qnet.forward(is_train=False, data=next_states)[0]
                        target_rewards = rewards + nd.choose_element_0index(target_qval,
                                                                            nd.argmax_channel(target_qval)) \
                            * (1.0 - terminate_flags) * discount
                    else:
                        # Double DQN: select the action with the online net,
                        # evaluate it with the target net.
                        target_qval = target_qnet.forward(is_train=False, data=next_states)[0]
                        qval = qnet.forward(is_train=False, data=next_states)[0]
                        target_rewards = rewards + nd.choose_element_0index(target_qval,
                                                                            nd.argmax_channel(qval)) \
                            * (1.0 - terminate_flags) * discount
                    outputs = qnet.forward(is_train=True, data=states,
                                           dqn_action=actions, dqn_reward=target_rewards)
                    qnet.backward()
                    qnet.update(updater=updater)
                    # 3.3 Calculate Loss (Huber-style: quadratic inside [-1, 1],
                    #     linear outside)
                    diff = nd.abs(nd.choose_element_0index(outputs[0], actions) - target_rewards)
                    quadratic_part = nd.clip(diff, -1, 1)
                    loss = 0.5 * nd.sum(nd.square(quadratic_part)).asnumpy()[0] + \
                        nd.sum(diff - quadratic_part).asnumpy()[0]
                    episode_loss += loss
                    # 3.3 Update the target network every freeze_interval
                    if training_steps % freeze_interval == 0:
                        qnet.copy_params_to(target_qnet)
            steps_left -= game.episode_step
            time_episode_end = time.time()
            # Update the statistics
            epoch_reward += game.episode_reward
            info_str = "Epoch:%d, Episode:%d, Steps Left:%d/%d, Reward:%f, fps:%f, Exploration:%f" \
                % (epoch, episode, steps_left, steps_per_epoch, game.episode_reward,
                   game.episode_step / (time_episode_end - time_episode_start), eps_curr)
            if episode_update_step > 0:
                info_str += ", Avg Loss:%f/%d" % (episode_loss / episode_update_step,
                                                  episode_update_step)
            if episode_action_step > 0:
                info_str += ", Avg Q Value:%f/%d" % (episode_q_value / episode_action_step,
                                                     episode_action_step)
            if episode % 100 == 0:
                logging.info(info_str)
        end = time.time()
        fps = steps_per_epoch / (end - start)
        qnet.save_params(dir_path=args.dir_path, epoch=epoch)
        logging.info("Epoch:%d, FPS:%f, Avg Reward: %f/%d"
                     % (epoch, fps, epoch_reward / float(episode), episode))
return nd.dot(X, w) + b  # return the prediction value (tail of a `net` definition that starts above this chunk)

# loss
def square_loss(yhat, y):
    # Squared error; reshape y so the subtraction broadcasts against yhat.
    return (yhat - y.reshape(yhat.shape)) ** 2

# optimization
def SGD(params, lr):
    for param in params:
        param[:] = param - lr * param.grad; # why param[:]: in-place write keeps the same NDArray alive for autograd

# training
epochs = 5 # scan 5 times for raw data
learning_rate = 0.001
for e in range(epochs):
    total_loss = 0
    for data, label in data_iter():
        with ag.record():
            output = net(data)
            loss = square_loss(output, label) # label is the true value in the training set
        loss.backward()
        SGD(params, learning_rate)
        total_loss += nd.sum(loss).asscalar() # to float
    print("Epoch %d, average loss: %f" % (e, total_loss/num_examples))
# Compare learned parameters against the ground truth.
print(true_b, b);
print(true_w, w);
def newgradfun(g):
    """Wrap gradfun and reduce its output to a scalar via a full sum."""
    upstream = gradfun(g)
    return ndarray.sum(upstream)
def accuracy(output, label):
    """Count of rows whose argmax prediction equals the label, as a Python scalar."""
    hits = output.argmax(axis=1) == label
    return nd.sum(hits).asscalar()