def experiment(min_k=10, max_k=1500, input_folder='experiment_2/201103', config_file='../Gnip-Trend-Detection/config_files/config_k.cfg'):
    """Sweep k values: build the trends dictionary once, then for every k in
    [min_k, max_k) generate frequency/eta time series, run the external
    trend-analysis and trend-plot scripts, and append an evaluation row.

    Stops early (with a message) as soon as `process` reports that the
    intervals for some k are too small.
    """
    print('Step1: generate trends dict and styles')
    trends_file = f'{input_folder}_trends.json'
    get_trends(input_folder, trends_file)

    print(f'Step2: generate freq and eta time series from {min_k} to {max_k} k values')
    progress = enlighten.Manager().counter(total=max_k - min_k)
    for k in range(min_k, max_k):
        out_folder = f'{input_folder}/k_values/{k}'
        if Path(out_folder).exists():
            continue  # this k was already processed in a previous run
        if not process(k, input_folder, out_folder):
            print(f'STOP: {k} Intervals too small')
            return
        print(f'... generated {k} FTS')
        subprocess.run(['python', '../Gnip-Trend-Detection/trend_analyze.py',
                        '-c', config_file, '-f', out_folder])
        print(f'... generated {k} ETS')
        subprocess.run(['python', '../Gnip-Trend-Detection/trend_plot.py',
                        '-c', config_file, '-f', out_folder, '-s', trends_file])
        print(f'... plotting {k}')
        evaluation(out_folder, f'{input_folder}/k_values/evaluation.csv')
        print(f'... evaluation {k}')
        progress.update(1)
def advTrain_FGSM(model_org, model_new, loaders, args, DEVICE, log_dir, model_dir):
    """Adversarially fine-tune `model_new` with FGSM examples.

    For each batch, a clean forward/backward pass populates both the parameter
    gradients and the input gradient; the input gradient drives an FGSM
    perturbation, and a second backward pass on the perturbed batch is
    accumulated before a single optimizer step.

    Args:
        model_org: frozen reference model used by the attacker and for adversarial testing.
        model_new: the model being trained.
        loaders: (train_loader, valid_loader, test_loader) tuple.
        args: needs .lr, .FGSM_epochs, .FGSM_number, .epsilon_FGSM.
    Side effects: saves weights to <model_dir>/params_advFGSM.pt every epoch.
    """
    train_loader, valid_loader, test_loader = loaders
    model_org.to(DEVICE)
    model_new.to(DEVICE)
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model_new.parameters(), lr=args.lr)
    filename_save = os.path.join(model_dir, 'params_advFGSM.pt')

    # NOTE(review): this guards on the *save target* rather than a separate
    # pre-trained-weights file, and silently continues when model_dir itself
    # is missing — confirm this is the intended precondition.
    if os.path.exists(model_dir):
        if not os.path.exists(filename_save):
            sys.exit("Pre-trained model does not exist! Terminate.")

    attacker = AttackerOfClassification(model_org)

    # FGSM adversarial-training main loop
    for epoch in range(args.FGSM_epochs):
        model_org.eval()
        model_new.train()
        for i, (images, labels) in tqdm(enumerate(train_loader)):
            if i == args.FGSM_number:  # cap the number of batches per epoch
                break
            images = images.to(DEVICE).requires_grad_()
            labels = labels.to(DEVICE)
            optimizer.zero_grad()
            # Clean pass: backward() accumulates parameter gradients AND fills
            # images.grad, which the FGSM attack consumes below.
            outputs_org = model_new(images)
            loss_org = criterion(outputs_org, labels)
            loss_org.backward()
            data_GRAD = images.grad.data
            images_perturb = attacker.fgsm_attack(images, args.epsilon_FGSM, data_GRAD)
            outputs_adv = model_new(images_perturb)
            loss_adv = criterion(outputs_adv, labels)
            loss_adv.backward()
            optimizer.step()  # applies gradients from both the clean and adversarial losses

        acc_train = evaluation(model_new, train_loader, DEVICE)
        acc_valid = evaluation(model_new, valid_loader, DEVICE)
        acc_test = evaluation(model_new, test_loader, DEVICE)
        acc_adv_test_new = attacker.fgsm_test(model_new, DEVICE, test_loader, args.epsilon_FGSM)
        acc_adv_test_org = attacker.fgsm_test(model_org, DEVICE, test_loader, args.epsilon_FGSM)
        # FIX: report the actual loop bound (args.FGSM_epochs); the original
        # printed args.epochs, which does not match this loop.
        print(
            'Epoch: %d/%d, Train acc: %0.2f, Valid acc: %0.2f, Test acc: %0.2f ; Adv test acc new : %0.2f; Adv test acc org : %0.2f'
            % (epoch, args.FGSM_epochs, acc_train, acc_valid, acc_test,
               acc_adv_test_new, acc_adv_test_org))
        torch.save(model_new.state_dict(), filename_save)
def training(batch_size, n_epoch, lr, model_dir, train, valid, model, device):
    """Train `model` with BCE loss, validate every epoch, and checkpoint the
    model whenever validation accuracy improves on the best seen so far."""
    total = sum(p.numel() for p in model.parameters())
    trainable = sum(p.numel() for p in model.parameters() if p.requires_grad)
    print('\nstart training, parameter total:{}, trainable:{}\n'.format(total, trainable))

    model.train()  # training mode, so the optimizer can update the parameters
    criterion = nn.BCELoss()  # binary cross entropy loss
    t_batch = len(train)
    v_batch = len(valid)
    optimizer = optim.Adam(model.parameters(), lr=lr)
    total_loss, total_acc, best_acc = 0, 0, 0

    for epoch in range(n_epoch):
        total_loss, total_acc = 0, 0

        # ---- training pass ----
        for i, (inputs, labels) in enumerate(train):
            inputs = inputs.to(device, dtype=torch.long)
            labels = labels.to(device, dtype=torch.float)  # BCELoss expects float targets
            optimizer.zero_grad()  # gradients accumulate, so reset before each batch
            outputs = model(inputs).squeeze()  # drop trailing dim so criterion accepts it
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            correct = evaluation(outputs, labels)  # batch training accuracy
            total_acc += (correct / batch_size)
            total_loss += loss.item()
            print('[ Epoch{}: {}/{} ] loss:{:.3f} acc:{:.3f} '.format(
                epoch + 1, i + 1, t_batch, loss.item(), correct * 100 / batch_size), end='\r')
        print('\nTrain | Loss:{:.5f} Acc: {:.3f}'.format(
            total_loss / t_batch, total_acc / t_batch * 100))

        # ---- validation pass ----
        model.eval()  # freeze parameters during evaluation
        with torch.no_grad():
            total_loss, total_acc = 0, 0
            for i, (inputs, labels) in enumerate(valid):
                inputs = inputs.to(device, dtype=torch.long)
                labels = labels.to(device, dtype=torch.float)
                outputs = model(inputs).squeeze()
                loss = criterion(outputs, labels)
                correct = evaluation(outputs, labels)
                total_acc += (correct / batch_size)
                total_loss += loss.item()
            print("Valid | Loss:{:.5f} Acc: {:.3f} ".format(
                total_loss / v_batch, total_acc / v_batch * 100))
            if total_acc > best_acc:
                # best validation result so far — keep this checkpoint for prediction
                best_acc = total_acc
                torch.save(model, "{}/ckpt0.model".format(model_dir))
                print('saving model with acc {:.3f}'.format(total_acc / v_batch * 100))
        print('-----------------------------------------------')
        model.train()  # back to training mode after the eval pass
def validate(test_loader, model, pooling, embedding, k_list, args):
    """Embed the whole test set and score retrieval quality.

    Runs model -> pooling -> embedding on every batch, keeps both raw and
    L2-normalized embeddings, then calls utils.evaluation on the normalized ones.

    Returns:
        (nmi, recall, RP, MAP, features, labels) where features/labels are numpy arrays.
    """
    # switch to evaluation mode
    model.eval()
    embedding.eval()
    # FIX: the original re-ran torch.cat inside the loop, copying all collected
    # data on every batch (accidental O(n^2)). Collect chunks and concatenate once.
    feat_chunks, feat_l2_chunks, label_chunks = [], [], []
    with torch.no_grad():
        for i, (input, target) in tqdm(enumerate(test_loader), total=len(test_loader)):
            if args.gpu is not None:
                input = input.cuda(args.gpu, non_blocking=True)
            # compute output
            output = embedding(pooling(model(input)))
            output_l2 = F.normalize(output, p=2, dim=1)
            feat_chunks.append(output.cpu())
            feat_l2_chunks.append(output_l2.cpu())
            label_chunks.append(target)
    # Preserve the original empty-loader behavior (empty tensors, not an error).
    testdata = torch.cat(feat_chunks, 0) if feat_chunks else torch.Tensor()
    testdata_l2 = torch.cat(feat_l2_chunks, 0) if feat_l2_chunks else torch.Tensor()
    testlabel = torch.cat(label_chunks) if label_chunks else torch.LongTensor()
    features = testdata.numpy()
    features_l2 = testdata_l2.numpy()
    labels = testlabel.numpy()
    nmi, recall, RP, MAP = utils.evaluation(features_l2, labels, k_list, args)
    return nmi, recall, RP, MAP, features, labels
def evaluate():
    """Evaluate the pre-trained baseline classifier on k-way n-shot episodes
    sampled from the MiniImageNet support split, logging per-episode accuracy."""
    device = torch.device('cuda')
    torch.backends.cudnn.benchmark = True

    # ------------------------------- #
    # Load Model
    # ------------------------------- #
    PATH = './baseline_89.pth'
    model = Conv4Classifier(64)
    model.load_state_dict(torch.load(PATH))
    model.to(device)
    model.eval()

    ####################
    # Prepare Data Set #
    ####################
    print('preparing dataset')
    n = 5   # samples per supporting class
    k = 5   # number of classes (ways)
    q = 15  # query images per class
    episodes_per_epoch = 10000
    base_cls, val_cls, support_cls = get_splits()
    support = MiniImageNet('support', base_cls, val_cls, support_cls)
    episode_sampler = SupportingSetSampler(support, n, k, q, episodes_per_epoch)
    support_loader = DataLoader(support, batch_sampler=episode_sampler, num_workers=4)

    logging.basicConfig(
        filename=f'./logs/baseline_cosine_result_{k}-way_{n}-shot.log',
        filemode='w',
        format='%(asctime)s - %(message)s',
        level=logging.INFO)

    print('start to evaluate')
    accs = 0
    for i, data in enumerate(tqdm(support_loader)):
        inputs, labels = prepare_nshot_task(n, k, q, data)
        embeddings = model(inputs, feature=True)
        acc = evaluation(embeddings, labels, n, k, q)
        logging.info(f'[{i:3d}]: {acc}%')
        accs += acc
    logging.info(
        f'Average ACC is {accs}/{len(support_loader)}={accs/len(support_loader)}'
    )
def run_model(filename, config):
    """Run the trained RNN clustering model on the data in `filename` and
    cluster the resulting hidden representations with KMeans.

    The whole test set is fed as a single batch; `config` is mutated in place
    with the data-dependent fields before the graph is built.
    """
    os.environ['CUDA_VISIBLE_DEVICES'] = '0'
    gpu_config = tf.ConfigProto()
    gpu_config.gpu_options.allow_growth = True  # grab GPU memory on demand, not all upfront
    testing_data, testing_label = utils.load_data(filename)
    testing_label, num_classes = utils.transfer_labels(testing_label)
    config.class_num = num_classes
    config.embedding_size = 1
    config.batch_size = testing_data.shape[0]  # one batch = entire test set
    config.num_steps = testing_data.shape[1]
    test_noise_data = np.zeros(shape=testing_data.shape)  # no input corruption at test time
    with tf.Session(config=gpu_config) as sess:
        # Graph must be built before variable init / checkpoint restore.
        model = RNN_clustering_model(config=config)
        input_tensors, loss_tensors, hidden_abstract, F_update, output_tensor = model.build_model()
        sess.run(tf.global_variables_initializer())
        # model_path = './Model/model.ckpt'
        model_path = './model_test/beetlefly'
        saver = tf.train.Saver()
        saver.restore(sess, model_path)  # overwrite the fresh init with trained weights
        test_total_abstract = sess.run(hidden_abstract,
                                       feed_dict={input_tensors['inputs']: testing_data,
                                                  input_tensors['noise']: test_noise_data
                                                  })
        # Width 2 * sum(hidden sizes) — presumably bidirectional states; confirm.
        test_hidden_val = np.array(test_total_abstract).reshape(-1, np.sum(config.hidden_size) * 2)
        km = KMeans(n_clusters=num_classes)
        km_idx = km.fit_predict(test_hidden_val)
        # NOTE(review): ri/nmi/acc are neither returned nor printed here —
        # presumably utils.evaluation logs them itself; confirm.
        ri, nmi, acc = utils.evaluation(prediction=km_idx, label=testing_label)
def main():
    """Train an all-pairs ECOC ensemble of SVMs and evaluate the test set with
    both Hamming-distance and loss-based decoding."""
    k = utils.numOfClasses
    columns = int(((k * k) * (k - 1)) / 4)
    ecoc_matrix = np.zeros((k, columns), dtype=float)
    classifiers = []
    trainset = utils.fetchData()
    print("total train set data len: {}".format(str(len(trainset))))
    testset = utils.loadTestData()
    lambda_p = 1
    epoch_number = 15
    pair_index = 0

    # Train one binary SVM per (positive class, opposing pair) combination.
    for positive in range(utils.numOfClasses):
        # every other class can appear in an opposing pair
        opposing = [c for c in range(utils.numOfClasses) if c != positive]
        for y0, y1 in get_all_pairs(opposing):
            update_ecoc_matrix(ecoc_matrix, pair_index, positive, (y0, y1))
            print("working on {} vs {},{}".format(positive, y0, y1))
            pair_index += 1
            filtered_data = filter_data(trainset, (positive, y0, y1))
            print("relevant data: {}".format(str(len(filtered_data))))
            binary_data = utils.transformToBinaryClasses(filtered_data, positiveClass=positive)
            svm = utils.SVM(utils.inputDim, utils.eta, lambda_p, epoch_number)
            svm.train(binary_data)
            classifiers.append(svm)
            print("finished with #{} model".format(pair_index))

    # Evaluate test data: Hamming-distance decoding, then loss-based decoding.
    utils.evaluation(testset, utils.HammingDistance, ecoc_matrix, 'test.random2.ham.pred',
                     classifiers, distanceMetric="Hamming")
    utils.evaluation(testset, utils.lossBaseDecoding, ecoc_matrix, 'test.random2.loss.pred',
                     classifiers, distanceMetric="LBD")
def pred_evaluation(self, mode):
    """Score the model on the validation or test split.

    Args:
        mode: "valid" or "test" — selects which stored split to evaluate.
    Returns:
        (mean batch loss, recall list, mrr list) with one recall/MRR entry per
        cutoff in self.k. Also updates self.max_val_recall / self.max_te_recall
        in place with the best recall seen so far.
    """
    if mode == "valid":
        sess_idx = self.val_sess_idx
        df_x = self.val_x
        df_y = self.val_y
    elif mode == "test":
        sess_idx = self.te_sess_idx
        df_x = self.te_x
        df_y = self.te_y
    batch_loss_list = []
    recalls = []
    mrrs = []
    evaluation_point_count = []
    # One accumulator slot per top-k cutoff.
    for itr in range(len(self.k)):
        recalls.append(0)
        mrrs.append(0)
        evaluation_point_count.append(0)
    num_batch = math.ceil(np.float32(len(sess_idx)) / self.batch_size)
    for batch_itr in range(int(num_batch)):
        start_itr = self.batch_size * batch_itr
        # clamp the last batch to the end of the split
        end_itr = np.minimum(self.batch_size * (batch_itr + 1), len(sess_idx))
        temp_batch_x = df_x[sess_idx[start_itr:end_itr]]
        temp_batch_y = df_y[sess_idx[start_itr:end_itr]]
        batch_x, batch_y, mask, labels, lengths = convert_batch_data(
            temp_batch_x, temp_batch_y, self.num_items, maxlen=None)
        feed_dict = {
            self.rnn_x: batch_x,
            self.rnn_y: batch_y,
            self.mask: mask,
            self.keep_prob_input: 1.0,  # no dropout at evaluation time
            self.keep_prob_ho: 1.0,
            self.batch_var_length: lengths
        }
        preds, pred_loss_ = self.sess.run([self.pred, self.cost], feed_dict=feed_dict)
        batch_loss_list.append(pred_loss_)
        # evaluation() folds this batch into the running recall/MRR accumulators.
        recalls, mrrs, evaluation_point_count = evaluation(
            labels, preds, recalls, mrrs, evaluation_point_count, self.k)
    recall_list = []
    mrr_list = []
    for itr in range(len(self.k)):
        recall = np.asarray(recalls[itr], dtype=np.float32) / evaluation_point_count[itr]
        mrr = np.asarray(mrrs[itr], dtype=np.float32) / evaluation_point_count[itr]
        # Track the best recall observed so far for the corresponding split.
        if self.max_val_recall[itr] < recall and mode == "valid":
            self.max_val_recall[itr] = recall
        if self.max_te_recall[itr] < recall and mode == "test":
            self.max_te_recall[itr] = recall
        recall_list.append(recall)
        mrr_list.append(mrr)
    return np.mean(batch_loss_list), recall_list, mrr_list
def evaluate(model, normalize, epoch, support_loader, n, k, q, device, logger):
    """Run n-shot evaluation over `support_loader` with both l2 and cosine
    metrics, then log mean accuracy with a confidence interval for each."""
    model.eval()
    l2_scores, cosine_scores = [], []
    with torch.no_grad():
        for episode in tqdm(support_loader):
            imgs, labels = prepare_nshot_task(n, k, q, episode, device)
            _, outputs, _ = model(imgs, norm=normalize)
            l2_scores.append(evaluation(outputs, labels, n, k, q, 'l2'))
            cosine_scores.append(evaluation(outputs, labels, n, k, q, 'cosine'))
    m_l2, pm_l2 = compute_confidence_interval(l2_scores)
    m_cosine, pm_cosine = compute_confidence_interval(cosine_scores)
    logger.info(f'{epoch:3d}.pth: {n}-shot \t l2: {m_l2:.2f}+/-{pm_l2:.2f} \t '
                f'cosine: {m_cosine:.2f}+/-{pm_cosine:.2f}')
def main():
    """Evaluate a Baseline lane-segmentation network on the CULane test list,
    optionally dumping per-image lane predictions, and log the final metrics."""
    net = Baseline(num_classes=culane.num_classes, deep_base=args['deep_base']).cuda()
    print('load checkpoint \'%s.pth\' for evaluation' % args['checkpoint'])
    pretrained_dict = torch.load(os.path.join(ckpt_path, exp_name, args['checkpoint'] + '_checkpoint.pth'))
    # k[7:] strips a 7-char key prefix — presumably DataParallel's 'module.'; confirm.
    pretrained_dict = {k[7:]: v for k, v in pretrained_dict.items()}
    net.load_state_dict(pretrained_dict)
    net.eval()
    save_dir = os.path.join(ckpt_path, exp_name, 'vis_%s_test' % args['checkpoint'])
    check_mkdir(save_dir)
    log_path = os.path.join(save_dir, str(datetime.datetime.now()) + '.log')
    # each list line: "<image path> <label path> ..." relative to culane.root
    data_list = [l.strip('\n') for l in open(os.path.join(culane.root, culane.list, 'test_gt.txt'), 'r')]
    loss_record = AverageMeter()
    gt_all, prediction_all = [], []
    for idx in range(len(data_list)):
        print('evaluating %d / %d' % (idx + 1, len(data_list)))
        img = Image.open(culane.root + data_list[idx].split(' ')[0]).convert('RGB')
        gt = Image.open(culane.root + data_list[idx].split(' ')[1])
        img, gt = val_joint_transform(img, gt)
        with torch.no_grad():
            img_var = Variable(img_transform(img).unsqueeze(0)).cuda()
            gt_var = Variable(mask_transform(gt).unsqueeze(0)).cuda()
            prediction = net(img_var)[0]
            # NOTE(review): `criterion` is not defined in this function —
            # it must be a module-level global; confirm.
            loss = criterion(prediction, gt_var)
            loss_record.update(loss.data, 1)
            # per-class probabilities, used for line extraction below
            scoremap = F.softmax(prediction, dim=1).data.squeeze().cpu().numpy()
            # argmax over classes -> label map
            prediction = prediction.data.max(1)[1].squeeze().cpu().numpy().astype(np.uint8)
            prediction_all.append(prediction)
            gt_all.append(np.array(gt))
            if args['save_results']:
                check_mkdir(save_dir + data_list[idx].split(' ')[0][:-10])
                out_file = open(os.path.join(save_dir, data_list[idx].split(' ')[0][1:-4] + '.lines.txt'), 'w')
                prob2lines(scoremap, out_file)
    acc, acc_cls, mean_iu, fwavacc = evaluation(prediction_all, gt_all, culane.num_classes)
    log = 'val results: loss %.5f acc %.5f acc_cls %.5f mean_iu %.5f fwavacc %.5f' % \
          (loss_record.avg, acc, acc_cls, mean_iu, fwavacc)
    print(log)
    open(log_path, 'w').write(log + '\n')
def test_model(self, itr):
    """Evaluate the (denoising) model on the held-out ratings at epoch `itr`,
    record RMSE/MAE/ACC/log-likelihood, and drive the early-stopping state."""
    start_time = time.time()
    # Binomial(n=1, p=1-0): "1 - 0" encodes a corruption ratio of 0, i.e. an
    # all-ones mask — no input corruption at test time.
    mask_corruption_np = np.random.binomial(
        1, 1 - 0, (self.num_users, self.num_items))
    batch_set_idx = np.arange(self.num_users)  # evaluate every user in one batch
    Cost, Decoder = self.sess.run(
        [self.cost, self.Decoder],
        feed_dict={
            self.model_mask_corruption: mask_corruption_np,
            self.input_R: self.test_R,
            self.input_mask_R: self.test_mask_R,
            self.model_batch_data_idx: batch_set_idx
        })
    self.test_cost_list.append(Cost)
    Estimated_R = Decoder.clip(min=0, max=1)  # predicted ratings live in [0, 1]
    RMSE, MAE, ACC, AVG_loglikelihood = evaluation(self.test_R, self.test_mask_R,
                                                   Estimated_R, self.num_test_ratings)
    self.test_rmse_list.append(RMSE)
    self.test_mae_list.append(MAE)
    self.test_acc_list.append(ACC)
    self.test_avg_loglike_list.append(AVG_loglikelihood)
    if itr % self.display_step == 0:
        print("Testing //", "Epoch %d //" % (itr),
              " Total cost = {:.2f}".format(Cost),
              "Elapsed time : %d sec" % (time.time() - start_time))
        print("RMSE = {:.4f}".format(RMSE), "MAE = {:.4f}".format(MAE),
              "ACC = {:.10f}".format(ACC),
              "AVG Loglike = {:.4f}".format(AVG_loglikelihood))
        print("=" * 100)
    # Early stopping: reset patience on any new best RMSE, otherwise count up.
    if RMSE <= self.min_RMSE:
        self.min_RMSE = RMSE
        self.min_epoch = itr
        self.patience = 0
    else:
        self.patience = self.patience + 1
    if (itr > 100) and (self.patience >= self.total_patience):
        # Re-append the best epoch's metrics so the final list entries
        # correspond to the best model.
        self.test_rmse_list.append(self.test_rmse_list[self.min_epoch])
        self.test_mae_list.append(self.test_mae_list[self.min_epoch])
        self.test_acc_list.append(self.test_acc_list[self.min_epoch])
        self.test_avg_loglike_list.append(
            self.test_avg_loglike_list[self.min_epoch])
        self.earlystop_switch = True
        print("========== Early Stopping at Epoch %d" % itr)
def predict(self, x_test, y_test):
    """Predict demand for `x_test`, print the evaluation against `y_test`,
    append the predictions to the BigQuery output table, and return them."""
    y_pred = self.model.predict(x_test)
    output_df = x_test.copy()
    output_df['predicted_demand'] = y_pred.astype(int)
    output_df['model_name'] = 'random_forest'
    # Reversing any label encoding would have to be implemented here.
    print(evaluation(y_test, y_pred))
    # Load into BigQuery — depending on usage this could instead be another
    # DB sink or an API endpoint.
    output_df.to_gbq(destination_table=f'{self.dataset}.output',
                     project_id=self.project,
                     if_exists='append',
                     private_key=self.jwt)
    return output_df
def regress(trainX, trainY, test, dep_var, performance):
    """Fit linear regression on (trainX, trainY), predict `dep_var` on `test`,
    and append RMSE/MAE/sMAPE rows to the `performance` dict of lists."""
    methods = [LinearRegression]
    print(np.asarray(trainY))
    for estimator_cls in methods:
        fitted = estimator_cls().fit(np.asarray(trainX), np.asarray(trainY))
        preds = fitted.predict(np.asarray(test.drop([dep_var], axis=1)))
        rmse, mae, smape = evaluation(test[dep_var], preds)
        # NOTE(review): `data_name` and `k` are not parameters of this function —
        # they must be module-level globals; confirm.
        performance['Data'].append(data_name)
        performance['Method'].append('OLS')
        performance['Index'].append(k)
        performance['RMSE'].append(rmse)
        performance['MAE'].append(mae)
        performance['sMAPE'].append(smape)
    return performance
def validate(validate_loader, model, criterion, args):
    """Run one pass over the validation loader and return
    (mean error rate, mean loss) across batches."""
    model.eval()
    val_error_rate_all = list()
    val_loss_all = list()
    for i, (node_feature, edge_feature, gt_label, node_num_rec) in enumerate(validate_loader):
        if args.cuda:
            node_feature = torch.autograd.Variable(node_feature.cuda())
            edge_feature = torch.autograd.Variable(edge_feature.cuda())
            gt_label = torch.autograd.Variable(gt_label.cuda())
            node_num_rec = torch.autograd.Variable(node_num_rec.cuda())
        pred_label = model(node_feature, edge_feature, node_num_rec, args)
        # Flatten the per-step predictions/labels, keeping only the valid nodes
        # of each sequence step (node_num_rec gives the count per step).
        for sq_idx in range(pred_label.size()[1]):
            valid_node_num = node_num_rec[0, sq_idx]
            if sq_idx == 0:
                pred_label_all = pred_label[0, sq_idx, :valid_node_num, :]
                gt_label_all = gt_label[0, sq_idx, :valid_node_num]
            else:
                pred_label_all = torch.cat((pred_label_all, pred_label[0, sq_idx, :valid_node_num, :]), dim=0)
                gt_label_all = torch.cat((gt_label_all, gt_label[0, sq_idx, :valid_node_num]), dim=0)
        error_rate = evaluation(pred_label_all.unsqueeze(0), gt_label_all.unsqueeze(0))
        val_error_rate_all.append(error_rate)
        val_loss = criterion(pred_label_all, gt_label_all)
        val_loss_all.append(val_loss.data.cpu().numpy().item())
        # live plot of the running validation loss (visdom window 0)
        visdom_viz(vis, val_loss_all, win=0, ylabel='validation loss over batch', title='HGNN Resnet Msgpassing balanced lstm', color='red')
        print('batch [{}], validation loss: {}, validation error rate: {}'.format(i, val_loss, error_rate))
        # free per-batch tensors before the next iteration
        del node_feature, edge_feature, gt_label, node_num_rec
    return np.mean(val_error_rate_all), np.mean(val_loss_all)
def test_model(self, itr):
    """Score the model on the held-out ratings at epoch `itr`, record the
    metrics, and update the early-stopping bookkeeping."""
    start_time = time.time()
    Cost, R_hat = self.sess.run(
        [self.cost, self.r_hat],
        feed_dict={self.input_R: self.test_R,
                   self.input_mask_R: self.test_mask_R})
    self.test_cost_list.append(Cost)
    Estimated_R = R_hat.clip(min=0, max=1)  # predicted ratings live in [0, 1]
    RMSE, MAE, ACC, AVG_loglikelihood = evaluation(
        self.test_R, self.test_mask_R, Estimated_R, self.num_test_ratings)
    for metric_list, value in ((self.test_rmse_list, RMSE),
                               (self.test_mae_list, MAE),
                               (self.test_acc_list, ACC),
                               (self.test_avg_loglike_list, AVG_loglikelihood)):
        metric_list.append(value)
    if itr % self.display_step == 0:
        print("Testing //", "Epoch %d //" % (itr),
              " Total cost = {:.2f}".format(Cost),
              "Elapsed time : %d sec" % (time.time() - start_time))
        print("RMSE = {:.4f}".format(RMSE), "MAE = {:.4f}".format(MAE),
              "ACC = {:.10f}".format(ACC),
              "AVG Loglike = {:.4f}".format(AVG_loglikelihood))
        print("=" * 100)
    # New best RMSE resets patience; otherwise patience counts up.
    if RMSE <= self.min_RMSE:
        self.min_RMSE = RMSE
        self.min_epoch = itr
        self.patience = 0
    else:
        self.patience += 1
    if (itr > 100) and (self.patience >= self.total_patience):
        # Re-append the best epoch's metrics so the last entries match the best model.
        self.test_rmse_list.append(self.test_rmse_list[self.min_epoch])
        self.test_mae_list.append(self.test_mae_list[self.min_epoch])
        self.test_acc_list.append(self.test_acc_list[self.min_epoch])
        self.test_avg_loglike_list.append(
            self.test_avg_loglike_list[self.min_epoch])
        self.earlystop_switch = True
        print("========== Early Stopping at Epoch %d" % itr)
def train(model, loaders, args, DEVICE, log_dir, model_dir):
    """Train a classifier, or — when args.train is falsy — load its checkpoint
    and only report train/valid/test accuracy.

    Side effects: saves the best-validation-accuracy weights to
    <model_dir>/<args.dataset>_params.pt.
    """
    train_loader, valid_loader, test_loader = loaders
    model.to(DEVICE)
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=args.lr)
    filename = os.path.join(model_dir, args.dataset + '_params.pt')

    # FIX: idiomatic truthiness checks (`not args.train`) instead of `== False`.
    if os.path.exists(model_dir) and not args.train:
        # Load the checkpoint, falling back to CPU mapping when CUDA is absent.
        if torch.cuda.is_available():
            model.load_state_dict(torch.load(filename))
        else:
            model.load_state_dict(torch.load(filename, map_location='cpu'))
    if not args.train:
        # Evaluation-only path.
        # NOTE(review): if model_dir does not exist, this evaluates an
        # un-loaded model — confirm that is intended.
        acc_train = evaluation(model, train_loader, DEVICE)
        acc_valid = evaluation(model, valid_loader, DEVICE)
        acc_test = evaluation(model, test_loader, DEVICE)
        print('Model loaded. Train acc: %0.2f, Valid acc: %0.2f, Test acc: %0.2f'
              % (acc_train, acc_valid, acc_test))
        return

    if not os.path.exists(model_dir):
        os.makedirs(model_dir)
    acc_best = 0.0
    for epoch in range(args.epochs):
        model.train()
        for i, (images, labels) in enumerate(train_loader):
            images = images.to(DEVICE)
            labels = labels.to(DEVICE)
            optimizer.zero_grad()
            outputs = model(images)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
        acc_train = evaluation(model, train_loader, DEVICE)
        acc_valid = evaluation(model, valid_loader, DEVICE)
        acc_test = evaluation(model, test_loader, DEVICE)
        print('Epoch: %d/%d, Train acc: %0.2f, Valid acc: %0.2f, Test acc: %0.2f'
              % (epoch, args.epochs, acc_train, acc_valid, acc_test))
        # Checkpoint on best validation accuracy.
        if acc_valid > acc_best:
            acc_best = acc_valid
            torch.save(model.state_dict(), filename)
# Training loop: opt.max_epoch outer iterations over the training iterator,
# evaluating on test_iter after each outer iteration.
for i in range(opt.max_epoch):
    for epoch, batch in enumerate(train_iter):
        start = time.time()
        # FIX: without zero_grad() gradients accumulate across batches
        # (the sibling training loop in this file zeroes per batch).
        optimizer.zero_grad()
        predicted = model(batch.text[0])
        loss = F.cross_entropy(predicted, batch.label)
        loss.backward()
        utils.clip_gradient(optimizer, opt.grad_clip)
        optimizer.step()
        if epoch % 100 == 0:
            # FIX: loss.data.numpy() is 0-d, so indexing [0] raises IndexError
            # on PyTorch >= 0.4; .item() works on both CPU and CUDA tensors.
            print("%d ieration %d epoch with loss : %.5f in %.4f seconds"
                  % (i, epoch, loss.item(), time.time() - start))
    percision = utils.evaluation(model, test_iter)
    print("%d ieration with percision %.4f" % (i, percision))
if "CUDA_VISIBLE_DEVICES" not in os.environ.keys(): os.environ["CUDA_VISIBLE_DEVICES"] =opt.gpu #opt.model ='lstm' #opt.model ='capsule' if from_torchtext: train_iter, test_iter = utils.loadData(opt) else: import dataHelper as helper train_iter, test_iter = dataHelper.loadData(opt) opt.lstm_layers=2 print('Print loading models') model2=models.setup(opt) model2.load_state_dict(torch.load('saved_models/lstm_test.pt')) model2.cuda() percision=utils.evaluation(model2,test_iter,from_torchtext) print("After iteration with model 2 Test Acc %.4f" % (percision)) ipdb.set_trace() model=models.setup(opt) # model.load_state_dict(torch.load('lstm_new.pt')) if torch.cuda.is_available(): model.cuda() model.train() print("# parameters:", sum(param.numel() for param in model.parameters() if param.requires_grad)) optimizer = optim.Adam(filter(lambda p: p.requires_grad, model.parameters()), lr=opt.learning_rate, weight_decay=1e-3) optimizer.zero_grad() loss_fun = F.cross_entropy #batch = next(iter(train_iter))
def evaluation(args, model):
    """Reset the default TF1.x graph, then delegate to utils.evaluation.

    NOTE(review): this wrapper shadows `utils.evaluation` by name at module
    scope, and discards its return value — confirm both are intended.
    """
    tf.reset_default_graph()  # clear any previously built graph before re-evaluating
    utils.evaluation(args, model)
# DATASET dataset = tf.data.Dataset.from_generator( gen, (tf.float32, tf.int64, tf.int32, tf.int64, tf.int64, tf.int32, tf.int64, tf.int64, tf.int32, tf.int64)) dataset = dataset.map( lambda q, s_mnt, t_mnt, v_mnt, s_ent, t_ent, v_ent, s_entn, t_entn, v_entn: (q, tf.SparseTensor(s_mnt, t_mnt, v_mnt), tf.SparseTensor(s_ent, t_ent, v_ent), tf.SparseTensor(s_entn, t_entn, v_entn)), num_parallel_calls=4) dataset = dataset.prefetch(BATCH * 2) # VALIDATION dev_file = "../mentions_dumps/{}/mentions_dev_type.txt".format(lang) dev_ent, dev_mnt, dev_mnt_str, difficulty = evaluation(dev_file, tokenizer, voc_size_mnt) cands_qid, qid2id, id2qid = parse_QID_candidates(qid2title, tokenizer) # MODEL INITIALIZATION model = Speller(voc_size_mnt, voc_size_ent, EMBED_SIZE) optimizer = tf.optimizers.SGD(0.1) total_samples = int( Popen(["wc", "-l", data_tr_sampled], stdout=PIPE).communicate()[0].decode("utf-8").split(" ")[0]) total_batches = total_samples / BATCH # LOCATION TO SAVE checkpoint_dir = './models/{}/charagram_char_hr={}_{}_{}'.format( lang, hr_lang, num_data, weight_hr) checkpoint_prefix = os.path.join(checkpoint_dir, "ckpt")
def training(batch_size, n_epoch, lr, model_dir, train, valid, model, device):
    """Train `model` with BCE loss, validate every epoch, checkpoint the best
    validation accuracy, and save loss/accuracy curves under ./data/figure/."""
    total = sum(p.numel() for p in model.parameters())
    trainable = sum(p.numel() for p in model.parameters() if p.requires_grad)
    print('\nstart training, parameter total:{}, trainable:{}\n'.format(total, trainable))
    model.train()  # training mode, so the optimizer can update the parameters
    criterion = nn.BCELoss()  # binary cross entropy loss
    t_batch = len(train)
    v_batch = len(valid)
    # optimizer = optim.Adam(model.parameters(), lr=lr)
    optimizer = optim.Adam(model.parameters(), lr=lr, betas=(0.9, 0.999), eps=1e-08)  # hand the parameters to the optimizer with a suitable learning rate
    total_loss, total_acc, best_acc = 0, 0, 0
    # history buffers for the curves plotted after training
    train_loss_his = []
    val_loss_his = []
    train_acc_his = []
    val_acc_his = []
    timestr = time.strftime("%Y%m%d-%H-%M-%S")  # timestamp used in checkpoint/figure names
    for epoch in range(n_epoch):
        total_loss, total_acc = 0, 0
        # training pass
        for i, (inputs, labels) in enumerate(train):
            inputs = inputs.to(device, dtype=torch.long)  # move to device as LongTensor
            labels = labels.to(device, dtype=torch.float)  # criterion expects float targets
            optimizer.zero_grad()  # gradients accumulate across backward() calls, so reset per batch
            outputs = model(inputs)
            outputs = outputs.squeeze()  # drop the trailing dim so outputs fit criterion()
            loss = criterion(outputs, labels)  # training loss
            loss.backward()
            nn.utils.clip_grad_norm_(model.parameters(), 0.5, norm_type=2)  # added: clip exploding gradients
            optimizer.step()
            correct = evaluation(outputs, labels)  # training accuracy for this batch
            total_acc += (correct / batch_size)
            total_loss += loss.item()
            print('[ Epoch{}: {}/{} ] loss:{:.3f} acc:{:.3f} '.format(
                epoch+1, i+1, t_batch, loss.item(), correct*100/batch_size), end='\r')
        train_acc_his.append(total_acc/t_batch*100)
        train_loss_his.append(total_loss/t_batch)
        print('\nTrain | Loss:{:.5f} Acc: {:.3f}'.format(total_loss/t_batch, total_acc/t_batch*100))
        # validation pass
        model.eval()  # eval mode freezes the parameters
        with torch.no_grad():
            total_loss, total_acc = 0, 0
            for i, (inputs, labels) in enumerate(valid):
                inputs = inputs.to(device, dtype=torch.long)
                labels = labels.to(device, dtype=torch.float)
                outputs = model(inputs)
                outputs = outputs.squeeze()
                loss = criterion(outputs, labels)  # validation loss
                correct = evaluation(outputs, labels)  # validation accuracy
                total_acc += (correct / batch_size)
                total_loss += loss.item()
            val_acc_his.append(total_acc/v_batch*100)
            val_loss_his.append(total_loss/v_batch)
            print("Valid | Loss:{:.5f} Acc: {:.3f} ".format(total_loss/v_batch, total_acc/v_batch*100))
            if total_acc > best_acc:
                # best validation result so far — keep this checkpoint for later prediction
                best_acc = total_acc
                #torch.save(model, "{}/val_acc_{:.3f}.model".format(model_dir,total_acc/v_batch*100))
                torch.save(model, "{}/ckpt_{}.model".format(model_dir, timestr))
                print('saving model with acc {:.3f}'.format(total_acc/v_batch*100))
        print('-----------------------------------------------')
        model.train()  # back to training mode (we switched to eval above)
    # Plot
    # Loss curve
    plt.plot(train_loss_his)
    plt.plot(val_loss_his)
    plt.title('Loss')
    plt.legend(['train', 'val'])
    plt.savefig('./data/figure/loss_{}.png'.format(timestr))
    # plt.show()
    plt.clf()
    # Accuracy curve
    plt.plot(train_acc_his)
    plt.plot(val_acc_his)
    plt.title('Accuracy')
    plt.legend(['train', 'val'])
    plt.savefig('./data/figure/accuracy_{}.png'.format(timestr))
    # plt.show()
    plt.clf()
def advTrain_explanation_indirect(model_org, model_new, loaders, args, DEVICE, log_dir, model_dir, MAG, n_rep=16):
    """Fine-tune `model_new` so its predictions stay stable under random
    perturbations projected tangentially to its own (SmoothGrad) saliency maps,
    distilling against soft labels via KL divergence.

    Args:
        model_org: frozen reference model (only evaluated here).
        model_new: the model being fine-tuned; must have weights at <model_dir>/params.pt.
        MAG: perturbation magnitude; n_rep: replicas per image per step.
    Side effects: saves weights to <model_dir>/params_advintp.pt every epoch.
    """
    train_loader, valid_loader, test_loader = loaders
    model_org.to(DEVICE)
    model_new.to(DEVICE)
    criterion = nn.KLDivLoss()
    optimizer = optim.Adam(model_new.parameters(), lr=args.lr)
    filename_load = os.path.join(model_dir, 'params.pt')
    filename_save = os.path.join(model_dir, 'params_advintp.pt')

    if os.path.exists(model_dir):
        model_new.load_state_dict(torch.load(filename_load))
        acc_train = evaluation(model_org, train_loader, DEVICE)
        acc_valid = evaluation(model_org, valid_loader, DEVICE)
        acc_test = evaluation(model_org, test_loader, DEVICE)
        print(
            'Original model loaded. Train acc: %0.2f, Valid acc: %0.2f, Test acc: %0.2f'
            % (acc_train, acc_valid, acc_test))
        acc_train = evaluation(model_new, train_loader, DEVICE)
        acc_valid = evaluation(model_new, valid_loader, DEVICE)
        acc_test = evaluation(model_new, test_loader, DEVICE)
        print(
            'New model loaded. Train acc: %0.2f, Valid acc: %0.2f, Test acc: %0.2f'
            % (acc_train, acc_valid, acc_test))
    else:
        sys.exit("Pre-trained model does not exist! Terminate.")

    explainer = None
    if args.intp == 'grad':
        explainer = ExplainerGradient(model_new)
    # NOTE(review): explainer stays None for any other args.intp and the
    # training loop below would fail — confirm 'grad' is the only mode.

    for epoch in range(args.epochs):
        model_org.eval()
        model_new.train()
        for i, (images, labels) in enumerate(train_loader):
            images = images.to(DEVICE).requires_grad_()
            labels = labels.to(DEVICE)
            # Smoothed saliency map for each image.
            smooth_intp = explainer.generate_smoothgrad(
                images, labels, DEVICE, args.iter_smoothgrad, args.epsilon)
            # FIX: `outputs` was used below without ever being assigned
            # (NameError on the first batch). Use the new model's clean-input
            # predictions as the soft-label teacher.
            # TODO(review): confirm model_new (vs model_org) is the intended teacher.
            with torch.no_grad():
                outputs = model_new(images)
            # Perturbation along the saliency direction.
            # NOTE(review): computed but never used below — confirm.
            perturb_along_max = normalize_dim_max_mag(smooth_intp, MAG * 8)
            # Train batches along the perpendicular direction of the interpretation.
            for t in range(args.iter_along):
                # 1. replicate data so inputs, soft labels and interpretations stay aligned
                images_rep = images.repeat(n_rep, 1, 1, 1)
                softlabels_aug = F.softmax(outputs.repeat(n_rep, 1), dim=-1)
                intp_rep = smooth_intp.repeat(n_rep, 1, 1, 1)
                # 2. perturb each replica randomly, then project tangentially to the saliency
                noises = ((torch.rand(images_rep.shape) - 0.5) * 2 * MAG).to(DEVICE)
                images_rep += noises
                images_aug = perturb_tangent(intp_rep, images_rep, MAG, DEVICE)
                # show
                #show_2times2(images_rep, images_aug)
                # 3. train on the augmented batch against the detached soft labels
                images_aug = images_aug.detach()
                softlabels_aug = softlabels_aug.detach()
                optimizer.zero_grad()
                outputs_new = model_new(images_aug)
                loss = criterion(F.log_softmax(outputs_new, dim=-1), softlabels_aug)
                loss.backward()
                optimizer.step()
        acc_train = evaluation(model_new, train_loader, DEVICE)
        acc_valid = evaluation(model_new, valid_loader, DEVICE)
        acc_test = evaluation(model_new, test_loader, DEVICE)
        acc_valid_aug = evaluation_aug(model_new, valid_loader, DEVICE, explainer,
                                       args.iter_smoothgrad, n_rep, MAG)
        acc_test_aug = evaluation_aug(model_new, test_loader, DEVICE, explainer,
                                      args.iter_smoothgrad, n_rep, MAG)
        print(
            'Epoch: %d/%d, Train acc: %0.2f, Valid acc: %0.2f, Test acc: %0.2f ; AugValid acc: %0.2f, AugTest acc: %0.2f'
            % (epoch, args.epochs, acc_train, acc_valid, acc_test, acc_valid_aug, acc_test_aug))
        torch.save(model_new.state_dict(), filename_save)
def train(opt, train_iter, test_iter, verbose=True):
    """Train the model configured by ``opt``, evaluate once per epoch, and
    keep only the checkpoint of the best test precision.

    Args:
        opt: option namespace (model/optimizer/lr/max_epoch/... fields).
        train_iter: training batch iterator (torchtext-style batches).
        test_iter: test batch iterator.
        verbose: when True, log per-batch loss and per-epoch precision.

    Side effects: writes the best metric into ``performance_log_file``
    (module-level path) and saves/removes model checkpoints on disk.
    """
    global_start = time.time()
    logger = utils.getLogger()
    model = models.setup(opt)
    if torch.cuda.is_available():
        model.cuda()
    params = [param for param in model.parameters() if param.requires_grad]
    # Flatten the option namespace into a single row key for the results table.
    model_info = ";".join([
        str(k) + ":" + str(v) for k, v in opt.__dict__.items()
        if type(v) in (str, int, float, list, bool)
    ])
    logger.info("# parameters:" + str(sum(param.numel() for param in params)))
    logger.info(model_info)

    model.train()
    optimizer = utils.getOptimizer(params,
                                   name=opt.optimizer,
                                   lr=opt.learning_rate,
                                   scheduler=utils.get_lr_scheduler(opt.lr_scheduler))
    loss_fun = F.cross_entropy

    filename = None
    precisions = []  # one test precision per epoch
    for i in range(opt.max_epoch):
        for epoch, batch in enumerate(train_iter):
            optimizer.zero_grad()
            start = time.time()
            text = batch.text[0] if opt.from_torchtext else batch.text
            predicted = model(text)
            loss = loss_fun(predicted, batch.label)
            loss.backward()
            utils.clip_gradient(optimizer, opt.grad_clip)
            optimizer.step()
            if verbose:
                # BUGFIX: the CPU branch used loss.data.numpy()[0], which
                # raises on 0-dim tensors in modern PyTorch; loss.item()
                # works uniformly on CPU and CUDA, so one branch suffices.
                logger.info(
                    "%d iteration %d epoch with loss : %.5f in %.4f seconds"
                    % (i, epoch, loss.item(), time.time() - start))

        precision = utils.evaluation(model, test_iter, opt.from_torchtext)
        if verbose:
            # original (misspelled) log message kept: it is runtime output
            logger.info("%d iteration with percision %.4f" % (i, precision))
        # Keep only the best checkpoint; remove the previously saved one.
        if len(precisions) == 0 or precision > max(precisions):
            if filename:
                os.remove(filename)
            filename = model.save(metric=precision)
        precisions.append(precision)

    # while(utils.is_writeable(performance_log_file)):
    # BUGFIX: guard against max_epoch == 0 — max() on an empty list raises.
    if precisions:
        df = pd.read_csv(performance_log_file, index_col=0, sep="\t")
        df.loc[model_info, opt.dataset] = max(precisions)
        df.to_csv(performance_log_file, sep="\t")
        logger.info(model_info + " with time :" + str(time.time() - global_start) +
                    " ->" + str(max(precisions)))
        print(model_info + " with time :" + str(time.time() - global_start) +
              " ->" + str(max(precisions)))
num_edges_add_this_level = new_global_edges_len - global_edges_len if stop_iterating(level, args.levels, args.early_stop, num_edges_add_this_level, num_edges_add_last_level, args.knn_k): break global_edges_len = new_global_edges_len num_edges_add_last_level = num_edges_add_this_level # build new dataset features, labels, cluster_features = build_next_level( features, labels, peaks, global_features, global_pred_labels, global_peaks) # After the first level, the number of nodes reduce a lot. Using cpu faiss is faster. dataset = LanderDataset(features=features, labels=labels, k=args.knn_k, levels=1, faiss_gpu=False, cluster_features=cluster_features) g = dataset.gs[0] g.ndata['pred_den'] = torch.zeros((g.number_of_nodes())) g.edata['prob_conn'] = torch.zeros((g.number_of_edges(), 2)) test_loader = dgl.dataloading.DataLoader(g, torch.arange(g.number_of_nodes()), sampler, batch_size=args.batch_size, shuffle=False, drop_last=False, num_workers=args.num_workers) evaluation(global_pred_labels, global_labels, args.metrics)
### TEST ####################################################################### # encoder ## test vs normal X X_test = tf.placeholder(tf.float32, shape=[None, pixels], name='X_test') representation_t = xx.encoder_full(X_test, e_weights) y_x_logits_t = xx.y_latent(representation_t, y_weights) y_x_t = tf.nn.softmax(y_x_logits_t) z_x_logits_t = xx.z_latent(representation_t, z_weights) yz_x_t = tf.concat([y_x_t, z_x_logits_t], 1) # decoder reconstruction_t, reconstruction_logits_t = xx.decoder_full(yz_x_t, d_weights) # evaluation SS_score = xx.evaluation(y_x_logits, Y) SS_score_t = xx.evaluation(y_x_logits_t, Y) ### DEFINE LOSSES ############################################################## #reconstruction loss if obj == 'MSE': R_loss = 0.5 * tf.reduce_mean( tf.reduce_sum(tf.pow(X - reconstruction, 2), 1)) else: R_loss = 0.5 * tf.reduce_mean(tf.reduce_sum(tf.abs(X - reconstruction), 1)) #discriminator loss DZ_loss = xx.discriminator_loss(dz_real_logits, dz_fake_logits) DY_loss = xx.discriminator_loss(dy_real_logits, dy_fake_logits) D_loss = DZ_loss + DY_loss
def FL(support_train, support_test, test_train, test_test, args):
    """Run the federated-learning main loop.

    Wraps every client split in a DataLoader, then for ``args.num_rounds``
    rounds selects a random fraction of support clients, aggregates their
    updates into the global model, and periodically evaluates the model on
    the held-out test clients both before and after localization.

    Args:
        support_train/support_test: per-client datasets used for FL rounds
            (replaced in place by DataLoaders).
        test_train/test_test: per-client datasets for held-out evaluation
            (replaced in place by DataLoaders).
        args: namespace with batch_size, fraction, train_lr, local_lr,
            num_rounds, local_interval fields.
    """
    # Convert every client's datasets to loaders (training splits shuffled).
    for idx in range(len(support_train)):
        support_train[idx] = Data.DataLoader(support_train[idx],
                                             batch_size=args.batch_size,
                                             shuffle=True)
        support_test[idx] = Data.DataLoader(support_test[idx],
                                            batch_size=args.batch_size,
                                            shuffle=False)
    for idx in range(len(test_train)):
        test_train[idx] = Data.DataLoader(test_train[idx],
                                          batch_size=args.batch_size,
                                          shuffle=True)
        test_test[idx] = Data.DataLoader(test_test[idx],
                                         batch_size=args.batch_size,
                                         shuffle=False)

    logging.info(f'number_support_client: {len(support_train)}')
    logging.info(f'number_test_client: {len(test_train)}')

    # How many clients participate in each round.
    num_clients = int(args.fraction * len(support_train))
    logging.info(f'number_selected_clients_per_round: {num_clients}')
    print('FL:\n')

    # Global model, its round-level optimizer/schedule, and the optimizer
    # used for client-side localization.
    model = create_model(args)
    optimizer = optim.SGD(model.parameters(), lr=args.train_lr)
    scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=args.num_rounds)
    op_local = optim.SGD(model.parameters(), lr=args.local_lr)

    for round_num in range(1, args.num_rounds + 1):
        # Rebuild the inner optimizer with the cosine-annealed lr, then advance.
        optimizer = optim.SGD(model.parameters(), lr=scheduler.get_lr()[0])
        scheduler.step()

        # Sample this round's participants and aggregate their updates.
        clientIDs = np.random.choice(range(len(support_train)), num_clients,
                                     replace=False)
        weights, train_acc, train_loss, acc1, loss1 = aggregation(
            support_train, support_test, args, clientIDs, model, optimizer)
        model.load_state_dict(weights)

        logging.info(
            'round {:2d}: support_train_acc {:.6f}, support_train_loss {:.6f}, support_test_acc {:.6f}, '
            'support_test_loss {:.6f}'.format(round_num, train_acc, train_loss,
                                              acc1, loss1))

        if round_num % args.local_interval != 0:
            continue

        # Held-out evaluation with the current global weights...
        acc2, loss2 = evaluation(test_test, args, model)
        logging.info(f'initial_acc {acc2:.6f}, initial_loss {loss2:.6f}')

        # ...and again after per-client localization fine-tuning.
        acc3, loss3, test_acc, test_loss = localization(
            test_train, test_test, args, model, op_local)
        logging.info(f'localization_acc {acc3:.6f}, localization_loss {loss3:.6f}')
        for step in range(len(test_acc)):
            logging.info(
                'epoch: {:2d}: test acc: {:.6f}, test loss: {:.6f}'.format(
                    step + 1, test_acc[step], test_loss[step]))
    return
save_model = "advacnced_start_optim.pth" for it in range(1, epoch + 1): output, loss = train(*utils.randomTrainingExample_new( all_categories, category_lines, n_letters, all_letters), teacher_forcing=True) total_loss += loss if it % print_every == 0: print('%s (%d %d%%) %.4f' % (utils.timeSince(start), it, it / epoch * 100, loss)) torch.save(rnn.state_dict(), save_model) utils.evaluation(it, all_categories, n_letters, all_letters, rnn, start_token=True) #utils.samples('Russian', all_categories, n_letters, all_letters, rnn, 'RUS', start_token=True) #utils.samples('German', all_categories, n_letters, all_letters, rnn, 'GER', start_token=True) #utils.samples('Spanish', all_categories, n_letters, all_letters, rnn, 'SPA', start_token=True) #utils.samples('Chinese', all_categories, n_letters, all_letters, rnn, 'CHI', start_token=True) if it % plot_every == 0: all_losses.append(total_loss / plot_every) total_loss = 0 utils.plot(all_losses) torch.save(rnn.state_dict(), save_model)
def training(batch_size, n_epoch, lr, model_dir, train, valid, model, device):
    """Train a binary classifier with BCELoss, validate each epoch, save
    improving checkpoints, and plot loss/accuracy curves.

    Args:
        batch_size: batch size (used only to normalize the per-batch
            `evaluation` correct-count into an accuracy).
        n_epoch: number of epochs.
        lr: learning rate for Adam.
        model_dir: directory for "ckpt_<acc>.model" checkpoints.
        train / valid: DataLoaders yielding (inputs, labels) batches.
        model: the torch model to train (saved whole via torch.save).
        device: torch device string/object.

    Side effects: saves checkpoints, writes loss.png and acc.png.
    """
    # Keep the loss and accuracy at every iteration for plotting
    train_loss_list = []
    valid_loss_list = []
    train_acc_list = []
    valid_acc_list = []

    # print model status
    total = sum(p.numel() for p in model.parameters())
    trainable = sum(p.numel() for p in model.parameters() if p.requires_grad)
    print('\nstart training, parameter total:{}, trainable:{}\n'.format(
        total, trainable))

    # model params
    criterion = nn.BCELoss()  # binary cross entropy loss
    t_batch_num = len(train)
    v_batch_num = len(valid)
    optimizer = optim.Adam(model.parameters(), lr=lr)
    #optimizer = torch.optim.SGD(model.parameters(), lr, momentum=0.9, weight_decay=1e-4)
    #optimizer = optim.Adadelta(model.parameters(), lr=lr) #lr=1

    # training loop
    model.train()
    train_total_loss, train_total_acc, best_acc, best_loss = 0, 0, 0, 10000
    for epoch in range(n_epoch):
        # training
        train_total_loss, train_total_acc = 0, 0
        for i, (inputs, labels) in enumerate(train):
            inputs = inputs.to(device, dtype=torch.long)     # torch.cuda.LongTensor on "cuda"
            labels = labels.to(device, dtype=torch.float)    # torch.cuda.FloatTensor on "cuda"
            # gradients accumulate across loss.backward() calls, so reset per batch
            optimizer.zero_grad()
            outputs = model(inputs)
            # BUGFIX: squeeze(1), not squeeze() — a bare squeeze() collapses a
            # (1, 1) batch to a 0-dim tensor and breaks BCELoss when the last
            # batch has a single sample. Intent (per original comment) is
            # (batch_size, 1) -> (batch_size).
            outputs = outputs.squeeze(1)
            loss = criterion(outputs, labels)  # calculate training loss
            loss.backward()                    # compute gradient from loss
            optimizer.step()                   # update model parameters
            correct = evaluation(outputs, labels)
            # NOTE(review): dividing by batch_size slightly under-reports the
            # last (partial) batch; kept to preserve the reported metric.
            train_total_acc += (correct / batch_size)
            train_total_loss += loss.item()
            print('[ Epoch{}: {}/{} ] '.format(epoch + 1, i + 1, t_batch_num),
                  end='\r')
        print('\nTrain | Loss:{:.5f} Acc: {:.3f}'.format(
            train_total_loss / t_batch_num, train_total_acc / t_batch_num * 100))

        # validation
        model.eval()  # set model to eval mode, fix model parameters
        with torch.no_grad():
            valid_total_loss, valid_total_acc = 0, 0
            for i, (inputs, labels) in enumerate(valid):
                inputs = inputs.to(device, dtype=torch.long)
                labels = labels.to(device, dtype=torch.float)
                outputs = model(inputs)
                outputs = outputs.squeeze(1)  # same shape fix as above
                loss = criterion(outputs, labels)
                correct = evaluation(outputs, labels)
                valid_total_acc += (correct / batch_size)
                valid_total_loss += loss.item()
            print("Valid | Loss:{:.5f} Acc: {:.3f} ".format(
                valid_total_loss / v_batch_num,
                valid_total_acc / v_batch_num * 100))
            # NOTE(review): `or` means a checkpoint is saved when EITHER
            # metric improves, and both bests are then overwritten — kept to
            # preserve the original checkpointing behavior.
            if valid_total_acc > best_acc or valid_total_loss < best_loss:
                best_acc = valid_total_acc
                best_loss = valid_total_loss
                torch.save(
                    model,
                    "{}/ckpt_{}.model".format(
                        model_dir, valid_total_acc / v_batch_num * 100))
                print('saving model with acc {:.3f}'.format(
                    valid_total_acc / v_batch_num * 100))
        print('-----------------------------------------------')
        model.train()  # set model to train mode, let model parameters update

        # store acc and loss result
        train_loss_list.append(train_total_loss / t_batch_num)
        valid_loss_list.append(valid_total_loss / v_batch_num)
        train_acc_list.append(train_total_acc / t_batch_num * 100)
        valid_acc_list.append(valid_total_acc / v_batch_num * 100)

    # plotting result
    import matplotlib.pyplot as plt

    # Loss curve
    plt.plot(train_loss_list)
    plt.plot(valid_loss_list)
    plt.title('Loss')
    plt.legend(['train', 'valid'])
    plt.savefig('loss.png')
    plt.show()

    # Accuracy curve
    plt.plot(train_acc_list)
    plt.plot(valid_acc_list)
    plt.title('Accuracy')
    plt.legend(['train', 'valid'])
    plt.savefig('acc.png')
    plt.show()
end = time.time() # test process TP = 0 FN = 0 FP = 0 with torch.no_grad(): batch_time = AverageMeter() for i, (sentence, truth_tags_lst) in enumerate(test_data): # print('sentence :', sentence) precheck_sent = prepare_sequence(sentence, word_to_ix) output = model(precheck_sent) out_tag_lst = [all_tags_lst[tag_idx] for tag_idx in output[1]] # print('out_tag_lst = ', out_tag_lst) # print('truth_tag_lst = ', truth_tags_lst) tp, fn, fp = evaluation(out_tag_lst, truth_tags_lst) TP += tp FN += fn FP += fp # print(TP, FN, FP) precision = TP / (TP + FP + eps) recall = TP / (TP + FN + eps) F_value = (2 * precision * recall) / (precision + recall + eps) if F_value > F_value_best: F_value_best = F_value print('Test epoch [{0}]\t' 'Time:{epoch_time:.2f}\t' 'F-value:{value:.4f}'.format(epoch, epoch_time=time.time() - end, value=F_value)) if train_mode and losses.avg < best_loss: best_loss = losses.avg
import os if "CUDA_VISIBLE_DEVICES" not in os.environ.keys(): os.environ["CUDA_VISIBLE_DEVICES"] = "0" opt = opts.parse_opt() opt.model = 'lstm' train_iter, test_iter = utils.loadData(opt) model = models.setup(opt) if torch.cuda.is_available(): model.cuda() model.train() optimizer = optim.Adam(model.parameters(), lr=opt.learning_rate) optimizer.zero_grad() loss_fun = NLLLoss() for batch in train_iter.__iter__(): predicted = model(batch.text[0]) loss = loss_fun(predicted, batch.label) loss.backward() utils.clip_gradient(optimizer, opt.grad_clip) optimizer.step() if torch.cuda.is_available(): print("loss : %.5f" % loss.cpu().data.numpy()[0]) else: print("loss : %.5f" % loss.data.numpy()[0]) utils.evaluation(model, test_iter)