def train(args, model, train_loader, epoch, optimizer, scaler, run_avg):
    model.train()
    device = torch.device('cuda')
    criterion = nn.CrossEntropyLoss(reduction='mean')
    for batch_index, input_tensor in enumerate(train_loader):
        input_data, target = input_tensor
        if args.cuda:
            input_data = input_data.to(device)
            target = target.to(device)
        optimizer.zero_grad()
        with torch.cuda.amp.autocast():
            output = model(input_data)
            loss = criterion(output, target)
        run_avg.update_train_loss_avg(loss.item(), args.batch_size)
        scaler.scale(loss).backward()
        scaler.step(optimizer)
        scaler.update()
        accuracy = acc(output, target)
        run_avg.update_train_acc_avg(accuracy, args.batch_size)
        if batch_index % 10 == 9:
            print('epoch =', epoch, ' train_loss =', run_avg.train_loss_run_avg,
                  ' accuracy =', run_avg.train_acc_run_avg)
            wandb.log({
                'epoch': epoch,
                'train_avg_loss': run_avg.train_loss_run_avg,
                'train_accuracy': run_avg.train_acc_run_avg
            })
def evaluation(args, model, valid_loader, epoch, run_avg):
    model.eval()
    device = args.device
    model.to(device)
    criterion = nn.CrossEntropyLoss(reduction='mean')
    with torch.no_grad():
        for batch_index, input_tensor in enumerate(valid_loader):
            input_data, target = input_tensor
            input_data, target = input_data.to(device), target.to(device)
            with torch.cuda.amp.autocast():
                output = model(input_data)
                valid_loss = criterion(output, target)
            run_avg.update_val_loss_avg(valid_loss.item(), args.batch_size)
            accuracy = acc(output, target)
            run_avg.update_val_acc_avg(accuracy, args.batch_size)
            if batch_index % 10 == 9:
                print('epoch', epoch, ' val_loss =', run_avg.val_loss_run_avg,
                      ' accuracy =', run_avg.val_acc_run_avg)
                wandb.log({
                    'epoch': epoch,
                    'valid_avg_loss': run_avg.val_loss_run_avg,
                    'val_accuracy': run_avg.val_acc_run_avg
                })
    return run_avg.val_acc_run_avg
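A minimal driver sketch showing how the two routines above might be wired together: the concrete model, data loaders, and the running-average tracker (run_avg) are assumptions, not part of the original code; torch.cuda.amp.GradScaler is the scaler that pairs with the autocast() calls in train().

# Hypothetical driver loop (sketch); model, loaders, and run_avg are assumed.
import torch
import torch.optim as optim

def fit(args, model, train_loader, valid_loader, run_avg, epochs=10):
    optimizer = optim.SGD(model.parameters(), lr=1e-2, momentum=0.9)
    scaler = torch.cuda.amp.GradScaler()  # pairs with autocast() in train()
    best_acc = 0.0
    for epoch in range(epochs):
        train(args, model, train_loader, epoch, optimizer, scaler, run_avg)
        val_acc = evaluation(args, model, valid_loader, epoch, run_avg)
        if val_acc > best_acc:  # keep the best-performing checkpoint
            best_acc = val_acc
            torch.save(model.state_dict(), 'best.pt')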
def stats(predict_dir):
    """Calculate prediction and uncertainty statistics."""
    bp = np.load(predict_dir + "/bayesian/bayesian_pred.npy").squeeze()
    bu = np.load(predict_dir + "/bayesian/bayesian_unc.npy").squeeze()
    dp = np.load(predict_dir + "/dropout/dropout_pred.npy").squeeze()
    du = np.load(predict_dir + "/dropout/dropout_unc.npy").squeeze()
    y = np.load(predict_dir + "/test_targets.npy").squeeze()
    with open(predict_dir + "/stats.csv", "w") as csvfile:
        w = csv.writer(csvfile, delimiter=" ")
        w.writerow(["Category", "Dropout", "Bayesian"])
        w.writerow(["Pred_Acc", acc(dp, y), acc(bp, y)])
        w.writerow(["Unc_Mean", du.mean(), bu.mean()])
        w.writerow(["Unc_Var", du.var(), bu.var()])
        w.writerow(["Unc_Max", du.max(), bu.max()])
        w.writerow(["Unc_Min", du.min(), bu.min()])
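For context, a minimal sketch of the accuracy helper this snippet assumes; the real acc() in the source utilities may differ, this is only a stand-in that compares class predictions against targets.

# Hypothetical stand-in for acc() as used above; an assumption, not the original.
import numpy as np

def acc(pred, target):
    """Fraction of positions where the predicted labels match the targets."""
    pred = np.asarray(pred)
    target = np.asarray(target)
    return float((pred == target).mean())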
def compute_output(self, X, Y, keep_prob=cfg.keep_prob,
                   regularization_scale=cfg.regularization_scale):
    print("Size of input:")
    print(X.get_shape())
    # 1. Convolve the input image up to the digit capsules.
    digit_caps = self._image_to_digitcaps(X)
    # 2. Get the margin loss.
    margin_loss = u.margin_loss(digit_caps, Y)
    # 3. Reconstruct the images.
    reconstructed_image, reconstruction_1, reconstruction_2 = self._digitcaps_to_image(
        digit_caps, Y)
    # 4. Get the reconstruction loss.
    reconstruction_loss = u.reconstruction_loss(reconstructed_image, X)
    # 5. Get the total loss.
    total_loss = margin_loss + regularization_scale * reconstruction_loss
    # 6. Get the batch accuracy.
    batch_accuracy = u.acc(digit_caps, Y)
    # 7. Reconstruct all possible images.
    memo = self._digitcaps_to_memo(X, digit_caps)
    # 8. Get the memo capsules.
    memo_caps = self._memo_to_digitcaps(memo, keep_prob=keep_prob)
    # 9. Get the memo margin loss.
    memo_margin_loss = u.margin_loss(memo_caps, Y)
    # 10. Get the memo accuracy.
    memo_accuracy = u.acc(memo_caps, Y)
    # 11. Return all of the losses and reconstructions.
    return (total_loss, margin_loss, reconstruction_loss, reconstructed_image,
            reconstruction_1, reconstruction_2, batch_accuracy, memo,
            memo_margin_loss, memo_accuracy)
def train(model, training_data, dev_data, learning_rate, batch_size, max_epoch):
    X_train, Y_train = training_data['X'], training_data['Y']
    X_dev, Y_dev = dev_data['X'], dev_data['Y']
    for i in range(max_epoch):
        for X, Y in data_loader(X_train, Y_train, batch_size=batch_size, shuffle=True):
            training_loss, grad_Ws, grad_bs = model.compute_gradients(X, Y)
            model.update(grad_Ws, grad_bs, learning_rate)
        dev_acc = acc(model.predict(X_dev), Y_dev)
        print("Epoch {: >3d}/{}\tloss:{:.5f}\tdev_acc:{:.5f}".format(
            i + 1, max_epoch, training_loss, dev_acc))
    return model
def operate(phase):
    if phase == 'train':
        model.train()
        loader = trainloader
    else:
        model.eval()
        loader = valloader
    for i, (data, target) in enumerate(loader):
        start = time.time()
        data = data.to(device)
        target = target.to(device)
        output = model(data)
        loss = lossf(output, target)
        if phase == 'train':
            # only backpropagate and update weights in the training phase
            loss.backward()
            optimizer.step()
            optimizer.zero_grad()
        acc = U.acc(output, target)
        print(
            f'{e}/{epoch}:{i}/{len(loader)}, loss:{loss:.2f}, acc:{acc:.2f}, time:{time.time()-start:.4f}'
        )
        Co.addvalue(writer, 'loss', loss.item(), e)
        Co.addvalue(writer, 'acc', acc.item(), e)
def train_1pass(model, training_data, dev_data, learning_rate, batch_size,
                print_every=100, plot_every=10):
    X_train, Y_train = training_data['X'], training_data['Y']
    X_dev, Y_dev = dev_data['X'], dev_data['Y']
    num_samples = 0
    print_loss_total = 0
    plot_loss_total = 0
    plot_losses = []
    plot_num_samples = []
    for idx, (X, Y) in enumerate(
            data_loader(X_train, Y_train, batch_size=batch_size, shuffle=True), 1):
        training_loss, grad_Ws, grad_bs = model.compute_gradients(X, Y)
        model.update(grad_Ws, grad_bs, learning_rate)
        num_samples += Y.shape[1]
        print_loss_total += training_loss
        plot_loss_total += training_loss
        if idx % print_every == 0:
            dev_acc = acc(model.predict(X_dev), Y_dev)
            print_loss_avg = print_loss_total / print_every
            print_loss_total = 0
            print("#Samples {: >5d}\tloss:{:.5f}\tdev_acc:{:.5f}".format(
                num_samples, print_loss_avg, dev_acc))
        if idx % plot_every == 0:
            plot_loss_avg = plot_loss_total / plot_every
            plot_loss_total = 0
            plot_losses.append(plot_loss_avg)
            plot_num_samples.append(num_samples)
    return model, {"losses": plot_losses, "num_samples": plot_num_samples}
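A small sketch of how the history returned by train_1pass() could be plotted; the matplotlib usage here is an illustration and plot_history is a hypothetical helper, not part of the original code.

# Hypothetical plotting helper for the dict returned by train_1pass() (sketch).
import matplotlib.pyplot as plt

def plot_history(history):
    plt.plot(history["num_samples"], history["losses"])
    plt.xlabel("training samples seen")
    plt.ylabel("smoothed training loss")
    plt.savefig("train_loss.png")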
def estimate_effects(model_dat):
    """Nonlinear estimation of the linearized structural model,
    using theoretical direct effects as starting values."""
    # ToDo: reintroduce # yyyy
    if model_dat["alpha"] is None:
        if model_dat["dof"] is not None:
            raise ValueError("dof is determined together with alpha.")
        # alpha_min (with posdef Hessian) and alpha_max to search over
        alpha_min, alpha_max = alpha_min_max(model_dat)
        # optimal alpha with minimal out-of-sample sse
        alpha, dof = estimate_alpha(alpha_min, alpha_max, model_dat)
        model_dat["alpha"] = alpha
        model_dat["dof"] = dof
    else:
        if model_dat["dof"] is None:
            raise ValueError("dof must be given together with alpha.")
        print("\ngiven alpha: {:10f}, dof: {:10f}".format(
            model_dat["alpha"], model_dat["dof"]))

    # final estimation given optimal alpha
    # algebraic Hessian
    (check, hessian_hat, direct_hat, sse_hat, mx_hat, my_hat, ex_hat,
     ey_hat) = check_estimate_effects(model_dat)
    # automatic Hessian
    hessian = utils.sse_hess(mx_hat, my_hat, model_dat)
    # numeric Hessian
    hessian_num = sse_hess_num(mx_hat, my_hat, model_dat)
    print("\nAlgebraic and numeric Hessian allclose: {} with accuracy {:10f}.".format(
        allclose(hessian_hat, hessian_num), utils.acc(hessian_hat, hessian_num)))
    print("Automatic and numeric Hessian allclose: {} with accuracy {:10f}.".format(
        allclose(hessian, hessian_num), utils.acc(hessian, hessian_num)))
    print("Automatic and algebraic Hessian allclose: {} with accuracy {:10f}.".format(
        allclose(hessian, hessian_hat), utils.acc(hessian, hessian_hat)))
    assert check, "Hessian not well conditioned."
    cov_direct_hat = compute_cov_direct(sse_hat, hessian_hat, model_dat)

    # compute estimated direct, total and mediation effects and standard deviations
    mx_hat_std, my_hat_std = utils.compute_direct_std(cov_direct_hat, model_dat)
    ex_hat_std, ey_hat_std = utils.total_effects_std(direct_hat, cov_direct_hat, model_dat)
    exj_hat, eyj_hat, eyx_hat, eyy_hat = utils.compute_mediation_effects(
        mx_hat, my_hat, ex_hat, ey_hat, model_dat["yvars"], model_dat["final_var"])
    (exj_hat_std, eyj_hat_std, eyx_hat_std,
     eyy_hat_std) = utils.compute_mediation_std(ex_hat_std, ey_hat_std, eyx_hat,
                                                eyy_hat, model_dat["yvars"],
                                                model_dat["final_var"])

    estimate_dat = {
        "direct_hat": direct_hat,
        "sse_hat": sse_hat,
        "hessian_hat": hessian_hat,
        "cov_direct_hat": cov_direct_hat,
        "mx_hat": mx_hat,
        "my_hat": my_hat,
        "mx_hat_std": mx_hat_std,
        "my_hat_std": my_hat_std,
        "ex_hat": ex_hat,
        "ey_hat": ey_hat,
        "ex_hat_std": ex_hat_std,
        "ey_hat_std": ey_hat_std,
        "exj_hat": exj_hat,
        "eyj_hat": eyj_hat,
        "eyx_hat": eyx_hat,
        "eyy_hat": eyy_hat,
        "exj_hat_std": exj_hat_std,
        "eyj_hat_std": eyj_hat_std,
        "eyx_hat_std": eyx_hat_std,
        "eyy_hat_std": eyy_hat_std,
    }
    return estimate_dat
train_acc = 0
model.train()
for step, batch in tqdm(enumerate(train), desc='steps', total=len(train)):
    x_train, y_train = map(lambda x: x.to(device), batch)
    optimizer.zero_grad()
    y_hat = model(x_train)
    loss = loss_fn(y_hat, y_train)
    loss.backward()
    clip_grad_norm_(model.parameters(), config.max_grad_norm)
    optimizer.step()
    with torch.no_grad():
        batch_acc = utils.acc(y_hat, y_train)
    train_loss += loss.item()
    train_acc += batch_acc.item()

    # evaluation
    if (epoch * len(train) + step) % config.summary_step == 0:
        val_loss = 0
        val_acc = 0
        if model.training:
            model.eval()
        for val_step, batch in enumerate(dev):
def test(testing_data_file, super_batch_size, tokenizer, mode, kw, p_key,
         model1, device, model2, model3):
    '''Test the three models.

    Evaluate the models through bundles.

    Args:
        testing_data_file (list) : testing data json file, raw json file used to load data
        super_batch_size (int) : how many samples will be loaded into memory at once
        tokenizer : SentencePiece tokenizer used to obtain the token ids
        mode (str) : mode of the passage format; could be a list (processed) or a long string (unprocessed)
        kw (str) : the key word mapped to the passage in each data dictionary. Defaults to 'abstract'
        p_key (str) : the key word to search for a specific passage. Defaults to 'title'
        model1 (nn.DataParallel) : local dependency encoder
        device (torch.device) : the device which models and data are on
        model2 (nn.Module) : global coherence encoder
        model3 (nn.Module) : attention decoder

    Returns:
        result_list (list) : list with the result of every sample, each organized as a
            dictionary {taus, accs, pmrs, rouge-ws, pred, truth}
        over_all (dict) : the overall result; keys are the four metrics
    '''
    with torch.no_grad():
        print('test..............')
        valid_critic_dict = {
            'rouge-w': rouge_w,
            'acc': acc,
            'ken-tau': kendall_tau,
            'pmr': pmr
        }
        result_list = []
        over_all = {
            'Kendall-tau': None,
            'Accuracy': None,
            'ROUGE-w': None,
            'PMR': None
        }
        accs = []
        rouge_ws = []
        ken_taus = []
        pmrs = []
        for superbatch in load_superbatch(testing_data_file, super_batch_size):
            bundles = []
            for data in superbatch:
                try:
                    bundles.append(
                        convert_passage_to_samples_bundle(tokenizer, data, mode, kw, p_key))
                except Exception:
                    traceback.print_exc()
            num_batch, valid_dataloader = homebrew_data_loader(bundles, batch_size=1)
            valid_value = []
            for step, batch in enumerate(valid_dataloader):
                try:
                    batch = tuple(t for idx, t in enumerate(batch))
                    pointers_output, ground_truth = dev_test(batch, model1, model2, model3, device)
                    # valid_value.append(valid_critic_dict[valid_critic](pointers_output, ground_truth))
                except Exception:
                    traceback.print_exc()
                rouge_ws.append(rouge_w(pointers_output, ground_truth))
                accs.append(acc(pointers_output, ground_truth))
                ken_taus.append(kendall_tau(pointers_output, ground_truth))
                pmrs.append(pmr(pointers_output, ground_truth))
                result_list.append({
                    'Kendall-tau': ken_taus[-1],
                    'Accuracy': accs[-1],
                    'ROUGE-w': rouge_ws[-1],
                    'PMR': pmrs[-1],
                    'true': ground_truth,
                    'pred': pointers_output
                })
            print('finished {} samples.\n'.format(len(rouge_ws)))
        over_all['Kendall-tau'] = np.mean(ken_taus)
        over_all['Accuracy'] = np.mean(accs)
        over_all['ROUGE-w'] = np.mean(rouge_ws)
        over_all['PMR'] = np.mean(pmrs)
        print('Final scores: kendall:{:.4f}, accuracy:{:.4f}, rouge-w:{:.4f}, pmr:{:.4f}\n'.format(
            over_all['Kendall-tau'], over_all['Accuracy'], over_all['ROUGE-w'], over_all['PMR']))
        return result_list, over_all
def train(training_data_file, valid_data_file, super_batch_size, tokenizer, mode, kw, p_key,
          model1, device, model2, model3, batch_size, num_epoch,
          gradient_accumulation_steps, lr1, lr2, lambda_, valid_critic, early_stop):
    '''Train the three models.

    Train the models through bundles.

    Args:
        training_data_file (list) : training data json file, raw json file used to load data
        super_batch_size (int) : how many samples will be loaded into memory at once
        tokenizer : SentencePiece tokenizer used to obtain the token ids
        mode (str) : mode of the passage format; could be a list (processed) or a long string (unprocessed)
        kw (str) : the key word mapped to the passage in each data dictionary. Defaults to 'abstract'
        p_key (str) : the key word to search for a specific passage. Defaults to 'title'
        model1 (nn.DataParallel) : local dependency encoder
        device (torch.device) : the device which models and data are on
        model2 (nn.Module) : global coherence encoder
        model3 (nn.Module) : attention decoder
        batch_size (int) : Defaults to 4.
        num_epoch (int) : Defaults to 1.
        gradient_accumulation_steps (int) : Defaults to 1.
        lr (float) : Defaults to 1e-4. The starting learning rate.
        lambda_ (float) : Defaults to 0.01. Balance factor for parameter normalization.
        valid_critic (str) : which critic to use for early-stop evaluation.
        early_stop (int) : the early-stop patience. Defaults to 5.
    '''
    # Prepare optimizer for Sys1
    param_optimizer_bert = list(model1.named_parameters())
    param_optimizer_others = list(model2.named_parameters()) + list(model3.named_parameters())
    no_decay = ['bias', 'LayerNorm.bias', 'LayerNorm.weight']
    # We tend to fix the embedding; for now we did not locate the embedding layer.
    optimizer_grouped_parameters_bert = [{
        'params': [p for n, p in param_optimizer_bert if not any(nd in n for nd in no_decay)],
        'weight_decay': lambda_
    }, {
        'params': [p for n, p in param_optimizer_bert if any(nd in n for nd in no_decay)],
        'weight_decay': 0.0
    }]
    optimizer_grouped_parameters_others = [{
        'params': [p for n, p in param_optimizer_others if not any(nd in n for nd in no_decay)],
        'weight_decay': lambda_
    }, {
        'params': [p for n, p in param_optimizer_others if any(nd in n for nd in no_decay)],
        'weight_decay': 0.0
    }]
    # We should add a module to count the number of parameters here.
    critic = nn.NLLLoss(reduction='none')
    line_num = int(os.popen("wc -l " + training_data_file).read().split()[0])
    global_step = 0  # global step
    opt1 = BertAdam(optimizer_grouped_parameters_bert,
                    lr=lr1,
                    warmup=0.1,
                    t_total=line_num / batch_size * num_epoch)  # optimizer 1
    # opt = Adam(optimizer_grouped_parameter, lr=lr)
    opt2 = Adadelta(optimizer_grouped_parameters_others, lr=lr2, rho=0.95)
    model1.to(device)
    # model1.train()
    # model2.to(device)
    # model2.train()
    # model3.to(device)
    # model3.train()
    # warmed = True
    for epoch in trange(num_epoch, desc='Epoch'):
        smooth_mean = WindowMean()
        opt1.zero_grad()
        opt2.zero_grad()
        for superbatch, line_num in load_superbatch(training_data_file, super_batch_size):
            bundles = []
            for data in superbatch:
                try:
                    bundles.append(
                        convert_passage_to_samples_bundle(tokenizer, data, mode, kw, p_key))
                except Exception:
                    print_exc()
            num_batch, dataloader = homebrew_data_loader(bundles, batch_size=batch_size)
            tqdm_obj = tqdm(dataloader, total=num_batch)
            num_steps = line_num
            for step, batch in enumerate(tqdm_obj):
                try:
                    # batch[0] = batch[0].to(device)
                    # batch[1] = batch[1].to(device)
                    # batch[2] = batch[2].to(device)
                    batch = tuple(t for t in batch)
                    log_prob_loss, pointers_output, ground_truth = calculate_loss(
                        batch, model1, model2, model3, device, critic)
                    # here we need to add code to compute rouge-w and acc
                    rouge_ws = []
                    accs = []
                    ken_taus = []
                    pmrs = []
                    for pred, true in zip(pointers_output, ground_truth):
                        rouge_ws.append(rouge_w(pred, true))
                        accs.append(acc(pred, true))
                        ken_taus.append(kendall_tau(pred, true))
                        pmrs.append(pmr(pred, true))
                    log_prob_loss.backward()
                    # ******** In the following code we add early stopping ************
                    if (step + 1) % gradient_accumulation_steps == 0:
                        # Modify learning rate with the special warm-up BERT uses
                        # (from the BERT PyTorch examples).
                        lr_this_step = lr1 * warmup_linear(global_step / num_steps, warmup=0.1)
                        for param_group in opt1.param_groups:
                            param_group['lr'] = lr_this_step
                        global_step += 1
                        opt2.step()
                        opt2.zero_grad()
                        smooth_mean_loss = smooth_mean.update(log_prob_loss.item())
                        tqdm_obj.set_description(
                            '{}: {:.4f}, {}: {:.4f}, smooth_mean_loss: {:.4f}'.format(
                                'accuracy', np.mean(accs), 'rouge-w', np.mean(rouge_ws),
                                smooth_mean_loss))
                        # During the warming period, model1 is frozen and model2 is
                        # trained to normal weights.
                        if smooth_mean_loss < 1.0 and step > 100:  # ugly manual hyperparameter
                            warmed = True
                        if warmed:
                            opt1.step()
                        opt1.zero_grad()
                    if step % 1000 == 0:
                        output_model_file = './models/bert-base-cased.bin.tmp'
                        saved_dict = {'params1': model1.module.state_dict()}
                        saved_dict['params2'] = model2.state_dict()
                        saved_dict['params3'] = model3.state_dict()
                        torch.save(saved_dict, output_model_file)
                except Exception:
                    traceback.print_exc()
                    exit()
                # if mode == 'list':
                #     print(batch._id)
        if epoch < 5:
            best_score = 0
            continue
        with torch.no_grad():
            print('valid..............')
            valid_critic_dict = {
                'rouge-w': rouge_w,
                'acc': acc,
                'ken-tau': kendall_tau,
                'pmr': pmr
            }
            for superbatch, _ in load_superbatch(valid_data_file, super_batch_size):
                bundles = []
                for data in superbatch:
                    try:
                        bundles.append(
                            convert_passage_to_samples_bundle(tokenizer, data, mode, kw, p_key))
                    except Exception:
                        print_exc()
                num_batch, valid_dataloader = homebrew_data_loader(bundles, batch_size=1)
                valid_value = []
                for step, batch in enumerate(valid_dataloader):
                    try:
                        batch = tuple(t for idx, t in enumerate(batch))
                        pointers_output, ground_truth = dev_test(batch, model1, model2, model3, device)
                        valid_value.append(
                            valid_critic_dict[valid_critic](pointers_output, ground_truth))
                    except Exception:
                        traceback.print_exc()
                    # if mode == 'list':
                    #     print(batch._id)
            score = np.mean(valid_value)
            print('epc:{}, {} : {:.2f} best : {:.2f}\n'.format(
                epoch, valid_critic, score, best_score))
            if score > best_score:
                best_score = score
                best_iter = epoch
                print('Saving model to {}'.format(output_model_file))
                # save model structure
                saved_dict = {'params1': model1.module.state_dict()}  # save parameters
                saved_dict['params2'] = model2.state_dict()  # save parameters
                saved_dict['params3'] = model3.state_dict()
                torch.save(saved_dict, output_model_file)
            # # print('save best model at epc={}'.format(epc))
            # checkpoint = {'model': model.state_dict(),
            #               'args': args,
            #               'loss': best_score}
            # torch.save(checkpoint, '{}/{}.best.pt'.format(args.model_path, args.model))
            if early_stop and (epoch - best_iter) >= early_stop:
                print('early stop at epc {}'.format(epoch))
                break
def train(rank, args):
    if rank is None:
        is_distributed = False
        rank = 0
    else:
        is_distributed = True
    if is_distributed:
        utils.setuplogger()
        dist.init_process_group('nccl',
                                world_size=args.nGPU,
                                init_method='env://',
                                rank=rank)
        torch.cuda.set_device(rank)

    news, news_index, category_dict, subcategory_dict, word_dict = read_news(
        os.path.join(args.train_data_dir, 'news.tsv'), args, mode='train')
    news_title, news_category, news_subcategory = get_doc_input(
        news, news_index, category_dict, subcategory_dict, word_dict, args)
    news_combined = np.concatenate(
        [x for x in [news_title, news_category, news_subcategory] if x is not None],
        axis=-1)

    if rank == 0:
        logging.info('Initializing word embedding matrix...')
    embedding_matrix, have_word = utils.load_matrix(args.glove_embedding_path,
                                                    word_dict,
                                                    args.word_embedding_dim)
    if rank == 0:
        logging.info(f'Word dict length: {len(word_dict)}')
        logging.info(f'Have words: {len(have_word)}')
        logging.info(f'Missing rate: {(len(word_dict) - len(have_word)) / len(word_dict)}')

    module = importlib.import_module(f'model.{args.model}')
    model = module.Model(args, embedding_matrix, len(category_dict), len(subcategory_dict))

    if args.load_ckpt_name is not None:
        ckpt_path = utils.get_checkpoint(args.model_dir, args.load_ckpt_name)
        checkpoint = torch.load(ckpt_path, map_location='cpu')
        model.load_state_dict(checkpoint['model_state_dict'])
        logging.info(f"Model loaded from {ckpt_path}.")

    optimizer = optim.Adam(model.parameters(), lr=args.lr)
    if args.enable_gpu:
        model = model.cuda(rank)
    if is_distributed:
        model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[rank])
    # if rank == 0:
    #     print(model)
    #     for name, param in model.named_parameters():
    #         print(name, param.requires_grad)

    data_file_path = os.path.join(args.train_data_dir,
                                  f'behaviors_np{args.npratio}_{rank}.tsv')
    dataset = DatasetTrain(data_file_path, news_index, news_combined, args)
    dataloader = DataLoader(dataset, batch_size=args.batch_size)

    logging.info('Training...')
    for ep in range(args.start_epoch, args.epochs):
        loss = 0.0
        accuracy = 0.0
        for cnt, (log_ids, log_mask, input_ids, targets) in enumerate(dataloader):
            if args.enable_gpu:
                log_ids = log_ids.cuda(rank, non_blocking=True)
                log_mask = log_mask.cuda(rank, non_blocking=True)
                input_ids = input_ids.cuda(rank, non_blocking=True)
                targets = targets.cuda(rank, non_blocking=True)

            bz_loss, y_hat = model(log_ids, log_mask, input_ids, targets)
            loss += bz_loss.data.float()
            accuracy += utils.acc(targets, y_hat)
            optimizer.zero_grad()
            bz_loss.backward()
            optimizer.step()

            if cnt % args.log_steps == 0:
                logging.info('[{}] Ed: {}, train_loss: {:.5f}, acc: {:.5f}'.format(
                    rank, cnt * args.batch_size, loss.data / cnt, accuracy / cnt))

            if rank == 0 and cnt != 0 and cnt % args.save_steps == 0:
                ckpt_path = os.path.join(args.model_dir, f'epoch-{ep+1}-{cnt}.pt')
                torch.save(
                    {
                        'model_state_dict': {
                            '.'.join(k.split('.')[1:]): v
                            for k, v in model.state_dict().items()
                        } if is_distributed else model.state_dict(),
                        'category_dict': category_dict,
                        'word_dict': word_dict,
                        'subcategory_dict': subcategory_dict
                    }, ckpt_path)
                logging.info(f"Model saved to {ckpt_path}.")

    logging.info('Training finished.')
    if rank == 0:
        ckpt_path = os.path.join(args.model_dir, f'epoch-{ep+1}.pt')
        torch.save(
            {
                'model_state_dict': {
                    '.'.join(k.split('.')[1:]): v
                    for k, v in model.state_dict().items()
                } if is_distributed else model.state_dict(),
                'category_dict': category_dict,
                'subcategory_dict': subcategory_dict,
                'word_dict': word_dict,
            }, ckpt_path)
        logging.info(f"Model saved to {ckpt_path}.")
#%% Testing the result; needs modification if a different input is used
def Yes_No(digit):
    if digit > 0.02:  # setting the threshold
        return 1
    else:
        return 0


def test_model(testSet, model):
    _, n = testSet.size()
    n = n - 59
    pred = torch.zeros(n)
    for i in range(n):
        curr_win = testSet[1:9, i:i + 60]
        curr_win = curr_win.reshape(1, 1, 8, 60)
        pred[i] = Yes_No(model(curr_win))
        # print(i)
    return pred


test_result = test_model(torch.tensor(test, dtype=torch.float32), net)

#%%
from utils import acc

result = acc(test_result.numpy(), test[10, :], 100)
def train(**kwargs):
    model = kwargs['model']
    dataloader = kwargs['dataloader']
    epochs = kwargs['epochs']
    pth_file = kwargs['pth']
    root = kwargs['root']
    alpha = kwargs['alpha']
    # optimizer = torch.optim.SGD(params=model.parameters(), lr=1e-2, momentum=0.9, weight_decay=1e-5)
    optimizer = torch.optim.Adam(params=model.parameters(), lr=1e-3, weight_decay=1e-5)
    mse_criterion = nn.MSELoss()
    kl_criterion = nn.KLDivLoss(reduction='mean')

    # ============ K-means ======================================
    features = []
    y_true = []
    for x, y in dataloader:
        y_true.append(y.detach().cpu().numpy())
        x = x.cuda()
        f = model(x)['feature']
        features.append(f.detach().cpu().numpy())
    features = np.concatenate(features, axis=0)
    kmeans = KMeans(n_clusters=model.n_clusters, random_state=0).fit(features)
    cluster_centers = kmeans.cluster_centers_
    cluster_centers = torch.tensor(cluster_centers, dtype=torch.float).cuda()
    model.ClusteringLayer.centers = torch.nn.Parameter(cluster_centers)
    # ===========================================================
    y_pred = kmeans.predict(features)
    y_true = np.concatenate(y_true, axis=0)
    accuracy = acc(y_true, y_pred)
    logger.info('Initial Accuracy: {}'.format(accuracy))

    best_acc = 0.0
    model.train()
    for epoch in range(1, epochs + 1):
        train_mse_loss = 0.0
        train_kl_loss = 0.0
        accuracy = 0.0
        for cnt, (x, y) in enumerate(dataloader):
            x = x.cuda()
            # =================== forward =====================
            output = model(x)
            x_hat = output['rec']
            rec_loss = mse_criterion(x_hat, x)
            train_mse_loss += rec_loss.item()
            source_ = output['source']
            # if the target is not detached, the model collapses
            target_ = model.target_distribute(source_).detach()
            kl_loss = kl_criterion(source_.log(), target_)
            train_kl_loss += kl_loss.item()
            y_pred = source_.argmax(1)
            accuracy += acc(y.cpu().numpy(), y_pred.cpu().numpy())
            if epoch % 10 == 0 and cnt == 0:
                visualize(epoch, output['feature'].detach().cpu().numpy(),
                          y.detach().cpu().numpy(), root)
                x = x[0]
                x_hat = x_hat[0]
                final = torch.cat([x, x_hat], dim=1).detach().cpu().numpy()
                final = np.transpose(final, (2, 1, 0))
                final = np.clip(final * 255.0, 0, 255).astype(np.uint8)
                cv2.imwrite(f"{root}/clustering.png", final)
            # =================== backward ====================
            optimizer.zero_grad()
            total_loss = rec_loss + alpha * kl_loss
            total_loss.backward()
            optimizer.step()
        # =================== log =========================
        train_mse_loss /= len(dataloader)
        train_kl_loss /= len(dataloader)
        accuracy /= len(dataloader)
        logger.info('epoch [{}/{}], MSE_loss:{:.4f}, KL_loss:{:.4f}, Accuracy:{:.4f}'.format(
            epoch, epochs, train_mse_loss, train_kl_loss, accuracy))
        if best_acc < accuracy:
            best_acc = accuracy  # track the best accuracy so the checkpoint is only overwritten on improvement
            torch.save(model.state_dict(), pth_file)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # print statistics
        running_loss += loss.item()
        if i % 200 == 199:  # print every 200 mini-batches
            print('[%d, %5d] loss: %.3f' % (epoch + 1, i + 1, running_loss / 200))
            running_loss = 0.0

print('Finished Training')

test_result = test_model(torch.tensor(test_new, dtype=torch.float32), net, thrs)
try:
    result = acc(test_result.numpy(), test_new[10, :], 10)
    specificity = result[3]
    precision = result[1]
    TP = result[4]
except ZeroDivisionError:
    specificity = 0
    precision = 0
    TP = 0

count = 0
while not (0.45 <= specificity <= 0.55) and count < 10000:
    if specificity > 0.55:
        thrs += 0.01
    if specificity < 0.45:
        thrs -= 0.01
    test_result = test_model(
eval_batches, num_eval_batches, num_eval_samples = get_batch(hp.eval1, hp.eval2,
                                                             100000, 100000,
                                                             hp.vocab, hp.batch_size,
                                                             shuffle=False)
iter = tf.data.Iterator.from_structure(eval_batches.output_types,
                                       eval_batches.output_shapes)
xs, ys = iter.get_next()
decoder_inputs, y, y_seqlen, sents2 = ys
eval_init_op = iter.make_initializer(eval_batches)

logging.info("# Load model")
m = Transformer(hp)
y_mask = m.y_masks(y)
y_hat, eval_summaries = m.eval(xs, ys, y_mask)

saver = tf.train.Saver()
with tf.Session() as sess:
    ckpt = tf.train.latest_checkpoint(hp.logdir)
    saver.restore(sess, ckpt)
    summary_writer = tf.summary.FileWriter(hp.logdir, sess.graph)
    sess.run(eval_init_op)
    _y_hat, _y = sess.run([y_hat, y])
    print(_y_hat)
    print(_y)
    print(acc(_y_hat, _y))
    # hypotheses = get_hypotheses(1, 128, sess, y_hat, m.idx2token)
    # print(hypotheses)
genreId = 1
genre_map = {}
clips = utils.get_clip_set()
with open('initial/genres.csv', encoding="utf8") as csvfile:
    reader = csv.reader(csvfile)
    next(reader)
    with open('cleaned/genres_cleaned.csv', 'w', encoding="utf8") as out:
        wr = csv.writer(out)
        added = set()
        for row in reader:
            if row[0] in clips:
                clipid = row[0]
                genre = row[1]
                l = utils.acc(genre)
                b = utils.lettres(l)
                if utils.diff_letters(l, b) < 2 and len(b) != 0 and b.lower() != 'null' and b.lower() != 'none':
                    if b not in genres:
                        new_row = (genreId, b)
                        if new_row not in added:
                            genres.add(b)
                            genre_map[b] = genreId
                            genreId += 1
                            wr.writerow(new_row)
                            added.add(new_row)
                    else:
                        new_row = (genre_map[b], b)
                        if new_row not in added:
                            wr.writerow(new_row)
def train(args):
    # Only support title Turing now
    assert args.enable_hvd  # TODO
    if args.enable_hvd:
        import horovod.torch as hvd

    if args.load_ckpt_name is not None:
        # TODO: choose ckpt_path
        ckpt_path = utils.get_checkpoint(args.model_dir, args.load_ckpt_name)
    else:
        ckpt_path = utils.latest_checkpoint(args.model_dir)

    hvd_size, hvd_rank, hvd_local_rank = utils.init_hvd_cuda(args.enable_hvd, args.enable_gpu)

    tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")
    config = AutoConfig.from_pretrained("bert-base-uncased", output_hidden_states=True)
    bert_model = AutoModel.from_pretrained("bert-base-uncased", config=config)
    # bert_model.load_state_dict(torch.load('../bert_encoder_part.pkl'))

    # freeze parameters
    for name, param in bert_model.named_parameters():
        if name not in finetuneset:
            param.requires_grad = False

    news, news_index, category_dict, domain_dict, subcategory_dict = read_news_bert(
        os.path.join(args.root_data_dir, f'{args.dataset}/{args.train_dir}/news.tsv'),
        args, tokenizer)
    news_title, news_title_type, news_title_attmask, \
        news_abstract, news_abstract_type, news_abstract_attmask, \
        news_body, news_body_type, news_body_attmask, \
        news_category, news_domain, news_subcategory = get_doc_input_bert(
            news, news_index, category_dict, domain_dict, subcategory_dict, args)

    news_combined = np.concatenate([
        x for x in [news_title, news_title_type, news_title_attmask,
                    news_abstract, news_abstract_type, news_abstract_attmask,
                    news_body, news_body_type, news_body_attmask,
                    news_category, news_domain, news_subcategory] if x is not None
    ], axis=1)

    model = ModelBert(args, bert_model, len(category_dict), len(domain_dict), len(subcategory_dict))
    word_dict = None

    if args.enable_gpu:
        model = model.cuda()

    lr_scaler = hvd.local_size()
    optimizer = optim.Adam(model.parameters(), lr=args.lr)

    if args.enable_hvd:
        hvd.broadcast_parameters(model.state_dict(), root_rank=0)
        hvd.broadcast_optimizer_state(optimizer, root_rank=0)
        compression = hvd.Compression.none
        optimizer = hvd.DistributedOptimizer(optimizer,
                                             named_parameters=model.named_parameters(),
                                             compression=compression,
                                             op=hvd.Average)

    dataloader = DataLoaderTrain(
        news_index=news_index,
        news_combined=news_combined,
        word_dict=word_dict,
        data_dir=os.path.join(args.root_data_dir, f'{args.market}/{args.train_dir}'),
        filename_pat=args.filename_pat,
        args=args,
        world_size=hvd_size,
        worker_rank=hvd_rank,
        cuda_device_idx=hvd_local_rank,
        enable_prefetch=True,
        enable_shuffle=True,
        enable_gpu=args.enable_gpu,
    )

    logging.info('Training...')
    for ep in range(args.epochs):
        loss = 0.0
        accuracy = 0.0
        for cnt, (log_ids, log_mask, input_ids, targets) in enumerate(dataloader):
            if cnt > args.max_steps_per_epoch:
                break
            if args.enable_gpu:
                log_ids = log_ids.cuda(non_blocking=True)
                log_mask = log_mask.cuda(non_blocking=True)
                input_ids = input_ids.cuda(non_blocking=True)
                targets = targets.cuda(non_blocking=True)

            bz_loss, y_hat = model(input_ids, log_ids, log_mask, targets)
            loss += bz_loss.data.float()
            accuracy += utils.acc(targets, y_hat)
            optimizer.zero_grad()
            bz_loss.backward()
            optimizer.step()

            if cnt % args.log_steps == 0:
                logging.info('[{}] Ed: {}, train_loss: {:.5f}, acc: {:.5f}'.format(
                    hvd_rank, cnt * args.batch_size, loss.data / cnt, accuracy / cnt))

            # save model minibatch
            print(hvd_rank, cnt, args.save_steps, cnt % args.save_steps)
            if hvd_rank == 0 and cnt % args.save_steps == 0:
                ckpt_path = os.path.join(args.model_dir, f'epoch-{ep+1}-{cnt}.pt')
                torch.save(
                    {
                        'model_state_dict': model.state_dict(),
                        'category_dict': category_dict,
                        'word_dict': word_dict,
                        'domain_dict': domain_dict,
                        'subcategory_dict': subcategory_dict
                    }, ckpt_path)
                logging.info(f"Model saved to {ckpt_path}")

        loss /= cnt
        print(ep + 1, loss)

        # save model at the end of the epoch
        if hvd_rank == 0:
            ckpt_path = os.path.join(args.model_dir, f'epoch-{ep+1}.pt')
            torch.save(
                {
                    'model_state_dict': model.state_dict(),
                    'category_dict': category_dict,
                    'word_dict': word_dict,
                    'domain_dict': domain_dict,
                    'subcategory_dict': subcategory_dict
                }, ckpt_path)
            logging.info(f"Model saved to {ckpt_path}")

    dataloader.join()
import csv
import utils

with open('initial/release_dates.csv', encoding="utf8") as csvfile:
    reader = csv.reader(csvfile)
    with open('cleaned/releasedates_cleaned.csv', 'w', encoding="utf8") as out:
        wr = csv.writer(out)
        country_map = utils.get_country_map()
        clips = utils.get_clip_set()
        next(reader)
        added = set()
        for row in reader:
            if row[0] in clips:
                clipid = row[0]
                no_accents = utils.acc(row[1])
                only_letters = utils.lettres(no_accents).lstrip()
                if only_letters == 'Democratic Republic of Congo':
                    only_letters = 'Democratic Republic of the Congo'
                # only keep the numbers and the letters in the "ReleaseDate" column
                only_numbers_letters = utils.alet(row[2])
                if only_letters in country_map:
                    countryId = country_map[only_letters]
                    new_row = (clipid, countryId)
                    if new_row not in added:
                        wr.writerow((clipid, countryId, only_numbers_letters))
                        added.add(new_row)
languageId = 1
language_map = {}
clips = utils.get_clip_set()
with open('initial/languages.csv', encoding="utf8") as csvfile:
    reader = csv.reader(csvfile)
    next(reader)
    with open('cleaned/languages_cleaned.csv', 'w', encoding="utf8") as out:
        wr = csv.writer(out)
        added = set()
        for row in reader:
            if row[0] in clips:
                clipid = row[0]
                language = row[1]
                l = utils.acc(language)
                b = utils.lettres(l)
                if b == 'some dialogue with English subtitles some without':
                    b = 'English'
                if utils.diff_letters(l, b) < 2 and len(b) != 0 and b.lower() != 'null' and b.lower() != 'none':
                    if b not in languages:
                        new_row = (languageId, b)
                        if new_row not in added:
                            languages.add(b)
                            language_map[b] = languageId
                            languageId += 1
                            wr.writerow(new_row)
                            added.add(new_row)
                    else:
                        new_row = (language_map[b], b)