def main():
    """Train and evaluate a two-headed GCN over a word-document graph.

    Builds a graph from a CEFR-J word list plus readability datasets
    (optionally EFCamDat), trains a GCN with separate word/document output
    heads, logs per-epoch train/dev statistics to stderr, evaluates on
    dev/test, and dumps all collected stats as JSON to stdout.
    """
    # TODO: Parse hyper-parameters from a json config file?
    parser = argparse.ArgumentParser()
    parser.add_argument('--cuda', action='store_true', default=False, help='Enable CUDA training.')
    parser.add_argument('--seed', type=int, default=42, help='Random seed.')
    parser.add_argument('--epochs', type=int, default=200, help='Number of epochs to train.')
    parser.add_argument('--lr', type=float, default=0.01, help='Initial learning rate.')
    parser.add_argument('--weight-decay', type=float, default=5e-4, help='Weight decay (L2 loss on parameters).')
    parser.add_argument('--nhidden', type=int, nargs='*', default=[16], help='Number of hidden units for each layer.')
    parser.add_argument('--dropout', type=float, default=0.5, help='Dropout rate (1 - keep probability).')
    parser.add_argument('--alpha', type=float, default=0.5, help='Mixing weight between word and document losses.')
    parser.add_argument('--word-features', type=str, nargs='*', default=[], help='List of word features to use. If empty, uses identity matrix.')
    parser.add_argument('--doc-features', type=str, nargs='*', default=[], help='List of doc features to use. If empty, uses identity matrix.')
    parser.add_argument('--activation', type=str, default='none', choices=['none', 'relu', 'tanh'], help='Add the specified activation function for each GCN layer.')
    parser.add_argument('--efcamdat-file-path', type=str, default=None, help='Path to EFCamDat. '
                        'If not specified, the dataset will not be used.')
    parser.add_argument('--heads', type=str, default='twin', choices=['single', 'twin'], help='Use either single/same or different linear layer for both word and doc as a final layer.')
    parser.add_argument('--tfidf', action='store_true', help='If specified, weight the adjacency matrix by tf.idf')
    parser.add_argument('--pmi-window-width', type=int, default=-1, help='Window size for calculating PMI, which is disabled when -1')
    # FIX: the two adjacent literals were previously concatenated without a
    # separating space, producing "...to convertclassification...".
    parser.add_argument('--conversion', type=str, default='max', choices=['max', 'weighted_sum'],
                        help='If using correlation during evaluation, select whether to convert '
                        'classification to a real value by weighted sum or taking the max.')
    parser.add_argument('--mode', type=str, default='classification', choices=['classification', 'regression'], help='Use either classification or regression loss during training.')
    # FIX: argparse %-formats help strings, so a bare "%" makes "-h" crash
    # with ValueError; it must be escaped as "%%".
    parser.add_argument('--training-portion', type=int, default=10, help='Specify the amount of training data between 1 (10%%) and 10 (100%%).')
    args = parser.parse_args()

    np.random.seed(args.seed)
    torch.manual_seed(args.seed)

    words = list(read_cefrj_wordlist(training_portion=args.training_portion))
    datasets = [
        read_cambridge_readability_dataset(training_portion=args.training_portion),
        read_a1_passages(training_portion=args.training_portion)]
    # FIX: materialize EFCamDat once and reuse it for the stats below,
    # instead of re-reading the file a second time.
    efcamdat_docs = []
    if args.efcamdat_file_path:
        efcamdat_docs = list(read_efcamdat_dataset(args.efcamdat_file_path))
        datasets.append(efcamdat_docs)
    docs = list(itertools.chain(*datasets))

    # Initialize FeatureExtractor
    doc_value2feat = {feat.value: feat for feat in DocFeature}
    doc_features = {doc_value2feat[value] for value in args.doc_features}
    word_value2feat = {feat.value: feat for feat in WordFeature}
    word_features = {word_value2feat[value] for value in args.word_features}
    feature_extractor = FeatureExtractor(word_features=word_features,
                                         doc_features=doc_features,
                                         cuda=args.cuda)

    # Build and index the word-document graph.
    graph = Graph(feature_extractor)
    graph.add_words(words)
    graph.add_documents(docs)
    num_labeled_docs = sum(1 for doc in docs if doc.label)
    # it's too much if a word appears more than 10% of labeled docs
    max_word_freq = int(.1 * num_labeled_docs)
    graph.build_mapping(min_word_freq=3, max_word_freq=max_word_freq,
                        min_document_len=5)
    graph.index()
    print(graph, file=sys.stderr)  # show graph stats

    adj = graph.get_adj(use_tfidf=args.tfidf,
                        pmi_window_width=args.pmi_window_width)
    x = graph.get_feature_matrix()
    type_masks, split_masks = graph.get_type_and_split_masks()
    labels = graph.get_labels()
    # Real-valued targets for regression mode (one beta value per CEFR level).
    labels_beta = torch.Tensor(
        [cefr_to_beta(CEFR_LEVELS[i.item()]) for i in labels])

    # TODO: complete the training pipeline
    # Training
    nclass = 1 if args.mode == "regression" else len(CEFR_LEVELS)
    model = GCN(nfeat=x.shape[1],
                nhidden=args.nhidden,
                nclass=nclass,
                dropout=args.dropout,
                activation=args.activation,
                heads=args.heads)
    if args.cuda:
        adj = adj.cuda()
        model = model.cuda()
        x = x.cuda()
        labels = labels.cuda()
        labels_beta = labels_beta.cuda()
        for k, v in type_masks.items():
            type_masks[k] = v.cuda()
        for k, v in split_masks.items():
            split_masks[k] = v.cuda()
    optimizer = optim.Adam(model.parameters(), lr=args.lr,
                           weight_decay=args.weight_decay)

    stats = defaultdict(list)
    # Scalar entries coexist with the list-valued per-epoch entries.
    stats['num_unlabeled_docs'] = len(efcamdat_docs)
    stats['num_docs'] = graph.get_num_indexed_docs()

    for epoch in range(args.epochs):
        print('Epoch: {:04d}'.format(epoch + 1), file=sys.stderr)
        model.train()
        optimizer.zero_grad()
        logit1, logit2 = model(adj, x)
        # Per-head training losses restricted to TRAIN-split nodes.
        if args.mode == "regression":
            loss1 = masked_mean_squared_error(
                logit1, labels_beta,
                type_masks[NodeType.WORD] * split_masks[DatasetSplit.TRAIN])
            loss2 = masked_mean_squared_error(
                logit2, labels_beta,
                type_masks[NodeType.DOC] * split_masks[DatasetSplit.TRAIN])
        else:
            loss1 = masked_cross_entropy(
                logit1, labels,
                type_masks[NodeType.WORD] * split_masks[DatasetSplit.TRAIN])
            loss2 = masked_cross_entropy(
                logit2, labels,
                type_masks[NodeType.DOC] * split_masks[DatasetSplit.TRAIN])
        # alpha mixes the word vs. doc loss; *2 keeps the magnitude
        # comparable to an unweighted sum.
        loss = (args.alpha * loss1 + (1. - args.alpha) * loss2) * 2
        loss.backward()
        optimizer.step()

        # compute and save loss
        with torch.no_grad():
            if args.mode == "regression":
                dev_loss1 = masked_mean_squared_error(
                    logit1, labels_beta,
                    type_masks[NodeType.WORD] * split_masks[DatasetSplit.DEV])
                dev_loss2 = masked_mean_squared_error(
                    logit2, labels_beta,
                    type_masks[NodeType.DOC] * split_masks[DatasetSplit.DEV])
            else:
                dev_loss1 = masked_cross_entropy(
                    logit1, labels,
                    type_masks[NodeType.WORD] * split_masks[DatasetSplit.DEV])
                dev_loss2 = masked_cross_entropy(
                    logit2, labels,
                    type_masks[NodeType.DOC] * split_masks[DatasetSplit.DEV])
            dev_loss = dev_loss1 + dev_loss2
        print('\tloss: {:.4f}, dev_loss: {:.4f}'.format(
            loss.item(), dev_loss.item()), file=sys.stderr)
        stats['train_loss'].append(loss.item())
        stats['dev_loss'].append(dev_loss.item())

        # compute and save accuracy for train and dev
        model.eval()
        for split in [DatasetSplit.TRAIN, DatasetSplit.DEV]:
            for node_type in [NodeType.WORD, NodeType.DOC]:
                if node_type == NodeType.WORD:
                    logit = logit1
                else:
                    logit = logit2
                acc = accuracy(logit, labels,
                               type_masks[node_type] * split_masks[split],
                               mode=args.mode)
                corr = correlation(logit, labels,
                                   type_masks[node_type] * split_masks[split],
                                   mode=args.mode, conversion=args.conversion)
                stats_acc_key = '{}_acc_{}'.format(split.value, node_type.value)
                print('\t{}: {:.4f}'.format(stats_acc_key, acc), file=sys.stderr)
                stats[stats_acc_key].append(acc)
                stats_corr_key = '{}_corr_{}'.format(split.value, node_type.value)
                print('\t{}: {:.4f}'.format(stats_corr_key, corr), file=sys.stderr)
                stats[stats_corr_key].append(corr)
        # Per-epoch macro averages over the two heads on the dev split.
        macro_avg_dev_acc = (stats['dev_acc_word'][-1] + stats['dev_acc_doc'][-1]) / 2
        stats['dev_acc_avr'].append(macro_avg_dev_acc)
        macro_avg_dev_corr = (stats['dev_corr_word'][-1] + stats['dev_corr_doc'][-1]) / 2
        stats['dev_corr_avr'].append(macro_avg_dev_corr)

    # Evaluation
    model.eval()  # turn off dropout (if we are using one)
    logit1, logit2 = model(adj, x)
    print('Evaluation', file=sys.stderr)
    for split in [DatasetSplit.DEV, DatasetSplit.TEST]:
        for node_type in NodeType:
            if node_type == NodeType.WORD:
                logit = logit1
            else:
                logit = logit2
            acc = accuracy(logit, labels,
                           type_masks[node_type] * split_masks[split],
                           mode=args.mode)
            corr = correlation(logit, labels,
                               type_masks[node_type] * split_masks[split],
                               mode=args.mode, conversion=args.conversion)
            stats_key_acc = 'eval_{}_acc_{}'.format(split.value, node_type.value)
            print('\t{}: {:.4f}'.format(stats_key_acc, acc), file=sys.stderr)
            stats[stats_key_acc].append(acc)
            stats_key_corr = 'eval_{}_corr_{}'.format(split.value, node_type.value)
            print('\t{}: {:.4f}'.format(stats_key_corr, corr), file=sys.stderr)
            stats[stats_key_corr].append(corr)
        macro_avg_acc = (stats[f"eval_{split.value}_acc_word"][-1]
                         + stats[f"eval_{split.value}_acc_doc"][-1]) / 2
        print('\teval_{}_acc_avr: {:.4f}'.format(split.value, macro_avg_acc),
              file=sys.stderr)
        stats[f'eval_{split.value}_acc_avr'].append(macro_avg_acc)
        macro_avg_corr = (stats[f"eval_{split.value}_corr_word"][-1]
                          + stats[f"eval_{split.value}_corr_doc"][-1]) / 2
        print('\teval_{}_corr_avr: {:.4f}'.format(split.value, macro_avg_corr),
              file=sys.stderr)
        stats[f'eval_{split.value}_corr_avr'].append(macro_avg_corr)

    # Dump stats
    print(json.dumps(stats))
for epoch in range(3): print('Start of epoch %d' % (epoch, )) for step, (batch_x, batch_y) in enumerate(train_data): run_optimization(batch_x, batch_y, step, loss_type=use_loss, use_vat=use_vat) if step % display_step == 0: embed, pred = conv_net(x_test) acc = utils.accuracy(pred, y_test) if use_loss == 'arcface': # arcface_logit = arcface_loss(embedding=embed, labels=y_test, out_num=num_classes, # weights=conv_net.out.weights[0], m=m_arcface) # embed_loss = tf.reduce_mean(focal_loss_with_softmax(logits=arcface_logit, labels=y_test)) # infer_loss = utils.cross_entropy_loss(pred, y_test) embed_loss, infer_loss = embed_infer_loss(embed, pred, y_test) print( "step: %i, embed_loss: %f, infer_loss: %f, accuracy: %f" % (step, embed_loss, infer_loss, acc)) else: loss = utils.cross_entropy_loss(pred, y_test) print("step: %i, loss: %f, accuracy: %f" % (step, loss, acc)) ''' for step, (batch_x, batch_y) in enumerate(train_data.take(training_steps), 1):
batch_pointclass_preds = lasernet_seg(x=batch_rv) L_train = loss(batch_pointclass_preds, batch_labels) if torch.isnan(L_train): print("L_train had value of nan") break lasernet_seg.zero_grad() L_train.backward() optimizer.step() # save loss and accuracy losses_train.append(L_train.item()) accs_train.append( accuracy(batch_pointclass_preds, batch_labels).item()) with torch.no_grad(): for batch_rv, batch_labels, _ in tqdm(val_dataloader): batch_pointclass_preds = lasernet_seg(x=batch_rv) L_val = loss(batch_pointclass_preds, batch_labels) if torch.isnan(L_val): print("L_val had value of nan") break losses_val.append(L_val.item()) accs_val.append( accuracy(batch_pointclass_preds, batch_labels).item())
def train(model, reglog, optimizer, loader, epoch):
    """
    Train the models on the dataset.

    One pass over ``loader``: the backbone ``model`` stays frozen in eval
    mode and only the linear head ``reglog`` is updated with cross-entropy.
    Returns ``(epoch, avg_loss, avg_top1, avg_top5)``.
    """
    # timing meters (data loading / whole batch) and training meters
    batch_time, data_time = AverageMeter(), AverageMeter()
    losses, top1, top5 = AverageMeter(), AverageMeter(), AverageMeter()

    # frozen feature extractor, trainable linear classifier
    model.eval()
    reglog.train()
    criterion = nn.CrossEntropyLoss().cuda()

    tick = time.perf_counter()
    for iter_epoch, (inp, target) in enumerate(loader):
        # time spent waiting for the next batch
        data_time.update(time.perf_counter() - tick)

        inp = inp.cuda(non_blocking=True)
        target = target.cuda(non_blocking=True)

        # features are extracted without gradients; only reglog backprops
        with torch.no_grad():
            output = model(inp)
        output = reglog(output)
        loss = criterion(output, target)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # bookkeeping
        acc1, acc5 = accuracy(output, target, topk=(1, 5))
        n = inp.size(0)
        losses.update(loss.item(), n)
        top1.update(acc1[0], n)
        top5.update(acc5[0], n)

        batch_time.update(time.perf_counter() - tick)
        tick = time.perf_counter()

        # periodic logging from the main process only
        if args.rank == 0 and iter_epoch % 50 == 0:
            logger.info("Epoch[{0}] - Iter: [{1}/{2}]\t"
                        "Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t"
                        "Data {data_time.val:.3f} ({data_time.avg:.3f})\t"
                        "Loss {loss.val:.4f} ({loss.avg:.4f})\t"
                        "Prec {top1.val:.3f} ({top1.avg:.3f})\t"
                        "LR {lr}".format(
                            epoch,
                            iter_epoch,
                            len(loader),
                            batch_time=batch_time,
                            data_time=data_time,
                            loss=losses,
                            top1=top1,
                            lr=optimizer.param_groups[0]["lr"],
                        ))
    return epoch, losses.avg, top1.avg.item(), top5.avg.item()
def train(opt: argparse.Namespace) -> None:
    """Train a Hierarchical Attention Network (HAN) on readability corpora.

    Reads the Cambridge readability dataset and A1 passages, builds
    train/dev/test DataLoaders, trains with class-weighted cross-entropy,
    checkpoints the model with the best dev accuracy, and finally reports
    dev/test loss, accuracy and correlation on stderr.

    Args:
        opt: Parsed options (batch_size, lr, momentum, clip, num_epoches,
            test_interval, es_patience, saved_path, mode, training_portion,
            word_hidden_size, sent_hidden_size, word2vec_path).
    """
    # NOTE(review): when CUDA is available only the CUDA RNG is seeded; the
    # CPU RNG is left unseeded — confirm this is intended.
    if torch.cuda.is_available():
        torch.cuda.manual_seed(123)
    else:
        torch.manual_seed(123)
    # Training batches are shuffled and truncated to full batches; eval
    # batches keep everything, in order.
    training_params = {
        "batch_size": opt.batch_size,
        "shuffle": True,
        "drop_last": True
    }
    test_params = {
        "batch_size": opt.batch_size,
        "shuffle": False,
        "drop_last": False
    }
    docs = list(
        itertools.chain(
            read_cambridge_readability_dataset(
                training_portion=opt.training_portion),
            read_a1_passages(training_portion=opt.training_portion)))
    # Corpus-wide maxima determine the fixed padding used by MyDataset.
    max_word_length, max_sent_length = get_max_lengths(docs)
    training_set = MyDataset(docs=docs,
                             split=DatasetSplit.TRAIN,
                             max_length_word=max_word_length,
                             max_length_sentences=max_sent_length)
    training_generator = DataLoader(training_set, **training_params)
    dev_set = MyDataset(docs=docs,
                        split=DatasetSplit.DEV,
                        max_length_word=max_word_length,
                        max_length_sentences=max_sent_length)
    dev_generator = DataLoader(dev_set, **test_params)
    test_set = MyDataset(docs=docs,
                         split=DatasetSplit.TEST,
                         max_length_word=max_word_length,
                         max_length_sentences=max_sent_length)
    test_generator = DataLoader(test_set, **test_params)
    model = HierAttNet(opt.word_hidden_size, opt.sent_hidden_size,
                       opt.batch_size, training_set.num_classes,
                       opt.word2vec_path, max_sent_length, max_word_length)

    # Handling skewed dataset: weight each class inversely to its frequency.
    label_dist = [0] * training_set.num_classes
    for doc in docs:
        label_id = CEFR2INT[doc.label]
        label_dist[label_id] += 1
    # NOTE(review): raises ZeroDivisionError if any class has no documents.
    weight = torch.FloatTensor([1 / i for i in label_dist])
    criterion = nn.CrossEntropyLoss(weight=weight)
    if torch.cuda.is_available():
        model = model.cuda()
        criterion = criterion.cuda()
    # The filter keeps only trainable parameters in the optimizer.
    optimizer = torch.optim.SGD(filter(lambda p: p.requires_grad,
                                       model.parameters()),
                                lr=opt.lr,
                                momentum=opt.momentum)
    best_metrics = {"accuracy": 0.0}
    best_epoch = 0
    model.train()
    num_iter_per_epoch = len(training_generator)
    for epoch in range(opt.num_epoches):
        # NOTE: `iter` shadows the builtin of the same name (kept as-is).
        for iter, (feature, label) in enumerate(training_generator):
            if torch.cuda.is_available():
                feature = feature.cuda()
                label = label.cuda()
            optimizer.zero_grad()
            # Reset the recurrent hidden state for each batch.
            model._init_hidden_state()
            predictions = model(feature)
            loss = criterion(predictions, label)
            loss.backward()
            # Gradient clipping guards against exploding RNN gradients.
            nn.utils.clip_grad_norm_(model.parameters(), opt.clip)
            optimizer.step()
            acc = accuracy(predictions,
                           label,
                           mask=torch.ones_like(label, dtype=torch.long),
                           mode=opt.mode)
            print(
                "Epoch: {}/{}, Iteration: {}/{}, Lr: {}, Loss: {}, Accuracy: {}"
                .format(epoch + 1, opt.num_epoches, iter + 1,
                        num_iter_per_epoch, optimizer.param_groups[0]['lr'],
                        loss, acc))
        # Periodic dev evaluation (note: also fires at epoch 0).
        if epoch % opt.test_interval == 0:
            te_loss, test_metrics = evaluate(criterion, dev_set, model,
                                             dev_generator, opt.mode)
            print(
                "Epoch: {}/{}, Lr: {}, Dev Loss: {}, Dev Accuracy: {}, Dev Corr: {}"
                .format(
                    epoch + 1,
                    opt.num_epoches,
                    optimizer.param_groups[0]['lr'],
                    te_loss,
                    test_metrics["accuracy"],
                    test_metrics["corr"],
                ),
                file=sys.stderr)
            model.train()
            # Model selection by dev accuracy; checkpoints the whole model.
            if test_metrics["accuracy"] > best_metrics["accuracy"]:
                best_metrics = test_metrics
                best_metrics["loss"] = te_loss
                best_epoch = epoch
                torch.save(model, opt.saved_path + os.sep + "whole_model_han")
            # Early stopping
            # NOTE(review): the message reports the *latest* dev loss, not the
            # lowest, and selection is actually by accuracy — wording misleads.
            if epoch - best_epoch > opt.es_patience > 0:
                print(
                    "Stop training at epoch {}. The lowest loss achieved is {}"
                    .format(epoch, te_loss))
                break
    # Reload the best checkpoint and evaluate once on the test split.
    model = torch.load(opt.saved_path + os.sep + "whole_model_han")
    te_loss, test_metrics = evaluate(criterion, test_set, model,
                                     test_generator, opt.mode)
    # NOTE(review): if no epoch ever improved dev accuracy, best_metrics has
    # no "loss"/"corr" keys and this print raises KeyError.
    print("Best Dev Loss: {}, Best Dev Accuracy: {}, Best Dev Corr: {}".format(
        best_metrics["loss"], best_metrics["accuracy"], best_metrics["corr"]),
          file=sys.stderr)
    print("Test Loss: {}, Test Accuracy: {}, Test Corr: {}".format(
        te_loss, test_metrics["accuracy"], test_metrics["corr"]),
          file=sys.stderr)
def train(model, data_loader, optimizer, epoch, train_mloss, train_rloss,
          train_acc, learning_rate, lr_wr, output_tensor):
    """
    Train CapsuleNet model on training set

    Args:
        model: The CapsuleNet model.
        data_loader: An interator over the dataset. It combines a dataset and a sampler.
        optimizer: Optimization algorithm.
        epoch: Current epoch.
        train_mloss: Open file-like handle; margin loss is appended per step.
        train_rloss: Open file-like handle; reconstruction loss per step.
        train_acc: Open file-like handle; batch accuracy per step.
        learning_rate: Open file-like handle; learning rate per step.
        lr_wr: Warm-restarts learning-rate scheduler (used when
            args.warm_restarts is set).
        output_tensor: Unused here (the write_tensor call is commented out).
    """
    print('===> Training mode')

    num_batches = len(data_loader)  # iteration per epoch. e.g: 469
    total_step = args.epochs * num_batches
    epoch_tot_acc = 0

    # Switch to train mode
    model.train()

    if args.cuda:
        # When we wrap a Module in DataParallel for multi-GPUs
        model = model.module

    start_time = timer()

    for batch_idx, (data, target) in enumerate(tqdm(data_loader, unit='batch')):
        batch_size = data.size(0)
        # NOTE(review): the "- num_batches" offset assumes `epoch` is 1-based
        # (so epoch 1 yields global steps starting at 0) — confirm the caller.
        global_step = batch_idx + (epoch * num_batches) - num_batches

        labels = target
        target_one_hot = utils.one_hot_encode(target, length=args.num_classes)
        # NOTE(review): hard-codes 10 classes here even though the encoding
        # above uses args.num_classes — fails for non-10-class datasets.
        assert target_one_hot.size() == torch.Size([batch_size, 10])

        data, target = Variable(data), Variable(target_one_hot)

        if args.cuda:
            data = data.to(args.device)
            target = target.to(args.device)
            labels = labels.to(args.device)

        # Train step - forward, backward and optimize
        optimizer.zero_grad()
        #utils.exponential_decay_LRR(optimizer, args.lr, global_step, args.decay_steps, args.decay_rate, args.staircase)
        # learning rate policies (mutually exclusive, first match wins)
        if args.find_lr:
            utils.find_lr(optimizer, global_step)
        elif args.exp_decay_lr:
            utils.exponential_decay_LRR(optimizer, args.lr, global_step,
                                        args.decay_steps, args.decay_rate,
                                        args.staircase)
        elif args.one_cycle_policy:
            utils.one_cycle_policy(optimizer, args.lr, global_step, total_step)
        elif args.warm_restarts:
            # lr_wr.update_lr(optimizer, num_batches)
            lr_wr.update_lr(optimizer)

        output, reconstruction = model(data, labels, True)
        # utils.write_tensor(output, output_tensor)
        loss, margin_loss, recon_loss = loss_func(output, target,
                                                  args.regularization_scale,
                                                  reconstruction, data,
                                                  args.device, batch_size)
        loss.backward()
        optimizer.step()

        # Record the current learning rate (lr of the last param group).
        for param_group in optimizer.param_groups:
            lr_temp = param_group['lr']
        learning_rate.write('%.10f \n' % lr_temp)

        # Calculate accuracy for each step and average accuracy for each epoch
        acc = utils.accuracy(output, labels, args.cuda)
        epoch_tot_acc += acc
        epoch_avg_acc = epoch_tot_acc / (batch_idx + 1)

        train_mloss.write('%.6f \n' % margin_loss)
        train_rloss.write('%.6f \n' % recon_loss)
        train_acc.write('%.6f \n' % acc)

        # Print losses
        if batch_idx % args.log_interval == 0:
            template = 'Epoch {}/{}, ' \
                       'Step {}/{}: ' \
                       '[Total loss: {:.6f},' \
                       '\tMargin loss: {:.6f},' \
                       '\tReconstruction loss: {:.6f},' \
                       '\tBatch accuracy: {:.6f},' \
                       '\tAccuracy: {:.6f}]'
            tqdm.write(
                template.format(
                    epoch, args.epochs, global_step, total_step,
                    loss.data.item(), margin_loss.data.item(),
                    recon_loss.data.item() if args.use_reconstruction_loss else 0,
                    acc, epoch_avg_acc))

    # Print time elapsed for an epoch
    end_time = timer()
    global avg_training_time_per_epoch
    # Running mean over epochs; NOTE(review): divides by `epoch`, so this
    # assumes epoch >= 1 (epoch == 0 would divide by zero).
    avg_training_time_per_epoch = (avg_training_time_per_epoch * (epoch - 1)
                                   + end_time - start_time) / epoch
    print('Time elapsed for epoch {}: {:.0f}s.'.format(
        epoch, end_time - start_time))
def job(tuning, params_path, devices, resume):
    """
    Run one training job (or one tuning trial) of the UNet experiment.

    Args:
        tuning: If truthy, load hyper-parameters from ``params_path`` into
            the module-level ``params`` and write a tuning-result row at the
            end; otherwise checkpoint the model every epoch.
        params_path: JSON file with the trial's hyper-parameters (tuning only).
        devices: Comma-separated CUDA device ids, e.g. ``"0,1"``.
        resume: Optional checkpoint path to resume model/optimizer from.

    Example:
        python exp0.py job --devices 0,1 -s
        python exp0.py tuning --devices 0,1 --n-gpu 1 --mode 'random' --n-iter 4
    """
    # FIX: `global params` must precede any use of `params` in this scope —
    # the original declared it *after* `exp_path = ...`, which is a
    # SyntaxError ("name 'params' is used prior to global declaration").
    global params

    exp_path = ROOT + f'experiments/{params["ex_name"]}/'
    os.environ['CUDA_VISIBLE_DEVICES'] = devices

    if tuning:
        with open(params_path, 'r') as f:
            params = json.load(f)
        mode_str = 'tuning'
        setting = '_'.join(f'{tp}-{params[tp]}' for tp in params['tuning_params'])
    else:
        mode_str = 'train'
        setting = ''

    logger, writer = utils.get_logger(
        log_dir=exp_path + f'{mode_str}/log/{setting}',
        tensorboard_dir=exp_path + f'{mode_str}/tf_board/{setting}')

    # Fixed-size validation holdout for comparable trials.
    train_df = pd.read_csv(ROOT + 'data/train.csv')
    train_df, val_df = train_test_split(train_df, test_size=1024,
                                        random_state=params['seed'])

    model = models.UNet(in_channels=3, n_classes=2, depth=4, ch_first=32,
                        padding=True, batch_norm=False,
                        up_mode='upconv').cuda()
    optimizer = utils.get_optim(model, params)

    if resume is not None:
        model, optimizer = utils.load_checkpoint(model, resume,
                                                 optimizer=optimizer)

    if len(devices.split(',')) > 1:
        model = nn.DataParallel(model)

    # Horizontal flip only at train time; normalization on both splits.
    data_transforms = {
        'train': transforms.Compose([
            transforms.ToPILImage(),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            transforms.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5])
        ]),
        'val': transforms.Compose([
            transforms.ToPILImage(),
            transforms.ToTensor(),
            transforms.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5])
        ]),
    }

    image_datasets = {
        'train': data_utils.CSVDataset(train_df, data_transforms['train']),
        'val': data_utils.CSVDataset(val_df, data_transforms['val'])
    }

    data_loaders = {
        'train': DataLoader(image_datasets['train'],
                            batch_size=params['batch_size'],
                            pin_memory=True,
                            shuffle=True,
                            drop_last=True,
                            num_workers=params['workers']),
        'val': DataLoader(image_datasets['val'],
                          batch_size=params['test_batch_size'],
                          pin_memory=True,
                          shuffle=False,
                          num_workers=params['workers'])
    }

    criterion = nn.CrossEntropyLoss()
    # Decay the lr by 10x at 70% and 90% of the run.
    scheduler = optim.lr_scheduler.MultiStepLR(
        optimizer,
        milestones=[int(params['epochs'] * 0.7), int(params['epochs'] * 0.9)],
        gamma=0.1)

    for epoch in range(params['epochs']):
        logger.info(
            f'Epoch {epoch}/{params["epochs"]} | lr: {optimizer.param_groups[0]["lr"]}'
        )

        # ============================== train ============================== #
        model.train(True)
        losses = utils.AverageMeter()
        prec1 = utils.AverageMeter()
        for i, (x, y) in tqdm(enumerate(data_loaders['train']),
                              total=len(data_loaders['train']),
                              miniters=50):
            x = x.to('cuda:0')
            y = y.to('cuda:0', non_blocking=True)
            outputs = model(x)
            loss = criterion(outputs, y)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            acc = utils.accuracy(outputs, y)
            losses.update(loss.item(), x.size(0))
            prec1.update(acc.item(), x.size(0))
        train_loss = losses.avg
        train_acc = prec1.avg

        # ============================== validation ============================== #
        model.train(False)
        losses.reset()
        prec1.reset()
        for i, (x, y) in tqdm(enumerate(data_loaders['val']),
                              total=len(data_loaders['val'])):
            x = x.cuda()
            y = y.cuda(non_blocking=True)
            with torch.no_grad():
                outputs = model(x)
                loss = criterion(outputs, y)
                acc = utils.accuracy(outputs, y)
            losses.update(loss.item(), x.size(0))
            prec1.update(acc.item(), x.size(0))
        val_loss = losses.avg
        val_acc = prec1.avg

        logger.info(f'[Val] Loss: \033[1m{val_loss:.4f}\033[0m | '
                    f'Acc: \033[1m{val_acc:.4f}\033[0m\n')
        writer.add_scalars('Loss', {'train': train_loss}, epoch)
        writer.add_scalars('Acc', {'train': train_acc}, epoch)
        writer.add_scalars('Loss', {'val': val_loss}, epoch)
        writer.add_scalars('Acc', {'val': val_acc}, epoch)
        writer.add_scalar('LR', optimizer.param_groups[0]['lr'], epoch)
        scheduler.step()

        if not tuning:
            utils.save_checkpoint(model, epoch, exp_path + 'model_optim.pth',
                                  optimizer)

    if tuning:
        # FIX: build the result row explicitly instead of `eval(key)` —
        # same values, no dynamic evaluation.
        tuning_result = {
            'train_loss': [train_loss],
            'train_acc': [train_acc],
            'val_loss': [val_loss],
            'val_acc': [val_acc],
        }
        utils.write_tuning_result(params, tuning_result,
                                  exp_path + 'tuning/results.csv')
# NOTE(review): this fragment starts mid-script — `net`, `predict`, `y`,
# `regularizer1`, `regularizer2`, `x`, `keep`, `lens`, `center`, `dat` and
# `accuracy` are bound earlier, outside this view.
# Total objective: cross-entropy plus two regularizers (weights 1.0, .1, .1),
# minimized with RMSProp at lr 1e-3.
vanilla_loss = net.softmax_crossent(predict, y)
regularized_loss = net.weighted_loss((vanilla_loss, 1.0),
                                     (regularizer1, .1),
                                     (regularizer2, .1))
net.optimize(regularized_loss, 'rmsprop', 1e-3)

# Helper function
def real_len(x_batch):
    # Index of the first minimum after appending a 0 to each sequence —
    # presumably the sequences are 0-padded, making this the effective
    # (unpadded) length; TODO confirm padding value upstream.
    return [np.argmin(s + [0]) for s in x_batch]

# Training
batch = int(64)   # batch size
epoch = int(15)   # number of passes over the data
step = int(0)     # running step counter (for logging only)
for sentences, label in dat.yield_batch(batch, epoch):
    # One optimization step; keep-probability .8 means dropout at train time.
    pred, loss = net.train([predict], {
        x: sentences,
        y: label,
        keep: .8,
        lens: real_len(sentences),
        center: 0.
    })
    acc = accuracy(pred, label)
    print('Step {}, Loss {}, Accuracy {}%'.format(step + 1, loss, acc * 100))
    step += 1

# Final evaluation on the held-out test set (no dropout: keep = 1.).
x_test, y_test = dat.yield_test()
pred = net.forward([predict], {x: x_test, keep: 1., lens: real_len(x_test)})[0]
acc = accuracy(pred, y_test)
print('Accuracy on test set: {}'.format(acc))