def __init__(self, hparams, dataset: HeteroNetDataset, metrics=None):
    """Configure a HAN node classifier from dataset shape info and hyperparameters.

    Args:
        hparams: hyperparameter namespace (num_layers, embedding_dim, collate_fn,
            loss_type, ...); mutated here to record n_params, name, inductive.
        dataset: heterogeneous-graph dataset providing edge_index_dict, n_classes,
            multilabel, num_nodes_dict, head_node_type, in_features, inductive.
        metrics: list of metric names for the Metrics trackers; defaults to
            ["precision"]. (Fix: a mutable list default was previously shared
            across calls; replaced with a None sentinel — callers are unaffected.)
    """
    if metrics is None:
        metrics = ["precision"]
    num_edge = len(dataset.edge_index_dict)
    num_layers = hparams.num_layers
    num_class = dataset.n_classes
    self.collate_fn = hparams.collate_fn
    self.multilabel = dataset.multilabel
    num_nodes = dataset.num_nodes_dict[dataset.head_node_type]

    # Input width comes from the dataset when it carries node features,
    # otherwise from the learned-embedding dimension.
    if dataset.in_features:
        w_in = dataset.in_features
    else:
        w_in = hparams.embedding_dim
    w_out = hparams.embedding_dim

    super(HAN, self).__init__(num_edge=num_edge, w_in=w_in, w_out=w_out,
                              num_class=num_class, num_nodes=num_nodes,
                              num_layers=num_layers)

    # No precomputed features anywhere: learn an embedding table for the head
    # node type. Large tables are kept on CPU (inside a plain dict so the
    # module system does not move them to GPU automatically).
    if not hasattr(dataset, "x") and not hasattr(dataset, "x_dict"):
        if num_nodes > 10000:
            self.embedding = {
                dataset.head_node_type: torch.nn.Embedding(
                    num_embeddings=num_nodes,
                    embedding_dim=hparams.embedding_dim).cpu()
            }
        else:
            self.embedding = torch.nn.Embedding(
                num_embeddings=num_nodes, embedding_dim=hparams.embedding_dim)

    self.dataset = dataset
    self.head_node_type = self.dataset.head_node_type
    hparams.n_params = self.get_n_params()

    # Identical tracker configuration for the three splits; only the prefix differs.
    shared = dict(loss_type=hparams.loss_type, n_classes=dataset.n_classes,
                  multilabel=dataset.multilabel, metrics=metrics)
    self.train_metrics = Metrics(prefix="", **shared)
    self.valid_metrics = Metrics(prefix="val_", **shared)
    self.test_metrics = Metrics(prefix="test_", **shared)

    hparams.name = self.name()
    hparams.inductive = dataset.inductive
    self.hparams = hparams
def test(model, dataloader, params):
    """Evaluate `model` on the test split of `dataloader`.

    Returns a Metrics object with per-batch predictions accumulated and
    `.loss` set to the running-average loss.
    Fix: `model.eval()` was invoked once per batch inside the loop; it is
    hoisted to run once before iteration (same behavior, and consistent
    with `validate`).
    """
    val_data = tqdm(dataloader.data_iterator(data_type='test',
                                             batch_size=params.batch_size),
                    total=(dataloader.size()[0] // params.batch_size))
    metrics = Metrics()
    loss_avg = RunningAverage()

    model.eval()
    with torch.no_grad():
        for data, labels in val_data:
            data = torch.tensor(data, dtype=torch.long).to(params.device)
            labels = torch.tensor(labels, dtype=torch.long).to(params.device)
            # Non-zero tokens count as real input — assumes 0 is the pad id; TODO confirm.
            batch_masks = data != 0

            loss, logits = model(data, attention_mask=batch_masks, labels=labels)
            predicted = logits.max(2)[1]

            metrics.update(batch_pred=predicted.cpu().numpy(),
                           batch_true=labels.cpu().numpy(),
                           batch_mask=batch_masks.cpu().numpy())
            loss_avg.update(torch.mean(loss).item())
            val_data.set_postfix(type='VAL', loss='{:05.3f}'.format(loss_avg()))

    metrics.loss = loss_avg()
    return metrics
def train_epoch(self, epoch):
    """Train one epoch; returns the average loss over the epoch.

    Fix: the ETA was computed from `log_time` (projected time for
    `print_freq` batches) multiplied by the number of remaining batches,
    overestimating by a factor of `print_freq`. ETA now uses the measured
    per-batch time. The printed loss is taken from the meter instead of
    formatting the tensor directly.
    """
    self.model.train()  # Set model to training mode
    losses = Metrics()
    total_iter = len(self.train_data_loader.dataset) // self.train_data_loader.batch_size

    for idx, (x, y) in enumerate(self.train_data_loader):
        s = time.monotonic()
        x = x.to(self.device)
        y = y.to(self.device)

        y_pred = self.model(x)
        self.optimizer.zero_grad()
        loss = self.criterion(y_pred, y)
        loss.backward()
        self.optimizer.step()

        losses.update(loss.item(), x.size(0))
        self.writer.add_scalar('train/current_loss', losses.val, self.train_step)
        self.writer.add_scalar('train/avg_loss', losses.avg, self.train_step)
        self.train_step += 1
        e = time.monotonic()

        if idx % self.print_freq == 0:
            batch_time = e - s                     # measured time for one batch
            log_time = self.print_freq * batch_time  # projected time until the next log line
            eta = ((total_iter - idx) * batch_time) / 60.0  # minutes left this epoch
            print(f'Epoch {epoch} [{idx}/{total_iter}], loss={losses.val:.3f}, time={log_time:.2f}, ETA={eta:.2f}')

    return losses.avg
def debug_test_set():
    """Load the pickled strong classifier and print its metrics on the test set."""
    classifier = pickle_load(os.path.join('models6', 'strong_classifier_276.pkl'))
    trainer = Trainer(mp_pool=Pool(8))
    trainer.load_data('data')
    print("Strong classifier test metrics:")
    predictions = classifier.classify_batch(trainer.test_ds.X_integral)
    print(Metrics(predictions, trainer.test_ds.y))
def validate(model, val_set, params):
    """Run validation over `val_set`; returns Metrics with `.loss` set to the average loss."""
    progress = tqdm(DataLoader(val_set, batch_size=params.batch_size,
                               collate_fn=KeyphraseData.collate_fn),
                    total=(len(val_set) // params.batch_size))
    metrics = Metrics()
    loss_avg = RunningAverage()

    with torch.no_grad():
        model.eval()
        for batch, targets, mask in progress:
            batch = batch.to(params.device)
            targets = targets.to(params.device)
            mask = mask.to(params.device)

            loss, logits = model(batch, attention_mask=mask, labels=targets)
            predictions = logits.max(2)[1]

            metrics.update(batch_pred=predictions.cpu().numpy(),
                           batch_true=targets.cpu().numpy(),
                           batch_mask=mask.cpu().numpy())
            loss_avg.update(torch.mean(loss).item())
            progress.set_postfix(type='VAL', loss='{:05.3f}'.format(loss_avg()))

    metrics.loss = loss_avg()
    return metrics
def _evaluate(self, model_param, criterion):
    """Evaluate `model_param` on CPU over the validation set.

    Returns (train_loss, train_acc, train_f1, val_loss, val_acc, val_f1,
    train_balanced, val_balanced). Evaluating the actual training set is
    too expensive here (noted by the original author), so the "train"
    slots intentionally mirror the validation values.

    Fixes: removed commented-out dead code; the original recomputed the
    train_* values with the exact same expressions and inputs as the
    validation ones (identical calls on `val_prob`/`val_pred`), so the
    validation results are reused instead of recomputed; dropped the
    unused `train_m` Metrics construction.
    """
    with torch.no_grad():  # operations inside don't track history
        self.model_eval.load_state_dict(state_dict=model_param)
        self.model_eval.eval()
        gc.collect()

        val_prob = self.model_eval(self.validation_set.x_data)
        val_pred = val_prob.argmax(1)
        val_loss = criterion(val_prob, self.validation_set.y_data)
        val_acc = (val_pred == self.validation_set.y_data.long()).float().mean()
        val_f1 = metrics.f1_score(self.validation_set.y_data.long().numpy(),
                                  val_pred.numpy(), average='macro')
        val_m = Metrics(self.validation_set.y_data, val_pred, self.labels)
        val_b = val_m.balanced_score()
        gc.collect()

        # "Train" metrics deliberately mirror validation — see docstring.
        return (val_loss.item(), val_acc, val_f1,
                val_loss.item(), val_acc, val_f1,
                val_b, val_b)
def run_experiment(self, load_controller, expert_demos):
    """Model predictive control.

    Arguments:
        load_controller (bool): If True, load mpc controller.
        expert_demos (bool): If True, initialize training set with extra expert demonstrations.
    """
    if load_controller:
        self.mpc = torch.load(os.path.join(self.savedir, 'mpc.pth'))
    else:
        # Bootstrap the dynamics model from random rollouts (optionally
        # augmented with expert demonstrations).
        obs, acts, lengths, _, _ = self._sample_rollouts(self.init_steps, actor=self.mpc)
        if expert_demos:
            obs_expert, acts_expert = self._load_expert_demos()
            obs = obs + tuple(obs_expert)
            acts = acts + tuple(acts_expert)
        self.mpc.train_initial(obs, acts)

    # Main collect → log → retrain loop.
    step = self.mpc.X.shape[0]
    while step < self.total_steps:
        rollout_start = time.time()
        print(f"Rolling out {self.train_freq} timesteps...")
        obs, acts, lengths, scores, rollouts_metrics = self._sample_rollouts(
            self.train_freq, actor=self.mpc)
        step += sum(lengths)
        print_rollout_stats(obs[0], acts[0], lengths[0], scores[0])

        # Aggregate the per-rollout action metrics and log their averages.
        act_metrics = Metrics()
        for per_rollout in rollouts_metrics:
            for entry in per_rollout:
                act_metrics.store(entry)
        for key, value in act_metrics.average().items():
            self.logger.log_scalar(key, value, step)

        self.logger.log_scalar("score/avg_length", np.mean(lengths), step)
        self.logger.log_scalar("score/avg_score", np.mean(scores), step)
        self.logger.log_scalar("time/rollout_time", (time.time() - rollout_start), step)

        # Retrain the model on the new data and log training diagnostics.
        train_metrics, weights = self.mpc.train_iteration(obs, acts)
        for key, value in train_metrics.items():
            self.logger.log_scalar(key, value, step)
        for key, value in weights.items():
            self.logger.log_histogram(key, value, step)

        # Persist the controller after each iteration.
        torch.save(self.mpc, os.path.join(self.savedir, 'mpc.pth'))
def test(exp_name):
    """Evaluate the configured segmentation model on the test split and log the metrics."""
    print('loading data......')
    test_data = getattr(datasets, opt.dataset)(opt.root, opt.test_data_dir,
                                               mode='test', size=opt.testsize)
    test_dataloader = DataLoader(test_data, batch_size=1, shuffle=False,
                                 num_workers=opt.num_workers)
    total_batch = int(len(test_data) / 1)

    model, _, _ = generate_model(opt)
    model.eval()

    # metrics_logger initialization (order matches evaluate()'s return values)
    metric_names = ['recall', 'specificity', 'precision', 'F1', 'F2',
                    'ACC_overall', 'IoU_poly', 'IoU_bg', 'IoU_mean']
    metrics = Metrics(metric_names)
    logger = get_logger('./results/' + exp_name + '.log')

    with torch.no_grad():
        for sample in test_dataloader:
            image, target = sample['image'], sample['label']
            if opt.use_gpu:
                image = image.cuda()
                target = target.cuda()
            prediction = model(image)
            values = evaluate(prediction, target)
            metrics.update(**dict(zip(metric_names, values)))

    metrics_result = metrics.mean(total_batch)
    print("Test Result:")
    logger.info(
        'recall: %.4f, specificity: %.4f, precision: %.4f, F1: %.4f, F2: %.4f, '
        'ACC_overall: %.4f, IoU_poly: %.4f, IoU_bg: %.4f, IoU_mean: %.4f'
        % tuple(metrics_result[name] for name in metric_names))
def __init__(self, hparams, dataset, metrics, *args):
    """Create identically-configured train/valid/test Metrics trackers and
    record run metadata (name, inductive flag) on `hparams`."""
    super().__init__(*args)
    # The three trackers share everything except the prefix.
    shared = dict(loss_type=hparams.loss_type, n_classes=dataset.n_classes,
                  multilabel=dataset.multilabel, metrics=metrics)
    self.train_metrics = Metrics(prefix="", **shared)
    self.valid_metrics = Metrics(prefix="val_", **shared)
    self.test_metrics = Metrics(prefix="test_", **shared)
    hparams.name = self.name()
    hparams.inductive = dataset.inductive
    self.hparams = hparams
def __init__(self, args):
    """Set up metric tracking and the per-mode lists of images/losses to visualize."""
    BaseModel.__init__(self, args)
    self.metrics = Metrics()   # measurement metrics
    self.visual_images = []    # images to visualize
    self.visual_losses = []    # losses to visualize
    # Register the confusion matrix and the scalar curves for the active mode.
    for mode in ('train', 'valid', 'test'):
        if self.args.mode == mode:
            self.visual_images.append(mode + '_confusion_matrix')
            self.visual_losses.extend([mode + '_loss', mode + '_precision',
                                       mode + '_recall', mode + '_f1_score'])
def validate(self, val_loader, models, criterions, last_best_epochs):
    """Evaluate the backbone on `val_loader`.

    Returns a (per-class-loss DataFrame, classification-report DataFrame) pair.
    """
    batch_time = AverageMeter()
    losses = AverageMeter()
    top1 = AverageMeter()
    top5 = AverageMeter()
    metrics = Metrics()
    class_names = val_loader.dataset.dataset.classes
    losses_per_class = LossPerClassMeter(len(class_names))

    models['backbone'].eval()
    models['module'].eval()

    end = time.time()
    with torch.no_grad():
        for batch_idx, (data_x, data_y) in enumerate(val_loader):
            data_y = data_y.cuda(non_blocking=True)
            data_x = data_x.cuda(non_blocking=True)

            output = models['backbone'](data_x)
            loss = criterions['backbone'](output, data_y)

            # Track the unreduced loss per class before averaging it for logging.
            losses_per_class.update(loss.cpu().detach().numpy(), data_y.cpu().numpy())
            loss = torch.sum(loss) / loss.size(0)

            acc = accuracy(output.data, data_y, topk=(1, 2,))
            losses.update(loss.data.item(), data_x.size(0))
            top1.update(acc[0].item(), data_x.size(0))
            top5.update(acc[1].item(), data_x.size(0))
            metrics.add_mini_batch(data_y, output)

            batch_time.update(time.time() - end)
            end = time.time()

            if batch_idx % self.args.print_freq == 0:
                print('Test: [{0}/{1}]\t'
                      'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                      'Loss {loss.val:.4f} ({loss.avg:.4f})\t'
                      'Acc@1 {top1.val:.3f} ({top1.avg:.3f})\t'
                      'Last best epoch {last_best_epoch}'
                      .format(batch_idx, len(val_loader), batch_time=batch_time,
                              loss=losses, top1=top1,
                              last_best_epoch=last_best_epochs))

    report = metrics.get_report(target_names=class_names)
    print(' * Acc@1 {top1.avg:.3f}\t * Prec {0}\t * Recall {1} * Acc@5 {top5.avg:.3f}\t'
          .format(report['macro avg']['precision'], report['macro avg']['recall'],
                  top1=top1, top5=top5))

    per_class_loss = pd.DataFrame.from_dict(
        {f'{name}-val-loss': losses_per_class.avg[i]
         for i, name in enumerate(class_names)},
        orient='index').T
    return per_class_loss, pd.DataFrame.from_dict(report)
def validate_epoch(self):
    """Run one pass over the validation loader; returns the average loss."""
    self.model.eval()  # evaluation mode
    loss_meter = Metrics()
    with torch.no_grad():
        for inputs, targets in self.val_data_loader:
            inputs = inputs.to(self.device)
            targets = targets.to(self.device)
            predictions = self.model(inputs)
            batch_loss = self.criterion(predictions, targets)
            loss_meter.update(batch_loss.item(), inputs.size(0))
            self.writer.add_scalar('val/current_loss', loss_meter.val, self.val_step)
            self.writer.add_scalar('val/avg_loss', loss_meter.avg, self.val_step)
            self.val_step += 1
    return loss_meter.avg
def test(model: nn.Module,
         device: torch.device,
         test_loader: DataLoader,
         criterion: nn.Module,
         text_transform: Callable,
         log_every=40):
    """Evaluate a CTC speech model; returns Metrics(loss, cer, wer) averaged over the set."""
    print('Evaluating...')
    model.eval()
    cer_scores, wer_scores, batch_losses = [], [], []
    num_batches = len(test_loader)
    with torch.no_grad():
        for batch_idx, batch in enumerate(test_loader):
            spectrograms, labels, input_lengths, label_lengths = batch
            spectrograms, labels = spectrograms.to(device), labels.to(device)

            output = model(spectrograms)            # (batch, time, n_class)
            output = F.log_softmax(output, dim=2)
            output = output.transpose(0, 1)         # (time, batch, n_class)

            loss = criterion(output, labels, input_lengths, label_lengths)
            batch_losses.append(loss.item())

            decoded_preds, decoded_targets = greedy_decode(
                output.transpose(0, 1), labels, label_lengths, text_transform)
            cer_scores.append(
                word_error_rate(decoded_targets, decoded_preds, use_cer=True))
            wer_scores.append(word_error_rate(decoded_targets, decoded_preds))

            if batch_idx % log_every == 0:
                print(f'{batch_idx}/{num_batches}')
                print(f'Test WER: {wer_scores[-1]}; CER: {cer_scores[-1]}')
                for p, t in zip(decoded_preds, decoded_targets):
                    print(f'Prediction: [{p}]\t Ground Truth: [{t}]')

    avg_cer = np.mean(cer_scores)
    avg_wer = np.mean(wer_scores)
    avg_loss = np.mean(batch_losses)
    print(
        f'Test set: Average loss: {avg_loss}, Average CER: {avg_cer} Average WER: {avg_wer}'
    )
    return Metrics(loss=avg_loss, cer=avg_cer, wer=avg_wer)
def train(models, optimizers, dataset, corpus, ckpts, params, args):
    """Resume-able autoencoder training loop with periodic checkpointing and logging."""
    epoch_num = params.epoch_num
    batch_epoch = params.batch_epoch
    autoencoder.noise_radius = params.noise_radius
    step = 0
    for epoch in range(epoch_num, params.max_epoch):
        # `islice` skips the batches already consumed before a resume.
        for batch, (source, target) in islice(enumerate(dataset), batch_epoch, None):
            metrics = Metrics(
                epoch=epoch,
                max_epoch=params.max_epoch,
            )
            # Average the autoencoder metrics over `epoch_ae` inner updates.
            for _ in range(params.epoch_ae):
                ae_metrics = train_autoencoder(models, optimizers, source,
                                               target, params)
                metrics.accum(ae_metrics)
            metrics['ae_loss'] /= params.epoch_ae
            metrics['acc'] /= params.epoch_ae
            batch_epoch += 1

            # anneal noise every 5 batch_epoch for now
            if batch_epoch % 5 == 0:
                autoencoder.noise_radius = autoencoder.noise_radius * 0.995

            if batch_epoch % params.print_every == 0:
                ckpts.save()
                logging.info('--- Epoch {}/{} Batch {} ---'.format(
                    epoch + 1, metrics['max_epoch'], batch_epoch))
                logging.info('Loss {:.4f}'.format(float(metrics['ae_loss'])))
                # Persist progress so training can resume from this point.
                params.batch_epoch = batch_epoch
                params.epoch_num = epoch
                params.noise_radius = autoencoder.noise_radius
                params.save(os.path.join(args.model_dir, 'params.json'))
                # Floydhub metrics
                print('{{"metric": "acc", "value": {}, "step": {}}}'.format(
                    float(metrics['acc']), step))
                print('{{"metric": "ae_loss", "value": {}, "step": {}}}'.format(
                    float(metrics['ae_loss']), step))
                step += 1
                tb_writer.add_scalar('train/acc', metrics['acc'], step)
                tb_writer.add_scalar('train/ae_loss', metrics['ae_loss'], step)
        # A full pass finished; restart the next epoch from the first batch.
        batch_epoch = 0
def train(model: nn.Module,
          device: torch.device,
          train_loader: DataLoader,
          criterion: nn.Module,
          optimizer: nn.Module,
          scheduler,
          epoch: int,
          iter_meter,
          tb_writer: SummaryWriter,
          log_every=20) -> Metrics:
    """Train the speech model for one epoch; returns Metrics(loss=mean epoch loss).

    Fix: the original logged on `batch_idx == data_len`, which never fires
    because enumerate yields indices 0..data_len-1; the last batch is now
    logged via `data_len - 1`.
    """
    model.train()
    data_len = len(train_loader)
    epoch_loss = []
    print('Training')
    for batch_idx, _data in enumerate(train_loader):
        spectrograms, labels, input_lengths, label_lengths = _data
        spectrograms, labels = spectrograms.to(device), labels.to(device)

        optimizer.zero_grad()

        output = model(spectrograms)      # (batch, time, n_class)
        output = F.log_softmax(output, dim=2)
        output = output.transpose(0, 1)   # (time, batch, n_class)

        loss = criterion(output, labels, input_lengths, label_lengths)
        loss.backward()
        loss_scalar = loss.item()

        optimizer.step()
        if scheduler:
            scheduler.step()
        iter_meter.step()

        # Log periodically and always on the final batch of the epoch.
        if batch_idx % log_every == 0 or batch_idx == data_len - 1:
            print(f'Train Epoch: {epoch} \t batch: {batch_idx}/{data_len}')
            print(f'Loss: {loss_scalar}')

        epoch_loss.append(loss_scalar)
        tb_writer.add_scalar('batch_loss', loss_scalar, iter_meter.get())
    return Metrics(loss=np.mean(epoch_loss))
def test(self):
    """Evaluate on the test loader; log, print and return the mean loss."""
    self.model.eval()
    loss_meter = Metrics()
    with torch.no_grad():
        for inputs, targets in self.test_data_loader:
            inputs = inputs.to(self.device)
            targets = targets.to(self.device)
            outputs = self.model(inputs)
            batch_loss = self.criterion(outputs, targets)
            loss_meter.update(batch_loss.item(), inputs.size(0))
    logger.info(f'test loss={loss_meter.avg}')
    print(loss_meter.avg)
    return loss_meter.avg
def main():
    """Read JIRA credentials from config.yaml, falling back to CLI flags, then list boards."""
    username = password = server = None
    parser = argparse.ArgumentParser(description='Show all boards in JIRA')
    cfg = None
    try:
        cfg = ConfigFile('config.yaml').config
        username = cfg['username']
        password = cfg['password']
        server = cfg['server']
    except FileNotFoundError:
        print("Config File does not exist, falling back to argument parsing")

    # CLI flags are always parsed; they only take effect when no config was found.
    parser.add_argument('-u', help="Provide User Name")
    parser.add_argument('-p', help="Provide Password")
    parser.add_argument('-s', help="Provide Server URL")
    args = parser.parse_args()
    if cfg is None:
        username, password, server = args.u, args.p, args.s

    connection = JiraConn(username, password, server)
    Metrics(connection.jira).list_boards()
def valid(model, valid_dataloader, total_batch):
    """Run validation; returns the dict of averaged segmentation metrics.

    Restores the model to training mode before returning.
    """
    model.eval()
    # Metrics_logger initialization (order matches evaluate()'s return values)
    metric_names = ['recall', 'specificity', 'precision', 'F1', 'F2',
                    'ACC_overall', 'IoU_poly', 'IoU_bg', 'IoU_mean']
    metrics = Metrics(metric_names)

    with torch.no_grad():
        progress = tqdm(enumerate(valid_dataloader), total=total_batch)
        for _, sample in progress:
            image, target = sample['image'], sample['label']
            if opt.use_gpu:
                image = image.cuda()
                target = target.cuda()
            prediction = model(image)
            values = evaluate(prediction, target)
            metrics.update(**dict(zip(metric_names, values)))

    metrics_result = metrics.mean(total_batch)
    model.train()
    return metrics_result
def xval(data_path, adaptor, classifier, summ):
    """Build the cross-validation accuracy and summary ops for the given graph."""
    input_fn = Input(FLAGS.xval_batch_size, FLAGS.num_points)
    waves, labels = input_fn(data_path)

    # Forward pass: optionally route through the adaptor first.
    if FLAGS.adp:
        logits = classifier(adaptor(waves))
    else:
        logits = classifier(waves, expand_dims=True)
    logits = tf.argmax(logits, axis=-1)

    metrics = Metrics("accuracy")
    # Guard: labels and predictions must have matching rank before scoring.
    rank_check = tf.assert_equal(tf.rank(labels), tf.rank(logits))
    with tf.control_dependencies([rank_check]):
        score, xval_accu_op = metrics(labels, logits)

    assert summ, "invalid summary helper object"
    summ.register('xval', 'accuracy', score)
    xval_summ_op = summ('xval')
    return xval_accu_op, xval_summ_op
def train_model(args, model, train, dev, teacher_model=None, save_path=None, maxsteps=None):
    """Train (or fine-tune) the model over `train`, periodically evaluating on `dev`.

    Modes (driven by args):
      * plain MLE training (not args.finetuning);
      * fine-tuning against `teacher_model` with an interpolated
        student/teacher loss, optionally with fertility REINFORCE updates.
    Checkpoints every `args.save_every` iters; evaluates and tracks the best
    model every `args.eval_every` iters.
    NOTE(review): relies on module-level names (Best, Metrics, tqdm, valid_model,
    FastTransformer, export, math, torch) imported elsewhere in this file.
    """
    if args.tensorboard and (not args.debug):
        from tensorboardX import SummaryWriter
        writer = SummaryWriter('./runs/{}'.format(args.prefix + args.hp_str))

    # optimizer
    if args.optimizer == 'Adam':
        opt = torch.optim.Adam(
            [p for p in model.parameters() if p.requires_grad],
            betas=(0.9, 0.98), eps=1e-9)
    else:
        raise NotImplementedError

    # if resume training
    if (args.load_from is not None) and (args.resume):
        with torch.cuda.device(args.gpu):  # very important.
            offset, opt_states = torch.load(
                './models/' + args.load_from + '.pt.states',
                map_location=lambda storage, loc: storage.cuda())
            opt.load_state_dict(opt_states)
    else:
        offset = 0

    # metrics
    if save_path is None:
        save_path = args.model_name
    best = Best(max, 'corpus_bleu', 'corpus_gleu', 'gleu', 'loss', 'i',
                model=model, opt=opt, path=save_path, gpu=args.gpu)
    train_metrics = Metrics('train', 'loss', 'real', 'fake')
    dev_metrics = Metrics('dev', 'loss', 'gleu', 'real_loss', 'fake_loss',
                          'distance', 'alter_loss', 'distance2',
                          'fertility_loss', 'corpus_gleu')
    progressbar = tqdm(total=args.eval_every, desc='start training.')

    for iters, batch in enumerate(train):
        # `offset` shifts the counter so resumed runs continue their schedule.
        iters += offset

        # Periodic back-up checkpoint of the best model so far.
        if iters % args.save_every == 0:
            args.logger.info(
                'save (back-up) checkpoints at iter={}'.format(iters))
            with torch.cuda.device(args.gpu):
                torch.save(best.model.state_dict(),
                           '{}_iter={}.pt'.format(args.model_name, iters))
                torch.save([iters, best.opt.state_dict()],
                           '{}_iter={}.pt.states'.format(
                               args.model_name, iters))

        # Periodic evaluation on dev; tracks the best model by corpus BLEU/GLEU.
        if iters % args.eval_every == 0:
            progressbar.close()
            dev_metrics.reset()
            if args.distillation:
                outputs_course = valid_model(args, model, dev, dev_metrics,
                                             distillation=True,
                                             teacher_model=None)
            outputs_data = valid_model(
                args, model, dev,
                None if args.distillation else dev_metrics,
                teacher_model=None, print_out=True)
            if args.tensorboard and (not args.debug):
                writer.add_scalar('dev/GLEU_sentence_', dev_metrics.gleu, iters)
                writer.add_scalar('dev/Loss', dev_metrics.loss, iters)
                writer.add_scalar('dev/GLEU_corpus_',
                                  outputs_data['corpus_gleu'], iters)
                writer.add_scalar('dev/BLEU_corpus_',
                                  outputs_data['corpus_bleu'], iters)
                if args.distillation:
                    writer.add_scalar('dev/GLEU_corpus_dis',
                                      outputs_course['corpus_gleu'], iters)
                    writer.add_scalar('dev/BLEU_corpus_dis',
                                      outputs_course['corpus_bleu'], iters)
            if not args.debug:
                best.accumulate(outputs_data['corpus_bleu'],
                                outputs_data['corpus_gleu'],
                                dev_metrics.gleu, dev_metrics.loss, iters)
                args.logger.info(
                    'the best model is achieved at {}, average greedy GLEU={}, corpus GLEU={}, corpus BLEU={}'
                    .format(best.i, best.gleu, best.corpus_gleu,
                            best.corpus_bleu))
            args.logger.info('model:' + args.prefix + args.hp_str)
            # ---set-up a new progressor---
            progressbar = tqdm(total=args.eval_every, desc='start training.')

        if maxsteps is None:
            maxsteps = args.maximum_steps
        if iters > maxsteps:
            args.logger.info('reach the maximum updating steps.')
            break

        # --- training --- #
        model.train()

        # Inverse-sqrt warmup schedule (Transformer-style), applied manually.
        def get_learning_rate(i, lr0=0.1, disable=False):
            if not disable:
                return lr0 * 10 / math.sqrt(args.d_model) * min(
                    1 / math.sqrt(i),
                    i / (args.warmup * math.sqrt(args.warmup)))
            return 0.00002

        opt.param_groups[0]['lr'] = get_learning_rate(
            iters + 1, disable=args.disable_lr_schedule)
        opt.zero_grad()

        # prepare the data
        inputs, input_masks, \
        targets, target_masks, \
        sources, source_masks,\
        encoding, batch_size = model.quick_prepare(batch, args.distillation)
        input_reorder, fertility_cost, decoder_inputs = None, None, inputs
        batch_fer = batch.fer_dec if args.distillation else batch.fer
        #print(input_masks.size(), target_masks.size(), input_masks.sum())

        # FastTransformer predicts fertilities to expand the decoder input.
        if type(model) is FastTransformer:
            inputs, input_reorder, input_masks, fertility_cost = model.prepare_initial(
                encoding, sources, source_masks, input_masks, batch_fer)

        # Maximum Likelihood Training
        if not args.finetuning:
            loss = model.cost(targets, target_masks,
                              out=model(encoding, source_masks, inputs,
                                        input_masks))
            if args.fertility:
                loss += fertility_cost
        else:
            # finetuning:
            # loss_student (MLE)
            if not args.fertility:
                decoding, out, probs = model(encoding, source_masks, inputs,
                                             input_masks, return_probs=True,
                                             decoding=True)
                loss_student = model.batched_cost(
                    targets, target_masks, probs)  # student-loss (MLE)
                decoder_masks = input_masks
            else:
                # Note that MLE and decoding has different translations. We need to run the same code twice
                # truth
                decoding, out, probs = model(encoding, source_masks, inputs,
                                             input_masks, decoding=True,
                                             return_probs=True)
                loss_student = model.cost(targets, target_masks, out=out)
                decoder_masks = input_masks
                # baseline
                decoder_inputs_b, _, decoder_masks_b, _, _ = model.prepare_initial(
                    encoding, sources, source_masks, input_masks, None,
                    mode='mean')
                decoding_b, out_b, probs_b = model(
                    encoding, source_masks, decoder_inputs_b, decoder_masks_b,
                    decoding=True, return_probs=True)  # decode again
                # reinforce
                decoder_inputs_r, _, decoder_masks_r, _, _ = model.prepare_initial(
                    encoding, sources, source_masks, input_masks, None,
                    mode='reinforce')
                decoding_r, out_r, probs_r = model(
                    encoding, source_masks, decoder_inputs_r, decoder_masks_r,
                    decoding=True, return_probs=True)  # decode again
            if args.fertility:
                loss_student += fertility_cost

            # loss_teacher (RKL+REINFORCE)
            teacher_model.eval()
            if not args.fertility:
                inputs_student_index, _, targets_student_soft, _, _, _, encoding_teacher, _ = model.quick_prepare(
                    batch, False, decoding, probs, decoder_masks,
                    decoder_masks, source_masks)
                out_teacher, probs_teacher = teacher_model(
                    encoding_teacher, source_masks,
                    inputs_student_index.detach(), decoder_masks,
                    return_probs=True)
                loss_teacher = teacher_model.batched_cost(
                    targets_student_soft, decoder_masks,
                    probs_teacher.detach())
                loss = (
                    1 - args.beta1
                ) * loss_teacher + args.beta1 * loss_student  # final results
            else:
                # Teacher scores the student's "truth" decoding.
                inputs_student_index, _, targets_student_soft, _, _, _, encoding_teacher, _ = model.quick_prepare(
                    batch, False, decoding, probs, decoder_masks,
                    decoder_masks, source_masks)
                out_teacher, probs_teacher = teacher_model(
                    encoding_teacher, source_masks,
                    inputs_student_index.detach(), decoder_masks,
                    return_probs=True)
                loss_teacher = teacher_model.batched_cost(
                    targets_student_soft, decoder_masks,
                    probs_teacher.detach())

                # Teacher loss of the baseline ("mean" fertility) decoding.
                inputs_student_index, _ = model.prepare_inputs(
                    batch, decoding_b, False, decoder_masks_b)
                targets_student_soft, _ = model.prepare_targets(
                    batch, probs_b, False, decoder_masks_b)
                out_teacher, probs_teacher = teacher_model(
                    encoding_teacher, source_masks,
                    inputs_student_index.detach(), decoder_masks_b,
                    return_probs=True)
                _, loss_1 = teacher_model.batched_cost(targets_student_soft,
                                                       decoder_masks_b,
                                                       probs_teacher.detach(),
                                                       True)

                # Teacher loss of the sampled ("reinforce") decoding.
                inputs_student_index, _ = model.prepare_inputs(
                    batch, decoding_r, False, decoder_masks_r)
                targets_student_soft, _ = model.prepare_targets(
                    batch, probs_r, False, decoder_masks_r)
                out_teacher, probs_teacher = teacher_model(
                    encoding_teacher, source_masks,
                    inputs_student_index.detach(), decoder_masks_r,
                    return_probs=True)
                _, loss_2 = teacher_model.batched_cost(targets_student_soft,
                                                       decoder_masks_r,
                                                       probs_teacher.detach(),
                                                       True)

                # REINFORCE reward: sampled decoding beats the baseline.
                rewards = -(loss_2 - loss_1).data
                rewards = rewards - rewards.mean()
                rewards = rewards.expand_as(source_masks)
                rewards = rewards * source_masks

                model.predictor.saved_fertilities.reinforce(
                    0.1 * rewards.contiguous().view(-1, 1))
                loss = (
                    1 - args.beta1
                ) * loss_teacher + args.beta1 * loss_student  # detect reinforce

        # accumulate the training metrics
        # NOTE(review): reset() immediately after accumulate() discards the
        # accumulated values — confirm this is intended.
        train_metrics.accumulate(batch_size, loss, print_iter=None)
        train_metrics.reset()

        # train the student
        if args.finetuning and args.fertility:
            torch.autograd.backward(
                (loss, model.predictor.saved_fertilities),
                (torch.ones(1).cuda(loss.get_device()), None))
        else:
            loss.backward()
        opt.step()

        info = 'training step={}, loss={:.3f}, lr={:.5f}'.format(
            iters, export(loss), opt.param_groups[0]['lr'])
        if args.finetuning:
            info += '| NA:{:.3f}, AR:{:.3f}'.format(export(loss_student),
                                                    export(loss_teacher))
            if args.fertility:
                info += '| RL: {:.3f}'.format(export(rewards.mean()))
        if args.fertility:
            info += '| RE:{:.3f}'.format(export(fertility_cost))
        if args.tensorboard and (not args.debug):
            writer.add_scalar('train/Loss', export(loss), iters)
        progressbar.update(1)
        progressbar.set_description(info)
def train(train_data, val_data, user_list_train_filtered,
          user_list_val_filtered, user_beta_train, user_beta_val, k, dataset,
          eta=0.1, lamb=0.1, tolerance=1e-4, num_iter_val=5,
          num_total_iter_training=6, random_seed=786, kU=None, cv_flag=True,
          verbose=False):
    """Subgradient descent for the prec@k objective with L2 regularization.

    Returns (metrics, None) where `metrics` records the weight trajectory,
    training losses, and micro metrics on train (and, when cv_flag, on val).
    """
    np.random.seed(random_seed)
    val_features = val_data.drop(['user', 'label'], axis=1).values
    train_features = train_data.drop(['user', 'label'], axis=1).values
    w = np.random.normal(0, 1, val_features.shape[1])

    metrics = Metrics()
    metrics.eta_lr = eta
    metrics.lamb_reg = lamb
    print("running for eta", eta, "and lambda", lamb)

    for iteration in range(num_total_iter_training):
        grad, loss = subgradient(w, train_data, user_list_train_filtered,
                                 user_beta_train, k)
        grad += lamb * w  # L2 regularization term
        w = w - (eta / np.sqrt(iteration + 1)) * grad  # decaying step size

        metrics.w_list.append(w)
        metrics.loss_opt_list_train.append(loss)

        # Rank training rows by score and record the micro metric.
        ranked = deepcopy(train_data)
        ranked['scores'] = train_features.dot(w)
        ranked = ranked.sort_values(by='scores', ascending=False)
        ranked = ranked.reset_index(drop=True)
        metrics.micro_auc_rel_k_list_train.append(
            compute_micro(ranked, user_list_train_filtered, user_beta_train,
                          w, k))

        if verbose:
            print('Epoch', iteration + 1, 'completed out of',
                  num_total_iter_training, 'for prec@k loss train:',
                  metrics.loss_opt_list_train[-1])
            print('Epoch', iteration + 1, 'completed out of',
                  num_total_iter_training, 'for prec@k grad train:',
                  np.linalg.norm(grad))

        # evaluate combined weights
        if cv_flag:
            if iteration % num_iter_val == 0:
                ranked = deepcopy(val_data)
                ranked['scores'] = val_features.dot(w)
                ranked = ranked.sort_values(by='scores', ascending=False)
                ranked = ranked.reset_index(drop=True)
                metrics.micro_auc_rel_k_list_val.append(
                    compute_micro(ranked, user_list_val_filtered,
                                  user_beta_val, w, k))
                if verbose:
                    print("\n")
                    print('Epoch', iteration + 1, 'completed out of',
                          num_total_iter_training, 'for prec@k loss val:',
                          metrics.micro_auc_rel_k_list_val[-1])
                    print("\n")

    return metrics, None
# Compile with a class-weighted BCE loss (POS_RATIO balances positives) and
# track binary accuracy plus the custom f1 metric.
model.compile(loss=WeightedBinaryCrossEntropy(POS_RATIO),
              optimizer='rmsprop',
              metrics=['binary_accuracy', f1])
logger.debug('Model summary: %s', model.summary())

# Set tensorboard callback
tb = TensorBoard(log_dir='./learn_embedding_logs',
                 histogram_freq=1,
                 write_graph=True,
                 write_images=False)

# Metrics is now defined in utils
metrics = Metrics(logger)

# Train model
# NOTE: Tensorboard callback is disabled to reduce model run time from
# approx 3 hours to 17 minutes
model.fit(
    x_train,
    y_train,
    validation_data=(x_dev, y_dev),
    epochs=EPOCHS,
    batch_size=BATCH_SIZE,
    #callbacks=[tb]
)
# # In[ ]: from utils import Metrics # In[ ]: run_id = 'seg_model_gpu{}_n{}_bs{}_lr{}'.format(gpu_id, epochs, batch_size, learning_rate) print('\n\nTraining', run_id) save_path = run_id + '.pkl' optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate) metrics = Metrics(train_loader.dataset.num_classes, train_loader.dataset.class_names) # Used to keep track of statistics class AverageMeter(object): def __init__(self): self.val = 0 self.avg = 0 self.sum = 0 self.count = 0 def update(self, val, n=1): self.val = val self.sum += val * n self.count += n self.avg = self.sum / self.count
def train(train_data, val_data, user_list_train_filtered, user_list_val_filtered,
          user_beta_train, user_beta_val, k, eta=0.1, lamb=0.1, num_iter_val=5,
          num_total_iter_training=6, n_classifiers=5, random_seed=786,
          verbose=False):
    """Train an ensemble of logistic classifiers and merge them into one.

    Splits the training users 80/20 into internal train/validation sets
    (the `val_data`/`user_list_val_filtered` arguments are immediately
    overwritten), trains `n_classifiers` logistic-regression weight vectors
    on K-fold splits, ranks them by the micro metric on the internal
    validation users, and greedily merges them (best first) via the
    project-level `merge_micro`.  Returns a `Metrics` holding the combined
    weights and `None`.

    NOTE(review): `user_beta_val` is accepted but never used — validation
    metrics below use `user_beta_train`; confirm this is intentional.
    """
    np.random.seed(random_seed)
    # Carve an internal validation split out of the training users;
    # the caller-supplied val_data / user_list_val_filtered are discarded.
    user_list_val_filtered = user_list_train_filtered[
        0:int(0.2 * len(user_list_train_filtered))]
    user_list_train_filtered = list(
        set(user_list_train_filtered) - set(user_list_val_filtered))
    val_data = train_data[train_data['user'].isin(user_list_val_filtered)]
    train_data = train_data[train_data['user'].isin(user_list_train_filtered)]
    metrics = Metrics()
    metrics.eta_lr = eta
    metrics.lamb_reg = lamb
    classifier_list = []
    # One classifier per K-fold split (trained on the held-out split indices).
    kf = KFold(n_splits=n_classifiers, shuffle=True)
    features = train_data.drop(['user', 'label'], axis=1)
    labels = train_data['label']
    for _, split_indices in kf.split(features):
        split_features = features.iloc[split_indices].values
        split_labels = labels.iloc[split_indices].values
        num_examples = split_features.shape[0]
        # Random unit-norm initial weights.
        w = np.random.normal(0, 1, (split_features.shape[1], ))
        w = w / np.linalg.norm(w)
        for num_iter in np.arange(num_total_iter_training):
            scores = sigmoid(np.dot(split_features, w))
            # Binary cross-entropy over the split.
            loss = -1 / num_examples * np.sum(
                split_labels * np.log(scores) +
                (1 - split_labels) * np.log(1 - scores))
            print("loss is ", loss)
            dLdwx = (scores - split_labels) * scores * (1 - scores)
            # NOTE(review): np.sum over the full array collapses the gradient
            # to a scalar (broadcast over w) — an axis=0 sum would give a
            # per-feature gradient; also the extra scores*(1-scores) factor
            # matches an MSE rather than cross-entropy derivative. Confirm.
            grad = 1 / num_examples * np.sum(
                dLdwx.reshape(-1, 1) * split_features)
            grad += lamb * w
            print("grad is ", np.linalg.norm(grad))
            print("\n")
            # Decayed-step gradient update.
            w = w - (eta / np.sqrt(num_iter + 1)) * grad
            accuracy = np.sum(split_labels * (scores > 0.5) +
                              (1 - split_labels) * (scores < 0.5))
            print('accuracy: {}'.format(accuracy / num_examples))
        classifier_list.append(w)
        print('eta is ', eta, 'and lambda is ', lamb)
        print('\n')
    # Score every classifier on the internal validation users.
    classifiers_with_metrics = []
    for w in classifier_list:
        user_feat = val_data.drop(['user', 'label'], axis=1).values
        y_scores = user_feat.dot(w)
        data_true = deepcopy(val_data)
        data_true['scores'] = y_scores
        data_true = data_true.sort_values(by='scores', ascending=False)
        data_true = data_true.reset_index(drop=True)
        metric = compute_micro(data_true, user_list_val_filtered,
                               user_beta_train, w, k)
        classifiers_with_metrics.append((metric, w))
    # Greedily merge classifiers starting from the best-scoring one.
    classifiers_with_metrics.sort(reverse=True, key=lambda x: x[0])
    combined_w = classifiers_with_metrics[0][1]
    for _, w in classifiers_with_metrics[1:]:
        combined_w = merge_micro(val_data, combined_w, w,
                                 user_list_val_filtered, user_beta_train, k)
    # create dummy metrics
    # need weights and one validation loss for the "best iter" logic
    # NOTE(review): this discards the eta_lr/lamb_reg set on the earlier
    # Metrics instance — confirm that is intended.
    metrics = Metrics()
    metrics.w_list.append(combined_w)
    metrics.micro_auc_rel_k_list_val.append(0)
    metrics.micro_auc_rel_k_list_train.append(0)
    metrics.loss_opt_list_train.append(0)
    return metrics, None
# NOTE(review): this chunk begins inside a function defined above this
# excerpt; the indentation of the first four lines is a best-effort
# reconstruction.
        progressbar.set_description(info)
    if use_prog_bar:
        progressbar.close()
    # Hand the fine-tuned fast weights back to the caller.
    return model.save_fast_weights()


# training start..
# Checkpoint tracker keeping the model with the best corpus_bleu so far.
best = Best(max, 'corpus_bleu', 'i', model=model, opt=meta_opt,
            path=args.model_name, gpu=args.gpu)
# Running-loss trackers for training and dev phases (project-defined Metrics).
train_metrics = Metrics('train', 'loss', 'real', 'fake')
dev_metrics = Metrics('dev', 'loss', 'gleu', 'real_loss', 'fake_loss',
                      'distance', 'alter_loss', 'distance2', 'fertility_loss',
                      'corpus_gleu')
# overall progress-bar
progressbar = tqdm(total=args.eval_every, desc='start training')
while True:
    # ----- saving the checkpoint ----- #
    # NOTE(review): `iters` is not defined in this excerpt — presumably
    # initialized/advanced elsewhere in the loop; verify in the full file.
    if iters % args.save_every == 0:
        args.logger.info('save (back-up) checkpoints at iter={}'.format(iters))
        with torch.cuda.device(args.gpu):
            torch.save(best.model.state_dict(),
                       '{}_iter={}.pt'.format(args.model_name, iters))
def train(models, optimizers, dataset, corpus, ckpts, params, args):
    """Adversarial text-autoencoder training loop (ARAE-style).

    For each batch: `epoch_ae` autoencoder updates, then `epoch_gan` rounds
    of discriminator/encoder updates plus generator updates, accumulating
    per-phase losses in a `Metrics` and normalizing them by the number of
    inner updates.  Periodically checkpoints, logs, emits FloydHub-style
    metric lines on stdout and TensorBoard scalars.

    NOTE(review): relies on module-level globals `autoencoder`, `tb_writer`,
    `islice`, `logging`, `os` and the train_* helper functions — none are
    parameters; confirm against the full file.
    """
    epoch_num = params.epoch_num
    epoch_gan = params.epoch_gan
    batch_epoch = params.batch_epoch            # resume mid-epoch batch index
    autoencoder.noise_radius = params.noise_radius
    step = 0
    for e in range(epoch_num, params.max_epoch):
        # Skip the first `batch_epoch` batches when resuming.
        for batch, (source, target) in islice(enumerate(dataset), batch_epoch,
                                              None):
            metrics = Metrics(
                epoch=e,
                max_epoch=params.max_epoch,
            )
            # --- autoencoder phase ---
            for p in range(params.epoch_ae):
                ae_metrics = train_autoencoder(models, optimizers, source,
                                               target, params)
                metrics.accum(ae_metrics)
            # --- adversarial phase: disc, encoder-vs-disc, generator ---
            for q in range(params.epoch_gan):
                for r in range(params.epoch_disc):
                    disc_metrics = train_disc(models, optimizers, source)
                    metrics.accum(disc_metrics)
                for r in range(params.epoch_enc):
                    enc_metrics = train_encoder_by_disc(models, optimizers,
                                                        source, params)
                    metrics.accum(enc_metrics)
                for t in range(params.epoch_gen):
                    gen_metrics = train_gen(models, optimizers, source)
                    metrics.accum(gen_metrics)
            # Normalize accumulated sums to per-update averages.
            metrics['ae_loss'] /= params.epoch_ae
            metrics['acc'] /= params.epoch_ae
            metrics['disc_loss'] /= (params.epoch_gan * params.epoch_disc)
            metrics['disc_fake_loss'] /= (params.epoch_gan * params.epoch_disc)
            metrics['disc_real_loss'] /= (params.epoch_gan * params.epoch_disc)
            metrics['real_norm'] /= (params.epoch_gan * params.epoch_disc)
            metrics['fake_norm'] /= (params.epoch_gan * params.epoch_disc)
            metrics['gen_loss'] /= (params.epoch_gan * params.epoch_gen)
            batch_epoch += 1
            # anneal noise every 5 batch_epoch for now
            if batch_epoch % 5 == 0:
                autoencoder.noise_radius = autoencoder.noise_radius * 0.995
            if batch_epoch % params.print_every == 0:
                # Checkpoint and persist resume state alongside the params.
                ckpts.save()
                logging.info('--- Epoch {}/{} Batch {} ---'.format(
                    e + 1, metrics['max_epoch'], batch_epoch))
                logging.info('Loss {:.4f}'.format(float(metrics['ae_loss'])))
                logging.info('Disc_Loss {:.4f}'.format(float(metrics['disc_loss'])))
                logging.info('Gen_Loss {:.4f}'.format(float(metrics['gen_loss'])))
                params.batch_epoch = batch_epoch
                params.epoch_num = e
                params.epoch_gan = epoch_gan
                params.noise_radius = autoencoder.noise_radius
                params.save(os.path.join(args.model_dir, 'params.json'))
                # Floydhub metrics
                print('{{"metric": "acc", "value": {}, "step": {}}}'.format(
                    float(metrics['acc']), step))
                print('{{"metric": "ae_loss", "value": {}, "step": {}}}'.format(
                    float(metrics['ae_loss']), step))
                print('{{"metric": "disc_loss", "value": {}, "step": {}}}'.format(
                    float(metrics['disc_loss']), step))
                print('{{"metric": "disc_fake_loss", "value": {}, "step": {}}}'.format(
                    float(metrics['disc_fake_loss']), step))
                print('{{"metric": "disc_real_loss", "value": {}, "step": {}}}'.format(
                    float(metrics['disc_real_loss']), step))
                print('{{"metric": "real_norm", "value": {}, "step": {}}}'.format(
                    float(metrics['real_norm']), step))
                print('{{"metric": "fake_norm", "value": {}, "step": {}}}'.format(
                    float(metrics['fake_norm']), step))
                print('{{"metric": "gen_loss", "value": {}, "step": {}}}'.format(
                    float(metrics['gen_loss']), step))
                step += 1
                tb_writer.add_scalar('train/acc', metrics['acc'], step)
                tb_writer.add_scalar('train/ae_loss', metrics['ae_loss'], step)
                tb_writer.add_scalar('train/disc_loss', metrics['disc_loss'], step)
                tb_writer.add_scalar('train/disc_fake_loss',
                                     metrics['disc_fake_loss'], step)
                tb_writer.add_scalar('train/disc_real_loss',
                                     metrics['disc_real_loss'], step)
                tb_writer.add_scalar('train/gen_loss', metrics['gen_loss'], step)
            # Generate sample sentences every other print interval.
            if batch_epoch % (params.print_every * 2) == 0:
                step += 1
                generate_sentence(models, source, corpus, step, args)
        # New epoch: restart from the first batch on the next pass.
        batch_epoch = 0
def train_model(args, model, train, dev, src=None, trg=None, trg_len_dic=None,
                teacher_model=None, save_path=None, maxsteps=None):
    """Main training loop for Transformer / FastTransformer models.

    Alternates gradient updates on `train` with periodic evaluation on `dev`
    every `args.eval_every` steps, tracking the best checkpoint via `Best`.
    Supports denoising schedules over decoder iterations, self-distillation
    between decoder iterations, and target-length-prediction fine-tuning
    (`args.finetune_trg_len`).  `maxsteps` caps total updates (defaults to
    `args.maximum_steps`).

    NOTE(review): depends on many module-level names (FastTransformer,
    Transformer, Best, Metrics, valid_model, corrupt_target, export, softmax,
    Variable, tqdm, np, math, gc, torch, nn, F) defined elsewhere in the file.
    """
    if args.tensorboard and (not args.debug):
        from tensorboardX import SummaryWriter
        writer = SummaryWriter(str(args.event_path / args.id_str))
    # Per-decoder-iteration denoising weights (FastTransformer only).
    if type(model) is FastTransformer and args.denoising_prob > 0.0:
        denoising_weights = [
            args.denoising_weight for idx in range(args.train_repeat_dec)
        ]
        denoising_out_weights = [
            args.denoising_out_weight for idx in range(args.train_repeat_dec)
        ]
    if type(model) is FastTransformer and args.layerwise_denoising_weight:
        # Linearly decaying weights from 0.9 down to 0.1, plus a final 0.1.
        start, end = 0.9, 0.1
        diff = (start - end) / (args.train_repeat_dec - 1)
        denoising_weights = np.arange(start=end, stop=start,
                                      step=diff).tolist()[::-1] + [0.1]
    # optimizer
    for k, p in zip(model.state_dict().keys(), model.parameters()):
        # only finetune layers that are responsible to predicting target len
        if args.finetune_trg_len:
            if "pred_len" not in k:
                p.requires_grad = False
        else:
            if "pred_len" in k:
                p.requires_grad = False
    params = [p for p in model.parameters() if p.requires_grad]
    if args.optimizer == 'Adam':
        opt = torch.optim.Adam(params, betas=(0.9, 0.98), eps=1e-9)
    else:
        raise NotImplementedError
    # if resume training
    if (args.load_from is not None) and (args.resume):
        with torch.cuda.device(args.gpu):
            # very important.
            offset, opt_states = torch.load(
                str(args.model_path / args.load_from) + '.pt.states',
                map_location=lambda storage, loc: storage.cuda())
            opt.load_state_dict(opt_states)
    else:
        offset = 0
    # Track best dev BLEU per decoder iteration, or best target-length
    # accuracy when fine-tuning the length predictor.
    if not args.finetune_trg_len:
        best = Best(max, *[
            'BLEU_dec{}'.format(ii + 1) for ii in range(args.valid_repeat_dec)
        ], 'i', model=model, opt=opt,
                    path=str(args.model_path / args.id_str),
                    gpu=args.gpu, which=range(args.valid_repeat_dec))
    else:
        best = Best(max, *['pred_target_len_correct'], 'i',
                    model=model, opt=opt,
                    path=str(args.model_path / args.id_str),
                    gpu=args.gpu, which=[0])
    train_metrics = Metrics(
        'train loss',
        *['loss_{}'.format(idx + 1) for idx in range(args.train_repeat_dec)],
        data_type="avg")
    dev_metrics = Metrics(
        'dev loss',
        *['loss_{}'.format(idx + 1) for idx in range(args.valid_repeat_dec)],
        data_type="avg")
    if "predict" in args.trg_len_option:
        train_metrics_trg = Metrics('train loss target', *[
            "pred_target_len_loss", "pred_target_len_correct",
            "pred_target_len_approx"
        ], data_type="avg")
        train_metrics_average = Metrics(
            'train loss average',
            *["average_target_len_correct", "average_target_len_approx"],
            data_type="avg")
        dev_metrics_trg = Metrics('dev loss target', *[
            "pred_target_len_loss", "pred_target_len_correct",
            "pred_target_len_approx"
        ], data_type="avg")
        dev_metrics_average = Metrics(
            'dev loss average',
            *["average_target_len_correct", "average_target_len_approx"],
            data_type="avg")
    else:
        train_metrics_trg = None
        train_metrics_average = None
        dev_metrics_trg = None
        dev_metrics_average = None
    if not args.no_tqdm:
        progressbar = tqdm(total=args.eval_every, desc='start training.')
    if maxsteps is None:
        maxsteps = args.maximum_steps
    #targetlength = TargetLength()
    for iters, train_batch in enumerate(train):
        #targetlength.accumulate( train_batch )
        #continue
        iters += offset
        # ----- periodic back-up checkpoint -----
        if args.save_every > 0 and iters % args.save_every == 0:
            args.logger.info(
                'save (back-up) checkpoints at iter={}'.format(iters))
            with torch.cuda.device(args.gpu):
                torch.save(
                    best.model.state_dict(),
                    '{}_iter={}.pt'.format(str(args.model_path / args.id_str),
                                           iters))
                torch.save([iters, best.opt.state_dict()],
                           '{}_iter={}.pt.states'.format(
                               str(args.model_path / args.id_str), iters))
        # ----- periodic evaluation on dev -----
        if iters % args.eval_every == 0:
            torch.cuda.empty_cache()
            gc.collect()
            dev_metrics.reset()
            if dev_metrics_trg is not None:
                dev_metrics_trg.reset()
            if dev_metrics_average is not None:
                dev_metrics_average.reset()
            outputs_data = valid_model(args, model, dev, dev_metrics,
                                       dev_metrics_trg=dev_metrics_trg,
                                       dev_metrics_average=dev_metrics_average,
                                       teacher_model=None, print_out=True,
                                       trg_len_dic=trg_len_dic)
            #outputs_data = [0, [0,0,0,0], 0, 0]
            if args.tensorboard and (not args.debug):
                for ii in range(args.valid_repeat_dec):
                    writer.add_scalar('dev/single/Loss_{}'.format(ii + 1),
                                      getattr(dev_metrics,
                                              "loss_{}".format(ii + 1)),
                                      iters)  # NLL averaged over dev corpus
                    writer.add_scalar('dev/single/BLEU_{}'.format(ii + 1),
                                      outputs_data['real'][ii][0],
                                      iters)  # NOTE corpus bleu
                if "predict" in args.trg_len_option:
                    writer.add_scalar("dev/single/pred_target_len_loss",
                                      outputs_data["pred_target_len_loss"],
                                      iters)
                    writer.add_scalar("dev/single/pred_target_len_correct",
                                      outputs_data["pred_target_len_correct"],
                                      iters)
                    writer.add_scalar("dev/single/pred_target_len_approx",
                                      outputs_data["pred_target_len_approx"],
                                      iters)
                    writer.add_scalar(
                        "dev/single/average_target_len_correct",
                        outputs_data["average_target_len_correct"], iters)
                    writer.add_scalar(
                        "dev/single/average_target_len_approx",
                        outputs_data["average_target_len_approx"], iters)
                # Commented-out aggregate plots kept for reference:
                """
                writer.add_scalars('dev/total/BLEUs',
                    {"iter_{}".format(idx+1): bleu
                     for idx, bleu in enumerate(outputs_data['bleu'])}, iters)
                writer.add_scalars('dev/total/Losses',
                    {"iter_{}".format(idx+1):
                         getattr(dev_metrics, "loss_{}".format(idx+1))
                     for idx in range(args.valid_repeat_dec)}, iters)
                """
            if not args.debug:
                # Feed dev scores to the best-checkpoint tracker and log them.
                if not args.finetune_trg_len:
                    best.accumulate(*[xx[0] for xx in outputs_data['real']],
                                    iters)
                    values = list(best.metrics.values())
                    args.logger.info("best model : {}, {}".format(
                        "BLEU=[{}]".format(", ".join(
                            [str(x) for x in values[:args.valid_repeat_dec]])),
                        "i={}".format(values[args.valid_repeat_dec]),
                    ))
                else:
                    best.accumulate(
                        *[outputs_data['pred_target_len_correct']], iters)
                    values = list(best.metrics.values())
                    args.logger.info("best model : {}".format(
                        "pred_target_len_correct = {}".format(values[0])))
                args.logger.info('model:' + args.prefix + args.hp_str)
            # ---set-up a new progressor---
            if not args.no_tqdm:
                progressbar.close()
                progressbar = tqdm(total=args.eval_every,
                                   desc='start training.')
            if type(model) is FastTransformer and args.anneal_denoising_weight:
                # Lower the denoising weight as BLEU improves.
                for ii, bb in enumerate(
                        [xx[0] for xx in outputs_data['real']][:-1]):
                    denoising_weights[ii] = 0.9 - 0.1 * int(
                        math.floor(bb / 3.0))
        if iters > maxsteps:
            args.logger.info('reached the maximum updating steps.')
            break
        model.train()

        def get_lr_transformer(i, lr0=0.1):
            # "Noam" warmup/decay schedule from Attention Is All You Need.
            return lr0 * 10 / math.sqrt(args.d_model) * min(
                1 / math.sqrt(i), i / (args.warmup * math.sqrt(args.warmup)))

        def get_lr_anneal(iters, lr0=0.1):
            # Linear anneal from args.lr down to lr_end over anneal_steps.
            lr_end = 1e-5
            return max(0, (args.lr - lr_end) * (args.anneal_steps - iters) /
                       args.anneal_steps) + lr_end

        if args.lr_schedule == "fixed":
            opt.param_groups[0]['lr'] = args.lr
        elif args.lr_schedule == "anneal":
            opt.param_groups[0]['lr'] = get_lr_anneal(iters + 1)
        elif args.lr_schedule == "transformer":
            opt.param_groups[0]['lr'] = get_lr_transformer(iters + 1)
        opt.zero_grad()
        # Prepare decoder inputs/masks/targets for this batch.
        if args.dataset == "mscoco":
            decoder_inputs, decoder_masks,\
            targets, target_masks,\
            _, source_masks,\
            encoding, batch_size, rest = model.quick_prepare_mscoco(
                train_batch, all_captions=train_batch[1],
                fast=(type(model) is FastTransformer),
                inputs_dec=args.inputs_dec,
                trg_len_option=args.trg_len_option,
                max_len=args.max_offset, trg_len_dic=trg_len_dic, bp=args.bp)
        else:
            decoder_inputs, decoder_masks,\
            targets, target_masks,\
            sources, source_masks,\
            encoding, batch_size, rest = model.quick_prepare(
                train_batch, fast=(type(model) is FastTransformer),
                trg_len_option=args.trg_len_option,
                trg_len_ratio=args.trg_len_ratio,
                trg_len_dic=trg_len_dic, bp=args.bp)
        losses = []
        if type(model) is Transformer:
            loss = model.cost(targets, target_masks,
                              out=model(encoding, source_masks,
                                        decoder_inputs, decoder_masks))
            losses.append(loss)
        elif type(model) is FastTransformer:
            # Iterative refinement: run the decoder train_repeat_dec times,
            # feeding each iteration's output into the next.
            all_logits = []
            all_denoising_masks = []
            for iter_ in range(args.train_repeat_dec):
                curr_iter = min(iter_, args.num_decs - 1)
                next_iter = min(curr_iter + 1, args.num_decs - 1)
                out = model(encoding, source_masks, decoder_inputs,
                            decoder_masks, iter_=curr_iter, return_probs=False)
                if args.self_distil > 0.0:
                    loss, logits_masked = model.cost(targets, target_masks,
                                                     out=out, iter_=curr_iter,
                                                     return_logits=True)
                else:
                    loss = model.cost(targets, target_masks, out=out,
                                      iter_=curr_iter)
                logits = model.decoder[curr_iter].out(out)
                # Pick next-iteration tokens by argmax or sampling.
                if args.use_argmax:
                    _, argmax = torch.max(logits, dim=-1)
                else:
                    probs = softmax(logits)
                    probs_sz = probs.size()
                    logits_ = Variable(probs.data, requires_grad=False)
                    argmax = torch.multinomial(
                        logits_.contiguous().view(-1, probs_sz[-1]),
                        1).view(*probs_sz[:-1])
                if args.self_distil > 0.0:
                    all_logits.append(logits_masked)
                losses.append(loss)
                decoder_inputs_ = 0
                denoising_mask = 1
                if args.next_dec_input in ["both", "emb"]:
                    # With prob denoising_prob, feed a corrupted target
                    # instead of the model's own prediction.
                    if args.denoising_prob > 0.0 and np.random.rand(
                    ) < args.denoising_prob:
                        cor = corrupt_target(targets, decoder_masks,
                                             len(trg.vocab),
                                             denoising_weights[iter_],
                                             args.corruption_probs)
                        emb = F.embedding(
                            cor, model.decoder[next_iter].out.weight *
                            math.sqrt(args.d_model))
                        denoising_mask = 0
                    else:
                        emb = F.embedding(
                            argmax, model.decoder[next_iter].out.weight *
                            math.sqrt(args.d_model))
                    if args.denoising_out_weight > 0:
                        if denoising_out_weights[iter_] > 0.0:
                            corrupted_argmax = corrupt_target(
                                argmax, decoder_masks,
                                denoising_out_weights[iter_])
                        else:
                            corrupted_argmax = argmax
                        emb = F.embedding(
                            corrupted_argmax,
                            model.decoder[next_iter].out.weight *
                            math.sqrt(args.d_model))
                    decoder_inputs_ += emb
                all_denoising_masks.append(denoising_mask)
                if args.next_dec_input in ["both", "out"]:
                    decoder_inputs_ += out
                decoder_inputs = decoder_inputs_
        # self distillation loss if requested
        if args.self_distil > 0.0:
            # Pull earlier iterations' logits toward later (detached) ones,
            # weighted by 1/distance; skipped for denoised iterations.
            self_distil_losses = []
            for logits_i in range(1, len(all_logits) - 1):
                self_distill_loss_i = 0.0
                for logits_j in range(logits_i + 1, len(all_logits)):
                    self_distill_loss_i += \
                        all_denoising_masks[logits_j] * \
                        all_denoising_masks[logits_i] * \
                        (1/(logits_j-logits_i)) * args.self_distil * \
                        F.mse_loss(all_logits[logits_i],
                                   all_logits[logits_j].detach())
                self_distil_losses.append(self_distill_loss_i)
            self_distil_loss = sum(self_distil_losses)
        loss = sum(losses)
        # accmulate the training metrics
        train_metrics.accumulate(batch_size, *losses, print_iter=None)
        if train_metrics_trg is not None:
            train_metrics_trg.accumulate(batch_size,
                                         *[rest[0], rest[1], rest[2]])
        if train_metrics_average is not None:
            train_metrics_average.accumulate(batch_size, *[rest[3], rest[4]])
        # Backward pass: which loss to use depends on mode.
        if type(model) is FastTransformer and args.self_distil > 0.0:
            (loss + self_distil_loss).backward()
        else:
            if "predict" in args.trg_len_option:
                if args.finetune_trg_len:
                    rest[0].backward()  # length-prediction loss only
                else:
                    loss.backward()
            else:
                loss.backward()
        if args.grad_clip > 0:
            # NOTE(review): clip_grad_norm (no underscore) is the pre-0.4
            # PyTorch API — confirm the torch version pinned by this project.
            total_norm = nn.utils.clip_grad_norm(params, args.grad_clip)
        opt.step()
        # Build the progress-bar / log string for this update.
        mid_str = ''
        if type(model) is FastTransformer and args.self_distil > 0.0:
            mid_str += 'distil={:.5f}, '.format(
                self_distil_loss.cpu().data.numpy()[0])
        if type(model) is FastTransformer and "predict" in args.trg_len_option:
            mid_str += 'pred_target_len_loss={:.5f}, '.format(
                rest[0].cpu().data.numpy()[0])
        if type(model) is FastTransformer and args.denoising_prob > 0.0:
            mid_str += "/".join(
                ["{:.1f}".format(ff) for ff in denoising_weights[:-1]]) + ", "
        info = 'update={}, loss={}, {}lr={:.1e}'.format(
            iters, "/".join(["{:.3f}".format(export(ll)) for ll in losses]),
            mid_str, opt.param_groups[0]['lr'])
        if args.no_tqdm:
            if iters % args.eval_every == 0:
                args.logger.info("update {} : {}".format(
                    iters, str(train_metrics)))
        else:
            progressbar.update(1)
            progressbar.set_description(info)
        # Periodic training-scalar dump + metric reset.
        if iters % args.eval_every == 0 and args.tensorboard and (
                not args.debug):
            for idx in range(args.train_repeat_dec):
                writer.add_scalar(
                    'train/single/Loss_{}'.format(idx + 1),
                    getattr(train_metrics, "loss_{}".format(idx + 1)), iters)
            if "predict" in args.trg_len_option:
                writer.add_scalar(
                    "train/single/pred_target_len_loss",
                    getattr(train_metrics_trg, "pred_target_len_loss"), iters)
                writer.add_scalar(
                    "train/single/pred_target_len_correct",
                    getattr(train_metrics_trg, "pred_target_len_correct"),
                    iters)
                writer.add_scalar(
                    "train/single/pred_target_len_approx",
                    getattr(train_metrics_trg, "pred_target_len_approx"),
                    iters)
                writer.add_scalar(
                    "train/single/average_target_len_correct",
                    getattr(train_metrics_average,
                            "average_target_len_correct"), iters)
                writer.add_scalar(
                    "train/single/average_target_len_approx",
                    getattr(train_metrics_average,
                            "average_target_len_approx"), iters)
            train_metrics.reset()
            if train_metrics_trg is not None:
                train_metrics_trg.reset()
            if train_metrics_average is not None:
                train_metrics_average.reset()
# Create this run's output directories and snapshot the config directory for
# reproducibility (root_path / model_path / config / device_type are defined
# above this excerpt).
path_figure = os.path.join(root_path, "figs")
os.makedirs(model_path)
os.makedirs(path_figure)
shutil.copytree(os.path.abspath('config/'), os.path.join(root_path, 'config'))
print("Using", device_type)
number_agents = config.agents.number_predators + config.agents.number_preys
# Definition of the agents
agents = [AgentMADDPG("predator", "predator-{}".format(k), device,
                      config.agents)
          for k in range(config.agents.number_predators)]
agents += [AgentMADDPG("prey", "prey-{}".format(k), device, config.agents)
           for k in range(config.agents.number_preys)]
metrics = []                  # one Metrics tracker per agent (filled below)
collision_metric = Metrics()  # shared tracker for collision statistics
actors_noise = []
# Definition of the memories and set to device
# Define the metrics for all agents
for agent in agents:
    metrics.append(Metrics())
    # If we have to load the pretrained model
    if config.learning.use_model:
        path = os.path.abspath(os.path.join(config.learning.model_path,
                                            agent.id + ".pth"))
        agent.load(path)
env = Env(config.env, config)
shared_memory = ReplayMemory(config.replay_memory.size)
# Add agents to the environment
# NOTE(review): the body of this loop continues beyond this excerpt.
for k in range(len(agents)):
# NOTE(review): this chunk begins mid-expression — the comprehension and
# trailing kwargs below close a torch.optim.Adam([...]) call opened above
# this excerpt (presumably assigning `self_opt` over the fine-tunable
# parameters only).
        p for p in model.get_parameters(type=args.finetune_params)
        if p.requires_grad
    ], betas=(0.9, 0.98), eps=1e-9)
corpus_bleu = -1
# training start..
# Track the checkpoint with the best corpus_bleu seen so far.
best = Best(max, 'corpus_bleu', 'i', model=model, opt=self_opt,
            path=args.model_name, gpu=args.gpu)
dev_metrics = Metrics('dev', 'loss', 'gleu')
# Baseline validation pass before any fine-tuning epochs.
outputs_data = valid_model(args, model, dev_real, dev_metrics,
                           print_out=False)
corpus_bleu0 = outputs_data['corpus_bleu']
# Remember the starting weights together with their BLEU score.
fast_weights = [(weights, corpus_bleu0)]
if args.tensorboard and (not args.debug):
    writer.add_scalar('dev/BLEU_corpus_', outputs_data['corpus_bleu'],
                      dev_iters)
# NOTE(review): the body of this loop continues beyond this excerpt.
for j in range(args.valid_epochs):
    args.logger.info("Fine-tuning epoch: {}".format(j))
# NOTE(review): this chunk begins inside the then-branch of an `if` whose
# condition (presumably "pair features available") appears above this excerpt.
    pair_features = load_pair_features(f_pair_feature_key,
                                       f_pair_feature_value)
    train_data = DirectionalTripletsWithPairFeature(options["embedding"],
                                                    train_hyper2hypo,
                                                    pair_features)
else:
    train_data = DirectionalTriplets(options["embedding"], train_hyper2hypo)
print("=== Finish constructing dataset ===")
print("Number of training hyposets: {}".format(len(train_data)))
# Worker/pin-memory settings for GPU runs.
# NOTE(review): `kwargs` is never forwarded to the DataLoader below —
# confirm whether it should be passed as **kwargs.
kwargs = {'num_workers': 1,
          'pin_memory': True} if options["device_id"] != -1 else {}
train_loader = torch.utils.data.DataLoader(train_data,
                                           batch_size=options["batch_size"],
                                           shuffle=True, drop_last=False)
# Construct testing set
f_test = options["test_pairs_file"]
test_pairs = load_element_pairs(f_test, with_label=False)
print("Number of testing term pairs: {}".format(len(test_pairs)))
# Start model tuning: random search over 200 sampled settings.
results = Results('./results/tune_{}.txt'.format(args.comment))
metrics = Metrics()
for hp in sample_hyperparameters(num=200):
    for m in hp:
        options[m] = hp[m]  # update hyper-parameters
    options["pt"] = {
        "name": hp["pt_name"],
        "dropout": hp["edge_dropout"]
    }
    best_overall_metric, best_epoch, best_metrics = run(train_loader,
                                                        test_pairs, options)
    metrics.metrics = best_metrics
    results.save_metrics(hp, metrics)