def output_and_loss(self, h_block, t_block):
    batch, length, units = h_block.shape
    # shape : (batch * sequence_length, num_classes)
    logits_flat = seq_func(self.affine, h_block, reconstruct_shape=False)
    # shape : (batch * sequence_length, num_classes)
    log_probs_flat = F.log_softmax(logits_flat, dim=-1)
    rebatch, _ = logits_flat.shape
    concat_t_block = t_block.view(rebatch)
    weights = (concat_t_block >= 1).type(h_block.type())
    n_correct, n_total = utils.accuracy(logits_flat.data,
                                        concat_t_block.data,
                                        ignore_index=0)

    if self.confidence < 1:
        tdata = concat_t_block.data
        mask = torch.nonzero(tdata.eq(self.padding_idx)).squeeze()
        tmp_ = self.one_hot.repeat(concat_t_block.size(0), 1)
        tmp_.scatter_(1, tdata.unsqueeze(1), self.confidence)
        if mask.dim() > 0 and mask.numel() > 0:
            tmp_.index_fill_(0, mask, 0)
        concat_t_block = Variable(tmp_, requires_grad=False)

    loss = self.criterion(log_probs_flat, concat_t_block)
    loss = loss.sum() / (weights.sum() + 1e-8)
    stats = utils.Statistics(loss=loss.data.cpu() * n_total,
                             n_correct=n_correct,
                             n_words=n_total)
    return loss, stats
def main():
    stats = utils.Statistics()
    pipes = []
    procs = []

    # make process group id match process id so all children
    # will share the same group id (for easier termination)
    os.setpgrp()

    with stats.time('setup'):
        args = parse_args()
        setup_execution(args, stats, os.getpid())

        if args.same_seeds or args.comms_disable:
            assert args.parallel is not None, \
                "some flags you have specified have to be tested in the parallel mode."

        if args.parallel:
            for i, mode in enumerate(args.parallel.split(',')):
                newargs = copy.copy(args)
                if mode == 'MUS':
                    newargs.bias = 'MUSes'
                elif mode == 'MCS':
                    newargs.bias = 'MCSes'
                elif mode == 'MCSonly':
                    newargs.mcs_only = True
                else:
                    assert False, "Invalid parallel mode: %s" % mode

                pipe, child_pipe = multiprocessing.Pipe()
                pipes.append(pipe)

                if args.same_seeds:
                    if args.all_randomized:
                        seed = 1
                    else:
                        seed = None
                else:
                    # TODO: Handle randomization with non-homogeneous thread modes
                    if not args.all_randomized and i == 0:
                        seed = None
                    else:
                        seed = i + 1

                proc = multiprocessing.Process(target=run_enumerator,
                                               args=(stats, newargs, seed, child_pipe))
                procs.append(proc)

    # useful for timing just the parsing / setup
    if args.limit == 0:
        sys.stderr.write("Result limit reached.\n")
        sys.exit(0)

    if args.parallel:
        for proc in procs:
            proc.start()
        run_master(stats, args, pipes)
    else:
        run_enumerator(stats, args, seed=args.rnd_init)
def output_and_loss(self, h_block, t_block):
    batch, units, length = h_block.shape
    # shape : (batch * sequence_length, num_classes)
    logits_flat = seq_func(self.affine, h_block, reconstruct_shape=False)
    rebatch, _ = logits_flat.shape
    concat_t_block = t_block.view(rebatch)
    weights = (concat_t_block >= 1).float()
    n_correct, n_total = utils.accuracy(logits_flat,
                                        concat_t_block,
                                        ignore_index=0)
    # shape : (batch * sequence_length, num_classes)
    log_probs_flat = F.log_softmax(logits_flat, dim=-1)
    # shape : (batch * max_len, 1)
    targets_flat = t_block.view(-1, 1).long()

    if self.label_smoothing is not None and self.label_smoothing > 0.0:
        num_classes = logits_flat.size(-1)
        smoothing_value = self.label_smoothing / (num_classes - 1)
        # Fill all the correct indices with 1 - smoothing value.
        one_hot_targets = input_like(log_probs_flat, smoothing_value)
        smoothed_targets = one_hot_targets.scatter_(-1, targets_flat,
                                                    1.0 - self.label_smoothing)
        negative_log_likelihood_flat = -log_probs_flat * smoothed_targets
        negative_log_likelihood_flat = negative_log_likelihood_flat.sum(-1, keepdim=True)
    else:
        # Contribution to the negative log likelihood only comes from the exact indices
        # of the targets, as the target distributions are one-hot. Here we use torch.gather
        # to extract the indices of the num_classes dimension which contribute to the loss.
        # shape : (batch * sequence_length, 1)
        negative_log_likelihood_flat = -torch.gather(log_probs_flat, dim=1,
                                                     index=targets_flat)

    # shape : (batch, sequence_length)
    negative_log_likelihood = negative_log_likelihood_flat.view(rebatch)
    negative_log_likelihood = negative_log_likelihood * weights
    # shape : (batch_size,)
    loss = negative_log_likelihood.sum() / (weights.sum() + 1e-13)
    stats = utils.Statistics(loss=utils.to_cpu(loss) * n_total,
                             n_correct=utils.to_cpu(n_correct),
                             n_words=n_total)
    return loss, stats
def forward(self, *args):
    # Identify the row indexes corresponding to lang1 and lang2
    lang1_input = index_select_train(self.lang1, args)
    if lang1_input is not None:
        loss1, stats1 = self.model1(*lang1_input)
    else:
        loss1 = 0.
        stats1 = utils.Statistics()

    lang2_input = index_select_train(self.lang2, args)
    if lang2_input is not None:
        loss2, stats2 = self.model2(*lang2_input)
    else:
        loss2 = 0.
        stats2 = utils.Statistics()

    n_total = stats1.n_words + stats2.n_words
    n_correct = stats1.n_correct + stats2.n_correct
    loss = ((loss1 * stats1.n_words) + (loss2 * stats2.n_words)) / n_total
    stats = utils.Statistics(loss=loss.data.cpu() * n_total,
                             n_correct=n_correct,
                             n_words=n_total)
    return loss, stats
def report_func(epoch, batch, num_batches, start_time, report_stats,
                report_every):
    """
    This is the user-defined batch-level training progress report function.

    Args:
        epoch (int): current epoch count.
        batch (int): current batch count.
        num_batches (int): total number of batches.
        start_time (float): last report time.
        report_stats (Statistics): old Statistics instance.
        report_every (int): number of batches between progress reports.
    Returns:
        report_stats (Statistics): updated Statistics instance.
    """
    if batch % report_every == -1 % report_every:
        report_stats.output(epoch, batch + 1, num_batches, start_time)
        report_stats = utils.Statistics()
    return report_stats
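The reporting condition above is easy to misread, so here is a minimal standalone sketch (the `report_every` value is assumed for illustration): because batch counters are 0-indexed, `-1 % report_every` equals `report_every - 1`, so the condition fires exactly once per window of `report_every` batches and the stats are then reset.

# Standalone illustration; report_every=50 is an assumed value, not from the repo above.
report_every = 50
fires = [b for b in range(200) if b % report_every == -1 % report_every]
print(fires)  # [49, 99, 149, 199] -> a report after every 50 completed batches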
def main():
    stats = utils.Statistics()

    with stats.time('setup'):
        args = parse_args()
        setup_execution(args, stats)
        csolver, msolver = setup_solvers(args)
        config = setup_config(args)
        mp = MarcoPolo(csolver, msolver, stats, config)

    # useful for timing just the parsing / setup
    if args.limit == 0:
        sys.stderr.write("Result limit reached.\n")
        sys.exit(0)

    # enumerate results in a separate thread so signal handling works while in C code
    # ref: https://thisismiller.github.io/blog/CPython-Signal-Handling/
    def enumerate():
        remaining = args.limit

        for result in mp.enumerate():
            output = result[0]
            if args.alltimes:
                output = "%s %0.3f" % (output, stats.current_time())
            if args.verbose:
                output = "%s %s" % (output,
                                    " ".join([str(x + 1) for x in result[1]]))
            print(output)

            if remaining:
                remaining -= 1
                if remaining == 0:
                    sys.stderr.write("Result limit reached.\n")
                    sys.exit(0)

    enumthread = threading.Thread(target=enumerate)
    enumthread.daemon = True  # so thread is killed when main thread exits (e.g. in signal handler)
    enumthread.start()
    enumthread.join(float("inf"))  # timeout required for signal handler to work; set to infinity
def __init__(self, validation_config):
    self._nondet_var_map = None
    self.machine_model = validation_config.machine_model
    self.config = validation_config
    self.witness_creator = wit_gen.WitnessCreator()
    self.harness_creator = harness_gen.HarnessCreator()
    self.naive_verification = validation_config.naive_verification

    # If a void appears in a line, there must be something between
    # the void and the __VERIFIER_error() symbol - otherwise
    # it is a function definition/declaration.
    self.error_method_pattern = re.compile(
        '((?!void).)*(void.*\S.*)?__VERIFIER_error\(\) *;.*')

    self.statistics = utils.Statistics('Test Validator ' + self.get_name())
    self.timer_validation = utils.Stopwatch()
    self.statistics.add_value('Time for validation', self.timer_validation)
    self.timer_witness_validation = utils.Stopwatch()
    self.statistics.add_value('Time for witness validation',
                              self.timer_witness_validation)
    self.counter_size_witnesses = utils.Counter()
    self.statistics.add_value('Total size of witnesses',
                              self.counter_size_witnesses)
    self.timer_execution_validation = utils.Stopwatch()
    self.statistics.add_value('Time for execution validation',
                              self.timer_execution_validation)
    self.counter_size_harnesses = utils.Counter()
    self.statistics.add_value('Total size of harnesses',
                              self.counter_size_harnesses)
    self.timer_vector_gen = utils.Stopwatch()
    self.statistics.add_value("Time for test vector generation",
                              self.timer_vector_gen)
    self.counter_handled_test_cases = utils.Counter()
    self.statistics.add_value('Number of looked-at test cases',
                              self.counter_handled_test_cases)
    self.final_test_vector_size = utils.Constant()
    self.statistics.add_value("Size of successful test vector",
                              self.final_test_vector_size)
def __init__(self, timelimit, machine_model, log_verbose):
    self.machine_model = machine_model
    self.timelimit = int(timelimit) if timelimit else 0
    self.log_verbose = log_verbose

    self.statistics = utils.Statistics("Input Generator " + self.get_name())
    self.timer_file_access = utils.Stopwatch()
    self.timer_prepare = utils.Stopwatch()
    self.timer_input_gen = utils.Stopwatch()
    self.timer_generator = utils.Stopwatch()
    self.number_generated_tests = utils.Constant()

    self.statistics.add_value('Time for full input generation',
                              self.timer_input_gen)
    self.statistics.add_value('Time for test case generator',
                              self.timer_generator)
    self.statistics.add_value('Time for controlled file accesses',
                              self.timer_file_access)
    self.statistics.add_value('Time for file preparation', self.timer_prepare)
    self.statistics.add_value('Number of generated test cases',
                              self.number_generated_tests)
criterion = TASummDecLoss(model.generator, 0, model.decoder.vocab_size)

if args.model_type == 'abs':
    dec_params = [
        p for n, p in model.decoder.named_parameters()
        if not n.startswith('encoder')
    ]
    optimizer = AdamW([{
        'params': model.encoder.parameters(),
        'lr': args.lr_enc
    }, {
        'params': dec_params,
        'lr': args.lr_dec
    }], lr=1e-3)
else:
    optimizer = AdamW(model.parameters(), lr=args.lr_enc)

scheduler = ReduceLROnPlateau(optimizer, patience=2, factor=0.9)
training_stats = utils.Statistics()

# Train --------------------------------------------------------------------
logger.info(f'Start training {args.model_type} model ')
for epoch in range(1, args.epochs + 1):
    training_stats.epoch = epoch
    if args.model_type in ['rel', 'ext']:
        train_encoder(model, criterion, optimizer, scheduler, training_stats)
    elif args.model_type == 'abs':
        train_abs(model, criterion, optimizer, scheduler, training_stats)
def main():
    best_score = 0
    args = get_train_args()
    print(json.dumps(args.__dict__, indent=4))

    # Reading the int indexed text dataset
    train_data = np.load(os.path.join(args.input, args.data + ".train.npy"))
    train_data = train_data.tolist()
    dev_data = np.load(os.path.join(args.input, args.data + ".valid.npy"))
    dev_data = dev_data.tolist()
    test_data = np.load(os.path.join(args.input, args.data + ".test.npy"))
    test_data = test_data.tolist()

    # Reading the vocab file
    with open(os.path.join(args.input, args.data + '.vocab.pickle'), 'rb') as f:
        id2w = pickle.load(f)

    args.id2w = id2w
    args.n_vocab = len(id2w)

    # Define Model
    model = net.Transformer(args)
    tally_parameters(model)
    if args.gpu >= 0:
        model.cuda(args.gpu)
    print(model)

    optimizer = optim.TransformerAdamTrainer(model, args)

    # optionally resume from a checkpoint
    if args.resume:
        if os.path.isfile(args.model_file):
            print("=> loading checkpoint '{}'".format(args.model_file))
            checkpoint = torch.load(args.model_file)
            args.start_epoch = checkpoint['epoch']
            best_score = checkpoint['best_score']
            model.load_state_dict(checkpoint['state_dict'])
            optimizer.load_state_dict(checkpoint['optimizer'])
            print("=> loaded checkpoint '{}' (epoch {})".format(
                args.model_file, checkpoint['epoch']))
        else:
            print("=> no checkpoint found at '{}'".format(args.model_file))

    src_data, trg_data = list(zip(*train_data))
    total_src_words = len(list(itertools.chain.from_iterable(src_data)))
    total_trg_words = len(list(itertools.chain.from_iterable(trg_data)))
    iter_per_epoch = (total_src_words + total_trg_words) // args.wbatchsize
    print('Approximate number of iter/epoch =', iter_per_epoch)
    time_s = time()

    global_steps = 0
    for epoch in range(args.start_epoch, args.epoch):
        random.shuffle(train_data)
        train_iter = data.iterator.pool(
            train_data,
            args.wbatchsize,
            key=lambda x: data.utils.interleave_keys(len(x[0]), len(x[1])),
            batch_size_fn=batch_size_func,
            random_shuffler=data.iterator.RandomShuffler())
        report_stats = utils.Statistics()
        train_stats = utils.Statistics()
        valid_stats = utils.Statistics()
        if args.debug:
            grad_norm = 0.

        for num_steps, train_batch in enumerate(train_iter):
            global_steps += 1
            model.train()
            optimizer.zero_grad()
            src_iter = list(zip(*train_batch))[0]
            src_words = len(list(itertools.chain.from_iterable(src_iter)))
            report_stats.n_src_words += src_words
            train_stats.n_src_words += src_words
            in_arrays = utils.seq2seq_pad_concat_convert(train_batch, -1)
            loss, stat = model(*in_arrays)
            loss.backward()

            if args.debug:
                norm = utils.grad_norm(model.parameters())
                grad_norm += norm
                if global_steps % args.report_every == 0:
                    print("> Gradient Norm: %1.4f" % (grad_norm / (num_steps + 1)))

            optimizer.step()
            report_stats.update(stat)
            train_stats.update(stat)
            report_stats = report_func(epoch, num_steps, iter_per_epoch, time_s,
                                       report_stats, args.report_every)

            if (global_steps + 1) % args.eval_steps == 0:
                dev_iter = data.iterator.pool(
                    dev_data,
                    args.wbatchsize,
                    key=lambda x: data.utils.interleave_keys(len(x[0]), len(x[1])),
                    batch_size_fn=batch_size_func,
                    random_shuffler=data.iterator.RandomShuffler())
                for dev_batch in dev_iter:
                    model.eval()
                    in_arrays = utils.seq2seq_pad_concat_convert(dev_batch, -1)
                    loss_test, stat = model(*in_arrays)
                    valid_stats.update(stat)

                print('Train perplexity: %g' % train_stats.ppl())
                print('Train accuracy: %g' % train_stats.accuracy())
                print('Validation perplexity: %g' % valid_stats.ppl())
                print('Validation accuracy: %g' % valid_stats.accuracy())

                bleu_score, _ = CalculateBleu(model,
                                              dev_data,
                                              'Dev Bleu',
                                              batch=args.batchsize // 4,
                                              beam_size=args.beam_size,
                                              alpha=args.alpha,
                                              max_sent=args.max_sent_eval)()
                if args.metric == "bleu":
                    score = bleu_score
                elif args.metric == "accuracy":
                    score = valid_stats.accuracy()

                is_best = score > best_score
                best_score = max(score, best_score)
                save_checkpoint(
                    {
                        'epoch': epoch + 1,
                        'state_dict': model.state_dict(),
                        'best_score': best_score,
                        'optimizer': optimizer.state_dict(),
                        'opts': args,
                    }, is_best, args.model_file, args.best_model_file)

    # BLEU score on Dev and Test Data
    checkpoint = torch.load(args.best_model_file)
    print("=> loaded checkpoint '{}' (epoch {}, best score {})".format(
        args.best_model_file, checkpoint['epoch'], checkpoint['best_score']))
    model.load_state_dict(checkpoint['state_dict'])

    print('Dev Set BLEU Score')
    _, dev_hyp = CalculateBleu(model,
                               dev_data,
                               'Dev Bleu',
                               batch=args.batchsize // 4,
                               beam_size=args.beam_size,
                               alpha=args.alpha)()
    save_output(dev_hyp, id2w, args.dev_hyp)

    print('Test Set BLEU Score')
    _, test_hyp = CalculateBleu(model,
                                test_data,
                                'Test Bleu',
                                batch=args.batchsize // 4,
                                beam_size=args.beam_size,
                                alpha=args.alpha)()
    save_output(test_hyp, id2w, args.test_hyp)
def main():
    best_score = 0
    args = get_train_args()
    logger = get_logger(args.log_path)
    logger.info(json.dumps(args.__dict__, indent=4))

    # Set seed value
    torch.manual_seed(args.seed)
    random.seed(args.seed)
    if args.gpu:
        torch.cuda.manual_seed_all(args.seed)

    # Reading the int indexed text dataset
    train_data = np.load(os.path.join(args.input, args.data + ".train.npy"),
                         allow_pickle=True)
    train_data = train_data.tolist()
    dev_data = np.load(os.path.join(args.input, args.data + ".valid.npy"),
                       allow_pickle=True)
    dev_data = dev_data.tolist()
    test_data = np.load(os.path.join(args.input, args.data + ".test.npy"),
                        allow_pickle=True)
    test_data = test_data.tolist()

    # Reading the vocab file
    with open(os.path.join(args.input, args.data + '.vocab.pickle'), 'rb') as f:
        id2w = pickle.load(f)

    args.id2w = id2w
    args.n_vocab = len(id2w)

    # Define Model
    model = eval(args.model)(args)
    model.apply(init_weights)
    tally_parameters(model)
    if args.gpu >= 0:
        model.cuda(args.gpu)
    logger.info(model)

    if args.optimizer == 'Noam':
        optimizer = NoamAdamTrainer(model, args)
    elif args.optimizer == 'Adam':
        params = filter(lambda p: p.requires_grad, model.parameters())
        optimizer = torch.optim.Adam(params,
                                     lr=args.learning_rate,
                                     betas=(args.optimizer_adam_beta1,
                                            args.optimizer_adam_beta2),
                                     eps=args.optimizer_adam_epsilon)
        scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer,
                                                               mode='max',
                                                               factor=0.7,
                                                               patience=7,
                                                               verbose=True)
    elif args.optimizer == 'Yogi':
        params = filter(lambda p: p.requires_grad, model.parameters())
        optimizer = Yogi(params,
                         lr=args.learning_rate,
                         betas=(args.optimizer_adam_beta1,
                                args.optimizer_adam_beta2),
                         eps=args.optimizer_adam_epsilon)
        scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer,
                                                               mode='max',
                                                               factor=0.7,
                                                               patience=7,
                                                               verbose=True)

    if args.fp16:
        model = FP16_Module(model)
        optimizer = FP16_Optimizer(optimizer,
                                   static_loss_scale=args.static_loss_scale,
                                   dynamic_loss_scale=args.dynamic_loss_scale,
                                   dynamic_loss_args={'init_scale': 2**16},
                                   verbose=False)

    ema = ExponentialMovingAverage(decay=args.ema_decay)
    ema.register(model.state_dict())

    # optionally resume from a checkpoint
    if args.resume:
        if os.path.isfile(args.model_file):
            logger.info("=> loading checkpoint '{}'".format(args.model_file))
            checkpoint = torch.load(args.model_file)
            args.start_epoch = checkpoint['epoch']
            best_score = checkpoint['best_score']
            model.load_state_dict(checkpoint['state_dict'])
            optimizer.load_state_dict(checkpoint['optimizer'])
            logger.info("=> loaded checkpoint '{}' (epoch {})".format(
                args.model_file, checkpoint['epoch']))
        else:
            logger.info("=> no checkpoint found at '{}'".format(
                args.model_file))

    src_data, trg_data = list(zip(*train_data))
    total_src_words = len(list(itertools.chain.from_iterable(src_data)))
    total_trg_words = len(list(itertools.chain.from_iterable(trg_data)))
    iter_per_epoch = (total_src_words + total_trg_words) // (2 * args.wbatchsize)
    logger.info('Approximate number of iter/epoch = {}'.format(iter_per_epoch))
    time_s = time()

    global_steps = 0
    num_grad_steps = 0
    if args.grad_norm_for_yogi and args.optimizer == 'Yogi':
        args.start_epoch = -1
        l2_norm = 0.0
        parameters = list(
            filter(lambda p: p.requires_grad is True, model.parameters()))
        n_params = sum([p.nelement() for p in parameters])

    for epoch in range(args.start_epoch, args.epoch):
        random.shuffle(train_data)
        train_iter = data.iterator.pool(
            train_data,
            args.wbatchsize,
            key=lambda x: (len(x[0]), len(x[1])),
            batch_size_fn=batch_size_fn,
            random_shuffler=data.iterator.RandomShuffler())
        report_stats = utils.Statistics()
        train_stats = utils.Statistics()
        if args.debug:
            grad_norm = 0.

        for num_steps, train_batch in enumerate(train_iter):
            global_steps += 1
            model.train()
            if args.grad_accumulator_count == 1:
                optimizer.zero_grad()
            elif num_grad_steps % args.grad_accumulator_count == 0:
                optimizer.zero_grad()
            src_iter = list(zip(*train_batch))[0]
            src_words = len(list(itertools.chain.from_iterable(src_iter)))
            report_stats.n_src_words += src_words
            train_stats.n_src_words += src_words
            in_arrays = utils.seq2seq_pad_concat_convert(train_batch, -1)

            if len(args.multi_gpu) > 1:
                loss_tuple, stat_tuple = zip(
                    *dp(model, in_arrays, device_ids=args.multi_gpu))
                n_total = sum([obj.n_words.item() for obj in stat_tuple])
                n_correct = sum([obj.n_correct.item() for obj in stat_tuple])
                loss = 0
                for l_, s_ in zip(loss_tuple, stat_tuple):
                    loss += l_ * s_.n_words.item()
                loss /= n_total
                stat = utils.Statistics(loss=loss.data.cpu() * n_total,
                                        n_correct=n_correct,
                                        n_words=n_total)
            else:
                loss, stat = model(*in_arrays)

            if args.fp16:
                optimizer.backward(loss)
            else:
                loss.backward()

            if epoch == -1 and args.grad_norm_for_yogi and args.optimizer == 'Yogi':
                l2_norm += (utils.grad_norm(model.parameters())**2) / n_params
                continue

            num_grad_steps += 1
            if args.debug:
                norm = utils.grad_norm(model.parameters())
                grad_norm += norm
                if global_steps % args.report_every == 0:
                    logger.info("> Gradient Norm: %1.4f" %
                                (grad_norm / (num_steps + 1)))

            if args.grad_accumulator_count == 1:
                optimizer.step()
                ema.apply(model.state_dict(keep_vars=True))
            elif num_grad_steps % args.grad_accumulator_count == 0:
                optimizer.step()
                ema.apply(model.state_dict(keep_vars=True))
                num_grad_steps = 0

            report_stats.update(stat)
            train_stats.update(stat)
            report_stats = report_func(epoch, num_steps, iter_per_epoch, time_s,
                                       report_stats, args.report_every)

            valid_stats = utils.Statistics()
            if global_steps % args.eval_steps == 0:
                with torch.no_grad():
                    dev_iter = data.iterator.pool(
                        dev_data,
                        args.wbatchsize,
                        key=lambda x: (len(x[0]), len(x[1])),
                        batch_size_fn=batch_size_fn,
                        random_shuffler=data.iterator.RandomShuffler())
                    for dev_batch in dev_iter:
                        model.eval()
                        in_arrays = utils.seq2seq_pad_concat_convert(
                            dev_batch, -1)
                        if len(args.multi_gpu) > 1:
                            _, stat_tuple = zip(*dp(
                                model, in_arrays, device_ids=args.multi_gpu))
                            n_total = sum(
                                [obj.n_words.item() for obj in stat_tuple])
                            n_correct = sum(
                                [obj.n_correct.item() for obj in stat_tuple])
                            dev_loss = sum([obj.loss for obj in stat_tuple])
                            stat = utils.Statistics(loss=dev_loss,
                                                    n_correct=n_correct,
                                                    n_words=n_total)
                        else:
                            _, stat = model(*in_arrays)
                        valid_stats.update(stat)

                logger.info('Train perplexity: %g' % train_stats.ppl())
                logger.info('Train accuracy: %g' % train_stats.accuracy())
                logger.info('Validation perplexity: %g' % valid_stats.ppl())
                logger.info('Validation accuracy: %g' % valid_stats.accuracy())

                if args.metric == "accuracy":
                    score = valid_stats.accuracy()
                elif args.metric == "bleu":
                    score, _ = CalculateBleu(
                        model,
                        dev_data,
                        'Dev Bleu',
                        batch=args.batchsize // 4,
                        beam_size=args.beam_size,
                        alpha=args.alpha,
                        max_sent=args.max_sent_eval)(logger)

                # Threshold Global Steps to save the model
                if not (global_steps % 2000):
                    print('saving')
                    is_best = score > best_score
                    best_score = max(score, best_score)
                    save_checkpoint(
                        {
                            'epoch': epoch + 1,
                            'state_dict': model.state_dict(),
                            'state_dict_ema': ema.shadow_variable_dict,
                            'best_score': best_score,
                            'optimizer': optimizer.state_dict(),
                            'opts': args,
                        }, is_best, args.model_file, args.best_model_file)

                if args.optimizer == 'Adam' or args.optimizer == 'Yogi':
                    scheduler.step(score)

        if epoch == -1 and args.grad_norm_for_yogi and args.optimizer == 'Yogi':
            optimizer.v_init = l2_norm / (num_steps + 1)
            logger.info("Initializing Yogi Optimizer (v_init = {})".format(
                optimizer.v_init))

    # BLEU score on Dev and Test Data
    checkpoint = torch.load(args.best_model_file)
    logger.info("=> loaded checkpoint '{}' (epoch {}, best score {})".format(
        args.best_model_file, checkpoint['epoch'], checkpoint['best_score']))
    model.load_state_dict(checkpoint['state_dict'])

    logger.info('Dev Set BLEU Score')
    _, dev_hyp = CalculateBleu(model,
                               dev_data,
                               'Dev Bleu',
                               batch=args.batchsize // 4,
                               beam_size=args.beam_size,
                               alpha=args.alpha,
                               max_decode_len=args.max_decode_len)(logger)
    save_output(dev_hyp, id2w, args.dev_hyp)

    logger.info('Test Set BLEU Score')
    _, test_hyp = CalculateBleu(model,
                                test_data,
                                'Test Bleu',
                                batch=args.batchsize // 4,
                                beam_size=args.beam_size,
                                alpha=args.alpha,
                                max_decode_len=args.max_decode_len)(logger)
    save_output(test_hyp, id2w, args.test_hyp)

    # Loading EMA state dict
    model.load_state_dict(checkpoint['state_dict_ema'])

    logger.info('Dev Set BLEU Score')
    _, dev_hyp = CalculateBleu(model,
                               dev_data,
                               'Dev Bleu',
                               batch=args.batchsize // 4,
                               beam_size=args.beam_size,
                               alpha=args.alpha,
                               max_decode_len=args.max_decode_len)(logger)
    save_output(dev_hyp, id2w, args.dev_hyp + '.ema')

    logger.info('Test Set BLEU Score')
    _, test_hyp = CalculateBleu(model,
                                test_data,
                                'Test Bleu',
                                batch=args.batchsize // 4,
                                beam_size=args.beam_size,
                                alpha=args.alpha,
                                max_decode_len=args.max_decode_len)(logger)
    save_output(test_hyp, id2w, args.test_hyp + '.ema')
def _run_epoch(self, train_data, dev_data, unlabel_data, addn_data,
               addn_data_unlab, addn_dev, ek, ek_t, ek_u, graph_embs,
               graph_embs_t, graph_embs_u):
    addn_dev.cuda()
    ek_t.cuda()
    graph_embs_t.cuda()
    report_stats = utils.Statistics()
    cm = ConfusionMatrix(self.classes)
    _, seq_data = list(zip(*train_data))
    total_seq_words = len(list(itertools.chain.from_iterable(seq_data)))
    iter_per_epoch = (1.5 * total_seq_words) // self.config.wbatchsize

    self.encoder.train()
    self.clf.train()

    train_iter = self._create_iter(train_data, self.config.wbatchsize)
    unlabel_iter = self._create_iter(unlabel_data,
                                     self.config.wbatchsize_unlabel)

    sofar = 0
    sofar_1 = 0
    for batch_index, train_batch_raw in enumerate(train_iter):
        seq_iter = list(zip(*train_batch_raw))[1]
        seq_words = len(list(itertools.chain.from_iterable(seq_iter)))
        report_stats.n_words += seq_words
        self.global_steps += 1

        # self.enc_clf_opt.zero_grad()
        if self.config.add_noise:
            train_batch_raw = add_noise(train_batch_raw,
                                        self.config.noise_dropout,
                                        self.config.random_permutation)
        train_batch = batch_utils.seq_pad_concat(train_batch_raw, -1)
        train_embedded = self.embedder(train_batch)
        memory_bank_train, enc_final_train = self.encoder(
            train_embedded, train_batch)

        if self.config.lambda_vat > 0 or self.config.lambda_ae > 0 or self.config.lambda_entropy:
            try:
                unlabel_batch_raw = next(unlabel_iter)
            except StopIteration:
                unlabel_iter = self._create_iter(
                    unlabel_data, self.config.wbatchsize_unlabel)
                unlabel_batch_raw = next(unlabel_iter)

            if self.config.add_noise:
                unlabel_batch_raw = add_noise(unlabel_batch_raw,
                                              self.config.noise_dropout,
                                              self.config.random_permutation)
            unlabel_batch = batch_utils.seq_pad_concat(unlabel_batch_raw, -1)
            unlabel_embedded = self.embedder(unlabel_batch)
            memory_bank_unlabel, enc_final_unlabel = self.encoder(
                unlabel_embedded, unlabel_batch)
            addn_batch_unlab = retAddnBatch(addn_data_unlab,
                                            memory_bank_unlabel.shape[0],
                                            sofar_1).cuda()
            ek_batch_unlab = retAddnBatch(ek_u, memory_bank_unlabel.shape[0],
                                          sofar_1).cuda()
            graph_embs_unlab = retAddnBatch(graph_embs_u,
                                            memory_bank_unlabel.shape[0],
                                            sofar_1).cuda()
            sofar_1 += addn_batch_unlab.shape[0]
            if sofar_1 >= ek_u.shape[0]:
                sofar_1 = 0

        addn_batch = retAddnBatch(addn_data, memory_bank_train.shape[0],
                                  sofar).cuda()
        ek_batch = retAddnBatch(ek, memory_bank_train.shape[0], sofar).cuda()
        graph_embs_batch = retAddnBatch(graph_embs,
                                        memory_bank_train.shape[0],
                                        sofar).cuda()
        sofar += addn_batch.shape[0]
        if sofar >= ek.shape[0]:
            sofar = 0

        pred = self.clf(memory_bank_train, addn_batch, ek_batch,
                        enc_final_train, graph_embs_batch)
        accuracy = self.get_accuracy(cm, pred.data, train_batch.labels.data)
        lclf = self.clf_loss(pred, train_batch.labels)

        lat = Variable(torch.FloatTensor([-1.]).type(batch_utils.FLOAT_TYPE))
        lvat = Variable(torch.FloatTensor([-1.]).type(batch_utils.FLOAT_TYPE))
        if self.config.lambda_at > 0:
            lat = at_loss(self.embedder,
                          self.encoder,
                          self.clf,
                          train_batch,
                          addn_batch,
                          ek_batch,
                          graph_embs_batch,
                          perturb_norm_length=self.config.perturb_norm_length)

        if self.config.lambda_vat > 0:
            lvat_train = vat_loss(
                self.embedder,
                self.encoder,
                self.clf,
                train_batch,
                addn_batch,
                ek_batch,
                graph_embs_batch,
                p_logit=pred,
                perturb_norm_length=self.config.perturb_norm_length)
            if self.config.inc_unlabeled_loss:
                if memory_bank_unlabel.shape[0] != ek_batch_unlab.shape[0]:
                    print(
                        f'Skipping; Unequal Shapes: {memory_bank_unlabel.shape} and {ek_batch_unlab.shape}'
                    )
                    continue
                else:
                    lvat_unlabel = vat_loss(
                        self.embedder,
                        self.encoder,
                        self.clf,
                        unlabel_batch,
                        addn_batch_unlab,
                        ek_batch_unlab,
                        graph_embs_unlab,
                        p_logit=self.clf(memory_bank_unlabel, addn_batch_unlab,
                                         ek_batch_unlab, enc_final_unlabel,
                                         graph_embs_unlab),
                        perturb_norm_length=self.config.perturb_norm_length)
                if self.config.unlabeled_loss_type == "AvgTrainUnlabel":
                    lvat = 0.5 * (lvat_train + lvat_unlabel)
                elif self.config.unlabeled_loss_type == "Unlabel":
                    lvat = lvat_unlabel
            else:
                lvat = lvat_train

        lentropy = Variable(
            torch.FloatTensor([-1.]).type(batch_utils.FLOAT_TYPE))
        if self.config.lambda_entropy > 0:
            lentropy_train = entropy_loss(pred)
            if self.config.inc_unlabeled_loss:
                lentropy_unlabel = entropy_loss(
                    self.clf(memory_bank_unlabel, addn_batch_unlab,
                             ek_batch_unlab, enc_final_unlabel,
                             graph_embs_unlab))
                if self.config.unlabeled_loss_type == "AvgTrainUnlabel":
                    lentropy = 0.5 * (lentropy_train + lentropy_unlabel)
                elif self.config.unlabeled_loss_type == "Unlabel":
                    lentropy = lentropy_unlabel
            else:
                lentropy = lentropy_train

        lae = Variable(torch.FloatTensor([-1.]).type(batch_utils.FLOAT_TYPE))
        if self.config.lambda_ae > 0:
            lae = self.ae(memory_bank_unlabel, enc_final_unlabel,
                          unlabel_batch.sent_len, unlabel_batch_raw)

        ltotal = (self.config.lambda_clf * lclf) + \
                 (self.config.lambda_ae * lae) + \
                 (self.config.lambda_at * lat) + \
                 (self.config.lambda_vat * lvat) + \
                 (self.config.lambda_entropy * lentropy)

        report_stats.clf_loss += lclf.data.cpu().numpy()
        report_stats.at_loss += lat.data.cpu().numpy()
        report_stats.vat_loss += lvat.data.cpu().numpy()
        report_stats.ae_loss += lae.data.cpu().numpy()
        report_stats.entropy_loss += lentropy.data.cpu().numpy()
        report_stats.n_sent += len(pred)
        report_stats.n_correct += accuracy

        self.enc_clf_opt.zero_grad()
        ltotal.backward()
        params_list = self._get_trainabe_modules()
        # Excluding embedder from norm constraint when AT or VAT
        if not self.config.normalize_embedding:
            params_list += list(self.embedder.parameters())

        norm = torch.nn.utils.clip_grad_norm(params_list,
                                             self.config.max_norm)
        report_stats.grad_norm += norm
        self.enc_clf_opt.step()
        if self.config.scheduler == "ExponentialLR":
            self.scheduler.step()
        self.ema_embedder.apply(self.embedder.named_parameters())
        self.ema_encoder.apply(self.encoder.named_parameters())
        self.ema_clf.apply(self.clf.named_parameters())

        report_func(self.epoch, batch_index, iter_per_epoch, self.time_s,
                    report_stats, self.config.report_every, self.logger)

        if self.global_steps % self.config.eval_steps == 0:
            cm_, accuracy, prc_dev = self._run_evaluate(
                dev_data, addn_dev, ek_t, graph_embs_t)
            self.logger.info(
                "- dev accuracy {} | best dev accuracy {} ".format(
                    accuracy, self.best_accuracy))
            self.writer.add_scalar("Dev_Accuracy", accuracy,
                                   self.global_steps)
            pred_, lab_ = zip(*prc_dev)
            pred_ = torch.cat(pred_)
            lab_ = torch.cat(lab_)
            self.writer.add_pr_curve("Dev PR-Curve", lab_, pred_,
                                     self.global_steps)
            pprint.pprint(cm_)
            pprint.pprint(cm_.get_all_metrics())
            if accuracy > self.best_accuracy:
                self.logger.info("- new best score!")
                self.best_accuracy = accuracy
                self._save_model()
            if self.config.scheduler == "ReduceLROnPlateau":
                self.scheduler.step(accuracy)
            self.encoder.train()
            # self.embedder.train()
            self.clf.train()

    if self.config.weight_decay > 0:
        print(">> Square Norm: %1.4f " % self._get_l2_norm_loss())

    cm, train_accuracy, _ = self._run_evaluate(train_data, addn_data, ek,
                                               graph_embs)
    self.logger.info("- Train accuracy {}".format(train_accuracy))
    pprint.pprint(cm.get_all_metrics())

    cm, dev_accuracy, _ = self._run_evaluate(dev_data, addn_dev, ek_t,
                                             graph_embs_t)
    self.logger.info("- Dev accuracy {} | best dev accuracy {}".format(
        dev_accuracy, self.best_accuracy))
    pprint.pprint(cm.get_all_metrics())

    self.writer.add_scalars("Overall_Accuracy", {
        "Train_Accuracy": train_accuracy,
        "Dev_Accuracy": dev_accuracy
    }, self.global_steps)
    return dev_accuracy