Example #1
    def output_and_loss(self, h_block, t_block):
        batch, length, units = h_block.shape
        # shape : (batch * sequence_length, num_classes)
        logits_flat = seq_func(self.affine, h_block, reconstruct_shape=False)
        # shape : (batch * sequence_length, num_classes)
        log_probs_flat = F.log_softmax(logits_flat, dim=-1)
        rebatch, _ = logits_flat.shape
        concat_t_block = t_block.view(rebatch)
        weights = (concat_t_block >= 1).type(h_block.type())
        n_correct, n_total = utils.accuracy(logits_flat.data,
                                            concat_t_block.data,
                                            ignore_index=0)
        if self.confidence < 1:
            tdata = concat_t_block.data
            mask = torch.nonzero(tdata.eq(self.padding_idx)).squeeze()
            tmp_ = self.one_hot.repeat(concat_t_block.size(0), 1)
            tmp_.scatter_(1, tdata.unsqueeze(1), self.confidence)
            if mask.dim() > 0 and mask.numel() > 0:
                tmp_.index_fill_(0, mask, 0)
            concat_t_block = Variable(tmp_, requires_grad=False)
        loss = self.criterion(log_probs_flat, concat_t_block)
        loss = loss.sum() / (weights.sum() + 1e-8)
        stats = utils.Statistics(loss=loss.data.cpu() * n_total,
                                 n_correct=n_correct,
                                 n_words=n_total)
        return loss, stats
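These snippets appear to come from several different codebases, so `utils.Statistics` is not one class: the translation-training examples use it as a loss/accuracy accumulator, the MARCO-style enumerators (Examples #2 and #6) use it as a timing collector, and the test-generation examples (#7 and #8) as a named value registry. As a reading aid, here is a minimal sketch of the accumulator variant only, assuming `loss`, `n_correct`, and `n_words` are plain running totals; the real implementation is not shown in these examples.

import math
import time

class Statistics:
    """Minimal sketch of the accumulator-style utils.Statistics
    (hypothetical; only the methods used by the examples)."""

    def __init__(self, loss=0.0, n_correct=0, n_words=0):
        self.loss = loss
        self.n_correct = n_correct
        self.n_words = n_words
        self.n_src_words = 0

    def update(self, stat):
        # Fold another batch's statistics into the running totals.
        self.loss += stat.loss
        self.n_correct += stat.n_correct
        self.n_words += stat.n_words

    def accuracy(self):
        return 100.0 * self.n_correct / max(self.n_words, 1)

    def ppl(self):
        # Perplexity from the summed per-token loss.
        return math.exp(min(self.loss / max(self.n_words, 1), 100))

    def output(self, epoch, batch, n_batches, start_time):
        print("Epoch %d, %d/%d; acc: %.2f; ppl: %.2f; %.0fs elapsed"
              % (epoch, batch, n_batches, self.accuracy(), self.ppl(),
                 time.time() - start_time))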
Example #2
def main():
    stats = utils.Statistics()

    pipes = []
    procs = []

    # make process group id match process id so all children
    # will share the same group id (for easier termination)
    os.setpgrp()

    with stats.time('setup'):
        args = parse_args()
        setup_execution(args, stats, os.getpid())
        if args.same_seeds or args.comms_disable:
            assert args.parallel is not None, "the specified flags only take effect in parallel mode"

        if args.parallel:
            for i, mode in enumerate(args.parallel.split(',')):
                newargs = copy.copy(args)
                if mode == 'MUS':
                    newargs.bias = 'MUSes'
                elif mode == 'MCS':
                    newargs.bias = 'MCSes'
                elif mode == 'MCSonly':
                    newargs.mcs_only = True
                else:
                    assert False, "Invalid parallel mode: %s" % mode

                pipe, child_pipe = multiprocessing.Pipe()
                pipes.append(pipe)

                if args.same_seeds:
                    if args.all_randomized:
                        seed = 1
                    else:
                        seed = None
                else:
                    # TODO: Handle randomization with non-homogeneous thread modes
                    if not args.all_randomized and i == 0:
                        seed = None
                    else:
                        seed = i + 1

                proc = multiprocessing.Process(target=run_enumerator,
                                               args=(stats, newargs, seed,
                                                     child_pipe))
                procs.append(proc)

    # useful for timing just the parsing / setup
    if args.limit == 0:
        sys.stderr.write("Result limit reached.\n")
        sys.exit(0)

    if args.parallel:
        for proc in procs:
            proc.start()
        run_master(stats, args, pipes)

    else:
        run_enumerator(stats, args, seed=args.rnd_init)
Example #3
    def output_and_loss(self, h_block, t_block):
        batch, units, length = h_block.shape
        # shape : (batch * sequence_length, num_classes)
        logits_flat = seq_func(self.affine,
                               h_block,
                               reconstruct_shape=False)
        rebatch, _ = logits_flat.shape
        concat_t_block = t_block.view(rebatch)
        weights = (concat_t_block >= 1).float()
        n_correct, n_total = utils.accuracy(logits_flat,
                                            concat_t_block,
                                            ignore_index=0)

        # shape : (batch * sequence_length, num_classes)
        log_probs_flat = F.log_softmax(logits_flat,
                                       dim=-1)
        # shape : (batch * sequence_length, 1)
        targets_flat = t_block.view(-1, 1).long()

        if self.label_smoothing is not None and self.label_smoothing > 0.0:
            num_classes = logits_flat.size(-1)
            smoothing_value = self.label_smoothing / (num_classes - 1)
            # Fill all the correct indices with 1 - smoothing value.
            one_hot_targets = input_like(log_probs_flat,
                                         smoothing_value)
            smoothed_targets = one_hot_targets.scatter_(-1,
                                                        targets_flat,
                                                        1.0 - self.label_smoothing)
            negative_log_likelihood_flat = - log_probs_flat * smoothed_targets
            negative_log_likelihood_flat = negative_log_likelihood_flat.sum(-1,
                                                                            keepdim=True)
        else:
            # Contribution to the negative log likelihood only comes from the exact indices
            # of the targets, as the target distributions are one-hot. Here we use torch.gather
            # to extract the indices of the num_classes dimension which contribute to the loss.
            # shape : (batch * sequence_length, 1)
            negative_log_likelihood_flat = - torch.gather(log_probs_flat,
                                                          dim=1,
                                                          index=targets_flat)

        # shape : (batch * sequence_length,)
        negative_log_likelihood = negative_log_likelihood_flat.view(rebatch)
        negative_log_likelihood = negative_log_likelihood * weights
        # shape : scalar
        loss = negative_log_likelihood.sum() / (weights.sum() + 1e-13)
        stats = utils.Statistics(loss=utils.to_cpu(loss) * n_total,
                                 n_correct=utils.to_cpu(n_correct),
                                 n_words=n_total)
        return loss, stats
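A quick standalone check of the smoothing arithmetic above (with hypothetical sizes): every entry is first filled with `label_smoothing / (num_classes - 1)` and the target entry is then overwritten with `1 - label_smoothing`, so each row sums to `(C - 1) * eps / (C - 1) + (1 - eps) = 1`. The `input_like` helper is replaced by `torch.full` here.

import torch

label_smoothing = 0.1
num_classes = 5
targets_flat = torch.tensor([[2], [0], [4]])  # (batch * sequence_length, 1)

smoothing_value = label_smoothing / (num_classes - 1)
smoothed_targets = torch.full((targets_flat.size(0), num_classes),
                              smoothing_value)
smoothed_targets.scatter_(-1, targets_flat, 1.0 - label_smoothing)

print(smoothed_targets)
print(smoothed_targets.sum(dim=-1))  # tensor([1., 1., 1.])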
Example #4
    def forward(self, *args):
        # Identify the row indexes corresponding to lang1 and lang2
        lang1_input = index_select_train(self.lang1, args)
        if lang1_input is not None:
            loss1, stats1 = self.model1(*lang1_input)
        else:
            loss1 = 0.
            stats1 = utils.Statistics()

        lang2_input = index_select_train(self.lang2, args)
        if lang2_input is not None:
            loss2, stats2 = self.model2(*lang2_input)
        else:
            loss2 = 0.
            stats2 = utils.Statistics()

        n_total = stats1.n_words + stats2.n_words
        n_correct = stats1.n_correct + stats2.n_correct

        loss = ((loss1 * stats1.n_words) + (loss2 * stats2.n_words)) / n_total
        stats = utils.Statistics(loss=loss.data.cpu() * n_total,
                                 n_correct=n_correct,
                                 n_words=n_total)
        return loss, stats
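The combination step above is a word-weighted average: each sub-model returns a per-word loss, so scaling by word counts and dividing by the total recovers the per-word loss across both languages. A tiny numeric check with made-up numbers:

# Hypothetical per-word losses and word counts for the two directions.
loss1, n1 = 2.0, 300
loss2, n2 = 3.0, 100

loss = ((loss1 * n1) + (loss2 * n2)) / (n1 + n2)
print(loss)  # 2.25 == total loss (900.0) / total words (400)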
Example #5
def report_func(epoch, batch, num_batches, start_time, report_stats,
                report_every):
    """
    This is the user-defined batch-level training progress
    report function.
    Args:
        epoch(int): current epoch count.
        batch(int): current batch count.
        num_batches(int): total number of batches.
        start_time(float): last report time.
        lr(float): current learning rate.
        report_stats(Statistics): old Statistics instance.
    Returns:
        report_stats(Statistics): updated Statistics instance.
    """
    # -1 % n == n - 1, so this fires on every report_every-th batch
    if batch % report_every == -1 % report_every:
        report_stats.output(epoch, batch + 1, num_batches, start_time)
        report_stats = utils.Statistics()
    return report_stats
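The modulo condition may look odd, but since `-1 % n == n - 1` in Python, it is exactly equivalent to `(batch + 1) % report_every == 0`, i.e. reporting on every report_every-th batch with zero-indexed batch counts:

report_every = 50
hits = [b for b in range(200) if b % report_every == -1 % report_every]
print(hits)  # [49, 99, 149, 199] -> batches 50, 100, 150, 200 (1-indexed)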
Example #6
def main():
    stats = utils.Statistics()

    with stats.time('setup'):
        args = parse_args()
        setup_execution(args, stats)
        csolver, msolver = setup_solvers(args)
        config = setup_config(args)
        mp = MarcoPolo(csolver, msolver, stats, config)

    # useful for timing just the parsing / setup
    if args.limit == 0:
        sys.stderr.write("Result limit reached.\n")
        sys.exit(0)

    # enumerate results in a separate thread so signal handling works while in C code
    # ref: https://thisismiller.github.io/blog/CPython-Signal-Handling/
    def enumerate():
        remaining = args.limit

        for result in mp.enumerate():
            output = result[0]
            if args.alltimes:
                output = "%s %0.3f" % (output, stats.current_time())
            if args.verbose:
                output = "%s %s" % (output, " ".join(
                    [str(x + 1) for x in result[1]]))

            print(output)

            if remaining:
                remaining -= 1
                if remaining == 0:
                    sys.stderr.write("Result limit reached.\n")
                    sys.exit(0)

    enumthread = threading.Thread(target=enumerate)
    enumthread.daemon = True  # so thread is killed when main thread exits (e.g. in signal handler)
    enumthread.start()
    # join with a timeout so the main thread can still run signal handlers;
    # float("inf") effectively means "wait forever"
    enumthread.join(float("inf"))
Example #7
    def __init__(self, validation_config):
        self._nondet_var_map = None
        self.machine_model = validation_config.machine_model
        self.config = validation_config
        self.witness_creator = wit_gen.WitnessCreator()
        self.harness_creator = harness_gen.HarnessCreator()

        self.naive_verification = validation_config.naive_verification

        # If a void appears in a line, there must be something between
        # the void and the __VERIFIER_error() symbol - otherwise
        # it is a function definition/declaration.
        self.error_method_pattern = re.compile(
            r'((?!void).)*(void.*\S.*)?__VERIFIER_error\(\) *;.*')

        self.statistics = utils.Statistics('Test Validator ' + self.get_name())
        self.timer_validation = utils.Stopwatch()
        self.statistics.add_value('Time for validation', self.timer_validation)
        self.timer_witness_validation = utils.Stopwatch()
        self.statistics.add_value('Time for witness validation',
                                  self.timer_witness_validation)
        self.counter_size_witnesses = utils.Counter()
        self.statistics.add_value('Total size of witnesses',
                                  self.counter_size_witnesses)
        self.timer_execution_validation = utils.Stopwatch()
        self.statistics.add_value('Time for execution validation',
                                  self.timer_execution_validation)
        self.counter_size_harnesses = utils.Counter()
        self.statistics.add_value('Total size of harnesses',
                                  self.counter_size_harnesses)

        self.timer_vector_gen = utils.Stopwatch()
        self.statistics.add_value("Time for test vector generation",
                                  self.timer_vector_gen)
        self.counter_handled_test_cases = utils.Counter()
        self.statistics.add_value('Number of looked-at test cases',
                                  self.counter_handled_test_cases)

        self.final_test_vector_size = utils.Constant()
        self.statistics.add_value("Size of successful test vector",
                                  self.final_test_vector_size)
Example #8
    def __init__(self, timelimit, machine_model, log_verbose):
        self.machine_model = machine_model
        self.timelimit = int(timelimit) if timelimit else 0
        self.log_verbose = log_verbose
        self.statistics = utils.Statistics("Input Generator " +
                                           self.get_name())

        self.timer_file_access = utils.Stopwatch()
        self.timer_prepare = utils.Stopwatch()
        self.timer_input_gen = utils.Stopwatch()
        self.timer_generator = utils.Stopwatch()

        self.number_generated_tests = utils.Constant()

        self.statistics.add_value('Time for full input generation',
                                  self.timer_input_gen)
        self.statistics.add_value('Time for test case generator',
                                  self.timer_generator)
        self.statistics.add_value('Time for controlled file accesses',
                                  self.timer_file_access)
        self.statistics.add_value('Time for file preparation',
                                  self.timer_prepare)
        self.statistics.add_value('Number of generated test cases',
                                  self.number_generated_tests)
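Examples #7 and #8 use the registry flavor of `utils.Statistics` together with `Stopwatch`, `Counter`, and `Constant` value objects. None of those classes are shown here; the following is a minimal sketch, assuming they are simple value holders that the statistics object renders by name:

import time

class Stopwatch:
    """Accumulates wall-clock time over start()/stop() intervals (sketch)."""
    def __init__(self):
        self.elapsed = 0.0
        self._started = None

    def start(self):
        self._started = time.time()

    def stop(self):
        self.elapsed += time.time() - self._started
        self._started = None

    def __str__(self):
        return "%.2fs" % self.elapsed

class Counter:
    """Plain incrementing counter (sketch)."""
    def __init__(self):
        self.value = 0

    def inc(self, amount=1):
        self.value += amount

    def __str__(self):
        return str(self.value)

class Constant:
    """Holds a single value that is set once (sketch)."""
    def __init__(self):
        self.value = None

    def __str__(self):
        return str(self.value)

class Statistics:
    """Registry-style statistics: named values added via add_value (sketch)."""
    def __init__(self, title):
        self.title = title
        self._values = []

    def add_value(self, name, value):
        self._values.append((name, value))

    def __str__(self):
        return "\n".join([self.title] +
                         ["  %s: %s" % (n, v) for n, v in self._values])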
Example #9
        criterion = TASummDecLoss(model.generator, 0, model.decoder.vocab_size)

    if args.model_type == 'abs':
        dec_params = [
            p for n, p in model.decoder.named_parameters()
            if not n.startswith('encoder')
        ]
        optimizer = AdamW([{
            'params': model.encoder.parameters(),
            'lr': args.lr_enc
        }, {
            'params': dec_params,
            'lr': args.lr_dec
        }],
                          lr=1e-3)
    else:
        optimizer = AdamW(model.parameters(), lr=args.lr_enc)
    scheduler = ReduceLROnPlateau(optimizer, patience=2, factor=0.9)

    training_stats = utils.Statistics()

    # Train --------------------------------------------------------------------
    logger.info(f'Start training {args.model_type} model ')
    for epoch in range(1, args.epochs + 1):
        training_stats.epoch = epoch
        if args.model_type in ['rel', 'ext']:
            train_encoder(model, criterion, optimizer, scheduler,
                          training_stats)
        elif args.model_type == 'abs':
            train_abs(model, criterion, optimizer, scheduler, training_stats)
Example #10
def main():
    best_score = 0
    args = get_train_args()
    print(json.dumps(args.__dict__, indent=4))

    # Reading the int indexed text dataset
    train_data = np.load(os.path.join(args.input, args.data + ".train.npy"))
    train_data = train_data.tolist()
    dev_data = np.load(os.path.join(args.input, args.data + ".valid.npy"))
    dev_data = dev_data.tolist()
    test_data = np.load(os.path.join(args.input, args.data + ".test.npy"))
    test_data = test_data.tolist()

    # Reading the vocab file
    with open(os.path.join(args.input, args.data + '.vocab.pickle'),
              'rb') as f:
        id2w = pickle.load(f)

    args.id2w = id2w
    args.n_vocab = len(id2w)

    # Define Model
    model = net.Transformer(args)

    tally_parameters(model)
    if args.gpu >= 0:
        model.cuda(args.gpu)
    print(model)

    optimizer = optim.TransformerAdamTrainer(model, args)

    # optionally resume from a checkpoint
    if args.resume:
        if os.path.isfile(args.model_file):
            print("=> loading checkpoint '{}'".format(args.model_file))
            checkpoint = torch.load(args.model_file)
            args.start_epoch = checkpoint['epoch']
            best_score = checkpoint['best_score']
            model.load_state_dict(checkpoint['state_dict'])
            optimizer.load_state_dict(checkpoint['optimizer'])
            print("=> loaded checkpoint '{}' (epoch {})".format(
                args.model_file, checkpoint['epoch']))
        else:
            print("=> no checkpoint found at '{}'".format(args.model_file))

    src_data, trg_data = list(zip(*train_data))
    total_src_words = len(list(itertools.chain.from_iterable(src_data)))
    total_trg_words = len(list(itertools.chain.from_iterable(trg_data)))
    iter_per_epoch = (total_src_words + total_trg_words) // args.wbatchsize
    print('Approximate number of iter/epoch =', iter_per_epoch)
    time_s = time()

    global_steps = 0
    for epoch in range(args.start_epoch, args.epoch):
        random.shuffle(train_data)
        train_iter = data.iterator.pool(
            train_data,
            args.wbatchsize,
            key=lambda x: data.utils.interleave_keys(len(x[0]), len(x[1])),
            batch_size_fn=batch_size_func,
            random_shuffler=data.iterator.RandomShuffler())
        report_stats = utils.Statistics()
        train_stats = utils.Statistics()
        valid_stats = utils.Statistics()

        if args.debug:
            grad_norm = 0.
        for num_steps, train_batch in enumerate(train_iter):
            global_steps += 1
            model.train()
            optimizer.zero_grad()
            src_iter = list(zip(*train_batch))[0]
            src_words = len(list(itertools.chain.from_iterable(src_iter)))
            report_stats.n_src_words += src_words
            train_stats.n_src_words += src_words
            in_arrays = utils.seq2seq_pad_concat_convert(train_batch, -1)
            loss, stat = model(*in_arrays)
            loss.backward()
            if args.debug:
                norm = utils.grad_norm(model.parameters())
                grad_norm += norm
                if global_steps % args.report_every == 0:
                    print("> Gradient Norm: %1.4f" % (grad_norm /
                                                      (num_steps + 1)))
            optimizer.step()

            report_stats.update(stat)
            train_stats.update(stat)
            report_stats = report_func(epoch, num_steps, iter_per_epoch,
                                       time_s, report_stats, args.report_every)

            if (global_steps + 1) % args.eval_steps == 0:
                dev_iter = data.iterator.pool(
                    dev_data,
                    args.wbatchsize,
                    key=lambda x: data.utils.interleave_keys(
                        len(x[0]), len(x[1])),
                    batch_size_fn=batch_size_func,
                    random_shuffler=data.iterator.RandomShuffler())

                for dev_batch in dev_iter:
                    model.eval()
                    in_arrays = utils.seq2seq_pad_concat_convert(dev_batch, -1)
                    loss_test, stat = model(*in_arrays)
                    valid_stats.update(stat)

                print('Train perplexity: %g' % train_stats.ppl())
                print('Train accuracy: %g' % train_stats.accuracy())

                print('Validation perplexity: %g' % valid_stats.ppl())
                print('Validation accuracy: %g' % valid_stats.accuracy())

                bleu_score, _ = CalculateBleu(model,
                                              dev_data,
                                              'Dev Bleu',
                                              batch=args.batchsize // 4,
                                              beam_size=args.beam_size,
                                              alpha=args.alpha,
                                              max_sent=args.max_sent_eval)()
                if args.metric == "bleu":
                    score = bleu_score
                elif args.metric == "accuracy":
                    score = valid_stats.accuracy()

                is_best = score > best_score
                best_score = max(score, best_score)
                save_checkpoint(
                    {
                        'epoch': epoch + 1,
                        'state_dict': model.state_dict(),
                        'best_score': best_score,
                        'optimizer': optimizer.state_dict(),
                        'opts': args,
                    }, is_best, args.model_file, args.best_model_file)

    # BLEU score on Dev and Test Data
    checkpoint = torch.load(args.best_model_file)
    print("=> loaded checkpoint '{}' (epoch {}, best score {})".format(
        args.best_model_file, checkpoint['epoch'], checkpoint['best_score']))
    model.load_state_dict(checkpoint['state_dict'])

    print('Dev Set BLEU Score')
    _, dev_hyp = CalculateBleu(model,
                               dev_data,
                               'Dev Bleu',
                               batch=args.batchsize // 4,
                               beam_size=args.beam_size,
                               alpha=args.alpha)()
    save_output(dev_hyp, id2w, args.dev_hyp)

    print('Test Set BLEU Score')
    _, test_hyp = CalculateBleu(model,
                                test_data,
                                'Test Bleu',
                                batch=args.batchsize // 4,
                                beam_size=args.beam_size,
                                alpha=args.alpha)()
    save_output(test_hyp, id2w, args.test_hyp)
Example #11
def main():
    best_score = 0
    args = get_train_args()
    logger = get_logger(args.log_path)
    logger.info(json.dumps(args.__dict__, indent=4))

    # Set seed value
    torch.manual_seed(args.seed)
    random.seed(args.seed)
    if args.gpu >= 0:
        torch.cuda.manual_seed_all(args.seed)

    # Reading the int indexed text dataset
    train_data = np.load(os.path.join(args.input, args.data + ".train.npy"),
                         allow_pickle=True)
    train_data = train_data.tolist()
    dev_data = np.load(os.path.join(args.input, args.data + ".valid.npy"),
                       allow_pickle=True)
    dev_data = dev_data.tolist()
    test_data = np.load(os.path.join(args.input, args.data + ".test.npy"),
                        allow_pickle=True)
    test_data = test_data.tolist()

    # Reading the vocab file
    with open(os.path.join(args.input, args.data + '.vocab.pickle'),
              'rb') as f:
        id2w = pickle.load(f)

    args.id2w = id2w
    args.n_vocab = len(id2w)

    # Define Model
    model = eval(args.model)(args)
    model.apply(init_weights)

    tally_parameters(model)
    if args.gpu >= 0:
        model.cuda(args.gpu)
    logger.info(model)

    if args.optimizer == 'Noam':
        optimizer = NoamAdamTrainer(model, args)
    elif args.optimizer == 'Adam':
        params = filter(lambda p: p.requires_grad, model.parameters())
        optimizer = torch.optim.Adam(params,
                                     lr=args.learning_rate,
                                     betas=(args.optimizer_adam_beta1,
                                            args.optimizer_adam_beta2),
                                     eps=args.optimizer_adam_epsilon)
        scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer,
                                                               mode='max',
                                                               factor=0.7,
                                                               patience=7,
                                                               verbose=True)
    elif args.optimizer == 'Yogi':
        params = filter(lambda p: p.requires_grad, model.parameters())
        optimizer = Yogi(params,
                         lr=args.learning_rate,
                         betas=(args.optimizer_adam_beta1,
                                args.optimizer_adam_beta2),
                         eps=args.optimizer_adam_epsilon)
        scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer,
                                                               mode='max',
                                                               factor=0.7,
                                                               patience=7,
                                                               verbose=True)

    if args.fp16:
        model = FP16_Module(model)
        optimizer = FP16_Optimizer(optimizer,
                                   static_loss_scale=args.static_loss_scale,
                                   dynamic_loss_scale=args.dynamic_loss_scale,
                                   dynamic_loss_args={'init_scale': 2**16},
                                   verbose=False)

    ema = ExponentialMovingAverage(decay=args.ema_decay)
    ema.register(model.state_dict())

    # optionally resume from a checkpoint
    if args.resume:
        if os.path.isfile(args.model_file):
            logger.info("=> loading checkpoint '{}'".format(args.model_file))
            checkpoint = torch.load(args.model_file)
            args.start_epoch = checkpoint['epoch']
            best_score = checkpoint['best_score']
            model.load_state_dict(checkpoint['state_dict'])
            optimizer.load_state_dict(checkpoint['optimizer'])
            logger.info("=> loaded checkpoint '{}' (epoch {})".format(
                args.model_file, checkpoint['epoch']))
        else:
            logger.info("=> no checkpoint found at '{}'".format(
                args.model_file))

    src_data, trg_data = list(zip(*train_data))
    total_src_words = len(list(itertools.chain.from_iterable(src_data)))
    total_trg_words = len(list(itertools.chain.from_iterable(trg_data)))
    iter_per_epoch = (total_src_words + total_trg_words) // (2 *
                                                             args.wbatchsize)
    logger.info('Approximate number of iter/epoch = {}'.format(iter_per_epoch))
    time_s = time()

    global_steps = 0
    num_grad_steps = 0
    if args.grad_norm_for_yogi and args.optimizer == 'Yogi':
        args.start_epoch = -1
        l2_norm = 0.0
        parameters = list(
            filter(lambda p: p.requires_grad is True, model.parameters()))
        n_params = sum([p.nelement() for p in parameters])

    for epoch in range(args.start_epoch, args.epoch):
        random.shuffle(train_data)
        train_iter = data.iterator.pool(
            train_data,
            args.wbatchsize,
            key=lambda x: (len(x[0]), len(x[1])),
            batch_size_fn=batch_size_fn,
            random_shuffler=data.iterator.RandomShuffler())
        report_stats = utils.Statistics()
        train_stats = utils.Statistics()
        if args.debug:
            grad_norm = 0.
        for num_steps, train_batch in enumerate(train_iter):
            global_steps += 1
            model.train()
            if args.grad_accumulator_count == 1:
                optimizer.zero_grad()
            elif num_grad_steps % args.grad_accumulator_count == 0:
                optimizer.zero_grad()
            src_iter = list(zip(*train_batch))[0]
            src_words = len(list(itertools.chain.from_iterable(src_iter)))
            report_stats.n_src_words += src_words
            train_stats.n_src_words += src_words
            in_arrays = utils.seq2seq_pad_concat_convert(train_batch, -1)
            if len(args.multi_gpu) > 1:
                loss_tuple, stat_tuple = zip(
                    *dp(model, in_arrays, device_ids=args.multi_gpu))
                n_total = sum([obj.n_words.item() for obj in stat_tuple])
                n_correct = sum([obj.n_correct.item() for obj in stat_tuple])
                loss = 0
                for l_, s_ in zip(loss_tuple, stat_tuple):
                    loss += l_ * s_.n_words.item()
                loss /= n_total
                stat = utils.Statistics(loss=loss.data.cpu() * n_total,
                                        n_correct=n_correct,
                                        n_words=n_total)
            else:
                loss, stat = model(*in_arrays)

            if args.fp16:
                optimizer.backward(loss)
            else:
                loss.backward()
            if epoch == -1 and args.grad_norm_for_yogi and args.optimizer == 'Yogi':
                l2_norm += (utils.grad_norm(model.parameters())**2) / n_params
                continue
            num_grad_steps += 1
            if args.debug:
                norm = utils.grad_norm(model.parameters())
                grad_norm += norm
                if global_steps % args.report_every == 0:
                    logger.info("> Gradient Norm: %1.4f" % (grad_norm /
                                                            (num_steps + 1)))
            if args.grad_accumulator_count == 1:
                optimizer.step()
                ema.apply(model.state_dict(keep_vars=True))
            elif num_grad_steps % args.grad_accumulator_count == 0:
                optimizer.step()
                ema.apply(model.state_dict(keep_vars=True))
                num_grad_steps = 0
            report_stats.update(stat)
            train_stats.update(stat)
            report_stats = report_func(epoch, num_steps, iter_per_epoch,
                                       time_s, report_stats, args.report_every)

            valid_stats = utils.Statistics()
            if global_steps % args.eval_steps == 0:
                with torch.no_grad():
                    dev_iter = data.iterator.pool(
                        dev_data,
                        args.wbatchsize,
                        key=lambda x: (len(x[0]), len(x[1])),
                        batch_size_fn=batch_size_fn,
                        random_shuffler=data.iterator.RandomShuffler())

                    for dev_batch in dev_iter:
                        model.eval()
                        in_arrays = utils.seq2seq_pad_concat_convert(
                            dev_batch, -1)
                        if len(args.multi_gpu) > 1:
                            _, stat_tuple = zip(*dp(
                                model, in_arrays, device_ids=args.multi_gpu))
                            n_total = sum(
                                [obj.n_words.item() for obj in stat_tuple])
                            n_correct = sum(
                                [obj.n_correct.item() for obj in stat_tuple])
                            dev_loss = sum([obj.loss for obj in stat_tuple])
                            stat = utils.Statistics(loss=dev_loss,
                                                    n_correct=n_correct,
                                                    n_words=n_total)
                        else:
                            _, stat = model(*in_arrays)
                        valid_stats.update(stat)

                    logger.info('Train perplexity: %g' % train_stats.ppl())
                    logger.info('Train accuracy: %g' % train_stats.accuracy())

                    logger.info('Validation perplexity: %g' %
                                valid_stats.ppl())
                    logger.info('Validation accuracy: %g' %
                                valid_stats.accuracy())

                    if args.metric == "accuracy":
                        score = valid_stats.accuracy()
                    elif args.metric == "bleu":
                        score, _ = CalculateBleu(
                            model,
                            dev_data,
                            'Dev Bleu',
                            batch=args.batchsize // 4,
                            beam_size=args.beam_size,
                            alpha=args.alpha,
                            max_sent=args.max_sent_eval)(logger)

                    # Threshold Global Steps to save the model
                    if not (global_steps % 2000):
                        print('saving')
                        is_best = score > best_score
                        best_score = max(score, best_score)
                        save_checkpoint(
                            {
                                'epoch': epoch + 1,
                                'state_dict': model.state_dict(),
                                'state_dict_ema': ema.shadow_variable_dict,
                                'best_score': best_score,
                                'optimizer': optimizer.state_dict(),
                                'opts': args,
                            }, is_best, args.model_file, args.best_model_file)

                    if args.optimizer == 'Adam' or args.optimizer == 'Yogi':
                        scheduler.step(score)

        if epoch == -1 and args.grad_norm_for_yogi and args.optimizer == 'Yogi':
            optimizer.v_init = l2_norm / (num_steps + 1)
            logger.info("Initializing Yogi Optimizer (v_init = {})".format(
                optimizer.v_init))

    # BLEU score on Dev and Test Data
    checkpoint = torch.load(args.best_model_file)
    logger.info("=> loaded checkpoint '{}' (epoch {}, best score {})".format(
        args.best_model_file, checkpoint['epoch'], checkpoint['best_score']))
    model.load_state_dict(checkpoint['state_dict'])

    logger.info('Dev Set BLEU Score')
    _, dev_hyp = CalculateBleu(model,
                               dev_data,
                               'Dev Bleu',
                               batch=args.batchsize // 4,
                               beam_size=args.beam_size,
                               alpha=args.alpha,
                               max_decode_len=args.max_decode_len)(logger)
    save_output(dev_hyp, id2w, args.dev_hyp)

    logger.info('Test Set BLEU Score')
    _, test_hyp = CalculateBleu(model,
                                test_data,
                                'Test Bleu',
                                batch=args.batchsize // 4,
                                beam_size=args.beam_size,
                                alpha=args.alpha,
                                max_decode_len=args.max_decode_len)(logger)
    save_output(test_hyp, id2w, args.test_hyp)

    # Loading EMA state dict
    model.load_state_dict(checkpoint['state_dict_ema'])
    logger.info('Dev Set BLEU Score')
    _, dev_hyp = CalculateBleu(model,
                               dev_data,
                               'Dev Bleu',
                               batch=args.batchsize // 4,
                               beam_size=args.beam_size,
                               alpha=args.alpha,
                               max_decode_len=args.max_decode_len)(logger)
    save_output(dev_hyp, id2w, args.dev_hyp + '.ema')

    logger.info('Test Set BLEU Score')
    _, test_hyp = CalculateBleu(model,
                                test_data,
                                'Test Bleu',
                                batch=args.batchsize // 4,
                                beam_size=args.beam_size,
                                alpha=args.alpha,
                                max_decode_len=args.max_decode_len)(logger)
    save_output(test_hyp, id2w, args.test_hyp + '.ema')
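Example #11 shadows the weights with an exponential moving average: `ema.register(...)` snapshots the initial state dict, `ema.apply(...)` blends each update into the shadow copy, and `state_dict_ema` is checkpointed and evaluated at the end. The class itself is not shown; a minimal sketch consistent with that usage, assuming the standard update `shadow = decay * shadow + (1 - decay) * param`:

import torch

class ExponentialMovingAverage:
    """Sketch of an EMA over a model state dict (hypothetical)."""

    def __init__(self, decay=0.999):
        self.decay = decay
        self.shadow_variable_dict = {}

    def register(self, state_dict):
        # Snapshot every tensor as the initial shadow value.
        for name, param in state_dict.items():
            self.shadow_variable_dict[name] = param.detach().clone().float()

    def apply(self, state_dict):
        # Blend the current parameters into the shadow copies.
        with torch.no_grad():
            for name, param in state_dict.items():
                shadow = self.shadow_variable_dict[name]
                shadow.mul_(self.decay).add_(param.detach().float(),
                                             alpha=1 - self.decay)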
Example #12
    def _run_epoch(self, train_data, dev_data, unlabel_data, addn_data,
                   addn_data_unlab, addn_dev, ek, ek_t, ek_u, graph_embs,
                   graph_embs_t, graph_embs_u):
        addn_dev.cuda()
        ek_t.cuda()
        graph_embs_t.cuda()
        report_stats = utils.Statistics()
        cm = ConfusionMatrix(self.classes)
        _, seq_data = list(zip(*train_data))
        total_seq_words = len(list(itertools.chain.from_iterable(seq_data)))
        iter_per_epoch = (1.5 * total_seq_words) // self.config.wbatchsize

        self.encoder.train()
        self.clf.train()

        train_iter = self._create_iter(train_data, self.config.wbatchsize)

        unlabel_iter = self._create_iter(unlabel_data,
                                         self.config.wbatchsize_unlabel)

        sofar = 0
        sofar_1 = 0
        for batch_index, train_batch_raw in enumerate(train_iter):
            seq_iter = list(zip(*train_batch_raw))[1]
            seq_words = len(list(itertools.chain.from_iterable(seq_iter)))
            report_stats.n_words += seq_words
            self.global_steps += 1

            # self.enc_clf_opt.zero_grad()
            if self.config.add_noise:
                train_batch_raw = add_noise(train_batch_raw,
                                            self.config.noise_dropout,
                                            self.config.random_permutation)
            train_batch = batch_utils.seq_pad_concat(train_batch_raw, -1)

            train_embedded = self.embedder(train_batch)

            memory_bank_train, enc_final_train = self.encoder(
                train_embedded, train_batch)

            if self.config.lambda_vat > 0 or self.config.lambda_ae > 0 or self.config.lambda_entropy > 0:
                try:
                    unlabel_batch_raw = next(unlabel_iter)
                except StopIteration:
                    unlabel_iter = self._create_iter(
                        unlabel_data, self.config.wbatchsize_unlabel)
                    unlabel_batch_raw = next(unlabel_iter)

                if self.config.add_noise:
                    unlabel_batch_raw = add_noise(
                        unlabel_batch_raw, self.config.noise_dropout,
                        self.config.random_permutation)
                unlabel_batch = batch_utils.seq_pad_concat(
                    unlabel_batch_raw, -1)
                unlabel_embedded = self.embedder(unlabel_batch)
                memory_bank_unlabel, enc_final_unlabel = self.encoder(
                    unlabel_embedded, unlabel_batch)
                addn_batch_unlab = retAddnBatch(addn_data_unlab,
                                                memory_bank_unlabel.shape[0],
                                                sofar_1).cuda()
                ek_batch_unlab = retAddnBatch(ek_u,
                                              memory_bank_unlabel.shape[0],
                                              sofar_1).cuda()
                graph_embs_unlab = retAddnBatch(graph_embs_u,
                                                memory_bank_unlabel.shape[0],
                                                sofar_1).cuda()
                sofar_1 += addn_batch_unlab.shape[0]
                if sofar_1 >= ek_u.shape[0]:
                    sofar_1 = 0
            addn_batch = retAddnBatch(addn_data, memory_bank_train.shape[0],
                                      sofar).cuda()
            ek_batch = retAddnBatch(ek, memory_bank_train.shape[0],
                                    sofar).cuda()
            graph_embs_batch = retAddnBatch(graph_embs,
                                            memory_bank_train.shape[0],
                                            sofar).cuda()
            sofar += addn_batch.shape[0]
            if sofar >= ek.shape[0]:
                sofar = 0
            pred = self.clf(memory_bank_train, addn_batch, ek_batch,
                            enc_final_train, graph_embs_batch)
            accuracy = self.get_accuracy(cm, pred.data,
                                         train_batch.labels.data)
            lclf = self.clf_loss(pred, train_batch.labels)

            lat = Variable(
                torch.FloatTensor([-1.]).type(batch_utils.FLOAT_TYPE))
            lvat = Variable(
                torch.FloatTensor([-1.]).type(batch_utils.FLOAT_TYPE))
            if self.config.lambda_at > 0:
                lat = at_loss(
                    self.embedder,
                    self.encoder,
                    self.clf,
                    train_batch,
                    addn_batch,
                    ek_batch,
                    graph_embs_batch,
                    perturb_norm_length=self.config.perturb_norm_length)

            if self.config.lambda_vat > 0:
                lvat_train = vat_loss(
                    self.embedder,
                    self.encoder,
                    self.clf,
                    train_batch,
                    addn_batch,
                    ek_batch,
                    graph_embs_batch,
                    p_logit=pred,
                    perturb_norm_length=self.config.perturb_norm_length)
                if self.config.inc_unlabeled_loss:
                    if memory_bank_unlabel.shape[0] != ek_batch_unlab.shape[0]:
                        print(
                            f'Skipping; Unequal Shapes: {memory_bank_unlabel.shape} and {ek_batch_unlab.shape}'
                        )
                        continue
                    else:
                        lvat_unlabel = vat_loss(
                            self.embedder,
                            self.encoder,
                            self.clf,
                            unlabel_batch,
                            addn_batch_unlab,
                            ek_batch_unlab,
                            graph_embs_unlab,
                            p_logit=self.clf(memory_bank_unlabel,
                                             addn_batch_unlab, ek_batch_unlab,
                                             enc_final_unlabel,
                                             graph_embs_unlab),
                            perturb_norm_length=self.config.perturb_norm_length
                        )
                    if self.config.unlabeled_loss_type == "AvgTrainUnlabel":
                        lvat = 0.5 * (lvat_train + lvat_unlabel)
                    elif self.config.unlabeled_loss_type == "Unlabel":
                        lvat = lvat_unlabel
                else:
                    lvat = lvat_train

            lentropy = Variable(
                torch.FloatTensor([-1.]).type(batch_utils.FLOAT_TYPE))
            if self.config.lambda_entropy > 0:
                lentropy_train = entropy_loss(pred)
                if self.config.inc_unlabeled_loss:
                    lentropy_unlabel = entropy_loss(
                        self.clf(memory_bank_unlabel, addn_batch_unlab,
                                 ek_batch_unlab, enc_final_unlabel,
                                 graph_embs_unlab))
                    if self.config.unlabeled_loss_type == "AvgTrainUnlabel":
                        lentropy = 0.5 * (lentropy_train + lentropy_unlabel)
                    elif self.config.unlabeled_loss_type == "Unlabel":
                        lentropy = lentropy_unlabel
                else:
                    lentropy = lentropy_train

            lae = Variable(
                torch.FloatTensor([-1.]).type(batch_utils.FLOAT_TYPE))
            if self.config.lambda_ae > 0:
                lae = self.ae(memory_bank_unlabel, enc_final_unlabel,
                              unlabel_batch.sent_len, unlabel_batch_raw)

            ltotal = (self.config.lambda_clf * lclf) + \
                     (self.config.lambda_ae * lae) + \
                     (self.config.lambda_at * lat) + \
                     (self.config.lambda_vat * lvat) + \
                     (self.config.lambda_entropy * lentropy)

            report_stats.clf_loss += lclf.data.cpu().numpy()
            report_stats.at_loss += lat.data.cpu().numpy()
            report_stats.vat_loss += lvat.data.cpu().numpy()
            report_stats.ae_loss += lae.data.cpu().numpy()
            report_stats.entropy_loss += lentropy.data.cpu().numpy()
            report_stats.n_sent += len(pred)
            report_stats.n_correct += accuracy
            self.enc_clf_opt.zero_grad()
            ltotal.backward()

            params_list = self._get_trainabe_modules()
            # Exclude the embedder from the norm constraint when using AT or VAT
            if not self.config.normalize_embedding:
                params_list += list(self.embedder.parameters())

            norm = torch.nn.utils.clip_grad_norm(params_list,
                                                 self.config.max_norm)
            report_stats.grad_norm += norm
            self.enc_clf_opt.step()
            if self.config.scheduler == "ExponentialLR":
                self.scheduler.step()
            self.ema_embedder.apply(self.embedder.named_parameters())
            self.ema_encoder.apply(self.encoder.named_parameters())
            self.ema_clf.apply(self.clf.named_parameters())

            report_func(self.epoch, batch_index, iter_per_epoch, self.time_s,
                        report_stats, self.config.report_every, self.logger)

            if self.global_steps % self.config.eval_steps == 0:
                cm_, accuracy, prc_dev = self._run_evaluate(
                    dev_data, addn_dev, ek_t, graph_embs_t)
                self.logger.info(
                    "- dev accuracy {} | best dev accuracy {} ".format(
                        accuracy, self.best_accuracy))
                self.writer.add_scalar("Dev_Accuracy", accuracy,
                                       self.global_steps)
                pred_, lab_ = zip(*prc_dev)
                pred_ = torch.cat(pred_)
                lab_ = torch.cat(lab_)
                self.writer.add_pr_curve("Dev PR-Curve", lab_, pred_,
                                         self.global_steps)
                pprint.pprint(cm_)
                pprint.pprint(cm_.get_all_metrics())
                if accuracy > self.best_accuracy:
                    self.logger.info("- new best score!")
                    self.best_accuracy = accuracy
                    self._save_model()
                if self.config.scheduler == "ReduceLROnPlateau":
                    self.scheduler.step(accuracy)
                self.encoder.train()
                # self.embedder.train()
                self.clf.train()

                if self.config.weight_decay > 0:
                    print(">> Square Norm: %1.4f " % self._get_l2_norm_loss())

        cm, train_accuracy, _ = self._run_evaluate(train_data, addn_data, ek,
                                                   graph_embs)
        self.logger.info("- Train accuracy  {}".format(train_accuracy))
        pprint.pprint(cm.get_all_metrics())

        cm, dev_accuracy, _ = self._run_evaluate(dev_data, addn_dev, ek_t,
                                                 graph_embs_t)
        self.logger.info("- Dev accuracy  {} | best dev accuracy {}".format(
            dev_accuracy, self.best_accuracy))
        pprint.pprint(cm.get_all_metrics())
        self.writer.add_scalars("Overall_Accuracy", {
            "Train_Accuracy": train_accuracy,
            "Dev_Accuracy": dev_accuracy
        }, self.global_steps)
        return dev_accuracy