Example #1
    def validate(self, valid_dataset, device, epoch=0):
        """ Validate model.
            valid_iter: validate data iterator
        Returns:
            :obj:`nmt.Statistics`: validation loss statistics
        """
        # Set model in validating mode.
        self.model.eval()
        stats = Statistics()

        with torch.no_grad():
            mini_batches = get_minibatches_WDP(valid_dataset,
                                               self.args.batch_size,
                                               self.args.max_seq_length)
            logger.info('Number of minibatches: %s' %
                        (len(valid_dataset) // self.args.batch_size))
            for step, batch in enumerate(mini_batches):
                x, labels = batch
                x = torch.tensor(x).to(device)
                labels = torch.tensor(labels).to(device)
                logits = self.model(x)

                loss = self.loss(logits, labels)
                # loss = (loss * mask.float()).sum()
                batch_stats = Statistics(float(loss.cpu().item()), len(labels))
                stats.update(batch_stats)
            self._report_step(0, epoch, valid_stats=stats)
            return stats
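A note on get_minibatches_WDP: the helper itself is not shown in these examples. Judging from its call sites (a [data, labels] pair in Example #16, a dataset plus max_seq_length here), a minimal sketch might look like the following; treat the signature and shuffling behavior as assumptions:

import numpy as np

def get_minibatches_WDP(dataset, batch_size, max_seq_length=None, shuffle=False):
    """Yield (x, labels) minibatches from a [data, labels] pair (a sketch)."""
    data, labels = dataset
    idx = np.arange(len(labels))
    if shuffle:
        np.random.shuffle(idx)
    for start in range(0, len(idx), batch_size):
        batch_idx = idx[start:start + batch_size]
        yield [data[i] for i in batch_idx], [labels[i] for i in batch_idx]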
Example #2
def multi_main(args):
    """ Spawns 1 process per GPU """
    init_logger()

    nb_gpu = args.world_size
    mp = torch.multiprocessing.get_context('spawn')

    # Create a thread to listen for errors in the child processes.
    error_queue = mp.SimpleQueue()
    error_handler = ErrorHandler(error_queue)

    # Train with multiprocessing.
    procs = []
    for i in range(nb_gpu):
        device_id = i
        procs.append(
            mp.Process(target=run,
                       args=(
                           args,
                           device_id,
                           error_queue,
                       ),
                       daemon=True))
        procs[i].start()
        logger.info(" Starting process pid: %d  " % procs[i].pid)
        error_handler.add_child(procs[i].pid)
    for p in procs:
        p.join()
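ErrorHandler is not defined in these examples; it follows the OpenNMT pattern of a background thread that blocks on error_queue and tears down the remaining workers when a child reports a traceback. A minimal sketch of that contract (the real class additionally signals the parent's main thread):

import os
import signal
import threading

class ErrorHandler(object):
    """Listen on an error queue and propagate child failures (a sketch)."""

    def __init__(self, error_queue):
        self.error_queue = error_queue
        self.children_pids = []
        listener = threading.Thread(target=self.error_listener, daemon=True)
        listener.start()

    def add_child(self, pid):
        self.children_pids.append(pid)

    def error_listener(self):
        rank, trace = self.error_queue.get()  # blocks until a child fails
        for pid in self.children_pids:
            os.kill(pid, signal.SIGINT)       # stop the surviving workers
        raise RuntimeError('Process on device %d failed:\n%s' % (rank, trace))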
Example #3
def validate(args, device_id, pt, epoch):
    device = "cpu" if args.visible_gpus == '-1' else "cuda"
    if (pt != ''):
        test_from = pt
    else:
        test_from = args.test_from
    logger.info('Loading checkpoint from %s' % test_from)
    checkpoint = torch.load(test_from,
                            map_location=lambda storage, loc: storage)
    opt = vars(checkpoint['opt'])
    for k in opt.keys():
        if (k in model_flags):
            setattr(args, k, opt[k])
    print(args)

    config = BertConfig.from_json_file(args.bert_config_name)
    model = Summarizer(args,
                       device,
                       load_pretrained_bert=False,
                       bert_config=config)
    model.load_cp(checkpoint)
    model.eval()
    valid_dataset = torch.load(args.bert_data_path + 'valid.data')

    trainer = build_trainer(args, device_id, model, None)
    stats = trainer.validate(valid_dataset, device, epoch)
    return stats.xent()
Example #4
    def validate(self, valid_dataset, device, epoch=0):
        """ Validate model.
            valid_iter: validate data iterator
        Returns:
            :obj:`nmt.Statistics`: validation loss statistics
        """
        # Set model in validating mode.
        self.model.eval()
        stats = Statistics()

        with torch.no_grad():
            mini_batches = get_minibatches(valid_dataset, self.args.batch_size,
                                           self.args.max_seq_length)
            logger.info('Number of minibatches: %s' %
                        (len(valid_dataset) // self.args.batch_size))
            for step, batch in enumerate(mini_batches):
                src, labels, segs, clss = batch[0], batch[1], batch[2], batch[3]
                src = torch.tensor(src, dtype=torch.long, device=device)
                labels = torch.tensor(labels, dtype=torch.long, device=device)
                segs = torch.tensor(segs, dtype=torch.long, device=device)

                # Pad every clss list with -1 up to the longest in the batch.
                max_cls_len = max(len(c) for c in clss)
                clss = [c + [-1] * (max_cls_len - len(c)) for c in clss]
                clss = torch.tensor(clss, dtype=torch.long, device=device)

                # Mask out padding: token id 0 in src, position -1 in clss.
                mask = (src != 0)
                mask_cls = (clss != -1)

                logits = self.model(src, segs, clss, mask, mask_cls)

                loss = self.loss(logits, labels)
                # loss = (loss * mask.float()).sum()
                batch_stats = Statistics(float(loss.cpu().item()), len(labels))
                stats.update(batch_stats)
            self._report_step(0, epoch, valid_stats=stats)
            return stats
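The clss padding and mask construction above recurs verbatim in Examples #12 and #15; isolated as a helper, the device-agnostic form reads (a sketch, not part of the original code):

import torch

def pad_clss_and_masks(src, clss, device):
    """Pad per-document [CLS] position lists and build both attention masks."""
    max_len = max(len(c) for c in clss)
    clss = torch.tensor([c + [-1] * (max_len - len(c)) for c in clss],
                        dtype=torch.long, device=device)
    mask = (src != 0)        # token id 0 is padding in src
    mask_cls = (clss != -1)  # -1 marks padded [CLS] slots
    return clss, mask, mask_cls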
Example #5
    def test(self, model, test_dataset, device):
        """ Validate model.
            valid_iter: validate data iterator
        Returns:
            :obj:`nmt.Statistics`: validation loss statistics
        """
        model.eval()
        mini_batches = get_minibatches_WDP(test_dataset, self.args.batch_size,
                                           self.args.max_seq_length)
        logger.info('Number of minibatches: %s' %
                    (len(test_dataset) // self.args.batch_size))
        with torch.no_grad():
            n_correct = 0.
            n_total = 0.
            target_all = None
            output_all = None
            full_pred = []
            full_label_ids = []
            for step, batch in enumerate(mini_batches):
                x, labels = batch
                x = torch.tensor(x).to(device)
                labels = torch.tensor(labels).to(device)

                logits = model(x)
                # loss = self.loss(logits, labels)
                n_correct += (torch.argmax(logits, -1) == labels).sum().item()
                n_total += len(logits)
                full_pred.extend(torch.argmax(logits, -1).tolist())
                full_label_ids.extend(labels.tolist())

                if target_all is None:
                    target_all = labels
                    output_all = logits
                else:
                    target_all = torch.cat((target_all, labels), dim=0)
                    output_all = torch.cat((output_all, logits), dim=0)

            acc = n_correct / n_total
            pred_res = metrics.classification_report(
                target_all.cpu(),
                torch.argmax(output_all, -1).cpu(),
                target_names=['NEG', 'NEU', 'POS'])
            logger.info(
                'Prediction results for test dataset: \n{}'.format(pred_res))

            # self._report_step(0, step, valid_stats=stats)
        return acc
Example #6
def wait_and_validate(args, device_id):
    timestep = 0
    if (args.test_all):
        cp_files = sorted(
            glob.glob(os.path.join(args.model_path, 'model_step_*.pt')))
        cp_files.sort(key=os.path.getmtime)
        xent_lst = []
        for i, cp in enumerate(cp_files):
            step = int(cp.split('.')[-2].split('_')[-1])
            xent = validate(args, device_id, cp, step)
            xent_lst.append((xent, cp))
            max_step = xent_lst.index(min(xent_lst))
            if (i - max_step > 10):
                break
        xent_lst = sorted(xent_lst, key=lambda x: x[0])[:3]
        logger.info('PPL %s' % str(xent_lst))
        for xent, cp in xent_lst:
            step = int(cp.split('.')[-2].split('_')[-1])
            test(args, device_id, cp, step)
    else:
        while (True):
            cp_files = sorted(
                glob.glob(os.path.join(args.model_path, 'model_step_*.pt')))
            cp_files.sort(key=os.path.getmtime)
            if cp_files:
                cp = cp_files[-1]
                time_of_cp = os.path.getmtime(cp)
                if (not os.path.getsize(cp) > 0):
                    time.sleep(60)
                    continue
                if (time_of_cp > timestep):
                    timestep = time_of_cp
                    step = int(cp.split('.')[-2].split('_')[-1])
                    validate(args, device_id, cp, step)
                    test(args, device_id, cp, step)

            cp_files = sorted(
                glob.glob(os.path.join(args.model_path, 'model_step_*.pt')))
            cp_files.sort(key=os.path.getmtime)
            if (cp_files):
                cp = cp_files[-1]
                time_of_cp = os.path.getmtime(cp)
                if (time_of_cp > timestep):
                    continue
            else:
                time.sleep(300)
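The checkpoint step is recovered from the filename by splitting on '.' and '_'; a quick check with a hypothetical name:

cp = 'model_step_2000.pt'
step = int(cp.split('.')[-2].split('_')[-1])
assert step == 2000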
Example #7
 def _save(self, model_name, epoch, acc):
     real_model = self.model
     model_state_dict = real_model.state_dict()
     # generator_state_dict = real_generator.state_dict()
     checkpoint = {
         'model': model_state_dict,
         # 'generator': generator_state_dict,
         'opt': self.args,
         'optim': self.optim,
     }
     checkpoint_path = os.path.join(
         self.args.model_path,
         'model_{}_epoch_{}_acc_{:.4f}.pt'.format(model_name, epoch, acc))
     logger.info("Saving checkpoint %s" % checkpoint_path)
     # checkpoint_path = '%s_step_%d.pt' % (FLAGS.model_path, step)
     if not os.path.exists(checkpoint_path):
         torch.save(checkpoint, checkpoint_path)
         return checkpoint, checkpoint_path
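For reference, the matching load path used in Examples #3 and #10 is roughly the following; load_cp is assumed to restore checkpoint['model'] into the network:

checkpoint = torch.load(checkpoint_path,
                        map_location=lambda storage, loc: storage)  # load to CPU
model.load_cp(checkpoint)       # restore the saved state dict
opt = vars(checkpoint['opt'])   # training options saved alongside the weights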
Example #8
def build_trainer(args, device_id, model, optim):
    """
    Simplify `Trainer` creation based on user `opt`s*
    Args:
        opt (:obj:`Namespace`): user options (usually from argument parsing)
        model (:obj:`onmt.models.NMTModel`): the model to train
        fields (dict): dict of fields
        optim (:obj:`onmt.utils.Optimizer`): optimizer used during training
        data_type (str): string describing the type of data
            e.g. "text", "img", "audio"
        model_saver(:obj:`onmt.models.ModelSaverBase`): the utility object
            used to save the model
    """
    # device = "cpu" if args.visible_gpus == '-1' else "cuda"

    grad_accum_count = args.accum_count
    n_gpu = args.world_size

    # if device_id >= 0:  # != 'cpu':  # >= 0:
    # 	gpu_rank = int(args.gpu_ranks)
    # else:
    gpu_rank = 0
    n_gpu = 0

    print('gpu_rank %d' % gpu_rank)

    tensorboard_log_dir = args.model_path

    writer = SummaryWriter(tensorboard_log_dir, comment="Unmt")

    report_manager = ReportMgr(args.report_every,
                               start_time=-1,
                               tensorboard_writer=writer)

    trainer = Trainer(args, model, optim, grad_accum_count, n_gpu, gpu_rank,
                      report_manager)

    # print(tr)
    if (model):
        n_params = _tally_parameters(model)
        logger.info('* number of parameters: %d' % n_params)

    return trainer
Example #9
    def output(self, step, num_steps, learning_rate, start):
        """Write out statistics to stdout.

        Args:
           step (int): current step
           n_batch (int): total batches
           start (int): start time of step.
        """
        t = self.elapsed_time()
        step_fmt = "%2d" % step
        if num_steps > 0:
            step_fmt = "%s/%5d" % (step_fmt, num_steps)
        logger.info(
            ("Step %s; xent: %4.2f; " +
             "lr: %7.7f; %3.0f docs/s; %6.0f sec")
            % (step_fmt,
               self.xent(),
               learning_rate,
               self.n_docs / (t + 1e-5),
               time.time() - start))
        sys.stdout.flush()
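Statistics itself is not shown; a minimal sketch consistent with how it is used across these examples (Statistics(loss, n_docs), update, xent, elapsed_time) could be:

import time

class Statistics(object):
    """Accumulate loss over documents (a sketch of the assumed interface)."""

    def __init__(self, loss=0.0, n_docs=0):
        self.loss = loss
        self.n_docs = n_docs
        self.start_time = time.time()

    def update(self, stat):
        # Fold another batch's totals into the running statistics.
        self.loss += stat.loss
        self.n_docs += stat.n_docs

    def xent(self):
        # Average loss per document; reported as "xent" in output().
        return self.loss / max(self.n_docs, 1)

    def elapsed_time(self):
        return time.time() - self.start_time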
Example #10
def test(args, device_id, pt):
    device = "cpu" if args.visible_gpus == '-1' else "cuda"
    if pt != '':
        test_from = pt
    else:
        test_from = args.best_model
    logger.info('Loading checkpoint from %s' % test_from)
    checkpoint = torch.load(test_from,
                            map_location=lambda storage, loc: storage)
    opt = vars(checkpoint['opt'])
    for k in opt.keys():
        if k in model_flags:
            setattr(args, k, opt[k])
    # print(args)

    config = BertConfig.from_json_file(args.bert_config_path)
    model = Summarizer(args,
                       device,
                       load_pretrained_bert=False,
                       bert_config=config)
    model.load_cp(checkpoint)
    model.eval()

    logger.info("Test dataset......")
    test_dataset = torch.load(args.bert_data_path + 'test.data')
    trainer = build_trainer(args, device_id, model, None)
    trainer.test(model, test_dataset, device)

    logger.info("Valid dataset......")
    test_dataset = torch.load(args.bert_data_path + 'valid.data')
    trainer = build_trainer(args, device_id, model, None)
    trainer.test(model, test_dataset, device)
Example #11
        def orig():
            # Set model in validating mode.
            def _get_ngrams(n, text):
                ngram_set = set()
                text_length = len(text)
                max_index_ngram_start = text_length - n
                for i in range(max_index_ngram_start + 1):
                    ngram_set.add(tuple(text[i:i + n]))
                return ngram_set

            def _block_tri(c, p):
                tri_c = _get_ngrams(3, c.split())
                for s in p:
                    tri_s = _get_ngrams(3, s.split())
                    if len(tri_c.intersection(tri_s)) > 0:
                        return True
                return False

            if not cal_lead and not cal_oracle:
                model.eval()
            stats = Statistics()

            can_path = '%s_step%d.candidate' % (self.args.result_path, step)
            gold_path = '%s_step%d.gold' % (self.args.result_path, step)
            with open(can_path, 'w') as save_pred:
                with open(gold_path, 'w') as save_gold:
                    with torch.no_grad():
                        target_all = None
                        output_all = None
                        # n_correct, n_total = 0., 0.
                        mini_batches = get_minibatches(
                            test_dataset, self.args.batch_size,
                            self.args.max_seq_length)
                        for i, batch in enumerate(mini_batches):
                            src = batch.src
                            labels = batch.labels
                            segs = batch.segs
                            clss = batch.clss
                            mask = batch.mask
                            mask_cls = batch.mask_cls

                            gold = []
                            pred = []

                            if (cal_lead):
                                selected_ids = [
                                    list(range(batch.clss.size(1)))
                                ] * batch.batch_size
                            elif (cal_oracle):
                                selected_ids = [[
                                    j for j in range(batch.clss.size(1))
                                    if labels[i][j] == 1
                                ] for i in range(batch.batch_size)]
                            else:
                                logits = model(src, segs, clss, mask, mask_cls)

                                loss = self.loss(
                                    logits, labels
                                )  # loss = self.loss(sent_scores, labels.float())
                                # loss = (loss * mask.float()).sum()
                                # n_correct += (torch.argmax(logits, -1) == labels).sum().item()
                                # n_total += len(logits)
                                if target_all is None:
                                    target_all = labels
                                    output_all = logits
                                else:
                                    target_all = torch.cat(
                                        (target_all, labels), dim=0)
                                    output_all = torch.cat(
                                        (output_all, logits), dim=0)

                                batch_stats = Statistics(
                                    float(loss.cpu().item()), len(labels))
                                stats.update(batch_stats)

                                # The logits act as per-sentence scores here.
                                sent_scores = logits + mask.float()
                                sent_scores = sent_scores.cpu().data.numpy()
                                selected_ids = np.argsort(-sent_scores, 1)
                            # selected_ids = np.sort(selected_ids,1)
                            for i, idx in enumerate(selected_ids):
                                _pred = []
                                if len(batch.src_str[i]) == 0:
                                    continue
                                for j in selected_ids[i][:len(batch.src_str[i]
                                                              )]:
                                    if j >= len(batch.src_str[i]):
                                        continue
                                    candidate = batch.src_str[i][j].strip()
                                    if self.args.block_trigram:
                                        if not _block_tri(candidate, _pred):
                                            _pred.append(candidate)
                                    else:
                                        _pred.append(candidate)

                                    if (not cal_oracle) and (
                                            not self.args.recall_eval
                                    ) and len(_pred) == 3:
                                        break

                                _pred = '<q>'.join(_pred)
                                if self.args.recall_eval:
                                    _pred = ' '.join(
                                        _pred.split()
                                        [:len(batch.tgt_str[i].split())])

                                pred.append(_pred)
                                gold.append(batch.tgt_str[i])

                            for i in range(len(gold)):
                                save_gold.write(gold[i].strip() + '\n')
                            for i in range(len(pred)):
                                save_pred.write(pred[i].strip() + '\n')
                    pred_res = metrics.classification_report(
                        target_all.cpu(),
                        torch.argmax(output_all, -1).cpu(),
                        target_names=['NEG', 'NEU', 'POS'])
                    logger.info(
                        'Prediction results for test dataset: \n{}'.format(
                            pred_res))
            if step != -1 and self.args.report_rouge:
                rouges = test_rouge(self.args.temp_dir, can_path, gold_path)
                logger.info('Rouges at step %d \n%s' %
                            (step, rouge_results_to_str(rouges)))
            self._report_step(0, step, valid_stats=stats)

            return stats
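The trigram blocking in _block_tri drops a candidate sentence as soon as it shares any trigram with an already-selected one. A toy run of the same two helpers:

def _get_ngrams(n, text):
    return {tuple(text[i:i + n]) for i in range(len(text) - n + 1)}

def _block_tri(c, p):
    tri_c = _get_ngrams(3, c.split())
    return any(tri_c & _get_ngrams(3, s.split()) for s in p)

selected = ['the cat sat on the mat']
print(_block_tri('the cat sat by the door', selected))  # True: shares ('the', 'cat', 'sat')
print(_block_tri('a dog ran in the park', selected))    # False: no common trigram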
Example #12
    def test(self,
             model,
             test_dataloader,
             device,
             cal_lead=False,
             cal_oracle=False):
        """ Validate model.
            valid_iter: validate data iterator
        Returns:
            :obj:`nmt.Statistics`: validation loss statistics
        """
        model.eval()
        stats = Statistics()
        batch_num = len(test_dataloader)
        mini_batches = get_minibatches(test_dataloader,
                                       self.args.batch_size,
                                       self.args.max_seq_length,
                                       shuffle=False)
        logger.info('Number of minibatches: %s' % batch_num)
        with torch.no_grad():
            n_correct = 0.
            n_total = 0.
            target_all = None
            output_all = None
            full_pred = []
            full_label_ids = []
            for step, batch in enumerate(mini_batches):
                src, labels, segs, clss = batch[0], batch[1], batch[2], batch[3]
                src = torch.tensor(src, dtype=torch.long, device=device)
                labels = torch.tensor(labels, dtype=torch.long, device=device)
                segs = torch.tensor(segs, dtype=torch.long, device=device)

                # Pad every clss list with -1 up to the longest in the batch.
                max_cls_len = max(len(c) for c in clss)
                clss = [c + [-1] * (max_cls_len - len(c)) for c in clss]
                clss = torch.tensor(clss, dtype=torch.long, device=device)

                # Mask out padding: token id 0 in src, position -1 in clss.
                mask = (src != 0)
                mask_cls = (clss != -1)

                logits = model(src, segs, clss, mask, mask_cls)
                # loss = self.loss(logits, labels)
                n_correct += (torch.argmax(logits, -1) == labels).sum().item()
                n_total += len(logits)
                full_pred.extend(torch.argmax(logits, -1).tolist())
                full_label_ids.extend(labels.tolist())

                if target_all is None:
                    target_all = labels
                    output_all = logits
                else:
                    target_all = torch.cat((target_all, labels), dim=0)
                    output_all = torch.cat((output_all, logits), dim=0)

                # batch_stats = Statistics(float(loss.cpu().item()), len(labels))
                # stats.update(batch_stats)

                # sent_scores = sent_scores + mask.float()
                # sent_scores = sent_scores.cpu().data.numpy()
                # selected_ids = np.argsort(-sent_scores, 1)
            acc = n_correct / n_total
            pred_res = metrics.classification_report(
                target_all.cpu(),
                torch.argmax(output_all, -1).cpu(),
                target_names=['NEG', 'NEU', 'POS'])
            logger.info('Prediction results: \n{}'.format(pred_res))

            predict_vote(full_pred, full_label_ids, test_dataloader)
            # self._report_step(0, step, valid_stats=stats)
        return acc

Example #13
 def log(self, *args, **kwargs):
     logger.info(*args, **kwargs)
Example #14
def train(args, device_id):
    init_logger(args.log_file)

    device = "cpu" if args.visible_gpus == '-1' else "cuda"
    logger.info('Device ID %d' % device_id)
    logger.info('Device %s' % device)
    torch.manual_seed(args.seed)
    random.seed(args.seed)
    torch.backends.cudnn.deterministic = True

    if device_id >= 0:
        # torch.cuda.set_device(device_id)
        torch.cuda.manual_seed(args.seed)

    # def train_iter_fct():
    # 	return data_loader.Dataloader(args, load_dataset(args, 'train', shuffle=True), args.batch_size, device,
    # 								  shuffle=True, is_test=False)

    train_dataset = torch.load(args.bert_data_path + 'train.data')
    if args.do_use_second_dataset:
        train_dataset += torch.load(args.second_dataset_path + 'train.data')
    logger.info('Loading training dataset from %s, number of examples: %d' %
                (args.bert_data_path, len(train_dataset)))

    if args.do_WDP:
        if os.path.exists(args.bert_data_path + 'train_512dim.data.npy') and \
                os.path.exists(args.bert_data_path + 'train_512dim.labels.npy'):
            all_document = np.load(args.bert_data_path + 'train_512dim.data.npy')
            all_labels = np.load(args.bert_data_path + 'train_512dim.labels.npy')
        else:
            DimReducer = reduceDim.DimReducer(args, device)
            all_document = None
            all_labels = None
            for document in tqdm(train_dataset,
                                 desc="Loading dataset",
                                 unit="lines"):
                if all_document is None:
                    all_document = DimReducer(
                        document['src'])  #.reshape(args.max_seq_length, -1)
                    all_labels = np.array([document['labels']])
                else:
                    all_document = np.append(all_document,
                                             DimReducer(document['src']),
                                             axis=0)
                    all_labels = np.append(all_labels, document['labels'])
                # all_document.append(DimReducer(document['src']).reshape(args.max_seq_length, -1))
            assert all_labels.shape[0] == all_document.shape[0]
            np.save(args.bert_data_path + 'train_512dim.data', all_document)
            np.save(args.bert_data_path + 'train_512dim.labels', all_labels)

        test_dataset = torch.load(args.bert_data_path + 'valid.data')
        logger.info('Loading valid dataset from %s, number of examples: %d' %
                    (args.bert_data_path, len(test_dataset)))
        if os.path.exists(args.bert_data_path + 'valid_512dim.data.npy') and \
                os.path.exists(args.bert_data_path + 'valid_512dim.labels.npy'):
            test_document = np.load(args.bert_data_path + 'valid_512dim.data.npy')
            test_labels = np.load(args.bert_data_path + 'valid_512dim.labels.npy')
        else:
            DimReducer = reduceDim.DimReducer(args, device)
            test_document = None
            test_labels = None
            for document in tqdm(test_dataset,
                                 desc="Loading dataset",
                                 unit="lines"):
                if test_document is None:
                    test_document = DimReducer(
                        document['src'])  #.reshape(args.max_seq_length, -1)
                    test_labels = np.array([document['labels']])
                else:
                    test_document = np.append(test_document,
                                              DimReducer(document['src']),
                                              axis=0)
                    test_labels = np.append(test_labels, document['labels'])
                # test_document.append(DimReducer(document['src']).reshape(args.max_seq_length, -1))
            assert test_labels.shape[0] == test_document.shape[0]
            np.save(args.bert_data_path + 'valid_512dim.data', test_document)
            np.save(args.bert_data_path + 'valid_512dim.labels', test_labels)

        model = reduceDim.Decoder(args, device, DimReducer.hidden_size,
                                  DimReducer.bert_vocab_size)

        _params = filter(lambda p: p.requires_grad, model.parameters())
        optim = optimization.BertAdam(_params,
                                      lr=args.lr,
                                      weight_decay=args.l2reg)

        logger.info(model)
        trainer = trainerWDP.build_trainer(args, device_id, model, optim)
        trainer.train([all_document, all_labels], device,
                      [test_document, test_labels])

        if args.do_test:
            model = trainer.model
            model.eval()
            test_dataset = torch.load(args.bert_data_path + 'valid.data')
            logger.info(
                'Loading valid dataset from %s, number of examples: %d' %
                (args.bert_data_path, len(test_dataset)))
            valid_document = []
            valid_labels = []
            for document in test_dataset:
                valid_document.append(DimReducer(document['src']))
                valid_labels.append(document['labels'])
            trainer.test(model, [valid_document, valid_labels], device)
    else:
        # train_dataloader = DataLoader(dataset=train_dataset, batch_size=args.batch_size, shuffle=True)
        model = Summarizer(args, device, load_pretrained_bert=True)
        # if args.train_from != '':
        # 	logger.info('Loading checkpoint from %s' % args.train_from)
        # 	checkpoint = torch.load(args.train_from,
        # 							map_location=lambda storage, loc: storage)
        # 	opt = vars(checkpoint['opt'])
        # 	for k in opt.keys():
        # 		if (k in model_flags):
        # 			setattr(args, k, opt[k])
        # 	model.load_cp(checkpoint)
        # 	optim = model_builder.build_optim(args, model, checkpoint)
        # else:
        # 	optim = model_builder.build_optim(args, model, None)
        _params = filter(lambda p: p.requires_grad, model.parameters())
        optim = optimization.BertAdam(_params,
                                      lr=args.lr,
                                      weight_decay=args.l2reg)

        logger.info(model)
        trainer = build_trainer(args, device_id, model, optim)
        trainer.train(train_dataset, device)

        if args.do_test:
            model = trainer.model
            model.eval()
            test_dataset = torch.load(args.bert_data_path + 'valid.data')
            logger.info(
                'Loading valid dataset from %s, number of examples: %d' %
                (args.bert_data_path, len(test_dataset)))
            trainer = build_trainer(args, device_id, model, None)
            trainer.test(model, test_dataset, device)
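Note on the cache files in Example #14: np.save appends a '.npy' suffix when the target name lacks one, so the cached arrays must be loaded with the suffix included. A quick illustration with a throwaway array:

import numpy as np

arr = np.arange(4)
np.save('train_512dim.data', arr)         # actually writes 'train_512dim.data.npy'
arr2 = np.load('train_512dim.data.npy')   # the suffix is required on load
assert (arr == arr2).all()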
Example #15
    def train(self, train_dataset,
              device):  # , valid_iter_fct=None, valid_steps=-1)
        """
        The main training loops.
        by iterating over training data (i.e. `train_iter_fct`)
        and running validation (i.e. iterating over `valid_iter_fct`
        Args:
            train_iter_fct(function): a function that returns the train
                iterator. e.g. something like
                train_iter_fct = lambda: generator(*args, **kwargs)
            valid_iter_fct(function): same as train_iter_fct, for valid data
            train_steps(int):
            valid_steps(int):
            save_checkpoint_steps(int):
        Return:
            None
        """
        # step =  self.optim._step + 1
        # step = self.optim._step + 1
        # epoch = 0
        true_batchs = []
        accum = 0
        normalization = 0
        # train_iter = train_iter_fct()

        total_stats = Statistics()
        report_stats = Statistics()
        self._start_report_manager(start_time=total_stats.start_time)
        if self.args.do_eval:
            test_dataset = torch.load(self.args.bert_data_path + 'test.data')
            logger.info(
                'Loading test dataset from %s, number of examples: %d' %
                (self.args.bert_data_path, len(test_dataset)))
            test_dataloader = DataLoader(dataset=test_dataset,
                                         batch_size=self.args.batch_size,
                                         shuffle=False)
            if self.args.do_use_second_dataset:
                test_dataset2 = torch.load(self.args.second_dataset_path +
                                           'test.data')
                test_dataloader2 = DataLoader(dataset=test_dataset2,
                                              batch_size=self.args.batch_size,
                                              shuffle=False)
        for epoch in range(self.args.train_epochs):
            n_correct, n_total = 0., 0.
            reduce_counter = 0
            loss_total = 0

            logger.info('Getting minibatches')
            mini_batches = get_minibatches(train_dataset, self.args.batch_size,
                                           self.args.max_seq_length)
            batch_num = len(train_dataset) // self.args.batch_size
            logger.info('Number of minibatches: %s' % batch_num)
            logger.info('Start training...')
            for step, batch in enumerate(mini_batches):
                # if self.n_gpu == 0 or (step % self.n_gpu == self.gpu_rank):
                self.optim.zero_grad()
                # true_batchs.append(batch)
                # normalization += batch.batch_size
                # accum += 1
                # if accum == self.grad_accum_count:
                # 	reduce_counter += 1
                # 	if self.n_gpu > 1:
                # 		normalization = sum(distributed.all_gather_list(normalization))
                src, labels, segs, clss = batch[0], batch[1], batch[2], batch[3]
                src = torch.tensor(src, dtype=torch.long, device=device)
                labels = torch.tensor(labels, dtype=torch.long, device=device)
                segs = torch.tensor(segs, dtype=torch.long, device=device)

                # Pad every clss list with -1 up to the longest in the batch.
                max_cls_len = max(len(c) for c in clss)
                clss = [c + [-1] * (max_cls_len - len(c)) for c in clss]
                clss = torch.tensor(clss, dtype=torch.long, device=device)

                # Mask out padding: token id 0 in src, position -1 in clss.
                mask = (src != 0)
                mask_cls = (clss != -1)

                # src = batch.src
                # labels = batch.labels
                # segs = batch.segs
                # clss = batch.clss
                # mask = batch.mask
                # mask_cls = batch.mask_cls

                logits = self.model(src, segs, clss, mask, mask_cls)

                loss = self.loss(logits, labels)
                n_correct += (torch.argmax(logits, -1) == labels).sum().item()
                n_total += len(logits)
                loss_total += loss.item() * len(logits)
                # loss = (loss * mask.float()).sum()
                # (loss / loss.numel()).backward()
                loss.backward()
                # loss.div(float(normalization)).backward()
                # 4. Update the parameters and statistics.
                # if self.grad_accum_count == 1:
                # Multi GPU gradient gather
                if self.n_gpu > 1:
                    grads = [
                        p.grad.data for p in self.model.parameters()
                        if p.requires_grad and p.grad is not None
                    ]
                    distributed.all_reduce_and_rescale_tensors(grads, float(1))
                self.optim.step()

                batch_stats = Statistics(float(loss.cpu().item()),
                                         normalization)
                total_stats.update(batch_stats)
                report_stats.update(batch_stats)

                logger.info('step-{}, loss:{:.4f}, acc:{:.4f}'.format(
                    step, loss_total / n_total, n_correct / n_total))
                if step % self.check_steps == 0 or step == batch_num:
                    valid_acc_2 = 0
                    valid_acc = self.test(self.model, test_dataloader, device)
                    if self.args.do_use_second_dataset:
                        valid_acc_2 = self.test(self.model, test_dataloader2,
                                                device)
                    if valid_acc > self.best_acc or valid_acc_2 > self.best_acc:
                        self.best_acc = max(valid_acc, valid_acc_2)
                        self._save(
                            str(self.args.model_name) + str(self.args.lr) +
                            'valid', epoch, self.best_acc)
                # 	self._save(epoch, step)
                # report_stats = self._maybe_report_training(step, epoch, self.optim.learning_rate, report_stats)

                # in case of multi step gradient accumulation,
                # update only after accum batches
            # valid_acc = self.test(self.model, test_dataset, device)
            # if valid_acc > self.best_acc:
            # 	self.best_acc = valid_acc
            # self._save(str(self.args.model_name)+str(self.args.lr), epoch, valid_acc)
            if self.grad_accum_count > 1:
                if self.n_gpu > 1:
                    grads = [
                        p.grad.data for p in self.model.parameters()
                        if p.requires_grad and p.grad is not None
                    ]
                    distributed.all_reduce_and_rescale_tensors(grads, float(1))
                self.optim.step()

            # return n_correct, n_total, loss_total

            if self.args.do_eval:
                # model = trainer.model
                # self.model.eval()
                # trainer = build_trainer(args, device_id, model, None)
                try:
                    self.test(self.model, test_dataloader, device)
                except Exception as e:
                    logger.error(e)
Example #16
    def train(self, train_dataset, device,
              test_dataset):  # , valid_iter_fct=None, valid_steps=-1)

        normalization = 0
        total_stats = Statistics()
        report_stats = Statistics()
        self._start_report_manager(start_time=total_stats.start_time)

        for epoch in range(self.args.train_epochs):
            n_correct, n_total = 0., 0.
            reduce_counter = 0
            loss_total = 0

            logger.info('Getting minibatches')
            mini_batches = get_minibatches_WDP(train_dataset,
                                               self.args.batch_size)
            logger.info('Number of minibatches: %s' %
                        (len(train_dataset[0]) // self.args.batch_size))
            logger.info('Start training...')
            for step, batch in enumerate(mini_batches):
                # if self.n_gpu == 0 or (step % self.n_gpu == self.gpu_rank):
                x, labels = batch
                x = torch.tensor(x).to(device)
                labels = torch.tensor(labels).to(device)
                self.optim.zero_grad()

                logits = self.model(x)

                loss = self.loss(logits, labels)
                n_correct += (torch.argmax(logits, -1) == labels).sum().item()
                n_total += len(logits)
                loss_total += loss.item() * len(logits)

                loss.backward()
                # loss.div(float(normalization)).backward()
                # 4. Update the parameters and statistics.
                # if self.grad_accum_count == 1:
                # Multi GPU gradient gather
                if self.n_gpu > 1:
                    grads = [
                        p.grad.data for p in self.model.parameters()
                        if p.requires_grad and p.grad is not None
                    ]
                    distributed.all_reduce_and_rescale_tensors(grads, float(1))
                self.optim.step()

                batch_stats = Statistics(float(loss.cpu().item()),
                                         normalization)
                total_stats.update(batch_stats)
                report_stats.update(batch_stats)

                logger.info('step-{}, loss:{:.4f}, acc:{:.4f}'.format(
                    step, loss_total / n_total, n_correct / n_total))
                if step != 0 and step % self.check_steps == 0:
                    valid_acc = self.test(self.model, test_dataset, device)
                    if valid_acc > self.best_acc:
                        self.best_acc = valid_acc
                        self._save(
                            str(self.args.model_name) + str(self.args.lr) +
                            'valid', epoch, self.best_acc)
                # self._save(epoch, step)
                # report_stats = self._maybe_report_training(step, epoch, self.optim.learning_rate, report_stats)

                # in case of multi step gradient accumulation,
                # update only after accum batches
            valid_acc = self.test(self.model, test_dataset, device)
            # if valid_acc > self.best_acc:
            # 	self.best_acc = valid_acc
            self._save(
                str(self.args.model_name) + str(self.args.lr), epoch,
                valid_acc)
            if self.grad_accum_count > 1:
                if self.n_gpu > 1:
                    grads = [
                        p.grad.data for p in self.model.parameters()
                        if p.requires_grad and p.grad is not None
                    ]
                    distributed.all_reduce_and_rescale_tensors(grads, float(1))
                self.optim.step()
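distributed.all_reduce_and_rescale_tensors comes from the OpenNMT-style distributed module. Assuming torch.distributed is already initialized, a minimal sketch of what the call does (the real helper also buffers small tensors to reduce the number of all-reduce calls):

import torch.distributed as dist

def all_reduce_and_rescale_tensors(tensors, rescale_denom):
    """Sum each tensor across workers, then rescale in place (a sketch)."""
    for t in tensors:
        dist.all_reduce(t)
        t.div_(rescale_denom)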
Example #17
def predict_vote(pred_labels, label_ids, test_dataloader):

    # doc_id -> {'entity': str, 'emotion': 0/1/2, 'predictions': [...]}
    act_pred_label = {}
    prev_entity = ""

    doc_id = 0
    for i, test_dataset in enumerate(test_dataloader):
        # pred_id = int(i / 3)
        doc = test_dataset['src_txt']
        entity = doc[0]
        polarity = test_dataset['labels']
        assert int(polarity) == label_ids[i]
        # print(polarity_str, label_ids[pred_id] - 1)
        if prev_entity == "" or entity != prev_entity:
            doc_id += 1
            prev_entity = entity
            act_pred_label[doc_id] = {}
            act_pred_label[doc_id]['entity'] = entity
            act_pred_label[doc_id]['emotion'] = int(polarity)  # actual label
            act_pred_label[doc_id]['predictions'] = [int(pred_labels[i])]
        else:
            # if entity == prev_entity:
            act_pred_label[doc_id]['predictions'].append(int(pred_labels[i]))

    # print(act_pred_label)
    acc = 0.
    total = len(act_pred_label)
    act_labels_all = []
    pred_labels_all = []
    for idx in act_pred_label.keys():
        each_pred = act_pred_label[idx]
        predic_labels = each_pred['predictions']
        act_label = each_pred['emotion']
        num = []
        num.append(predic_labels.count(0))
        num.append(predic_labels.count(1))
        num.append(predic_labels.count(2))
        # Break two-way ties: NEG/NEU -> NEG, NEU/POS -> POS, NEG/POS -> NEU.
        if num[0] == num[1] and num[0] != 0 and num[0] > num[2]:
            pred = 0
        elif num[1] == num[2] and num[1] != 0 and num[1] > num[0]:
            pred = 2
        elif num[0] == num[2] and num[0] != 0 and num[0] > num[1]:
            pred = 1
        else:
            pred = num.index(max(num))
        # if num[0]== num[1] or num[1] == num[2] or num[0] == num[2]:
        # 	print(pred, num)
        if pred == act_label:
            acc += 1
        act_labels_all.append(act_label)
        pred_labels_all.append(pred)
    # print(acc / total)
    # vote_f1 = metrics.f1_score(act_labels_all, pred_labels_all, labels=[0, 1, 2], average=None)
    # vote_recall = metrics.recall_score(act_labels_all, pred_labels_all, labels=[0, 1, 2], average=None)
    # logger.info('>> vote_acc: {:.4f}, vote_recall:{} vote_f1: {}'.format(acc / total, vote_recall, vote_f1))
    pred_result = metrics.classification_report(
        act_labels_all, pred_labels_all, target_names=['NEG', 'NEU', "POS"])
    logger.info('>> vote_acc: {:.4f}'.format(acc / total))
    logger.info('>> Prediction voted results: \n {}'.format(pred_result))
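A worked example of the vote with the NEG/POS tie-break above, on hypothetical per-chunk predictions for one entity:

predictions = [0, 2, 2, 0]
num = [predictions.count(c) for c in range(3)]  # -> [2, 0, 2]
if num[0] == num[1] and num[0] != 0 and num[0] > num[2]:
    pred = 0
elif num[1] == num[2] and num[1] != 0 and num[1] > num[0]:
    pred = 2
elif num[0] == num[2] and num[0] != 0 and num[0] > num[1]:
    pred = 1
else:
    pred = num.index(max(num))
print(pred)  # 1: an even NEG/POS split falls back to NEU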