Example #1
def evaluate(args, model, tokenizer, prefix=''):
    eval_output_dir = args.output_dir

    results = {}
    eval_dataset = load_and_cache_examples(args, tokenizer, set_type='dev')

    if not os.path.exists(eval_output_dir):
        os.makedirs(eval_output_dir)

    args.eval_batch_size = args.per_gpu_eval_batch_size * max(1, args.n_gpu)
    eval_sampler = SequentialSampler(eval_dataset)
    eval_dataloader = DataLoader(eval_dataset, sampler=eval_sampler, batch_size=args.eval_batch_size)

    # Eval!
    logger.info('***** Running evaluation {} *****'.format(prefix))
    logger.info('   Num examples = %d', len(eval_dataset))
    logger.info('   Batch size = %d', args.eval_batch_size)
    eval_loss = 0.0
    nb_eval_steps = 0
    preds = None
    out_label_ids = None
    for batch in eval_dataloader:
        model.eval()
        batch = tuple(t.to(args.device) for t in batch)

        with torch.no_grad():
            inputs = {'input_ids':              batch[0],
                      'attention_mask':         batch[1],
                      'token_type_ids':         batch[2],
                      'labels':                 batch[3],
                      'ct_clf_input_ids':       batch[4],
                      'ct_clf_attention_mask':  batch[5],
                      'ct_clf_token_type_ids':  batch[6],
                      'categories':             batch[7],
                      'hand_features':          batch[8]}
            outputs = model(**inputs)
            tmp_eval_loss, logits = outputs[:2]  # first two outputs are (loss, logits)
            eval_loss += tmp_eval_loss.mean().item()
        nb_eval_steps += 1
        if preds is None:
            preds = logits.detach().cpu().numpy()
            out_label_ids = inputs['labels'].detach().cpu().numpy()
        else:
            preds = np.append(preds, logits.detach().cpu().numpy(), axis=0)
            out_label_ids = np.append(out_label_ids, inputs['labels'].detach().cpu().numpy(), axis=0)

    eval_loss = eval_loss / nb_eval_steps
    preds = np.argmax(preds, axis=1)
    result = compute_metrics(preds, out_label_ids)
    results.update(result)

    output_eval_file = os.path.join(eval_output_dir, 'eval_results.txt')
    with open(output_eval_file, 'a') as writer:
        for key in sorted(result.keys()):
            logger.info('   %s = %s', key, str(result[key]))
            writer.write('%s = %s\n' % (key, str(result[key])))
        writer.write('='*20 + '\n')

    return results
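The snippet above calls compute_metrics(preds, out_label_ids) without showing it. A minimal sketch of what such a helper might look like for multi-class classification, assuming scikit-learn is available (the metric names and choice of metrics are illustrative, not taken from the original):

from sklearn.metrics import accuracy_score, f1_score

def compute_metrics(preds, labels):
    """Hypothetical metrics helper matching the call signature used above.

    `preds` are already argmax-ed class indices, `labels` the gold label ids.
    """
    return {
        'acc': accuracy_score(labels, preds),
        'f1_macro': f1_score(labels, preds, average='macro'),
    }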
Example #2
def main():
    weather = Weather()
    df = process_dataframe(get_dataframe(FIREBASE_URL))

    print('Last five rows:\n', df[-5:][['url', 'time']], '\n')
    save_to_csv(df)
    text = 'Hi! Here is how you have been doing so far.'
    sound(text)

    results = compute_metrics(df)
    #print(results)
    time_work = 10.03
    time_media = 0.69
    time_other = 1.78
    top_hosts = ['localhost', 'google', 'firebase console']
    text = (f'You spend {time_work} hours on work tabs, {time_media} hours on media '
            f'and {time_other} hours on other. '
            f'Your top three tabs are: {top_hosts[0]}, {top_hosts[1]}, {top_hosts[2]}.')
    sound(text)
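main() relies on a sound() helper that is not shown. A minimal sketch, assuming a local text-to-speech engine such as pyttsx3 (the library choice is an assumption, not taken from the original):

import pyttsx3  # assumed TTS backend; the original may use a different one

def sound(text):
    """Speak `text` aloud; hypothetical implementation of the helper used in main()."""
    engine = pyttsx3.init()
    engine.say(text)
    engine.runAndWait()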
Example #3
def evaluate(args, model, tokenizer, prefix="", is_eval=True):
    # Loop to handle MNLI double evaluation (matched, mis-matched)
    eval_task_names = ("mnli", "mnli-mm") if args.task_name == "mnli" else (
        args.task_name, )
    eval_outputs_dirs = (args.output_dir, args.output_dir +
                         '-MM') if args.task_name == "mnli" else (
                             args.output_dir, )

    results = {}
    for eval_task, eval_output_dir in zip(eval_task_names, eval_outputs_dirs):
        eval_dataset, eval_examples = load_and_cache_examples(
            args, eval_task, tokenizer, evaluate=is_eval, test=(not is_eval))

        if not os.path.exists(eval_output_dir) and args.local_rank in [-1, 0]:
            os.makedirs(eval_output_dir)

        args.eval_batch_size = args.per_gpu_eval_batch_size * max(1, args.n_gpu)
        # Note that DistributedSampler samples randomly
        eval_sampler = (SequentialSampler(eval_dataset)
                        if args.local_rank == -1 else DistributedSampler(eval_dataset))
        eval_dataloader = DataLoader(eval_dataset,
                                     sampler=eval_sampler,
                                     batch_size=args.eval_batch_size)

        # Eval!
        logger.info("***** Running evaluation {} *****".format(prefix))
        logger.info("  Num examples = %d", len(eval_dataset))
        logger.info("  Batch size = %d", args.eval_batch_size)
        eval_loss = 0.0
        nb_eval_steps = 0
        preds = None
        out_label_ids = None
        for batch in tqdm(eval_dataloader, desc="Evaluating"):
            model.eval()
            batch = tuple(t.to(args.device) for t in batch)

            with torch.no_grad():
                inputs = {
                    'input_ids': batch[0],
                    'attention_mask': batch[1],
                    # XLM doesn't use segment_ids
                    'token_type_ids': batch[2] if args.model_type in ['bert', 'xlnet'] else None,
                    'labels': batch[3],
                }
                outputs = model(**inputs)
                tmp_eval_loss, logits = outputs[:2]

                eval_loss += tmp_eval_loss.mean().item()
            nb_eval_steps += 1
            if preds is None:
                preds = logits.detach().cpu().numpy()
                out_label_ids = inputs['labels'].detach().cpu().numpy()
            else:
                preds = np.append(preds, logits.detach().cpu().numpy(), axis=0)
                out_label_ids = np.append(
                    out_label_ids,
                    inputs['labels'].detach().cpu().numpy(),
                    axis=0)

        eval_loss = eval_loss / nb_eval_steps
        if args.output_mode == "classification":
            preds = np.argmax(preds, axis=1)
        elif args.output_mode == "regression":
            preds = np.squeeze(preds)
        result = compute_metrics(eval_task, preds, out_label_ids)
        results.update(result)

        output_eval_file = os.path.join(eval_output_dir, "eval_results.txt")
        with open(output_eval_file, "w") as writer:
            logger.info("***** Eval results {} *****".format(prefix))
            for key in sorted(result.keys()):
                logger.info("  %s = %s", key, str(result[key]))
                writer.write("%s = %s\n" % (key, str(result[key])))
        lines = []
        mapping = ['non-propaganda', 'propaganda']
        if args.output_mode == "multi-label-classification":
            preds = preds[:, 0]
        for example, pred in zip(eval_examples, preds):
            line = example.guid.split('-')[1].split('_')
            if example.text_a == '[EMPTY]':
                line.append('non-propaganda')
            else:
                line.append(mapping[int(pred)])
            lines.append(line)
        import time
        end_date = time.strftime("%Y-%m-%d(%H:%M:%S)", time.localtime())
        output_pred_file = os.path.join(eval_output_dir,
                                        "preds_%s.txt" % end_date)
        with open(output_pred_file, "w") as writer:
            for line in lines:
                writer.write('\t'.join(line) + '\n')
    return results
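The prediction file written above is plain tab-separated text. A small sketch of reading it back for inspection, assuming the guid encodes an article id and a sentence id (the column names are assumptions based on how the guid is split above):

import pandas as pd

def load_predictions(path):
    """Read a tab-separated preds_*.txt file produced by evaluate() above."""
    return pd.read_csv(path, sep='\t', header=None,
                       names=['article_id', 'sentence_id', 'label'])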
Example #4
def prediction(args, model, tokenizer, prefix=""):
    pred_task_names = (args.task_name,)
    pred_outputs_dirs = (args.output_dir,)

    result = {}
    for p_task, p_output_dir in zip(pred_task_names, pred_outputs_dirs):
        pred_dataset = load_and_cache_examples(
            args, p_task, tokenizer, predict=True)

        if not os.path.exists(p_output_dir):
            os.makedirs(p_output_dir)

        args.pred_batch_size = args.per_gpu_eval_batch_size
        pred_sampler = SequentialSampler(pred_dataset)
        pred_dataloader = DataLoader(
            pred_dataset, sampler=pred_sampler, batch_size=args.pred_batch_size)

        # Prediction
        logger.info("******* Running prediction ****")
        logger.info(" Num examples = %d", len(pred_dataset))
        logger.info(" Batch size = %d", args.pred_batch_size)
        nb_eval_steps = 0
        preds = None
        for batch in tqdm(pred_dataloader, desc="Predicting"):
            model.eval()
            batch = tuple(t.to(args.device) for t in batch)

            with torch.no_grad():
                inputs = {
                    "input_ids": batch[0], "attention_mask": batch[1], "labels": batch[3]}
                if args.model_type != "distilbert":
                    inputs["token_type_ids"] = (
                        batch[2] if args.model_type in [
                            "bert", "xlnet", "albert"] else None
                    )  # XLM, DistilBERT, RoBERTa, and XLM-RoBERTa don't use segment_ids
                outputs = model(**inputs)
                _, logits = outputs[:2]

            nb_eval_steps += 1
            if preds is None:
                preds = logits.detach().cpu().numpy()
                out_label_ids = inputs["labels"].detach().cpu().numpy()
            else:
                preds = np.append(preds, logits.detach().cpu().numpy(), axis=0)
                out_label_ids = np.append(
                    out_label_ids, inputs["labels"].detach().cpu().numpy(), axis=0)

        if args.output_mode == "regression":
            preds = np.squeeze(preds)

        result = compute_metrics(p_task, preds, out_label_ids)

        # make the prefix dir
        Path(os.path.join(p_output_dir, prefix)).mkdir(exist_ok=True)
        output_prediction = os.path.join(
            p_output_dir, prefix, "prediction.txt")
        with open(output_prediction, "w") as w:
            for pred in preds:
                w.write(str(pred) + "\n")

        output_eval_file = os.path.join(
            p_output_dir, prefix, "pred_results.txt")

        with open(output_eval_file, "w") as writer:
            logger.info("***** Eval results {} *****".format(prefix))
            writer.write("hyperparameter: batch %s, epoch %s\n" %
                         (args.per_gpu_train_batch_size, args.num_train_epochs))
            for key in sorted(result.keys()):
                logger.info("  %s = %s", key, str(result[key]))
                writer.write("%s = %s\n" % (key, str(result[key])))
    return result
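A hedged sketch of how prediction() might be driven, assuming the fine-tuned checkpoint was saved with Hugging Face transformers and that `args` is the argparse namespace used throughout these snippets (the prefix value is a placeholder):

from transformers import AutoTokenizer, AutoModelForSequenceClassification

def run_prediction(args):
    """Hypothetical driver: load the saved checkpoint and run prediction() on it."""
    tokenizer = AutoTokenizer.from_pretrained(args.output_dir)
    model = AutoModelForSequenceClassification.from_pretrained(args.output_dir)
    model.to(args.device)
    return prediction(args, model, tokenizer, prefix="best_checkpoint")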
Example #5
def evaluate(args, model, tokenizer, prefix=""):
    # Loop to handle MNLI double evaluation (matched, mis-matched)
    eval_task_names = (args.task_name,)
    eval_outputs_dirs = (args.output_dir,)

    results = {}
    for eval_task, eval_output_dir in zip(eval_task_names, eval_outputs_dirs):
        eval_dataset = load_and_cache_examples(
            args, eval_task, tokenizer, evaluate=True)

        if not os.path.exists(eval_output_dir) and args.local_rank in [-1, 0]:
            os.makedirs(eval_output_dir)

        args.eval_batch_size = args.per_gpu_eval_batch_size * \
            max(1, args.n_gpu)
        # Note that DistributedSampler samples randomly
        eval_sampler = SequentialSampler(eval_dataset)
        eval_dataloader = DataLoader(
            eval_dataset, sampler=eval_sampler, batch_size=args.eval_batch_size)

        # Eval!
        logger.info("***** Running evaluation {} *****".format(prefix))
        logger.info("  Num examples = %d", len(eval_dataset))
        logger.info("  Batch size = %d", args.eval_batch_size)
        eval_loss = 0.0
        nb_eval_steps = 0
        preds = None
        out_label_ids = None
        for batch in tqdm(eval_dataloader, desc="Evaluating"):
            model.eval()
            batch = tuple(t.to(args.device) for t in batch)

            with torch.no_grad():
                inputs = {
                    "input_ids": batch[0], "attention_mask": batch[1], "labels": batch[3]}
                if args.model_type != "distilbert":
                    inputs["token_type_ids"] = (
                        batch[2] if args.model_type in [
                            "bert", "xlnet", "albert"] else None
                    )  # XLM, DistilBERT, RoBERTa, and XLM-RoBERTa don't use segment_ids
                outputs = model(**inputs)
                tmp_eval_loss, logits = outputs[:2]

                eval_loss += tmp_eval_loss.mean().item()
            nb_eval_steps += 1
            if preds is None:
                preds = logits.detach().cpu().numpy()
                out_label_ids = inputs["labels"].detach().cpu().numpy()
            else:
                preds = np.append(preds, logits.detach().cpu().numpy(), axis=0)
                out_label_ids = np.append(
                    out_label_ids, inputs["labels"].detach().cpu().numpy(), axis=0)

        eval_loss = eval_loss / nb_eval_steps
        if args.output_mode == "classification":
            preds = np.argmax(preds, axis=1)
        elif args.output_mode == "regression":
            preds = np.squeeze(preds)
        result = compute_metrics(eval_task, preds, out_label_ids)
        results.update(result)
        # make the prefix dir
        Path(os.path.join(eval_output_dir, prefix)).mkdir(exist_ok=True)
        output_eval_file = os.path.join(
            eval_output_dir, prefix, "eval_results.txt")

        with open(output_eval_file, "w") as writer:
            logger.info("***** Eval results {} *****".format(prefix))
            writer.write("hyperparameter: batch %s, epoch %s\n" %
                         (args.per_gpu_train_batch_size, args.num_train_epochs))
            for key in sorted(result.keys()):
                logger.info("  %s = %s", key, str(result[key]))
                writer.write("%s = %s\n" % (key, str(result[key])))
            logger.info(" %s = %s", "eval_loss", str(eval_loss))
            writer.write("%s = %s\n" % ("eval_loss", str(eval_loss)))

    return results
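evaluate() above takes a prefix so results from different checkpoints can be kept apart. A sketch of one common way to use it, iterating over saved checkpoint directories (the checkpoint-* layout and the use of AutoModelForSequenceClassification are assumptions, not taken from the original):

import glob
import os
from transformers import AutoModelForSequenceClassification

def evaluate_all_checkpoints(args, tokenizer):
    """Run evaluate() on every checkpoint-* directory under args.output_dir.

    The directory layout is an assumption; adjust the glob to your setup.
    """
    results = {}
    for ckpt_dir in sorted(glob.glob(os.path.join(args.output_dir, 'checkpoint-*'))):
        step = ckpt_dir.split('-')[-1]
        model = AutoModelForSequenceClassification.from_pretrained(ckpt_dir)
        model.to(args.device)
        result = evaluate(args, model, tokenizer, prefix=step)
        results.update({f'{k}_{step}': v for k, v in result.items()})
    return results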