Example #1
    def post_processing_function(examples, features, predictions, stage="eval"):
        # Post-processing: we match the start logits and end logits to answers in the original context.
        predictions, scores_diff_json = postprocess_qa_predictions_with_beam_search(
            examples=examples,
            features=features,
            predictions=predictions,
            version_2_with_negative=data_args.version_2_with_negative,
            n_best_size=data_args.n_best_size,
            max_answer_length=data_args.max_answer_length,
            start_n_top=model.config.start_n_top,
            end_n_top=model.config.end_n_top,
            output_dir=training_args.output_dir,
            is_world_process_zero=trainer.is_world_process_zero(),
            prefix=stage,
        )
        # Format the result to the format the metric expects.
        if data_args.version_2_with_negative:
            formatted_predictions = [
                {"id": k, "prediction_text": v, "no_answer_probability": scores_diff_json[k]}
                for k, v in predictions.items()
            ]
        else:
            formatted_predictions = [{"id": k, "prediction_text": v} for k, v in predictions.items()]

        references = [{"id": ex["id"], "answers": ex[answer_column_name]} for ex in examples]
        return EvalPrediction(predictions=formatted_predictions, label_ids=references)
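
In the Hugging Face question-answering example scripts, a function like this is not called directly; it is handed to a trainer that applies it to the raw start/end logits after each evaluation pass and then feeds the result to a metric. The following is a minimal sketch of that wiring, assuming the `QuestionAnsweringTrainer` helper that ships alongside those example scripts (trainer_qa.py, not a library class), the `evaluate` library, and the surrounding variables from the enclosing script (`model`, `training_args`, `tokenizer`, `data_collator`, and the dataset splits); treat the exact names as assumptions.

    import evaluate
    from trainer_qa import QuestionAnsweringTrainer  # example-script helper, not part of transformers

    # SQuAD v2 scores use the no_answer_probability field; plain SQuAD does not.
    metric = evaluate.load(
        "squad_v2" if data_args.version_2_with_negative else "squad"
    )

    def compute_metrics(p):
        # p is the EvalPrediction built by post_processing_function above.
        return metric.compute(predictions=p.predictions, references=p.label_ids)

    trainer = QuestionAnsweringTrainer(
        model=model,
        args=training_args,
        train_dataset=train_dataset if training_args.do_train else None,
        eval_dataset=eval_dataset if training_args.do_eval else None,
        eval_examples=eval_examples if training_args.do_eval else None,
        tokenizer=tokenizer,
        data_collator=data_collator,
        post_process_function=post_processing_function,
        compute_metrics=compute_metrics,
    )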
Example #2
    def post_processing_function(examples,
                                 features,
                                 predictions,
                                 stage="eval"):
        # Post-processing: we match the start logits and end logits to
        # answers in the original context.

        if data_args.beam_search:
            predictions, scores_diff_json = \
                postprocess_qa_predictions_with_beam_search(
                    examples=examples,
                    features=features,
                    predictions=predictions,
                    version_2_with_negative=data_args.version_2_with_negative,
                    n_best_size=data_args.n_best_size,
                    max_answer_length=data_args.max_answer_length,
                    start_n_top=model.config.start_n_top,
                    end_n_top=model.config.end_n_top,
                    output_dir=training_args.output_dir,
                    # log_level=log_level,
                    prefix=stage,
                )

        else:
            predictions = postprocess_qa_predictions(
                examples=examples,
                features=features,
                predictions=predictions,
                version_2_with_negative=data_args.version_2_with_negative,
                n_best_size=data_args.n_best_size,
                max_answer_length=data_args.max_answer_length,
                output_dir=training_args.output_dir,
                prefix=stage,
            )

        # Format the result to the format the metric expects.
        if data_args.version_2_with_negative:
            if data_args.beam_search:
                formatted_predictions = [
                    {
                        "id": k,
                        "prediction_text": v,
                        "no_answer_probability": scores_diff_json[k],
                    }
                    for k, v in predictions.items()
                ]
            else:
                # Without beam search, the null answer has already been
                # selected during post-processing, so a fixed probability
                # of 0.0 is reported for the SQuAD v2 metric.
                formatted_predictions = [
                    {
                        "id": k,
                        "prediction_text": v,
                        "no_answer_probability": 0.0,
                    }
                    for k, v in predictions.items()
                ]
        else:
            formatted_predictions = [
                {"id": k, "prediction_text": v}
                for k, v in predictions.items()
            ]

        references = [
            {"id": ex["id"], "answers": ex[answer_column_name]}
            for ex in examples
        ]
        return EvalPrediction(predictions=formatted_predictions,
                              label_ids=references)
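
Both branches produce the same output shape, so the metric code downstream does not need to know whether beam search was used. As a rough illustration (not part of the example above), the dictionaries built here match what the `evaluate` library's `squad_v2` metric expects; the id and answer text below are invented for demonstration.

    import evaluate

    squad_v2_metric = evaluate.load("squad_v2")

    # Shapes mirror `formatted_predictions` and `references` above.
    sample_predictions = [
        {"id": "0001", "prediction_text": "Denmark",
         "no_answer_probability": 0.0},
    ]
    sample_references = [
        {"id": "0001",
         "answers": {"text": ["Denmark"], "answer_start": [94]}},
    ]

    results = squad_v2_metric.compute(
        predictions=sample_predictions, references=sample_references
    )
    print(results["exact"], results["f1"])  # exact-match and F1 scores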