from itertools import combinations
from typing import Dict, List, Tuple

import pandas as pd
from tqdm import tqdm

# Metrics, BinaryClassificationMetrics, Role, Argument, Response and the helpers used
# below (get_predicate_idx_label, get_n_predicates, get_sent_map, yield_paired_predicates,
# build_all_qa_pairs, fill_answer, find_matches, align_by_argument, count_arguments,
# count_labeled_arg_matches) are assumed to be defined or imported elsewhere in this package.


def evaluate_inter_generator_agreement(annot_df: pd.DataFrame, verbose: bool = False) -> float:
    cols = ['qasrl_id', get_predicate_idx_label(annot_df)]
    # number of distinct generators (workers) per predicate (currently unused)
    n_gen = annot_df.groupby(cols).worker_id.transform(pd.Series.nunique)
    workers = annot_df.worker_id.unique().tolist()
    n_workers = len(workers)
    annot_df = annot_df.copy()
    n_predicates = annot_df[cols].drop_duplicates().shape[0]
    if verbose:
        print("n_workers: ", n_workers)
        print("n_predicates: ", n_predicates)
        print("metric\tworker_1\tworker_2\tprec\trecall\tf1")
    total_arg_metric = Metrics.empty()
    total_larg_metric = Metrics.empty()
    total_role_metric = Metrics.empty()
    total_nomIdent_metric: BinaryClassificationMetrics = BinaryClassificationMetrics.empty()
    # accumulate agreement over every unordered pair of workers (generators)
    for w1, w2 in combinations(workers, r=2):
        w1_df = annot_df[annot_df.worker_id == w1].copy()
        w2_df = annot_df[annot_df.worker_id == w2].copy()
        # compute agreement measures
        arg_metrics, labeled_arg_metrics, role_metrics, nom_ident_metrics, _ = \
            eval_datasets(w1_df, w2_df)
        if verbose:
            print(f"\nComparing {w1} to {w2}: [p,r,f1]")
            merged_df = pd.merge(w1_df, w2_df, on='key')
            print(f"Number of shared predicates: {get_n_predicates(merged_df)}")
            print(f"ARG:\t{arg_metrics}")
            print(f"Labeled ARG:\t{labeled_arg_metrics}")
            print(f"ROLE:\t{role_metrics}")
            print(f"NOM_IDENT:\t{w1}\t{w2}\t{nom_ident_metrics.prec():.3f}\t"
                  f"{nom_ident_metrics.recall():.3f}\t{nom_ident_metrics.f1():.3f}")
            print(f"NOM_IDENT accuracy: {nom_ident_metrics.accuracy():.3f}, "
                  f"{int(nom_ident_metrics.errors())} mismatches out of "
                  f"{nom_ident_metrics.instances()} predicates.")
        total_arg_metric += arg_metrics
        total_larg_metric += labeled_arg_metrics
        total_role_metric += role_metrics
        total_nomIdent_metric += nom_ident_metrics
    print("\nOverall pairwise agreement:")
    print(f"arg-f1 \t {total_arg_metric.f1():.4f}")
    print(f"labeled-arg-f1 \t {total_larg_metric.f1():.4f}")
    print(f"role-f1 \t {total_role_metric.f1():.4f}")
    print(f"is-verbal-accuracy \t {total_nomIdent_metric.accuracy():.4f} "
          f"for {total_nomIdent_metric.instances()} pairwise comparisons.")
    return total_arg_metric.f1()
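# Minimal usage sketch for the agreement entry point above (not part of the original
# module). The CSV path and pd.read_csv call are hypothetical placeholders for however
# annotations are stored in this project; the loaded DataFrame is assumed to follow the
# schema that eval_datasets / yield_paired_predicates expect (worker_id, qasrl_id,
# the predicate-index column, etc.).
def _example_inter_generator_agreement_run(annotation_csv_path: str) -> float:
    """Illustrative only: load an annotation file and report pairwise generator agreement."""
    annot_df = pd.read_csv(annotation_csv_path)  # hypothetical storage format
    return evaluate_inter_generator_agreement(annot_df, verbose=True)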
def eval_labeled_arguments(grt_roles: List[Role], sys_roles: List[Role],
                           sys_to_grt: Dict[Argument, Argument]) -> Metrics:
    """ LA metric - Labeled Argument match - spans overlap and questions are equivalent. """
    tp_arg_count = count_labeled_arg_matches(grt_roles, sys_roles, sys_to_grt)
    fp_arg_count = count_arguments(sys_roles) - tp_arg_count
    fn_arg_count = count_arguments(grt_roles) - tp_arg_count
    return Metrics(tp_arg_count, fp_arg_count, fn_arg_count)
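# Illustrative sketch only - this is NOT the project's Metrics class, which is defined
# elsewhere in the package. Based on how Metrics is used in this file, it is assumed to
# hold tp/fp/fn counts, support empty construction and addition (micro-averaging across
# predicates and worker pairs), and expose prec/recall/f1.
from dataclasses import dataclass


@dataclass
class _MetricsSketch:
    true_positive: int = 0
    false_positive: int = 0
    false_negative: int = 0

    @classmethod
    def empty(cls) -> "_MetricsSketch":
        return cls(0, 0, 0)

    def __add__(self, other: "_MetricsSketch") -> "_MetricsSketch":
        # element-wise sum of counts, i.e. micro-averaged accumulation
        return _MetricsSketch(self.true_positive + other.true_positive,
                              self.false_positive + other.false_positive,
                              self.false_negative + other.false_negative)

    def prec(self) -> float:
        denom = self.true_positive + self.false_positive
        return self.true_positive / denom if denom else 0.0

    def recall(self) -> float:
        denom = self.true_positive + self.false_negative
        return self.true_positive / denom if denom else 0.0

    def f1(self) -> float:
        p, r = self.prec(), self.recall()
        return 2 * p * r / (p + r) if (p + r) else 0.0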
def eval_datasets(sys_df, grt_df, sent_map=None) \
        -> Tuple[Metrics, Metrics, Metrics, BinaryClassificationMetrics, pd.DataFrame]:
    if not sent_map:
        annot_df = pd.concat([
            sys_df[['qasrl_id', 'sentence']],
            grt_df[['qasrl_id', 'sentence']]
        ])
        sent_map = get_sent_map(annot_df)
    arg_metrics = Metrics.empty()
    labeled_arg_metrics = Metrics.empty()
    role_metrics = Metrics.empty()
    is_nom_counts = BinaryClassificationMetrics.empty()
    all_matchings = []
    for key, sys_response, grt_response in tqdm(yield_paired_predicates(sys_df, grt_df),
                                                leave=False):
        qasrl_id, target_idx = key
        tokens = sent_map[qasrl_id]
        # evaluate a single predicate: argument, labeled-argument, role and is-verbal metrics
        local_arg_metric, local_labeled_arg_metric, local_role_metric, local_is_nom_metric, sys_to_grt = \
            evaluate_response(sys_response, grt_response)
        arg_metrics += local_arg_metric
        labeled_arg_metrics += local_labeled_arg_metric
        role_metrics += local_role_metric
        is_nom_counts += local_is_nom_metric
        # collect the aligned (and unaligned) QA pairs of this predicate for error analysis
        all_args = build_all_qa_pairs(sys_response.roles, grt_response.roles, sys_to_grt)
        all_args['qasrl_id'] = qasrl_id
        all_args['target_idx'] = target_idx
        all_args['grt_arg_text'] = all_args.grt_arg.apply(fill_answer, tokens=tokens)
        all_args['sys_arg_text'] = all_args.sys_arg.apply(fill_answer, tokens=tokens)
        all_matchings.append(all_args)
    # when all_matchings is empty, return an empty DataFrame
    if not all_matchings:
        all_matchings = pd.DataFrame()
    else:
        all_matchings = pd.concat(all_matchings)
        all_matchings = all_matchings[[
            'grt_arg_text', 'sys_arg_text', 'grt_role', 'sys_role',
            'grt_arg', 'sys_arg', 'qasrl_id', 'target_idx'
        ]]
    return arg_metrics, labeled_arg_metrics, role_metrics, is_nom_counts, all_matchings
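# Usage sketch (illustrative only, not part of the original module): comparing a system
# prediction file against a ground-truth file with eval_datasets. The file paths and
# pd.read_csv calls are hypothetical placeholders for however system / gold annotations
# are serialized in this project.
def _example_system_vs_gold_run(sys_path: str, grt_path: str) -> pd.DataFrame:
    sys_df = pd.read_csv(sys_path)  # hypothetical system-output file
    grt_df = pd.read_csv(grt_path)  # hypothetical ground-truth file
    arg, labeled_arg, role, is_verbal, matchings = eval_datasets(sys_df, grt_df)
    print(f"arg-f1 \t {arg.f1():.4f}")
    print(f"labeled-arg-f1 \t {labeled_arg.f1():.4f}")
    print(f"role-f1 \t {role.f1():.4f}")
    print(f"is-verbal-accuracy \t {is_verbal.accuracy():.4f}")
    return matchings  # per-argument alignment table, useful for error analysis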
def evaluate_response(sys_response: Response, grt_response: Response):
    sys_roles: List[Role] = sys_response.roles
    grt_roles: List[Role] = grt_response.roles
    sys_to_grt = find_matches(sys_response.all_args(), grt_response.all_args())
    is_nom_metrics = BinaryClassificationMetrics.simple_boolean_decision(sys_response.is_verbal,
                                                                         grt_response.is_verbal)
    # todo decide on evaluation of roles where is_verbal mismatch - should the roles be
    #  included in the role_count metric?
    # Currently excluding these mismatches from the arg & roles metrics
    if is_nom_metrics.errors() == 0:
        arg_metrics = eval_arguments(grt_roles, sys_roles, sys_to_grt)
        labeled_arg_metrics = eval_labeled_arguments(grt_roles, sys_roles, sys_to_grt)
        role_metrics = eval_roles(grt_roles, sys_roles, sys_to_grt)
    else:
        arg_metrics = Metrics.empty()
        labeled_arg_metrics = Metrics.empty()
        role_metrics = Metrics.empty()
    return arg_metrics, labeled_arg_metrics, role_metrics, is_nom_metrics, sys_to_grt
def eval_roles(grt_roles: List[Role], sys_roles: List[Role],
               sys_to_grt: Dict[Argument, Argument]) -> Metrics:
    alignment = align_by_argument(grt_roles, sys_roles, sys_to_grt)
    tp, fp, fn = 0, 0, 0
    # a ground-truth role counts as a true positive iff it aligns to exactly one system role
    for grt_role in grt_roles:
        if alignment.has_single_alignment(grt_role, is_grt=True):
            tp += 1
        else:
            fn += 1
    # a system role without a single aligned ground-truth role is a false positive
    for sys_role in sys_roles:
        if not alignment.has_single_alignment(sys_role, is_grt=False):
            fp += 1
    return Metrics(tp, fp, fn)
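# Worked example of the role counting above (illustrative numbers only): suppose an
# annotation pair shares a predicate with 3 ground-truth roles and 4 system roles, and
# the argument-based alignment pairs up exactly 2 roles one-to-one. Then tp = 2 (aligned
# grt roles), fn = 3 - 2 = 1 (unaligned grt roles), fp = 4 - 2 = 2 (unaligned sys roles),
# giving precision 2/4 = 0.5, recall 2/3 ≈ 0.667 and F1 ≈ 0.571.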
def eval_arguments(grt_roles: List[Role], sys_roles: List[Role],
                   sys_to_grt: Dict[Argument, Argument]) -> Metrics:
    """ Unlabeled argument match - spans overlap, regardless of question equivalence. """
    # every system argument matched to a ground-truth argument is a true positive
    tp_arg_count = len(sys_to_grt)
    fp_arg_count = count_arguments(sys_roles) - tp_arg_count
    fn_arg_count = count_arguments(grt_roles) - tp_arg_count
    return Metrics(tp_arg_count, fp_arg_count, fn_arg_count)