Exemplo n.º 1
0
def yield_paired_predicates(sys_df: pd.DataFrame, grt_df: pd.DataFrame) -> Generator[Tuple[Tuple[str,int],Response,Response], None, None]:
    """
    Pair up system and ground-truth annotations per predicate.

    Only predicates occurring in BOTH frames (keyed by qasrl_id + target_idx)
    are yielded; predicates unique to one side are silently dropped.

    :param sys_df: system (predicted) annotation rows
    :param grt_df: ground-truth annotation rows
    :return: generator of ((qasrl_id, target_idx), sys_response, grt_response)
    """
    key_columns = ['qasrl_id', 'target_idx']
    grt_keys = grt_df[key_columns].drop_duplicates()
    sys_keys = sys_df[key_columns].drop_duplicates()
    # inner join keeps only the predicates annotated on both sides
    shared_predicates = pd.merge(grt_keys, sys_keys, how='inner')
    for _, predicate in shared_predicates.iterrows():
        sys_subset = sys_df[filter_ids(sys_df, predicate)].copy()
        grt_subset = grt_df[filter_ids(grt_df, predicate)].copy()
        yield ((predicate.qasrl_id, predicate.target_idx),
               decode_response(sys_subset),
               decode_response(grt_subset))
Exemplo n.º 2
0
def auto_consolidate_gen_annot_df(df: pd.DataFrame) -> pd.DataFrame:
    """
    Return consolidated (non-redundant) QANom annotations.

    Algorithm is precision-oriented:
        take conjunction of is-verbal,
        filter only roles (questions) aligned by their answers,
        for answers: take larger set of answers;
            if even - take longer (and select the question corresponding to selected answers).

    :param df: generation annotation DataFrame containing multiple workers' annotation per predicate
    :return: annotation csv (encoded, not decoded) of the consolidated annotations
    """
    from annotations.decode_encode_answers import SPAN_SEPARATOR
    # Columns that are constant per predicate and copied through verbatim.
    data_columns = [
        'qasrl_id', 'sentence', 'target_idx', 'key', 'verb', 'verb_form'
    ]
    # Columns whose per-worker values get merged into a single joined string.
    to_be_conjoined_columns = ['worker_id', 'assign_id']
    # Rest of columns are annotation-columns - they are to be decoded from
    # consolidated Response. (Except from 'answer' which requires both
    # answer_range from Response and sentence.)
    pred_dfs: List[pd.DataFrame] = []
    for key, pred_df in df.groupby('key'):
        responses = {
            worker: decode_encode_answers.decode_response(worker_df)
            for worker, worker_df in pred_df.groupby('worker_id')
        }
        if len(responses) < 2:
            # only one generator for predicate - nothing to consolidate
            print(
                f"Warning: predicate {key} has only one generator. Taking his response as final."
            )
            consolidated_response = next(iter(responses.values()))
        else:
            # two generators = automatically consolidate responses
            consolidated_response = auto_consolidate_predicate(
                list(responses.values()))
        # All rows in the group share the predicate-level data columns;
        # take them once from the first row instead of re-materializing lists.
        first_row = pred_df.iloc[0]
        cons_pred_df = encode_response(consolidated_response,
                                       first_row['sentence'])
        # add other (predicate-constant) columns to resulting df
        cons_pred_df = cons_pred_df.assign(
            **{col: first_row[col]
               for col in data_columns})
        # sorted() makes the joined id string deterministic across runs
        # (a bare set() has unstable iteration order).
        cons_pred_df = cons_pred_df.assign(
            **{
                col: SPAN_SEPARATOR.join(sorted(set(pred_df[col])))
                for col in to_be_conjoined_columns
            })
        # re-order columns for convenience
        cons_pred_df = cons_pred_df[FINAL_COLUMNS]
        pred_dfs.append(cons_pred_df)
    out = pd.concat(pred_dfs)
    return out
Exemplo n.º 3
0
def iterate_qanom_responses(
        annot_df: pd.DataFrame
) -> Iterator[Tuple[str, str, int, str, Response]]:
    """
    Iterate over every QANomResponse in the annotation DataFrame.

    Groups rows by predicate ('key') and then by annotator ('worker_id'),
    decoding each worker's rows for a predicate into one Response.

    :param annot_df: annotation DataFrame
    :return: iterator of (sent_id, sent, target_index, worker_id, Response)
    """
    idx_column = get_predicate_idx_label(annot_df)
    for _, per_predicate in annot_df.groupby('key'):
        for worker, per_assignment in per_predicate.groupby('worker_id'):
            decoded = decode_response(per_assignment)
            first = per_assignment.iloc[0]
            yield (first["qasrl_id"], first["sentence"], first[idx_column],
                   worker, decoded)