Пример #1
0
def f1_per_depth(dist_gold: List, dist_prediction: List, max_depth: int):
    """
    Find at which depth prediction mismatches happen (when the output forms a tree).

    Only essays whose prediction passes ``TreeBuilder.is_tree()`` contribute.
    For those essays the gold and predicted node depths are pooled over all
    essays, a classification report is printed, and the per-depth F1 scores
    are returned.

    Args:
        dist_gold (List): gold answer per essay
        dist_prediction (List): predicted answer per essay
        max_depth (int): max structure depth in the dataset; scores are
            returned for depths ``0 .. max_depth - 1``

    Returns:
        list: ``max_depth`` floats, where element ``i`` is the F1 score of
        depth ``i``. A depth that has no entry in the report gets ``0.0``.
        If no prediction forms a tree, every element is ``0.0``.

    Raises:
        IndexError: if ``dist_prediction`` has fewer items than ``dist_gold``.
    """
    gold_all_depth = []
    pred_all_depth = []

    for i, gold in enumerate(dist_gold):
        rep_gold = TreeBuilder(gold)
        rep_pred = TreeBuilder(dist_prediction[i])

        # Depths are only compared when the predicted structure is a tree.
        if rep_pred.is_tree():
            gold_all_depth.append(rep_gold.node_depths())
            pred_all_depth.append(rep_pred.node_depths())

    print("=== Depth prediction performance when output forms a tree ===")

    if not gold_all_depth:
        # Nothing to evaluate: avoid handing empty label lists to
        # classification_report and fall back to the same 0.0 default that
        # is used below for depths absent from the report.
        print("No prediction forms a tree; no depth report available.")
        return [0.0] * max_depth

    gold_all_depth_flat = flatten_list(gold_all_depth)
    pred_all_depth_flat = flatten_list(pred_all_depth)

    print(
        classification_report(y_true=gold_all_depth_flat,
                              y_pred=pred_all_depth_flat,
                              digits=3))
    # Second call: the dict form is needed to look scores up by label.
    report = classification_report(y_true=gold_all_depth_flat,
                                   y_pred=pred_all_depth_flat,
                                   output_dict=True)

    f1s = []
    for depth in range(max_depth):
        # The report is looked up by the string form of the depth label.
        try:
            f1s.append(report[str(depth)]['f1-score'])
        except KeyError:
            # Depth does not occur in the evaluated essays.
            f1s.append(0.0)

    return f1s