示例#1
0
def main():
    """Score candidate-tree pickles against the true events and dump a CSV.

    Command line arguments:

    - ``-c/--cand_trees_path``: one or more pickle files of candidate trees
    - ``--interactions_path``: file read with ``json_load``; every entry
      must provide a ``'message_id'`` key
    - ``--events_path``: file with the true events, read with ``json_load``

    For every candidate file, the output of ``k_best_trees(cand_trees, K)``
    (``K = 10``) is passed to ``evaluate_meta_tree_result`` and the scores
    become one row of ``tmp/evaluation.csv``, indexed by the base name of
    the candidate file.
    """
    import os
    import cPickle as pkl
    import pandas as pd
    from util import json_load
    from max_cover import k_best_trees
    import argparse

    parser = argparse.ArgumentParser('Evaluate the events')
    parser.add_argument('-c', '--cand_trees_path', required=True, nargs='+')
    parser.add_argument('--interactions_path', required=True)
    parser.add_argument('--events_path', required=True)
    args = parser.parse_args()

    interactions = json_load(args.interactions_path)
    true_events = json_load(args.events_path)
    methods = [metrics.adjusted_rand_score,
               metrics.adjusted_mutual_info_score,
               metrics.homogeneity_score,
               metrics.completeness_score,
               metrics.v_measure_score]

    K = 10
    # The id list does not depend on the candidate file: build it once.
    all_entry_ids = [i['message_id'] for i in interactions]

    indexes = []
    scores = []
    for p in args.cand_trees_path:
        # Binary mode + context manager: pickles are binary data and the
        # handle must not be leaked.
        # NOTE: unpickling can execute arbitrary code; only pass trusted
        # files on the command line.
        with open(p, 'rb') as f:
            cand_trees = pkl.load(f)
        pred_trees = k_best_trees(cand_trees, K)

        indexes.append(os.path.basename(p))
        scores.append(evaluate_meta_tree_result(
            true_events,
            pred_trees,
            all_entry_ids,
            methods
        ))

    method_names = [m.__name__ for m in methods]
    df = pd.DataFrame(scores, index=indexes,
                      columns=method_names +
                      [name + "(all)" for name in method_names] +
                      ['precision', 'recall', 'f1'])

    # Make sure the target directory exists so that the computed scores
    # are not lost to a failing write.
    output_path = 'tmp/evaluation.csv'
    output_dir = os.path.dirname(output_path)
    if not os.path.isdir(output_dir):
        os.makedirs(output_dir)
    df.to_csv(output_path)
def main():
    """Evaluate candidate trees from the command line and write a CSV.

    Arguments parsed from ``sys.argv``:

    - ``-c/--cand_trees_path`` (one or more): pickled candidate trees
    - ``--interactions_path``: loaded with ``json_load``; each entry is
      indexed by ``'message_id'``
    - ``--events_path``: the true events, loaded with ``json_load``

    Each candidate file is reduced with ``k_best_trees(cand_trees, K)``
    where ``K = 10`` and scored by ``evaluate_meta_tree_result``. The rows
    (one per candidate file, labelled with the file's base name) are
    written to ``tmp/evaluation.csv``.
    """
    import os
    import cPickle as pkl
    import pandas as pd
    from util import json_load
    from max_cover import k_best_trees
    import argparse

    parser = argparse.ArgumentParser('Evaluate the events')
    parser.add_argument('-c', '--cand_trees_path', required=True, nargs='+')
    parser.add_argument('--interactions_path', required=True)
    parser.add_argument('--events_path', required=True)
    args = parser.parse_args()

    interactions = json_load(args.interactions_path)
    true_events = json_load(args.events_path)
    methods = [
        metrics.adjusted_rand_score, metrics.adjusted_mutual_info_score,
        metrics.homogeneity_score, metrics.completeness_score,
        metrics.v_measure_score
    ]

    K = 10
    # Loop-invariant: the same ids are used for every candidate file.
    all_entry_ids = [i['message_id'] for i in interactions]

    indexes = []
    scores = []
    for p in args.cand_trees_path:
        # Open in binary mode and close the handle deterministically.
        # NOTE: pickle may run arbitrary code while loading; the candidate
        # files must come from a trusted source.
        with open(p, 'rb') as f:
            cand_trees = pkl.load(f)
        pred_trees = k_best_trees(cand_trees, K)

        indexes.append(os.path.basename(p))
        scores.append(
            evaluate_meta_tree_result(true_events, pred_trees,
                                      all_entry_ids, methods))

    method_names = [m.__name__ for m in methods]
    df = pd.DataFrame(scores,
                      index=indexes,
                      columns=method_names +
                      [name + "(all)" for name in method_names] +
                      ['precision', 'recall', 'f1'])

    # Create the output directory if needed; otherwise ``to_csv`` fails
    # after all the evaluation work has already been done.
    output_path = 'tmp/evaluation.csv'
    output_dir = os.path.dirname(output_path)
    if not os.path.isdir(output_dir):
        os.makedirs(output_dir)
    df.to_csv(output_path)
示例#3
0
def evaluate_general(
    result_paths,
    interactions_paths,
    events_paths,
    metrics,
    x_axis_name,
    x_axis_type,
    group_key,
    group_key_name_func,
    sort_keyfunc=None,
    xticks=None,
    K=10,
):
    """
    Evaluate every result file and return one table per metric.

    The result paths are grouped with
    ``group_paths(result_paths, group_key, sort_keyfunc)``. Each group
    becomes a row (the legend part), each x value a column and each metric
    (the y axis) a separate table.

    Parameters
    ----------
    result_paths, interactions_paths, events_paths:
        Parallel sequences; the i-th entries belong together. Result and
        events files are unpickled, interactions files are read with
        ``get_interaction_ids``.
    metrics:
        Forwarded unchanged to ``evaluate_meta_tree_result``.
    x_axis_name, x_axis_type:
        Key looked up in ``parse_result_path(path)`` and the callable used
        to convert that value into an x tick.
    group_key, sort_keyfunc:
        Forwarded to ``group_paths``.
    group_key_name_func:
        Maps a group key to the row label used in the returned tables.
    xticks:
        Column values. When empty or ``None`` they are inferred as the
        sorted union of the x values found in all groups.
    K:
        Second argument of ``k_best_trees``.

    Returns
    -------
    dict
        ``{metric name: pandas.DataFrame}`` with the group labels as index
        and ``xticks`` as columns. A cell is NaN when the group has no
        result for that x value.

    NOTE(review): the alignment below walks each group's paths in
    lock-step with ``xticks``, so it presumably relies on ``group_paths``
    returning the paths of a group ordered like ``xticks`` -- confirm
    against ``sort_keyfunc`` at the call sites.
    """
    def _load_pickle(path):
        # Binary mode and a context manager: no leaked handles.
        # NOTE: unpickling can execute arbitrary code; trusted files only.
        with open(path, 'rb') as f:
            return pkl.load(f)

    def _evaluate(result_path, interactions_path, events_path):
        return evaluate_meta_tree_result(
            _load_pickle(events_path),
            k_best_trees(_load_pickle(result_path), K),
            get_interaction_ids(interactions_path),
            metrics,
        )

    groups = group_paths(result_paths, group_key, sort_keyfunc)

    # inferring x labels from *every* group (the previous code looked at
    # the first group only, dropping x values that occur just elsewhere)
    if not xticks:
        inferred = set()
        for _, paths in groups:
            inferred.update(
                get_values_by_key(paths, x_axis_name, x_axis_type))
        xticks = sorted(inferred)

    legend_names = [group_key_name_func(k) for k, _ in groups]

    # get metric names (costs one extra evaluation)
    metric_names = list(
        _evaluate(groups[0][1][0], interactions_paths[0],
                  events_paths[0]).keys()
    )

    # enhance groups with the matching interactions/events paths
    result_path2all_paths = {
        tpl[0]: tpl
        for tpl in zip(result_paths, interactions_paths, events_paths)
    }
    enhanced_groups = defaultdict(list)
    for k, paths in groups:
        i = 0
        for x in xticks:
            if (i < len(paths) and
                    x_axis_type(
                        parse_result_path(paths[i])[x_axis_name]) == x):
                enhanced_groups[k].append(result_path2all_paths[paths[i]])
                i += 1
            else:
                # the result is absent
                enhanced_groups[k].append((None, None, None))

    # 3d data: (method, x_axis, metric)
    data3d = []
    for method, _ in groups:
        data2d = []
        for path_triple in enhanced_groups[method]:
            if path_triple[0] is None:
                data2d.append([np.nan] * len(metric_names))
            else:
                data2d.append(list(_evaluate(*path_triple).values()))
        data3d.append(data2d)
    print(metric_names)

    # change axis to (metric, method, x_axis)
    data3d = np.swapaxes(data3d, 0, 1)
    data3d = np.swapaxes(data3d, 0, 2)

    ret = {}
    for metric, matrix in zip(metric_names, data3d):
        ret[metric] = pd.DataFrame(matrix, columns=xticks,
                                   index=legend_names)

    return ret
def evaluate_general(result_paths,
                     interactions_paths,
                     events_paths,
                     metrics,
                     x_axis_name,
                     x_axis_type,
                     group_key,
                     group_key_name_func,
                     sort_keyfunc=None,
                     xticks=None,
                     K=10):
    """
    Return a 3D table as a dict of per-metric data frames.

    group_key: the legend part (rows); paths are grouped with
        ``group_paths(result_paths, group_key, sort_keyfunc)`` and labelled
        with ``group_key_name_func``
    metrics: the y axis; forwarded to ``evaluate_meta_tree_result``, whose
        result keys name the returned tables
    x_axis_name, x_axis_type: the x axis (columns); the value stored under
        ``x_axis_name`` in ``parse_result_path(path)`` converted with
        ``x_axis_type``
    xticks: explicit column values; when empty or ``None`` the sorted
        union of the x values of all groups is used
    K: second argument of ``k_best_trees``

    ``result_paths``, ``interactions_paths`` and ``events_paths`` are
    parallel sequences. Result and events files are unpickled,
    interactions files are read through ``get_interaction_ids``.

    Returns ``{metric name: pandas.DataFrame}``; a cell is NaN when a
    group has no result for the column's x value.

    NOTE(review): paths inside a group are matched against ``xticks`` in
    order, which presumably requires ``group_paths`` to return them sorted
    consistently with ``xticks`` -- verify with the callers.
    """
    def _load_pickle(path):
        # Read pickles in binary mode and always close the file.
        # NOTE: pickle can run arbitrary code on load; trusted input only.
        with open(path, 'rb') as f:
            return pkl.load(f)

    def _evaluate(result_path, interactions_path, events_path):
        return evaluate_meta_tree_result(
            _load_pickle(events_path),
            k_best_trees(_load_pickle(result_path), K),
            get_interaction_ids(interactions_path), metrics)

    groups = group_paths(result_paths, group_key, sort_keyfunc)

    # inferring x labels: take the union over all groups instead of
    # re-reading the first group on every iteration
    if not xticks:
        inferred = set()
        for _, paths in groups:
            inferred |= set(
                get_values_by_key(paths, x_axis_name, x_axis_type))
        xticks = sorted(inferred)

    legend_names = [group_key_name_func(k) for k, _ in groups]

    # get metric names (extra computing: one additional evaluation)
    metric_names = list(
        _evaluate(groups[0][1][0], interactions_paths[0],
                  events_paths[0]).keys())

    # enhance groups with other paths
    result_path2all_paths = {
        tpl[0]: tpl
        for tpl in zip(result_paths, interactions_paths, events_paths)
    }
    enhanced_groups = defaultdict(list)
    for k, paths in groups:
        i = 0
        for x in xticks:
            has_result = i < len(paths) and x_axis_type(
                parse_result_path(paths[i])[x_axis_name]) == x
            if has_result:
                enhanced_groups[k].append(result_path2all_paths[paths[i]])
                i += 1
            else:
                # the result is absent
                enhanced_groups[k].append((None, None, None))

    # 3d array: (method, x_axis, metric)
    missing_row_width = len(metric_names)
    data3d = []
    for method, _ in groups:
        data2d = []
        for result_path, interactions_path, events_path in \
                enhanced_groups[method]:
            if result_path is None:
                data2d.append([np.nan] * missing_row_width)
            else:
                scores = _evaluate(result_path, interactions_path,
                                   events_path)
                data2d.append(list(scores.values()))
        data3d.append(data2d)
    print(metric_names)

    # change axis to (metric, method, x_axis)
    data3d = np.swapaxes(data3d, 0, 1)
    data3d = np.swapaxes(data3d, 0, 2)

    ret = {}
    for metric, matrix in zip(metric_names, data3d):
        ret[metric] = pd.DataFrame(matrix, columns=xticks,
                                   index=legend_names)

    return ret