def main():
    """Evaluate candidate event trees against ground-truth events.

    For each pickled candidate-tree file given via ``-c``, select the K
    best trees, score them against the true events, and write one row of
    metric scores per input file to ``tmp/evaluation.csv``.
    """
    import os
    import cPickle as pkl
    import pandas as pd
    from util import json_load
    from max_cover import k_best_trees
    import argparse

    parser = argparse.ArgumentParser('Evaluate the events')
    parser.add_argument('-c', '--cand_trees_path', required=True, nargs='+')
    parser.add_argument('--interactions_path', required=True)
    parser.add_argument('--events_path', required=True)
    args = parser.parse_args()

    interactions = json_load(args.interactions_path)
    true_events = json_load(args.events_path)

    # NOTE(review): `metrics` and `evaluate_meta_tree_result` are not
    # imported in this function -- presumably `from sklearn import metrics`
    # and the evaluation helper exist at module level; confirm before use.
    methods = [metrics.adjusted_rand_score,
               metrics.adjusted_mutual_info_score,
               metrics.homogeneity_score,
               metrics.completeness_score,
               metrics.v_measure_score]
    K = 10  # number of best trees kept per candidate file

    indexes = []
    scores = []
    for p in args.cand_trees_path:
        # fix: open the pickle in binary mode and close the handle promptly
        # (the original `pkl.load(open(p))` leaked file objects)
        with open(p, 'rb') as f:
            cand_trees = pkl.load(f)
        pred_trees = k_best_trees(cand_trees, K)
        indexes.append(os.path.basename(p))
        scores.append(evaluate_meta_tree_result(
            true_events,
            pred_trees,
            [i['message_id'] for i in interactions],
            methods))

    df = pd.DataFrame(scores,
                      index=indexes,
                      columns=([m.__name__ for m in methods] +
                               [m.__name__ + "(all)" for m in methods] +
                               ['precision', 'recall', 'f1']))
    df.to_csv('tmp/evaluation.csv')
def main():
    """Score each candidate-tree pickle against the true events.

    Loads each ``-c`` pickle, keeps the K best trees, evaluates them with
    the clustering metrics below, and dumps the per-file score table to
    ``tmp/evaluation.csv``.
    """
    import os
    import cPickle as pkl
    import pandas as pd
    from util import json_load
    from max_cover import k_best_trees
    import argparse

    parser = argparse.ArgumentParser('Evaluate the events')
    parser.add_argument('-c', '--cand_trees_path', required=True, nargs='+')
    parser.add_argument('--interactions_path', required=True)
    parser.add_argument('--events_path', required=True)
    args = parser.parse_args()

    interactions = json_load(args.interactions_path)
    true_events = json_load(args.events_path)

    # NOTE(review): `metrics` (sklearn?) and `evaluate_meta_tree_result`
    # are used but not imported here -- they must come from module scope;
    # verify that import exists.
    methods = [
        metrics.adjusted_rand_score,
        metrics.adjusted_mutual_info_score,
        metrics.homogeneity_score,
        metrics.completeness_score,
        metrics.v_measure_score
    ]
    K = 10  # how many best trees to keep per pickle

    indexes = []
    scores = []
    for p in args.cand_trees_path:
        # fix: the original leaked the handle from `open(p)` and opened the
        # pickle in text mode; use a context manager with 'rb'
        with open(p, 'rb') as fh:
            cand_trees = pkl.load(fh)
        pred_trees = k_best_trees(cand_trees, K)
        indexes.append(os.path.basename(p))
        scores.append(
            evaluate_meta_tree_result(true_events,
                                      pred_trees,
                                      [i['message_id'] for i in interactions],
                                      methods))

    column_names = ([m.__name__ for m in methods] +
                    [m.__name__ + "(all)" for m in methods] +
                    ['precision', 'recall', 'f1'])
    df = pd.DataFrame(scores, index=indexes, columns=column_names)
    df.to_csv('tmp/evaluation.csv')
def evaluate_general(
        result_paths,
        interactions_paths,
        events_paths,
        metrics,
        x_axis_name,
        x_axis_type,
        group_key,
        group_key_name_func,
        sort_keyfunc=None,
        xticks=None,
        K=10,
):
    """Build one DataFrame per metric from grouped result files.

    Returns a dict mapping each metric name to a DataFrame whose columns
    are the x-axis values (``xticks``) and whose rows are the groups
    (legend entries produced by ``group_key_name_func``).

    group_key: the legend part
    metrics: the y axis
    x_axis_name, sort_keyfunc: the x axis
    xticks: optional explicit x labels; inferred from the result paths
        when omitted (fix: default changed from the mutable ``[]`` to
        ``None`` -- behavior for callers is unchanged since an empty list
        was treated as "infer").
    K: number of best trees kept per result pickle
    """
    groups = group_paths(result_paths, group_key, sort_keyfunc)

    # inferring x labels (None and [] both mean "infer")
    if not xticks:
        xticks = set()
        for k, paths in groups:
            # fix: the original always read groups[0][1] here, which made
            # iterating over `groups` pointless; use each group's own
            # paths -- TODO confirm intended behavior with the author
            xticks |= set(get_values_by_key(paths, x_axis_name, x_axis_type))
        xticks = sorted(xticks)

    group_keys = [k for k, _ in groups]
    # (removed: `legend_names` was computed here but never used)

    # get metric names from one example evaluation
    # fix: pickles are now opened in binary mode via context managers; the
    # original leaked the handles from `open(...)`
    with open(events_paths[0], 'rb') as f:
        example_true_events = pkl.load(f)
    example_all_entry_ids = get_interaction_ids(interactions_paths[0])
    with open(groups[0][1][0], 'rb') as f:
        example_trees = pkl.load(f)
    metric_names = evaluate_meta_tree_result(
        example_true_events,
        k_best_trees(example_trees, K),
        example_all_entry_ids,
        metrics
    ).keys()

    # enhance groups with the interactions/events paths that belong to
    # each result path
    result_path2all_paths = {tpl[0]: tpl
                             for tpl in zip(result_paths,
                                            interactions_paths,
                                            events_paths)}
    enhanced_groups = defaultdict(list)
    # fix: loop variable renamed -- it used to shadow the `result_paths`
    # parameter
    for k, grp_result_paths in groups:
        i = 0
        for x in xticks:
            if (i < len(grp_result_paths) and
                    x_axis_type(parse_result_path(
                        grp_result_paths[i])[x_axis_name]) == x):
                enhanced_groups[k].append(
                    result_path2all_paths[grp_result_paths[i]])
                i += 1
            else:
                # the result is absent for this x position; pad so every
                # row has one entry per xtick
                enhanced_groups[k].append((None, None, None))

    # 3d array laid out as (method, x_axis, metric); absent results
    # become rows of NaN
    data3d = []
    for method, _ in groups:
        data2d = []
        for result_path, interactions_path, events_path \
                in enhanced_groups[method]:
            if result_path is None:
                data2d.append([np.nan for m in metric_names])
            else:
                with open(events_path, 'rb') as f:
                    true_events = pkl.load(f)
                with open(result_path, 'rb') as f:
                    cand_trees = pkl.load(f)
                data2d.append(
                    evaluate_meta_tree_result(
                        true_events,
                        k_best_trees(cand_trees, K),
                        get_interaction_ids(interactions_path),
                        metrics,
                    ).values()
                )
        data3d.append(data2d)

    print(metric_names)

    # change axes from (method, x, metric) to (metric, method, x)
    data3d = np.swapaxes(data3d, 0, 1)
    data3d = np.swapaxes(data3d, 0, 2)

    group_keys = [group_key_name_func(k) for k in group_keys]
    ret = {}
    for metric, matrix in itertools.izip(metric_names, data3d):
        ret[metric] = pd.DataFrame(matrix, columns=xticks, index=group_keys)
    return ret
def evaluate_general(result_paths, interactions_paths, events_paths,
                     metrics,
                     x_axis_name, x_axis_type,
                     group_key, group_key_name_func,
                     sort_keyfunc=None,
                     xticks=None,
                     K=10):
    """Collect per-metric score tables from grouped result pickles.

    Returns {metric_name: DataFrame} where each DataFrame has the x-axis
    values as columns and one row per group (named via
    ``group_key_name_func``).

    group_key: the legend part
    metrics: the y axis
    x_axis_name, sort_keyfunc: the x axis
    xticks: explicit x labels, or None/[] to infer them from the paths
        (fix: the mutable default ``[]`` was replaced with ``None``;
        callers see no difference because an empty list already meant
        "infer").
    K: number of best trees retained per result pickle
    """
    groups = group_paths(result_paths, group_key, sort_keyfunc)

    # inferring x labels when the caller did not supply them
    if not xticks:
        xticks = set()
        for k, paths in groups:
            # fix: originally this read groups[0][1] on every iteration,
            # defeating the loop; each group's own `paths` is used now --
            # TODO confirm this was the intent
            xticks |= set(
                get_values_by_key(paths, x_axis_name, x_axis_type))
        xticks = sorted(xticks)

    group_keys = [k for k, _ in groups]
    # (dead local `legend_names` removed; group names are computed below)

    # probe one evaluation to learn the metric names
    # fix: all pickle reads use binary-mode context managers instead of
    # the leaked text-mode `open(...)` handles of the original
    with open(events_paths[0], 'rb') as fh:
        example_true_events = pkl.load(fh)
    example_all_entry_ids = get_interaction_ids(interactions_paths[0])
    with open(groups[0][1][0], 'rb') as fh:
        example_trees = pkl.load(fh)
    metric_names = evaluate_meta_tree_result(
        example_true_events,
        k_best_trees(example_trees, K),
        example_all_entry_ids,
        metrics).keys()

    # pair each result path with its interactions/events paths
    result_path2all_paths = {
        tpl[0]: tpl
        for tpl in zip(result_paths, interactions_paths, events_paths)
    }

    # align every group against the full xtick axis, inserting
    # (None, None, None) where a result is missing
    enhanced_groups = defaultdict(list)
    for k, grp_paths in groups:  # renamed: was shadowing `result_paths`
        i = 0
        for x in xticks:
            if i < len(grp_paths) and x_axis_type(
                    parse_result_path(grp_paths[i])[x_axis_name]) == x:
                enhanced_groups[k].append(result_path2all_paths[grp_paths[i]])
                i += 1
            else:
                # the result is absent
                enhanced_groups[k].append((None, None, None))

    # build the (method, x, metric) table; missing cells are NaN
    data3d = []
    for method, _ in groups:
        data2d = []
        for result_path, interactions_path, events_path in enhanced_groups[
                method]:
            if result_path is None:
                data2d.append([np.nan for m in metric_names])
            else:
                with open(events_path, 'rb') as fh:
                    true_events = pkl.load(fh)
                with open(result_path, 'rb') as fh:
                    cand_trees = pkl.load(fh)
                data2d.append(
                    evaluate_meta_tree_result(
                        true_events,
                        k_best_trees(cand_trees, K),
                        get_interaction_ids(interactions_path),
                        metrics).values())
        data3d.append(data2d)

    print(metric_names)

    # reorder axes: (method, x, metric) -> (metric, method, x)
    data3d = np.swapaxes(data3d, 0, 1)
    data3d = np.swapaxes(data3d, 0, 2)

    group_keys = [group_key_name_func(k) for k in group_keys]
    ret = {}
    for metric, matrix in itertools.izip(metric_names, data3d):
        ret[metric] = pd.DataFrame(matrix, columns=xticks, index=group_keys)
    return ret