def roots(acc_trees, true_trees, k): roots = set() for t in acc_trees: rs = get_roots(t) if rs: roots.add(rs[0]) true_roots = set([get_roots(t)[0] for t in true_trees]) return len(roots & true_roots) / float(len(true_roots))
def get_gen_cand_tree_params(e):
    """Derive candidate-tree generation parameters from the event graph `e`.

    Returns a dict with three entries:

    - ``'U'``: the sum of the ``'c'`` attribute over all edges of `e`.
    - ``'roots'``: the value returned by ``get_roots(e)``.
    - ``'preprune_secs'``: the difference between the largest and the
      smallest node ``'timestamp'``, rounded up with ``math.ceil``.

    `e` must contain at least one node: ``np.max``/``np.min`` raise on an
    empty sequence.
    """
    # Use the built-in sum: passing a generator to np.sum is deprecated in
    # NumPy (it merely falls back to the built-in sum and warns), so calling
    # it directly gives the same value without relying on that fallback.
    U = sum(e[s][t]['c'] for s, t in e.edges_iter())
    roots = get_roots(e)

    timestamps = [e.node[n]['timestamp'] for n in e.nodes_iter()]
    preprune_secs = np.max(timestamps) - np.min(timestamps)

    return {
        'U': U,
        'roots': roots,
        'preprune_secs': math.ceil(preprune_secs),
    }
def get_gen_cand_tree_params(e):
    """Derive candidate-tree generation parameters from the event graph `e`.

    Returns a dict with three entries:

    - ``'U'``: the sum of the ``'c'`` attribute over all edges of `e`.
    - ``'roots'``: the value returned by ``get_roots(e)``.
    - ``'preprune_secs'``: the difference between the largest and the
      smallest node ``'timestamp'``, rounded up with ``math.ceil``.

    `e` must contain at least one node: ``np.max``/``np.min`` raise on an
    empty sequence.
    """
    # Use the built-in sum: passing a generator to np.sum is deprecated in
    # NumPy (it merely falls back to the built-in sum and warns), so calling
    # it directly gives the same value without relying on that fallback.
    U = sum(e[s][t]['c'] for s, t in e.edges_iter())
    roots = get_roots(e)

    timestamps = [e.node[n]['timestamp'] for n in e.nodes_iter()]
    preprune_secs = np.max(timestamps) - np.min(timestamps)

    return {
        'U': U,
        'roots': roots,
        'preprune_secs': math.ceil(preprune_secs),
    }
def to_bracket_notation(tree):
    """Serialize a rooted directed tree into nested-brace notation.

    A node is rendered as ``{<node><child_1><child_2>...}`` where the
    children (``tree.neighbors(node)``) are visited in sorted order and
    rendered recursively.  An empty tree is rendered as ``'{}'``.

    The traversal starts from the first root returned by `get_roots`.
    A non-empty `tree` must be an arborescence; otherwise the assertion
    fails with the tree's node list as its message.
    """
    def aux(node):
        rendered_children = ''.join(
            aux(child) for child in sorted(tree.neighbors(node))
        )
        # Always format with an explicit 2-tuple: the former leaf-only
        # shortcut ``'{%s}' % node`` treated a tuple-valued node as the
        # format arguments themselves and raised TypeError.
        return '{%s%s}' % (node, rendered_children)

    if tree.number_of_nodes() == 0:
        return '{}'

    assert nx.is_arborescence(tree), tree.nodes()
    return aux(get_roots(tree)[0])
def to_bracket_notation(tree):
    """Serialize a rooted directed tree into nested-brace notation.

    A node is rendered as ``{<node><child_1><child_2>...}`` where the
    children (``tree.neighbors(node)``) are visited in sorted order and
    rendered recursively.  An empty tree is rendered as ``'{}'``.

    The traversal starts from the first root returned by `get_roots`.
    A non-empty `tree` must be an arborescence; otherwise the assertion
    fails with the tree's node list as its message.
    """
    def aux(node):
        rendered_children = ''.join(
            aux(child) for child in sorted(tree.neighbors(node))
        )
        # Always format with an explicit 2-tuple: the former leaf-only
        # shortcut ``'{%s}' % node`` treated a tuple-valued node as the
        # format arguments themselves and raised TypeError.
        return '{%s%s}' % (node, rendered_children)

    if tree.number_of_nodes() == 0:
        return '{}'

    assert nx.is_arborescence(tree), tree.nodes()
    return aux(get_roots(tree)[0])
def run(candidate_tree_path, k, id2people, id2interaction,
        dirname=None, to_original_graph=False):
    """Detect events from a candidate-tree file and dump them as d3 graphs.

    Parameters:
    - candidate_tree_path: path handed to `detect_events_given_path` and
      `get_output_path`.
    - k: passed through to `detect_events_given_path`.
    - id2people: mapping indexed by the ids stored in each node's
      ``'sender_id'`` and ``'recipient_ids'`` attributes.
    - id2interaction: mapping indexed by node id; each value must provide
      ``'subject'`` and ``'body'`` and may provide ``'retweet_count'`` and
      ``'favorite_count'`` (missing ones are stored as None).
    - dirname: optional output directory, created if it does not exist.
    - to_original_graph: when true, every event is passed through
      `convert_to_original_graph` before export.

    Each event node is annotated in place with ``'sender'``,
    ``'recipients'``, ``'subject'``, ``'body'``, the two count fields,
    a boolean ``'root'`` flag and a stringified ``'datetime'``.  The
    result of `to_d3_graph` for every event is written with `json_dump`
    to the path returned by `get_output_path`.
    """
    if dirname and not os.path.exists(dirname):
        os.makedirs(dirname)

    output_path = get_output_path(candidate_tree_path, dirname)
    events = detect_events_given_path(candidate_tree_path, k)

    # Attach people and message content to every node.
    for e in events:
        root = get_roots(e)[0]
        for n in e.nodes_iter():
            attrs = e.node[n]
            attrs['sender'] = id2people[attrs['sender_id']]
            attrs['recipients'] = [id2people[id_]
                                   for id_ in attrs['recipient_ids']]

            interaction = id2interaction[n]
            attrs['subject'] = interaction['subject']
            # 'body' used to be assigned twice with the same value; once is
            # enough.
            attrs['body'] = interaction['body']
            for f in ('retweet_count', 'favorite_count'):
                attrs[f] = interaction.get(f)

            attrs['root'] = (n == root)
            attrs['datetime'] = str(attrs['datetime'])

    if to_original_graph:
        # Build a real list so the behaviour does not depend on whether
        # `map` is eager (Python 2) or lazy (Python 3).
        events = [convert_to_original_graph(e) for e in events]

    d3_events = [to_d3_graph(e) for e in events]
    json_dump(d3_events, output_path)
def main():
    """Print document-count statistics for one detected event.

    Reads the pickle file named by ``sys.argv[1]``, takes the first element
    of ``detect_events(candidate_events, 5)`` and prints, for that event:
    the number of message ids among the root's children documents, among
    all documents, and along the longest path, followed by the subjects of
    the longest-path documents.  Interactions are loaded from
    ``'data/enron.json'`` via `load_id2obj_dict`.
    """
    import sys
    from pprint import pprint

    pkl_path = sys.argv[1]
    # Open in binary mode and close the handle deterministically.
    # NOTE(review): unpickling can execute arbitrary code -- only run this
    # on pickle files from a trusted source.
    with open(pkl_path, 'rb') as pkl_file:
        candidate_events = pkl.load(pkl_file)

    g = detect_events(candidate_events, 5)[0]
    mid2interaction = load_id2obj_dict('data/enron.json', 'message_id')
    root = get_roots(g)[0]

    pprint('children documents count: {}'.format(
        count_message_ids(children_documents(g, root, mid2interaction))))
    pprint('all documents count: {}'.format(
        count_message_ids(all_documents(g, mid2interaction))))

    lpd = longest_path_documents(g, root, mid2interaction)
    pprint('longest path documents count: {}'.format(count_message_ids(lpd)))
    pprint('longest path documents\' subject: {}'.format(
        [d['subject'] for d in lpd]))
def test_gen_event_with_known_tree_structure():
    # Generates one synthetic event and checks three things:
    #   1. no interaction has its sender as its first recipient;
    #   2. every edge's 'c_type' is consistent with the sender/recipient
    #      of its two endpoints ('r', 'f', or anything else);
    #   3. the event is an arborescence of the requested size whose meta
    #      graph has exactly one root.
    event_size = 100
    participants_n = 10
    event = gen_event_with_known_tree_structure(
        event_size=event_size,
        participants=range(participants_n),
        start_time=10, end_time=110,
        event_topic_param=random_topic(10, topic_noise=0.0001)[0],
        topic_noise=1,
        alpha=1.0, tau=0.8,
        forward_proba=0.3,
        reply_proba=0.5,
        create_new_proba=0.2
    )

    def sender_and_first_recipient(node):
        attrs = event.node[node]
        return attrs['sender_id'], attrs['recipient_ids'][0]

    for node in event.nodes_iter():
        sender, recipient = sender_and_first_recipient(node)
        assert_true(sender != recipient)

    for parent, child in event.edges_iter():
        parent_sender, parent_recipient = sender_and_first_recipient(parent)
        child_sender, child_recipient = sender_and_first_recipient(child)
        edge_type = event[parent][child]['c_type']

        if edge_type == 'r':
            # 'r' edge: the two endpoints swap sender and recipient.
            assert_equal(parent_sender, child_recipient)
            assert_equal(child_sender, parent_recipient)
        elif edge_type == 'f':
            # 'f' edge: the parent's recipient sends the child, and not
            # back to the parent's sender.
            assert_equal(parent_recipient, child_sender)
            assert_true(child_recipient != parent_sender)
        else:
            # Any other edge type: same sender on both endpoints.
            assert_equal(parent_sender, child_sender)

    interactions = [event.node[node] for node in event.nodes_iter()]
    meta_graph = IU.get_meta_graph(
        interactions,
        decompose_interactions=False,
        remove_singleton=True,
        given_topics=True,
        convert_time=False
    )

    assert_equal(1, len(get_roots(meta_graph)))
    assert_equal(event_size, len(interactions))
    assert_true(nx.is_arborescence(event))
def draw_pred_tree_against_true_tree(pred_tree, true_tree, meta_graph,
                                     draw_which='together',
                                     output_path_suffix=''):
    """
    Draw the predicted event against the true event and save it as a PNG.

    The meta graph only supplies edge attributes and is used for sanity
    checks; it is not drawn itself.

    - draw_which == "together": draws nx.compose(true_tree, pred_tree) and
      saves to 'tmp/tree_inspection/true_event_vs_pred_event<suffix>.png'.
    - any other value: draws only true_tree and saves to
      'tmp/tree_inspection/true_event<suffix>.png'.

    Nodes and edges are coloured by membership: green when in both trees,
    blue when only in the true tree, red when only in the predicted tree,
    gray when in neither.  The first root of the true tree is black.
    Edges are labelled with the meta graph's 'c' attribute (two decimals)
    and nodes with their id.

    Raises AssertionError if a node or edge of either tree is missing from
    meta_graph.
    """
    # Sanity checks: both trees must be contained in the meta graph.
    for n in true_tree.nodes_iter():
        assert meta_graph.has_node(n), n

    for s, t in true_tree.edges_iter():
        assert meta_graph.has_edge(s, t), (
            s, t,
            (meta_graph.node[s]['sender_id'],
             meta_graph.node[s]['recipient_ids']),
            (meta_graph.node[t]['sender_id'],
             meta_graph.node[t]['recipient_ids']),
            meta_graph.node[s]['timestamp'],
            meta_graph.node[t]['timestamp'],
            meta_graph.node[t]['timestamp'] - meta_graph.node[s]['timestamp'])

    for n in pred_tree.nodes_iter():
        assert meta_graph.has_node(n), n

    for s, t in pred_tree.edges_iter():
        assert meta_graph.has_edge(s, t), (s, t)

    node_color_types = {'tp': 'green', 'fn': 'blue',
                        'fp': 'red', 'tn': 'gray'}
    edge_color_types = {'tp': 'green', 'fn': 'blue',
                        'fp': 'red', 'tn': 'gray'}

    def get_style_general(n, true_tree_bool_func, pred_tree_bool_func,
                          style_map):
        # Edges arrive as (s, t) pairs and are unpacked into two arguments;
        # anything else is passed as a single argument.
        args = n if isinstance(n, (list, tuple)) else (n,)
        true_has = true_tree_bool_func(*args)
        pred_has = pred_tree_bool_func(*args)
        if true_has and pred_has:
            return style_map['tp']
        elif true_has:
            return style_map['fn']
        elif pred_has:
            return style_map['fp']
        else:
            return style_map['tn']

    root = get_roots(true_tree)[0]

    def get_node_color(n):
        if n == root:
            return 'black'
        return get_style_general(n, true_tree.has_node, pred_tree.has_node,
                                 node_color_types)

    def get_edge_color(e):
        return get_style_general(e, true_tree.has_edge, pred_tree.has_edge,
                                 edge_color_types)

    def edge_label(s, t, detailed=False):
        # `detailed` keeps the alternative debugging format that also shows
        # the edge's 'orig_c' and 'recency' attributes; it is off by default.
        attrs = meta_graph[s][t]
        if detailed:
            return '{0:.2f}({1:.2f}, {2:.2f})'.format(
                attrs['c'], attrs['orig_c'], attrs['recency'])
        return '{0:.2f}'.format(attrs['c'])

    if draw_which == "together":
        g = nx.compose(true_tree, pred_tree)
        output_path = 'tmp/tree_inspection/true_event_vs_pred_event{}.png'.format(
            output_path_suffix)
    else:
        g = true_tree
        output_path = 'tmp/tree_inspection/true_event{}.png'.format(
            output_path_suffix)

    pos = nx.graphviz_layout(g, prog='dot')

    # Real lists (not `map`) so the colour sequences are materialised on
    # Python 3 as well as Python 2.
    nx.draw(g, pos,
            node_color=[get_node_color(n) for n in g.nodes_iter()],
            edge_color=[get_edge_color(e) for e in g.edges_iter()],
            node_size=200,
            alpha=0.5,
            arrows=False)

    nx.draw_networkx_edge_labels(
        g, pos,
        edge_labels={(s, t): edge_label(s, t) for s, t in g.edges_iter()},
        alpha=0.5)

    # The node-label mapping goes in `labels`; it used to be passed as
    # `edge_labels`, which draw_networkx_labels does not define (ignored or
    # rejected, depending on the networkx version).
    nx.draw_networkx_labels(
        g, pos,
        labels={i: str(i) for i in g.nodes()},
        alpha=0.5)

    plt.savefig(output_path)
# Debugging script: loads one pickled result DAG, reports its roots and
# removes a hand-picked set of nodes from it.
import matplotlib as mpl
mpl.use('Agg')  # choose the backend before pyplot is imported
import matplotlib.pyplot as plt

import cPickle as pkl
import networkx as nx
from dag_util import get_roots
from budget_problem import binary_search_using_charikar

# Open in binary mode and close the handle as soon as the object is loaded.
# NOTE(review): unpickling can execute arbitrary code -- only load trusted
# files.  The graph of interest is the third element of the pickled object.
with open('test/data/tmp/result-quota--U=0.01--dijkstra=False--timespan=28days----consider_recency=False--distance_weights={"topics":1.0}--preprune_secs=28days----cand_tree_percent=0.1--root_sampling=random.pkl.dag', 'rb') as dag_file:
    g = pkl.load(dag_file)[2]

print('g.has_edge(54619, 54627)', g.has_edge(54619, 54627))

root = get_roots(g)[0]
print('roots:', get_roots(g))

nodes_to_remove = [
    54637, 54657, 54677, 54669, 54643, 54640,
    54631, 54627, 54673, 54670, 54647,
]
# Disabled alternative kept for reference: collect every non-root node
# that has no incoming edge.
# for n in g.nodes_iter():
#     if g.in_degree(n) == 0 and n != root:
#         nodes_to_remove.append(n)
print(nodes_to_remove)

for n in nodes_to_remove:
    g.remove_node(n)
# Debugging script: loads one pickled result DAG, reports its roots and
# removes a hand-picked set of nodes from it.
import matplotlib as mpl
mpl.use('Agg')  # choose the backend before pyplot is imported
import matplotlib.pyplot as plt

import cPickle as pkl
import networkx as nx
from dag_util import get_roots
from budget_problem import binary_search_using_charikar

# Open in binary mode and close the handle as soon as the object is loaded.
# NOTE(review): unpickling can execute arbitrary code -- only load trusted
# files.  The graph of interest is the third element of the pickled object.
with open('test/data/tmp/result-quota--U=0.01--dijkstra=False--timespan=28days----consider_recency=False--distance_weights={"topics":1.0}--preprune_secs=28days----cand_tree_percent=0.1--root_sampling=random.pkl.dag', 'rb') as dag_file:
    g = pkl.load(dag_file)[2]

print('g.has_edge(54619, 54627)', g.has_edge(54619, 54627))

root = get_roots(g)[0]
print('roots:', get_roots(g))

nodes_to_remove = [
    54637, 54657, 54677, 54669, 54643, 54640,
    54631, 54627, 54673, 54670, 54647,
]
# Disabled alternative kept for reference: collect every non-root node
# that has no incoming edge.
# for n in g.nodes_iter():
#     if g.in_degree(n) == 0 and n != root:
#         nodes_to_remove.append(n)
print(nodes_to_remove)

for n in nodes_to_remove:
    g.remove_node(n)
def draw_pred_tree_against_true_tree(pred_tree, true_tree, meta_graph,
                                     draw_which='together',
                                     output_path_suffix=''):
    """
    Draw the predicted event against the true event and save it as a PNG.

    The meta graph only supplies edge attributes and is used for sanity
    checks; it is not drawn itself.

    - draw_which == "together": draws nx.compose(true_tree, pred_tree) and
      saves to 'tmp/tree_inspection/true_event_vs_pred_event<suffix>.png'.
    - any other value: draws only true_tree and saves to
      'tmp/tree_inspection/true_event<suffix>.png'.

    Nodes and edges are coloured by membership: green when in both trees,
    blue when only in the true tree, red when only in the predicted tree,
    gray when in neither.  The first root of the true tree is black.
    Edges are labelled with the meta graph's 'c' attribute (two decimals)
    and nodes with their id.

    Raises AssertionError if a node or edge of either tree is missing from
    meta_graph.
    """
    # Sanity checks: both trees must be contained in the meta graph.
    for n in true_tree.nodes_iter():
        assert meta_graph.has_node(n), n

    for s, t in true_tree.edges_iter():
        assert meta_graph.has_edge(s, t), (
            s, t,
            (meta_graph.node[s]['sender_id'],
             meta_graph.node[s]['recipient_ids']),
            (meta_graph.node[t]['sender_id'],
             meta_graph.node[t]['recipient_ids']),
            meta_graph.node[s]['timestamp'],
            meta_graph.node[t]['timestamp'],
            meta_graph.node[t]['timestamp'] - meta_graph.node[s]['timestamp'])

    for n in pred_tree.nodes_iter():
        assert meta_graph.has_node(n), n

    for s, t in pred_tree.edges_iter():
        assert meta_graph.has_edge(s, t), (s, t)

    node_color_types = {'tp': 'green', 'fn': 'blue',
                        'fp': 'red', 'tn': 'gray'}
    edge_color_types = {'tp': 'green', 'fn': 'blue',
                        'fp': 'red', 'tn': 'gray'}

    def get_style_general(n, true_tree_bool_func, pred_tree_bool_func,
                          style_map):
        # Edges arrive as (s, t) pairs and are unpacked into two arguments;
        # anything else is passed as a single argument.
        args = n if isinstance(n, (list, tuple)) else (n,)
        true_has = true_tree_bool_func(*args)
        pred_has = pred_tree_bool_func(*args)
        if true_has and pred_has:
            return style_map['tp']
        elif true_has:
            return style_map['fn']
        elif pred_has:
            return style_map['fp']
        else:
            return style_map['tn']

    root = get_roots(true_tree)[0]

    def get_node_color(n):
        if n == root:
            return 'black'
        return get_style_general(n, true_tree.has_node, pred_tree.has_node,
                                 node_color_types)

    def get_edge_color(e):
        return get_style_general(e, true_tree.has_edge, pred_tree.has_edge,
                                 edge_color_types)

    def edge_label(s, t, detailed=False):
        # `detailed` keeps the alternative debugging format that also shows
        # the edge's 'orig_c' and 'recency' attributes; it is off by default.
        attrs = meta_graph[s][t]
        if detailed:
            return '{0:.2f}({1:.2f}, {2:.2f})'.format(
                attrs['c'], attrs['orig_c'], attrs['recency'])
        return '{0:.2f}'.format(attrs['c'])

    if draw_which == "together":
        g = nx.compose(true_tree, pred_tree)
        output_path = 'tmp/tree_inspection/true_event_vs_pred_event{}.png'.format(
            output_path_suffix)
    else:
        g = true_tree
        output_path = 'tmp/tree_inspection/true_event{}.png'.format(
            output_path_suffix)

    pos = nx.graphviz_layout(g, prog='dot')

    # Real lists (not `map`) so the colour sequences are materialised on
    # Python 3 as well as Python 2.
    nx.draw(g, pos,
            node_color=[get_node_color(n) for n in g.nodes_iter()],
            edge_color=[get_edge_color(e) for e in g.edges_iter()],
            node_size=200,
            alpha=0.5,
            arrows=False)

    nx.draw_networkx_edge_labels(
        g, pos,
        edge_labels={(s, t): edge_label(s, t) for s, t in g.edges_iter()},
        alpha=0.5)

    # The node-label mapping goes in `labels`; it used to be passed as
    # `edge_labels`, which draw_networkx_labels does not define (ignored or
    # rejected, depending on the networkx version).
    nx.draw_networkx_labels(
        g, pos,
        labels={i: str(i) for i in g.nodes()},
        alpha=0.5)

    plt.savefig(output_path)