Пример #1
0
def roots(acc_trees, true_trees, k):
    """Return the share of true-tree roots that also head an accepted tree.

    For every tree in `acc_trees` the first entry of ``get_roots(tree)`` is
    collected (trees for which ``get_roots`` returns something falsy are
    skipped).  For every tree in `true_trees` the first entry of
    ``get_roots(tree)`` is collected unconditionally.  The result is
    ``|accepted roots & true roots| / |true roots|`` as a float.

    `k` is accepted but not used by this function.

    Raises IndexError if a true tree yields no root, and ZeroDivisionError
    if `true_trees` is empty.
    """
    root_lists = (get_roots(tree) for tree in acc_trees)
    found = {candidates[0] for candidates in root_lists if candidates}

    expected = {get_roots(tree)[0] for tree in true_trees}
    matched = found.intersection(expected)
    return len(matched) / float(len(expected))
Пример #2
0
def get_gen_cand_tree_params(e):
    """Summarise graph `e` as a parameter dict.

    Returns a dict with three keys:

    - ``'U'``: sum of the ``'c'`` attribute over every edge of `e`.
    - ``'roots'``: the value returned by ``get_roots(e)``.
    - ``'preprune_secs'``: difference between the largest and the smallest
      node ``'timestamp'``, rounded up with ``math.ceil``.

    Raises ValueError (from ``np.max``) when `e` has no nodes.
    """
    # Built-in sum: np.sum() on a generator is deprecated and merely falls
    # back to the built-in anyway, so the result is unchanged.
    U = sum(e[s][t]['c'] for s, t in e.edges_iter())

    roots = get_roots(e)
    timestamps = [e.node[n]['timestamp'] for n in e.nodes_iter()]
    preprune_secs = np.max(timestamps) - np.min(timestamps)
    return {'U': U, 'roots': roots, 'preprune_secs': math.ceil(preprune_secs)}
Пример #3
0
def get_gen_cand_tree_params(e):
    """Summarise graph `e` as a parameter dict.

    Returns a dict with three keys:

    - ``'U'``: sum of the ``'c'`` attribute over every edge of `e`.
    - ``'roots'``: the value returned by ``get_roots(e)``.
    - ``'preprune_secs'``: difference between the largest and the smallest
      node ``'timestamp'``, rounded up with ``math.ceil``.

    Raises ValueError (from ``np.max``) when `e` has no nodes.
    """
    # Built-in sum: np.sum() on a generator is deprecated and merely falls
    # back to the built-in anyway, so the result is unchanged.
    U = sum(e[s][t]['c'] for s, t in e.edges_iter())

    roots = get_roots(e)
    timestamps = [e.node[n]['timestamp'] for n in e.nodes_iter()]
    preprune_secs = np.max(timestamps) - np.min(timestamps)
    return {
        'U': U,
        'roots': roots,
        'preprune_secs': math.ceil(preprune_secs)
    }
def to_bracket_notation(tree):
    """Serialise `tree` into nested-brace notation.

    An empty graph yields ``'{}'``.  Otherwise the graph is asserted to be an
    arborescence (the assertion message is the node list) and rendered from
    the first entry of ``get_roots(tree)``: every node becomes
    ``{<node><children...>}``, with children visited in sorted order.
    """
    if tree.number_of_nodes() == 0:
        return '{}'

    assert nx.is_arborescence(tree), tree.nodes()

    def render(node):
        # Sorting the successors keeps the output deterministic.
        children = sorted(tree.neighbors(node))
        if not children:
            return '{%s}' % node
        rendered_children = ''.join(render(child) for child in children)
        return '{%s%s}' % (node, rendered_children)

    return render(get_roots(tree)[0])
Пример #5
0
def to_bracket_notation(tree):
    """Render `tree` as a nested-brace string.

    Returns ``'{}'`` for a graph without nodes.  For any other graph it
    asserts ``nx.is_arborescence(tree)`` (reporting the node list on failure)
    and emits ``{<node><children...>}`` recursively, starting from the first
    entry of ``get_roots(tree)`` and walking children in sorted order.
    """
    if tree.number_of_nodes() == 0:
        return '{}'

    assert nx.is_arborescence(tree), tree.nodes()

    def serialise(node):
        # Sorted successors give a stable, reproducible string.
        successors = sorted(tree.neighbors(node))
        if not successors:
            return '{%s}' % node
        parts = [serialise(child) for child in successors]
        return '{%s%s}' % (node, ''.join(parts))

    return serialise(get_roots(tree)[0])
def run(candidate_tree_path,
        k,
        id2people,
        id2interaction,
        dirname=None,
        to_original_graph=False):
    """Detect events, decorate their nodes and dump them as d3 graphs.

    Parameters:
      candidate_tree_path: passed to ``detect_events_given_path`` and to
        ``get_output_path``.
      k: passed to ``detect_events_given_path``.
      id2people: mapping indexed by the ``'sender_id'`` and each of the
        ``'recipient_ids'`` stored on a node.
      id2interaction: mapping indexed by node; each value must provide
        ``'subject'`` and ``'body'`` and may provide ``'retweet_count'`` and
        ``'favorite_count'`` (missing ones become None).
      dirname: if truthy and not existing, the directory is created; it is
        also passed to ``get_output_path``.
      to_original_graph: when true, every event is passed through
        ``convert_to_original_graph`` before export.

    Side effects: mutates the node attributes of every detected event and
    writes the result with ``json_dump`` to the computed output path.
    """
    if dirname and not os.path.exists(dirname):
        os.makedirs(dirname)

    output_path = get_output_path(candidate_tree_path, dirname)

    events = detect_events_given_path(candidate_tree_path, k)

    # add people and content
    for e in events:
        root = get_roots(e)[0]
        for n in e.nodes_iter():
            e.node[n]['sender'] = id2people[e.node[n]['sender_id']]
            e.node[n]['recipients'] = [id2people[id_]
                                       for id_ in e.node[n]['recipient_ids']]
            e.node[n]['subject'] = id2interaction[n]['subject']
            e.node[n]['body'] = id2interaction[n]['body']

            for f in ('retweet_count', 'favorite_count'):
                e.node[n][f] = id2interaction[n].get(f)

            e.node[n]['root'] = (n == root)
            # Stringified so the value can be written out as JSON below.
            e.node[n]['datetime'] = str(e.node[n]['datetime'])

    if to_original_graph:
        # A list (not a lazy map object) so the result is the same
        # concrete sequence on Python 2 and Python 3.
        events = [convert_to_original_graph(e) for e in events]

    d3_events = [to_d3_graph(e)
                 for e in events]

    json_dump(d3_events, output_path)
def main():
    """Print document-count statistics for the first detected event.

    Reads a pickle path from ``sys.argv[1]``, loads the candidate events,
    takes the first result of ``detect_events(candidate_events, 5)`` and
    pretty-prints message-id counts for the root's children documents, for
    all documents and for the longest-path documents, followed by the
    subjects of the longest-path documents.  Interactions are loaded from
    ``data/enron.json`` keyed by ``message_id``.
    """
    import sys
    from pprint import pprint
    pkl_path = sys.argv[1]

    # Pickles are binary data: open in 'rb' and close the handle promptly.
    # NOTE(review): unpickling runs arbitrary code; only pass trusted files.
    with open(pkl_path, 'rb') as pkl_file:
        candidate_events = pkl.load(pkl_file)
    g = detect_events(candidate_events, 5)[0]
    mid2interaction = load_id2obj_dict('data/enron.json', 'message_id')
    root = get_roots(g)[0]
    pprint('children documents count: {}'.format(
        count_message_ids(children_documents(g, root, mid2interaction))))
    pprint('all documents count: {}'.format(
        count_message_ids(all_documents(g, mid2interaction))))
    lpd = longest_path_documents(g, root, mid2interaction)
    pprint('longest path documents count: {}'.format(count_message_ids(lpd)))

    pprint('longest path documents\' subject: {}'.format(
        [d['subject'] for d in lpd]))
Пример #8
0
def test_gen_event_with_known_tree_structure():
    """Check the invariants of an event from gen_event_with_known_tree_structure.

    Verified properties:
    - no node's sender equals its first recipient;
    - for an 'r' edge the child swaps sender and first recipient of the
      parent; for an 'f' edge the child's sender is the parent's first
      recipient and the child's first recipient differs from the parent's
      sender; for any other edge type both share the sender;
    - the meta graph built from the node payloads has exactly one root;
    - the event has `event_size` nodes and is an arborescence.
    """
    event_size = 100
    participants_n = 10
    event = gen_event_with_known_tree_structure(
        event_size=event_size,
        participants=range(participants_n),
        start_time=10, end_time=110,
        event_topic_param=random_topic(10, topic_noise=0.0001)[0],
        topic_noise=1,
        alpha=1.0, tau=0.8,
        forward_proba=0.3,
        reply_proba=0.5,
        create_new_proba=0.2
    )

    def sender_and_first_recipient(node):
        attrs = event.node[node]
        return attrs['sender_id'], attrs['recipient_ids'][0]

    for node in event.nodes_iter():
        sender, recipient = sender_and_first_recipient(node)
        assert_true(sender != recipient)

    for parent, child in event.edges_iter():
        parent_sender, parent_recipient = sender_and_first_recipient(parent)
        child_sender, child_recipient = sender_and_first_recipient(child)
        edge_kind = event[parent][child]['c_type']
        if edge_kind == 'r':
            assert_equal(parent_sender, child_recipient)
            assert_equal(child_sender, parent_recipient)
        elif edge_kind == 'f':
            assert_equal(parent_recipient, child_sender)
            assert_true(child_recipient != parent_sender)
        else:
            assert_equal(parent_sender, child_sender)

    interactions = [event.node[node] for node in event.nodes_iter()]
    meta_graph = IU.get_meta_graph(
        interactions,
        decompose_interactions=False,
        remove_singleton=True,
        given_topics=True,
        convert_time=False
    )
    assert_equal(1, len(get_roots(meta_graph)))
    assert_equal(event_size, len(interactions))

    assert_true(nx.is_arborescence(event))
Пример #9
0
def draw_pred_tree_against_true_tree(pred_tree, true_tree, meta_graph,
                                     draw_which='together',
                                     output_path_suffix=''):
    """
    Draw the predicted event against the true event and save it as a PNG.

    The meta graph is not drawn itself; it is used to validate both trees and
    to label edges with their ``'c'`` value.  With ``draw_which='together'``
    the drawn graph is ``nx.compose(true_tree, pred_tree)``; any other value
    draws `true_tree` only.

    Colours: the first root of `true_tree` is black; every other node/edge is
    green when present in both trees, blue when only in `true_tree`, red when
    only in `pred_tree` and gray when in neither.

    The image goes to ``tmp/tree_inspection/`` with `output_path_suffix`
    inserted before ``.png``.

    Raises AssertionError if a node or edge of either tree is missing from
    `meta_graph`.
    """
    # Both trees must be sub-graphs of the meta graph.
    for n in true_tree.nodes_iter():
        assert meta_graph.has_node(n), n
    for s, t in true_tree.edges_iter():
        assert meta_graph.has_edge(s, t), (
            s, t,
            (meta_graph.node[s]['sender_id'],
             meta_graph.node[s]['recipient_ids']),
            (meta_graph.node[t]['sender_id'],
             meta_graph.node[t]['recipient_ids']),
            meta_graph.node[s]['timestamp'],
            meta_graph.node[t]['timestamp'],
            meta_graph.node[t]['timestamp'] - meta_graph.node[s]['timestamp'])
    for n in pred_tree.nodes_iter():
        assert meta_graph.has_node(n), n
    for s, t in pred_tree.edges_iter():
        assert meta_graph.has_edge(s, t), (s, t)

    node_color_types = {'tp': 'green',
                        'fn': 'blue',
                        'fp': 'red',
                        'tn': 'gray'}
    edge_color_types = {'tp': 'green',
                        'fn': 'blue',
                        'fp': 'red',
                        'tn': 'gray'}

    def get_style_general(n, true_tree_bool_func, pred_tree_bool_func,
                          style_map):
        # Edges arrive as (source, target) pairs and are unpacked for
        # has_edge; anything else is passed through as a single node.
        if isinstance(n, (list, tuple)):
            true_has, pred_has = (true_tree_bool_func(*n),
                                  pred_tree_bool_func(*n))
        else:
            true_has, pred_has = (true_tree_bool_func(n),
                                  pred_tree_bool_func(n))
        if true_has and pred_has:
            return style_map['tp']
        elif true_has and not pred_has:
            return style_map['fn']
        elif not true_has and pred_has:
            return style_map['fp']
        else:
            return style_map['tn']

    root = get_roots(true_tree)[0]

    def get_node_color(n):
        if n == root:
            return 'black'
        return get_style_general(n,
                                 true_tree.has_node,
                                 pred_tree.has_node,
                                 node_color_types)

    def get_edge_color(edge):
        return get_style_general(edge,
                                 true_tree.has_edge,
                                 pred_tree.has_edge,
                                 edge_color_types)

    if draw_which == "together":
        g = nx.compose(true_tree, pred_tree)
        output_path = 'tmp/tree_inspection/true_event_vs_pred_event{}.png'.format(output_path_suffix)
    else:
        g = true_tree
        output_path = 'tmp/tree_inspection/true_event{}.png'.format(output_path_suffix)

    pos = nx.graphviz_layout(g, prog='dot')

    nx.draw(g, pos,
            node_color=[get_node_color(n) for n in g.nodes_iter()],
            edge_color=[get_edge_color(edge) for edge in g.edges_iter()],
            node_size=200,
            alpha=0.5,
            arrows=False)

    nx.draw_networkx_edge_labels(
        g, pos,
        edge_labels={(s, t): '{0:.2f}'.format(meta_graph[s][t]['c'])
                     for s, t in g.edges_iter()},
        alpha=0.5
    )

    # The keyword for node labels is `labels`; the previous `edge_labels`
    # keyword is not a parameter of draw_networkx_labels.
    nx.draw_networkx_labels(
        g, pos,
        labels={i: str(i) for i in g.nodes()},
        alpha=0.5
    )

    plt.savefig(output_path)
Пример #10
0
import matplotlib as mpl
mpl.use('Agg')

import matplotlib.pyplot as plt
import cPickle as pkl
import networkx as nx
from dag_util import get_roots
from budget_problem import binary_search_using_charikar

# Load the third entry of the pickled result.  The file is opened in binary
# mode and closed as soon as unpickling is done.
# NOTE(review): unpickling executes arbitrary code; keep this a trusted file.
with open('test/data/tmp/result-quota--U=0.01--dijkstra=False--timespan=28days----consider_recency=False--distance_weights={"topics":1.0}--preprune_secs=28days----cand_tree_percent=0.1--root_sampling=random.pkl.dag', 'rb') as pkl_file:
    g = pkl.load(pkl_file)[2]
print('g.has_edge(54619, 54627)', g.has_edge(54619, 54627))

root = get_roots(g)[0]

print('roots:', get_roots(g))

# Hand-picked nodes to drop from the graph.
nodes_to_remove = [54637, 54657, 54677, 54669, 54643,
                   54640, 54631, 54627, 54673, 54670,
                   54647]

# Disabled alternative: also drop every in-degree-0 node other than `root`.
# for n in g.nodes_iter():
#     if g.in_degree(n) == 0 and n != root:
#         nodes_to_remove.append(n)

print(nodes_to_remove)

# remove_node (rather than remove_nodes_from) so a missing node still raises.
for n in nodes_to_remove:
    g.remove_node(n)


Пример #11
0
import matplotlib as mpl
mpl.use('Agg')

import matplotlib.pyplot as plt
import cPickle as pkl
import networkx as nx
from dag_util import get_roots
from budget_problem import binary_search_using_charikar

# Load the third entry of the pickled result.  The file is opened in binary
# mode and closed as soon as unpickling is done.
# NOTE(review): unpickling executes arbitrary code; keep this a trusted file.
with open(
        'test/data/tmp/result-quota--U=0.01--dijkstra=False--timespan=28days----consider_recency=False--distance_weights={"topics":1.0}--preprune_secs=28days----cand_tree_percent=0.1--root_sampling=random.pkl.dag',
        'rb') as pkl_file:
    g = pkl.load(pkl_file)[2]
print('g.has_edge(54619, 54627)', g.has_edge(54619, 54627))

root = get_roots(g)[0]

print('roots:', get_roots(g))

# Hand-picked nodes to drop from the graph.
nodes_to_remove = [
    54637, 54657, 54677, 54669, 54643, 54640, 54631, 54627, 54673, 54670, 54647
]

# Disabled alternative: also drop every in-degree-0 node other than `root`.
# for n in g.nodes_iter():
#     if g.in_degree(n) == 0 and n != root:
#         nodes_to_remove.append(n)

print(nodes_to_remove)

# remove_node (rather than remove_nodes_from) so a missing node still raises.
for n in nodes_to_remove:
    g.remove_node(n)
Пример #12
0
def draw_pred_tree_against_true_tree(pred_tree,
                                     true_tree,
                                     meta_graph,
                                     draw_which='together',
                                     output_path_suffix=''):
    """
    Draw the predicted event against the true event and save it as a PNG.

    The meta graph is not drawn itself; it is used to validate both trees and
    to label edges with their ``'c'`` value.  With ``draw_which='together'``
    the drawn graph is ``nx.compose(true_tree, pred_tree)``; any other value
    draws `true_tree` only.

    Colours: the first root of `true_tree` is black; every other node/edge is
    green when present in both trees, blue when only in `true_tree`, red when
    only in `pred_tree` and gray when in neither.

    The image goes to ``tmp/tree_inspection/`` with `output_path_suffix`
    inserted before ``.png``.

    Raises AssertionError if a node or edge of either tree is missing from
    `meta_graph`.
    """
    # Both trees must be sub-graphs of the meta graph.
    for n in true_tree.nodes_iter():
        assert meta_graph.has_node(n), n
    for s, t in true_tree.edges_iter():
        assert meta_graph.has_edge(s, t), (
            s, t,
            (meta_graph.node[s]['sender_id'],
             meta_graph.node[s]['recipient_ids']),
            (meta_graph.node[t]['sender_id'],
             meta_graph.node[t]['recipient_ids']),
            meta_graph.node[s]['timestamp'],
            meta_graph.node[t]['timestamp'],
            meta_graph.node[t]['timestamp'] -
            meta_graph.node[s]['timestamp'])
    for n in pred_tree.nodes_iter():
        assert meta_graph.has_node(n), n
    for s, t in pred_tree.edges_iter():
        assert meta_graph.has_edge(s, t), (s, t)

    node_color_types = {'tp': 'green', 'fn': 'blue', 'fp': 'red', 'tn': 'gray'}
    edge_color_types = {'tp': 'green', 'fn': 'blue', 'fp': 'red', 'tn': 'gray'}

    def get_style_general(n, true_tree_bool_func, pred_tree_bool_func,
                          style_map):
        # Edges arrive as (source, target) pairs and are unpacked for
        # has_edge; anything else is passed through as a single node.
        if isinstance(n, (list, tuple)):
            true_has, pred_has = (true_tree_bool_func(*n),
                                  pred_tree_bool_func(*n))
        else:
            true_has, pred_has = (true_tree_bool_func(n),
                                  pred_tree_bool_func(n))
        if true_has and pred_has:
            return style_map['tp']
        elif true_has and not pred_has:
            return style_map['fn']
        elif not true_has and pred_has:
            return style_map['fp']
        else:
            return style_map['tn']

    root = get_roots(true_tree)[0]

    def get_node_color(n):
        if n == root:
            return 'black'
        return get_style_general(n, true_tree.has_node, pred_tree.has_node,
                                 node_color_types)

    def get_edge_color(edge):
        return get_style_general(edge, true_tree.has_edge, pred_tree.has_edge,
                                 edge_color_types)

    if draw_which == "together":
        g = nx.compose(true_tree, pred_tree)
        output_path = 'tmp/tree_inspection/true_event_vs_pred_event{}.png'.format(
            output_path_suffix)
    else:
        g = true_tree
        output_path = 'tmp/tree_inspection/true_event{}.png'.format(
            output_path_suffix)

    pos = nx.graphviz_layout(g, prog='dot')

    nx.draw(g,
            pos,
            node_color=[get_node_color(n) for n in g.nodes_iter()],
            edge_color=[get_edge_color(edge) for edge in g.edges_iter()],
            node_size=200,
            alpha=0.5,
            arrows=False)

    nx.draw_networkx_edge_labels(
        g,
        pos,
        edge_labels={(s, t): '{0:.2f}'.format(meta_graph[s][t]['c'])
                     for s, t in g.edges_iter()},
        alpha=0.5)

    # The keyword for node labels is `labels`; the previous `edge_labels`
    # keyword is not a parameter of draw_networkx_labels.
    nx.draw_networkx_labels(g,
                            pos,
                            labels={i: str(i)
                                    for i in g.nodes()},
                            alpha=0.5)

    plt.savefig(output_path)