def test_detect_events_given_path():
    """Check detect_events_given_path against detect_events on one fixture.

    The candidate trees are unpickled from the fixture file and passed to
    ``detect_events``; the same file path is passed to
    ``detect_events_given_path``. Both calls use ``K=5`` and the resulting
    trees are compared pairwise by their sorted edge lists.
    """
    result_path = os.path.join(CURDIR, 'test/data/cand_trees.pkl')
    # Pickles are binary data: open in 'rb' and close the handle promptly.
    with open(result_path, 'rb') as f:
        cand_trees = pickle.load(f)
    expected_trees = list(detect_events(cand_trees, K=5))
    actual_trees = list(detect_events_given_path(result_path, K=5))
    # zip() stops at the shorter input, so compare the lengths explicitly;
    # otherwise missing or extra trees would go unnoticed.
    assert_equal(len(expected_trees), len(actual_trees))
    for at, et in zip(actual_trees, expected_trees):
        assert_equal(sorted(et.edges()), sorted(at.edges()))
def test_detect_events():
    """Check the size and ordering of the result of detect_events with K=5.

    Asserts that exactly 5 trees are returned and that each tree has at
    least as many nodes as the tree that follows it.
    """
    result_path = os.path.join(CURDIR, 'test/data/cand_trees.pkl')
    # Pickles are binary data: open in 'rb' and close the handle promptly.
    with open(result_path, 'rb') as f:
        cand_trees = pickle.load(f)
    trees = detect_events(cand_trees, K=5)
    assert_equal(5, len(trees))
    # Adjacent pairs: node counts must be non-increasing along the list.
    for t1, t2 in zip(trees, trees[1:]):
        assert_true(len(t1.nodes()) >= len(t2.nodes()))
def main():
    """Print document statistics for the first detected event.

    Reads a pickle file path from ``sys.argv[1]``, loads the candidate
    events from it, and takes the first element of
    ``detect_events(candidate_events, 5)``. For that graph and its first
    root it pretty-prints the message-id counts of the children documents,
    of all documents, and of the longest-path documents, followed by the
    subjects of the longest-path documents.

    Raises:
        IndexError: if no path argument is given on the command line.
    """
    import sys
    from pprint import pprint
    pkl_path = sys.argv[1]

    # NOTE(security): unpickling can execute arbitrary code; only pass
    # files from a trusted source.
    # Pickles are binary data: open in 'rb' and close the handle promptly.
    # `pkl` is a module-level name not visible in this block -- presumably
    # an alias of the pickle module; confirm against the file's imports.
    with open(pkl_path, 'rb') as f:
        candidate_events = pkl.load(f)
    g = detect_events(candidate_events, 5)[0]
    mid2interaction = load_id2obj_dict('data/enron.json', 'message_id')
    root = get_roots(g)[0]
    pprint('children documents count: {}'.format(
        count_message_ids(children_documents(g, root, mid2interaction))))
    pprint('all documents count: {}'.format(
        count_message_ids(all_documents(g, mid2interaction))))
    lpd = longest_path_documents(g, root, mid2interaction)
    pprint('longest path documents count: {}'.format(count_message_ids(lpd)))

    pprint('longest path documents\' subject: {}'.format(
        [d['subject'] for d in lpd]))