def setUp(self):
        """Read the three repeated-message JSON fixtures into df1, df2, df3."""
        read_json = pd.io.json.read_json
        # (attribute name, fixture path) pairs, loaded in this order.
        fixtures = (
            ('df1',
             'test/data/repeated_messages_interactions.json'),
            ('df2',
             'test/data/repeated_messages_interactions_multiple_senders.json'),
            ('df3',
             'test/data/repeated_messages_twitter_example.json'),
        )
        for attr_name, relative_path in fixtures:
            setattr(self, attr_name, read_json(make_path(relative_path)))
    def setUp(self):
        """Parse the repeated-message JSON fixtures and keep them on self."""
        load = pd.io.json.read_json

        # Single-sender interactions.
        interactions_path = make_path(
            'test/data/repeated_messages_interactions.json')
        self.df1 = load(interactions_path)

        # Interactions with multiple senders.
        multi_sender_path = make_path(
            'test/data/repeated_messages_interactions_multiple_senders.json')
        self.df2 = load(multi_sender_path)

        # Twitter example.
        twitter_path = make_path(
            'test/data/repeated_messages_twitter_example.json')
        self.df3 = load(twitter_path)
Exemplo n.º 3
0
    def test_bs_ensure_result_is_tree(self):
        """The result of ``charikar_algo`` on the quota fixture is a tree.

        Loads the first pickled parameter set, builds the meta graph from
        the synthetic interactions, restricts it to the sub-graph rooted at
        the first root within ``preprune_secs``, and asserts that the
        algorithm's output satisfies ``nx.is_arborescence``.
        """
        params_path = make_path('test/data/quota_test_cases/params.pkl')
        # Pickles must be read in binary mode, and the context manager
        # closes the handle instead of leaking it.
        with open(params_path, 'rb') as params_file:
            params = pkl.load(params_file)[0]

        root = params['roots'][0]
        preprune_secs = params['preprune_secs']
        mg = IU.get_topic_meta_graph_from_synthetic(
            make_path('test/data/quota_test_cases/interactions.json'),
            preprune_secs)
        dag = IU.get_rooted_subgraph_within_timespan(mg, root, preprune_secs)
        t = charikar_algo(dag, root, dag.nodes(), k=20, level=2)
        assert_true(nx.is_arborescence(t))
Exemplo n.º 4
0
    def test_bs_ensure_result_is_tree(self):
        """Check that ``charikar_algo`` yields an arborescence.

        Uses the first entry of the pickled quota test parameters: its first
        root and its ``preprune_secs`` drive both the meta-graph construction
        and the rooted sub-graph extraction that feed the algorithm.
        """
        params_path = make_path('test/data/quota_test_cases/params.pkl')
        # Open in binary mode (required for pickle data) and let the
        # ``with`` block close the file rather than leaking the handle.
        with open(params_path, 'rb') as params_file:
            params = pkl.load(params_file)[0]

        root = params['roots'][0]
        preprune_secs = params['preprune_secs']
        mg = IU.get_topic_meta_graph_from_synthetic(
            make_path('test/data/quota_test_cases/interactions.json'),
            preprune_secs
        )
        dag = IU.get_rooted_subgraph_within_timespan(
            mg, root, preprune_secs
        )
        t = charikar_algo(dag, root, dag.nodes(),
                          k=20, level=2)
        assert_true(nx.is_arborescence(t))
Exemplo n.º 5
0
        nbrs = sorted(tree.neighbors(node))
        if len(nbrs) == 0:
            return '{%s}' % node
        else:
            return '{%s%s}' % (
                node,
                ''.join([aux(n) for n in nbrs])
            )
    if tree.number_of_nodes() == 0:
        return '{}'
    else:
        assert nx.is_arborescence(tree), tree.nodes()
        return aux(get_roots(tree)[0])
    

# Location (resolved through make_path) of the APTED jar that salzburg_ted
# launches with `java -jar`.
JAR_PATH = make_path('external/APTED-0.1.1.jar')


def salzburg_ted(tree1, tree2):
    """
    tree edit distance

    From [Source](tree-edit-distance.dbresearch.uni-salzburg.at/#download)
    """
    # print('##### 1 ######')
    # print(to_bracket_notation(tree1))
    # print('##### 2 ######')
    # print(to_bracket_notation(tree2))
    output = check_output('java -jar {} --trees {} {}'.format(
        JAR_PATH,
        to_bracket_notation(tree1),
def to_bracket_notation(tree):
    """
    Serialise a rooted tree as a nested-brace string.

    Every node is rendered as ``{<node><children...>}`` with its children
    emitted in sorted order and no separators, e.g. ``{a{b}{c}}``.
    A graph without nodes is rendered as ``{}``.

    :param tree: graph exposing ``number_of_nodes``, ``neighbors`` and
        ``nodes``; unless empty it must satisfy ``nx.is_arborescence``.
    :returns: the bracket-notation string.
    :raises AssertionError: if a non-empty ``tree`` is not an arborescence.
    """
    def aux(node):
        children = ''.join(
            aux(child) for child in sorted(tree.neighbors(node))
        )
        # Always pass the arguments as a tuple: a bare ``'{%s}' % node``
        # would unpack ``node`` itself when the label is a tuple, so leaf
        # nodes were formatted differently from (or failed unlike)
        # internal nodes.
        return '{%s%s}' % (node, children)

    if tree.number_of_nodes() == 0:
        return '{}'

    assert nx.is_arborescence(tree), tree.nodes()
    return aux(get_roots(tree)[0])


# Location (resolved through make_path) of the APTED jar that salzburg_ted
# launches with `java -jar`.
JAR_PATH = make_path('external/APTED-0.1.1.jar')


def salzburg_ted(tree1, tree2):
    """
    tree edit distance

    Runs the APTED jar at ``JAR_PATH`` with ``--trees`` on the bracket
    notation of both trees and captures what ``check_output`` returns.

    From [Source](tree-edit-distance.dbresearch.uni-salzburg.at/#download)

    :param tree1: first tree, serialised with ``to_bracket_notation``.
    :param tree2: second tree, serialised with ``to_bracket_notation``.
    """
    # Build the argument list explicitly instead of formatting one command
    # string and splitting it on whitespace: a jar path or node label that
    # contains whitespace must stay a single argument.
    output = check_output([
        'java', '-jar', JAR_PATH,
        '--trees',
        to_bracket_notation(tree1),
        to_bracket_notation(tree2),
    ])
Exemplo n.º 7
0
def test_parse_file():
    """Check what ``parse_file`` extracts from ``test/data/allen.txt``.

    Verifies the number of letters, then the sender, recipients, date and
    the start/end of the body for the first two letters, and finally that
    none of the black-listed markup fragments is left in any letter body.
    """
    path = make_path('test/data/allen.txt')
    letters = parse_file(path)

    assert_equal(4, len(letters))

    first = letters[0]
    assert_equal('WILLIAM_ALLEN', first['sender_id'])
    assert_equal(['RICHARD_HOPKINS'], first['recipient_ids'])
    assert_equal(dt(1579, 4, 5), first['datetime'])
    assert_true(first['body'].startswith('Mr. Hopkins'))
    assert_true(
        first['body'].endswith("Loven chez Madame d'Hungerford . \n")
    )

    second = letters[1]
    assert_equal('WILLIAM_ALLEN', second['sender_id'])
    assert_equal(['OWEN_LEWIS'], second['recipient_ids'])
    assert_equal(dt(1579, 5, 12), second['datetime'])
    assert_true(second['body'].startswith('Most dearly beloved'))
    assert_true(second['body'].endswith('Romae . \n'))

    black_list = ('{COM:DIAERESIS_ABOVE_THE_LETTER_e_IN_AUDOENO}',
                  'ALLEN,231.003.172',
                  '<paren>',
                  '</paren>',
                  'Mons=r=',
                  '$1579')
    for letter in letters:
        for fragment in black_list:
            # Parenthesised single-argument print is valid on both
            # Python 2 and Python 3; the bare print statement used before
            # is a syntax error on Python 3.
            print(fragment)
            print(letter['body'])
            assert_true(fragment not in letter['body'])