def setUp(self):
    """Load the repeated-message JSON fixtures into ``self.df1``..``self.df3``.

    Each fixture path is resolved with ``make_path`` and parsed with
    ``pd.io.json.read_json``; fixtures are loaded in attribute order.
    """
    fixtures = (
        ('df1', 'test/data/repeated_messages_interactions.json'),
        ('df2',
         'test/data/repeated_messages_interactions_multiple_senders.json'),
        ('df3', 'test/data/repeated_messages_twitter_example.json'),
    )
    for attr_name, relative_path in fixtures:
        frame = pd.io.json.read_json(make_path(relative_path))
        setattr(self, attr_name, frame)
def setUp(self):
    """Load the repeated-message JSON fixtures into ``self.df1``..``self.df3``.

    Each fixture path is resolved with ``make_path`` and parsed with
    ``pd.io.json.read_json``; fixtures are loaded in attribute order.
    """
    fixtures = (
        ('df1', 'test/data/repeated_messages_interactions.json'),
        ('df2',
         'test/data/repeated_messages_interactions_multiple_senders.json'),
        ('df3', 'test/data/repeated_messages_twitter_example.json'),
    )
    for attr_name, relative_path in fixtures:
        frame = pd.io.json.read_json(make_path(relative_path))
        setattr(self, attr_name, frame)
def test_bs_ensure_result_is_tree(self):
    """Check that ``charikar_algo`` returns an arborescence.

    Uses the first parameter set pickled in
    ``test/data/quota_test_cases/params.pkl``: its first root and its
    ``preprune_secs`` value drive construction of the meta graph and of the
    rooted sub-DAG that is handed to the algorithm (``k=20``, ``level=2``).
    """
    # Open in binary mode inside a context manager: pickle data is a byte
    # stream, and the handle is closed deterministically instead of leaking.
    params_path = make_path('test/data/quota_test_cases/params.pkl')
    with open(params_path, 'rb') as params_file:
        params = pkl.load(params_file)[0]

    root = params['roots'][0]
    preprune_secs = params['preprune_secs']

    mg = IU.get_topic_meta_graph_from_synthetic(
        make_path('test/data/quota_test_cases/interactions.json'),
        preprune_secs
    )
    dag = IU.get_rooted_subgraph_within_timespan(mg, root, preprune_secs)

    t = charikar_algo(dag, root, dag.nodes(), k=20, level=2)
    assert_true(nx.is_arborescence(t))
def test_bs_ensure_result_is_tree(self):
    """Check that ``charikar_algo`` returns an arborescence.

    Uses the first parameter set pickled in
    ``test/data/quota_test_cases/params.pkl``: its first root and its
    ``preprune_secs`` value drive construction of the meta graph and of the
    rooted sub-DAG that is handed to the algorithm (``k=20``, ``level=2``).
    """
    # Open in binary mode inside a context manager: pickle data is a byte
    # stream, and the handle is closed deterministically instead of leaking.
    params_path = make_path('test/data/quota_test_cases/params.pkl')
    with open(params_path, 'rb') as params_file:
        params = pkl.load(params_file)[0]

    root = params['roots'][0]
    preprune_secs = params['preprune_secs']

    mg = IU.get_topic_meta_graph_from_synthetic(
        make_path('test/data/quota_test_cases/interactions.json'),
        preprune_secs
    )
    dag = IU.get_rooted_subgraph_within_timespan(mg, root, preprune_secs)

    t = charikar_algo(dag, root, dag.nodes(), k=20, level=2)
    assert_true(nx.is_arborescence(t))
nbrs = sorted(tree.neighbors(node)) if len(nbrs) == 0: return '{%s}' % node else: return '{%s%s}' % ( node, ''.join([aux(n) for n in nbrs]) ) if tree.number_of_nodes() == 0: return '{}' else: assert nx.is_arborescence(tree), tree.nodes() return aux(get_roots(tree)[0]) JAR_PATH = make_path('external/APTED-0.1.1.jar') def salzburg_ted(tree1, tree2): """ tree edit distance From [Source](tree-edit-distance.dbresearch.uni-salzburg.at/#download) """ # print('##### 1 ######') # print(to_bracket_notation(tree1)) # print('##### 2 ######') # print(to_bracket_notation(tree2)) output = check_output('java -jar {} --trees {} {}'.format( JAR_PATH, to_bracket_notation(tree1),
def to_bracket_notation(tree):
    """Serialize a rooted tree into nested-brace ("bracket") notation.

    A node is rendered as ``{label<children>}``, with children visited in
    sorted order; a leaf is therefore just ``{label}``. An empty tree is
    rendered as ``{}``.

    Raises ``AssertionError`` (carrying the node list) if a non-empty
    ``tree`` is not an arborescence according to ``nx.is_arborescence``.
    """
    def aux(node):
        children = sorted(tree.neighbors(node))
        # One format expression covers leaves too (the join is empty).
        # Passing the values as a tuple also keeps a tuple-valued node label
        # from being unpacked as format arguments.
        return '{%s%s}' % (node, ''.join(aux(child) for child in children))

    if tree.number_of_nodes() == 0:
        return '{}'
    assert nx.is_arborescence(tree), tree.nodes()
    return aux(get_roots(tree)[0])


JAR_PATH = make_path('external/APTED-0.1.1.jar')


def salzburg_ted(tree1, tree2):
    """
    tree edit distance

    From [Source](tree-edit-distance.dbresearch.uni-salzburg.at/#download)

    Runs the APTED jar at ``JAR_PATH`` on the bracket notation of both trees.
    """
    # Pass argv as a list rather than formatting one command string and
    # splitting it on whitespace: a jar path or node label containing
    # whitespace would otherwise be broken into several arguments.
    # NOTE(review): `output` is not used or returned in the code visible
    # here -- confirm whether the function is meant to parse and return it.
    output = check_output([
        'java', '-jar', JAR_PATH,
        '--trees',
        to_bracket_notation(tree1),
        to_bracket_notation(tree2),
    ])
def test_parse_file():
    """Check ``parse_file`` against the ``test/data/allen.txt`` fixture.

    Verifies the number of parsed letters, the sender, recipients, date and
    body boundaries of the first two letters, and that none of the
    black-listed markup tokens survives in any letter body.
    """
    path = make_path('test/data/allen.txt')
    letters = parse_file(path)
    assert_equal(4, len(letters))

    first = letters[0]
    assert_equal('WILLIAM_ALLEN', first['sender_id'])
    assert_equal(['RICHARD_HOPKINS'], first['recipient_ids'])
    assert_equal(dt(1579, 4, 5), first['datetime'])
    assert_true(first['body'].startswith('Mr. Hopkins'))
    assert_true(
        first['body'].endswith("Loven chez Madame d'Hungerford . \n")
    )

    second = letters[1]
    assert_equal('WILLIAM_ALLEN', second['sender_id'])
    assert_equal(['OWEN_LEWIS'], second['recipient_ids'])
    assert_equal(dt(1579, 5, 12), second['datetime'])
    assert_true(second['body'].startswith('Most dearly beloved'))
    assert_true(second['body'].endswith('Romae . \n'))

    black_list = ('{COM:DIAERESIS_ABOVE_THE_LETTER_e_IN_AUDOENO}',
                  'ALLEN,231.003.172',
                  '<paren>',
                  '</paren>',
                  'Mons=r=',
                  '$1579')
    for letter in letters:
        for token in black_list:
            # Single-argument print(...) is valid on both Python 2 and 3;
            # the former `print x` statements were Python 2-only syntax.
            print(token)
            print(letter['body'])
            assert_true(token not in letter['body'])