class TestNode(unittest.TestCase):
    """Checks ``Tree`` string rendering and its sum / mean / median results."""

    def setUp(self):
        # Ten values in total: 5, 3, 2, 5, 7, 1, 0, 2, 8, 5.
        self.my_tree = Tree(
            5,
            Tree(3, Tree(2), Tree(5)),
            Tree(7, Tree(1), Tree(0, Tree(2), Tree(8, None, Tree(5)))))
        # Expected aggregates over those ten values:
        # sorted -> 0 1 2 2 3 5 5 5 7 8, so median = (3 + 5) / 2.
        self.my_median = 4.0
        self.my_mean = 3.8
        self.my_sum = 38
        self.str_my_tree = "[5[3[2[][]][5[][]]][7[1[][]][0[2[][]][8[][5[][]]]]]]"

    def test_str_representation(self):
        # Go through the builtin rather than calling the dunder directly.
        self.assertEqual(str(self.my_tree), self.str_my_tree)

    def test_positive_median_calculation(self):
        # Float result: compare with a tolerance instead of exact equality.
        self.assertAlmostEqual(self.my_tree.median_value(), self.my_median)

    def test_positive_sum_calculation(self):
        # Integer result: exact comparison is appropriate here.
        self.assertEqual(self.my_tree.sum_subtree_values(), self.my_sum)

    def test_positive_mean_calculation(self):
        # Float result: compare with a tolerance instead of exact equality.
        self.assertAlmostEqual(self.my_tree.mean_value(), self.my_mean)
def _load_json(path):
    """Decode the JSON document stored at *path*, closing the file afterwards."""
    with open(path) as handle:
        return json.load(handle)


def _fill_chart(tokens, rev_d, dd, back):
    """Fill the CKY score chart *dd* and back-pointer chart *back* in place."""
    for j, token in enumerate(tokens):
        # Width-one span: every label that rewrites to this token.
        for label, score in rev_d[token].items():
            dd[j][j][label] = score
            back[j][j][label] = token
        # Wider spans ending at j, shortest first, so both sub-spans of every
        # split point k are already complete when they are combined.
        for i in range(j - 1, -1, -1):
            cell = dd[i][j]
            cell_back = back[i][j]
            for k in range(i, j):
                left = dd[i][k]
                right = dd[k + 1][j]
                for B in left:
                    for C in right:
                        r_side = B + ' ' + C
                        if r_side not in rev_d:
                            continue
                        for key, value in rev_d[r_side].items():
                            candidate = value + left[B] + right[C]
                            # Keep only the best-scoring derivation per label.
                            if key not in cell or cell[key] < candidate:
                                cell[key] = candidate
                                cell_back[key] = str(k) + ' ' + B + ' ' + C


def _best_label(cell, floor=-100000000):
    """Return the highest-scoring label in *cell*, or '' if none exceeds *floor*."""
    best_label = ''
    best_score = floor
    for label, score in cell.items():
        if score > best_score:
            # Track the running maximum so the real arg-max is returned rather
            # than whichever qualifying label happens to be iterated last.
            best_score = score
            best_label = label
    return best_label


def func(sen_size, time):
    """Parse every input line with a CKY chart and print the resulting tree.

    Lines are read through ``fileinput.input()`` and tokenised with
    ``TreebankWordTokenizer``; tokens missing from the reverse grammar are
    replaced by ``'<unk>'``. For each line the best-scoring label covering the
    whole sentence is expanded through ``printpath`` into a ``Tree``, which is
    then passed through ``restore_unit()`` and ``unbinarize()`` and printed.
    An empty line is printed when no label covers the sentence (including
    when the input line has no tokens).

    Args:
        sen_size: list that receives the token count of each processed line.
        time: list that receives the chart-filling time of each line, in
            microseconds (allocation of the chart is not included).

    Reads ``d.text`` and ``rd.text`` from the current working directory.
    """
    # d: production rules A -> B C and A -> literal with their log probability.
    # NOTE(review): `d` is not read anywhere in this function; the load is kept
    # so a missing or malformed file still fails exactly as before.
    d = _load_json("d.text")
    # rev_d: reverse production rules, "B C" -> A and literal -> A, with score.
    rev_d = _load_json("rd.text")
    kTOKENIZER = TreebankWordTokenizer()

    for line in fileinput.input():
        tokens = [tok if tok in rev_d else '<unk>'
                  for tok in kTOKENIZER.tokenize(line)]
        n = len(tokens)
        dd = [[defaultdict(float) for _ in range(n)] for _ in range(n)]
        back = [[defaultdict(float) for _ in range(n)] for _ in range(n)]

        start_time = timeit.default_timer()
        _fill_chart(tokens, rev_d, dd, back)
        elapsed = timeit.default_timer() - start_time
        time.append(elapsed * 1000000)
        sen_size.append(n)

        # A line without tokens has no chart cell to inspect; treat it as
        # "no parse" instead of indexing into an empty chart.
        top_word = _best_label(dd[0][n - 1]) if n else ''

        if top_word:
            rr = Node(top_word, [])
            printpath(back, top_word, 0, n - 1, rr)
            t = Tree(rr)
            t.restore_unit()
            t.unbinarize()
            print(t.__str__())
        else:
            # Single-argument call form prints a blank line on Python 2 and 3.
            print('')
def test_string_empty_tree(self):
    # A Tree constructed from None must stringify to exactly this text.
    expected = '|---Google_Drive\n'
    empty_tree = Tree(None)
    rendered = empty_tree.__str__()
    self.assertEqual(rendered, expected)