def test_get_ordered_words(self):
    """Check that get_ordered_words() returns the words in sentence order.

    A tree is built from Stanford-style parse tuples of the form
    (token_index, word, [(dependency_label, [child_indices]), ...]) and the
    words it yields are joined with spaces and compared with the expected
    sentence.
    """
    # Token indices 4, 11 and 20 are absent from the fixture; presumably
    # they were punctuation tokens dropped upstream -- TODO confirm.
    sentence = [
        (0, None, [(u'root', [5])]),
        (1, u'For', []),
        (2, u'ten', []),
        (3, u'points', [(u'case', [1]), (u'nummod', [2])]),
        (5, u'identify', [(u'advcl', [17]), (u'nmod', [3]),
                          (u'dobj', [8])]),
        (6, u'this', []),
        (7, u'English', []),
        (8, u'king', [(u'det', [6]), (u'nmod', [10]), (u'amod', [7])]),
        (9, u'of', []),
        (10, u'Wessex', [(u'case', [9]), (u'appos', [15])]),
        (12, u'the', []),
        (13, u'only', []),
        (14, u'English', []),
        (15, u'king', [(u'advmod', [13]), (u'det', [12]),
                       (u'amod', [14])]),
        (16, u'to', []),
        (17, u'earn', [(u'xcomp', [22]), (u'mark', [16])]),
        (18, u'the', []),
        (19, u'epithet', [(u'det', [18])]),
        (21, u'the', []),
        (22, u'Great', [(u'dep', [19]), (u'det', [21])]),
    ]
    vocabulary = {
        'earn': 0,
        'English': 1,
        'epithet': 2,
        'For': 3,
        'Great': 4,
        'identify': 5,
        'king': 6,
        'of': 7,
        'only': 8,
        'points': 9,
        'ten': 10,
        'the': 11,
        'this': 12,
        'to': 13,
        'Wessex': 14,
    }
    # The literal used to list 'case' twice (4, then 5).  Python keeps only
    # the last value for a repeated key, so the effective mapping was always
    # 'case': 5 with index 4 unused; the dead duplicate has been dropped and
    # the effective values are unchanged.
    dep_dict = {
        'advcl': 0,
        'advmod': 1,
        'amod': 2,
        'appos': 3,
        'case': 5,
        'dep': 6,
        'det': 7,
        'dobj': 8,
        'mark': 9,
        'nmod': 10,
        'nummod': 11,
        'xcomp': 12,
    }
    # Placeholder answer string; this test only inspects the word order.
    answer = 'yo buddy!'

    tree = tree_from_stanford_parse_tuples(sentence, answer, vocabulary,
                                           dep_dict)
    result = ' '.join(tree.get_ordered_words(vocabulary))

    expected = ('For ten points identify this English king of Wessex '
                'the only English king to earn the epithet the Great')
    # assertEqual instead of the deprecated assertEquals alias, which was
    # removed in Python 3.12.
    self.assertEqual(result, expected)
def create_tree(sentences_path, sentence_ID_path, question_info_path,
                vocabulary_path, dependency_path, stanford_parsed_path,
                tree_list_path):
    """Build one dependency tree per sentence and pickle the list of trees.

    Every input path is a pickle file read in binary mode.  For each index
    ``k`` in ``range(len(sentences))`` the answer is taken from
    ``question_info[sentences_ID[k]][2]`` and passed, together with
    ``stanford_parsed[k]``, the vocabulary and the dependency mapping, to
    ``tree_from_stanford_parse_tuples``.  The resulting list is pickled to
    ``tree_list_path``.  Nothing is returned.
    """
    def _unpickle(path):
        # NOTE(review): unpickling executes arbitrary code from the file;
        # only use this on trusted, locally produced data.
        with open(path, 'rb') as handle:
            return cPickle.load(handle)

    # Inputs are read in the same order as the parameters are declared.
    sentences = _unpickle(sentences_path)
    sentence_ids = _unpickle(sentence_ID_path)
    question_info = _unpickle(question_info_path)
    vocabulary = _unpickle(vocabulary_path)
    dependency = _unpickle(dependency_path)
    stanford_parsed = _unpickle(stanford_parsed_path)

    def _tree_at(position):
        # Resolve the answer before touching the parse so lookup failures
        # surface in the same order as a plain sequential loop would.
        answer = question_info[sentence_ids[position]][2]
        return tree_from_stanford_parse_tuples(
            stanford_parsed[position], answer, vocabulary, dependency)

    # The sentences themselves are only used to decide how many trees to
    # build; the parsed tuples supply the actual content.
    trees = [_tree_at(position) for position in range(len(sentences))]

    with open(tree_list_path, 'wb') as sink:
        cPickle.dump(trees, sink)