def _find_mention_subtrees(tree, mention):
    """Collect the subtrees of ``tree`` whose leaves include each mention token.

    Args:
        tree: Parsed sentence tree exposing ``subtrees()`` and ``leaves()``.
        mention: Indexable record; ``mention[0]`` and ``mention[1]`` are the
            two tokens compared (with ``==``) against the tree leaves.

    Returns:
        A ``(first_subtrees, second_subtrees)`` pair of lists. A subtree is
        appended once per matching leaf, so it may appear more than once.
    """
    first, second = mention[0], mention[1]
    first_subtrees = []
    second_subtrees = []
    for subtree in tree.subtrees():
        for leaf in subtree.leaves():
            # The first token takes precedence: a leaf equal to both tokens
            # only ever counts towards the first list.
            if leaf == first:
                first_subtrees.append(subtree)
            elif leaf == second:
                second_subtrees.append(subtree)
    return first_subtrees, second_subtrees


def open_syntax_file(file, mentions):
    """Compute a syntactic tree distance for every mention pair of a document.

    Reads ``<parsed_path>/<file>.head.rel.tokenized.raw.parse`` and treats each
    line as one bracketed parse tree. Mentions are consumed in order: a line is
    only considered when it contains the first token of the current mention (a
    plain substring test), and consecutive mentions sharing that first token
    are then looked up among the leaves of that line's tree.

    Args:
        file: Base name of the document; the parse-file suffix is appended.
        mentions: Ordered sequence of records whose items ``[0]`` and ``[1]``
            are the two tokens of a mention pair.

    Returns:
        A list with exactly one entry per mention: the value returned by
        ``get_tree_distance`` for the two selected subtrees when both tokens
        were found as leaves of the same tree, otherwise ``-1``. An empty
        ``mentions`` yields ``[]`` without touching the file system.

    Raises:
        OSError: If the parse file cannot be opened.
    """
    total = len(mentions)
    if total == 0:
        # Nothing to measure; also avoids indexing mentions[0] below.
        return []

    parse_path = os.path.join(parsed_path, file + '.head.rel.tokenized.raw.parse')
    with open(parse_path) as raw_syntax_file:
        lines = raw_syntax_file.readlines()

    distances = []
    mention_counter = 0
    prev_mention = mentions[0][0]
    for line in lines:
        if prev_mention not in line:
            continue
        if not line.strip():
            # A blank line cannot be parsed into a tree; skip it.
            continue

        # Parse once per line and reuse the tree for every mention tried
        # against it.
        full_tree = ParentedTree.fromstring(line)

        # Bounds check first so the index is never evaluated out of range.
        while mention_counter < total and mentions[mention_counter][0] == prev_mention:
            arg1_subtrees, arg2_subtrees = _find_mention_subtrees(
                full_tree, mentions[mention_counter]
            )
            mention_counter += 1
            if arg1_subtrees and arg2_subtrees:
                # get_smallest_height returns a 2-tuple; only the subtree
                # (second item) is needed here.
                _, arg1_subtree = get_smallest_height(arg1_subtrees)
                _, arg2_subtree = get_smallest_height(arg2_subtrees)
                distances.append(get_tree_distance(arg1_subtree, arg2_subtree))
                # NOTE(review): once a pair is found the scan moves on to the
                # next line, even if the following mention shares the same
                # first token. Kept as in the original; confirm this is
                # intended.
                break
            distances.append(-1)

        if mention_counter == total:
            # Every mention has been assigned a distance.
            return distances
        prev_mention = mentions[mention_counter][0]

    # Mentions never matched against any line get the "not found" marker so
    # the result always lines up with ``mentions``.
    distances.extend([-1] * (total - len(distances)))
    return distances
def get_terminals(ptree: ParentedTree) -> list:
    """Return the subtrees of ``ptree`` that have no nested subtree.

    A subtree is kept when iterating its own ``subtrees()`` yields exactly one
    item (presumably the subtree itself, i.e. all of its children are leaves).

    Args:
        ptree: The tree to search.

    Returns:
        The selected subtrees as a list, in the order ``ptree.subtrees``
        produces them.

    Raises:
        AssertionError: If the number of selected subtrees differs from the
            number of leaves in ``ptree``.
    """

    def has_single_subtree(node) -> bool:
        # Materialize the iterator so its items can be counted.
        return len(list(node.subtrees())) == 1

    lowest_subtrees = list(ptree.subtrees(filter=has_single_subtree))
    leaf_count = len(ptree.leaves())
    # Sanity check: one selected subtree per leaf. Pull out to unit test?
    assert leaf_count == len(lowest_subtrees)
    return lowest_subtrees