def pennconverter_find_heads(tree, head_map=None):
    """Record a head in head_map for tree and for every node below it.

    Subtrees are processed first, then exactly one of the following rules is
    applied to ``tree`` itself:

    1. A node that carries a word is its own head; the head stored is the
       tuple ``(tree.span, tree.word, tree.label)``.
    2. A node for which ``pennconverter_is_coord`` is true is handled by
       ``add_if_match`` on {'CC', 'CONJP'}, then on {',', ':'}; if both
       report failure the head of the last child is used.
    3. A node whose base label (first element of
       ``treebanks.split_label_type_and_function``) is not in
       ``pennconverter_mapping_table`` is delegated to ``collins_NP`` for
       NP / NML, to ``pennconverter_PP`` for PP / WHPP, and otherwise takes
       the head of its last child.
    4. Otherwise the table entry ``(direction, labels)`` is used: for each
       label in order, the children are scanned (right-to-left when
       direction is 'right', left-to-right otherwise) and the first child
       whose label matches supplies the head.  ``str`` entries are compared
       for equality; any other entry is handed to ``re.match`` as a pattern.
       If nothing matches, the first child is used for direction 'left' and
       the last child for anything else.

    Args:
        tree: node to process; must expose ``subtrees``, ``span``, ``label``
            and ``word``.
        head_map: mapping being filled in.  When omitted, a new dict is
            created and ``tree`` is first passed through
            ``treebanks.remove_coindexation(tree, False)``.

    Returns:
        The (possibly newly created) head_map.
    """
    if head_map is None:
        # Outermost call: start a fresh map and work on the tree returned by
        # remove_coindexation.
        head_map = {}
        tree = treebanks.remove_coindexation(tree, False)

    # Children first, so their heads are available to get_head() below.
    for subtree in tree.subtrees:
        pennconverter_find_heads(subtree, head_map)

    if log:
        # Single pre-formatted argument: prints the same text whether
        # ``print`` is the Python 2 statement or the Python 3 function.
        print("Head for %s %s" % (tree.span, tree.label))

    # A word is its own head
    if tree.word is not None:
        add_head(head_map, tree, (tree.span, tree.word, tree.label))
        return head_map

    # First handle conjunctions
    if pennconverter_is_coord(tree):
        found = (add_if_match(tree, {'CC', 'CONJP'}, head_map, True) or
                 add_if_match(tree, {',', ':'}, head_map, True))
        if not found:
            add_head(head_map, tree, get_head(head_map, tree.subtrees[-1]))
        return head_map

    # If the label for this node is not in the table we are either at the
    # bottom, at an NP, or have an error
    base_label = treebanks.split_label_type_and_function(tree.label)[0]
    if base_label not in pennconverter_mapping_table:
        if base_label in ['NP', 'NML']:
            collins_NP(tree, head_map)
        elif base_label in ['PP', 'WHPP']:
            pennconverter_PP(tree, head_map)
        else:
            add_head(head_map, tree, get_head(head_map, tree.subtrees[-1]))
        return head_map

    # Look through and take the first/last occurrence that matches
    info = pennconverter_mapping_table[base_label]
    direction = info[0]
    children = tree.subtrees
    if direction == 'right':
        scan_order = list(reversed(children))
    else:
        scan_order = children

    for label in info[1]:
        exact = isinstance(label, str)
        for child in scan_order:
            if exact:
                matched = child.label == label
            else:
                matched = re.match(label, child.label) is not None
            if matched:
                add_head(head_map, tree, get_head(head_map, child))
                return head_map

    # Final fallback
    fallback = children[0] if direction == 'left' else children[-1]
    add_head(head_map, tree, get_head(head_map, fallback))
    return head_map
def get_signature(head_map, tree):
    """Return the key under which tree is stored in head_map, or None.

    Three candidate keys are tried in order, and the first one present in
    head_map is returned:

    1. ``(tree.span, tree.label)``
    2. ``(tree.wordspan, <label of treebanks.remove_coindexation(tree, False)>)``
    3. ``(tree.wordspan, <first element of
       treebanks.split_label_type_and_function(tree.label)>)``

    Later candidates are only computed when the earlier ones miss.
    """
    # Each builder is called only if every earlier key was absent.
    key_builders = (
        lambda: (tree.span, tree.label),
        lambda: (tree.wordspan,
                 treebanks.remove_coindexation(tree, False).label),
        lambda: (tree.wordspan,
                 treebanks.split_label_type_and_function(tree.label)[0]),
    )
    for build_key in key_builders:
        signature = build_key()
        if signature in head_map:
            return signature
    return None
def get_head(head_map, tree, amend_for_trace=False):
    """Look up the head recorded for tree in head_map.

    Candidate keys are tried in order and the value for the first key found
    is returned:

    1. ``(tree.span, tree.label)`` -- skipped when amend_for_trace is true
    2. ``(tree.wordspan, <label of treebanks.remove_coindexation(tree, False)>)``
    3. ``(tree.wordspan, <first element of
       treebanks.split_label_type_and_function(tree.label)>)``

    Returns None when none of the keys is present.
    """
    def candidate_keys():
        # Generator, so each later key is built only after the previous
        # one has missed.
        if not amend_for_trace:
            yield (tree.span, tree.label)
        yield (tree.wordspan,
               treebanks.remove_coindexation(tree, False).label)
        yield (tree.wordspan,
               treebanks.split_label_type_and_function(tree.label)[0])

    for key in candidate_keys():
        if key in head_map:
            return head_map[key]
    return None
def without_func(label):
    """Return the first element of
    ``treebanks.split_label_type_and_function(label)``.
    """
    label_parts = treebanks.split_label_type_and_function(label)
    return label_parts[0]