def appositive(pair, parsed_sentences):
    """Tell whether the two mentions of ``pair`` sit in an ``NP , NP`` pattern.

    The test succeeds when all of the following hold:

    * both mentions carry the same ``sentenceID``;
    * the lowest node spanning both mention start tokens is labelled ``'NP'``;
    * the two children of that node leading to each start token are both
      labelled ``'NP'``;
    * those two children are exactly two positions apart and the child
      between them is labelled ``','``.

    Args:
        pair: object with ``first`` and ``second`` mentions, each exposing
            ``sentenceID`` and ``offsets`` (``offsets[0]`` is the leaf index
            of the mention's first token). Values may be ints or numeric
            strings.
        parsed_sentences: container of parse trees indexed by the integer
            sentence id.

    Returns:
        bool: ``True`` if the pattern is found, ``False`` otherwise
        (including mentions from different sentences or mentions that start
        on the same token).
    """
    if pair.first.sentenceID != pair.second.sentenceID:
        return False

    # int() rather than eval(): it never executes arbitrary text and it also
    # accepts zero-padded values such as "007", which eval() rejects.
    index1 = int(pair.first.offsets[0])
    index2 = int(pair.second.offsets[0])
    if index1 == index2:
        # Same start token: the spanning node would be the leaf itself, so
        # there are no two sibling constituents to compare.
        return False

    tree = parsed_sentences[int(pair.first.sentenceID)]
    path1 = tree.leaf_treeposition(index1)
    path2 = tree.leaf_treeposition(index2)

    low, high = sorted((index1, index2))
    dominate_path = tree.treeposition_spanning_leaves(low, high + 1)
    dept = len(dominate_path)

    def label_of(node):
        # Leaves are not trees and have no label(); report None for them
        # instead of raising AttributeError.
        get_label = getattr(node, "label", None)
        return get_label() if callable(get_label) else None

    # Walk down to the lowest common ancestor; the tree is only read, so no
    # copy is required.
    subtree = tree
    for step in dominate_path:
        subtree = subtree[step]

    child1 = path1[dept]
    child2 = path2[dept]
    if label_of(subtree) != 'NP':
        return False
    if label_of(subtree[child1]) != 'NP' or label_of(subtree[child2]) != 'NP':
        return False
    if abs(child1 - child2) != 2:
        return False
    # The single child between the two NPs must be the comma.
    return label_of(subtree[min(child1, child2) + 1]) == ','
def tree_distance(pair, parsed_sentences):
    """Return the number of tree edges between the two mentions' start leaves.

    The distance is ``len(path1) + len(path2) - 2 * shared`` where ``path1``
    and ``path2`` are the tree positions of the two leaves and ``shared`` is
    the length of their common prefix.

    Args:
        pair: object with ``first`` and ``second`` mentions, each exposing
            ``sentenceID`` and ``offsets`` (``offsets[0]`` is the leaf index
            of the mention's first token). Values may be ints or numeric
            strings.
        parsed_sentences: container of parse trees indexed by the integer
            sentence id.

    Returns:
        int: the distance when both mentions are in the same sentence.
        str: the literal ``"unknown"`` when they are in different sentences.
    """
    if pair.first.sentenceID != pair.second.sentenceID:
        return "unknown"

    # int() rather than eval(): it never executes arbitrary text and it also
    # accepts zero-padded values such as "007", which eval() rejects.
    tree = parsed_sentences[int(pair.first.sentenceID)]
    path1 = tree.leaf_treeposition(int(pair.first.offsets[0]))
    path2 = tree.leaf_treeposition(int(pair.second.offsets[0]))

    # Count the steps both paths share from the root downwards.
    shared = 0
    for step1, step2 in zip(path1, path2):
        if step1 != step2:
            break
        shared += 1

    return len(path1) + len(path2) - 2 * shared
def padding_leaves(tree):
    """Tag every leaf of ``tree`` with its index, in place.

    Two passes are made over the leaves:

    1. Each leaf ``w`` at index ``i`` is replaced by the string
       ``"{i:03}||||w"`` (zero-padded three-digit index, four pipes, then the
       original leaf).
    2. Each leaf whose parent has more than one child is wrapped in a new
       ``Tree`` that carries the parent's label and has the leaf as its only
       child.

    Args:
        tree: parse tree to modify; leaves must support concatenation with
            ``str``.

    Returns:
        None. ``tree`` is mutated.
    """
    # Leaf positions are computed once. Pass 1 only changes leaf values, and
    # wrapping one leaf in pass 2 does not move any other leaf, so the same
    # positions remain valid for every leaf at the moment it is processed.
    leaf_count = len(tree.leaves())
    positions = [tree.leaf_treeposition(i) for i in range(leaf_count)]

    for index, position in enumerate(positions):
        tree[position] = "{0:03}".format(index) + "||||" + tree[position]

    for position in positions:
        parent = tree[position[:-1]]
        if len(parent) > 1:
            tree[position] = Tree(parent.label(), [tree[position]])
def padding_leaves_wnum(leaves, tree):
    """Overwrite the first ``len(leaves)`` leaves of ``tree`` with their index.

    The leaf at index ``i`` is replaced, in place, by the decimal string of
    ``i``. Only the length of ``leaves`` is used.

    Args:
        leaves: sized collection; its length is the number of leaves to
            rewrite.
        tree: parse tree to modify.

    Returns:
        None. ``tree`` is mutated.
    """
    total = len(leaves)
    for leaf_no in range(total):
        position = tree.leaf_treeposition(leaf_no)
        tree[position] = f'{leaf_no}'
def depth(entity, parsed_sentences):
    """Return the depth of a mention's first token in its parse tree.

    The depth is the length of the tree position of the leaf at
    ``entity.offsets[0]``.

    Args:
        entity: mention exposing ``sentenceID`` and ``offsets``
            (``offsets[0]`` is the leaf index of its first token). Values may
            be ints or numeric strings.
        parsed_sentences: container of parse trees indexed by the integer
            sentence id.

    Returns:
        int: number of steps from the root of the tree down to the leaf.
    """
    # int() rather than eval(): it never executes arbitrary text and it also
    # accepts zero-padded values such as "007", which eval() rejects.
    leaf_index = int(entity.offsets[0])
    tree = parsed_sentences[int(entity.sentenceID)]
    return len(tree.leaf_treeposition(leaf_index))
def get_PTP(pair, parsed_sentences):
    """Return the distinct node labels on the tree path between two mentions.

    The path runs from the first token of ``pair.first`` up to the lowest
    common ancestor of the two tokens and down to the first token of
    ``pair.second``. Labels are de-duplicated, so the returned list has no
    defined order.

    Args:
        pair: object with ``first`` and ``second`` mentions; ``offsets[0]``
            of each is used directly as an integer leaf index, and
            ``pair.first.sentenceID`` is used directly as the key into
            ``parsed_sentences``.
        parsed_sentences: container of NLTK parse trees.

    Returns:
        list: the unique labels met on the path.

    Raises:
        ValueError: if the upward and downward halves of the path do not
            meet at the same label.
    """
    m1_index = pair.first.offsets[0]
    m2_index = pair.second.offsets[0]
    tree = parsed_sentences[pair.first.sentenceID]

    # Kept from the original: a second offset at or past the end of the
    # sentence is pulled back by one position.
    if m2_index >= len(tree.leaves()):
        m2_index -= 1

    path1 = list(tree.leaf_treeposition(m1_index))
    path2 = list(tree.leaf_treeposition(m2_index))

    # Length of the prefix shared by both leaf positions, i.e. the position
    # of the lowest common ancestor.
    shared = 0
    for step1, step2 in zip(path1, path2):
        if step1 != step2:
            break
        shared += 1

    def walk(node, path):
        # Follow ``path`` down from ``node`` and collect the label of the
        # start node plus every tree (non-leaf) node visited. The tree is
        # only read, so no copy is needed.
        labels = [node.label()]
        for step in path:
            node = node[step]
            if isinstance(node, nltk.tree.Tree):
                labels.append(node.label())
        return node, labels

    ancestor, _ = walk(tree, path1[:shared])
    _, down_to_m1 = walk(ancestor, path1[shared:])
    _, down_to_m2 = walk(ancestor, path2[shared:])

    # Only the first half is reversed: it is read from mention 1 *up* to the
    # ancestor, while the second half is read from the ancestor *down* to
    # mention 2. Both halves therefore meet at the ancestor's label, which is
    # dropped once from the second half to avoid counting it twice.
    up_from_m1 = down_to_m1[::-1]
    if up_from_m1[-1] == down_to_m2[0]:
        return list(set(up_from_m1 + down_to_m2[1:]))
    raise ValueError(
        "Path cannot connect:%s,%s" % (up_from_m1, down_to_m2))
def remap_chars(tree):
    """Replace leaves of ``tree`` that are keys of ``SPECIAL_CHAR_MBACK``.

    Every leaf found as a key in the module-level ``SPECIAL_CHAR_MBACK``
    mapping is overwritten, in place, by the mapped value. Other leaves are
    left untouched.

    Args:
        tree: parse tree to modify.

    Returns:
        None. ``tree`` is mutated.
    """
    # The leaves are read once up front. Replacing one leaf changes neither
    # the position nor the value of any other leaf, so the snapshot stays
    # valid throughout the loop.
    for index, leaf in enumerate(tree.leaves()):
        if leaf in SPECIAL_CHAR_MBACK:
            tree[tree.leaf_treeposition(index)] = SPECIAL_CHAR_MBACK[leaf]