示例#1
0
def check_puncs(forest, pos2):
	'''Prune a copy of `forest` so that its punctuation agrees with `pos2`.

	forest -- the forest to filter; all work is done on forest.copy()
	pos2   -- reference tag sequence, one tag per word; len(pos2) must
	          equal len(forest)

	For every word position, terminal (or sp_terminal) nodes whose
	punctuation status, as judged by is_punc, disagrees with the tag in
	`pos2` are flagged `deleted`; deleted terminals are then removed from
	the node order.  Each remaining non-terminal keeps only the edges that
	  * are not unary cycles,
	  * do not use a deleted terminal, and
	  * do not turn into a unary cycle once punctuation is collapsed
	    (a same-label child covering the same mapped span, root excepted).

	Returns a pair (mapping, newforest).  mapping(i), for i in
	0..len(pos2), is the number of non-punctuation tags in pos2[:i].

	Raises AssertionError when the forest and `pos2` differ in length.

	CAUTION: must not modify the original forest.
	NOTE(review): that only holds if forest.copy() also copies the nodes,
	since node.deleted and node.edges are assigned below -- confirm.
	CAUTION: deletes redundant layers.'''

	newforest = forest.copy()
	# Build the message from values that exist in this scope only, so a
	# mismatch surfaces as an AssertionError rather than a NameError.
	assert len(newforest) == len(pos2), \
		"different sentence lengths!\nforest: %d\npos2: %d" % (len(newforest), len(pos2))

	# idx_mapping[i] == how many non-punctuation tags precede position i
	idx_mapping = {}
	nonpunc_seen = 0
	for i, a in enumerate(pos2):
		idx_mapping[i] = nonpunc_seen
		tag_is_punc = is_punc(a)

		# delete the non-consistent tags of this word in the newforest
		for node in newforest.cells[(i, i+1)]:
			if node.is_terminal() or node.sp_terminal():
				node.deleted = is_punc(node.label) ^ tag_is_punc
				if node.deleted:
					print >> logs, node, "deleted"

		if not tag_is_punc:
			nonpunc_seen += 1

	# Closing boundary; indexed by len(pos2) rather than the loop variable
	# so that an empty sentence is handled too.
	idx_mapping[len(pos2)] = nonpunc_seen

	# TODO: CLEAN UP THIS PART!
	# Drop deleted terminals, then rebuild the id -> node table to match.
	newforest.nodeorder = [node for node in newforest
						   if not (node.is_terminal() and node.deleted)]
	newforest.nodes = {}
	for node in newforest:
		newforest.nodes[node.iden] = node
	newforest.rehash()

	for node in newforest:
		if node.is_terminal():
			continue

		mapped_span = node.mapped_span(idx_mapping)
		newedges = []
		for edge in node.edges:
			if edge.unary_cycle():
				print >> logs, edge, "deleted (cycle)"
				continue

			for sub in edge.subs:
				if sub.is_terminal() and sub.deleted:
					print >> logs, edge, "deleted (punc)"
					break
				## make sure no induced unary cycle
				if (not node.is_root() and sub.label == node.label
						and sub.mapped_span(idx_mapping) == mapped_span):
					print >> logs, edge, "deleted (induced cycle)"
					break
			else:
				# no child disqualified this edge
				newedges.append(edge)

		node.edges = newedges

	return (lambda x: idx_mapping[x]), newforest
示例#2
0
    def prepare_stuff(self, label, wrd=None, sym=True):
        '''Set up the cached bookkeeping fields of this node.

        label -- tag stored in tag_seq when the node carries a word
        wrd   -- the word of a terminal node; None means non-terminal
        sym   -- if true, the word goes through symbol() before being stored

        The punctuation and conjunction flags are computed from self.label,
        not from the `label` argument, so self.label has to be set before
        this method runs.
        '''

        ## placeholders, to be evaluated once called (like a C++ const member)
        self._coordination = None
        self._str = None

        ## heads-info: a separate HeadInfo record for each head kind
        self.headinfo = {heads.SEM: heads.HeadInfo(), heads.SYN: heads.HeadInfo()}

        if wrd is None:
            ## non-terminal: no word, no flags, empty yield
            self._terminal = self._punctuation = self._conjunction = False

            self.word_seq = []
            self.tag_seq = []

        else:
            ## terminal: remember the word and classify the tag
            if sym:
                self.word = symbol(wrd)
            else:
                self.word = wrd

            self._terminal = True
            self._punctuation = is_punc(self.label)
            self._conjunction = is_conj(self.label)

            self.word_seq = [self.word]
            self.tag_seq = [label]
示例#3
0
    def prepare_stuff(self, label, wrd=None, sym=True):
        '''Populate the derived attributes of a freshly built node.

        A node given a word (`wrd` is not None) is marked terminal: the word
        is stored (wrapped by symbol() when `sym` is true), its punctuation
        and conjunction flags are taken from is_punc/is_conj on self.label,
        and word_seq/tag_seq become one-element lists.  Without a word the
        node is marked non-terminal, both flags are False and both sequences
        start out empty.

        NOTE: the flags read self.label rather than the `label` parameter,
        so self.label must already exist on the instance.
        '''

        ## lazily filled caches -- evaluated once, on first request
        self._coordination = None
        self._str = None

        ## heads-info
        self.headinfo = {
            heads.SEM: heads.HeadInfo(),
            heads.SYN: heads.HeadInfo(),
        }

        has_word = wrd is not None
        if has_word:
            if sym:
                self.word = symbol(wrd)
            else:
                self.word = wrd

            self._terminal = True
            self._punctuation = is_punc(self.label)
            self._conjunction = is_conj(self.label)

            self.word_seq, self.tag_seq = [self.word], [label]

        else:
            self._terminal = False
            self._punctuation = False
            self._conjunction = False

            self.word_seq, self.tag_seq = [], []
示例#4
0
 def is_punctuation(self):
     '''Return the result of is_punc() for this object's label.'''
     tag = self.label
     return is_punc(tag)
示例#5
0
	def is_punctuation(self):
		'''Report whether is_punc() accepts the label stored on this object.'''
		own_label = self.label
		return is_punc(own_label)