Пример #1
0
    def __init__(self, iden, labelspan, size, fvector, sent):
        """Initialise a node from its identifier and a ``"LABEL [i-j]"`` string.

        Args:
            iden: stored unchanged as ``self.iden``.
            labelspan: text such as ``"NP [0-3]"``. It must split on
                whitespace into exactly two tokens: the label and a span
                whose first and last characters are dropped before the
                remainder is split on ``"-"`` and converted to ints.
            size: when equal to 0 the node carries a word, read from
                ``sent`` at the start of the span; otherwise it has none.
            fvector: stored unchanged as ``self.fvector``.
            sent: indexable sequence the word is looked up in.
        """
        self.iden = iden

        # e.g. "NP [0-3]" -> label "NP", span (0, 3)
        label, bracketed = labelspan.split()
        self.span = tuple(int(piece) for piece in bracketed[1:-1].split("-"))

        # A trailing "*" flags the node as spurious and is not part of the label.
        starred = label[-1] == "*"
        if starred:
            label = label[:-1]
        self._spurious = starred

        # "S1" is renamed to "TOP" before the label goes through symbol().
        self.label = symbol("TOP" if label == "S1" else label)
        self.edges = []

        if size == 0:
            word = sent[self.span[0]]
        else:
            word = None
        # prepare_stuff receives the star-stripped label *before* the
        # S1 -> TOP renaming; it reads self.label, which is already set.
        self.prepare_stuff(label, word)

        self.fvector = fvector
        self._root = False
        self._bin_len = None
Пример #2
0
    def __init__(self, label, span, wrd=None, subs=None, is_root=False, sym=True):
        """Create a tree node that is either a leaf (``wrd``) or internal (``subs``).

        Args:
            label: node label; passed through ``symbol`` unless ``sym`` is
                false, in which case it is stored as given.
            span: stored unchanged as ``self.span``.
            wrd: the word of a leaf node; must be None for an internal node.
            subs: iterable of child nodes of an internal node; must be None
                for a leaf. Each child must expose ``word_seq`` and
                ``tag_seq``, which are appended to this node's sequences.
            is_root: stored as ``self._root``.
            sym: forwarded to ``prepare_stuff``; also controls whether
                ``label`` is passed through ``symbol`` here.

        Raises:
            AssertionError: if ``wrd`` and ``subs`` are both None or both given.
        """
        self.parentlabel = None  ### TODO: FIX THIS!

        ## in forest.assemble, don't symbol again
        self.label = symbol(label) if sym else label
        self.span = span

        # Exactly one of wrd / subs must be supplied.  This is raised
        # explicitly instead of using `assert` so the check still runs under
        # `python -O`; the exception type and message are unchanged.
        if (wrd is None) == (subs is None):
            raise AssertionError("bad tree")

        # Sets self._terminal, self.word_seq and self.tag_seq (among others).
        self.prepare_stuff(label, wrd, sym)

        if not self._terminal:
            self.subs = subs
            # An internal node's sequences are the concatenation of its children's.
            for sub in subs:
                self.word_seq += sub.word_seq
                self.tag_seq += sub.tag_seq

        self._root = is_root

        ## features
        self._bin_len = None

        # for heads feature
        self.allheads = {}
        self.ccheads = {}
        self.twolevels = {}

        # for headtree feature
        self.headspath = {}
Пример #3
0
	def __init__(self, iden, labelspan, size, fvector, sent):
		"""Set up a node described by an id and a ``"LABEL [i-j]"`` string.

		Args:
			iden: stored unchanged as ``self.iden``.
			labelspan: text such as ``"NP [0-3]"``; it must split on
				whitespace into exactly a label and a bracketed span.
			size: a value equal to 0 means the node's word is read from
				``sent`` at the span start; any other value means no word.
			fvector: stored unchanged as ``self.fvector``.
			sent: indexable sequence used for the word lookup.
		"""
		self.iden = iden

		# "NP [0-3]" -> raw label "NP" and span text "[0-3]"
		raw_label, span_text = labelspan.split()
		# Drop the surrounding brackets, then read the "-"-separated integers.
		bounds = span_text[1:-1].split("-")
		self.span = tuple(int(bound) for bound in bounds)

		# A trailing "*" marks the node as spurious; strip it from the label.
		self._spurious = False
		if raw_label[-1] == "*":
			raw_label = raw_label[:-1]
			self._spurious = True

		# "S1" is renamed to "TOP"; the result is then passed through symbol().
		shown_label = raw_label
		if raw_label == "S1":
			shown_label = "TOP"
		self.label = symbol(shown_label)
		self.edges = []

		word = None
		if size == 0:
			word = sent[self.span[0]]
		# Note: the star-stripped but un-renamed label is what gets passed on.
		self.prepare_stuff(raw_label, word)

		self.fvector = fvector
		self._root = False
		self._bin_len = None
Пример #4
0
    def __init__(self,
                 label,
                 span,
                 wrd=None,
                 subs=None,
                 is_root=False,
                 sym=True):
        """Build a tree node: a leaf when ``wrd`` is given, internal when ``subs`` is.

        Args:
            label: node label; stored via ``symbol(label)`` when ``sym`` is
                true, otherwise stored as given.
            span: stored unchanged as ``self.span``.
            wrd: word of a leaf node (None for internal nodes).
            subs: iterable of children of an internal node (None for
                leaves). The ``word_seq`` and ``tag_seq`` of every child
                are appended to this node's own sequences.
            is_root: stored as ``self._root``.
            sym: forwarded to ``prepare_stuff`` and used to decide whether
                ``label`` goes through ``symbol``.

        Raises:
            AssertionError: unless exactly one of ``wrd`` / ``subs`` is None.
        """
        self.parentlabel = None  ### TODO: FIX THIS!

        ## in forest.assemble, don't symbol again
        self.label = symbol(label) if sym else label
        self.span = span

        # Validate with an explicit raise rather than `assert`: assert
        # statements are removed under `python -O`, which would let a bad
        # combination of arguments through.  Same exception type and message.
        exactly_one_given = (wrd is None) ^ (subs is None)
        if not exactly_one_given:
            raise AssertionError("bad tree")

        # Sets self._terminal, self.word_seq and self.tag_seq (among others).
        self.prepare_stuff(label, wrd, sym)

        if not self._terminal:
            self.subs = subs
            # Concatenate the children's word and tag sequences in order.
            for sub in subs:
                self.word_seq += sub.word_seq
                self.tag_seq += sub.tag_seq

        self._root = is_root

        ## features
        self._bin_len = None

        # for heads feature
        self.allheads = {}
        self.ccheads = {}
        self.twolevels = {}

        # for headtree feature
        self.headspath = {}
Пример #5
0
    def prepare_stuff(self, label, wrd=None, sym=True):
        """Reset cached fields and initialise terminal/non-terminal state.

        ``self.label`` must already be set: it is read when ``wrd`` is given.

        Args:
            label: used as the single entry of ``tag_seq`` for a terminal.
            wrd: the node's word, or None for a non-terminal.
            sym: when true, ``wrd`` is passed through ``symbol`` before
                being stored.
        """
        # Lazily evaluated values; None means "not computed yet"
        # (original note: same as C++'s const).
        self._str = None
        self._coordination = None

        ## heads-info: one fresh HeadInfo per head kind
        head_records = {}
        head_records[heads.SEM] = heads.HeadInfo()
        head_records[heads.SYN] = heads.HeadInfo()
        self.headinfo = head_records

        if wrd is None:
            # Non-terminal: no word, empty sequences, never punctuation/conjunction.
            self._terminal = False
            self._punctuation = False
            self._conjunction = False
            self.word_seq = []
            self.tag_seq = []
            return

        # Terminal: store the word and derive the flags from self.label.
        if sym:
            wrd = symbol(wrd)
        self.word = wrd

        self._terminal = True
        self._punctuation = is_punc(self.label)
        self._conjunction = is_conj(self.label)

        self.word_seq = [self.word]
        self.tag_seq = [label]
Пример #6
0
    def __init__(self, iden, labelspan, size, fvector, sent):
        """Initialise a forest node from its id and a ``"LABEL [i-j]"`` string.

        Args:
            iden: stored as ``self.iden``; its hash is cached in ``self._hash``.
            labelspan: text such as ``"NP [0-3]"``; it must split on
                whitespace into exactly a label and a bracketed span.
            size: when equal to 0 the node's word is read from ``sent`` at
                the span start; otherwise the node has no word.
            fvector: stored unchanged as ``self.fvector``.
            sent: sequence of strings; indexed for the word and sliced over
                the first two span values for the surface string.
        """
        self.iden = iden

        # e.g. "NP [0-3]" -> label "NP", span (0, 3)
        label, bracketed = labelspan.split()
        self.span = tuple(int(piece) for piece in bracketed[1:-1].split("-"))

        # A trailing "*" flags the node as spurious and is removed from the label.
        starred = label[-1] == "*"
        if starred:
            label = label[:-1]
        self._spurious = starred

        # "S1" is renamed to "TOP" before the label goes through symbol().
        self.label = symbol("TOP" if label == "S1" else label)
        self.edges = []

        # new features
        self.frags = []
        # self.tfedges = []

        # new feature: subtree str created for bp rules,
        # NP(NN 'ch') -> lhs(bp) ### feats
        self.subtree = ''

        ## N.B.: parse forest node can be terminal
        if size == 0:
            word = sent[self.span[0]]
        else:
            word = None

        ## now in MT forest, nodes are always non-final.
        ## hyperedges can be final (terminal).

        ## prepare_stuff: in tree.py (per the original note).  It is given
        ## the star-stripped label *before* the S1 -> TOP renaming.
        self.prepare_stuff(label, word)

        self.fvector = fvector
        self._root = False
        self._bin_len = None

        # surface string: the items of sent over the first two span values,
        # joined with no separator
        begin, end = self.span[0], self.span[1]
        self.surface = '%s' % ''.join(sent[begin:end])

        self._hash = hash(self.iden)
Пример #7
0
    def prepare_stuff(self, label, wrd=None, sym=True):
        """Initialise cached fields, head info and the word/tag sequences.

        Reads ``self.label`` when ``wrd`` is given, so the caller must set
        it first.

        Args:
            label: becomes the only element of ``tag_seq`` for a terminal.
            wrd: the word of a terminal node; None marks a non-terminal.
            sym: whether ``wrd`` is passed through ``symbol`` before storing.
        """
        ## to be evaluated once called (same as C++'s const)
        self._coordination = None
        self._str = None

        ## heads-info
        sem_info = heads.HeadInfo()
        syn_info = heads.HeadInfo()
        self.headinfo = {heads.SEM: sem_info, heads.SYN: syn_info}

        if wrd is not None:
            # Terminal node: keep the word and derive the flags from self.label.
            if sym:
                self.word = symbol(wrd)
            else:
                self.word = wrd

            self._terminal = True
            self._punctuation = is_punc(self.label)
            self._conjunction = is_conj(self.label)

            self.word_seq, self.tag_seq = [self.word], [label]
        else:
            # Non-terminal node: flags off, sequences start empty.
            self._terminal = False
            self._punctuation = self._conjunction = False

            self.word_seq, self.tag_seq = [], []