Пример #1
0
    def extractBreaks(self):
        """Build phrase and subphrase tiers from the break labels.

        Stated intent: construct the phrase and subphrase context tiers
        according to the given breaks, and link each word with its
        corresponding phrase and subphrase.

        Takes no arguments. Reads the "breaks" tier (described by the
        original author as a PointTier containing break labels) and the
        "words" tier through ``self.get_tier``, and uses ``self.xmin`` /
        ``self.xmax`` as the bounds of the two new PointTier objects.

        NOTE(review): this method looks like an unfinished refactoring and
        cannot run to completion as written. ``breaks4``, ``breaks3``,
        ``word``, ``sphrs`` and ``phrs`` are never assigned in this method,
        so unless they exist at module level (not visible here) the first
        use raises NameError. Further suspected defects are flagged inline.
        """
        # NOTE(review): `breaks` is fetched but never read again by the active
        # code below; the loops use `breaks4` / `breaks3` instead.
        breaks = self.get_tier("breaks")
        words = self.get_tier("words")
        phrases = PointTier("phrases", self.xmin, self.xmax)
        subphrases = PointTier("subphrases", self.xmin, self.xmax)

        # Disabled code kept for reference. It used to initialise and set the
        # per-word `break3` / `break4` flags that the counting passes at the
        # bottom of this method still read.
##        for w in words:
##            w.break3=None
##            w.break4=None

        # First pass: put phrasing information in words
##        o = 0
##        for w in words:
##            bs =  breaks.findBetween(w.xmin, w.xmax, offset=o)
##            for b in bs:
##                if '3' in b.mark:
##                    w.break3 = True
##                if '4' in b.mark:
##                    w.break4 = True
##                o = b.index
##        t = b3[0]
##        for w in words:
##            w.links[b3.name]=b3.findBetween(w.xmin, w.xmax, t)
##            t = w.links[b3.name][-1]
##        t = b4[0]
##        for w in words:
##            w.links[b4.name]=b4.findBetween(w.xmin, w.xmax, t)
##            t = w.links[b4.name][-1]

        # Second pass: word position in subphrases: words.findBetween(sph.xmin, sph.xmax)

        # Walk consecutive pairs (b1, b2) of `breaks4` and build one Subphrase
        # per pair.
        # NOTE(review): `breaks4` is not assigned anywhere in this method.
        # Presumably it should be derived from `breaks` (entries whose mark
        # contains '4', going by the disabled first pass) - TODO confirm.
        b1 = breaks4[0]
        for b2 in breaks4[1:]:
            # NOTE(review): `w` has not been assigned yet on the first
            # iteration; the comment above suggests `words.findBetween` was
            # intended. This line also rebinds the local `words` (until now
            # the "words" tier) to the result of findBetween, so every later
            # use of `words` sees that result instead of the tier.
            words = w.findBetween(b1.time, b2.time, words[-1].index+1)
            # Neighbours just before / after the selected run of words.
            # NOTE(review): these positions come from `.index` attributes but
            # are applied to the findBetween result, not to the full tier -
            # presumably the tier was meant; verify.
            w1 = words[words[0].index-1]
            w2 = words[words[-1].index+1]
            # Include a neighbouring word depending on which side of it the
            # break lies closer to.
            # NOTE(review): `b1` / `b2` are used directly in the arithmetic
            # here while `.time` is used everywhere else - probably
            # `b1.time` / `b2.time` were intended; confirm.
            if w1.xmax - b1 < b1 - w1.xmin:
                words = [w1]+words
            if w2.xmax - b2 > b2 - w2.xmin:
                words.append(w2)
            # Here the third Subphrase argument is the space-joined word text;
            # the counting pass further down passes a list of word objects.
            sph = Subphrase(b1.time, b2.time, ' '.join([w.text for w in words]))
            # NOTE(review): `word` is never assigned and `i` is unused;
            # presumably `words[i]` (or `for word in words`) was intended.
            for i in range(len(words)):
                word.links[breaks4.name]=(b1, b2)
                word.links[subphrases.name]=sph
            # NOTE(review): `apend` looks like a typo for `append` unless
            # PointTier really defines `apend` - verify.
            subphrases.apend(sph)
            b1=b2

        # Same procedure for `breaks3`, building Phrase objects.
        # NOTE(review): `breaks3` is not assigned anywhere in this method.
        b1 = breaks3[0]
        for b2 in breaks3[1:]:
            words = w.findBetween(b1.time, b2.time, words[-1].index+1)
            w1 = words[words[0].index-1]
            w2 = words[words[-1].index+1]
            if w1.xmax - b1 < b1 - w1.xmin:
                words = [w1]+words
            if w2.xmax - b2 > b2 - w2.xmin:
                words.append(w2)
            ph = Phrase(b1.time, b2.time, ' '.join([w.text for w in words]))
            for i in range(len(words)):
                word.links[breaks3.name]=(b1, b2)
                # NOTE(review): keyed by `Phrase.name` (the class) whereas the
                # subphrase loop keys by `subphrases.name` (the tier) -
                # presumably `phrases.name` was intended; confirm.
                word.links[Phrase.name]=ph
            # NOTE(review): appends `sph` (left over from the subphrase loop)
            # rather than the `ph` just built - looks like a copy/paste slip.
            phrases.apend(sph)
            b1=b2

        # Counting pass over subphrases: accumulate the words accepted by
        # LMref.is_word, store each word's 1-based position within the current
        # group in `w.ip`, and close the group at every word whose `break4`
        # flag is set.
        # NOTE(review): appears to be the older approach. It relies on
        # `w.break4`, which only the disabled first pass assigns in this
        # method, and on `sphrs`, which is never assigned here. `words` is by
        # now whatever the loops above last rebound it to.
        text = []
        tprev = self.xmin
        for w in words:
            if LMref.is_word(w.text):
                text+=[w]
                w.ip=len(text)
                if w.break4:
                    sphrs.append(Subphrase(tprev,w.xmax,text))
                    text=[]
                    tprev = w.xmax
        # Flush the trailing group when the last word did not close it.
        # `w` is the last item iterated above, so it is unbound if `words`
        # was empty.
        if not w.break4:    # last word
                sphrs.append(Subphrase(tprev,w.xmax,text))

        # Third pass: word position in phrases
        # Same counting scheme keyed on `w.break3`; the position is stored in
        # `w.IP` and the groups are collected in `phrs` (never assigned here).
        text = []
        tprev = self.xmin
        for w in words:
            if LMref.is_word(w.text):
                text+=[w]
                w.IP=len(text)
                if w.break3:
                    phrs.append(Phrase(tprev,w.xmax,text))
                    text=[]
                    tprev = w.xmax
        if not w.break3:
                phrs.append(Phrase(tprev,w.xmax,text))

        # NOTE(review): the `phrases` / `subphrases` tiers created at the top
        # are neither appended to `self` nor returned; only `sphrs` and `phrs`
        # are appended.
        self.append(sphrs)
        self.append(phrs)
Пример #2
0
    def extractBreaks(self):
        """Build phrase and subphrase tiers from the break labels.

        Stated intent: construct the phrase and subphrase context tiers
        according to the given breaks, and link each word with its
        corresponding phrase and subphrase.

        Takes no arguments. Reads the "breaks" tier (described by the
        original author as a PointTier containing break labels) and the
        "words" tier through ``self.get_tier``, and uses ``self.xmin`` /
        ``self.xmax`` as the bounds of the two new PointTier objects.

        NOTE(review): this method looks like an unfinished refactoring and
        cannot run to completion as written. ``breaks4``, ``breaks3``,
        ``word``, ``sphrs`` and ``phrs`` are never assigned in this method,
        so unless they exist at module level (not visible here) the first
        use raises NameError. Further suspected defects are flagged inline.
        """
        # NOTE(review): `breaks` is fetched but never read again by the active
        # code below; the loops use `breaks4` / `breaks3` instead.
        breaks = self.get_tier("breaks")
        words = self.get_tier("words")
        phrases = PointTier("phrases", self.xmin, self.xmax)
        subphrases = PointTier("subphrases", self.xmin, self.xmax)

        # Disabled code kept for reference. It used to initialise and set the
        # per-word `break3` / `break4` flags that the counting passes at the
        # bottom of this method still read.
        ##        for w in words:
        ##            w.break3=None
        ##            w.break4=None

        # First pass: put phrasing information in words
        ##        o = 0
        ##        for w in words:
        ##            bs =  breaks.findBetween(w.xmin, w.xmax, offset=o)
        ##            for b in bs:
        ##                if '3' in b.mark:
        ##                    w.break3 = True
        ##                if '4' in b.mark:
        ##                    w.break4 = True
        ##                o = b.index
        ##        t = b3[0]
        ##        for w in words:
        ##            w.links[b3.name]=b3.findBetween(w.xmin, w.xmax, t)
        ##            t = w.links[b3.name][-1]
        ##        t = b4[0]
        ##        for w in words:
        ##            w.links[b4.name]=b4.findBetween(w.xmin, w.xmax, t)
        ##            t = w.links[b4.name][-1]

        # Second pass: word position in subphrases: words.findBetween(sph.xmin, sph.xmax)

        # Walk consecutive pairs (b1, b2) of `breaks4` and build one Subphrase
        # per pair.
        # NOTE(review): `breaks4` is not assigned anywhere in this method.
        # Presumably it should be derived from `breaks` (entries whose mark
        # contains '4', going by the disabled first pass) - TODO confirm.
        b1 = breaks4[0]
        for b2 in breaks4[1:]:
            # NOTE(review): `w` has not been assigned yet on the first
            # iteration; the comment above suggests `words.findBetween` was
            # intended. This line also rebinds the local `words` (until now
            # the "words" tier) to the result of findBetween, so every later
            # use of `words` sees that result instead of the tier.
            words = w.findBetween(b1.time, b2.time, words[-1].index + 1)
            # Neighbours just before / after the selected run of words.
            # NOTE(review): these positions come from `.index` attributes but
            # are applied to the findBetween result, not to the full tier -
            # presumably the tier was meant; verify.
            w1 = words[words[0].index - 1]
            w2 = words[words[-1].index + 1]
            # Include a neighbouring word depending on which side of it the
            # break lies closer to.
            # NOTE(review): `b1` / `b2` are used directly in the arithmetic
            # here while `.time` is used everywhere else - probably
            # `b1.time` / `b2.time` were intended; confirm.
            if w1.xmax - b1 < b1 - w1.xmin:
                words = [w1] + words
            if w2.xmax - b2 > b2 - w2.xmin:
                words.append(w2)
            # Here the third Subphrase argument is the space-joined word text;
            # the counting pass further down passes a list of word objects.
            sph = Subphrase(b1.time, b2.time,
                            ' '.join([w.text for w in words]))
            # NOTE(review): `word` is never assigned and `i` is unused;
            # presumably `words[i]` (or `for word in words`) was intended.
            for i in range(len(words)):
                word.links[breaks4.name] = (b1, b2)
                word.links[subphrases.name] = sph
            # NOTE(review): `apend` looks like a typo for `append` unless
            # PointTier really defines `apend` - verify.
            subphrases.apend(sph)
            b1 = b2

        # Same procedure for `breaks3`, building Phrase objects.
        # NOTE(review): `breaks3` is not assigned anywhere in this method.
        b1 = breaks3[0]
        for b2 in breaks3[1:]:
            words = w.findBetween(b1.time, b2.time, words[-1].index + 1)
            w1 = words[words[0].index - 1]
            w2 = words[words[-1].index + 1]
            if w1.xmax - b1 < b1 - w1.xmin:
                words = [w1] + words
            if w2.xmax - b2 > b2 - w2.xmin:
                words.append(w2)
            ph = Phrase(b1.time, b2.time, ' '.join([w.text for w in words]))
            for i in range(len(words)):
                word.links[breaks3.name] = (b1, b2)
                # NOTE(review): keyed by `Phrase.name` (the class) whereas the
                # subphrase loop keys by `subphrases.name` (the tier) -
                # presumably `phrases.name` was intended; confirm.
                word.links[Phrase.name] = ph
            # NOTE(review): appends `sph` (left over from the subphrase loop)
            # rather than the `ph` just built - looks like a copy/paste slip.
            phrases.apend(sph)
            b1 = b2

        # Counting pass over subphrases: accumulate the words accepted by
        # LMref.is_word, store each word's 1-based position within the current
        # group in `w.ip`, and close the group at every word whose `break4`
        # flag is set.
        # NOTE(review): appears to be the older approach. It relies on
        # `w.break4`, which only the disabled first pass assigns in this
        # method, and on `sphrs`, which is never assigned here. `words` is by
        # now whatever the loops above last rebound it to.
        text = []
        tprev = self.xmin
        for w in words:
            if LMref.is_word(w.text):
                text += [w]
                w.ip = len(text)
                if w.break4:
                    sphrs.append(Subphrase(tprev, w.xmax, text))
                    text = []
                    tprev = w.xmax
        # Flush the trailing group when the last word did not close it.
        # `w` is the last item iterated above, so it is unbound if `words`
        # was empty.
        if not w.break4:  # last word
            sphrs.append(Subphrase(tprev, w.xmax, text))

        # Third pass: word position in phrases
        # Same counting scheme keyed on `w.break3`; the position is stored in
        # `w.IP` and the groups are collected in `phrs` (never assigned here).
        text = []
        tprev = self.xmin
        for w in words:
            if LMref.is_word(w.text):
                text += [w]
                w.IP = len(text)
                if w.break3:
                    phrs.append(Phrase(tprev, w.xmax, text))
                    text = []
                    tprev = w.xmax
        if not w.break3:
            phrs.append(Phrase(tprev, w.xmax, text))

        # NOTE(review): the `phrases` / `subphrases` tiers created at the top
        # are neither appended to `self` nor returned; only `sphrs` and `phrs`
        # are appended.
        self.append(sphrs)
        self.append(phrs)