def extractBreaks(self):
    """Build the "subphrases" and "phrases" tiers from the "breaks" tier.

    Words are read from the "words" tier and grouped into consecutive
    spans.  A span is closed by the first word whose interval
    ``(xmin, xmax]`` contains a break point of the relevant level: break
    marks containing ``'4'`` close a ``Subphrase`` and break marks
    containing ``'3'`` close a ``Phrase``.  The first span starts at
    ``self.xmin``; every later span starts where the previous one ended.
    Words left over after the last break form a final span, and an empty
    words tier produces no span at all.

    Side effects:
        * every word gets ``links[<tier name>]`` set to the span that
          contains it (``links["subphrases"]`` / ``links["phrases"]``);
        * every word accepted by ``LMref.is_word`` gets its 1-based rank
          among the accepted words of its span stored in ``ip``
          (subphrase) or ``IP`` (phrase);
        * the two new tiers are appended to ``self``, subphrases first.

    NOTE(review): the helper types are not visible from this function, so
    the following is assumed and should be confirmed:
        * the words tier iterates in time order and each word exposes
          ``xmin``, ``xmax``, ``text`` and a dict-like ``links``;
        * each break point exposes a numeric ``time`` and a string ``mark``;
        * ``Subphrase`` / ``Phrase`` take ``(xmin, xmax, text)`` where
          ``text`` is a plain string;
        * tiers expose ``name`` and ``append``.
    """
    # Function-scope import keeps this block self-contained.
    from bisect import bisect_right

    breaks = self.get_tier("breaks")
    words = self.get_tier("words")
    subphrases = PointTier("subphrases", self.xmin, self.xmax)
    phrases = PointTier("phrases", self.xmin, self.xmax)

    def close_span(tier, span_class, start, end, members, real_words):
        # The span text only contains tokens accepted by LMref.is_word, so
        # pause/noise labels do not leak into the phrase text.
        span = span_class(start, end, " ".join(w.text for w in real_words))
        for member in members:
            member.links[tier.name] = span
        tier.append(span)

    # (digit looked for in the break mark, destination tier, span class,
    #  name of the per-word position attribute)
    levels = (
        ("4", subphrases, Subphrase, "ip"),
        ("3", phrases, Phrase, "IP"),
    )

    for digit, tier, span_class, position_attr in levels:
        break_times = sorted(b.time for b in breaks if digit in b.mark)
        members = []      # every token of the span being built
        real_words = []   # the subset accepted by LMref.is_word
        span_start = self.xmin

        for word in words:
            members.append(word)
            if LMref.is_word(word.text):
                real_words.append(word)
                setattr(word, position_attr, len(real_words))

            # Index of the first break strictly after the word start; the
            # word closes the span when that break is not past its end.
            nxt = bisect_right(break_times, word.xmin)
            if nxt < len(break_times) and break_times[nxt] <= word.xmax:
                close_span(tier, span_class, span_start, word.xmax,
                           members, real_words)
                members, real_words = [], []
                span_start = word.xmax

        # Words after the last break still belong to a (final) span.
        if members:
            close_span(tier, span_class, span_start, members[-1].xmax,
                       members, real_words)

    self.append(subphrases)
    self.append(phrases)
def extractBreaks(self):
    """Build the "subphrases" and "phrases" tiers from the "breaks" tier.

    Words are read from the "words" tier and grouped into consecutive
    spans.  A span is closed by the first word whose interval
    ``(xmin, xmax]`` contains a break point of the relevant level: break
    marks containing ``'4'`` close a ``Subphrase`` and break marks
    containing ``'3'`` close a ``Phrase``.  The first span starts at
    ``self.xmin``; every later span starts where the previous one ended.
    Words left over after the last break form a final span, and an empty
    words tier produces no span at all.

    Side effects:
        * every word gets ``links[<tier name>]`` set to the span that
          contains it (``links["subphrases"]`` / ``links["phrases"]``);
        * every word accepted by ``LMref.is_word`` gets its 1-based rank
          among the accepted words of its span stored in ``ip``
          (subphrase) or ``IP`` (phrase);
        * the two new tiers are appended to ``self``, subphrases first.

    NOTE(review): the helper types are not visible from this function, so
    the following is assumed and should be confirmed:
        * the words tier iterates in time order and each word exposes
          ``xmin``, ``xmax``, ``text`` and a dict-like ``links``;
        * each break point exposes a numeric ``time`` and a string ``mark``;
        * ``Subphrase`` / ``Phrase`` take ``(xmin, xmax, text)`` where
          ``text`` is a plain string;
        * tiers expose ``name`` and ``append``.
    """
    # Function-scope import keeps this block self-contained.
    from bisect import bisect_right

    breaks = self.get_tier("breaks")
    words = self.get_tier("words")
    subphrases = PointTier("subphrases", self.xmin, self.xmax)
    phrases = PointTier("phrases", self.xmin, self.xmax)

    def close_span(tier, span_class, start, end, members, real_words):
        # The span text only contains tokens accepted by LMref.is_word, so
        # pause/noise labels do not leak into the phrase text.
        span = span_class(start, end, " ".join(w.text for w in real_words))
        for member in members:
            member.links[tier.name] = span
        tier.append(span)

    # (digit looked for in the break mark, destination tier, span class,
    #  name of the per-word position attribute)
    levels = (
        ("4", subphrases, Subphrase, "ip"),
        ("3", phrases, Phrase, "IP"),
    )

    for digit, tier, span_class, position_attr in levels:
        break_times = sorted(b.time for b in breaks if digit in b.mark)
        members = []      # every token of the span being built
        real_words = []   # the subset accepted by LMref.is_word
        span_start = self.xmin

        for word in words:
            members.append(word)
            if LMref.is_word(word.text):
                real_words.append(word)
                setattr(word, position_attr, len(real_words))

            # Index of the first break strictly after the word start; the
            # word closes the span when that break is not past its end.
            nxt = bisect_right(break_times, word.xmin)
            if nxt < len(break_times) and break_times[nxt] <= word.xmax:
                close_span(tier, span_class, span_start, word.xmax,
                           members, real_words)
                members, real_words = [], []
                span_start = word.xmax

        # Words after the last break still belong to a (final) span.
        if members:
            close_span(tier, span_class, span_start, members[-1].xmax,
                       members, real_words)

    self.append(subphrases)
    self.append(phrases)