def fill_evident_holes(self):
    """Insert the missing anchor in every gap that can hold only one index.

    Two consecutive, non-silence anchors separated in time and whose
    indexes differ by exactly 2 leave room for a single index between
    them. An anchor carrying that index is created over the whole gap.

    :returns: (int) number of anchors added to this tier

    """
    holes = []

    for pos in range(1, len(self)):
        before = self[pos - 1]
        after = self[pos]

        # Silences carry no index: nothing can be deduced around them.
        if before.GetLabel().IsSilence() or after.GetLabel().IsSilence():
            continue

        gap_begin = before.GetLocation().GetEnd()
        gap_end = after.GetLocation().GetBegin()
        if not (gap_begin < gap_end):
            # the two anchors touch each other: there is no hole
            continue

        idx_before = before.GetLabel().GetTypedValue()
        idx_after = after.GetLabel().GetTypedValue()
        if idx_before + 1 == idx_after - 1:
            missing = Text(idx_before + 1, data_type="int")
            holes.append(
                Annotation(TimeInterval(gap_begin, gap_end), Label(missing)))

    # Insert only once the scan is over, so the positions used above
    # are not shifted while iterating.
    for hole in holes:
        self.Add(hole)

    return len(holes)
def test_window(self):
    """Check fix_window() on an empty tier, then with indexed anchors."""

    def indexed(begin, end, index):
        # Anchor over [begin, end] labelled with an integer index.
        return Annotation(
            TimeInterval(TimePoint(begin), TimePoint(end)),
            Label(Text(index, data_type="int")))

    tier = self.t = AnchorTier()
    tier.set_duration(12.)
    tier.set_win_delay(4.)

    # Without any anchor, the window asked at 0. is expected to be [0., 4.]
    win_from, win_to = tier.fix_window(0.)
    self.assertEqual(win_from, 0.)
    self.assertEqual(win_to, 4.)

    tier.set_duration(18.)
    for begin, end, index in ((13., 13.5, 18),
                              (14., 14.6, 20),
                              (14.6, 15.6, 21)):
        tier.Append(indexed(begin, end, index))

    # Asked at 14., the expected window is [15.6, 18.]: 15.6 is the end of
    # the last anchor appended above and 18. is the duration that was set.
    win_from, win_to = tier.fix_window(14.)
    self.assertEqual(win_from, 15.6)
    self.assertEqual(win_to, 18.)
def test_export(self):
    """Run export() on a tier mixing silences and indexed anchors.

    No assertion is made on the result: the test only checks that the
    export can be performed on this tier.

    """

    def make(begin, end, index=None):
        # index=None creates a silence ("#"), otherwise an indexed anchor.
        if index is None:
            label = Label("#")
        else:
            label = Label(Text(index, data_type="int"))
        return Annotation(
            TimeInterval(TimePoint(begin), TimePoint(end)), label)

    tier = self.t = AnchorTier()
    tier.set_duration(17.8)
    tier.set_win_delay(4.)
    tier.set_ext_delay(1.)
    tier.set_out_delay(0.2)

    # A leading silence over [0., 1.] used to be appended first;
    # it is disabled, so the tier starts with the anchor of index 3.
    for begin, end, index in (
            (1.5, 2., 3),
            (4.5, 6.3, None),
            (7., 8., 8),
            (13., 13.5, 12),
            (14., 14.6, 13),
            (14.6, 15.2, 14),
            (15.2, 15.6, None),
            (16., 16.60, 16),
            (16.63, 17., 17)):
        tier.Append(make(begin, end, index))

    # Tokens "w0" ... "w17"
    toklist = [u"w%d" % n for n in range(18)]
    tier.export(toklist)
def append_silences(self, channel):
    """Append a "#" anchor over each part of the channel without speech.

    The speech tracks are searched first; the silences are then the holes
    left before, between and after these tracks.

    :param channel: (sppasChannel)

    """
    logging.debug(" ... Search silences:")

    # Tracks come as (from_time, to_time) pairs.
    speech_tracks = autils.search_channel_speech(
        channel,
        self._win_length,
        self._min_sil_dur,
        self._min_track_dur,
        self._shift_dur_start,
        self._shift_dur_end)

    # Half of the window length is used as radius of the boundaries.
    half_win = self._win_length / 2.

    def add_silence(begin, end):
        # Append a silence anchor over [begin, end] to this tier.
        self.Append(Annotation(TimeInterval(begin, end), Label("#")))

    previous_end = 0.
    for (track_begin, track_end) in speech_tracks:
        if previous_end < track_begin:
            begin = TimePoint(previous_end, half_win)
            # NOTE(review): this branch rebuilds exactly the same point,
            # so it has no effect. A different radius at the very start
            # was perhaps intended -- to be confirmed.
            if begin == 0.:
                begin = TimePoint(previous_end, half_win)
            add_silence(begin, TimePoint(track_begin, half_win))
        previous_end = track_end

    # A silence at the end? Its end is the duration, with a radius of 0.
    if previous_end < self._duration:
        add_silence(TimePoint(previous_end, half_win),
                    TimePoint(self._duration, 0.))

    for rank, anchor in enumerate(self):
        logging.debug(" ... ... %i: %s" % (rank, anchor))
def test_holes(self):
    """Check check_holes_ntokens() and fill_evident_holes()."""

    def make(begin, end, index=None):
        # index=None creates a silence ("#"), otherwise an indexed anchor.
        if index is None:
            label = Label("#")
        else:
            label = Label(Text(index, data_type="int"))
        return Annotation(
            TimeInterval(TimePoint(begin), TimePoint(end)), label)

    tier = self.t = AnchorTier()
    tier.set_duration(17.8)
    tier.set_win_delay(4.)
    tier.set_ext_delay(1.)
    tier.set_out_delay(0.2)

    for begin, end, index in (
            (0., 1.5, None),
            (1.5, 2., 0),
            (3., 3.5, -1),
            (4.5, 6.3, None),
            (7., 8., 8),
            (11.3, 12., -1),
            (13., 13.5, 18),
            (14., 14.6, 20),
            (14.6, 15.6, 21)):
        tier.Append(make(begin, end, index))

    # The check is expected to pass with 10 tokens and to fail with 9.
    self.assertTrue(tier.check_holes_ntokens(10))
    self.assertFalse(tier.check_holes_ntokens(9))

    # Only one gap is evident: between indexes 18 and 20, index 19 fits.
    self.assertEqual(tier.fill_evident_holes(), 1)
def test_window_sil(self):
    """Check fix_window() on a tier made of silences only."""
    tier = self.t = AnchorTier()
    tier.set_duration(17.8)
    tier.set_win_delay(4.)
    tier.set_ext_delay(1.)
    tier.set_out_delay(0.2)

    for begin, end in ((0., 1.5), (4.5, 6.3), (9.7, 11.3), (14.6, 17.8)):
        tier.Append(
            Annotation(TimeInterval(TimePoint(begin), TimePoint(end)),
                       Label("#")))

    def check(asked, expected_from, expected_to):
        # The window asked at 'asked' must be [expected_from, expected_to].
        win_from, win_to = tier.fix_window(asked)
        self.assertEqual(win_from, expected_from)
        self.assertEqual(win_to, expected_to)

    # With a window delay of 4.
    for asked, expected_from, expected_to in (
            (0., 1.5, 4.5),
            (0.5, 1.5, 4.5),
            (1.5, 1.5, 4.5),
            (3.5, 3.5, 4.5),
            (4.5, 6.3, 9.7),
            (9., 9., 9.7),
            (9.6, 11.3, 14.6),
            (14.6, 17.8, 17.8)):
        check(asked, expected_from, expected_to)

    # Same request at 0 with other window delays: the expected end is the
    # begin of the next silence (4.5) for the delay of 10., and 1.5 plus
    # the delay for the shorter ones.
    for delay, expected_to in ((10., 4.5), (2., 3.5), (1., 2.5)):
        tier.set_win_delay(delay)
        check(0, 1.5, expected_to)
def test_near(self):
    """Check Near() and near_indexed_anchor() in both directions."""

    def make(begin, end, index=None):
        # index=None creates a silence ("#"), otherwise an indexed anchor.
        if index is None:
            label = Label("#")
        else:
            label = Label(Text(index, data_type="int"))
        return Annotation(
            TimeInterval(TimePoint(begin), TimePoint(end)), label)

    tier = self.t = AnchorTier()
    tier.set_duration(17.8)
    tier.set_win_delay(4.)
    tier.set_ext_delay(1.)
    tier.set_out_delay(0.2)

    # Nothing is expected from an empty tier.
    self.assertIsNone(tier.near_indexed_anchor(1., -1))

    for begin, end, index in (
            (0., 1.5, None),
            (1.5, 2., 1),
            (3., 3.5, -1),
            (4.5, 6.3, None),
            (7., 8., 2),
            (9.7, 11.3, None),
            (11.3, 12., -1),
            (14., 14.6, 3),
            (14.6, 17.8, None)):
        tier.Append(make(begin, end, index))

    # Near(): 7 is the position of the [14., 14.6] anchor in the tier.
    for time_value, expected in ((13., 7), (17., -1), (17.8, -1)):
        self.assertEqual(tier.Near(time_value, 1), expected)

    # near_indexed_anchor(): (time, direction, expected index or None).
    # The expected values never point to a silence nor to an anchor
    # labelled -1.
    for time_value, direction, expected in (
            # forward search
            (1., 1, 1),
            (1.5, 1, 1),
            (2., 1, 2),
            (11., 1, 3),
            (15., 1, None),
            # backward search
            (1., -1, None),
            (3., -1, 1),
            (3.5, -1, 1),
            (5., -1, 1),
            (7., -1, 1),
            (8., -1, 2),
            (9.7, -1, 2),
            (11., -1, 2),
            (12., -1, 2),
            (14., -1, 2),
            (18., -1, 3)):
        found = tier.near_indexed_anchor(time_value, direction)
        if expected is None:
            self.assertIsNone(found)
        else:
            self.assertEqual(found.GetLabel().GetTypedValue(), expected)
def export(self, toklist):
    """Create the "Chunks" tier and return it.

    Steps:

        1. silences of this tier are copied to the result, the other
           annotations are copied to a temporary tier of anchors;
        2. holes between anchors are either closed (consecutive indexes)
           or filled with the tokens that no anchor covers;
        3. anchors that follow each other, in index and in time, are
           grouped into chunks labelled with their tokens;
        4. the tokens before the first anchor and after the last one
           are added;
        5. neighbouring non-silence chunks are merged when they are short.

    :param toklist: Tokens used to fill the intervals. The integer label
        of an anchor is used as an index in this list.
    :returns: (Tier) a new tier named "Chunks"

    NOTE(review): at least one non-silence anchor is assumed. Steps 4
    index anchors[0] and anchors[-1] unconditionally -- confirm that
    callers never export a tier without anchor.

    """
    tier = Tier("Chunks")

    # Append silences in the result and pop them from the list of anchors
    anchors = AnchorTier()
    for ann in self:
        if ann.GetLabel().IsSilence():
            try:
                tier.Append(ann.Copy())
            except Exception:
                logging.debug("Error: Silence not appended: %s", ann)
        else:
            try:
                anchors.Append(ann.Copy())
            except Exception:
                logging.debug("Error: Anchor not appended: %s", ann)

    # Fill the holes when prev-index and next-index made a sequence
    anchors.fill_evident_holes()

    # Fill holes between anchors
    for i in range(1, len(anchors)):
        prevann = anchors[i - 1]
        curann = anchors[i]

        if not (prevann.GetLocation().GetEnd() <
                curann.GetLocation().GetBegin()):
            # no hole between these two anchors
            continue

        idxprev = prevann.GetLabel().GetTypedValue()
        idxcur = curann.GetLabel().GetTypedValue()
        prevend = prevann.GetLocation().GetEnd()
        curbegin = curann.GetLocation().GetBegin()

        if idxprev + 1 == idxcur:
            # hum... a little bit of hack!!!
            # Consecutive indexes: no token is missing. The previous
            # anchor is stretched up to the current one if the hole is
            # tiny, or if it is small and the previous anchor is very
            # short. (thresholds are presumably in seconds)
            holeduration = curbegin.GetMidpoint() - prevend.GetMidpoint()
            if holeduration < 0.055 or (
                    holeduration < 0.505 and
                    prevann.GetLocation().GetDuration().GetValue() < 0.105):
                prevend.SetMidpoint(curbegin.GetMidpoint())

        elif (idxprev + 1) < idxcur - 1:
            # At least two tokens are missing: they fill the hole, unless
            # something (a silence) is already there.
            texte = " ".join(toklist[idxprev + 1:idxcur])
            begin = prevend.GetMidpoint()
            end = curbegin.GetMidpoint()
            hole = Annotation(
                TimeInterval(TimePoint(begin), TimePoint(end)),
                Label(texte))
            anns = tier.Find(begin, end, overlaps=True)
            if len(anns) == 0:
                try:
                    tier.Add(hole)
                except Exception:
                    # best effort, but do not hide the failure
                    logging.debug("Error: Hole not appended: %s", hole)

    def add_chunk(first, last):
        # Replace whatever overlaps anchors[first..last] in the result by
        # one annotation labelled with the tokens of these anchors.
        idxstart = anchors[first].GetLabel().GetTypedValue()
        idxend = anchors[last].GetLabel().GetTypedValue()
        chunk_text = " ".join(toklist[idxstart:idxend + 1])
        tbegin = anchors[first].GetLocation().GetBegin()
        tend = anchors[last].GetLocation().GetEnd()
        chunk = Annotation(TimeInterval(tbegin, tend), Label(chunk_text))
        tier.Remove(tbegin, tend, overlaps=True)
        tier.Add(chunk)

    # Append chunk of anchors
    nb_anchors = anchors.GetSize()
    start = 0
    for end in range(nb_anchors - 1):
        idxcur = anchors[end].GetLabel().GetTypedValue()
        idxnex = anchors[end + 1].GetLabel().GetTypedValue()
        endtimecur = anchors[end].GetLocation().GetEnd()
        begtimenex = anchors[end + 1].GetLocation().GetBegin()

        # a sequence of anchors is finished if either:
        #   - next anchor index does not directly follow the current one
        #   - next anchor time does not directly follow the current one
        #   - we already appended enough anchors in the chunk
        if idxcur + 1 != idxnex or \
                (end - start) > 10 or \
                begtimenex > endtimecur:
            add_chunk(start, end)
            start = end + 1

    # The last chunk. It is also the only one when there is a single
    # anchor, which was previously never exported.
    if nb_anchors > 0:
        add_chunk(start, nb_anchors - 1)

    # Begin of the tier
    fi = anchors[0].GetLabel().GetTypedValue()
    ft = tier[0].GetLocation().GetBegin()
    at = anchors[0].GetLocation().GetBegin()
    if ft == 0.:
        # a silence to start
        ft = tier[0].GetLocation().GetEnd()
    else:
        ft = TimePoint(0.)
    if fi > 0 and ft < at:
        # tokens placed before the first anchor
        chunk_text = " ".join(toklist[0:fi])
        ann = Annotation(TimeInterval(ft, at), Label(chunk_text))
        tier.Add(ann)

    # End of the tier
    # fi is the index of the first token after the last anchor
    fi = anchors[-1].GetLabel().GetTypedValue() + 1
    ft = tier[-1].GetLocation().GetEnd()
    at = anchors[-1].GetLocation().GetEnd()
    if ft == self._duration:
        # a silence to end
        ft = tier[-1].GetLocation().GetBegin()
    else:
        ft = TimePoint(self._duration)
    if fi < len(toklist) and at < ft:
        # The slice starts at fi (not fi + 1): fi already points after
        # the last anchor, so starting one further dropped a token and
        # gave an empty label when only one token was left.
        chunk_text = " ".join(toklist[fi:])
        ann = Annotation(TimeInterval(at, ft), Label(chunk_text))
        tier.Add(ann)

    # Merge a chunk with the next one if none of them is a silence and
    # if the first is short (< 3 tokens) or both are short (< 12 tokens).
    chunk_tier = Tier("Chunks")
    i = 1
    while i < tier.GetSize():
        prevtext = tier[i - 1].GetLabel().GetValue()
        curtext = tier[i].GetLabel().GetValue()
        newtext = prevtext + " " + curtext

        if tier[i - 1].GetLabel().IsSilence() is False and \
                tier[i].GetLabel().IsSilence() is False and \
                (len(prevtext.split()) < 3 or len(newtext.split()) < 12):
            a = Annotation(
                TimeInterval(tier[i - 1].GetLocation().GetBegin(),
                             tier[i].GetLocation().GetEnd()),
                Label(newtext))
            chunk_tier.Append(a)
            # tier[i] is now part of the merged chunk: skip it
            i += 1
        else:
            chunk_tier.Append(tier[i - 1])
        i += 1

    # The loop only emits tier[i - 1]: when it stops with i on the last
    # position, the last annotation was not consumed by a merge and was
    # previously lost.
    if i == tier.GetSize():
        chunk_tier.Append(tier[i - 1])

    return chunk_tier