示例#1
0
文件: anchors.py 项目: lym0302/sppas
    def fill_evident_holes(self):
        """ Add the anchor that is obviously missing between two neighbours.

        Two successive non-silence anchors separated by a time gap, and whose
        indexes differ by exactly two, leave room for one single index: an
        anchor holding that index is created over the whole gap.

        :returns: (int) Number of anchors that were added.

        """
        missing = list()

        for pos in range(len(self) - 1):
            left = self[pos]
            right = self[pos + 1]

            # Silences carry no index: nothing can be deduced around them.
            if left.GetLabel().IsSilence() or right.GetLabel().IsSilence():
                continue

            gap_begin = left.GetLocation().GetEnd()
            gap_end = right.GetLocation().GetBegin()
            if gap_begin < gap_end:
                # There is a hole in time: is there exactly one index to fit in?
                candidate = left.GetLabel().GetTypedValue() + 1
                if candidate == right.GetLabel().GetTypedValue() - 1:
                    label = Label(Text(candidate, data_type="int"))
                    missing.append(
                        Annotation(TimeInterval(gap_begin, gap_end), label))

        # Anchors are inserted only once the scan is over, so that the
        # positions used above are never shifted.
        for anchor in missing:
            self.Add(anchor)

        return len(missing)
示例#2
0
    def test_window(self):
        # Window on an empty tier first, then on a tier whose last anchors
        # are indexed ones.
        self.t = AnchorTier()
        self.t.set_duration(12.)
        self.t.set_win_delay(4.)
        from_time, to_time = self.t.fix_window(0.)
        self.assertEqual(from_time, 0.)
        self.assertEqual(to_time, 4.)

        self.t.set_duration(18.)
        indexed_anchors = (
            (13., 13.5, 18),
            (14., 14.6, 20),
            (14.6, 15.6, 21),
        )
        for begin, end, index in indexed_anchors:
            localization = TimeInterval(TimePoint(begin), TimePoint(end))
            label = Label(Text(index, data_type="int"))
            self.t.Append(Annotation(localization, label))

        from_time, to_time = self.t.fix_window(14.)
        self.assertEqual(from_time, 15.6)
        self.assertEqual(to_time, 18.)
示例#3
0
    def test_export(self):
        # Build a tier mixing silences and indexed anchors, then export it.
        self.t = AnchorTier()
        self.t.set_duration(17.8)
        self.t.set_win_delay(4.)
        self.t.set_ext_delay(1.)
        self.t.set_out_delay(0.2)

        # (begin, end, index); index None stands for a silence "#".
        # self.t.Append(Annotation(TimeInterval(TimePoint(0.),  TimePoint(1.)),   Label("#")))
        fixture = (
            (1.5, 2., 3),
            (4.5, 6.3, None),
            (7., 8., 8),
            (13., 13.5, 12),
            (14., 14.6, 13),
            (14.6, 15.2, 14),
            (15.2, 15.6, None),
            (16., 16.60, 16),
            (16.63, 17., 17),
        )
        for begin, end, index in fixture:
            if index is None:
                label = Label("#")
            else:
                label = Label(Text(index, data_type="int"))
            localization = TimeInterval(TimePoint(begin), TimePoint(end))
            self.t.Append(Annotation(localization, label))

        # Tokens w0 ... w17
        toklist = [u"w%d" % n for n in range(18)]

        newtier = self.t.export(toklist)
示例#4
0
文件: anchors.py 项目: lym0302/sppas
    def append_silences(self, channel):
        """ Append silences as anchors.

        Speech tracks are searched in the channel with the window length,
        minimum durations and shift values stored in this instance. Each
        gap before a track, and the gap between the last track and the
        duration of the tier, is appended as an anchor labelled "#".

        :param channel: (sppasChannel)

        """
        logging.debug(" ... Search silences:")

        # We have to find tracks first
        tracks_times = autils.search_channel_speech(channel, self._win_length,
                                                    self._min_sil_dur,
                                                    self._min_track_dur,
                                                    self._shift_dur_start,
                                                    self._shift_dur_end)
        # Boundaries are given a vagueness of half the analysis window.
        radius = self._win_length / 2.
        toprec = 0.

        # Then, the silences are the holes between tracks
        for (from_time, to_time) in tracks_times:
            if toprec < from_time:
                begin = TimePoint(toprec, radius)
                if begin == 0.:
                    # A boundary at the time origin gets no radius, like the
                    # boundary placed at the duration of the tier below.
                    # NOTE(review): inferred intent - confirm with TimePoint.
                    begin = TimePoint(toprec, 0.)
                end = TimePoint(from_time, radius)
                a = Annotation(TimeInterval(begin, end), Label("#"))
                self.Append(a)
            toprec = to_time

        # A silence at the end?
        if toprec < self._duration:
            begin = TimePoint(toprec, radius)
            end = TimePoint(self._duration, 0.)
            a = Annotation(TimeInterval(begin, end), Label("#"))
            self.Append(a)

        # Arguments are given to the logger so that the message is only
        # formatted when the debug level is enabled.
        for i, a in enumerate(self):
            logging.debug(" ... ... %i: %s", i, a)
示例#5
0
    def test_holes(self):
        # Check the detection of holes and the filling of the evident ones.
        self.t = AnchorTier()
        self.t.set_duration(17.8)
        self.t.set_win_delay(4.)
        self.t.set_ext_delay(1.)
        self.t.set_out_delay(0.2)

        # (begin, end, index); index None stands for a silence "#".
        fixture = (
            (0., 1.5, None),
            (1.5, 2., 0),
            (3., 3.5, -1),
            (4.5, 6.3, None),
            (7., 8., 8),
            (11.3, 12., -1),
            (13., 13.5, 18),
            (14., 14.6, 20),
            (14.6, 15.6, 21),
        )
        for begin, end, index in fixture:
            if index is None:
                label = Label("#")
            else:
                label = Label(Text(index, data_type="int"))
            localization = TimeInterval(TimePoint(begin), TimePoint(end))
            self.t.Append(Annotation(localization, label))

        self.assertTrue(self.t.check_holes_ntokens(10))
        self.assertFalse(self.t.check_holes_ntokens(9))

        # Only index 19, between anchors 18 and 20, can be deduced.
        nb_filled = self.t.fill_evident_holes()
        self.assertEqual(nb_filled, 1)
示例#6
0
    def test_window_sil(self):
        # Window bounds when the tier contains only silences.
        self.t = AnchorTier()
        self.t.set_duration(17.8)
        self.t.set_win_delay(4.)
        self.t.set_ext_delay(1.)
        self.t.set_out_delay(0.2)

        silences = ((0., 1.5), (4.5, 6.3), (9.7, 11.3), (14.6, 17.8))
        for begin, end in silences:
            localization = TimeInterval(TimePoint(begin), TimePoint(end))
            self.t.Append(Annotation(localization, Label("#")))

        # (requested start, expected from-time, expected to-time)
        expectations = (
            (0., 1.5, 4.5),
            (0.5, 1.5, 4.5),
            (1.5, 1.5, 4.5),
            (3.5, 3.5, 4.5),
            (4.5, 6.3, 9.7),
            (9., 9., 9.7),
            (9.6, 11.3, 14.6),
            (14.6, 17.8, 17.8),
        )
        for asked, from_expected, to_expected in expectations:
            from_time, to_time = self.t.fix_window(asked)
            self.assertEqual(from_time, from_expected)
            self.assertEqual(to_time, to_expected)

        # (window delay, expected from-time, expected to-time), from time 0
        delays = (
            (10., 1.5, 4.5),
            (2., 1.5, 3.5),
            (1., 1.5, 2.5),
        )
        for delay, from_expected, to_expected in delays:
            self.t.set_win_delay(delay)
            from_time, to_time = self.t.fix_window(0)
            self.assertEqual(from_time, from_expected)
            self.assertEqual(to_time, to_expected)
示例#7
0
    def test_near(self):
        # Search of the nearest (indexed) anchor, forward and backward.
        self.t = AnchorTier()
        self.t.set_duration(17.8)
        self.t.set_win_delay(4.)
        self.t.set_ext_delay(1.)
        self.t.set_out_delay(0.2)

        # Nothing can be found in an empty tier.
        self.assertIsNone(self.t.near_indexed_anchor(1., -1))

        # (begin, end, index); index None stands for a silence "#".
        fixture = (
            (0., 1.5, None),
            (1.5, 2., 1),
            (3., 3.5, -1),
            (4.5, 6.3, None),
            (7., 8., 2),
            (9.7, 11.3, None),
            (11.3, 12., -1),
            (14., 14.6, 3),
            (14.6, 17.8, None),
        )
        for begin, end, index in fixture:
            if index is None:
                label = Label("#")
            else:
                label = Label(Text(index, data_type="int"))
            localization = TimeInterval(TimePoint(begin), TimePoint(end))
            self.t.Append(Annotation(localization, label))

        # Near(): (time, expected result), searching forward.
        for time_value, expected in ((13., 7), (17., -1), (17.8, -1)):
            self.assertEqual(self.t.Near(time_value, 1), expected)

        # near_indexed_anchor(): (time, direction, expected index), where
        # an expected index of None means that no anchor is returned.
        lookups = (
            # forward
            (1., 1, 1),
            (1.5, 1, 1),
            (2., 1, 2),
            (11., 1, 3),
            (15., 1, None),
            # backward
            (1., -1, None),
            (3., -1, 1),
            (3.5, -1, 1),
            (5., -1, 1),
            (7., -1, 1),
            (8., -1, 2),
            (9.7, -1, 2),
            (11., -1, 2),
            (12., -1, 2),
            (14., -1, 2),
            (18., -1, 3),
        )
        for time_value, direction, expected in lookups:
            found = self.t.near_indexed_anchor(time_value, direction)
            if expected is None:
                self.assertIsNone(found)
            else:
                self.assertEqual(found.GetLabel().GetTypedValue(), expected)
示例#8
0
文件: anchors.py 项目: lym0302/sppas
    def export(self, toklist):
        """ Create the "Chunks" tier and return it.

        Silences of this tier are copied into the result. The other anchors
        are grouped into chunks, each one labelled with the tokens its
        anchors refer to. Tokens which are not referred to by any anchor
        are assigned to the holes between anchors, to the span before the
        first anchor and to the span after the last one. Finally,
        neighbouring non-silence intervals are merged two by two when they
        are short enough.

        :param toklist: Tokens used to fill the intervals. The typed value
            of the label of an anchor is used as an index in this list.
        :returns: (Tier) The tier named "Chunks".

        """
        tier = Tier("Chunks")

        # Append silences in the result and pop them from the list of anchors
        anchors = AnchorTier()
        for ann in self:
            if ann.GetLabel().IsSilence():
                try:
                    tier.Append(ann.Copy())
                except Exception:
                    logging.debug("Error: Silence not appended: %s", ann)
            else:
                try:
                    anchors.Append(ann.Copy())
                except Exception:
                    logging.debug("Error: Anchor not appended: %s", ann)

        # Fill the holes when prev-index and next-index made a sequence
        anchors.fill_evident_holes()

        # Fill holes between anchors
        for i in range(1, len(anchors)):
            prevann = anchors[i - 1]
            curann = anchors[i]
            # there is a hole
            if prevann.GetLocation().GetEnd() < curann.GetLocation().GetBegin(
            ):

                idxprev = prevann.GetLabel().GetTypedValue()
                idxcur = curann.GetLabel().GetTypedValue()
                prevend = prevann.GetLocation().GetEnd()
                curbegin = curann.GetLocation().GetBegin()

                if idxprev + 1 == idxcur:
                    # Consecutive indexes: no token is missing. The gap is
                    # closed by moving the end of the previous anchor, but
                    # only if the gap is tiny, or if it is small and the
                    # previous anchor itself is very short.
                    holeduration = curbegin.GetMidpoint(
                    ) - prevend.GetMidpoint()
                    if holeduration < 0.055:
                        prevend.SetMidpoint(curbegin.GetMidpoint())
                    elif holeduration < 0.505 and prevann.GetLocation(
                    ).GetDuration().GetValue() < 0.105:
                        prevend.SetMidpoint(curbegin.GetMidpoint())

                elif (idxprev + 1) < idxcur - 1:
                    # Several tokens are missing: they all go in the hole,
                    # unless something (a silence) is already there.
                    texte = " ".join(toklist[idxprev + 1:idxcur])
                    begin = prevend.GetMidpoint()
                    end = curbegin.GetMidpoint()
                    hole = Annotation(
                        TimeInterval(TimePoint(begin), TimePoint(end)),
                        Label(texte))
                    anns = tier.Find(begin, end, overlaps=True)
                    if len(anns) == 0:
                        try:
                            tier.Add(hole)
                        except Exception:
                            # Best effort: the hole is skipped, not hidden.
                            logging.debug("Error: Hole not appended: %s",
                                          hole)

        def add_chunk(first, last):
            """ Add the chunk made of anchors[first] to anchors[last]. """
            idxfirst = anchors[first].GetLabel().GetTypedValue()
            idxlast = anchors[last].GetLabel().GetTypedValue()
            chunk_text = " ".join(toklist[idxfirst:idxlast + 1])
            tbegin = anchors[first].GetLocation().GetBegin()
            tend = anchors[last].GetLocation().GetEnd()
            # The chunk takes the place of whatever overlaps its span.
            tier.Remove(tbegin, tend, overlaps=True)
            tier.Add(Annotation(TimeInterval(tbegin, tend), Label(chunk_text)))

        # Append chunk of anchors
        nb_anchors = anchors.GetSize()
        start = 0
        for end in range(nb_anchors - 1):
            idxcur = anchors[end].GetLabel().GetTypedValue()
            idxnex = anchors[end + 1].GetLabel().GetTypedValue()
            endtimecur = anchors[end].GetLocation().GetEnd()
            begtimenex = anchors[end + 1].GetLocation().GetBegin()

            # a sequence of anchors is finished if either:
            #  - next anchor index does not directly follow the current one
            #  - next anchor time does not directly follow the current one
            #  - we already appended enough anchors in the chunk
            if idxcur + 1 != idxnex or (end -
                                        start) > 10 or begtimenex > endtimecur:
                add_chunk(start, end)
                start = end + 1

        # The last chunk found. A tier with one single anchor has no pair
        # of anchors to compare, but this anchor still makes a chunk.
        if nb_anchors > 0:
            add_chunk(start, nb_anchors - 1)

        # Without any anchor, or with nothing in the result, there is no
        # first/last item to look at: the two steps below are skipped.
        if nb_anchors > 0 and tier.GetSize() > 0:

            # Begin of the tier
            fi = anchors[0].GetLabel().GetTypedValue()
            ft = tier[0].GetLocation().GetBegin()
            at = anchors[0].GetLocation().GetBegin()
            if ft == 0.:
                # a silence to start
                ft = tier[0].GetLocation().GetEnd()
            else:
                ft = TimePoint(0.)
            if fi > 0 and ft < at:
                chunk_text = " ".join(toklist[0:fi])
                ann = Annotation(TimeInterval(ft, at), Label(chunk_text))
                tier.Add(ann)

            # End of the tier
            # fi is the index of the first token after the last anchor
            fi = anchors[-1].GetLabel().GetTypedValue() + 1
            ft = tier[-1].GetLocation().GetEnd()
            at = anchors[-1].GetLocation().GetEnd()
            if ft == self._duration:
                # a silence to end
                ft = tier[-1].GetLocation().GetBegin()
            else:
                ft = TimePoint(self._duration)
            if fi < len(toklist) and at < ft:
                # All the remaining tokens, toklist[fi] included.
                chunk_text = " ".join(toklist[fi:])
                ann = Annotation(TimeInterval(at, ft), Label(chunk_text))
                tier.Add(ann)

        # Merge two neighbouring non-silence intervals if the first one has
        # less than 3 tokens or if both have less than 12 tokens in total.
        chunk_tier = Tier("Chunks")
        size = tier.GetSize()
        i = 1
        while i < size:

            prevtext = tier[i - 1].GetLabel().GetValue()
            curtext = tier[i].GetLabel().GetValue()
            newtext = prevtext + " " + curtext

            if tier[i - 1].GetLabel().IsSilence() is False and \
                    tier[i].GetLabel().IsSilence() is False and \
                    (len(prevtext.split()) < 3 or len(newtext.split()) < 12):
                a = Annotation(
                    TimeInterval(tier[i - 1].GetLocation().GetBegin(),
                                 tier[i].GetLocation().GetEnd()),
                    Label(newtext))
                chunk_tier.Append(a)
                # tier[i] is consumed by the merge: skip it
                i += 1
            else:
                chunk_tier.Append(tier[i - 1])

            i += 1

        # When the loop stops at i == size, the last interval was neither
        # merged nor copied yet (i is size + 1 if it has been merged).
        if i == size:
            chunk_tier.Append(tier[i - 1])

        return chunk_tier