Exemplo n.º 1
0
    def test_write_phonemes(self):
        """Write the phonetization of a track in a file.

        Two cases are checked: an annotation made of several labels with
        alternative tags, and an annotation whose single tag already
        contains a serialized list of labels.

        """
        # an annotation with complex labels (alternative tags)
        l1 = sppasLabel([sppasTag("j"), sppasTag("S")])
        l2 = sppasLabel([sppasTag("e"), sppasTag("E")])
        ann = sppasAnnotation(sppasLocation(sppasPoint(1)), [l1, l2])
        TracksWriter._write_phonemes(ann, TEMP, 1)
        fn = os.path.join(TEMP, "track_000001.phn")
        self.assertTrue(os.path.exists(fn))
        # the context manager closes the file: no explicit close() needed
        with codecs.open(fn, "r", sg.__encoding__) as fp:
            lines = fp.readlines()
        self.assertEqual(1, len(lines))
        self.assertEqual("{j|S} {e|E}", lines[0])

        # an annotation with already serialized labels
        sentence = "A serialized list of {labels|tags}"
        ann = sppasAnnotation(
            sppasLocation(sppasPoint(1)),
            sppasLabel(sppasTag(sentence)))
        TracksWriter._write_phonemes(ann, TEMP, 2)
        fn = os.path.join(TEMP, "track_000002.phn")
        self.assertTrue(os.path.exists(fn))
        with codecs.open(fn, "r", sg.__encoding__) as fp:
            lines = fp.readlines()
        self.assertEqual(1, len(lines))
        self.assertEqual(sentence, lines[0])
Exemplo n.º 2
0
    def test_tier_tga(self):
        """Test time groups, time segments and TGA estimations of a tier."""
        # (begin, begin radius, end, end radius, tag content or None)
        rows = (
            (0., 0., 1., 0.0, '#'),
            (1., 0., 2., 0.01, 'toto'),
            (3., 0.01, 4., 0.01, 'titi'),
            (4., 0.01, 5., 0.01, None),
            (5., 0.01, 6.5, 0.005, 'toto'),
            (6.5, 0.005, 9.5, 0., 'toto'),
        )
        tier = sppasTier("tier")
        for b, b_radius, e, e_radius, content in rows:
            location = sppasLocation(
                sppasInterval(sppasPoint(b, b_radius),
                              sppasPoint(e, e_radius)))
            if content is None:
                # this annotation is created without any label
                tier.create_annotation(location)
            else:
                tier.create_annotation(location,
                                       sppasLabel(sppasTag(content)))

        names = ('tg_1', 'tg_2', 'tg_3')

        # the time groups tier
        # still to be tested: [1., 2.] tg_1, [3., 4.] tg_2, [5., 9.5] tg_3
        tg = sppasTGA().syllables_to_timegroups(tier)
        self.assertEqual(3, len(tg))

        # the time segments tier
        # still to be tested:
        # [1., 2.] toto, [3., 4.] titi, [5., 9.5] toto toto
        ts = sppasTGA().syllables_to_timesegments(tier)
        self.assertEqual(3, len(ts))

        # durations of the syllables of each time group
        tg_dur = sppasTGA().timegroups_to_durations(tier, tg)
        self.assertEqual(3, len(tg_dur))
        for name, durations in zip(names, ([1.], [1.], [1.5, 3.0])):
            self.assertEqual(durations, tg_dur[name])

        tga = TimeGroupAnalysis(tg_dur)

        occurrences = tga.len()
        for name, nb in zip(names, (1, 1, 2)):
            self.assertEqual(nb, occurrences[name])

        total = tga.total()
        for name, value in zip(names, (1.0, 1.0, 4.5)):
            self.assertEqual(value, total[name])

        mean = tga.mean()
        for name, value in zip(names, (1.0, 1.0, 2.25)):
            self.assertEqual(value, mean[name])
Exemplo n.º 3
0
    def tracks_to_tier(tracks, end_time, vagueness):
        """Create a sppasTier object named "IPUs" from tracks.

        Each track becomes an interval labelled "ipu_N" (N starts at 1).
        A silence interval is inserted before a track which starts after
        the end of the previous one, and after the last track if it ends
        before end_time. The radius of the points of a track is 0 when
        the track starts at 0 or ends at end_time, and half of the
        vagueness otherwise.

        :param tracks: (List of tuple) with (from, to) values in seconds
        :param end_time: (float) End-time of the tier
        :param vagueness: (float) vagueness used for silence search
        :returns: (sppasTier)
        :raises: IOError if there is no track

        """
        if len(tracks) == 0:
            raise IOError('No IPUs to write.\n')

        ipus_tier = sppasTier("IPUs")
        ipus_tier.set_meta('number_of_ipus', str(len(tracks)))
        half_vagueness = vagueness / 2.
        previous_end = 0.

        for number, (from_time, to_time) in enumerate(tracks, 1):
            on_boundary = from_time == 0. or to_time == end_time
            radius = 0. if on_boundary else half_vagueness

            # A gap since the previous track is a silence
            if previous_end < from_time:
                silence = sppasInterval(sppasPoint(previous_end, radius),
                                        sppasPoint(from_time, radius))
                ipus_tier.create_annotation(
                    sppasLocation(silence),
                    sppasLabel(sppasTag(SIL_ORTHO)))

            # The track itself is speech
            speech = sppasInterval(sppasPoint(from_time, radius),
                                   sppasPoint(to_time, radius))
            ipus_tier.create_annotation(
                sppasLocation(speech),
                sppasLabel(sppasTag("ipu_%d" % number)))

            previous_end = to_time

        # Fill with a silence from the last track to the end of the tier
        last_point = sppasPoint(previous_end, half_vagueness)
        if last_point < end_time:
            ipus_tier.create_annotation(
                sppasLocation(
                    sppasInterval(last_point, sppasPoint(end_time))),
                sppasLabel(sppasTag(SIL_ORTHO)))

        return ipus_tier
Exemplo n.º 4
0
 def test_create_tok_tier(self):
     """Create a tier with tokens like 'w_1 w_2...w_n' from phonemes."""
     first = sppasLabel([sppasTag("j"), sppasTag("S")])
     second = sppasLabel([sppasTag("e"), sppasTag("E")])

     # one annotation with two labels, one with an already serialized tag
     phon_tier = sppasTier("phonemes")
     phon_tier.create_annotation(sppasLocation(sppasPoint(1)),
                                 [first, second])
     phon_tier.create_annotation(sppasLocation(sppasPoint(2)),
                                 sppasLabel(sppasTag("{j|S} {e|E}")))

     tok_tier = TracksWriter._create_tok_tier(phon_tier)
     self.assertEqual(2, len(tok_tier))

     # both annotations are expected to be turned into two tokens
     for index in range(2):
         content = tok_tier[index].get_best_tag().get_content()
         self.assertEqual("w_1 w_2", content)
Exemplo n.º 5
0
    def syllabify_interval(self, phonemes, from_p, to_p, syllables):
        """Syllabify one interval and add the syllables into a tier.

        The typed content of the best tag of each phoneme from index
        from_p to index to_p (included) is given to the syllabifier. One
        annotation is created in the syllables tier for each syllable
        returned.

        :param phonemes: (sppasTier)
        :param from_p: (int) index of the first phoneme to be syllabified
        :param to_p: (int) index of the last phoneme to be syllabified
        :param syllables: (sppasTier) tier in which annotations are created

        """
        # the sequence of phonemes to syllabify
        phon_seq = [a.get_best_tag().get_typed_content()
                    for a in phonemes[from_p:to_p+1]]

        # each syllable is a pair of indexes, relative to the sequence
        for boundaries in self.syllabifier.annotate(phon_seq):
            first, last = boundaries

            # location: from the first phoneme to the last one
            lowest = phonemes[first+from_p].get_lowest_localization().copy()
            highest = phonemes[last+from_p].get_highest_localization().copy()
            location = sppasLocation(sppasInterval(lowest, highest))

            # label: the phonetized syllable
            text = Syllabifier.phonetize_syllables(phon_seq, [boundaries])

            syllables.create_annotation(location, sppasLabel(sppasTag(text)))
Exemplo n.º 6
0
    def tga_to_tier_reglin(tga_result, timegroups, intercept=True):
        """Create a tier of intercept or slope from one of the TGA result.

        For each time group, the value is read in tga_result at the key
        given by the serialized labels of the time group: item 0 for the
        intercept, item 1 for the slope. It is rounded to 5 digits.

        :param tga_result: One of the results of TGA
        :param timegroups: (sppasTier) Time groups
        :param intercept: (boolean) Export the intercept.
        If not True, export Slope.

        :returns: (sppasTier) named 'TGA-Intercept' or 'TGA-Slope'

        """
        if intercept is True:
            tier_name, index = 'TGA-Intercept', 0
        else:
            tier_name, index = 'TGA-Slope', 1
        reglin_tier = sppasTier(tier_name)

        for tg_ann in timegroups:
            key = tg_ann.serialize_labels()
            location = tg_ann.get_location().copy()
            value = round(tga_result[key][index], 5)
            reglin_tier.create_annotation(
                location, sppasLabel(sppasTag(value, "float")))

        return reglin_tier
Exemplo n.º 7
0
    def create_time_tier(self, begin, end, tier_name="MetaInformation"):
        """Create a tier with activated information as annotations.

        The time between begin and end is shared equally among the
        enabled keys (duration rounded to 3 digits). Each annotation is
        labelled "key=value". The end of the last annotation is then set
        to the given end value.

        :param begin: (float) Begin midpoint value
        :param end: (float) End midpoint value
        :param tier_name: (str) Name of the tier to create
        :returns: sppasTier, or None if no key is enabled

        """
        enabled = self.keys_enabled()
        if len(enabled) == 0:
            return None

        step = round((float(end) - float(begin)) / float(len(enabled)), 3)

        meta_tier = sppasTier(tier_name)
        start = round(begin, 3)
        stop = begin + step
        for key in enabled:
            tag = sppasTag(key + "=" + self.get_metainfo(key))
            interval = sppasInterval(sppasPoint(start), sppasPoint(stop))
            meta_tier.create_annotation(sppasLocation(interval),
                                        sppasLabel(tag))
            # the next annotation starts where this one ends
            start, stop = stop, stop + step

        # the last annotation must end exactly at the requested end
        meta_tier[-1].get_location().get_best().set_end(sppasPoint(end))
        return meta_tier
Exemplo n.º 8
0
    def test_phon_to_intervals(self):
        """... Create the intervals to be syllabified."""

        test_tier = self.tier.copy()

        # the intervals expected, whatever the changes made below
        expected = sppasTier('Expected')
        bounds = ((1, 3), (4, 6), (8, 9), (11, 13),
                  (14, 16), (17, 19), (20, 24))
        for b, e in bounds:
            expected.create_annotation(
                sppasLocation(sppasInterval(sppasPoint(b), sppasPoint(e))))

        def check_intervals():
            """Compare the intervals of the test tier to the expected ones."""
            found = sppasSyll._phon_to_intervals(test_tier)
            self.assertEqual(len(expected), len(found))
            for expected_ann, found_ann in zip(expected, found):
                self.assertEqual(expected_ann, found_ann)

        check_intervals()

        # add an empty interval at start
        test_tier.create_annotation(
            sppasLocation(sppasInterval(sppasPoint(0), sppasPoint(1))))
        check_intervals()

        # add an empty interval at end
        test_tier.create_annotation(
            sppasLocation(sppasInterval(sppasPoint(24), sppasPoint(25))))
        check_intervals()

        # silence at start
        test_tier[0].append_label(sppasLabel(sppasTag('#')))
        check_intervals()

        # silence at end
        test_tier[-1].append_label(sppasLabel(sppasTag('#')))
        check_intervals()
Exemplo n.º 9
0
    def set_label(self, value):
        """Change the IPU label.

        The annotation at index ann_idx of the tier at index tier_idx
        gets a single label, with a single tag made of the given value.

        :param value: (str) Content of the new tag

        """
        tier = self.trs[self.tier_idx]
        annotation = tier[self.ann_idx]
        new_label = anndata.sppasLabel(anndata.sppasTag(value))
        annotation.set_labels(new_label)
Exemplo n.º 10
0
    def test_map_label_reverse(self):
        """Map a single label, with reversed mapping table."""
        mapper = self.map

        # Map normally
        mapper.set_keep_miss(True)
        mapper.set_reverse(False)
        mapped = [(content, mapper.map_label(sppasLabel(sppasTag(content))))
                  for content in ("1", "70")]

        # Reverse the mapping table...
        mapper.set_reverse(True)

        # Re-map. Expect the initial result
        for content, label in mapped:
            self.assertEqual(sppasLabel(sppasTag(content)),
                             mapper.map_label(label))

        # Map normally (for other tests!)
        mapper.set_reverse(False)
Exemplo n.º 11
0
    def read_aligned_tracks(self, dir_name):
        """Read time-aligned tracks in a directory.

        The tags of the phonemes tier and of the pronunciations tier are
        mapped with the mapping table (not reversed, missing entries
        kept). Alternative tags are mapped too and scores are preserved.
        The tokens tier is returned as it was read.

        :param dir_name: (str) Input directory to get files.
        :returns: (sppasTier, sppasTier, sppasTier)

        """
        tier_phn, tier_tok, tier_pron = \
            TracksReader.read_aligned_tracks(dir_name)

        # map-back phonemes
        self._mapping.set_keep_miss(True)
        self._mapping.set_reverse(False)

        def remap(tier, convert):
            """Replace each tag content of the tier by its converted one."""
            for ann in tier:
                new_labels = list()
                for label in ann.get_labels():
                    mapped_tags = list()
                    kept_scores = list()
                    for tag, score in label:
                        mapped_tags.append(
                            sppasTag(convert(tag.get_content())))
                        kept_scores.append(score)
                    new_labels.append(sppasLabel(mapped_tags, kept_scores))
                ann.set_labels(new_labels)

        # Time-aligned phonemes: each tag is a single entry
        remap(tier_phn, self._mapping.map_entry)

        # Pronunciations: each tag is a sequence of phonemes
        remap(tier_pron,
              lambda text: self._mapping.map(text, [separators.phonemes]))

        return tier_phn, tier_tok, tier_pron
Exemplo n.º 12
0
    def test_syllabify_interval(self):
        """... Perform the syllabification of one interval."""

        expected = sppasTier('Expected')
        syllables = sppasTier('SyllAlign')

        def expect(begin, end, content):
            """Add an interval with the given tag to the expected tier."""
            expected.create_annotation(
                sppasLocation(
                    sppasInterval(sppasPoint(begin), sppasPoint(end))),
                sppasLabel(sppasTag(content)))

        def check_syllables():
            """Compare the syllables to the expected annotations."""
            self.assertEqual(len(expected), len(syllables))
            for expected_ann, syll_ann in zip(expected, syllables):
                self.assertEqual(expected_ann, syll_ann)

        # phonemes 0 to 1: a single syllable
        expect(1, 3, 'l-@')
        self.syll.syllabify_interval(self.tier, 0, 1, syllables)
        check_syllables()

        # phonemes 13 to 15: two more syllables, appended to the tier
        expect(17, 18, 'E')
        expect(18, 19, 'o')
        self.syll.syllabify_interval(self.tier, 13, 15, syllables)
        check_syllables()
Exemplo n.º 13
0
    def test_map_tag_reverse(self):
        """Map a single tag, reversing the mapping table."""
        mapper = self.map

        # Map normally
        mapper.set_keep_miss(True)
        mapper.set_reverse(False)
        t_un = mapper.map_tag(sppasTag("1"))
        t_sept = mapper.map_tag(sppasTag("70"))

        # Reverse the mapping table...
        mapper.set_reverse(True)

        # Re-map. Expect the initial result
        # (initial content, mapped tags, index of the mapped tag to re-map)
        cases = (("1", t_un, 0), ("70", t_sept, 0), ("70", t_sept, 1))
        for content, mapped, index in cases:
            self.assertEqual([sppasTag(content)],
                             mapper.map_tag(mapped[index]))

        # Map normally (for other tests!)
        mapper.set_reverse(False)
Exemplo n.º 14
0
    def setUp(self):
        """Create a point tier, an interval tier and a mapping table."""
        # Create tiers
        self.tierP = sppasTier("PointTier")
        self.tierI = sppasTier("IntervalTier")
        for i in range(8):
            point_location = sppasLocation(sppasPoint(i))
            self.tierP.create_annotation(
                point_location, sppasLabel(sppasTag(str(i))))

            interval_location = sppasLocation(
                sppasInterval(sppasPoint(i), sppasPoint(i+1)))
            self.tierI.create_annotation(
                interval_location, sppasLabel(sppasTag(str(i*10))))

        # One more interval, with a tag made of two alternatives
        self.tierI.create_annotation(
            sppasLocation(
                sppasInterval(sppasPoint(9), sppasPoint(10))),
            sppasLabel(sppasTag("{quatre-vingts-dix|nonante}")))

        # Create TierMapping: "70" and "80" are added twice
        entries = (
            ("1", "un"), ("2", "deux"), ("3", "trois"), ("4", "quatre"),
            ("5", "cinq"), ("6", "six"), ("7", "sept"), ("8", "huit"),
            ("9", "neuf"), ("10", "dix"), ("20", "vingt"),
            ("30", "trente"), ("40", "quarante"), ("50", "cinquante"),
            ("60", "soixante"),
            ("70", "septante"), ("70", "soixante-dix"),
            ("80", "octante"), ("80", "quatre-vingts"),
        )
        self.map = sppasMappingTier()
        for key, value in entries:
            self.map.add(key, value)
        self.map.set_delimiters((";", ",", " ", ".", "|"))
Exemplo n.º 15
0
 def setUp(self):
     """Create four interval annotations and a tier containing them."""
     def annotation(begin, end, content):
         """Return an annotation of the interval with a single tag."""
         return sppasAnnotation(
             sppasLocation(sppasInterval(begin, end)),
             sppasLabel(sppasTag(content)))

     self.x = annotation(sppasPoint(1., 0.), sppasPoint(2., 0.01), 'toto')
     self.y = annotation(sppasPoint(3., 0.01), sppasPoint(4., 0.01), 'titi')
     self.a = annotation(sppasPoint(5., 0.01), sppasPoint(6.5, 0.005), 'toto')
     self.b = annotation(sppasPoint(6.5, 0.005), sppasPoint(9.5, 0.), 'toto')

     # the tier, with the annotations in chronological order
     self.tier = sppasTier()
     for ann in (self.x, self.y, self.a, self.b):
         self.tier.append(ann)
Exemplo n.º 16
0
    def convert(self, input_audio_filename, input_filename):
        """Return a tier with transcription aligned to the audio.

        Only the first tier of the transcription file is used: each of
        its annotations is a unit, and the units which are not a silence
        are the IPUs to be time-aligned.

        :param input_audio_filename: (str) Input audio file
        :param input_filename: (str) Input transcription file
        :returns: (sppasTier) The tier named 'Transcription', or None if
        the number of IPUs found in the audio does not match the number
        of IPUs of the transcription
        :raises: AudioChannelError if the audio has not exactly 1 channel

        """
        # Get audio and the channel we'll work on
        audio_speech = sppas.src.audiodata.aio.open(input_audio_filename)
        nb_channels = audio_speech.get_nchannels()
        if nb_channels != 1:
            raise AudioChannelError(nb_channels)

        idx = audio_speech.extract_channel()
        channel = audio_speech.get_channel(idx)

        # Get the units we'll work on.
        # NOTE: tiers other than the first one are ignored and an empty
        # first tier is not rejected here.
        parser = sppasRW(input_filename)
        trs = parser.read()
        units = [a.serialize_labels() for a in trs[0]]
        ipus = [u for u in units if u != SIL_ORTHO]

        # Create the instance to fill in IPUs
        filler = FillIPUs(channel, units)
        filler.set_min_ipu(self._options['min_ipu'])
        filler.set_min_sil(self._options['min_sil'])
        nb_found = filler.fix_threshold_durations()
        if nb_found != len(ipus):
            # the search did not find the expected number of IPUs:
            # there is no tier to return (callers get None)
            return

        # Process the data.
        tracks = filler.get_tracks(time_domain=True)
        tier = sppasSearchIPUs.tracks_to_tier(tracks, channel.get_duration(),
                                              filler.get_vagueness())
        tier.set_name('Transcription')
        self._set_meta(filler, tier)

        # Replace the label of each non-silence interval by the
        # transcription of the corresponding IPU, in the same order
        i = 0
        for a in tier:
            if a.get_best_tag().is_silence() is False:
                a.set_labels([sppasLabel(sppasTag(ipus[i]))])
                i += 1

        return tier
Exemplo n.º 17
0
    def syllables_to_timegroups(self, syllables):
        """Create the time group intervals.

        The syllables are exported to intervals with the time group
        separators. Each interval gets a label made of the prefix option
        'tg_prefix_label' followed by its rank, starting at 1.

        :param syllables: (sppasTier)
        :returns: (sppasTier) Time groups, named "TGA-TimeGroups"

        """
        timegroups = syllables.export_to_intervals(self._tg_separators)
        timegroups.set_name("TGA-TimeGroups")

        for rank, tg_ann in enumerate(timegroups, 1):
            tg_name = self._options['tg_prefix_label'] + str(rank)
            tg_ann.append_label(sppasLabel(sppasTag(tg_name)))

        return timegroups
Exemplo n.º 18
0
    def test_write_text_tracks(self):
        """Write tokenization and phonetization into separated track files.

        Three cases are checked: a tokens tier whose size does not match
        the phonemes tier, no tokens tier at all, and a matching tokens
        tier.

        """
        l1 = sppasLabel([sppasTag("j"), sppasTag("S")])
        l2 = sppasLabel([sppasTag("e"), sppasTag("E")])
        tier_phn = sppasTier("phonemes")
        tier_phn.create_annotation(sppasLocation(sppasPoint(1)),
                                   [l1, l2])
        tier_phn.create_annotation(sppasLocation(sppasPoint(2)),
                                   sppasLabel(sppasTag("j-e s-H-i")))
        tier_tok = sppasTier("tokens")
        tier_tok.create_annotation(sppasLocation(sppasPoint(1)),
                                   sppasLabel(sppasTag("j' ai")))
        tier_tok.create_annotation(sppasLocation(sppasPoint(2)),
                                   sppasLabel(sppasTag('je suis')))

        def read_track_lines(dir_tracks):
            """Return the single line of each of the 4 files expected."""
            created_files = os.listdir(dir_tracks)
            self.assertEqual(4, len(created_files))
            lines = list()
            for fn in created_files:
                path = os.path.join(dir_tracks, fn)
                # the context manager closes the file: no close() needed
                with codecs.open(path, "r", sg.__encoding__) as fp:
                    new_lines = fp.readlines()
                self.assertEqual(1, len(new_lines))
                lines.append(new_lines[0])
            return lines

        # the tokens tier has not the same size as the phonemes tier
        with self.assertRaises(SizeInputsError):
            TracksWriter._write_text_tracks(tier_phn, sppasTier('toto'), TEMP)

        # no tokens tier: tokens like "w_1 w_2" are expected
        dir_tracks = os.path.join(TEMP, "test_write_text_tracks_1")
        os.mkdir(dir_tracks)
        TracksWriter._write_text_tracks(tier_phn, None, dir_tracks)
        lines = read_track_lines(dir_tracks)
        self.assertTrue("w_1 w_2" in lines)
        self.assertTrue("{j|S} {e|E}" in lines)
        self.assertTrue("j-e s-H-i" in lines)

        # a tokens tier is given: its tokens are expected
        dir_tracks = os.path.join(TEMP, "test_write_text_tracks_2")
        os.mkdir(dir_tracks)
        TracksWriter._write_text_tracks(tier_phn, tier_tok, dir_tracks)
        lines = read_track_lines(dir_tracks)
        self.assertTrue("j' ai" in lines)
        self.assertTrue("je suis" in lines)
        self.assertTrue("{j|S} {e|E}" in lines)
        self.assertTrue("j-e s-H-i" in lines)
Exemplo n.º 19
0
    def make_classes(self, syllables):
        """Create the tier with syllable classes.

        Each annotation of the result has a copy of the location of a
        syllable, and a tag made of the classes the syllabifier returns
        for the typed content of the best tag of this syllable.

        :param syllables: (sppasTier)
        :returns: (sppasTier) named "SyllClassAlign"

        """
        classes_tier = sppasTier("SyllClassAlign")
        classes_tier.set_meta('syllabification_classes_of_tier',
                              syllables.get_name())

        for syll_ann in syllables:
            location = syll_ann.get_location().copy()
            phonetized = syll_ann.get_best_tag().get_typed_content()
            classes = self.syllabifier.classes_phonetized(phonetized)
            classes_tier.create_annotation(location,
                                           sppasLabel(sppasTag(classes)))

        return classes_tier
Exemplo n.º 20
0
    def _create_tok_tier(phon_tier):
        """Create a tier with tokens like 'w_1 w_2...w_n' from phonemes.

        The result is a copy of the given tier in which the labels of each
        annotation are replaced by a single tag. Annotations whose best
        tag is a silence are left unchanged.

        :param phon_tier: (sppasTier) time-aligned tier with phonetization
        :returns: (sppasTier)

        """
        tok_tier = phon_tier.copy()
        for ann in tok_tier:
            if ann.get_best_tag().is_silence() is not False:
                continue

            # one token for each space-separated item of the phonetization
            serialized = ann.serialize_labels(" ", "", alt=True)
            nb_items = len(serialized.split(' '))
            names = ["w_" + str(rank) for rank in range(1, nb_items + 1)]
            ann.set_labels([sppasLabel(sppasTag(" ".join(names)))])

        return tok_tier
Exemplo n.º 21
0
    def convert(self, tier):
        """Phonetize annotations of a tokenized tier.

        Each label of an annotation is turned into one label of the
        result; the pronunciation variants are its alternative tags,
        without duplicates and in the order they were produced.

        :param tier: (Tier) the orthographic transcription previously tokenized.
        :returns: (Tier) phonetized tier with name "Phones"
        :raises: EmptyInputError if the given tier is empty

        """
        if tier.is_empty() is True:
            raise EmptyInputError(name=tier.get_name())

        phones_tier = sppasTier("Phones")
        for i, ann in enumerate(tier):
            self.print_message(MSG_TRACK.format(number=i + 1), indent=2)

            location = ann.get_location().copy()
            labels = list()

            # Phonetize all labels of the orthographic transcription
            for label in ann.get_labels():

                phonetizations = list()
                for text, score in label:
                    if text.is_pause() or text.is_silence():
                        # It's in case the pronunciation dictionary
                        # were not properly fixed.
                        phonetizations.append(SIL)

                    elif text.is_empty() is False:
                        phones = self.phonetize(text.get_content(), i)
                        for p in phones:
                            phonetizations.extend(p.split(VARIANTS_SEPARATOR))

                # New in SPPAS 1.9.6.
                #  - The result is a sequence of labels.
                #  - Variants are alternative tags.
                # Duplicates are removed but the order is preserved: the
                # iteration order of a set of strings is not guaranteed to
                # be the same from one run to another, so the order of the
                # tags would not be reproducible.
                tags = list()
                already_added = set()
                for p in phonetizations:
                    if p not in already_added:
                        already_added.add(p)
                        tags.append(sppasTag(p))
                labels.append(sppasLabel(tags))

            phones_tier.create_annotation(location, labels)

        return phones_tier
Exemplo n.º 22
0
    def append_extra(self, trs):
        """Append extra tiers in trs.

        Depending on the options, the activity tier is appended to trs
        and/or an 'ActivityDuration' tier is created in trs with the
        duration of each activity. An error while creating these tiers is
        logged and reported as a warning instead of being raised.

        :param trs: (Transcription)
        :returns: None if both options are disabled or after the tiers
        were processed; trs if it has no "TokensAlign" tier

        """
        options = self._options
        if options['activity'] is False and \
                options['activityduration'] is False:
            return

        tokens = trs.find("TokensAlign")
        if tokens is None:
            self.logfile.print_message(MSG_NO_TOKENS_ALIGN,
                                       indent=1,
                                       status=annots.warning)
            return trs

        # Activity tier
        try:
            self.logfile.print_message(MSG_ACTION_EXTRA_TIER, indent=1)
            activity_tier = sppasActivity().get_tier(trs)

            if options['activity'] is True:
                trs.append(activity_tier)
                trs.add_hierarchy_link("TimeAlignment", tokens, activity_tier)

            if options['activityduration'] is True:
                durations = trs.create_tier('ActivityDuration')
                for ann in activity_tier:
                    span = ann.get_location().get_best()
                    value = span.duration().get_value()
                    durations.create_annotation(
                        sppasLocation(span.copy()),
                        sppasLabel(sppasTag(value, tag_type="float")))
                trs.add_hierarchy_link("TimeAssociation",
                                       activity_tier, durations)

        except Exception as err:
            logging.error(traceback.format_exc())
            warning_msg = MSG_EXTRA_TIER.format(tiername="Activities",
                                                message=str(err))
            self.logfile.print_message(warning_msg,
                                       indent=2,
                                       status=annots.warning)
Exemplo n.º 23
0
    def tones_to_tier(tones, anchors_tier):
        """Convert the INTSINT result into a tier.

        Each annotation of the result has a copy of the location of an
        anchor and a tag made of the tone of the same rank.

        :param tones: (list)
        :param anchors_tier: (sppasTier)
        :returns: (sppasTier) named "INTSINT"
        :raises: AnnDataEqError if there is not one tone for each anchor

        """
        nb_tones = len(tones)
        nb_anchors = len(anchors_tier)
        if nb_tones != nb_anchors:
            raise AnnDataEqError("tones:" + str(nb_tones),
                                 "anchors:" + str(nb_anchors))

        intsint_tier = sppasTier("INTSINT")
        for tone, anchor_ann in zip(tones, anchors_tier):
            tone_tag = sppasTag(tone)
            anchor_location = anchor_ann.get_location().copy()
            intsint_tier.create_annotation(anchor_location,
                                           sppasLabel(tone_tag))

        return intsint_tier
Exemplo n.º 24
0
    def split_into_tracks(self, input_audio, phon_tier, tok_tier, dir_align):
        """Write tracks from the given data.

        The labels of each annotation of the phonetization tier are
        replaced, in place, by a single tag with the mapped phonemes.
        The tracks are then written; if writing with the tokenization
        fails, writing is tried again with the phonetization only.
        Nothing is returned.

        :param input_audio: (str) Audio file name.
        :param phon_tier: (sppasTier) The phonetization tier.
        :param tok_tier: (sppasTier) The tokenization tier, or None.
        :param dir_align: (str) Output directory to store files.

        """
        # Map phonemes from SAMPA to the expected ones.
        self._mapping.set_keep_miss(True)
        self._mapping.set_reverse(True)

        # Map phonetizations (even the alternatives)
        for ann in phon_tier:
            serialized = ann.serialize_labels(
                separator="\n", empty="", alt=True)
            pieces = list()
            for line in serialized.split('\n'):
                line = line.replace('|', separators.variants)
                # remove the braces surrounding a list of alternatives
                in_braces = line.startswith('{') and line.endswith('}')
                pieces.append(line[1:-1] if in_braces else line)

            mapped = self._mapping.map(" ".join(pieces),
                                       TracksReaderWriter.DELIMITERS)
            ann.set_labels(sppasLabel(sppasTag(mapped)))

        try:
            TracksWriter.write_tracks(input_audio, phon_tier, tok_tier,
                                      dir_align)
        except (SizeInputsError, BadInputError):
            # number of intervals are not matching, or either phonemes
            # or tokens is wrong... re-try with phonemes only
            TracksWriter.write_tracks(input_audio, phon_tier, None, dir_align)
Exemplo n.º 25
0
    def syllables_to_timesegments(self, syllables):
        """Create the time segments intervals.

        Time segments are time groups with serialized syllables: the
        label of each interval is made of the serialized labels of the
        syllables found inside it, each one followed by a whitespace.

        :param syllables: (sppasTier)
        :returns: (sppasTier) Time segments, named "TGA-TimeSegments"

        """
        segments = syllables.export_to_intervals(self._tg_separators)
        segments.set_name("TGA-TimeSegments")

        for segment_ann in segments:
            inside = syllables.find(segment_ann.get_lowest_localization(),
                                    segment_ann.get_highest_localization())
            content = "".join(
                syll_ann.serialize_labels(separator=" ") + " "
                for syll_ann in inside)
            segment_ann.append_label(sppasLabel(sppasTag(content)))

        return segments
Exemplo n.º 26
0
    def map_tag(self, tag):
        """Run the mapping process on a tag.

        A copy of the tag is returned, alone in a list, if the tag is
        not a non-empty string or if it is not speech while symbols are
        not mapped. Otherwise its content, without its surrounding
        braces if any, is mapped and one tag is returned for each
        '|'-separated item of the mapped content.

        :param tag: (sppasTag) tag with symbols to map
        :returns: List of sppasTag()

        """
        # only non-empty strings can be mapped
        if tag.get_type() != 'str' or tag.is_empty() is not False:
            return [tag.copy()]

        # only speech can be mapped, not the symbols.
        if tag.is_speech() is not True and self._map_symbols is not True:
            return [tag.copy()]

        text = tag.get_content()
        if text.startswith('{') and text.endswith('}'):
            text = text[1:-1]
        mapped_text = self.map(text, self._delimiters)
        return [sppasTag(item) for item in mapped_text.split('|')]
Exemplo n.º 27
0
    def _add_aligned_track_into_tier(tier, tdata, delta, unitend):
        """Append a list of (start, end, text, score) into the tier.

        Shift start/end of a delta value and set the last end value.
        This is a best-effort operation: if an item can't be added, the
        error is logged and the remaining items are not added; the error
        is not propagated to the caller.

        :param tier: Tier in which the annotations are created
        :param tdata: (list) Tuples (start, end, contents, scores), where
            contents and scores are '|'-separated alternatives; scores
            can be None
        :param delta: Value added to each start and each end
        :param unitend: End value assigned to the last item of tdata

        """
        try:

            for i, (loc_s, loc_e, contents, scores) in enumerate(tdata):

                # fix the location - an interval
                loc_s += delta
                loc_e += delta
                if i == (len(tdata) - 1):
                    # the last interval ends where the unit ends
                    loc_e = unitend
                location = sppasLocation(
                        sppasInterval(
                            sppasPoint(loc_s, TracksReader.RADIUS),
                            sppasPoint(loc_e, TracksReader.RADIUS)
                        ))

                # fix the label
                # allow to work with alternative tags
                tags = [sppasTag(c) for c in contents.split('|')]
                if scores is not None:
                    tag_scores = [float(s) for s in scores.split('|')]
                else:
                    tag_scores = None
                label = sppasLabel(tags, tag_scores)

                tier.create_annotation(location, label)

        except Exception:
            # Exception, not a bare except: KeyboardInterrupt and
            # SystemExit must not be swallowed here.
            logging.error('The following data were not added to the tier '
                          '{:s} at position {:f}: {:s}'
                          ''.format(tier.get_name(), delta, str(tdata)))
            logging.error(traceback.format_exc())
Exemplo n.º 28
0
    def tga_to_tier(tga_result, timegroups, tier_name, tag_type="float"):
        """Create a tier from one of the TGA result.

        Each time group gives one annotation: its location is a copy of
        the time group location and its tag is the value stored in the
        result under the serialized labels of the time group.

        :param tga_result: One of the results of TGA, indexed by the
            serialized labels of the time groups
        :param timegroups: (sppasTier) Time groups
        :param tier_name: (str) Name of the output tier
        :param tag_type: (str) Type of the sppasTag to be included;
            "float" values are rounded to 5 digits

        :returns: (sppasTier)

        """
        result_tier = sppasTier(tier_name)

        for group in timegroups:
            raw_value = tga_result[group.serialize_labels()]
            value = round(raw_value, 5) if tag_type == "float" else raw_value

            location = group.get_location().copy()
            result_tier.create_annotation(
                location, sppasLabel(sppasTag(value, tag_type)))

        return result_tier
Exemplo n.º 29
0
    # Decrease i while the value of window_end at this index is 0, then
    # use it as an offset from idx_end to get the index of the last
    # annotation of the area.
    while window_end[i] == 0:
        i -= 1
    ann_idx_end = idx_end + i

    # Assign a label to the new annotation
    # NOTE(review): the slice is inclusive of idx_end, so it presumably holds
    # (idx_end - idx_begin + 1) values, while the sum is divided by
    # (idx_end - idx_begin) -- confirm the divisor is the intended one, and
    # that idx_end can't be equal to idx_begin (division by zero).
    mean_dist = sum(
        distances[idx_begin:idx_end + 1]) / float(idx_end - idx_begin)
    mean_dist = round(mean_dist, 2)
    if mean_dist == 0:
        # No annotation is created when the rounded mean is 0.
        print(" ERROR: mean dist equal to 0...")
        continue

    # The new annotation spans from the lowest localization of the
    # annotation at ann_idx_begin to the highest localization of the one
    # at ann_idx_end; its tag is the rounded mean, typed as a float.
    begin = tier[ann_idx_begin].get_lowest_localization().copy()
    end = tier[ann_idx_end].get_highest_localization().copy()
    loc = sppasLocation(sppasInterval(begin, end))
    label = sppasLabel(sppasTag(mean_dist, "float"))

    filtered_tier.create_annotation(loc, label)

# Tell the user when no annotation was created.
if len(filtered_tier) == 0:
    print("No density area found.")

# ----------------------------------------------------------------------------
# Save result

# Without an output file name, the annotations are printed.
if file_output is None:
    for a in filtered_tier:
        print(a)
else:

    parser.set_filename(file_output)
Exemplo n.º 30
0
    def ExportToAnnData(self):
        """Export this transcription to anndata.sppasTranscription().

        The metadata, the controlled vocabularies, the media, the tiers
        with their annotations and the hierarchy links are copied into a
        newly created transcription.

        A radius equal to 0 is exported as None, for points and for both
        bounds of intervals.

        :returns: (anndata.sppasTranscription)

        """
        trs = anndata.sppasTranscription(self.__name)

        # Metadata: entries with a None value are not exported
        for meta_key in self.metadata:
            if self.metadata[meta_key] is not None:
                trs.set_meta(meta_key, self.metadata[meta_key])

        # Controlled vocabularies, with the description of each entry
        for ctrl_vocab in self.GetCtrlVocab():
            other_cv = anndata.sppasCtrlVocab(ctrl_vocab.id,
                                              ctrl_vocab.GetDescription())
            for entry in ctrl_vocab:
                entry_text = entry.Text
                entry_desc = entry.GetDescription()
                other_cv.add(anndata.sppasTag(entry_text), entry_desc)
            trs.add_ctrl_vocab(other_cv)

        # Media
        for media in self.GetMedia():
            other_m = anndata.sppasMedia(media.url, media.id, media.mime)
            trs.add_media(other_m)

        # Tiers and their annotations
        for tier in self:
            # The exported tier refers to the exported vocabulary/media,
            # retrieved in the new transcription from its name/id.
            c = tier.GetCtrlVocab()
            if c is not None:
                ctrl_vocab = trs.get_ctrl_vocab_from_name(c.GetName())
            else:
                ctrl_vocab = None
            m = tier.GetMedia()
            if m is not None:
                media = trs.get_media_from_id(m.id)
            else:
                media = None
            other_t = trs.create_tier(tier.GetName(), ctrl_vocab, media)
            is_point = tier.IsPoint()
            for ann in tier:
                text = ann.GetLabel().GetLabel()
                if is_point is True:
                    p = ann.GetLocation().GetPoint().GetValue()
                    r = ann.GetLocation().GetPoint().GetRadius()
                    if r == 0.:
                        r = None
                    other_t.create_annotation(
                        anndata.sppasLocation(anndata.sppasPoint(p, r)),
                        anndata.sppasLabel(anndata.sppasTag(text)))
                else:
                    b = ann.GetLocation().GetBegin().GetValue()
                    r_begin = ann.GetLocation().GetBegin().GetRadius()
                    if r_begin == 0.:
                        r_begin = None
                    e = ann.GetLocation().GetEnd().GetValue()
                    r_end = ann.GetLocation().GetEnd().GetRadius()
                    # The radius of the end is normalized like the one of
                    # the begin (it was formerly the begin tested twice).
                    if r_end == 0.:
                        r_end = None
                    other_t.create_annotation(
                        anndata.sppasLocation(
                            anndata.sppasInterval(
                                anndata.sppasPoint(b, r_begin),
                                anndata.sppasPoint(e, r_end))),
                        anndata.sppasLabel(anndata.sppasTag(text)))

        # Hierarchy: links are re-created between the exported tiers,
        # which are found from the name of the original ones.
        for tier in self:
            parent_tier = self._hierarchy.get_parent(tier)
            if parent_tier is not None:
                link_type = self._hierarchy.get_hierarchy_type(tier)
                new_tier = trs.find(tier.GetName())
                new_parent_tier = trs.find(parent_tier.GetName())
                trs.add_hierarchy_link(link_type, new_parent_tier, new_tier)

        return trs