def test_write_phonemes(self):
    """Write the phonetization of a track in a file."""

    def read_lines(filename):
        # The "with" statement closes the file: no explicit close() needed.
        with codecs.open(filename, "r", sg.__encoding__) as fp:
            return fp.readlines()

    # test to write an annotation with complex labels
    l1 = sppasLabel([sppasTag("j"), sppasTag("S")])
    l2 = sppasLabel([sppasTag("e"), sppasTag("E")])
    ann = sppasAnnotation(sppasLocation(sppasPoint(1)), [l1, l2])
    TracksWriter._write_phonemes(ann, TEMP, 1)
    fn = os.path.join(TEMP, "track_000001.phn")
    self.assertTrue(os.path.exists(fn))
    lines = read_lines(fn)
    self.assertEqual(1, len(lines))
    self.assertEqual("{j|S} {e|E}", lines[0])

    # test to write an annotation with already serialized labels
    sentence = "A serialized list of {labels|tags}"
    ann = sppasAnnotation(
        sppasLocation(sppasPoint(1)),
        sppasLabel(sppasTag(sentence)))
    TracksWriter._write_phonemes(ann, TEMP, 2)
    fn = os.path.join(TEMP, "track_000002.phn")
    self.assertTrue(os.path.exists(fn))
    lines = read_lines(fn)
    self.assertEqual(1, len(lines))
    self.assertEqual(sentence, lines[0])
def test_tier_tga(self):
    """Check time groups, time segments and duration figures of a tier."""
    # (begin, begin radius, end, end radius, tag content or None)
    rows = (
        (0., 0., 1., 0.0, '#'),
        (1., 0., 2., 0.01, 'toto'),
        (3., 0.01, 4., 0.01, 'titi'),
        (4., 0.01, 5., 0.01, None),
        (5., 0.01, 6.5, 0.005, 'toto'),
        (6.5, 0.005, 9.5, 0., 'toto'),
    )
    tier = sppasTier("tier")
    for b_value, b_radius, e_value, e_radius, content in rows:
        loc = sppasLocation(
            sppasInterval(sppasPoint(b_value, b_radius),
                          sppasPoint(e_value, e_radius)))
        if content is None:
            # annotation without label
            tier.create_annotation(loc)
        else:
            tier.create_annotation(loc, sppasLabel(sppasTag(content)))

    # test the timegroups tier
    # to be tested:
    #  [1., 2.] tg_1
    #  [3.; 4.] tg_2
    #  [5.; 9.5] tg_3
    tg = sppasTGA().syllables_to_timegroups(tier)
    self.assertEqual(3, len(tg))

    # to be tested:
    #  [1., 2.] toto
    #  [3.; 4.] titi
    #  [5.; 9.5] toto toto
    ts = sppasTGA().syllables_to_timesegments(tier)
    self.assertEqual(3, len(ts))

    tg_dur = sppasTGA().timegroups_to_durations(tier, tg)
    self.assertEqual(3, len(tg_dur))
    for name, durations in (('tg_1', [1.]),
                            ('tg_2', [1.]),
                            ('tg_3', [1.5, 3.0])):
        self.assertEqual(durations, tg_dur[name])

    tga = TimeGroupAnalysis(tg_dur)

    occurrences = tga.len()
    for name, nb in (('tg_1', 1), ('tg_2', 1), ('tg_3', 2)):
        self.assertEqual(nb, occurrences[name])

    total = tga.total()
    for name, value in (('tg_1', 1.0), ('tg_2', 1.0), ('tg_3', 4.5)):
        self.assertEqual(value, total[name])

    mean = tga.mean()
    for name, value in (('tg_1', 1.0), ('tg_2', 1.0), ('tg_3', 2.25)):
        self.assertEqual(value, mean[name])
def tracks_to_tier(tracks, end_time, vagueness):
    """Create a sppasTier object from tracks.

    Each track becomes an annotation labelled "ipu_N" (N starts at 1).
    A silence annotation is inserted before a track that starts after the
    end of the previous one, and after the last track if it ends before
    end_time.

    :param tracks: (List of tuple) with (from, to) values in seconds
    :param end_time: (float) End-time of the tier
    :param vagueness: (float) vagueness used for silence search
    :returns: (sppasTier) tier named "IPUs"
    :raises: IOError if there is no track

    """
    if len(tracks) == 0:
        raise IOError('No IPUs to write.\n')

    tier = sppasTier("IPUs")
    tier.set_meta('number_of_ipus', str(len(tracks)))

    previous_end = 0.
    for number, (from_time, to_time) in enumerate(tracks, start=1):
        # No radius for a track at the very beginning or the very end
        on_boundary = from_time == 0. or to_time == end_time
        radius = 0. if on_boundary else vagueness / 2.

        # From the previous track to the current track: silence
        if previous_end < from_time:
            silence = sppasInterval(sppasPoint(previous_end, radius),
                                    sppasPoint(from_time, radius))
            tier.create_annotation(sppasLocation(silence),
                                   sppasLabel(sppasTag(SIL_ORTHO)))

        # New track with speech
        speech = sppasInterval(sppasPoint(from_time, radius),
                               sppasPoint(to_time, radius))
        tier.create_annotation(sppasLocation(speech),
                               sppasLabel(sppasTag("ipu_%d" % number)))

        previous_end = to_time

    # The end is a silence? Fill...
    begin = sppasPoint(previous_end, vagueness / 2.)
    if begin < end_time:
        tier.create_annotation(
            sppasLocation(sppasInterval(begin, sppasPoint(end_time))),
            sppasLabel(sppasTag(SIL_ORTHO)))

    return tier
def test_create_tok_tier(self):
    """Create a tier with tokens like 'w_1 w_2...w_n' from phonemes."""
    # Two labels with alternative tags, and their serialized equivalent
    labels = [
        sppasLabel([sppasTag("j"), sppasTag("S")]),
        sppasLabel([sppasTag("e"), sppasTag("E")]),
    ]
    phonemes = sppasTier("phonemes")
    phonemes.create_annotation(sppasLocation(sppasPoint(1)), labels)
    phonemes.create_annotation(
        sppasLocation(sppasPoint(2)),
        sppasLabel(sppasTag("{j|S} {e|E}")))

    tokens = TracksWriter._create_tok_tier(phonemes)

    self.assertEqual(2, len(tokens))
    # Both annotations are made of two phonetized tokens
    for index in (0, 1):
        content = tokens[index].get_best_tag().get_content()
        self.assertEqual("w_1 w_2", content)
def syllabify_interval(self, phonemes, from_p, to_p, syllables):
    """Perform the syllabification of one interval.

    The syllables found between the two indexes are added as new
    annotations into the given syllables tier.

    :param phonemes: (sppasTier)
    :param from_p: (int) index of the first phoneme to be syllabified
    :param to_p: (int) index of the last phoneme to be syllabified
    :param syllables: (sppasTier)

    """
    # the sequence of phonemes to syllabify: best tag of each annotation
    sequence = [ann.get_best_tag().get_typed_content()
                for ann in phonemes[from_p:to_p+1]]

    # each syllable is a (start index, end index) in the sequence
    for syll in self.syllabifier.annotate(sequence):
        first, last = syll

        # indexes are relative to the sequence: shift them of from_p
        begin = phonemes[first + from_p].get_lowest_localization().copy()
        end = phonemes[last + from_p].get_highest_localization().copy()
        location = sppasLocation(sppasInterval(begin, end))

        text = Syllabifier.phonetize_syllables(sequence, [syll])
        syllables.create_annotation(location, sppasLabel(sppasTag(text)))
def tga_to_tier_reglin(tga_result, timegroups, intercept=True):
    """Create tiers of intercept,slope from one of the TGA result.

    :param tga_result: One of the results of TGA
    :param timegroups: (sppasTier) Time groups
    :param intercept: (boolean) Export the intercept. If False, export Slope.
    :returns: (sppasTier)

    """
    want_intercept = intercept is True
    # each result entry is indexed: 0 for the intercept, 1 for the slope
    column = 0 if want_intercept else 1
    tier = sppasTier('TGA-Intercept' if want_intercept else 'TGA-Slope')

    for tg_ann in timegroups:
        name = tg_ann.serialize_labels()
        loc = tg_ann.get_location().copy()
        value = round(tga_result[name][column], 5)
        tier.create_annotation(loc, sppasLabel(sppasTag(value, "float")))

    return tier
def create_time_tier(self, begin, end, tier_name="MetaInformation"):
    """Create a tier with activated information as annotations.

    The time span is split into as many intervals as enabled keys; each
    one is labelled "key=value". The end of the last interval is forced
    to the given end value.

    :param begin: (float) Begin midpoint value
    :param end: (float) End midpoint value
    :param tier_name: (str) Name of the tier to create
    :returns: sppasTier, or None if no key is enabled

    """
    enabled = self.keys_enabled()
    if len(enabled) == 0:
        return None

    span = float(end) - float(begin)
    step = round(span / float(len(enabled)), 3)

    tier = sppasTier(tier_name)
    left = round(begin, 3)
    right = begin + step
    for key in enabled:
        tag = sppasTag(key + "=" + self.get_metainfo(key))
        interval = sppasInterval(sppasPoint(left), sppasPoint(right))
        tier.create_annotation(sppasLocation(interval), sppasLabel(tag))
        left, right = right, right + step

    # the rounded steps do not necessarily sum up to the exact end
    tier[-1].get_location().get_best().set_end(sppasPoint(end))
    return tier
def test_phon_to_intervals(self):
    """... Create the intervals to be syllabified."""
    test_tier = self.tier.copy()

    expected = sppasTier('Expected')
    bounds = ((1, 3), (4, 6), (8, 9), (11, 13),
              (14, 16), (17, 19), (20, 24))
    for b, e in bounds:
        expected.create_annotation(
            sppasLocation(sppasInterval(sppasPoint(b), sppasPoint(e))))

    def check_intervals():
        # whatever the change in test_tier, the result must be the same
        intervals = sppasSyll._phon_to_intervals(test_tier)
        self.assertEqual(len(expected), len(intervals))
        for a1, a2 in zip(expected, intervals):
            self.assertEqual(a1, a2)

    check_intervals()

    # add an empty interval at start
    test_tier.create_annotation(
        sppasLocation(sppasInterval(sppasPoint(0), sppasPoint(1))))
    check_intervals()

    # add an empty interval at end
    test_tier.create_annotation(
        sppasLocation(sppasInterval(sppasPoint(24), sppasPoint(25))))
    check_intervals()

    # silence at start
    test_tier[0].append_label(sppasLabel(sppasTag('#')))
    check_intervals()

    # silence at end
    test_tier[-1].append_label(sppasLabel(sppasTag('#')))
    check_intervals()
def set_label(self, value):
    """Replace the labels of the current IPU by a single one.

    The annotation is the one at index ann_idx of the tier at index
    tier_idx of the transcription.

    @param value (string) Content of the tag of the new label

    """
    tier = self.trs[self.tier_idx]
    current = tier[self.ann_idx]
    new_label = anndata.sppasLabel(anndata.sppasTag(value))
    current.set_labels(new_label)
def test_map_label_reverse(self):
    """Map a single label, with reversed mapping table."""
    contents = ("1", "70")

    # Map normally
    self.map.set_keep_miss(True)
    self.map.set_reverse(False)
    mapped = [self.map.map_label(sppasLabel(sppasTag(c)))
              for c in contents]

    # Reverse the mapping table...
    self.map.set_reverse(True)

    # Re-map. Expect the initial result
    for content, label in zip(contents, mapped):
        self.assertEqual(sppasLabel(sppasTag(content)),
                         self.map.map_label(label))

    # Map normally (for other tests!)
    self.map.set_reverse(False)
def read_aligned_tracks(self, dir_name):
    """Read time-aligned tracks in a directory.

    The content of each tag of the phonemes tier and of the
    pronunciations tier is mapped with the mapping table, not reversed.
    The tokens tier is returned as it was read.

    :param dir_name: (str) Input directory to get files.
    :returns: (sppasTier, sppasTier, sppasTier)

    """
    tier_phn, tier_tok, tier_pron = \
        TracksReader.read_aligned_tracks(dir_name)

    # map-back phonemes
    self._mapping.set_keep_miss(True)
    self._mapping.set_reverse(False)

    def remap(tier, convert):
        # Replace each tag content by its converted version.
        # Alternative tags are included and scores are kept.
        for ann in tier:
            new_labels = list()
            for label in ann.get_labels():
                tags, scores = list(), list()
                for tag, score in label:
                    tags.append(sppasTag(convert(tag.get_content())))
                    scores.append(score)
                new_labels.append(sppasLabel(tags, scores))
            ann.set_labels(new_labels)

    # a phoneme is a single entry of the mapping table
    remap(tier_phn, self._mapping.map_entry)
    # a pronunciation is a sequence of phonemes with a separator
    remap(tier_pron,
          lambda text: self._mapping.map(text, [separators.phonemes]))

    return tier_phn, tier_tok, tier_pron
def test_syllabify_interval(self):
    """... Perform the syllabification of one interval."""
    expected = sppasTier('Expected')

    def expect(begin, end, content):
        expected.create_annotation(
            sppasLocation(
                sppasInterval(sppasPoint(begin), sppasPoint(end))),
            sppasLabel(sppasTag(content)))

    def check(result):
        self.assertEqual(len(expected), len(result))
        for a1, a2 in zip(expected, result):
            self.assertEqual(a1, a2)

    # phonemes from index 0 to 1
    expect(1, 3, 'l-@')
    syllables = sppasTier('SyllAlign')
    self.syll.syllabify_interval(self.tier, 0, 1, syllables)
    check(syllables)

    # phonemes from index 13 to 15: appended to the same tier
    expect(17, 18, 'E')
    expect(18, 19, 'o')
    self.syll.syllabify_interval(self.tier, 13, 15, syllables)
    check(syllables)
def test_map_tag_reverse(self):
    """Map a single tag, reversing the mapping table."""
    # Map normally
    self.map.set_keep_miss(True)
    self.map.set_reverse(False)
    t_un = self.map.map_tag(sppasTag("1"))
    t_sept = self.map.map_tag(sppasTag("70"))

    # Reverse the mapping table...
    self.map.set_reverse(True)

    # Re-map. Expect the initial result
    cases = (
        ("1", t_un, 0),
        ("70", t_sept, 0),
        ("70", t_sept, 1),
    )
    for content, mapped, index in cases:
        self.assertEqual([sppasTag(content)],
                         self.map.map_tag(mapped[index]))

    # Map normally (for other tests!)
    self.map.set_reverse(False)
def setUp(self):
    """Create a point tier, an interval tier and a mapping table."""
    # Create tiers
    self.tierP = sppasTier("PointTier")
    self.tierI = sppasTier("IntervalTier")
    for i in range(8):
        point_label = sppasLabel(sppasTag(str(i)))
        self.tierP.create_annotation(
            sppasLocation(sppasPoint(i)), point_label)
        interval = sppasInterval(sppasPoint(i), sppasPoint(i+1))
        self.tierI.create_annotation(
            sppasLocation(interval), sppasLabel(sppasTag(str(i*10))))
    # One more interval, with a tag made of two alternatives
    self.tierI.create_annotation(
        sppasLocation(sppasInterval(sppasPoint(9), sppasPoint(10))),
        sppasLabel(sppasTag("{quatre-vingts-dix|nonante}")))

    # Create TierMapping
    entries = (
        ("1", "un"),
        ("2", "deux"),
        ("3", "trois"),
        ("4", "quatre"),
        ("5", "cinq"),
        ("6", "six"),
        ("7", "sept"),
        ("8", "huit"),
        ("9", "neuf"),
        ("10", "dix"),
        ("20", "vingt"),
        ("30", "trente"),
        ("40", "quarante"),
        ("50", "cinquante"),
        ("60", "soixante"),
        # "70" and "80" are added twice, with two different values
        ("70", "septante"),
        ("70", "soixante-dix"),
        ("80", "octante"),
        ("80", "quatre-vingts"),
    )
    self.map = sppasMappingTier()
    for key, value in entries:
        self.map.add(key, value)
    self.map.set_delimiters((";", ",", " ", ".", "|"))
def setUp(self):
    """Create four labelled interval annotations and a tier of them."""

    def make_ann(b_value, b_radius, e_value, e_radius, content):
        interval = sppasInterval(sppasPoint(b_value, b_radius),
                                 sppasPoint(e_value, e_radius))
        return sppasAnnotation(sppasLocation(interval),
                               sppasLabel(sppasTag(content)))

    self.x = make_ann(1., 0., 2., 0.01, 'toto')
    self.y = make_ann(3., 0.01, 4., 0.01, 'titi')
    self.a = make_ann(5., 0.01, 6.5, 0.005, 'toto')
    self.b = make_ann(6.5, 0.005, 9.5, 0., 'toto')

    self.tier = sppasTier()
    for ann in (self.x, self.y, self.a, self.b):
        self.tier.append(ann)
def convert(self, input_audio_filename, input_filename):
    """Return a tier with transcription aligned to the audio.

    Only the first tier of the transcription file is used: each of its
    annotations is a unit. Units different of SIL_ORTHO are the IPUs;
    they are assigned, in their order, to the non-silence annotations of
    the resulting tier.

    :param input_audio_filename: (str) Input audio file
    :param input_filename: (str) Input transcription file
    :returns: (sppasTier) tier named 'Transcription', or None if the
    value returned by the filler when fixing the thresholds is not equal
    to the number of IPUs
    :raises: AudioChannelError if the audio has not exactly one channel

    """
    # Get audio and the channel we'll work on
    audio_speech = sppas.src.audiodata.aio.open(input_audio_filename)
    nb_channels = audio_speech.get_nchannels()
    if nb_channels != 1:
        raise AudioChannelError(nb_channels)
    idx = audio_speech.extract_channel()
    channel = audio_speech.get_channel(idx)

    # Get the units we'll work on.
    # NOTE(review): a transcription with several tiers, or with an empty
    # first tier, is accepted without any notice -- confirm it's expected.
    parser = sppasRW(input_filename)
    trs = parser.read()
    units = [a.serialize_labels() for a in trs[0]]
    ipus = [u for u in units if u != SIL_ORTHO]

    # Create the instance to fill in IPUs
    filler = FillIPUs(channel, units)
    filler.set_min_ipu(self._options['min_ipu'])
    filler.set_min_sil(self._options['min_sil'])
    nb_found = filler.fix_threshold_durations()
    if nb_found != len(ipus):
        # no tier can be created: the caller gets None
        return None

    # Process the data.
    tracks = filler.get_tracks(time_domain=True)
    tier = sppasSearchIPUs.tracks_to_tier(tracks,
                                          channel.get_duration(),
                                          filler.get_vagueness())
    tier.set_name('Transcription')
    self._set_meta(filler, tier)

    # Replace the label of each non-silence annotation by the next IPU
    i = 0
    for a in tier:
        if a.get_best_tag().is_silence() is False:
            a.set_labels([sppasLabel(sppasTag(ipus[i]))])
            i += 1

    return tier
def syllables_to_timegroups(self, syllables):
    """Create the time group intervals.

    Each interval is labelled with the 'tg_prefix_label' option followed
    by its rank, starting at 1.

    :param syllables: (sppasTier)
    :returns: (sppasTier) Time groups

    """
    timegroups = syllables.export_to_intervals(self._tg_separators)
    timegroups.set_name("TGA-TimeGroups")

    for rank, tg in enumerate(timegroups, start=1):
        name = self._options['tg_prefix_label'] + str(rank)
        tg.append_label(sppasLabel(sppasTag(name)))

    return timegroups
def test_write_text_tracks(self):
    """Write tokenization and phonetization into separated track files."""

    def read_single_lines(directory, filenames):
        # Each file is expected to contain exactly one line.
        # The "with" statement closes the file: no explicit close() needed.
        content = list()
        for fn in filenames:
            path = os.path.join(directory, fn)
            with codecs.open(path, "r", sg.__encoding__) as fp:
                new_lines = fp.readlines()
            self.assertEqual(1, len(new_lines))
            content.append(new_lines[0])
        return content

    l1 = sppasLabel([sppasTag("j"), sppasTag("S")])
    l2 = sppasLabel([sppasTag("e"), sppasTag("E")])
    tier_phn = sppasTier("phonemes")
    tier_phn.create_annotation(sppasLocation(sppasPoint(1)),
                               [l1, l2])
    tier_phn.create_annotation(sppasLocation(sppasPoint(2)),
                               sppasLabel(sppasTag("j-e s-H-i")))
    tier_tok = sppasTier("tokens")
    tier_tok.create_annotation(sppasLocation(sppasPoint(1)),
                               sppasLabel(sppasTag("j' ai")))
    tier_tok.create_annotation(sppasLocation(sppasPoint(2)),
                               sppasLabel(sppasTag('je suis')))

    # Tiers of different sizes
    with self.assertRaises(SizeInputsError):
        TracksWriter._write_text_tracks(tier_phn, sppasTier('toto'), TEMP)

    # Phonemes only: tokens are created like w_1 w_2
    dir_tracks = os.path.join(TEMP, "test_write_text_tracks_1")
    os.mkdir(dir_tracks)
    TracksWriter._write_text_tracks(tier_phn, None, dir_tracks)
    created_files = os.listdir(dir_tracks)
    self.assertEqual(4, len(created_files))
    lines = read_single_lines(dir_tracks, created_files)
    self.assertTrue("w_1 w_2" in lines)
    self.assertTrue("{j|S} {e|E}" in lines)
    self.assertTrue("j-e s-H-i" in lines)

    # Phonemes and tokens
    dir_tracks = os.path.join(TEMP, "test_write_text_tracks_2")
    os.mkdir(dir_tracks)
    TracksWriter._write_text_tracks(tier_phn, tier_tok, dir_tracks)
    created_files = os.listdir(dir_tracks)
    self.assertEqual(4, len(created_files))
    lines = read_single_lines(dir_tracks, created_files)
    self.assertTrue("j' ai" in lines)
    self.assertTrue("je suis" in lines)
    self.assertTrue("{j|S} {e|E}" in lines)
    self.assertTrue("j-e s-H-i" in lines)
def make_classes(self, syllables):
    """Create the tier with syllable classes.

    Each annotation has a copy of the location of a syllable; its label
    is the result of the syllabifier for the best tag of this syllable.

    :param syllables: (sppasTier)
    :returns: (sppasTier) tier named "SyllClassAlign"

    """
    classes = sppasTier("SyllClassAlign")
    classes.set_meta('syllabification_classes_of_tier',
                     syllables.get_name())

    for syll in syllables:
        location = syll.get_location().copy()
        phonetized = syll.get_best_tag().get_typed_content()
        class_name = self.syllabifier.classes_phonetized(phonetized)
        classes.create_annotation(location,
                                  sppasLabel(sppasTag(class_name)))

    return classes
def _create_tok_tier(phon_tier):
    """Create a tier with tokens like 'w_1 w_2...w_n' from phonemes.

    The given tier is copied. The labels of each annotation of the copy
    are replaced by one token per space-separated item of its serialized
    labels, except if its best tag is a silence.

    :param phon_tier: (sppasTier) time-aligned tier with phonetization
    :returns: (sppasTier)

    """
    tok_tier = phon_tier.copy()
    for ann in tok_tier:
        if ann.get_best_tag().is_silence() is not False:
            # silences are kept as they are
            continue
        serialized = ann.serialize_labels(" ", "", alt=True)
        count = len(serialized.split(' '))
        tokens = " ".join("w_" + str(n) for n in range(1, count + 1))
        ann.set_labels([sppasLabel(sppasTag(tokens))])

    return tok_tier
def convert(self, tier):
    """Phonetize annotations of a tokenized tier.

    Each label of an annotation is turned into a label whose tags are
    the distinct phonetization variants, in their order of appearance.

    :param tier: (Tier) the orthographic transcription previously
    tokenized.
    :returns: (Tier) phonetized tier with name "Phones"
    :raises: EmptyInputError if the tier is empty

    """
    if tier.is_empty() is True:
        raise EmptyInputError(name=tier.get_name())

    phones_tier = sppasTier("Phones")
    for i, ann in enumerate(tier):
        self.print_message(MSG_TRACK.format(number=i + 1), indent=2)

        location = ann.get_location().copy()
        labels = list()

        # Normalize all labels of the orthographic transcription
        for label in ann.get_labels():

            phonetizations = list()
            for text, score in label:
                if text.is_pause() or text.is_silence():
                    # It's in case the pronunciation dictionary
                    # were not properly fixed.
                    phonetizations.append(SIL)

                elif text.is_empty() is False:
                    phones = self.phonetize(text.get_content(), i)
                    for p in phones:
                        phonetizations.extend(
                            p.split(VARIANTS_SEPARATOR))

            # New in SPPAS 1.9.6.
            #  - The result is a sequence of labels.
            #  - Variants are alternative tags.
            # Duplicates are removed but the order is preserved: a set
            # would give an arbitrary order of the alternative tags,
            # which can differ from one run to another.
            seen = set()
            tags = list()
            for p in phonetizations:
                if p not in seen:
                    seen.add(p)
                    tags.append(sppasTag(p))
            labels.append(sppasLabel(tags))

        phones_tier.create_annotation(location, labels)

    return phones_tier
def append_extra(self, trs):
    """Append extra tiers in trs.

    Depending on the 'activity' and 'activityduration' options, the
    activity tier is appended and/or an 'ActivityDuration' tier is
    created. Any error is logged and reported as a warning; it is not
    raised.

    :param trs: (Transcription)
    :returns: trs if it has no "TokensAlign" tier, None in all other cases

    """
    options = self._options
    if options['activity'] is False and \
            options['activityduration'] is False:
        return

    token_align = trs.find("TokensAlign")
    if token_align is None:
        self.logfile.print_message(MSG_NO_TOKENS_ALIGN,
                                   indent=1, status=annots.warning)
        return trs

    # Activity tier
    try:
        self.logfile.print_message(MSG_ACTION_EXTRA_TIER, indent=1)
        tier = sppasActivity().get_tier(trs)

        if options['activity'] is True:
            trs.append(tier)
            trs.add_hierarchy_link("TimeAlignment", token_align, tier)

        if options['activityduration'] is True:
            # one duration per activity, at the same location
            dur_tier = trs.create_tier('ActivityDuration')
            for a in tier:
                interval = a.get_location().get_best()
                dur = interval.duration().get_value()
                dur_tier.create_annotation(
                    sppasLocation(interval.copy()),
                    sppasLabel(sppasTag(dur, tag_type="float")))
            trs.add_hierarchy_link("TimeAssociation", tier, dur_tier)

    except Exception as e:
        logging.error(traceback.format_exc())
        message = MSG_EXTRA_TIER.format(tiername="Activities",
                                        message=str(e))
        self.logfile.print_message(message,
                                   indent=2, status=annots.warning)
def tones_to_tier(tones, anchors_tier):
    """Convert the INTSINT result into a tier.

    Each tone is the tag of an annotation with a copy of the location of
    the anchor of the same rank.

    :param tones: (list)
    :param anchors_tier: (sppasTier)
    :returns: (sppasTier) tier named "INTSINT"
    :raises: AnnDataEqError if there are not as many tones as anchors

    """
    nb_tones = len(tones)
    nb_anchors = len(anchors_tier)
    if nb_tones != nb_anchors:
        raise AnnDataEqError("tones:" + str(nb_tones),
                             "anchors:" + str(nb_anchors))

    tier = sppasTier("INTSINT")
    for tone, anchor_ann in zip(tones, anchors_tier):
        tag = sppasTag(tone)
        where = anchor_ann.get_location().copy()
        tier.create_annotation(where, sppasLabel(tag))

    return tier
def split_into_tracks(self, input_audio, phon_tier, tok_tier, dir_align):
    """Write tracks from the given data.

    The labels of the phonetization tier are modified: they are replaced
    by a single label with the mapped phonetization. No value is
    returned.

    :param input_audio: (str) Audio file name.
    :param phon_tier: (sppasTier) The phonetization tier.
    :param tok_tier: (sppasTier) The tokenization tier, or None.
    :param dir_align: (str) Output directory to store files.

    """
    # Map phonemes from SAMPA to the expected ones.
    self._mapping.set_keep_miss(True)
    self._mapping.set_reverse(True)

    # Map phonetizations (even the alternatives)
    for ann in phon_tier:
        serialized = ann.serialize_labels(
            separator="\n", empty="", alt=True)
        content = list()
        for item in serialized.split('\n'):
            item = item.replace('|', separators.variants)
            # remove the braces around a list of alternatives
            in_braces = item.startswith('{') and item.endswith('}')
            content.append(item[1:-1] if in_braces else item)

        mapped = self._mapping.map(" ".join(content),
                                   TracksReaderWriter.DELIMITERS)
        ann.set_labels(sppasLabel(sppasTag(mapped)))

    try:
        TracksWriter.write_tracks(input_audio, phon_tier, tok_tier,
                                  dir_align)
    except (SizeInputsError, BadInputError):
        # number of intervals are not matching, or either phonemes or
        # tokens is wrong... re-try with phonemes only
        TracksWriter.write_tracks(input_audio, phon_tier, None,
                                  dir_align)
def syllables_to_timesegments(self, syllables):
    """Create the time segments intervals.

    Time segments are time groups with serialized syllables: the label of
    an interval is made of the serialized labels of the syllables found
    between its bounds, each one followed by a whitespace.

    :param syllables: (sppasTier)
    :returns: (sppasTier) Time segments

    """
    segments = syllables.export_to_intervals(self._tg_separators)
    segments.set_name("TGA-TimeSegments")

    for tg in segments:
        inside = syllables.find(tg.get_lowest_localization(),
                                tg.get_highest_localization())
        text = "".join(ann.serialize_labels(separator=" ") + " "
                       for ann in inside)
        tg.append_label(sppasLabel(sppasTag(text)))

    return segments
def map_tag(self, tag):
    """Run the mapping process on a tag.

    A tag which can't be mapped is returned as a copy, in a list of one
    element.

    :param tag: (sppasTag) tag with symbols to map
    :returns: List of sppasTag()

    """
    # only non-empty strings can be mapped
    mappable = tag.get_type() == 'str' and tag.is_empty() is False

    # only speech can be mapped, not the symbols (except if enabled).
    if mappable and (tag.is_speech() is True or
                     self._map_symbols is True):
        text = tag.get_content()
        if text.startswith('{') and text.endswith('}'):
            text = text[1:-1]
        mapped = self.map(text, self._delimiters)
        # one tag for each alternative of the mapped content
        return [sppasTag(part) for part in mapped.split('|')]

    return [tag.copy()]
def _add_aligned_track_into_tier(tier, tdata, delta, unitend):
    """Append a list of (start, end, text, score) into the tier.

    Shift start/end of a delta value and set the last end value.
    An error is logged, not raised: the annotations created before the
    failure remain in the tier.

    :param tier: tier in which the annotations are created
    :param tdata: (list of tuple) (start, end, contents, scores) where
    contents and scores are strings with '|' between alternatives;
    scores can be None
    :param delta: value added to each start and each end
    :param unitend: end value assigned to the last item of tdata

    """
    try:
        for i, t in enumerate(tdata):
            # fix the location - an interval
            (loc_s, loc_e, contents, scores) = t
            loc_s += delta
            loc_e += delta
            if i == (len(tdata)-1):
                loc_e = unitend
            location = sppasLocation(
                sppasInterval(
                    sppasPoint(loc_s, TracksReader.RADIUS),
                    sppasPoint(loc_e, TracksReader.RADIUS)
                ))

            # fix the label
            # allow to work with alternative tags
            tags = [sppasTag(c) for c in contents.split('|')]
            if scores is not None:
                tag_scores = [float(s) for s in scores.split('|')]
            else:
                tag_scores = None
            label = sppasLabel(tags, tag_scores)

            tier.create_annotation(location, label)

    except Exception:
        # Not a bare except: KeyboardInterrupt and SystemExit must not
        # be turned into a simple log message.
        logging.error('The following data were not added to the tier '
                      '{:s} at position {:f}: {:s}'
                      ''.format(tier.get_name(), delta, str(tdata)))
        logging.error(traceback.format_exc())
def tga_to_tier(tga_result, timegroups, tier_name, tag_type="float"):
    """Create a tier from one of the TGA result.

    The serialized labels of a time group are the key to get its value in
    the result. A value of type "float" is rounded to 5 digits.

    :param tga_result: One of the results of TGA
    :param timegroups: (sppasTier) Time groups
    :param tier_name: (str) Name of the output tier
    :param tag_type: (str) Type of the sppasTag to be included
    :returns: (sppasTier)

    """
    rounded = tag_type == "float"
    tier = sppasTier(tier_name)

    for tg_ann in timegroups:
        value = tga_result[tg_ann.serialize_labels()]
        if rounded:
            value = round(value, 5)
        label = sppasLabel(sppasTag(value, tag_type))
        tier.create_annotation(tg_ann.get_location().copy(), label)

    return tier
while window_end[i] == 0: i -= 1 ann_idx_end = idx_end + i # Assign a label to the new annotation mean_dist = sum( distances[idx_begin:idx_end + 1]) / float(idx_end - idx_begin) mean_dist = round(mean_dist, 2) if mean_dist == 0: print(" ERROR: mean dist equal to 0...") continue begin = tier[ann_idx_begin].get_lowest_localization().copy() end = tier[ann_idx_end].get_highest_localization().copy() loc = sppasLocation(sppasInterval(begin, end)) label = sppasLabel(sppasTag(mean_dist, "float")) filtered_tier.create_annotation(loc, label) if len(filtered_tier) == 0: print("No density area found.") # ---------------------------------------------------------------------------- # Save result if file_output is None: for a in filtered_tier: print(a) else: parser.set_filename(file_output)
def ExportToAnnData(self):
    """Export this transcription to anndata.sppasTranscription().

    Metadata which are not None, controlled vocabularies, media, tiers
    with their annotations and hierarchy links are exported. A radius
    equal to 0. is exported as None: for a point and for both the begin
    and the end of an interval.

    :returns: anndata.sppasTranscription

    """

    def to_point(point):
        # Convert a point of this transcription into an anndata one;
        # a null radius means "no radius".
        value = point.GetValue()
        radius = point.GetRadius()
        if radius == 0.:
            radius = None
        return anndata.sppasPoint(value, radius)

    trs = anndata.sppasTranscription(self.__name)

    for meta_key in self.metadata:
        if self.metadata[meta_key] is not None:
            trs.set_meta(meta_key, self.metadata[meta_key])

    for ctrl_vocab in self.GetCtrlVocab():
        other_cv = anndata.sppasCtrlVocab(ctrl_vocab.id,
                                          ctrl_vocab.GetDescription())
        for entry in ctrl_vocab:
            entry_text = entry.Text
            entry_desc = entry.GetDescription()
            other_cv.add(anndata.sppasTag(entry_text), entry_desc)
        trs.add_ctrl_vocab(other_cv)

    for media in self.GetMedia():
        other_m = anndata.sppasMedia(media.url, media.id, media.mime)
        trs.add_media(other_m)

    for tier in self:
        # the exported tier refers to the exported vocabulary and media
        c = tier.GetCtrlVocab()
        if c is not None:
            ctrl_vocab = trs.get_ctrl_vocab_from_name(c.GetName())
        else:
            ctrl_vocab = None
        m = tier.GetMedia()
        if m is not None:
            media = trs.get_media_from_id(m.id)
        else:
            media = None

        other_t = trs.create_tier(tier.GetName(), ctrl_vocab, media)
        is_point = tier.IsPoint()
        for ann in tier:
            text = ann.GetLabel().GetLabel()
            if is_point is True:
                other_t.create_annotation(
                    anndata.sppasLocation(
                        to_point(ann.GetLocation().GetPoint())),
                    anndata.sppasLabel(anndata.sppasTag(text)))
            else:
                # Both bounds are converted the same way: the end radius
                # was formerly never turned into None.
                begin = to_point(ann.GetLocation().GetBegin())
                end = to_point(ann.GetLocation().GetEnd())
                other_t.create_annotation(
                    anndata.sppasLocation(
                        anndata.sppasInterval(begin, end)),
                    anndata.sppasLabel(anndata.sppasTag(text)))

    for tier in self:
        parent_tier = self._hierarchy.get_parent(tier)
        if parent_tier is not None:
            link_type = self._hierarchy.get_hierarchy_type(tier)
            new_tier = trs.find(tier.GetName())
            new_parent_tier = trs.find(parent_tier.GetName())
            trs.add_hierarchy_link(link_type, new_parent_tier, new_tier)

    return trs