def __convert(self, tier, actions):
    """Create the "Tokens" tier from the labels of each annotation.

    For each label of each annotation, only its best tag is considered:

        - if the tag is speech, its content is given to the normalizer;
          if the normalizer raises, the error is logged and the label
          produces no token;
        - if the tag is a silence, the token is SIL_ORTHO;
        - otherwise the content of the tag is kept unchanged.

    Each resulting token becomes one label. A token written between
    braces, like "{a|b}", is split on '|' and each part is stored as an
    alternative tag of the same label.

    :param tier: the tier (iterable of annotations) to normalize
    :param actions: given as-is to the normalizer
    :returns: (sppasTier) the new tier, named "Tokens"

    """
    result = sppasTier("Tokens")

    for idx, annotation in enumerate(tier):
        # Report the progression: annotations are numbered from 1
        progress = info(1220, "annotations").format(number=idx + 1)
        self.logfile.print_message(progress, indent=1)

        where = annotation.get_location().copy()
        new_labels = []

        for label in annotation.get_labels():
            # A label of an orthographic transcription should hold only
            # one tag, so only the best one is normalized.
            best = label.get_best()

            if best.is_speech() is True:
                # Empty labels, noises, laughter... are not tokenized
                try:
                    words = self.__normalizer.normalize(
                        best.get_content(), actions)
                except Exception as e:
                    # Keep going: this label simply yields no token
                    words = []
                    self.logfile.print_message(
                        (info(1258, "annotations")).format(idx)
                        + "{:s}".format(str(e)),
                        indent=2)
            elif best.is_silence():
                # A silence of the ortho could be one of "#" or "gpf_":
                # use the single normalized symbol.
                words = [SIL_ORTHO]
            else:
                words = [best.get_content()]

            # New in SPPAS 1.9.6.
            #  - The result is a sequence of labels.
            #  - Token variants are stored into alternative tags
            for word in words:
                if word.startswith('{') and word.endswith('}'):
                    content = [sppasTag(variant)
                               for variant in word[1:-1].split('|')]
                else:
                    content = sppasTag(word)
                new_labels.append(sppasLabel(content))

        result.create_annotation(where, new_labels)

    return result
def syllabify_interval(self, phonemes, from_p, to_p, syllables):
    """Syllabify a range of phonemes and append the result to a tier.

    The typed content of the best tag of each phoneme in
    phonemes[from_p:to_p + 1] is given to the syllabifier. Each item it
    returns is unpacked as a (start, end) pair of indexes relative to
    from_p; one annotation per pair is created in `syllables`.

    :param phonemes: (sppasTier)
    :param from_p: (int) index of the first phoneme to be syllabified
    :param to_p: (int) index of the last phoneme to be syllabified
    :param syllables: (sppasTier) the tier in which syllables are added

    """
    # Sequence of phonemes to syllabify
    phones = [ann.get_best_tag().get_typed_content()
              for ann in phonemes[from_p:to_p + 1]]

    # Sequence of syllables
    boundaries = self.__syllabifier.annotate(phones)

    for boundary in boundaries:
        first, last = boundary

        # Location: from the lowest point of the first phoneme of the
        # syllable to the highest point of its last phoneme. Indexes
        # are shifted because they are relative to from_p.
        lowest = phonemes[first + from_p].get_lowest_localization().copy()
        highest = phonemes[last + from_p].get_highest_localization().copy()
        where = sppasLocation(sppasInterval(lowest, highest))

        # Label: the phonetized form of this single syllable
        phonetized = Syllabifier.phonetize_syllables(phones, [boundary])
        what = sppasLabel(sppasTag(phonetized))

        syllables.create_annotation(where, what)
def __add_repetition(repetition, spk1_tier, spk2_tier, start_idx1, start_idx2, src_tier, echo_tier):
    """Store a repetition - its source and its echos - into the tiers.

    The source is labelled "S<n>" and each echo "R<n>", where <n> is
    the number of annotations of src_tier before the call, plus one.
    Each created echo refers to the source with the metadata
    'is_other_repetition_of'. If an echo can't be created, its label is
    appended to the first annotation found at the same place, if any.

    :param repetition: (DataRepetition)
    :param spk1_tier: (Tier) The tier of speaker 1 (to detect sources)
    :param spk2_tier: (Tier) The tier of speaker 2 (to detect echos)
    :param start_idx1: start index of the interval in spk1_tier
    :param start_idx2: start index of the interval in spk2_tier
    :param src_tier: (Tier) The resulting tier with sources
    :param echo_tier: (Tier) The resulting tier with echos
    :returns: (bool) False if the source was not added, True otherwise

    """
    # Evaluated before the source is added: shared by source and echos
    rank = len(src_tier) + 1

    # Source
    src_first, src_last = repetition.get_source()
    lowest = spk1_tier[start_idx1 + src_first].get_lowest_localization()
    highest = spk1_tier[start_idx1 + src_last].get_highest_localization()
    src_interval = sppasInterval(lowest.copy(), highest.copy())
    try:
        src_ann = src_tier.create_annotation(
            sppasLocation(src_interval),
            sppasLabel(sppasTag("S" + str(rank))))
        src_id = src_ann.get_meta('id')
    except TierAddError:
        return False

    # Echos
    for (first, last) in repetition.get_echos():
        echo_begin = spk2_tier[start_idx2 + first].get_lowest_localization()
        echo_end = spk2_tier[start_idx2 + last].get_highest_localization()
        echo_interval = sppasInterval(echo_begin.copy(), echo_end.copy())
        echo_label = sppasLabel(sppasTag("R" + str(rank)))
        try:
            echo_ann = echo_tier.create_annotation(
                sppasLocation(echo_interval), echo_label)
            echo_ann.set_meta('is_other_repetition_of', src_id)
        except TierAddError:
            # The echo was not added: attach its label to the first
            # annotation found between the same bounds instead.
            existing = echo_tier.find(echo_begin, echo_end)
            if len(existing) > 0:
                existing[0].append_label(echo_label)

    return True
def __add_repetition(repetition, spk_tier, start_idx, src_tier, echo_tier):
    """Add a repetition - source and echos - in tiers.

    The source is labelled "S<n>" and each echo "R<n>", where <n> is
    the number of annotations of src_tier before the call, plus one.
    Each echo refers to the source with the metadata
    'is_self_repetition_of'.

    :param repetition: (DataRepetition)
    :param spk_tier: (sppasTier) The tier of the speaker (to detect sources)
    :param start_idx: (int) start index of the interval in spk_tier
    :param src_tier: (sppasTier) The resulting tier with sources
    :param echo_tier: (sppasTier) The resulting tier with echos
    :returns: (bool) False if the source annotation could not be created,
        True otherwise

    """
    # Evaluated before the source is added: shared by source and echos
    index = len(src_tier)

    # Source
    s, e = repetition.get_source()
    src_begin = spk_tier[start_idx + s].get_lowest_localization()
    src_end = spk_tier[start_idx + e].get_highest_localization()
    time = sppasInterval(src_begin.copy(), src_end.copy())
    try:
        a = src_tier.create_annotation(
            sppasLocation(time),
            sppasLabel(sppasTag("S" + str(index + 1))))
        src_id = a.get_meta('id')
    except Exception:
        # Best effort: a source that can't be added is only reported to
        # the caller. "Exception" instead of a bare "except" so that
        # KeyboardInterrupt and SystemExit are not swallowed.
        return False

    # Echos
    for (s, e) in repetition.get_echos():
        rep_begin = spk_tier[start_idx + s].get_lowest_localization()
        rep_end = spk_tier[start_idx + e].get_highest_localization()
        time = sppasInterval(rep_begin.copy(), rep_end.copy())
        a = echo_tier.create_annotation(
            sppasLocation(time),
            sppasLabel(sppasTag("R" + str(index + 1))))
        a.set_meta('is_self_repetition_of', src_id)

    return True
def make_stop_words(self, tier):
    """Create a tier telling which entries are stop-words.

    Annotations whose serialized labels are in `symbols.all` are
    ignored. Any other annotation gets, at a copy of its location, a
    boolean tag: the value returned by the stop-words list for it.

    :param tier: (sppasTier) Time-aligned tokens.
    :returns: (sppasTier) the new tier, named 'StopWord'

    """
    result = sppasTier('StopWord')

    for ann in tier:
        entry = ann.serialize_labels()
        if entry in symbols.all:
            # Symbols are not evaluated at all
            continue

        is_stop = self._stop_words.is_in(entry)
        where = ann.get_location().copy()
        what = sppasLabel(sppasTag(is_stop, tag_type="bool"))
        result.create_annotation(where, what)

    return result
def anchors_to_tier(anchors):
    """Convert a list of anchors into a sppasTier.

    The x value of an anchor is a number of frames. It is converted to
    seconds by multiplying it by 0.01 (a frame lasts 10ms). Each point
    is created with 0.005 as second argument (presumably its radius,
    i.e. half a frame - to be confirmed). The y value of the anchor is
    stored in a tag of type "float".

    :param anchors: (List of Anchor)
    :returns: (sppasTier) the new tier, named 'Momel'

    """
    momel = sppasTier('Momel')

    for target in anchors:
        where = sppasLocation(sppasPoint(target.x * 0.01, 0.005))
        what = sppasLabel(sppasTag(target.y, "float"))
        momel.create_annotation(where, what)

    return momel
def convert(self, tier):
    """Phonetize annotations of a tokenized tier.

    Each label of each annotation produces one label in the result. Its
    alternative tags are the distinct phonetization variants of all the
    tags of the input label, in the order they were first produced.

    :param tier: (Tier) the ortho transcription previously tokenized.
    :returns: (Tier) phonetized tier with name "Phones"
    :raises: IOError if no tier is given
    :raises: EmptyInputError if the given tier is empty

    """
    if tier is None:
        raise IOError('No given tier.')
    if tier.is_empty() is True:
        raise EmptyInputError(name=tier.get_name())

    phones_tier = sppasTier("Phones")
    for i, ann in enumerate(tier):
        # Report the progression: annotations are numbered from 1
        self.logfile.print_message(
            (info(1220, "annotations")).format(number=i + 1), indent=1)

        location = ann.get_location().copy()
        labels = list()

        # Phonetize all labels of the tokenized transcription
        for label in ann.get_labels():

            phonetizations = list()
            for text, _score in label:
                if text.is_pause() or text.is_silence():
                    # It's in case the pronunciation dictionary
                    # were not properly fixed.
                    phonetizations.append(SIL)

                elif text.is_empty() is False:
                    phones = self._phonetize(text.get_content())
                    for p in phones:
                        phonetizations.extend(p.split(separators.variants))

            # Remove duplicated variants but keep the order of their
            # first occurrence. Iterating over a set() would give an
            # arbitrary order of the tags, which can differ from one
            # run to another.
            seen = set()
            variants = list()
            for p in phonetizations:
                if p not in seen:
                    seen.add(p)
                    variants.append(p)

            # New in SPPAS 1.9.6.
            #  - The result is a sequence of labels.
            #  - Variants are alternative tags.
            tags = [sppasTag(p) for p in variants]
            labels.append(sppasLabel(tags))

        phones_tier.create_annotation(location, labels)

    return phones_tier
def make_classes(self, syllables):
    """Create the tier of the classes of the syllables.

    For each annotation of the given tier, the typed content of its
    best tag is converted by the syllabifier (classes_phonetized) and
    the result is stored at a copy of the same location. The name of
    the given tier is saved in the metadata
    'syllabification_classes_of_tier' of the new tier.

    :param syllables: (sppasTier)
    :returns: (sppasTier) the new tier, named "SyllClassAlign"

    """
    result = sppasTier("SyllClassAlign")
    result.set_meta('syllabification_classes_of_tier',
                    syllables.get_name())

    for ann in syllables:
        where = ann.get_location().copy()
        phonetized = ann.get_best_tag().get_typed_content()
        syll_class = self.__syllabifier.classes_phonetized(phonetized)
        result.create_annotation(where, sppasLabel(sppasTag(syll_class)))

    return result
def make_word_strain(self, tier):
    """Create a tier in which tokens are replaced by their strain.

    If the word-strain mapping is empty, the given tier itself is
    returned. Otherwise, each annotation is copied into a new tier with
    the mapped value of its serialized labels, or the serialized labels
    themselves if they have no entry in the mapping.

    :param tier: (sppasTier) Time-aligned tokens.
    :returns: (sppasTier) the given tier, or a new one named 'TokenStrain'

    """
    # Nothing to replace: no new tier is created
    if len(self._word_strain) == 0:
        return tier

    self.logfile.print_message("Words strain enabled.", indent=1, status=2)

    strained = sppasTier('TokenStrain')
    for ann in tier:
        word = ann.serialize_labels()
        replacement = self._word_strain.get(word, word)
        where = ann.get_location().copy()
        strained.create_annotation(where, sppasLabel(sppasTag(replacement)))

    return strained
def tones_to_tier(tones, anchors_tier):
    """Create the INTSINT tier from the tones and the anchors.

    The n-th tone is stored at a copy of the location of the n-th
    annotation of the tier of anchors.

    :param tones: (list)
    :param anchors_tier: (sppasTier)
    :returns: (sppasTier) the new tier, named "INTSINT"
    :raises: AnnDataEqError if the number of tones is not the number
        of anchors

    """
    nb_tones = len(tones)
    nb_anchors = len(anchors_tier)
    if nb_tones != nb_anchors:
        raise AnnDataEqError("tones:" + str(nb_tones),
                             "anchors:" + str(nb_anchors))

    intsint = sppasTier("INTSINT")
    for tone, anchor in zip(tones, anchors_tier):
        what = sppasLabel(sppasTag(tone))
        where = anchor.get_location().copy()
        intsint.create_annotation(where, what)

    return intsint
# Append in new tier ti = sppasInterval(sppasPoint(b, 0.0001), sppasPoint(e, 0.0001)) if len(texts) > 1: missing = False for t in texts: if len(t.strip()) == 0: # missing annotation label... missing = True if missing is True: text = "" else: text = ";".join(texts) else: text = str(texts[0]) behavior_tier.create_annotation(sppasLocation(ti), sppasLabel(sppasTag(text))) # ---------------------------------------------------------------------------- synchro_tier = trs_input.create_tier("Synchronicity") for ann in behavior_tier: text = ann.serialize_labels() if len(text) > 0: values = text.split(';') v1 = values[0].strip() v2 = values[1].strip() if v1 == "0" or v2 == "0": if v1 == "0" and v2 == "0": v = -1 else: v = 0
if is_silence(h) is False: # the middle of the hyp must be inside the ref # or the contrary! hb, he, hl = get_ann_infos(h) hm = hb + (he-hb)/2. rm = rb + (re-rb)/2. if rb < hm < re or hb < rm < he: ipus_hyp_anns.append(h) # the ipu of the ref does not match any ipu in the hyp. if len(ipus_hyp_anns) == 0: # this is the critical situation. nb_ref_not_match += 1 logging.debug(' REF IPU: [ {:f} ; {:f} ; {:s} ] has no HYP.' ''.format(rb, re, etiquette)) result_ann.set_labels(sppasLabel(sppasTag('Missing'))) # the ipu of the ref is matching only one ipu in the hyp elif len(ipus_hyp_anns) == 1: # this is a success. nb_ref_perfect_match += 1 result_ann.set_labels(ipus_hyp_anns[0].get_labels()) # the ipu of the ref is matching several ipus in the hyp. else: # This over-segmentation could correspond to a short-pause, # or a silence into a laugh. # This is an error but not a critical one. nb_ref_several_match += 1 logging.debug(' REF IPU: [ {:f} ; {:f} ; {:s} ] has several HYPs:' ''.format(rb, re, etiquette))
continue if ann.get_best_tag().is_silence(): continue old_label = ann.serialize_labels(separator=" ", empty="", alt=True) if old_label.startswith("ipu_"): try: space = old_label.index(' ') old_label = old_label[space:].strip() except ValueError: old_label = "" if len(old_label) > 0: ipu += 1 new_labels = list() new_labels.append(sppasLabel(sppasTag('ipu_%d' % ipu))) new_labels.append(sppasLabel(sppasTag(old_label))) ann.set_labels(new_labels) else: ann.set_labels(sppasLabel(sppasTag(SIL_ORTHO))) # Merge continuous silences i = len(tier)-1 while i >= 0: label = tier[i].serialize_labels() i -= 1 c = i while label == SIL_ORTHO: label = tier[c].serialize_labels()
# The tier of time-aligned tokens is required: stop the script without it.
tier_token = trs_input.find('TokensAlign')
if tier_token is None:
    print("Error: can't find the tier TokensAlign.")
    sys.exit(1)

# ----------------------------------------------------------------------------
# 2. Create the expected data

# The new tier has one annotation per token, at a copy of its location.
new_tier = trs_input.create_tier('PhnTokAlign')
for ann_token in tier_token:

    # Create the sequence of phonemes: the serialized labels of the
    # annotations of tier_phon found between the bounds of the token,
    # joined with "-".
    beg = ann_token.get_lowest_localization()
    end = ann_token.get_highest_localization()
    ann_phons = tier_phon.find(beg, end)
    content = "-".join(ann.serialize_labels() for ann in ann_phons)

    # Append in the new tier
    loc = ann_token.get_location().copy()
    new_tier.create_annotation(loc, sppasLabel(sppasTag(content)))

# Declare the link between the tier of tokens and the new tier.
trs_input.add_hierarchy_link("TimeAssociation", tier_token, new_tier)

# ----------------------------------------------------------------------------
# 3. Save new version of the file

# The message is optional; the file is written in both cases.
if args.quiet is False:
    print("Override input file: {:s}".format(args.i))
parser.write(trs_input)
# At least 2 annotations are required in the tier: stop the script otherwise.
if len(tier_input) < 2:
    print('The tier does not contains enough intervals.')
    sys.exit(1)

# With the option "o", work on a renamed copy of the tier;
# without it, the input tier itself is modified.
if args.o:
    tier = tier_input.copy()
    tier.set_name(tier_input.get_name() + "-fill")
else:
    tier = tier_input

# ---------------------------------------------------------------------------
# Create the tag to fill empty intervals
# ---------------------------------------------------------------------------

# The filler is created with the type reported by the tier
# (int, float or bool); otherwise no type is given to the tag.
if tier.is_int():
    filler = sppasTag(args.f, "int")
elif tier.is_float():
    filler = sppasTag(args.f, "float")
elif tier.is_bool():
    filler = sppasTag(args.f, "bool")
else:
    filler = sppasTag(args.f)

# If the tier has a controlled vocabulary, add the filler to it
# unless it is already one of its entries.
ctrl_vocab = tier.get_ctrl_vocab()
if ctrl_vocab is not None:
    if ctrl_vocab.contains(filler) is False:
        ctrl_vocab.add(filler, description="Filler")

# ----------------------------------------------------------------------------
# Fill in
# ----------------------------------------------------------------------------