def run(self, input_file, opt_input_file=None, output_file=None):
    """Run the automatic annotation process on an input.

    :param input_file: (list of str) momel anchors
    :param opt_input_file: (list of str) ignored
    :param output_file: (str) the output file name
    :returns: (sppasTranscription)

    """
    # Read the input and pick the tier with the pitch anchors
    trs_input = sppasRW(input_file[0]).read()
    anchors_tier = sppasFindTier.pitch_anchors(trs_input)

    # Convert the anchors, assign a tone to each, build the result tier
    targets = sppasIntsint.tier_to_anchors(anchors_tier)
    assigned_tones = self.__intsint.annotate(targets)
    intsint_tier = sppasIntsint.tones_to_tier(assigned_tones, anchors_tier)

    # Wrap the annotated tier into a new transcription
    trs_output = sppasTranscription(self.name)
    trs_output.append(intsint_tier)
    trs_output.set_meta('intsint_result_of', input_file[0])

    # Write the result if an output file name was given
    if output_file is not None:
        sppasRW(output_file).write(trs_output)

    return trs_output
def run(self, input_file, opt_input_file=None, output_file=None):
    """Run the automatic annotation process on an input.

    Input file is a tuple with 2 files:
    the main speaker and the echoing speaker.

    :param input_file: (list of str) time-aligned tokens
    :param opt_input_file: (list of str) ignored
    :param output_file: (str) the output file name
    :returns: (sppasTranscription)

    """
    self.print_options()
    self.print_diagnosis(input_file[0])
    self.print_diagnosis(input_file[1])

    # Main speaker: aligned tokens turned into a word-strain tier
    trs_src = sppasRW(input_file[0]).read()
    tok_src = sppasFindTier.aligned_tokens(trs_src)
    strain_src = self.make_word_strain(tok_src)
    strain_src.set_name(strain_src.get_name() + "-source")

    # Echoing speaker: same preparation
    trs_echo = sppasRW(input_file[1]).read()
    tok_echo = sppasFindTier.aligned_tokens(trs_echo)
    strain_echo = self.make_word_strain(tok_echo)
    strain_echo.set_name(strain_echo.get_name() + "-echo")

    # Detect the other-repetitions between the two speakers
    (src_tier, echo_tier) = self.other_detection(strain_src, strain_echo)

    # Gather the result tiers into a transcription (order matters)
    trs_output = sppasTranscription(self.name)
    trs_output.set_meta('other_repetition_result_of_src', input_file[0])
    trs_output.set_meta('other_repetition_result_of_echo', input_file[1])
    if len(self._word_strain) > 0:
        trs_output.append(strain_src)
    if self._options['stopwords'] is True:
        trs_output.append(self.make_stop_words(strain_src))
    trs_output.append(src_tier)
    trs_output.append(echo_tier)
    if len(self._word_strain) > 0:
        trs_output.append(strain_echo)

    # Write the result if an output file name was given
    if output_file is not None:
        if len(trs_output) > 0:
            parser = sppasRW(output_file)
            parser.write(trs_output)
            self.print_filename(output_file)
        else:
            raise EmptyOutputError

    return trs_output
def run(self, input_file, opt_input_file=None, output_file=None):
    """Run the automatic annotation process on an input.

    :param input_file: (list of str) time-aligned phonemes
    :param opt_input_file: (list of str) ignored
    :param output_file: (str) the output file name
    :returns: (sppasTranscription)

    """
    # Read the input and pick the tier with time-aligned phonemes
    trs_input = sppasRW(input_file[0]).read()
    phon_tier = sppasFindTier.aligned_phones(trs_input)

    trs_output = sppasTranscription(self.name)
    trs_output.set_meta('syllabification_result_of', input_file[0])

    # Syllabify from the time-aligned phonemes
    if self._options['usesphons'] is True:
        syll_tier = self.convert(phon_tier)
        trs_output.append(syll_tier)
        if self._options['createclasses']:
            trs_output.append(self.make_classes(syll_tier))

    # Extra tier: syllabify only inside the given intervals
    if self._options['usesintervals'] is True:
        intervals = trs_input.find(self._options['tiername'])
        if intervals is None:
            # Warn that the requested tier was not found
            self.logfile.print_message(
                (info(1264, "annotations")).format(
                    tiername=self._options['tiername']),
                indent=2, status=annots.warning)
        else:
            syll_in_int = self.convert(phon_tier, intervals)
            syll_in_int.set_name("SyllAlign-Intervals")
            syll_in_int.set_meta('syllabification_used_intervals',
                                 intervals.get_name())
            trs_output.append(syll_in_int)
            if self._options['createclasses']:
                classes_tier = self.make_classes(syll_in_int)
                classes_tier.set_name("SyllClassAlign-Intervals")
                trs_output.append(classes_tier)

    # Write the result if an output file name was given
    if output_file is not None:
        if len(trs_output) > 0:
            sppasRW(output_file).write(trs_output)
        else:
            raise EmptyOutputError

    return trs_output
def run(self, input_file, opt_input_file=None, output_file=None):
    """Run the automatic annotation process on an input.

    :param input_file: (list of str) pitch values
    :param opt_input_file: (list of str) ignored
    :param output_file: (str) the output file name
    :returns: (sppasTranscription)

    """
    # Load the pitch values from the input file
    pitch_values = self.fix_pitch(input_file[0])

    # Estimate the anchors from the pitch values
    anchors_tier = self.convert(pitch_values)
    message = str(len(anchors_tier)) + " anchors found."
    self.logfile.print_message(message, indent=2, status=annots.info)

    # Wrap the anchors tier into a new transcription
    trs_output = sppasTranscription(self.name)
    trs_output.append(anchors_tier)
    trs_output.set_meta('annotation_result_of', input_file[0])

    # Write the result if an output file name was given
    if output_file is not None:
        sppasRW(output_file).write(trs_output)

    return trs_output
def fix_pitch(input_filename):
    """Load pitch values from a file.

    It is supposed that the given file contains a tier with name "Pitch"
    with a pitch value every 10ms, or a tier with name "PitchTier".

    :returns: A list of pitch values (one value each 10 ms).

    """
    trs = sppasRW(input_filename).read()

    tier = trs.find("Pitch")
    if tier is not None:
        # One annotation every 10ms: extract its typed value
        pitch_values = [round(a.get_best_tag().get_typed_content(), 6)
                        for a in tier]
    else:
        # Fallback: a "PitchTier" tier from which values are estimated
        if trs.find("PitchTier") is None:
            raise NoInputError
        pitch_values = trs.to_pitch()

    if len(pitch_values) == 0:
        raise EmptyInputError(name="Pitch")

    return pitch_values
def run(self, input_file, opt_input_file=None, output_file=None):
    """Run the automatic annotation process on an input.

    :param input_file: (list of str) orthographic transcription
    :param opt_input_file: (list of str) ignored
    :param output_file: (str) the output file name
    :returns: (sppasTranscription)

    """
    # Read the input and pick the orthographic transcription tier
    trs_input = sppasRW(input_file[0]).read()
    tier_ortho = sppasFindTier.transcription(trs_input)

    # Normalize it: faked, standard and custom token tiers
    tier_faked_tokens, tier_std_tokens, tier_custom = self.convert(
        tier_ortho)

    # Keep only the tiers that were actually produced
    trs_output = sppasTranscription(self.name)
    for produced in (tier_faked_tokens, tier_std_tokens, tier_custom):
        if produced is not None:
            trs_output.append(produced)

    trs_output.set_meta('text_normalization_result_of', input_file[0])
    trs_output.set_meta('text_normalization_vocab',
                        self.__normalizer.get_vocab_filename())
    trs_output.set_meta('language_iso', "iso639-3")
    trs_output.set_meta('language_code_0', self.__normalizer.lang)
    trs_output.set_meta('language_name_0', "Undetermined")
    trs_output.set_meta(
        'language_url_0',
        "https://iso639-3.sil.org/code/" + self.__normalizer.lang)

    # Write the result if an output file name was given
    if output_file is not None:
        if len(trs_output) > 0:
            sppasRW(output_file).write(trs_output)
        else:
            raise EmptyOutputError

    return trs_output
def run(self, input_file, opt_input_file=None, output_file=None):
    """Run the automatic annotation process on an input.

    :param input_file: (list of str) time-aligned tokens
    :param opt_input_file: (list of str) ignored
    :param output_file: (str) the output file name
    :returns: (sppasTranscription)

    """
    # Read the aligned tokens and build the word-strain tier
    trs_input = sppasRW(input_file[0]).read()
    tier_tokens = sppasFindTier.aligned_tokens(trs_input)
    strain_tier = self.make_word_strain(tier_tokens)

    # Detect the self-repetitions of the speaker
    (src_tier, echo_tier) = self.self_detection(strain_tier)

    # Gather the result tiers into a transcription (order matters)
    trs_output = sppasTranscription(self.name)
    trs_output.set_meta('self_repetition_result_of', input_file[0])
    if len(self._word_strain) > 0:
        trs_output.append(strain_tier)
    if self._options['stopwords'] is True:
        trs_output.append(self.make_stop_words(strain_tier))
    trs_output.append(src_tier)
    trs_output.append(echo_tier)

    # Write the result if an output file name was given
    if output_file is not None:
        if len(trs_output) > 0:
            sppasRW(output_file).write(trs_output)
        else:
            raise EmptyOutputError

    return trs_output
def run(self, input_file, opt_input_file=None, output_file=None):
    """Run the automatic annotation process on an input.

    :param input_file: (list of str) normalized text
    :param opt_input_file: (list of str) ignored
    :param output_file: (str) the output file name
    :returns: (sppasTranscription)

    """
    # Select the standard tokens tier if the option is enabled
    pattern = "std" if self._options['usestdtokens'] is True else ""
    trs_input = sppasRW(input_file[0]).read()
    tier_tokens = sppasFindTier.tokenization(trs_input, pattern)

    # Phonetize the tokens tier
    tier_phon = self.convert(tier_tokens)

    # Wrap the phonetized tier into a new transcription
    trs_output = sppasTranscription(self.name)
    if tier_phon is not None:
        trs_output.append(tier_phon)
    trs_output.set_meta('text_phonetization_result_of', input_file[0])
    trs_output.set_meta('text_phonetization_dict',
                        self.__phonetizer.get_dict_filename())

    # Write the result if an output file name was given
    if output_file is not None:
        if len(trs_output) > 0:
            sppasRW(output_file).write(trs_output)
        else:
            raise EmptyOutputError

    return trs_output
def get_tier(filename, tier_idx):
    """Return the tier of the given index in an annotated file.

    :param filename: (str) Name of the annotated file
    :param tier_idx: (int) Index of the tier to get
    :returns: sppasTier or None

    """
    try:
        parser = sppasRW(filename)
        trs_input = parser.read(filename)
    except Exception:
        # Narrowed from a bare 'except:', which would also swallow
        # SystemExit and KeyboardInterrupt.
        return None

    # Reject an out-of-range index instead of raising IndexError
    if tier_idx < 0 or tier_idx >= len(trs_input):
        return None

    return trs_input[tier_idx]
def get_tier(filename, tier_idx):
    """Return the tier of the given index in an annotated file.

    :param filename: (str) Name of the annotated file
    :param tier_idx: (int) Index of the tier to get
    :returns: sppasTier or None

    """
    try:
        trs_input = sppasRW(filename).read(filename)
    except Exception as e:
        # Report the parsing error, then fall back to None
        logging.error("Parsing file {:s} failed: {:s}".format(filename, str(e)))
        return None

    # Valid index -> the tier; out-of-range -> None
    if 0 <= tier_idx < len(trs_input):
        return trs_input[tier_idx]
    return None
                    help='Input annotated file name')
parser.add_argument("-t",
                    metavar="value",
                    default=1,
                    type=int,
                    help='Tier number (default: 1)')

# Show the help message when the script is launched without arguments
if len(sys.argv) <= 1:
    sys.argv.append('-h')

args = parser.parse_args()

# ----------------------------------------------------------------------------

# Read the annotated input file
# (NOTE: 'parser' is re-bound from the argparse parser to a sppasRW reader)
parser = sppasRW(args.i)
trs_input = parser.read()

# Tier numbers given on the command line are 1-based
if args.t <= 0 or args.t > len(trs_input):
    print('Error: Bad tier number.\n')
    sys.exit(1)

tier = trs_input[args.t - 1]

# Get the tier type
tier_type = "Unknown"
if tier.is_point() is True:
    tier_type = "Point"
elif tier.is_interval() is True:
    tier_type = "Interval"
elif tier.is_disjoint() is True:
    tier_type = "DisjointIntervals"
# Accumulate the per-file counts into the global totals
nb_hyp_not_match_total += nb_hyp_not_match
nb_hyp_move_begin_total += nb_hyp_move_begin
nb_hyp_move_end_total += nb_hyp_move_end

# communicate the results
logging.info(" - [split]: {:d}. ({:.2f}% of the ipus of hyp)"
             "".format(nb_hyp_split_ipus,
                       (float(nb_hyp_split_ipus) / float(nb_ipus_hyp)) * 100.))
logging.info(" - [ignore]: {:d}. ({:.2f}% of the ipus of hyp are false positives)"
             "".format(nb_hyp_not_match,
                       (float(nb_hyp_not_match) / float(nb_ipus_hyp)) * 100.))
logging.info(" - [move_begin]: {:d}. ({:.2f}% of the ipus of ref)"
             "".format(nb_hyp_move_begin,
                       (float(nb_hyp_move_begin) / float(nb_ipus_ref)) * 100.))
logging.info(" - [move_end]: {:d}. ({:.2f}% of the ipus of ref)"
             "".format(nb_hyp_move_end,
                       (float(nb_hyp_move_end) / float(nb_ipus_ref)) * 100.))

# Save the annotated result of the current file.
# NOTE(review): os.path.join() with a single argument is a no-op;
# presumably a directory component was intended — confirm against callers.
p = sppasRW(os.path.join(out_name)+"-"+os.path.basename(fh))
p.write(trs)

# -----------------------------------------------------------------------
# Write/save global results
# -----------------------------------------------------------------------

# Nothing to summarize when the reference contained no IPU at all
if nb_ipus_ref_total == 0:
    sys.exit(1)

# Prepare summary messages
# ------------------------

r = ' ==> Correct matching is {:.2f}%.'.format(
    (float(nb_ref_perfect_match_total) / float(nb_ipus_ref_total)) * 100.)
r0 = ' ==> Actions to check IPUs are:'
sys.exit(1) # --------------------------------------------------------------------------- # Convert input file if not TextGrid # --------------------------------------------------------------------------- fname, fext = os.path.splitext(filename) if fname.endswith("-palign") is False: print("ERROR: MarsaTag plugin requires SPPAS alignment files " "(i.e. with -palign in its name).") sys.exit(1) # read to check data content # -------------------------- parser = sppasRW(filename) trs_input = parser.read(filename) tier = trs_input.find("TokensAlign", case_sensitive=False) if tier is None: print("ERROR: A tier with name TokensAlign is required.") sys.exit(1) # write as textgrid # ----------------- if fext.lower().endswith("textgrid") is False: trs = sppasTranscription(name="TokensAlign") trs.append(tier) filename = fname + ".TextGrid" parser.set_filename(filename) parser.write(trs)
        log_level = 0
    else:
        log_level = cg.log_level
else:
    log_level = cg.quiet_log_level

# Redirect the log messages to a stream handler
lgs = sppasLogSetup(log_level)
lgs.stream_handler()

# -----------------------------------------------------------------------
# Read
# -----------------------------------------------------------------------

logging.info("Read {:s}".format(args.i))

# Time the reading of the annotated file
start_time = time.time()
parser = sppasRW(args.i)
trs_input = parser.read()
end_time = time.time()

# General information
# -------------------
logging.debug(
    "Elapsed time for reading: {:f} seconds"
    "".format(end_time - start_time))

# Estimate the memory footprint by pickling the whole transcription
pickle_string = pickle.dumps(trs_input)
logging.debug(
    "Memory usage of the transcription: {:d} bytes"
    "".format(sys.getsizeof(pickle_string)))
log_level = cg.quiet_log_level

# Redirect the log messages to a stream handler
lgs = sppasLogSetup(log_level)
lgs.stream_handler()

# -----------------------------------------------------------------------
# Read
# -----------------------------------------------------------------------

trs_output = sppasTranscription("Merged")

for file_idx, trs_input_file in enumerate(args.i):

    # BUG FIX: log the current file name, not the whole list of inputs.
    # The previous code was "Read {:s}".format(args.i): applying the 's'
    # format spec to a list raises TypeError at runtime.
    logging.info("Read {:s}".format(trs_input_file))

    # Time the reading of the annotated file
    start_time = time.time()
    parser = sppasRW(trs_input_file)
    trs_input = parser.read()
    end_time = time.time()

    # General information
    # -------------------
    logging.debug("Elapsed time for reading: {:f} seconds"
                  "".format(end_time - start_time))

    # Estimate the memory footprint by pickling the whole transcription
    pickle_string = pickle.dumps(trs_input)
    logging.debug("Memory usage of the transcription: {:d} bytes"
                  "".format(sys.getsizeof(pickle_string)))

    # Copy all media/ctrl vocab
    # -------------------------
    trs_output.set_media_list(trs_input.get_media_list())
    trs_output.set_ctrl_vocab_list(trs_input.get_ctrl_vocab_list())
with_radius = 0
# Command-line flags choose how interval radius is handled
if args.addradius:
    with_radius = 1
if args.deductradius:
    with_radius = -1

# -----------------------------------------------------------------------
# Read data
# -----------------------------------------------------------------------

tiers = dict()
for file_input in args.i:

    logging.info("Read {:s}".format(file_input))

    # Time the reading of the annotated file
    start_time = time.time()
    parser = sppasRW(file_input)
    trs_input = parser.read()
    end_time = time.time()

    # General information
    # -------------------
    logging.debug("Elapsed time for reading: {:f} seconds"
                  "".format(end_time - start_time))

    # Estimate the memory footprint by pickling the whole transcription
    pickle_string = pickle.dumps(trs_input)
    logging.debug("Memory usage of the transcription: {:d} bytes"
                  "".format(sys.getsizeof(pickle_string)))

    # Get expected tier
    # -----------------
    # (the body of this condition continues beyond this chunk)
    tier = trs_input.find(args.t, case_sensitive=False)
    if tier is not None: