Example #1
    def run(self, input_file, opt_input_file=None, output_file=None):
        """Run the automatic annotation process on an input.

        :param input_file: (list of str) Momel anchors
        :param opt_input_file: (list of str) ignored
        :param output_file: (str) the output file name
        :returns: (sppasTranscription)

        """
        # Get the tier to be annotated.
        parser = sppasRW(input_file[0])
        trs_input = parser.read()
        tier_input = sppasFindTier.pitch_anchors(trs_input)

        # Annotate the tier
        targets = sppasIntsint.tier_to_anchors(tier_input)
        tones = self.__intsint.annotate(targets)
        tier_intsint = sppasIntsint.tones_to_tier(tones, tier_input)

        # Create the transcription result
        trs_output = sppasTranscription(self.name)
        trs_output.append(tier_intsint)
        trs_output.set_meta('intsint_result_of', input_file[0])

        # Save in a file
        if output_file is not None:
            parser = sppasRW(output_file)
            parser.write(trs_output)

        return trs_output
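
A minimal usage sketch for the method above, which plausibly belongs to the sppasIntsint annotation class given the calls in its body; the import path, the no-argument constructor and the file names below are assumptions for illustration, not taken from the snippet.

# Hypothetical usage of Example #1; import path and file names are assumed.
from sppas.src.annotations.Intsint import sppasIntsint

ann = sppasIntsint()
trs = ann.run(["sample-momel.xra"], output_file="sample-intsint.xra")
print(len(trs))  # number of tiers in the resulting sppasTranscription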
Example #2
    def run(self, input_file, opt_input_file=None, output_file=None):
        """Run the automatic annotation process on an input.

        The input is a tuple of 2 files: the main speaker and the echoing
        speaker.

        :param input_file: (list of str) time-aligned tokens
        :param opt_input_file: (list of str) ignored
        :param output_file: (str) the output file name
        :returns: (sppasTranscription)

        """
        self.print_options()
        self.print_diagnosis(input_file[0])
        self.print_diagnosis(input_file[1])

        # Get the tier of the main speaker
        parser = sppasRW(input_file[0])
        trs_input1 = parser.read()
        tier_tokens = sppasFindTier.aligned_tokens(trs_input1)
        tier_input1 = self.make_word_strain(tier_tokens)
        tier_input1.set_name(tier_input1.get_name() + "-source")

        # Get the tier of the echoing speaker
        parser = sppasRW(input_file[1])
        trs_input2 = parser.read()
        tier_tokens = sppasFindTier.aligned_tokens(trs_input2)
        tier_input2 = self.make_word_strain(tier_tokens)
        tier_input2.set_name(tier_input2.get_name() + "-echo")

        # Repetition Automatic Detection
        (src_tier, echo_tier) = self.other_detection(tier_input1, tier_input2)

        # Create the transcription result
        trs_output = sppasTranscription(self.name)
        trs_output.set_meta('other_repetition_result_of_src', input_file[0])
        trs_output.set_meta('other_repetition_result_of_echo', input_file[1])
        if len(self._word_strain) > 0:
            trs_output.append(tier_input1)
        if self._options['stopwords'] is True:
            trs_output.append(self.make_stop_words(tier_input1))
        trs_output.append(src_tier)
        trs_output.append(echo_tier)
        if len(self._word_strain) > 0:
            trs_output.append(tier_input2)

        # Save in a file
        if output_file is not None:
            if len(trs_output) > 0:
                parser = sppasRW(output_file)
                parser.write(trs_output)
                self.print_filename(output_file)
            else:
                raise EmptyOutputError

        return trs_output
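
Because input_file[0] is the main speaker and input_file[1] the echoing speaker, a call sketch could look as follows; the class name sppasOtherRepet and the file names are assumptions for illustration.

# Hypothetical call with two time-aligned token files: index 0 is the
# main speaker, index 1 the echoing speaker (class name and paths assumed).
ann = sppasOtherRepet()
trs = ann.run(["speaker1-palign.xra", "speaker2-palign.xra"],
              output_file="speaker1-orepet.xra")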
Example #3
    def run(self, input_file, opt_input_file=None, output_file=None):
        """Run the automatic annotation process on an input.

        :param input_file: (list of str) time-aligned phonemes
        :param opt_input_file: (list of str) ignored
        :param output_file: (str) the output file name
        :returns: (sppasTranscription)

        """
        # Get the tier to syllabify
        parser = sppasRW(input_file[0])
        trs_input = parser.read()
        tier_input = sppasFindTier.aligned_phones(trs_input)

        # Create the transcription result
        trs_output = sppasTranscription(self.name)
        trs_output.set_meta('syllabification_result_of', input_file[0])

        # Syllabify the tier
        if self._options['usesphons'] is True:
            tier_syll = self.convert(tier_input)
            trs_output.append(tier_syll)
            if self._options['createclasses']:
                trs_output.append(self.make_classes(tier_syll))

        # Extra tier: syllabify between given intervals
        if self._options['usesintervals'] is True:
            intervals = trs_input.find(self._options['tiername'])
            if intervals is None:
                self.logfile.print_message((info(
                    1264,
                    "annotations")).format(tiername=self._options['tiername']),
                                           indent=2,
                                           status=annots.warning)
            else:
                tier_syll_int = self.convert(tier_input, intervals)
                tier_syll_int.set_name("SyllAlign-Intervals")
                tier_syll_int.set_meta('syllabification_used_intervals',
                                       intervals.get_name())
                trs_output.append(tier_syll_int)
                if self._options['createclasses']:
                    t = self.make_classes(tier_syll_int)
                    t.set_name("SyllClassAlign-Intervals")
                    trs_output.append(t)

        # Save in a file
        if output_file is not None:
            if len(trs_output) > 0:
                parser = sppasRW(output_file)
                parser.write(trs_output)
            else:
                raise EmptyOutputError

        return trs_output
Example #4
    def run(self, input_file, opt_input_file=None, output_file=None):
        """Run the automatic annotation process on an input.

        :param input_file: (list of str) pitch values
        :param opt_input_file: (list of str) ignored
        :param output_file: (str) the output file name
        :returns: (sppasTranscription)

        """
        # Get pitch values from the input
        pitch = self.fix_pitch(input_file[0])

        # Search for anchors
        anchors_tier = self.convert(pitch)
        self.logfile.print_message(str(len(anchors_tier)) + " anchors found.",
                                   indent=2,
                                   status=annots.info)

        # Fix result
        trs_output = sppasTranscription(self.name)
        trs_output.append(anchors_tier)
        trs_output.set_meta('annotation_result_of', input_file[0])

        if output_file is not None:
            parser = sppasRW(output_file)
            parser.write(trs_output)

        return trs_output
Example #5
    def fix_pitch(input_filename):
        """Load pitch values from a file.

        It is supposed that the given file contains a tier with name "Pitch"
        with a pitch value every 10ms, or a tier with name "PitchTier".

        :returns: A list of pitch values (one value each 10 ms).

        """
        parser = sppasRW(input_filename)
        trs = parser.read()
        pitch_tier = trs.find("Pitch")
        if pitch_tier is None:
            pitch_tier = trs.find("PitchTier")
            if pitch_tier is not None:
                pitch_list = trs.to_pitch()
            else:
                raise NoInputError
        else:
            pitch_list = [
                round(a.get_best_tag().get_typed_content(), 6)
                for a in pitch_tier
            ]

        if len(pitch_list) == 0:
            raise EmptyInputError(name="Pitch")

        return pitch_list
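
Since fix_pitch() takes no self argument, it reads as a static helper. A hedged sketch of a call, assuming it is a @staticmethod of a Momel-related class named sppasMomel and that a PitchTier file exists at the given (hypothetical) path:

# Assumed usage: the class name sppasMomel and the file name are hypothetical.
pitch = sppasMomel.fix_pitch("sample.PitchTier")
# One value every 10 ms, so the list length also gives the duration:
print(len(pitch), "values, i.e.", len(pitch) * 0.01, "seconds")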
Example #6
    def run(self, input_file, opt_input_file=None, output_file=None):
        """Run the automatic annotation process on an input.

        :param input_file: (list of str) orthographic transcription
        :param opt_input_file: (list of str) ignored
        :param output_file: (str) the output file name
        :returns: (sppasTranscription)

        """
        # Get input tier to tokenize
        parser = sppasRW(input_file[0])
        trs_input = parser.read()
        tier_input = sppasFindTier.transcription(trs_input)

        # Tokenize the tier
        tier_faked_tokens, tier_std_tokens, tier_custom = self.convert(
            tier_input)

        # Create the transcription result
        trs_output = sppasTranscription(self.name)
        if tier_faked_tokens is not None:
            trs_output.append(tier_faked_tokens)
        if tier_std_tokens is not None:
            trs_output.append(tier_std_tokens)
        if tier_custom is not None:
            trs_output.append(tier_custom)

        trs_output.set_meta('text_normalization_result_of', input_file[0])
        trs_output.set_meta('text_normalization_vocab',
                            self.__normalizer.get_vocab_filename())
        trs_output.set_meta('language_iso', "iso639-3")
        trs_output.set_meta('language_code_0', self.__normalizer.lang)
        trs_output.set_meta('language_name_0', "Undetermined")
        trs_output.set_meta(
            'language_url_0',
            "https://iso639-3.sil.org/code/" + self.__normalizer.lang)

        # Save in a file
        if output_file is not None:
            if len(trs_output) > 0:
                parser = sppasRW(output_file)
                parser.write(trs_output)
            else:
                raise EmptyOutputError

        return trs_output
Example #7
    def run(self, input_file, opt_input_file=None, output_file=None):
        """Run the automatic annotation process on an input.

        :param input_file: (list of str) time-aligned tokens
        :param opt_input_file: (list of str) ignored
        :param output_file: (str) the output file name
        :returns: (sppasTranscription)

        """
        # Get the tier to be used
        parser = sppasRW(input_file[0])
        trs_input = parser.read()

        tier_tokens = sppasFindTier.aligned_tokens(trs_input)
        tier_input = self.make_word_strain(tier_tokens)

        # Repetition Automatic Detection
        (src_tier, echo_tier) = self.self_detection(tier_input)

        # Create the transcription result
        trs_output = sppasTranscription(self.name)
        trs_output.set_meta('self_repetition_result_of', input_file[0])
        if len(self._word_strain) > 0:
            trs_output.append(tier_input)
        if self._options['stopwords'] is True:
            trs_output.append(self.make_stop_words(tier_input))
        trs_output.append(src_tier)
        trs_output.append(echo_tier)

        # Save in a file
        if output_file is not None:
            if len(trs_output) > 0:
                parser = sppasRW(output_file)
                parser.write(trs_output)
            else:
                raise EmptyOutputError

        return trs_output
Example #8
    def run(self, input_file, opt_input_file=None, output_file=None):
        """Run the automatic annotation process on an input.

        :param input_file: (list of str) normalized text
        :param opt_input_file: (list of str) ignored
        :param output_file: (str) the output file name
        :returns: (sppasTranscription)

        """
        # Get the tier to be phonetized.
        pattern = ""
        if self._options['usestdtokens'] is True:
            pattern = "std"
        parser = sppasRW(input_file[0])
        trs_input = parser.read()
        tier_input = sppasFindTier.tokenization(trs_input, pattern)

        # Phonetize the tier
        tier_phon = self.convert(tier_input)

        # Create the transcription result
        trs_output = sppasTranscription(self.name)
        if tier_phon is not None:
            trs_output.append(tier_phon)

        trs_output.set_meta('text_phonetization_result_of', input_file[0])
        trs_output.set_meta('text_phonetization_dict',
                            self.__phonetizer.get_dict_filename())

        # Save in a file
        if output_file is not None:
            if len(trs_output) > 0:
                parser = sppasRW(output_file)
                parser.write(trs_output)
            else:
                raise EmptyOutputError

        return trs_output
Example #9
def get_tier(filename, tier_idx):
    """Return the tier of the given index in an annotated file.

    :param filename: (str) Name of the annotated file
    :param tier_idx: (int) Index of the tier to get
    :returns: sppasTier or None

    """
    try:
        parser = sppasRW(filename)
        trs_input = parser.read(filename)
    except:
        return None
    if tier_idx < 0 or tier_idx >= len(trs_input):
        return None

    return trs_input[tier_idx]
Example #10
def get_tier(filename, tier_idx):
    """Return the tier of the given index in an annotated file.

    :param filename: (str) Name of the annotated file
    :param tier_idx: (int) Index of the tier to get
    :returns: sppasTier or None

    """
    try:
        parser = sppasRW(filename)
        trs_input = parser.read(filename)
    except Exception as e:
        logging.error("Parsing file {:s} failed: {:s}".format(filename, str(e)))
        return None
    if tier_idx < 0 or tier_idx >= len(trs_input):
        return None

    return trs_input[tier_idx]
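
A short usage sketch for get_tier() with a hypothetical file name; since the helper returns None both when parsing fails and when the index is out of range, the caller only needs a single check.

# Hypothetical usage: fetch the second tier (index 1) of an annotated file.
tier = get_tier("sample-palign.TextGrid", 1)
if tier is None:
    print("No tier found.")
else:
    print("Tier name:", tier.get_name())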
Example #11
                    help='Input annotated file name')

parser.add_argument("-t",
                    metavar="value",
                    default=1,
                    type=int,
                    help='Tier number (default: 1)')

if len(sys.argv) <= 1:
    sys.argv.append('-h')

args = parser.parse_args()

# ----------------------------------------------------------------------------

parser = sppasRW(args.i)
trs_input = parser.read()

if args.t <= 0 or args.t > len(trs_input):
    print('Error: Bad tier number.\n')
    sys.exit(1)
tier = trs_input[args.t - 1]

# Get the tier type
tier_type = "Unknown"
if tier.is_point() is True:
    tier_type = "Point"
elif tier.is_interval() is True:
    tier_type = "Interval"
elif tier.is_disjoint() is True:
    tier_type = "DisjointIntervals"
Example #12
        nb_hyp_not_match_total += nb_hyp_not_match
        nb_hyp_move_begin_total += nb_hyp_move_begin
        nb_hyp_move_end_total += nb_hyp_move_end

        # communicate the results
        logging.info("        - [split]: {:d}. ({:.2f}% of the ipus of hyp)"
                     "".format(nb_hyp_split_ipus, (float(nb_hyp_split_ipus) / float(nb_ipus_hyp)) * 100.))
        logging.info("        - [ignore]: {:d}. ({:.2f}% of the ipus of hyp are false positives)"
                     "".format(nb_hyp_not_match, (float(nb_hyp_not_match) / float(nb_ipus_hyp)) * 100.))

        logging.info("        - [move_begin]: {:d}. ({:.2f}% of the ipus of ref)"
                     "".format(nb_hyp_move_begin, (float(nb_hyp_move_begin) / float(nb_ipus_ref)) * 100.))
        logging.info("        - [move_end]: {:d}. ({:.2f}% of the ipus of ref)"
                     "".format(nb_hyp_move_end, (float(nb_hyp_move_end) / float(nb_ipus_ref)) * 100.))

        p = sppasRW(os.path.join(out_name)+"-"+os.path.basename(fh))
        p.write(trs)

    # -----------------------------------------------------------------------
    # Write/save global results
    # -----------------------------------------------------------------------

    if nb_ipus_ref_total == 0:
        sys.exit(1)

    # Prepare summary messages
    # ------------------------
    r = ' ==> Correct matching is {:.2f}%.'.format(
        (float(nb_ref_perfect_match_total) / float(nb_ipus_ref_total)) * 100.)

    r0 = '    ==> Actions to check IPUs are:'
Example #13
    sys.exit(1)

# ---------------------------------------------------------------------------
# Convert input file if not TextGrid
# ---------------------------------------------------------------------------

fname, fext = os.path.splitext(filename)

if fname.endswith("-palign") is False:
    print("ERROR: MarsaTag plugin requires SPPAS alignment files "
          "(i.e. with -palign in its name).")
    sys.exit(1)

# read to check data content
# --------------------------
parser = sppasRW(filename)
trs_input = parser.read(filename)
tier = trs_input.find("TokensAlign", case_sensitive=False)
if tier is None:
    print("ERROR: A tier with name TokensAlign is required.")
    sys.exit(1)

# write as textgrid
# -----------------
if fext.lower().endswith("textgrid") is False:
    trs = sppasTranscription(name="TokensAlign")
    trs.append(tier)
    filename = fname + ".TextGrid"
    parser.set_filename(filename)
    parser.write(trs)
Example #14
            log_level = 0
        else:
            log_level = cg.log_level
    else:
        log_level = cg.quiet_log_level
    lgs = sppasLogSetup(log_level)
    lgs.stream_handler()

# -----------------------------------------------------------------------
# Read
# -----------------------------------------------------------------------

logging.info("Read {:s}".format(args.i))

start_time = time.time()
parser = sppasRW(args.i)
trs_input = parser.read()
end_time = time.time()

# General information
# -------------------
logging.debug(
    "Elapsed time for reading: {:f} seconds"
    "".format(end_time - start_time))
pickle_string = pickle.dumps(trs_input)
logging.debug(
    "Memory usage of the transcription: {:d} bytes"
    "".format(sys.getsizeof(pickle_string)))

# -----------------------------------------------------------------------
# Work
Example #15
            log_level = cg.quiet_log_level
        lgs = sppasLogSetup(log_level)
        lgs.stream_handler()

    # -----------------------------------------------------------------------
    # Read
    # -----------------------------------------------------------------------

    trs_output = sppasTranscription("Merged")

    for file_idx, trs_input_file in enumerate(args.i):

        logging.info("Read {:s}".format(args.i))

        start_time = time.time()
        parser = sppasRW(trs_input_file)
        trs_input = parser.read()
        end_time = time.time()

        # General information
        # -------------------
        logging.debug("Elapsed time for reading: {:f} seconds"
                      "".format(end_time - start_time))
        pickle_string = pickle.dumps(trs_input)
        logging.debug("Memory usage of the transcription: {:d} bytes"
                      "".format(sys.getsizeof(pickle_string)))

        # Copy all media/ctrl vocab
        # -------------------------
        trs_output.set_media_list(trs_input.get_media_list())
        trs_output.set_ctrl_vocab_list(trs_input.get_ctrl_vocab_list())
Example #16
    with_radius = 0
    if args.addradius:
        with_radius = 1
    if args.deductradius:
        with_radius = -1

    # -----------------------------------------------------------------------
    # Read data
    # -----------------------------------------------------------------------

    tiers = dict()
    for file_input in args.i:

        logging.info("Read {:s}".format(file_input))
        start_time = time.time()
        parser = sppasRW(file_input)
        trs_input = parser.read()
        end_time = time.time()

        # General information
        # -------------------
        logging.debug("Elapsed time for reading: {:f} seconds"
                      "".format(end_time - start_time))
        pickle_string = pickle.dumps(trs_input)
        logging.debug("Memory usage of the transcription: {:d} bytes"
                      "".format(sys.getsizeof(pickle_string)))

        # Get expected tier
        # -----------------
        tier = trs_input.find(args.t, case_sensitive=False)
        if tier is not None: