Пример #1
0
    def run(self, input_file, opt_input_file=None, output_file=None):
        """Syllabify the time-aligned phonemes of a file.

        Depending on the options, the result contains:

            - the syllables of the whole tier of phonemes ('usesphons');
            - the syllables found inside the intervals of the tier named
              by the 'tiername' option ('usesintervals').

        Each of them is followed by its tier of classes if the option
        'createclasses' is enabled.

        :param input_file: (list of str) time-aligned phonemes; only the
        first file name of the list is read
        :param opt_input_file: (list of str) ignored
        :param output_file: (str) the output file name; nothing is saved
        if None
        :returns: (sppasTranscription)
        :raises: EmptyOutputError if an output file name is given but no
        tier was created

        """
        # The tier of phonemes to syllabify
        trs_input = sppasRW(input_file[0]).read()
        tier_input = sppasFindTier.aligned_phones(trs_input)

        # The container of all the created tiers
        trs_output = sppasTranscription(self.name)
        trs_output.set_meta('syllabification_result_of', input_file[0])

        # Syllables of the whole tier of phonemes
        if self._options['usesphons'] is True:
            tier_syll = self.convert(tier_input)
            trs_output.append(tier_syll)
            if self._options['createclasses']:
                trs_output.append(self.make_classes(tier_syll))

        # Syllables restricted to the intervals of another tier
        if self._options['usesintervals'] is True:
            intervals = trs_input.find(self._options['tiername'])
            if intervals is not None:
                tier_syll_int = self.convert(tier_input, intervals)
                tier_syll_int.set_name("SyllAlign-Intervals")
                tier_syll_int.set_meta('syllabification_used_intervals',
                                       intervals.get_name())
                trs_output.append(tier_syll_int)
                if self._options['createclasses']:
                    tier_classes = self.make_classes(tier_syll_int)
                    tier_classes.set_name("SyllClassAlign-Intervals")
                    trs_output.append(tier_classes)
            else:
                # The requested tier of intervals is missing: only warn
                warning = (info(1264, "annotations")).format(
                    tiername=self._options['tiername'])
                self.logfile.print_message(warning,
                                           indent=2,
                                           status=annots.warning)

        # Save in a file, only if requested
        if output_file is not None:
            if len(trs_output) == 0:
                raise EmptyOutputError
            sppasRW(output_file).write(trs_output)

        return trs_output
Пример #2
0
    def print_options(self):
        """Report the options of the annotation in the user log.

        A title line is written first, then one line per option with its
        name and its value, and a newline is finally requested.

        """
        title = info(1050, "annotations") + ": "
        self.logfile.print_message(title, indent=0, status=None)

        for opt_name, opt_value in self._options.items():
            self.logfile.print_message(
                " ... {!s:s}: {!s:s}".format(opt_name, opt_value),
                indent=0,
                status=None)

        self.logfile.print_newline()
Пример #3
0
    def check_file(filename):
        """Diagnose a file of any type: audio or annotated file.

        The extension of the file name, compared in lower case, selects
        the check to perform.

        :param filename: (str) name of the input file to diagnose.
        :returns: tuple with (status identifier, message). The status is
        an error if the extension is neither an audio nor an annotated one.

        """
        extension = os.path.splitext(filename)[1]
        lowered = extension.lower()

        if lowered in sppas.src.audiodata.aio.extensions:
            checker = sppasDiagnosis.check_audio_file
        elif lowered in sppas.src.anndata.aio.extensions:
            checker = sppasDiagnosis.check_trs_file
        else:
            # Unknown extension: it is reported as written in the file name
            unknown = (info(1020, "annotations")).format(extension=extension)
            return annots.error, info(1006, "annotations") + unknown

        return checker(filename)
Пример #4
0
    def print_stat_item(self, step_number, value=None):
        """Print a statistic value in the output stream for a given step.

        Do not print anything if no parameters were given.

        :param step_number: (1..N)
        :param value: (str) A statistic value. If None, the status of the
        step (enabled or disabled) is printed instead.

        """
        params = self.parameters
        if params is None:
            return

        if value is None:
            # No value given: fall back on the message of the step status
            msg_id = 1030 if params.get_step_status(step_number) else 1031
            value = info(msg_id, "annotations")

        step_name = params.get_step_name(step_number)
        self.print_item(step_name, str(value))
Пример #5
0
    def print_diagnosis(self, *filenames):
        """Print the diagnosis of a list of files in the user report.

        Entries which are None or which do not exist on the disk are
        silently skipped.

        :param filenames: (list) List of files.

        """
        for filename in filenames:
            if filename is None or not os.path.exists(filename):
                continue

            short_name = os.path.basename(filename)
            # Only the message of the diagnosis is reported, not its status
            _, diagnosis = sppasDiagnosis.check_file(filename)
            report = (info(1056, "annotations")).format(short_name)
            report += ": {!s:s}".format(diagnosis)
            self.logfile.print_message(report, indent=0, status=None)
Пример #6
0
    def print_header(self):
        """Print the header of the report: name, copyright, url, contact.

        The lines are written at the end of the log stream if there is
        one, and sent to the logging module otherwise.

        """
        header_lines = (
            sg.__name__ + ' ' + info(1032, "annotations")
            + ' ' + sg.__version__,
            sg.__copyright__,
            info(1033, "annotations") + ': ' + sg.__url__,
            info(1034, "annotations") + ': '
            + sg.__author__ + " (" + sg.__contact__ + ")",
        )

        if self.logfp is None:
            # No log stream: fall back on the standard logging
            for line in header_lines:
                logging.info(line)
            return

        # seek(0, 2): go to the end of the stream before writing
        self.logfp.seek(0, 2)
        for line in header_lines:
            self.print_message(line)
        self.print_newline()
        self.print_separator()
Пример #7
0
    def _phonetize(self, entry):
        """Phonetize a text.

        Because we absolutely need to match with the number of tokens, this
        method always returns something for each token: either the
        automatic phonetization (from dict or from phonunk) or the unk
        stamp.

        :param entry: (str) The string to be phonetized; it is split on
        whitespace to get the tokens.
        :returns: (list) phonetization of each token of the given entry,
        or a list with only the unk stamp as soon as a token is in error

        """
        unk_stamp = symbols.unk
        phonetized = self.__phonetizer.get_phon_tokens(
            entry.split(), phonunk=self._options['phonunk'])

        result = list()
        for token, phones, status in phonetized:

            if status == annots.error:
                # Give up: the whole entry is replaced by the unk stamp
                report = (info(1110, "annotations")).format(token) + \
                         info(1114, "annotations")
                self.logfile.print_message(report, indent=2, status=status)
                return [unk_stamp]

            report = None
            if status == annots.warning:
                report = (info(1110, "annotations")).format(token)
                if len(phones) > 0:
                    report += (info(1112, "annotations")).format(phones)
                else:
                    # Nothing was proposed for this token
                    report += info(1114, "annotations")
                    phones = unk_stamp
            result.append(phones)

            if report:
                self.logfile.print_message(report, indent=2, status=status)

        return result
Пример #8
0
    def convert(self, tier):
        """Phonetize annotations of a tokenized tier.

        Each label of an annotation is converted into a label whose tags
        are the pronunciation variants of its texts. Duplicated variants
        are removed and the remaining ones are kept in the order they were
        found, so that the result is the same from one run to another.

        :param tier: (Tier) the ortho transcription previously tokenized.
        :returns: (Tier) phonetized tier with name "Phones"
        :raises: IOError if no tier is given
        :raises: EmptyInputError if the given tier is empty

        """
        if tier is None:
            raise IOError('No given tier.')
        if tier.is_empty() is True:
            raise EmptyInputError(name=tier.get_name())

        phones_tier = sppasTier("Phones")
        for i, ann in enumerate(tier):
            self.logfile.print_message(
                (info(1220, "annotations")).format(number=i + 1), indent=1)

            location = ann.get_location().copy()
            labels = list()

            # Phonetize all labels of the tokenized transcription
            for label in ann.get_labels():

                phonetizations = list()
                for text, score in label:
                    if text.is_pause() or text.is_silence():
                        # It's in case the pronunciation dictionary
                        # were not properly fixed.
                        phonetizations.append(SIL)

                    elif text.is_empty() is False:
                        phones = self._phonetize(text.get_content())
                        for p in phones:
                            phonetizations.extend(p.split(separators.variants))

                # New in SPPAS 1.9.6.
                #  - The result is a sequence of labels.
                #  - Variants are alternative tags.
                # De-duplicate while preserving the first-seen order: a
                # plain set() would order the tags by string hash, which
                # can change between two executions.
                seen = set()
                tags = list()
                for p in phonetizations:
                    if p not in seen:
                        seen.add(p)
                        tags.append(sppasTag(p))
                labels.append(sppasLabel(tags))

            phones_tier.create_annotation(location, labels)

        return phones_tier
Пример #9
0
    def check_trs_file(filename):
        """Check an annotated file.

        Are verified, in this order:

            1. the file can be opened with the expected encoding (error);
            2. the filename contains only US-ASCII characters (warning);
            3. the filename contains no whitespace (warning).

        The first check which fails gives the result.

        :param filename: (string) name of the input file
        :returns: tuple with (status identifier, message)

        """
        ok_message = info(1000, "annotations")

        # test encoding
        # NOTE(review): the file is opened and closed but nothing is read,
        # so a decoding problem in its content is presumably not detected
        # here - TODO confirm it is the expected behavior.
        try:
            with codecs.open(filename, "r", sg.__encoding__):
                pass
        except UnicodeDecodeError:
            reason = (info(1026, "annotations")).format(
                encoding=sg.__encoding__)
            return annots.error, info(1004, "annotations") + reason
        except Exception as e:
            return annots.error, info(1004, "annotations") + str(e)

        # test US_ASCII in filename
        if any(ord(char) >= 128 for char in filename):
            return (annots.warning,
                    info(1002, "annotations") + info(1022, "annotations"))

        # test whitespace in filename
        if " " in filename:
            return (annots.warning,
                    info(1002, "annotations") + info(1024, "annotations"))

        return annots.ok, ok_message
Пример #10
0
    def convert(self, phonemes, intervals=None):
        """Syllabify labels of a time-aligned phones tier.

        The syllables are searched inside each of the given intervals. An
        interval for which no range of phonemes is found is skipped and a
        warning is written in the log.

        :param phonemes: (sppasTier) time-aligned phonemes tier
        :param intervals: (sppasTier) intervals to syllabify in; if None,
        they are estimated from the phonemes
        :returns: (sppasTier) tier with name "SyllAlign"

        """
        if intervals is None:
            intervals = sppasSyll._phon_to_intervals(phonemes)

        syllables = sppasTier("SyllAlign")
        syllables.set_meta('syllabification_of_tier', phonemes.get_name())

        for interval in intervals:

            # index of the phoneme containing the begin of the interval:
            # lindex() is tried first, mindex() is the fallback
            begin = interval.get_lowest_localization()
            first_idx = phonemes.lindex(begin)
            if first_idx == -1:
                first_idx = phonemes.mindex(begin, bound=-1)

            # index of the phoneme containing the end of the interval:
            # rindex() is tried first, mindex() is the fallback
            end = interval.get_highest_localization()
            last_idx = phonemes.rindex(end)
            if last_idx == -1:
                last_idx = phonemes.mindex(end, bound=1)

            # no phonemes found for this interval: report it and go on
            if first_idx == -1 or last_idx == -1:
                self.logfile.print_message(
                    (info(1224, "annotations")).format(interval),
                    indent=2,
                    status=annots.warning)
                continue

            # syllabify within the interval
            self.syllabify_interval(phonemes, first_idx, last_idx, syllables)

        return syllables
Пример #11
0
    def segment(self, audio_filename, phon_name, token_name, align_name):
        """Call an aligner to perform speech segmentation and manage errors.

        The basic aligner is used instead of the regular one when there
        is nothing to align or only one phoneme.

        :param audio_filename: (str) the audio file name of an IPU
        :param phon_name: (str) file name with the phonetization
        :param token_name: (str) file name with the tokenization
        :param align_name: (str) file name to save the result WITHOUT ext.

        :returns: A message of the aligner in case of any problem, or
        an empty string if success.

        """
        aligners = (self._aligner, self._basic_aligner)

        # Give the phonetization string to both aligners
        phones = "" if phon_name is None else self._readline(phon_name)
        for aligner in aligners:
            aligner.set_phones(phones)

        # Give the tokenization string to both aligners
        tokens = "" if token_name is None else self._readline(token_name)
        for aligner in aligners:
            aligner.set_tokens(tokens)

        # Do not align nothing!
        if len(phones) == 0:
            self._basic_aligner.run_alignment(audio_filename, align_name)
            return info(1222, "annotations")

        # Do not align only one phoneme!
        single_phoneme = len(phones.split()) <= 1 and "-" not in phones
        if single_phoneme:
            self._basic_aligner.run_alignment(audio_filename, align_name)
            return ""

        # Execute Alignment: the messages of both steps are concatenated
        checked = self._aligner.check_data()
        return checked + self._aligner.run_alignment(audio_filename,
                                                     align_name)
Пример #12
0
    def print_annotations_header(self):
        """Print the parameters information in the output stream.

        Are printed: two title lines, the current time, the language of
        each step ("---" if none), the inputs, the status of each step
        and the output format.

        Do not print anything if no parameters were given.

        """
        params = self.parameters
        if params is None:
            return

        margin = ' '*24

        # Titles
        self.print_message(margin + info(1054, "annotations"))
        self.print_newline()
        self.print_message(margin + info(1035, "annotations"))
        self.print_separator()
        self.print_newline()

        # Time, then the language of each step
        self.print_message(info(1036, "annotations") + ': ' + sppasTime().now)
        self.print_message(info(1037, "annotations") + ': ')
        for step in range(params.get_step_numbers()):
            lang = params.get_lang(step)
            self.print_item(params.get_step_name(step),
                            "---" if lang is None else lang)
        self.print_newline()

        # Inputs
        self.print_message(info(1038, "annotations") + ': ')
        for sinput in params.get_sppasinput():
            self.print_item(sinput)
        self.print_newline()

        # Status of each step
        self.print_message(info(1039, "annotations") + ': ')
        for step in range(params.get_step_numbers()):
            self.print_stat_item(step)
        self.print_newline()

        # Output format
        out_format = info(1040, "annotations") + ': ' + \
            params.get_output_format()
        self.print_message(out_format)
        self.print_newline()
Пример #13
0
    def batch_processing(self,
                         file_names,
                         progress=None,
                         output_format=annots.extension):
        """Perform the annotation on a set of files.

        The given list of inputs can be either:
            - a list of the files to be used as a single input:
              [file1, file2, ...]
            - a list of the files to be used as several-required-inputs:
              [(file1_a, file1_b), (file2_a, file2_b), ...]
            - a list of the files to be used as inputs and optional-inputs:
              [((file_1_a), (file_1_x)), ((file_2_a), (file_2_x)), ... ]
            - a list of the files to be used as several-required-inputs and
              optional-inputs:
              [((file1_a, file1_b), (file_1_x, file_1_y)), ...]

        The options are printed in the log first, if any. For each entry,
        the diagnosis of its files is printed, then the annotation is run
        and its result is reported in the log.

        :param file_names: (list) List of inputs
        :param progress: ProcessProgressTerminal() or ProcessProgressDialog()
        :param output_format: (str)
        :return: (int) Number of files processed with success

        """
        if len(self._options) > 0:
            self.print_options()

        total = len(file_names)
        if total == 0:
            return 0

        if progress:
            progress.set_header(self.name)
            progress.update(0, "")

        # Execute the annotation for each file in the list
        succeeded = 0
        for rank, input_files in enumerate(file_names, 1):

            required, optional = self._split_inputs(input_files)
            self.print_diagnosis(*required)
            self.print_diagnosis(*optional)

            out_name = self.run_for_batch_processing(required,
                                                     optional,
                                                     output_format)

            # A result name means success; None means nothing was created
            if out_name is not None:
                succeeded += 1
                self.logfile.print_message(out_name,
                                           indent=1,
                                           status=annots.ok)
            else:
                self.logfile.print_message(info(1306, "annotations"),
                                           indent=1,
                                           status=annots.info)
            self.logfile.print_newline()

            if progress:
                fraction = float(rank) / float(total)
                progress.set_fraction(round(fraction, 2))

        # Indicate completed!
        if progress:
            progress.update(
                1, (info(9000, "ui").format(succeeded, total)))
            progress.set_header("")

        return succeeded
Пример #14
0
from sppas.src.models.acm.modelmixer import sppasModelMixer
from sppas.src.utils.fileutils import sppasFileUtils

from ..baseannot import sppasBaseAnnotation
from ..searchtier import sppasFindTier
from ..annotationsexc import AnnotationOptionError
from ..annotationsexc import EmptyDirectoryError
from ..annotationsexc import NoInputError

from .tracksio import TracksReaderWriter
from .tracksgmt import TrackSegmenter
from .activity import sppasActivity

# ---------------------------------------------------------------------------

# Message texts used by this module. Each one is fetched only once, when
# the module is loaded, with its identifier in the "annotations" domain.
MSG_MODEL_L1_FAILED = info(1210, "annotations")
MSG_ALIGN_TRACK = info(1220, "annotations")
MSG_ALIGN_FAILED = info(1230, "annotations")
MSG_BASIC = info(1240, "annotations")
MSG_ACTION_SPLIT_INTERVALS = info(1250, "annotations")
MSG_ACTION_ALIGN_INTERVALS = info(1252, "annotations")
MSG_ACTION_MERGE_INTERVALS = info(1254, "annotations")
MSG_ACTION_EXTRA_TIER = info(1256, "annotations")
MSG_TOKENS_DISABLED = info(1260, "annotations")
MSG_NO_TOKENS_ALIGN = info(1262, "annotations")
MSG_EXTRA_TIER = info(1270, "annotations")
MSG_WORKDIR = info(1280, "annotations")

# ---------------------------------------------------------------------------

Пример #15
0
    def check_audio_file(filename):
        """Check an audio file.

        Are verified:

            1. the file can be opened and its properties read (error);
            2. the number of channels (error);
            3. the sample width (error or warning);
            4. the framerate (error or warning);
            5. the filename (warning).

        A remark on the filename is appended to the message but it never
        turns an error into a warning.

        :param filename: (str) name of the input file
        :returns: tuple with (status identifier, message)

        """
        status = annots.ok
        message = ""

        # test file format: can we support it?
        try:
            audio = sppas.src.audiodata.aio.open(filename)
            try:
                fm = audio.get_framerate()
                # compared below to EXPECTED_SAMPLE_WIDTH*8
                sp = audio.get_sampwidth()*8
                nc = audio.get_nchannels()
            finally:
                # close the audio even if one of the getters failed
                audio.close()
        except UnicodeDecodeError:
            message = info(1004, "annotations") + \
                      (info(1026, "annotations")).format(encoding=sg.__encoding__)
            return annots.error, message
        except Exception as e:
            message = info(1004, "annotations") + str(e)
            return annots.error, message

        # values which are not supported: errors
        if nc > sppasDiagnosis.EXPECTED_CHANNELS:
            status = annots.error
            message += (info(1010, "annotations")).format(number=nc)

        if sp < sppasDiagnosis.EXPECTED_SAMPLE_WIDTH*8:
            status = annots.error
            message += (info(1012, "annotations")).format(sampwidth=sp)

        if fm < sppasDiagnosis.EXPECTED_FRAME_RATE:
            status = annots.error
            message += (info(1014, "annotations")).format(framerate=fm)

        # values higher than the expected ones: warnings
        if status != annots.error:
            if sp > sppasDiagnosis.EXPECTED_SAMPLE_WIDTH*8:
                status = annots.warning
                message += (info(1016, "annotations")).format(sampwidth=sp)

            if fm > sppasDiagnosis.EXPECTED_FRAME_RATE:
                status = annots.warning
                message += (info(1018, "annotations")).format(framerate=fm)

        # test US-ASCII chars
        if all(ord(x) < 128 for x in filename) is False:
            # an error already found must not be downgraded to a warning
            if status != annots.error:
                status = annots.warning
            message += info(1022, "annotations")

        # test whitespace
        if " " in filename:
            if status != annots.error:
                status = annots.warning
            message += info(1024, "annotations")

        # prepend the prefix matching the final status
        if status == annots.error:
            message = info(1004, "annotations") + message
        elif status == annots.warning:
            message = info(1002, "annotations") + message
        else:
            message = info(1000, "annotations")

        return status, message