示例#1
0
    def run_alignment(self, inputwav, outputalign, N=3):
        """
        Time-align one unit by executing the external program `julius`.

        The phones and tokens of the unit must have been fixed beforehand
        with:
            - set_phones(str)
            - set_tokens(str)

        The result is written in the file `outputalign` + "." + the
        output extension of this aligner. That file is then scanned:
        if `julius` reported entries that were not found, they are given
        to add_tiedlist() and, if at least one was added, `julius` is
        executed a second time.

        @param inputwav (str - IN) the audio input file name, of type PCM-WAV 16000 Hz, 16 bits
        @param outputalign (str - OUT) the output file name, without extension
        @param N (int) N value of N-grams, used only if SLM (i.e. outext=walign).
        It is not referenced in the body of this method.

        @return (str) A message of `julius`: an empty string, or the
        information that entries were added into the tiedlist.

        @raise Exception if the output of `julius` contains error lines
        or reports that the search failed.

        """
        alignfile = outputalign + "." + self._outext
        root = os.path.splitext(inputwav)[0]

        # Prepare the dependencies of the decoder: grammar-based for
        # "palign", SLM-based for any other output extension.
        if self._outext != "palign":
            self.gen_slm_dependencies(root)
        else:
            self.gen_grammar_dependencies(root)

        def _decode():
            # Execute julius then load everything it wrote in the output file.
            self.run_julius(inputwav, root, alignfile)
            with codecs.open(alignfile, 'r', encoding) as fd:
                return fd.readlines()

        report = _decode()

        # Look for the entries julius declared as "not found": they are
        # listed between double quotes. Only the last matching line is kept.
        missing = []
        for row in report:
            if "Error: voca_load_htkdict" not in row or "not found" not in row:
                continue
            row = ToStrip(row)
            quoted = row[row.find('"') + 1:]
            quoted = quoted[:quoted.find('"')]
            if quoted:
                missing = quoted.split()

        message = ""
        if missing:
            added = self.add_tiedlist(missing)
            if len(added) > 0:
                message = "The acoustic model was modified. The following entries were successfully added into the tiedlist: " + " ".join(added) + "\n"
                # The model has changed: give julius a second chance.
                report = _decode()

        # Collect the errors of the (last) execution. Lines containing
        # " line " are ignored.
        errorlines = ""
        for row in report:
            if row.startswith(("Error:", "ERROR:")) and " line " not in row:
                errorlines += row
            if "search failed" in row:
                message = "Julius search has failed to find the transcription in the audio file of this unit."
                errorlines = "Search error. " + errorlines

        if errorlines:
            raise Exception(message + errorlines)

        return message
示例#2
0
    def get_phon_entry(self, entry):
        """
        Look up the phonetization of a single entry in the dictionary.

        This is a pure dictionary-based method: an entry which is missing
        from the dictionary is not automatically phonetized.

        @param `entry` (str) The token to phonetize.
        @return A string with the phonetization of `entry`, the unknown
        symbol if the dictionary does not know it, or an empty string
        for an empty entry and for the specific markers (gpd_, gpf_, ipu_).

        """
        token = ToStrip(entry)

        # Tokens surrounded by <...> (italian transcription, CLIPS-Evalita
        # 2011 campaign): only the content between the brackets is used.
        if token.startswith(u"<") and token.endswith(u">"):
            token = token[1:-1]

        # Nothing left to phonetize.
        if not token:
            return ""

        # Markers without phonetization:
        #   - gpd_/gpf_ prefixes, used in the CID transcription
        #     (Corpus of Interactional Data, http://sldr.org/sldr000720);
        #   - ipu_ anywhere in the token, used in SPPAS IPU segmentation.
        if token.startswith((u"gpd_", u"gpf_")) or u"ipu_" in token:
            return ""

        # Ask the dictionary for the token exactly as it is now.
        pron = self._pdict.get_pron(token)
        unknown = self._pdict.unkstamp

        # A known token: apply the phoneme mapping to its pronunciation.
        if pron != unknown:
            return self._map_phonentry(pron)

        return unknown