def make_transcription(self):
        text = dict()
        for trs in utils.list_directory(self.transcription_dir, abspath=True):
            spk_id = os.path.splitext(os.path.basename(trs))[0]
            lines = utils.open_utf8(trs, 'r').readlines()

            # add utterance id from even lines starting at line 2
            ids = [
                spk_id + u'_' + re.sub(ur'\s+|:|;', u'', e)
                for e in lines[1::2]
            ]
            # delete linebreaks on odd lines starting at line 3
            # (this does not take into account fancy unicode
            # linebreaks), see
            # http://stackoverflow.com/questions/3219014
            transcriptions = [
Пример #2
0
    def export(self):
        """Export the model files to the output directory.

        Every entry of <recipe-dir>/exp/<model-type> whose name does
        not start with a digit is copied into <output-dir>
        (directories are copied recursively under their own name),
        then the export method of the parent class is called.

        """
        source_dir = os.path.join(
            self.recipe_dir, 'exp', self.model_type)

        for entry in utils.list_directory(source_dir, abspath=True):
            name = os.path.basename(entry)

            # entries starting with a number are excluded, as we want
            # only the final state
            if name[0].isdigit():
                continue

            if not os.path.isdir(entry):
                shutil.copy(entry, self.output_dir)
            else:
                # a directory, e.g. the log subdir
                shutil.copytree(
                    entry, os.path.join(self.output_dir, name))

        super(AbstractAcousticModel, self).export()
Пример #3
0
    def export(self):
        """Export the decoding results and the graph to <output-dir>

        The content of <recipe-dir>/decode is copied directly under
        <output-dir>, and <recipe-dir>/graph is copied to
        <output-dir>/graph. The export method of the parent class is
        called last.

        """
        self.log.debug('exporting results to %s', self.output_dir)

        decode_dir = os.path.join(self.recipe_dir, 'decode')
        for entry in utils.list_directory(decode_dir, abspath=True):
            if not os.path.isdir(entry):
                shutil.copy(entry, self.output_dir)
                continue

            # directories are copied recursively, keeping their name
            target = os.path.join(self.output_dir, os.path.basename(entry))
            shutil.copytree(entry, target)

        graph_source = os.path.join(self.recipe_dir, 'graph')
        graph_target = os.path.join(self.output_dir, 'graph')
        shutil.copytree(graph_source, graph_target)

        super(Decode, self).export()
Пример #4
0
    def correct_transcription(self):
        """Correct problems with the GlobalPhone Vietnamese transcripts

        The corrections are completely ad hoc and the results are
        stored in a temporary folder, which then replaces
        ``self.transcription_dir``. The temporary folder is not
        removed by this method.

        - remove trailing spaces, all repeated spacings and '_' from
          transcriptions on every odd line but the first

        - double spacings and '_' are actually only found for speakers
          200 to 208

        Returns True once every transcription file has been processed.

        """
        # generate temporary output folder
        corrected_transcription_dir = tempfile.mkdtemp()

        # get the list of transcription files
        trss = utils.list_directory(self.transcription_dir, abspath=True)

        for trs in trss:
            # read transcript file, making sure the handle is closed
            # instead of being left to the garbage collector
            with utils.open_utf8(trs, 'r') as stream:
                lines = stream.readlines()

            # correct odd lines (the first line is left untouched)
            corrected = []
            for line in lines[2::2]:
                line = line.replace(u'_', u' ')

                # a single replace pass would leave a double space
                # behind for any run of three or more spaces (e.g.
                # ' _ ' once the '_' is replaced), so iterate until
                # no double spacing remains
                while u'  ' in line:
                    line = line.replace(u'  ', u' ')

                corrected.append(line.strip() + u'\n')
            lines[2::2] = corrected

            # write corrected version to temp
            output_file = os.path.join(
                corrected_transcription_dir, os.path.basename(trs))

            with utils.open_utf8(output_file, 'w') as out:
                for line in lines:
                    out.write(line)

        self.transcription_dir = corrected_transcription_dir
        return True