示例#1
0
 def __init__(self, corpus, log=logger.null_logger()):
     """Cache utterance, speaker, segment and duration data of `corpus`.

     Parameters:
     -----------

     corpus: the input corpus. This method reads its `utt2spk`
       mapping and its `segments` attribute, and calls its
       `utt2duration()` method.

     log: the logger instance where to send messages, default is
       `logger.null_logger()`.

     """
     self.log = log
     self.corpus = corpus

     # read utt2spk from the input corpus. The (utterance, speaker)
     # pairs are materialized in a list: a bare zip() is a one-shot
     # iterator on Python 3 and would appear empty on a second pass.
     # Iterating items() directly (instead of unpacking zip(*items))
     # also copes with an empty utt2spk, which used to raise ValueError.
     self.utts = [(utt, spk) for utt, spk in self.corpus.utt2spk.items()]
     self.size = len(self.utts)
     self.speakers = {spk for _, spk in self.utts}

     self.segments = self.corpus.segments
     self.utt2dur = self.corpus.utt2duration()
     self.log.debug('loaded %i utterances from %i speakers',
                    self.size, len(self.speakers))
示例#2
0
    def __init__(self, corpus, log=logger.null_logger(), random_seed=None):
        """Load the utterances and speakers of `corpus`.

        Parameters:
        -----------

        corpus: the input corpus, its `utt2spk` mapping is read here.

        log: the logger instance where to send messages, default is
          `logger.null_logger()`.

        random_seed: kept for interface compatibility.
          NOTE(review): this argument is not used anywhere in this
          method -- confirm whether the random generator was meant to
          be seeded here.

        """
        self.log = log
        self.corpus = corpus

        # read utt2spk from the input corpus. items() is available on
        # both Python 2 and 3 (iteritems() is Python 2 only), and the
        # pairs are stored in a list so self.utts can be traversed more
        # than once. Iterating the items directly also copes with an
        # empty utt2spk, where unpacking zip(*items) raises ValueError.
        self.utts = [(utt, spk) for utt, spk in self.corpus.utt2spk.items()]
        self.size = len(self.utts)
        self.speakers = {spk for _, spk in self.utts}

        # containers initialized empty; they are not populated in this
        # method
        self.limits = dict()
        self.gender = dict()
        self.spk2utts = dict()

        self.log.debug('loaded %i utterances from %i speakers', self.size,
                       len(self.speakers))
示例#3
0
    def __init__(self, corpus, log=logger.null_logger()):
        """Removes utterances in 'not_kept_utts' from the
        wavs in the corpus

        'corpus' is an instance of Corpus, its `utt2spk` mapping is
        read here.

        'log' is the logger instance where to send messages.

        'not_kept_utts' is a dictionary of the form:
        not_kept_utts=(speaker: [(utt1, wav_id, start_time, stop_time),
        (utt1, wav_id, start_time, stop_time)...])

        NOTE(review): 'not_kept_utts' is not a parameter of this
        constructor; presumably it is given to another method of the
        class -- confirm.
        """
        self.log = log
        self.corpus = corpus

        # items() works on both Python 2 and 3 (iteritems() is Python 2
        # only). The (utterance, speaker) pairs are kept in a list
        # rather than a one-shot zip() iterator so they can be
        # traversed several times, and iterating the items directly
        # avoids the ValueError raised by unpacking zip(*items) when
        # utt2spk is empty.
        self.utts = [(utt, spk) for utt, spk in self.corpus.utt2spk.items()]
        self.speakers = {spk for _, spk in self.utts}
示例#4
0
    def __init__(self, corpus, log=logger.null_logger(),
                 random_seed=None, prune=True):
        """Seed the random generator and load utterances from `corpus`.

        Parameters:
        -----------

        corpus: the input corpus, its `utt2spk` mapping is read here.

        log: the logger instance where to send messages, default is
          `logger.null_logger()`.

        random_seed: value forwarded to `random.seed()`. When None
          (the default) the seed is not logged but `random.seed(None)`
          is still called.

        prune (bool): stored as `self.prune`, default to True. It is
          not otherwise used in this method.

        """
        self.log = log
        self.prune = prune
        self.corpus = corpus

        # seed the random generator
        if random_seed is not None:
            self.log.debug('random seed is %i', random_seed)
        random.seed(random_seed)

        # read utt2spk from the input corpus. Iterating the items
        # directly (instead of unpacking zip(*items)) yields the same
        # list of (utterance, speaker) pairs but also accepts an empty
        # utt2spk, which used to fail with "not enough values to
        # unpack".
        self.utts = [(utt, spk) for utt, spk in self.corpus.utt2spk.items()]
        self.size = len(self.utts)
        self.speakers = {spk for _, spk in self.utts}
        self.log.debug('loaded %i utterances from %i speakers',
                       self.size, len(self.speakers))
示例#5
0
    def __init__(self, corpus, recipe_dir,
                 name='recipe', log=logger.null_logger()):
        """Initialize a recipe on `corpus`, stored in `recipe_dir`.

        Parameters:
        -----------

        corpus: the input corpus. Only the utterances returned by
          `self._desired_utterances(corpus)` are kept, through
          `corpus.subcorpus(..., validate=False)`.

        recipe_dir (path): the recipe directory, created if it does
          not exist.

        name (str): stored as `self.name`, default is 'recipe'.

        log: the logger instance where to send messages, default is
          `logger.null_logger()`.

        Raise:
        ------

        OSError if `recipe_dir` cannot be created.

        """
        # filter out short utterances
        self.corpus = corpus.subcorpus(
            self._desired_utterances(corpus), validate=False)

        # init the recipe directory, create it if needed. The creation
        # is attempted directly and the error ignored only when the
        # directory is there afterwards: checking isdir() first is racy
        # when another process creates the directory in between.
        self.recipe_dir = recipe_dir
        try:
            os.makedirs(self.recipe_dir)
        except OSError:
            if not os.path.isdir(self.recipe_dir):
                raise

        self.name = name
        self.log = log

        # init the path to kaldi
        self.kaldi_root = config.get('kaldi', 'kaldi-directory')

        # init the path to abkhazia/share
        self.share_dir = pkg_resources.resource_filename(
            pkg_resources.Requirement.parse('abkhazia'), 'abkhazia/share')
示例#6
0
 def __init__(self, corpus, njobs=default_njobs(),
              log=logger.null_logger()):
     """Store the corpus, the number of jobs and the logger.

     Parameters:
     -----------

     corpus: stored as `self.corpus`.

     njobs: stored as `self.njobs`, default is the value returned by
       `default_njobs()` when this method is defined.

     log: stored as `self.log`, default is `logger.null_logger()`.

     """
     self.corpus, self.njobs, self.log = corpus, njobs, log
示例#7
0
def prepare_lang(
        corpus,
        output_dir,
        level='word',
        silence_probability=0.5,
        position_dependent_phones=False,
        keep_tmp_dirs=False,
        log=logger.null_logger()):
    """Run the Kaldi wsj/utils/prepare_lang.sh script on a corpus

    Populate the directory `output_dir` as described in
    http://kaldi-asr.org/doc/data_prep.html#data_prep_lang_creating.
    Among other files, this produces the L.fst part of the HCLG
    model.

    Parameters:
    -----------

    corpus (Corpus): abkhazia corpus to prepare lang for.

    output_dir (path): directory where to write prepared files,
      converted to an absolute path.

    level ('word' or 'phone'): 'word' (the default) prepares the
      corpus at word level, 'phone' prepares it at phone level. The
      prepared data will be used to train language and acoustic
      models at either word or phone level.

    silence_probability (float): the probability to have a silence
      phone. Usually 0.0 or 0.5, default is 0.5.

    position_dependent_phones (bool): default to False. Set it
      according to whether the language model produced is destined
      to be used with an acoustic model trained with or without word
      position dependent variants of the phones.

    keep_tmp_dirs (bool): default to False. If true, keep the
      directories 'recipe' and 'local' in `output_dir`, if false
      remove them before returning.

    log (logger.Logging): the logger instance where to send
      messages, default is to disable the log.

    Return:
    -------

    The return code of the Kaldi prepare_lang script, as given by
    `jobs.run`. 0 for success, any other for error.

    """
    output_dir = os.path.abspath(output_dir)
    log.info('preparing lexicon in %s (L.fst)...', output_dir)

    # temporary directory given to the Kaldi script, removed on exit
    # unless keep_tmp_dirs is true
    tmp_dir = os.path.join(output_dir, 'local')

    # the kaldi recipe lives in output_dir/recipe
    recipe = Abkhazia2Kaldi(
        corpus, os.path.join(output_dir, 'recipe'), name='dict', log=log)

    recipe.setup_phones()
    recipe.setup_silences()
    recipe.setup_variants()
    recipe.setup_kaldi_folders()
    recipe.setup_machine_specific_scripts()

    # any level other than 'word' gets the phone lexicon
    setup_lexicon = (
        recipe.setup_lexicon if level == 'word'
        else recipe.setup_phone_lexicon)
    setup_lexicon()

    # Two candidate scripts: the original prepare_lang.sh from the
    # Kaldi wsj recipe, and prepare_lang_wpdpl.sh from the share
    # directory. The latter carries the slight customizations needed
    # to decode with a phone loop language model when word position
    # dependent phone variants have been trained.
    standard_script = os.path.join(
        recipe.kaldi_root, 'egs', 'wsj', 's5', 'utils', 'prepare_lang.sh')
    wpdpl_script = os.path.join(
        recipe.share_dir, 'prepare_lang_wpdpl.sh')

    # the customized script is used only at phone level with word
    # position dependent phones, otherwise fall back to the original
    if level == 'phone' and position_dependent_phones:
        script = wpdpl_script
    else:
        script = standard_script

    # generate the bash command we will run. Only the arguments are
    # formatted, the script path is prepended untouched.
    arguments = (
        ' --position-dependent-phones {wpd}'
        ' --sil-prob {sil} {input} "<unk>" {temp} {output}').format(
            wpd=bool2str(position_dependent_phones),
            sil=silence_probability,
            input=recipe._local_path(),
            temp=tmp_dir,
            output=output_dir)
    command = script + arguments

    # run the command in Kaldi and forward its return code, cleaning
    # up the temporary directories whether or not the run succeeded
    log.info('running "%s"', command)
    try:
        return jobs.run(
            command,
            cwd=recipe.recipe_dir,
            env=kaldi_path(),
            stdout=log.debug)
    finally:
        if not keep_tmp_dirs:
            remove(recipe.recipe_dir)
            remove(tmp_dir)
示例#8
0
 def __init__(self, log=null_logger()):
     """Store the logger and create a new `Meta` instance.

     log: the logger instance stored as `self.log`, default is
       `null_logger()`.

     """
     self.log = log
     self.meta = Meta()