Пример #1
0
    def __init__(self, filelist_path, filter_length, hop_length, win_length,
                 sampling_rate, mel_fmin, mel_fmax, max_wav_value, p_arpabet,
                 cmudict_path, text_cleaners, speaker_ids=None,
                 use_attn_prior=False, attn_prior_threshold=1e-4,
                 randomize=True, seed=1234):
        self.max_wav_value = max_wav_value
        self.audiopaths_and_text = load_filepaths_and_text(filelist_path)
        self.use_attn_prior = use_attn_prior
        self.attn_prior_threshold = attn_prior_threshold

        if speaker_ids is None or speaker_ids == '':
            self.speaker_ids = self.create_speaker_lookup_table(self.audiopaths_and_text)
        else:
            self.speaker_ids = speaker_ids

        self.stft = TacotronSTFT(filter_length=filter_length,
                                 hop_length=hop_length,
                                 win_length=win_length,
                                 sampling_rate=sampling_rate,
                                 mel_fmin=mel_fmin, mel_fmax=mel_fmax)
        self.sampling_rate = sampling_rate
        self.text_cleaners = text_cleaners
        self.p_arpabet = p_arpabet
        self.cmudict = cmudict.CMUDict(cmudict_path, keep_ambiguous=True)
        if speaker_ids is None:
            self.speaker_ids = self.create_speaker_lookup_table(self.audiopaths_and_text)
        else:
            self.speaker_ids = speaker_ids

        random.seed(seed)
        if randomize:
            random.shuffle(self.audiopaths_and_text)
Пример #2
0
    def __init__(self,
                 filelist_path,
                 filter_length,
                 hop_length,
                 win_length,
                 sampling_rate,
                 mel_fmin,
                 mel_fmax,
                 max_wav_value,
                 p_arpabet,
                 cmudict_path,
                 text_cleaners,
                 speaker_ids=None,
                 use_attn_prior=False,
                 attn_prior_threshold=1e-4,
                 prior_cache_path="",
                 betab_scaling_factor=1.0,
                 randomize=True,
                 keep_ambiguous=False,
                 seed=1234):
        self.max_wav_value = max_wav_value
        self.audiopaths_and_text = load_filepaths_and_text(filelist_path)
        self.use_attn_prior = use_attn_prior
        self.betab_scaling_factor = betab_scaling_factor
        self.attn_prior_threshold = attn_prior_threshold
        self.keep_ambiguous = keep_ambiguous

        if speaker_ids is None or speaker_ids == '':
            self.speaker_ids = self.create_speaker_lookup_table(
                self.audiopaths_and_text)
        else:
            self.speaker_ids = speaker_ids

        self.stft = TacotronSTFT(filter_length=filter_length,
                                 hop_length=hop_length,
                                 win_length=win_length,
                                 sampling_rate=sampling_rate,
                                 mel_fmin=mel_fmin,
                                 mel_fmax=mel_fmax)
        self.sampling_rate = sampling_rate
        self.text_cleaners = text_cleaners
        self.p_arpabet = p_arpabet
        self.cmudict = cmudict.CMUDict(cmudict_path,
                                       keep_ambiguous=keep_ambiguous)
        if speaker_ids is None:
            self.speaker_ids = self.create_speaker_lookup_table(
                self.audiopaths_and_text)
        else:
            self.speaker_ids = speaker_ids

        # caching makes sense for p_phoneme=1.0
        # for other values, everytime text lengths will change
        self.prior_cache_path = prior_cache_path
        self.caching_enabled = False
        if (self.prior_cache_path is not None and self.prior_cache_path != ""
                and p_arpabet == 1.0):
            self.caching_enabled = True
        # make sure caching path exists
        if (self.caching_enabled
                and not os.path.exists(self.prior_cache_path)):
            os.makedirs(self.prior_cache_path)

        random.seed(seed)
        if randomize:
            random.shuffle(self.audiopaths_and_text)