示例#1
0
    def __init__(self,
                 audiopaths_and_text,
                 hparams,
                 speaker_ids=None,
                 mode='train'):
        """Set up the dataset from a file list and hyper-parameters.

        Args:
            audiopaths_and_text: File list handed to the loader helpers,
                which are called with a tab as the split character.
                Outside of train mode, a value that is not a ``str`` or
                ``Path`` naming an existing file is not loaded; a
                three-item placeholder list is stored instead.
            hparams: Hyper-parameter object. The attributes read below are
                copied onto the instance and used to build the STFT layer.
            speaker_ids: Optional speaker lookup table. When ``None``, one
                is built with ``create_speaker_lookup_table``.
            mode: Split on ``'-'``. A first part equal to ``'train'``
                selects the training loader, and ``self.mode`` becomes the
                second part when there are exactly two parts, otherwise
                ``True``. Any other value selects the plain loader and
                sets ``self.mode`` to ``False``.
        """
        self.hparams = hparams

        mode_parts = mode.split('-')
        if mode_parts[0] != 'train':
            self.mode = False
            is_filelist = (isinstance(audiopaths_and_text, (str, Path))
                           and os.path.isfile(audiopaths_and_text))
            if is_filelist:
                self.audiopaths_and_text = load_filepaths_and_text(
                    audiopaths_and_text, split='\t')
            else:
                # Nothing to load from disk: keep a placeholder record.
                self.audiopaths_and_text = ['audiopath', 'text', 'speaker']
        else:
            self.audiopaths_and_text = load_filepaths_and_text_train(
                audiopaths_and_text, split='\t')
            # 'train-<tag>' keeps the tag; plain 'train' (or any other
            # number of parts) is recorded as True.
            self.mode = mode_parts[1] if len(mode_parts) == 2 else True

        # Hyper-parameters mirrored onto the instance under the same name.
        mirrored_fields = (
            'text_cleaners',
            'max_wav_value',
            'sampling_rate',
            'filter_length',
            'hop_length',
            'f0_min',
            'f0_max',
            'harm_thresh',
            'p_arpabet',
            'max_decoder_steps',
            'encoder_model_fpath',
        )
        for field_name in mirrored_fields:
            setattr(self, field_name, getattr(hparams, field_name))

        self.f0_dim = hparams.prenet_f0_dim  # dimension setting for f0

        self.stft = layers.TacotronSTFT(
            hparams.filter_length,
            hparams.hop_length,
            hparams.win_length,
            hparams.n_mel_channels,
            hparams.sampling_rate,
            hparams.mel_fmin,
            hparams.mel_fmax,
        )

        # The pronunciation dictionary is only loaded when a path is given.
        dict_path = hparams.cmudict_path
        self.cmudict = (cmudict.CMUDict(dict_path)
                        if dict_path is not None else None)

        self.speaker_ids = speaker_ids
        if speaker_ids is None:
            self.speaker_ids = self.create_speaker_lookup_table(
                self.audiopaths_and_text)

        # NOTE: seeded shuffling of the file list is currently disabled.

        # One integer id per entry of the loaded file list.
        entry_count = len(self.audiopaths_and_text)
        self.ids = set(range(entry_count))
示例#2
0
    def __init__(self,
                 audiopaths_and_text,
                 hparams,
                 speaker_ids=None,
                 mode='train'):
        """Set up the dataset from a file list and hyper-parameters.

        Args:
            audiopaths_and_text: File list handed to the loader helpers,
                which are called with a tab as the split character.
            hparams: Hyper-parameter object. The attributes read below are
                copied onto the instance and used to build the STFT layer.
            speaker_ids: Optional speaker lookup table. When ``None``, one
                is built with ``create_speaker_lookup_table``.
            mode: Split on ``'-'``. A first part equal to ``'train'``
                selects the training loader, and ``self.mode`` becomes the
                second part when there are exactly two parts, otherwise
                ``True``. Any other value selects the plain loader and
                sets ``self.mode`` to ``False``.
        """
        self.hparams = hparams

        mode_parts = mode.split('-')
        if mode_parts[0] != 'train':
            self.audiopaths_and_text = load_filepaths_and_text(
                audiopaths_and_text, split='\t')
            self.mode = False
        else:
            self.audiopaths_and_text = load_filepaths_and_text_train(
                audiopaths_and_text, split='\t')
            # 'train-<tag>' keeps the tag; plain 'train' (or any other
            # number of parts) is recorded as True.
            self.mode = mode_parts[1] if len(mode_parts) == 2 else True

        # Hyper-parameters mirrored onto the instance under the same name.
        mirrored_fields = (
            'text_cleaners',
            'max_wav_value',
            'sampling_rate',
            'filter_length',
            'hop_length',
            'f0_min',
            'f0_max',
            'harm_thresh',
            'p_arpabet',
        )
        for field_name in mirrored_fields:
            setattr(self, field_name, getattr(hparams, field_name))

        self.f0_dim = hparams.prenet_f0_dim  # dimension setting for f0

        self.stft = layers.TacotronSTFT(
            hparams.filter_length,
            hparams.hop_length,
            hparams.win_length,
            hparams.n_mel_channels,
            hparams.sampling_rate,
            hparams.mel_fmin,
            hparams.mel_fmax,
        )

        # The pronunciation dictionary is only loaded when a path is given.
        dict_path = hparams.cmudict_path
        self.cmudict = (cmudict.CMUDict(dict_path)
                        if dict_path is not None else None)

        self.speaker_ids = speaker_ids
        if speaker_ids is None:
            self.speaker_ids = self.create_speaker_lookup_table(
                self.audiopaths_and_text)