def __init__(
    self,
    root_dir,
    charactor_query="*-ids.npy",
    mel_query="*-norm-feats.npy",
    duration_query="*-durations.npy",
    charactor_load_fn=np.load,
    mel_load_fn=np.load,
    duration_load_fn=np.load,
    mel_length_threshold=0,
):
    """Initialize dataset.

    Args:
        root_dir (str): Root directory including dumped files.
        charactor_query (str): Query to find charactor files in root_dir.
        mel_query (str): Query to find feature files in root_dir.
        duration_query (str): Query to find duration files in root_dir.
        charactor_load_fn (func): Function to load charactor file.
        mel_load_fn (func): Function to load feature file.
        duration_load_fn (func): Function to load duration file.
        mel_length_threshold (int): Threshold to remove short feature files.
    """
    # Find all charactor, mel and duration files.
    charactor_files = sorted(find_files(root_dir, charactor_query))
    mel_files = sorted(find_files(root_dir, mel_query))
    duration_files = sorted(find_files(root_dir, duration_query))

    # Assert the number of files.
    assert len(mel_files) != 0, f"Not found any mel files in {root_dir}."
    assert (
        len(mel_files) == len(charactor_files) == len(duration_files)
    ), (
        f"Number of charactor, mel and duration files are different "
        f"({len(mel_files)} vs {len(charactor_files)} vs {len(duration_files)})."
    )

    if ".npy" in charactor_query:
        suffix = charactor_query[1:]
        utt_ids = [os.path.basename(f).replace(suffix, "") for f in charactor_files]
    else:
        # Fall back to the bare basename so utt_ids is always defined.
        utt_ids = [os.path.splitext(os.path.basename(f))[0] for f in charactor_files]

    # Set global params.
    self.utt_ids = utt_ids
    self.mel_files = mel_files
    self.charactor_files = charactor_files
    self.duration_files = duration_files
    self.mel_load_fn = mel_load_fn
    self.charactor_load_fn = charactor_load_fn
    self.duration_load_fn = duration_load_fn
    self.mel_length_threshold = mel_length_threshold
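# Every constructor in this file depends on a `find_files` helper defined
# elsewhere in the repository. Below is a minimal sketch of the assumed
# behaviour (recursive glob-style matching under root_dir); note that one
# call site further down passes a third `depth` argument, so the project may
# ship more than one variant of this helper.

import fnmatch
import os


def find_files(root_dir, query="*.wav", include_root_dir=True):
    """Recursively collect files under root_dir whose names match query."""
    files = []
    for root, _, filenames in os.walk(root_dir, followlinks=True):
        for filename in fnmatch.filter(filenames, query):
            files.append(os.path.join(root, filename))
    if not include_root_dir:
        files = [f.replace(root_dir + "/", "") for f in files]
    return files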
def __init__(
    self,
    root_dir,
    audio_query="*-wave.npy",
    mel_query="*-raw-feats.npy",
    audio_load_fn=np.load,
    mel_load_fn=np.load,
    audio_length_threshold=0,
    mel_length_threshold=0,
):
    """Initialize dataset.

    Args:
        root_dir (str): Root directory including dumped files.
        audio_query (str): Query to find audio files in root_dir.
        mel_query (str): Query to find feature files in root_dir.
        audio_load_fn (func): Function to load audio file.
        mel_load_fn (func): Function to load feature file.
        audio_length_threshold (int): Threshold to remove short audio files.
        mel_length_threshold (int): Threshold to remove short feature files.
    """
    # Find all audio and mel files.
    audio_files = sorted(find_files(root_dir, audio_query))
    mel_files = sorted(find_files(root_dir, mel_query))

    # Assert the number of files.
    assert len(audio_files) != 0, f"Not found any audio files in {root_dir}."
    assert len(audio_files) == len(mel_files), (
        f"Number of audio and mel files are different "
        f"({len(audio_files)} vs {len(mel_files)})."
    )

    if ".npy" in audio_query:
        suffix = audio_query[1:]
        utt_ids = [os.path.basename(f).replace(suffix, "") for f in audio_files]
    else:
        # Fall back to the bare basename so utt_ids is always defined.
        utt_ids = [os.path.splitext(os.path.basename(f))[0] for f in audio_files]

    # Set global params.
    self.utt_ids = utt_ids
    self.audio_files = audio_files
    self.mel_files = mel_files
    self.audio_load_fn = audio_load_fn
    self.mel_load_fn = mel_load_fn
    self.audio_length_threshold = audio_length_threshold
    self.mel_length_threshold = mel_length_threshold
def __init__(
    self,
    root_dir,
    mel_query="*-raw-feats.h5",
    mel_load_fn=np.load,
    mel_length_threshold=0,
):
    """Initialize dataset.

    Args:
        root_dir (str): Root directory including dumped files.
        mel_query (str): Query to find feature files in root_dir.
        mel_load_fn (func): Function to load feature file.
        mel_length_threshold (int): Threshold to remove short feature files.
    """
    # Find all mel files.
    mel_files = sorted(find_files(root_dir, mel_query))
    mel_lengths = [mel_load_fn(f).shape[0] for f in mel_files]

    # Assert the number of files.
    assert len(mel_files) != 0, f"Not found any mel files in {root_dir}."

    if ".npy" in mel_query:
        suffix = mel_query[1:]
        utt_ids = [os.path.basename(f).replace(suffix, "") for f in mel_files]
    else:
        # The default query is *.h5, so make sure utt_ids is defined either way.
        utt_ids = [os.path.splitext(os.path.basename(f))[0] for f in mel_files]

    # Set global params.
    self.utt_ids = utt_ids
    self.mel_files = mel_files
    self.mel_lengths = mel_lengths
    self.mel_load_fn = mel_load_fn
    self.mel_length_threshold = mel_length_threshold
def main():
    args = parse_args()
    args.train_dataset = args.eval_dataset = args.dataset

    with open(args.config) as f:
        config = yaml.load(f, Loader=yaml.Loader)
    with open(config['speech_config']) as f:
        mel_config = yaml.load(f, Loader=yaml.Loader)
    config.update(mel_config)
    config.update(vars(args))

    model = GE2E(name='ge2e', **config['model'])
    model.load_weights(args.restore)

    window_length = (config['data']['min_frames'] + config['data']['max_frames']) // 2
    suffix = '*_mel.npy'
    save_as = '_gc.npy'

    for dataset in args.dataset:
        files = find_files(dataset, suffix)
        print('files of %s:' % dataset, len(files), files[0])
        for file in tqdm(files):
            save_name = file.replace(suffix[1:], save_as)
            # if os.path.isfile(save_name):
            #     continue
            s = np.load(file)
            len_s = len(s)
            # Tile short utterances until they cover at least one window.
            if len_s < window_length:
                s = np.concatenate([s for _ in range((window_length + len_s) // len_s)])
            s = batch_frames(s, window_length=window_length, overlap=0.5)
            d = model.inference(s)
            np.save(save_name, d)
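# `batch_frames` above is assumed to slice the (possibly tiled) mel sequence
# into fixed-length windows with fractional overlap, so the model can infer
# d-vectors over several crops of one utterance. A hypothetical sketch under
# that assumption; the real helper may differ:

import numpy as np


def batch_frames(s, window_length, overlap=0.5):
    """Stack overlapping windows of window_length frames from s of shape [T, n_mels]."""
    hop = max(1, int(window_length * (1 - overlap)))
    starts = range(0, len(s) - window_length + 1, hop)
    return np.stack([s[i:i + window_length] for i in starts])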
def __init__(
    self,
    root_dir,
    charactor_query="*-ids.npy",
    charactor_load_fn=np.load,
    return_utt_id=False,
):
    """Initialize dataset.

    Args:
        root_dir (str): Root directory including dumped files.
        charactor_query (str): Query to find charactor files in root_dir.
        charactor_load_fn (func): Function to load charactor file.
        return_utt_id (bool): Whether to return the utterance id with arrays.
    """
    # Find all charactor files.
    charactor_files = sorted(find_files(root_dir, charactor_query))

    # Assert the number of files.
    assert len(charactor_files) != 0, f"Not found any charactor files in {root_dir}."

    if ".npy" in charactor_query:
        suffix = charactor_query[1:]
        utt_ids = [os.path.basename(f).replace(suffix, "") for f in charactor_files]
    else:
        # Fall back to the bare basename so utt_ids is always defined.
        utt_ids = [os.path.splitext(os.path.basename(f))[0] for f in charactor_files]

    # Set global params.
    self.utt_ids = utt_ids
    self.charactor_files = charactor_files
    self.charactor_load_fn = charactor_load_fn
    self.return_utt_id = return_utt_id
def __init__(self, roots, **kwargs):
    super().__init__(**kwargs)
    self._metadata = []
    for root in roots:
        self._metadata += find_files(root, '*_mel.npy')
    self._metadata = sorted(self._metadata)
    # LibriSpeech paths are one level deeper (speaker/chapter/utterance), so
    # the speaker id sits at a different depth than in other corpora.
    self._get_speaker_from_filename = (
        lambda filename: filename.split('/')[-3]
        if 'libri' in filename.lower()
        else filename.split('/')[-2]
    )
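# Example of the depth assumption above (the paths are illustrative, not
# taken from this repository):
#   LibriSpeech/train-clean-100/19/198/19-198-0001_mel.npy -> speaker '19'
#   VCTK/wav48/p225/p225_001_mel.npy                       -> speaker 'p225'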
def __init__(
    self,
    root_dir,
    audio_query="*-wave.npy",
    audio_load_fn=np.load,
    audio_length_threshold=None,
    return_utt_id=False,
):
    """Initialize dataset.

    Args:
        root_dir (str): Root directory including dumped files.
        audio_query (str): Query to find audio files in root_dir.
        audio_load_fn (func): Function to load audio file.
        audio_length_threshold (int): Threshold to remove short audio files.
        return_utt_id (bool): Whether to return the utterance id with arrays.
    """
    # Find all audio files.
    audio_files = sorted(find_files(root_dir, audio_query))
    audio_lengths = [audio_load_fn(f).shape[0] for f in audio_files]

    # Filter by threshold.
    if audio_length_threshold is not None:
        idxs = [
            idx for idx in range(len(audio_files))
            if audio_lengths[idx] > audio_length_threshold
        ]
        if len(audio_files) != len(idxs):
            logging.warning(
                f"Some files are filtered by audio length threshold "
                f"({len(audio_files)} -> {len(idxs)}).")
        audio_files = [audio_files[idx] for idx in idxs]
        # Keep the cached lengths in sync with the filtered file list.
        audio_lengths = [audio_lengths[idx] for idx in idxs]

    # Assert the number of files.
    assert len(audio_files) != 0, f"Not found any audio files in {root_dir}."

    if ".npy" in audio_query:
        suffix = audio_query[1:]
        utt_ids = [os.path.basename(f).replace(suffix, "") for f in audio_files]
    else:
        utt_ids = [os.path.splitext(os.path.basename(f))[0] for f in audio_files]

    # Set global params.
    self.utt_ids = utt_ids
    self.audio_files = audio_files
    self.audio_lengths = audio_lengths
    self.audio_load_fn = audio_load_fn
    self.return_utt_id = return_utt_id
def __init__(
    self,
    root_dir,
    mel_query="*-raw-feats.h5",
    mel_load_fn=np.load,
    mel_length_threshold=None,
    return_utt_id=False,
):
    """Initialize dataset.

    Args:
        root_dir (str): Root directory including dumped files.
        mel_query (str): Query to find feature files in root_dir.
        mel_load_fn (func): Function to load feature file.
        mel_length_threshold (int): Threshold to remove short feature files.
        return_utt_id (bool): Whether to return the utterance id with arrays.
    """
    # Find all mel files.
    mel_files = sorted(find_files(root_dir, mel_query))
    mel_lengths = [mel_load_fn(f).shape[0] for f in mel_files]

    # Filter by threshold.
    if mel_length_threshold is not None:
        idxs = [
            idx for idx in range(len(mel_files))
            if mel_lengths[idx] > mel_length_threshold
        ]
        if len(mel_files) != len(idxs):
            logging.warning(
                f"Some files are filtered by mel length threshold "
                f"({len(mel_files)} -> {len(idxs)}).")
        mel_files = [mel_files[idx] for idx in idxs]
        # Keep the cached lengths in sync with the filtered file list.
        mel_lengths = [mel_lengths[idx] for idx in idxs]

    # Assert the number of files.
    assert len(mel_files) != 0, f"Not found any mel files in {root_dir}."

    if ".npy" in mel_query:
        # Rebuild the utt_id from the first two dash-separated tokens.
        utt_ids = [
            "-".join(os.path.basename(f).split("-")[:2]) for f in mel_files
        ]
    else:
        utt_ids = [os.path.splitext(os.path.basename(f))[0] for f in mel_files]

    # Set global params.
    self.utt_ids = utt_ids
    self.mel_files = mel_files
    self.mel_lengths = mel_lengths
    self.mel_load_fn = mel_load_fn
    self.return_utt_id = return_utt_id
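# Example of the utt_id reconstruction above: the first two dash-separated
# tokens of the basename are rejoined, e.g. "LJ001-0001-raw-feats.npy"
# -> "LJ001-0001" (the filename is illustrative; any "<a>-<b>-..." basename
# works the same way).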
def main():
    args = parse_args()

    # The speaker id sits at a different path depth per corpus; `start` skips
    # the header lines of the corresponding speaker-info file.
    if 'vctk' in args.dataset.lower():
        depth, start = 2, 1
    elif 'libri' in args.dataset.lower():
        depth, start = 3, 13
    else:
        depth, start = 3, 13

    speaker_files = find_files(args.dataset, '_gc.npy', depth)
    func = lambda filename: filename.split('/')[-depth]

    # Group embedding files by speaker.
    d = {}
    for f in speaker_files:
        s = func(f)
        if s not in d:
            d[s] = []
        d[s].append(f)
    speaker_files = d

    if args.save:
        os.makedirs(args.save, exist_ok=True)
    out_v = io.open('/'.join([args.save, 'vecs.tsv']), 'w', encoding='utf-8')
    out_m = io.open('/'.join([args.save, 'meta.tsv']), 'w', encoding='utf-8')

    with open(args.speaker_info) as file:
        for line in np.random.choice(file.readlines()[start:], size=args.n):
            speaker, info = line.split(maxsplit=1)
            if depth == 2:
                # VCTK speaker-info lists "225" while directories are "p225".
                speaker = 'p' + speaker
            if speaker not in speaker_files:
                continue
            for i, filename in enumerate(
                    np.random.choice(
                        speaker_files[speaker],
                        size=args.m,
                        # Sample with replacement only when the speaker has
                        # fewer files than requested.
                        replace=args.m > len(speaker_files[speaker]))):
                d_vector = np.load(filename)
                out_m.write(line.replace(speaker, speaker + ' ' + str(i)))
                out_v.write('\t'.join([str(x) for x in d_vector]) + '\n')

    out_v.close()
    out_m.close()
def main():
    args = parse_args()
    with open(args.config) as f:
        config = yaml.load(f, Loader=yaml.Loader)
    model = TFSpeechFeaturizer(config)

    # The executor and futures are only used by the commented-out parallel
    # path below; the active loop processes files serially.
    executor = ProcessPoolExecutor(max_workers=cpu_count())
    futures = []

    all_filenames = find_files(args.dataset, args.suffix)
    print('num files total: %d' % len(all_filenames), all_filenames[0])
    suffix = args.suffix.replace('*', '')

    # for file in all_filenames:
    #     futures.append(executor.submit(partial(process_file, file, model, suffix)))
    # results = [future.result() for future in tqdm(futures)]
    for file in tqdm(all_filenames):
        process_file(file, model, suffix)
def __init__(
    self,
    root_dir,
    charactor_query="*-ids.npy",
    mel_query="*-norm-feats.npy",
    duration_query="*-durations.npy",
    f0_query="*-raw-f0.npy",
    energy_query="*-raw-energy.npy",
    f0_stat="./dump/stats_f0.npy",
    energy_stat="./dump/stats_energy.npy",
    charactor_load_fn=np.load,
    mel_load_fn=np.load,
    duration_load_fn=np.load,
    f0_load_fn=np.load,
    energy_load_fn=np.load,
    mel_length_threshold=0,
    speakers_map=None,
):
    """Initialize dataset.

    Args:
        root_dir (str): Root directory including dumped files.
        charactor_query (str): Query to find charactor files in root_dir.
        mel_query (str): Query to find feature files in root_dir.
        duration_query (str): Query to find duration files in root_dir.
        f0_query (str): Query to find f0 files in root_dir.
        energy_query (str): Query to find energy files in root_dir.
        f0_stat (str): Path of the f0 statistics file.
        energy_stat (str): Path of the energy statistics file.
        charactor_load_fn (func): Function to load charactor file.
        mel_load_fn (func): Function to load feature file.
        duration_load_fn (func): Function to load duration file.
        f0_load_fn (func): Function to load f0 file.
        energy_load_fn (func): Function to load energy file.
        mel_length_threshold (int): Threshold to remove short feature files.
        speakers_map (dict): Speakers map generated in dataset preprocessing.
    """
    # Find all charactor, mel, duration, f0 and energy files.
    charactor_files = sorted(find_files(root_dir, charactor_query))
    mel_files = sorted(find_files(root_dir, mel_query))
    duration_files = sorted(find_files(root_dir, duration_query))
    f0_files = sorted(find_files(root_dir, f0_query))
    energy_files = sorted(find_files(root_dir, energy_query))

    # Assert the number of files.
    assert len(mel_files) != 0, f"Not found any mel files in {root_dir}."
    assert (
        len(mel_files) == len(charactor_files) == len(duration_files)
        == len(f0_files) == len(energy_files)
    ), "Number of charactor, mel, duration, f0 and energy files are different."
    assert speakers_map is not None, "No speakers map found. Did you set --dataset_mapping?"

    if ".npy" in charactor_query:
        suffix = charactor_query[1:]
        utt_ids = [os.path.basename(f).replace(suffix, "") for f in charactor_files]
    else:
        utt_ids = [os.path.splitext(os.path.basename(f))[0] for f in charactor_files]

    # Set global params.
    self.utt_ids = utt_ids
    self.mel_files = mel_files
    self.charactor_files = charactor_files
    self.duration_files = duration_files
    self.f0_files = f0_files
    self.energy_files = energy_files
    self.mel_load_fn = mel_load_fn
    self.charactor_load_fn = charactor_load_fn
    self.duration_load_fn = duration_load_fn
    self.f0_load_fn = f0_load_fn
    self.energy_load_fn = energy_load_fn
    self.mel_length_threshold = mel_length_threshold
    self.speakers_map = speakers_map
    # Utterance ids are expected to look like "<speaker>_<utterance>".
    self.speakers = [self.speakers_map[i.split("_")[0]] for i in self.utt_ids]
    print("Speaker: utt_id", list(zip(self.speakers, self.utt_ids)))
    self.f0_stat = np.load(f0_stat)
    self.energy_stat = np.load(energy_stat)
def __init__(
    self,
    dataset,
    root_dir,
    charactor_query="*-ids.npy",
    mel_query="*-norm-feats.npy",
    charactor_load_fn=np.load,
    mel_load_fn=np.load,
    mel_length_threshold=0,
    reduction_factor=1,
    mel_pad_value=0.0,
    char_pad_value=0,
    ga_pad_value=-1.0,
    g=0.2,
    use_fixed_shapes=False,
):
    """Initialize dataset.

    Args:
        dataset (str): Dataset name.
        root_dir (str): Root directory including dumped files.
        charactor_query (str): Query to find charactor files in root_dir.
        mel_query (str): Query to find feature files in root_dir.
        charactor_load_fn (func): Function to load charactor file.
        mel_load_fn (func): Function to load feature file.
        mel_length_threshold (int): Threshold to remove short feature files.
        reduction_factor (int): Reduction factor in the Tacotron-2 paper.
        mel_pad_value (float): Padding value for mel-spectrogram.
        char_pad_value (int): Padding value for charactor.
        ga_pad_value (float): Padding value for guided attention.
        g (float): G value for guided attention.
        use_fixed_shapes (bool): Use fixed shape for mel targets or not.
        max_char_length (int): Maximum charactor length if use_fixed_shapes=True.
        max_mel_length (int): Maximum mel length if use_fixed_shapes=True.
    """
    # Find all charactor and mel files.
    charactor_files = sorted(find_files(root_dir, charactor_query))
    mel_files = sorted(find_files(root_dir, mel_query))
    mel_lengths = [mel_load_fn(f).shape[0] for f in mel_files]
    char_lengths = [charactor_load_fn(f).shape[0] for f in charactor_files]

    # Assert the number of files.
    assert len(mel_files) != 0, f"Not found any mel files in {root_dir}."
    assert len(mel_files) == len(charactor_files) == len(mel_lengths), (
        f"Number of charactor, mel and duration files are different "
        f"({len(mel_files)} vs {len(charactor_files)} vs {len(mel_lengths)})."
    )

    if ".npy" in charactor_query:
        suffix = charactor_query[1:]
        utt_ids = [os.path.basename(f).replace(suffix, "") for f in charactor_files]
    else:
        utt_ids = [os.path.splitext(os.path.basename(f))[0] for f in charactor_files]

    # Set global params.
    self.utt_ids = utt_ids
    self.mel_files = mel_files
    self.charactor_files = charactor_files
    self.mel_load_fn = mel_load_fn
    self.charactor_load_fn = charactor_load_fn
    self.mel_lengths = mel_lengths
    self.char_lengths = char_lengths
    self.reduction_factor = reduction_factor
    self.mel_length_threshold = mel_length_threshold
    self.mel_pad_value = mel_pad_value
    self.char_pad_value = char_pad_value
    self.ga_pad_value = ga_pad_value
    self.g = g
    self.use_fixed_shapes = use_fixed_shapes

    self.max_char_length = np.max(char_lengths)
    # Round the maximum mel length up to a multiple of the reduction factor.
    if np.max(mel_lengths) % self.reduction_factor == 0:
        self.max_mel_length = np.max(mel_lengths)
    else:
        self.max_mel_length = (
            np.max(mel_lengths)
            + self.reduction_factor
            - np.max(mel_lengths) % self.reduction_factor
        )
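# Worked example of the rounding above (values assumed for illustration):
# with reduction_factor=2 and np.max(mel_lengths)=871, 871 % 2 == 1, so
# max_mel_length = 871 + 2 - 1 = 872, the next multiple of 2. With an even
# maximum such as 870, the first branch keeps it unchanged.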
        # (tail of an enclosing function elided from this excerpt)
        return extract_from_mel(features)


@tf.function(input_signature=[
    tf.TensorSpec(shape=[None, speech_config['n_mels'], 1],
                  dtype=tf.float32,
                  name="signal")
])
def extract_from_mel(features):
    with tf.device('/cpu:0'):
        encoded = conformer.encoder_inference(features)
    return encoded


suffix = '.wav'
mel_query = '_mel.npy'
feature_query = '_conformer_enc16.npy'

audio_files = sorted(find_files(args.dataset, '*' + suffix))
print('files:', len(audio_files), audio_files[0])

for filename in tqdm(audio_files):
    mel = filename.replace(suffix, mel_query)
    if os.path.exists(mel):
        # Reuse a precomputed mel-spectrogram when one exists...
        features = np.load(mel).reshape([-1, speech_config['n_mels'], 1])
        encoded = extract_from_mel(features)
    else:
        # ...otherwise encode straight from the raw audio.
        signal = read_raw_audio(filename)
        encoded = extract_from_audio(signal)
    np.save(filename.replace(suffix, feature_query), encoded)
def __init__(
    self,
    root_dir,
    audio_query="*-wave.npy",
    mel_query="*-raw-feats.npy",
    audio_load_fn=np.load,
    mel_load_fn=np.load,
    audio_length_threshold=None,
    mel_length_threshold=None,
    return_utt_id=False,
):
    """Initialize dataset.

    Args:
        root_dir (str): Root directory including dumped files.
        audio_query (str): Query to find audio files in root_dir.
        mel_query (str): Query to find feature files in root_dir.
        audio_load_fn (func): Function to load audio file.
        mel_load_fn (func): Function to load feature file.
        audio_length_threshold (int): Threshold to remove short audio files.
        mel_length_threshold (int): Threshold to remove short feature files.
        return_utt_id (bool): Whether to return the utterance id with arrays.
    """
    # Find all audio and mel files.
    audio_files = sorted(find_files(root_dir, audio_query))
    mel_files = sorted(find_files(root_dir, mel_query))

    # Filter by threshold; keep audio and mel lists index-aligned.
    if audio_length_threshold is not None:
        audio_lengths = [audio_load_fn(f).shape[0] for f in audio_files]
        idxs = [
            idx for idx in range(len(audio_files))
            if audio_lengths[idx] > audio_length_threshold
        ]
        if len(audio_files) != len(idxs):
            logging.warning(
                f"Some files are filtered by audio length threshold "
                f"({len(audio_files)} -> {len(idxs)}).")
        audio_files = [audio_files[idx] for idx in idxs]
        mel_files = [mel_files[idx] for idx in idxs]
    if mel_length_threshold is not None:
        mel_lengths = [mel_load_fn(f).shape[0] for f in mel_files]
        idxs = [
            idx for idx in range(len(mel_files))
            if mel_lengths[idx] > mel_length_threshold
        ]
        if len(mel_files) != len(idxs):
            logging.warning(
                f"Some files are filtered by mel length threshold "
                f"({len(mel_files)} -> {len(idxs)}).")
        audio_files = [audio_files[idx] for idx in idxs]
        mel_files = [mel_files[idx] for idx in idxs]

    # Assert the number of files.
    assert len(audio_files) != 0, f"Not found any audio files in {root_dir}."
    assert len(audio_files) == len(mel_files), (
        f"Number of audio and mel files are different "
        f"({len(audio_files)} vs {len(mel_files)})."
    )

    if ".npy" in audio_query:
        utt_ids = [os.path.basename(f).replace("-wave.npy", "") for f in audio_files]
    else:
        utt_ids = [os.path.splitext(os.path.basename(f))[0] for f in audio_files]

    # Set global params.
    self.utt_ids = utt_ids
    self.audio_files = audio_files
    self.mel_files = mel_files
    self.audio_load_fn = audio_load_fn
    self.mel_load_fn = mel_load_fn
    self.return_utt_id = return_utt_id
def __init__(
    self,
    root_dir,
    charactor_query="*-ids.npy",
    mel_query="*-norm-feats.npy",
    duration_query="*-durations.npy",
    f0_query="*-raw-f0.npy",
    energy_query="*-raw-energy.npy",
    f0_stat="./dump/stats_f0.npy",
    energy_stat="./dump/stats_energy.npy",
    charactor_load_fn=np.load,
    mel_load_fn=np.load,
    duration_load_fn=np.load,
    f0_load_fn=np.load,
    energy_load_fn=np.load,
    mel_length_threshold=0,
):
    """Initialize dataset.

    Args:
        root_dir (str): Root directory including dumped files.
        charactor_query (str): Query to find charactor files in root_dir.
        mel_query (str): Query to find feature files in root_dir.
        duration_query (str): Query to find duration files in root_dir.
        f0_query (str): Query to find f0 files in root_dir.
        energy_query (str): Query to find energy files in root_dir.
        f0_stat (str): Path of the f0 statistics file.
        energy_stat (str): Path of the energy statistics file.
        charactor_load_fn (func): Function to load charactor file.
        mel_load_fn (func): Function to load feature file.
        duration_load_fn (func): Function to load duration file.
        f0_load_fn (func): Function to load f0 file.
        energy_load_fn (func): Function to load energy file.
        mel_length_threshold (int): Threshold to remove short feature files.
    """
    # Find all charactor, mel, duration, f0 and energy files.
    charactor_files = sorted(find_files(root_dir, charactor_query))
    mel_files = sorted(find_files(root_dir, mel_query))
    duration_files = sorted(find_files(root_dir, duration_query))
    f0_files = sorted(find_files(root_dir, f0_query))
    energy_files = sorted(find_files(root_dir, energy_query))

    # Assert the number of files.
    assert len(mel_files) != 0, f"Not found any mel files in {root_dir}."
    assert (
        len(mel_files) == len(charactor_files) == len(duration_files)
        == len(f0_files) == len(energy_files)
    ), "Number of charactor, mel, duration, f0 and energy files are different."

    if ".npy" in charactor_query:
        suffix = charactor_query[1:]
        utt_ids = [os.path.basename(f).replace(suffix, "") for f in charactor_files]
    else:
        utt_ids = [os.path.splitext(os.path.basename(f))[0] for f in charactor_files]

    # Set global params.
    self.utt_ids = utt_ids
    self.mel_files = mel_files
    self.charactor_files = charactor_files
    self.duration_files = duration_files
    self.f0_files = f0_files
    self.energy_files = energy_files
    self.mel_load_fn = mel_load_fn
    self.charactor_load_fn = charactor_load_fn
    self.duration_load_fn = duration_load_fn
    self.f0_load_fn = f0_load_fn
    self.energy_load_fn = energy_load_fn
    self.mel_length_threshold = mel_length_threshold

    # Build the speaker map from utterance ids.
    self.speakers_map = {}  # TODO
    sp_id = 0
    for i in self.utt_ids:
        sp_name = i.split("_")[0]
        if sp_name not in self.speakers_map:
            self.speakers_map[sp_name] = sp_id
            sp_id += 1
    # TODO: change; at the moment the MFA folder name = speaker name.
    self.speakers = [self.speakers_map[i.split("_")[0]] for i in self.utt_ids]

    self.f0_stat = np.load(f0_stat)
    self.energy_stat = np.load(energy_stat)
def __init__(
    self,
    root_dir,
    charactor_query="*-ids.npy",
    mel_query="*-norm-feats.npy",
    duration_query="*-durations.npy",
    charactor_load_fn=np.load,
    mel_load_fn=np.load,
    duration_load_fn=np.load,
    mel_length_threshold=None,
    return_utt_id=False,
):
    """Initialize dataset.

    Args:
        root_dir (str): Root directory including dumped files.
        charactor_query (str): Query to find charactor files in root_dir.
        mel_query (str): Query to find feature files in root_dir.
        duration_query (str): Query to find duration files in root_dir.
        charactor_load_fn (func): Function to load charactor file.
        mel_load_fn (func): Function to load feature file.
        duration_load_fn (func): Function to load duration file.
        mel_length_threshold (int): Threshold to remove short feature files.
        return_utt_id (bool): Whether to return the utterance id with arrays.
    """
    # Find all charactor, mel and duration files.
    charactor_files = sorted(find_files(root_dir, charactor_query))
    mel_files = sorted(find_files(root_dir, mel_query))
    duration_files = sorted(find_files(root_dir, duration_query))

    # Filter by threshold.
    if mel_length_threshold is not None:
        mel_lengths = [mel_load_fn(f).shape[0] for f in mel_files]
        idxs = [
            idx for idx in range(len(mel_files))
            if mel_lengths[idx] > mel_length_threshold
        ]
        if len(mel_files) != len(idxs):
            logging.warning(
                f"Some files are filtered by mel length threshold "
                f"({len(mel_files)} -> {len(idxs)})."
            )
        mel_files = [mel_files[idx] for idx in idxs]
        charactor_files = [charactor_files[idx] for idx in idxs]
        duration_files = [duration_files[idx] for idx in idxs]
        mel_lengths = [mel_lengths[idx] for idx in idxs]

        # Bucket sequence length trick: sort based on mel length.
        idx_sort = np.argsort(mel_lengths)

        # Sort.
        mel_files = np.array(mel_files)[idx_sort]
        charactor_files = np.array(charactor_files)[idx_sort]
        duration_files = np.array(duration_files)[idx_sort]
        mel_lengths = np.array(mel_lengths)[idx_sort]

        # Group utterances of equal mel length.
        idx_lengths = [
            [idx, length]
            for idx, length in zip(np.arange(len(mel_lengths)), mel_lengths)
        ]
        groups = [
            list(g) for _, g in itertools.groupby(idx_lengths, lambda a: a[1])
        ]

        # Shuffle whole groups, then flatten back to indices.
        random.shuffle(groups)
        idxs = []
        for group in groups:
            for idx, _ in group:
                idxs.append(idx)

        # Re-arrange the dataset in the shuffled group order.
        mel_files = np.array(mel_files)[idxs]
        charactor_files = np.array(charactor_files)[idxs]
        duration_files = np.array(duration_files)[idxs]
        mel_lengths = np.array(mel_lengths)[idxs]

    # Assert the number of files.
    assert len(mel_files) != 0, f"Not found any mel files in {root_dir}."
    assert (
        len(mel_files) == len(charactor_files) == len(duration_files)
    ), (
        f"Number of charactor, mel and duration files are different "
        f"({len(mel_files)} vs {len(charactor_files)} vs {len(duration_files)})."
    )

    if ".npy" in charactor_query:
        suffix = charactor_query[1:]
        utt_ids = [os.path.basename(f).replace(suffix, "") for f in charactor_files]
    else:
        utt_ids = [os.path.splitext(os.path.basename(f))[0] for f in charactor_files]

    # Set global params.
    self.utt_ids = utt_ids
    self.mel_files = mel_files
    self.charactor_files = charactor_files
    self.duration_files = duration_files
    self.mel_load_fn = mel_load_fn
    self.charactor_load_fn = charactor_load_fn
    self.duration_load_fn = duration_load_fn
    self.return_utt_id = return_utt_id
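# The bucketing above sorts utterances by mel length, groups equal lengths,
# and shuffles whole groups, so consecutive batches contain similar-length
# utterances (little padding) while the epoch order still varies.
# A toy illustration (values assumed):
#   lengths [70, 50, 70, 50] -> sorted groups [[50, 50], [70, 70]]
#   -> after group shuffle, e.g. [[70, 70], [50, 50]]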
def __init__(
    self,
    root_dir,
    charactor_query="*-ids.npy",
    mel_query="*-norm-feats.npy",
    charactor_load_fn=np.load,
    mel_load_fn=np.load,
    mel_length_threshold=None,
    return_utt_id=False,
    return_guided_attention=True,
    reduction_factor=1,
    mel_pad_value=0.0,
    char_pad_value=0,
    ga_pad_value=-1.0,
    g=0.2,
    use_fixed_shapes=False,
):
    """Initialize dataset.

    Args:
        root_dir (str): Root directory including dumped files.
        charactor_query (str): Query to find charactor files in root_dir.
        mel_query (str): Query to find feature files in root_dir.
        charactor_load_fn (func): Function to load charactor file.
        mel_load_fn (func): Function to load feature file.
        mel_length_threshold (int): Threshold to remove short feature files.
        return_utt_id (bool): Whether to return the utterance id with arrays.
        return_guided_attention (bool): Whether to return guided attention matrices.
        reduction_factor (int): Reduction factor in the Tacotron-2 paper.
        mel_pad_value (float): Padding value for mel-spectrogram.
        char_pad_value (int): Padding value for charactor.
        ga_pad_value (float): Padding value for guided attention.
        g (float): G value for guided attention.
        use_fixed_shapes (bool): Use fixed shape for mel targets or not.
        max_char_length (int): Maximum charactor length if use_fixed_shapes=True.
        max_mel_length (int): Maximum mel length if use_fixed_shapes=True.
    """
    # Find all charactor and mel files.
    charactor_files = sorted(find_files(root_dir, charactor_query))
    mel_files = sorted(find_files(root_dir, mel_query))
    mel_lengths = [mel_load_fn(f).shape[0] for f in mel_files]
    char_lengths = [charactor_load_fn(f).shape[0] for f in charactor_files]

    # Filter by threshold.
    if mel_length_threshold is not None:
        idxs = [
            idx for idx in range(len(mel_files))
            if mel_lengths[idx] > mel_length_threshold
        ]
        if len(mel_files) != len(idxs):
            logging.warning(
                f"Some files are filtered by mel length threshold "
                f"({len(mel_files)} -> {len(idxs)}).")
        mel_files = [mel_files[idx] for idx in idxs]
        charactor_files = [charactor_files[idx] for idx in idxs]
        mel_lengths = [mel_lengths[idx] for idx in idxs]
        char_lengths = [char_lengths[idx] for idx in idxs]

    # Bucket sequence length trick: sort based on mel length.
    idx_sort = np.argsort(mel_lengths)

    # Sort.
    mel_files = np.array(mel_files)[idx_sort]
    charactor_files = np.array(charactor_files)[idx_sort]
    mel_lengths = np.array(mel_lengths)[idx_sort]
    char_lengths = np.array(char_lengths)[idx_sort]

    # Group utterances of equal mel length.
    idx_lengths = [
        [idx, length]
        for idx, length in zip(np.arange(len(mel_lengths)), mel_lengths)
    ]
    groups = [
        list(g) for _, g in itertools.groupby(idx_lengths, lambda a: a[1])
    ]

    # Shuffle whole groups, then flatten back to indices.
    random.shuffle(groups)
    idxs = []
    for group in groups:
        for idx, _ in group:
            idxs.append(idx)

    # Re-arrange the dataset in the shuffled group order.
    mel_files = np.array(mel_files)[idxs]
    charactor_files = np.array(charactor_files)[idxs]
    mel_lengths = np.array(mel_lengths)[idxs]
    char_lengths = np.array(char_lengths)[idxs]

    # Assert the number of files.
    assert len(mel_files) != 0, f"Not found any mel files in {root_dir}."
    assert len(mel_files) == len(charactor_files) == len(mel_lengths), (
        f"Number of charactor, mel and duration files are different "
        f"({len(mel_files)} vs {len(charactor_files)} vs {len(mel_lengths)})."
    )

    if ".npy" in charactor_query:
        suffix = charactor_query[1:]
        utt_ids = [os.path.basename(f).replace(suffix, "") for f in charactor_files]
    else:
        utt_ids = [os.path.splitext(os.path.basename(f))[0] for f in charactor_files]

    # Set global params.
    self.utt_ids = utt_ids
    self.mel_files = mel_files
    self.charactor_files = charactor_files
    self.mel_load_fn = mel_load_fn
    self.charactor_load_fn = charactor_load_fn
    self.return_utt_id = return_utt_id
    self.return_guided_attention = return_guided_attention
    self.mel_lengths = mel_lengths
    self.char_lengths = char_lengths
    self.reduction_factor = reduction_factor
    self.mel_pad_value = mel_pad_value
    self.char_pad_value = char_pad_value
    self.ga_pad_value = ga_pad_value
    self.g = g
    self.use_fixed_shapes = use_fixed_shapes

    self.max_char_length = np.max(char_lengths) + 1  # +1 for eos
    # Round the maximum mel length up to a multiple of the reduction factor
    # (this variant adds a full extra factor when it is already divisible).
    self.max_mel_length = (
        np.max(mel_lengths)
        + self.reduction_factor
        - np.max(mel_lengths) % self.reduction_factor
    )