Example #1
    def __init__(
        self,
        root_dir,
        charactor_query="*-ids.npy",
        mel_query="*-norm-feats.npy",
        duration_query="*-durations.npy",
        charactor_load_fn=np.load,
        mel_load_fn=np.load,
        duration_load_fn=np.load,
        mel_length_threshold=0,
    ):
        """Initialize dataset.

        Args:
            root_dir (str): Root directory including dumped files.
            charactor_query (str): Query to find charactor files in root_dir.
            mel_query (str): Query to find feature files in root_dir.
            duration_query (str): Query to find duration files in root_dir.
            charactor_load_fn (func): Function to load charactor file.
            mel_load_fn (func): Function to load feature file.
            duration_load_fn (func): Function to load duration file.
            mel_length_threshold (int): Threshold to remove short feature files.

        """
        # find all of charactor and mel files.
        charactor_files = sorted(find_files(root_dir, charactor_query))
        mel_files = sorted(find_files(root_dir, mel_query))
        duration_files = sorted(find_files(root_dir, duration_query))

        # assert the number of files
        assert len(mel_files) != 0, f"No mel files found in {root_dir}."
        assert (
            len(mel_files) == len(charactor_files) == len(duration_files)
        ), (
            f"Number of charactor, mel and duration files are different "
            f"({len(mel_files)} vs {len(charactor_files)} vs {len(duration_files)})."
        )

        if ".npy" in charactor_query:
            suffix = charactor_query[1:]
            utt_ids = [
                os.path.basename(f).replace(suffix, "")
                for f in charactor_files
            ]

        # set global params
        self.utt_ids = utt_ids
        self.mel_files = mel_files
        self.charactor_files = charactor_files
        self.duration_files = duration_files
        self.mel_load_fn = mel_load_fn
        self.charactor_load_fn = charactor_load_fn
        self.duration_load_fn = duration_load_fn
        self.mel_length_threshold = mel_length_threshold
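All of these examples rely on a find_files helper that is not shown; below is a minimal sketch consistent with how it is called here (a glob-style query searched recursively under root_dir), not necessarily the original implementation:

import fnmatch
import os

def find_files(root_dir, query="*.wav"):
    """Recursively collect files under root_dir whose names match query.

    Sketch only: the helper used by these examples may take extra arguments
    (e.g. a search depth, as in Example #9 below).
    """
    files = []
    for root, _, filenames in os.walk(root_dir, followlinks=True):
        for filename in fnmatch.filter(filenames, query):
            files.append(os.path.join(root, filename))
    return files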
Example #2
    def __init__(
        self,
        root_dir,
        audio_query="*-wave.npy",
        mel_query="*-raw-feats.npy",
        audio_load_fn=np.load,
        mel_load_fn=np.load,
        audio_length_threshold=0,
        mel_length_threshold=0,
    ):
        """Initialize dataset.

        Args:
            root_dir (str): Root directory including dumped files.
            audio_query (str): Query to find audio files in root_dir.
            mel_query (str): Query to find feature files in root_dir.
            audio_load_fn (func): Function to load audio file.
            mel_load_fn (func): Function to load feature file.
            audio_length_threshold (int): Threshold to remove short audio files.
            mel_length_threshold (int): Threshold to remove short feature files.

        """
        # find all of audio and mel files.
        audio_files = sorted(find_files(root_dir, audio_query))
        mel_files = sorted(find_files(root_dir, mel_query))

        # assert the number of files
        assert len(audio_files) != 0, f"No audio files found in {root_dir}."
        assert len(audio_files) == len(mel_files), (
            f"Number of audio and mel files are different "
            f"({len(audio_files)} vs {len(mel_files)})."
        )

        if ".npy" in audio_query:
            suffix = audio_query[1:]
            utt_ids = [
                os.path.basename(f).replace(suffix, "") for f in audio_files
            ]

        # set global params
        self.utt_ids = utt_ids
        self.audio_files = audio_files
        self.mel_files = mel_files
        self.audio_load_fn = audio_load_fn
        self.mel_load_fn = mel_load_fn
        self.audio_length_threshold = audio_length_threshold
        self.mel_length_threshold = mel_length_threshold
Example #3
    def __init__(
        self,
        root_dir,
        mel_query="*-raw-feats.h5",
        mel_load_fn=np.load,
        mel_length_threshold=0,
    ):
        """Initialize dataset.

        Args:
            root_dir (str): Root directory including dumped files.
            mel_query (str): Query to find feature files in root_dir.
            mel_load_fn (func): Function to load feature file.
            mel_length_threshold (int): Threshold to remove short feature files.

        """
        # find all of mel files.
        mel_files = sorted(find_files(root_dir, mel_query))
        mel_lengths = [mel_load_fn(f).shape[0] for f in mel_files]

        # assert the number of files
        assert len(mel_files) != 0, f"No mel files found in {root_dir}."

        if ".npy" in mel_query:
            suffix = mel_query[1:]
            utt_ids = [
                os.path.basename(f).replace(suffix, "") for f in mel_files
            ]

        # set global params
        self.utt_ids = utt_ids
        self.mel_files = mel_files
        self.mel_lengths = mel_lengths
        self.mel_load_fn = mel_load_fn
        self.mel_length_threshold = mel_length_threshold
Example #4
def main():
  args = parse_args()
  args.train_dataset = args.eval_dataset = args.dataset
  with open(args.config) as f:
    config = yaml.load(f, Loader=yaml.Loader)
  with open(config['speech_config']) as f:
    mel_config = yaml.load(f, Loader=yaml.Loader)
  config.update(mel_config)
  config.update(vars(args))

  model = GE2E(name='ge2e', **config['model'])
  model.load_weights(args.restore)

  window_length = (config['data']['min_frames'] + config['data']['max_frames']) // 2

  suffix = '*_mel.npy'
  save_as = '_gc.npy'
  for dataset in args.dataset:
    files = find_files(dataset, suffix)
    print('files of %s:' % dataset, len(files), files[0])

    for file in tqdm(files):
      save_name = file.replace(suffix[1:], save_as)
      # if os.path.isfile(save_name):
        # continue
      s = np.load(file)
      len_s = len(s)
      if len_s < window_length:
        # tile short utterances until at least one full window fits.
        s = np.concatenate([s for _ in range((window_length + len_s) // len_s)])
      s = batch_frames(s, window_length=window_length, overlap=0.5)

      d = model.inference(s)
      np.save(save_name, d)
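batch_frames is not defined in this snippet; here is a sketch of one plausible implementation given the call site (window_length plus overlap=0.5), assumed rather than taken from the original codebase:

import numpy as np

def batch_frames(s, window_length, overlap=0.5):
    """Slice a [T, n_mels] array into a batch of overlapping windows.

    Sketch only: assumes a hop of window_length * (1 - overlap) and drops any
    ragged tail, which may differ from the original helper.
    """
    hop = max(1, int(window_length * (1 - overlap)))
    starts = range(0, len(s) - window_length + 1, hop)
    return np.stack([s[i:i + window_length] for i in starts])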
Example #5
    def __init__(
        self,
        root_dir,
        charactor_query="*-ids.npy",
        charactor_load_fn=np.load,
        return_utt_id=False,
    ):
        """Initialize dataset.

        Args:
            root_dir (str): Root directory including dumped files.
            charactor_query (str): Query to find charactor files in root_dir.
            charactor_load_fn (func): Function to load charactor file.
            return_utt_id (bool): Whether to return the utterance id with arrays.

        """
        # find all charactor files.
        charactor_files = sorted(find_files(root_dir, charactor_query))

        # assert the number of files
        assert (
            len(charactor_files) != 0
        ), f"No charactor files found in {root_dir}."
        if ".npy" in charactor_query:
            suffix = charactor_query[1:]
            utt_ids = [os.path.basename(f).replace(suffix, "") for f in charactor_files]

        # set global params
        self.utt_ids = utt_ids
        self.charactor_files = charactor_files
        self.charactor_load_fn = charactor_load_fn
        self.return_utt_id = return_utt_id
Example #6
    def __init__(self, roots, **kwargs):
        super().__init__(**kwargs)
        self._metadata = []
        for root in roots:
            self._metadata += find_files(root, '*_mel.npy')
        self._metadata = sorted(self._metadata)
        # LibriSpeech nests root/speaker/chapter/file, so the speaker sits
        # three levels up; other corpora use root/speaker/file.
        self._get_speaker_from_filename = lambda filename: \
            filename.split('/')[-3] if 'libri' in filename.lower() else filename.split('/')[-2]
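An illustration of the directory convention the lambda above encodes, using made-up paths:

get_speaker = lambda filename: (filename.split('/')[-3]
                                if 'libri' in filename.lower()
                                else filename.split('/')[-2])

print(get_speaker('LibriSpeech/1089/134686/1089-134686-0000_mel.npy'))  # 1089
print(get_speaker('VCTK/p225/p225_001_mel.npy'))                        # p225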
Example #7
    def __init__(
        self,
        root_dir,
        audio_query="*-wave.npy",
        audio_load_fn=np.load,
        audio_length_threshold=None,
        return_utt_id=False,
    ):
        """Initialize dataset.

        Args:
            root_dir (str): Root directory including dumped files.
            audio_query (str): Query to find feature files in root_dir.
            audio_load_fn (func): Function to load feature file.
            audio_length_threshold (int): Threshold to remove short feature files.
            return_utt_id (bool): Whether to return the utterance id with arrays.

        """
        # find all audio files.
        audio_files = sorted(find_files(root_dir, audio_query))
        audio_lengths = [audio_load_fn(f).shape[0] for f in audio_files]

        # filter by threshold
        if audio_length_threshold is not None:
            idxs = [
                idx for idx in range(len(audio_files))
                if audio_lengths[idx] > audio_length_threshold
            ]
            if len(audio_files) != len(idxs):
                logging.warning(
                    f"Some files are filtered by audio length threshold "
                    f"({len(audio_files)} -> {len(idxs)}).")
            audio_files = [audio_files[idx] for idx in idxs]
            # keep the stored lengths aligned with the filtered file list.
            audio_lengths = [audio_lengths[idx] for idx in idxs]

        # assert the number of files
        assert len(audio_files) != 0, f"No audio files found in {root_dir}."

        if ".npy" in audio_query:
            suffix = audio_query[1:]
            utt_ids = [
                os.path.basename(f).replace(suffix, "") for f in audio_files
            ]

        # set global params
        self.utt_ids = utt_ids
        self.audio_files = audio_files
        self.audio_lengths = audio_lengths
        self.audio_load_fn = audio_load_fn
        self.return_utt_id = return_utt_id
Example #8
    def __init__(self,
                 root_dir,
                 mel_query="*-raw-feats.h5",
                 mel_load_fn=np.load,
                 mel_length_threshold=None,
                 return_utt_id=False):
        """Initialize dataset.

        Args:
            root_dir (str): Root directory including dumped files.
            mel_query (str): Query to find feature files in root_dir.
            mel_load_fn (func): Function to load feature file.
            mel_length_threshold (int): Threshold to remove short feature files.
            return_utt_id (bool): Whether to return the utterance id with arrays.

        """
        # find all of mel files.
        mel_files = sorted(find_files(root_dir, mel_query))
        mel_lengths = [mel_load_fn(f).shape[0] for f in mel_files]

        # filter by threshold
        if mel_length_threshold is not None:
            idxs = [
                idx for idx in range(len(mel_files))
                if mel_lengths[idx] > mel_length_threshold
            ]
            if len(mel_files) != len(idxs):
                logging.warning(
                    f"Some files are filtered by mel length threshold "
                    f"({len(mel_files)} -> {len(idxs)}).")
            mel_files = [mel_files[idx] for idx in idxs]
            # keep the stored lengths aligned with the filtered file list.
            mel_lengths = [mel_lengths[idx] for idx in idxs]

        # assert the number of files
        assert len(mel_files) != 0, f"No mel files found in {root_dir}."

        if ".npy" in mel_query:
            utt_ids = [
                "-".join([
                    os.path.basename(f).split("-")[0],
                    os.path.basename(f).split("-")[1]
                ]) for f in mel_files
            ]

        # set global params
        self.utt_ids = utt_ids
        self.mel_files = mel_files
        self.mel_lengths = mel_lengths
        self.mel_load_fn = mel_load_fn
        self.return_utt_id = return_utt_id
Example #9
def main():
    args = parse_args()

    if 'vctk' in args.dataset.lower():
        depth, start = 2, 1
    else:  # librispeech and other layouts
        depth, start = 3, 13
    speaker_files = find_files(args.dataset, '_gc.npy', depth)
    func = lambda filename: filename.split('/')[-depth]
    d = {}
    for f in speaker_files:
        s = func(f)
        if s not in d:
            d[s] = []
        d[s].append(f)
    speaker_files = d

    os.makedirs(args.save, exist_ok=True)
    out_v = io.open(os.path.join(args.save, 'vecs.tsv'), 'w', encoding='utf-8')
    out_m = io.open(os.path.join(args.save, 'meta.tsv'), 'w', encoding='utf-8')
    with open(args.speaker_info) as file:
        for line in np.random.choice(file.readlines()[start:], size=args.n):
            speaker, info = line.split(maxsplit=1)
            if depth == 2:
                speaker = 'p' + speaker
            if speaker not in speaker_files:
                continue

            for i, filename in enumerate(
                    np.random.choice(
                        speaker_files[speaker],
                        size=args.m,
                        # sample with replacement only when the speaker has
                        # fewer than args.m files.
                        replace=args.m > len(speaker_files[speaker]))):
                d_vector = np.load(filename)
                out_m.write(line.replace(speaker, speaker + ' ' + str(i)))
                out_v.write('\t'.join([str(x) for x in d_vector]) + '\n')
    out_v.close()
    out_m.close()
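The two TSVs written above follow the TensorFlow Embedding Projector convention: one tab-separated vector per line in vecs.tsv with a parallel metadata row in meta.tsv. A quick sanity check that the files stay aligned (the save directory name is assumed for illustration):

import os

save_dir = 'out'  # stands in for args.save
with open(os.path.join(save_dir, 'vecs.tsv')) as fv, \
        open(os.path.join(save_dir, 'meta.tsv')) as fm:
    n_vecs = sum(1 for _ in fv)
    n_meta = sum(1 for _ in fm)
assert n_vecs == n_meta, f"row mismatch: {n_vecs} vectors vs {n_meta} metadata rows"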
Example #10
def main():

    args = parse_args()

    with open(args.config) as f:
        config = yaml.load(f, Loader=yaml.Loader)

    model = TFSpeechFeaturizer(config)
    # executor/futures support the parallel path commented out below.
    executor = ProcessPoolExecutor(max_workers=cpu_count())

    all_filenames = find_files(args.dataset, args.suffix)

    futures = []

    print('num files total: %d' % len(all_filenames), all_filenames[0])

    suffix = args.suffix.replace('*', '')
    # for file in all_filenames:
    #   futures.append(executor.submit(partial(process_file, file, model, suffix)))
    # results = [future.result() for future in tqdm(futures)]

    for file in tqdm(all_filenames):
        process_file(file, model, suffix)
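process_file is referenced but not shown; below is a minimal sketch consistent with the call site (a path, a featurizer, and the cleaned suffix). The extraction call and the output naming are assumptions, not the original helper:

import numpy as np

def process_file(file, model, suffix):
    """Sketch: featurize one audio file and dump the result next to it."""
    signal = read_raw_audio(file)     # helper assumed from the same codebase (see Example #13)
    features = model.extract(signal)  # assumed TFSpeechFeaturizer-style API
    np.save(file.replace(suffix, '_mel.npy'), features)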
Example #11
    def __init__(
        self,
        root_dir,
        charactor_query="*-ids.npy",
        mel_query="*-norm-feats.npy",
        duration_query="*-durations.npy",
        f0_query="*-raw-f0.npy",
        energy_query="*-raw-energy.npy",
        f0_stat="./dump/stats_f0.npy",
        energy_stat="./dump/stats_energy.npy",
        charactor_load_fn=np.load,
        mel_load_fn=np.load,
        duration_load_fn=np.load,
        f0_load_fn=np.load,
        energy_load_fn=np.load,
        mel_length_threshold=0,
        speakers_map=None
    ):
        """Initialize dataset.

        Args:
            root_dir (str): Root directory including dumped files.
            charactor_query (str): Query to find charactor files in root_dir.
            mel_query (str): Query to find feature files in root_dir.
            duration_query (str): Query to find duration files in root_dir.
            f0_query (str): Query to find f0 files in root_dir.
            energy_query (str): Query to find energy files in root_dir.
            f0_stat (str): str path of f0_stat.
            energy_stat (str): str path of energy_stat.
            charactor_load_fn (func): Function to load charactor file.
            mel_load_fn (func): Function to load feature file.
            duration_load_fn (func): Function to load duration file.
            f0_load_fn (func): Function to load f0 file.
            energy_load_fn (func): Function to load energy file.
            mel_length_threshold (int): Threshold to remove short feature files.
            speakers_map (dict): Speakers map generated in dataset preprocessing

        """
        # find all of charactor and mel files.
        charactor_files = sorted(find_files(root_dir, charactor_query))
        mel_files = sorted(find_files(root_dir, mel_query))
        duration_files = sorted(find_files(root_dir, duration_query))
        f0_files = sorted(find_files(root_dir, f0_query))
        energy_files = sorted(find_files(root_dir, energy_query))

        # assert the number of files
        assert len(mel_files) != 0, f"No mel files found in {root_dir}."
        assert (
            len(mel_files)
            == len(charactor_files)
            == len(duration_files)
            == len(f0_files)
            == len(energy_files)
        ), (
            f"Number of charactor, mel, duration, f0 and energy files are different "
            f"({len(charactor_files)} vs {len(mel_files)} vs {len(duration_files)} "
            f"vs {len(f0_files)} vs {len(energy_files)})."
        )

        assert speakers_map is not None, "No speakers map found. Did you set --dataset_mapping?"

        if ".npy" in charactor_query:
            suffix = charactor_query[1:]
            utt_ids = [os.path.basename(f).replace(suffix, "") for f in charactor_files]

        # set global params
        self.utt_ids = utt_ids
        self.mel_files = mel_files
        self.charactor_files = charactor_files
        self.duration_files = duration_files
        self.f0_files = f0_files
        self.energy_files = energy_files
        self.mel_load_fn = mel_load_fn
        self.charactor_load_fn = charactor_load_fn
        self.duration_load_fn = duration_load_fn
        self.f0_load_fn = f0_load_fn
        self.energy_load_fn = energy_load_fn
        self.mel_length_threshold = mel_length_threshold
        self.speakers_map = speakers_map
        self.speakers = [self.speakers_map[i.split("_")[0]] for i in self.utt_ids]
        print("Speaker: utt_id", list(zip(self.speakers, self.utt_ids)))
        self.f0_stat = np.load(f0_stat)
        self.energy_stat = np.load(energy_stat)
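f0_stat and energy_stat are typically [mean, std] pairs dumped during preprocessing; a hedged sketch of the z-normalization they would support. Zero frames are skipped on the assumption that zero marks unvoiced f0:

import numpy as np

def norm_mean_std(x, mean, std):
    """Sketch: z-normalize nonzero entries only (zeros mark unvoiced frames)."""
    zero_idxs = np.where(x == 0.0)[0]
    x = (x - mean) / std
    x[zero_idxs] = 0.0
    return x

# e.g. f0 = norm_mean_std(f0, f0_stat[0], f0_stat[1])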
Example #12
    def __init__(
        self,
        dataset,
        root_dir,
        charactor_query="*-ids.npy",
        mel_query="*-norm-feats.npy",
        charactor_load_fn=np.load,
        mel_load_fn=np.load,
        mel_length_threshold=0,
        reduction_factor=1,
        mel_pad_value=0.0,
        char_pad_value=0,
        ga_pad_value=-1.0,
        g=0.2,
        use_fixed_shapes=False,
    ):
        """Initialize dataset.

        Args:
            dataset (str): Dataset name.
            root_dir (str): Root directory including dumped files.
            charactor_query (str): Query to find charactor files in root_dir.
            mel_query (str): Query to find feature files in root_dir.
            charactor_load_fn (func): Function to load charactor file.
            mel_load_fn (func): Function to load feature file.
            mel_length_threshold (int): Threshold to remove short feature files.
            reduction_factor (int): Reduction factor on Tacotron-2 paper.
            mel_pad_value (float): Padding value for mel-spectrogram.
            char_pad_value (int): Padding value for charactor.
            ga_pad_value (float): Padding value for guided attention.
            g (float): G value for guided attention.
            use_fixed_shapes (bool): Use fixed shape for mel targets or not.
            max_char_length (int): maximum charactor length if use_fixed_shapes=True.
            max_mel_length (int): maximum mel length if use_fixed_shapes=True

        """
        # find all of charactor and mel files.
        charactor_files = sorted(find_files(root_dir, charactor_query))
        mel_files = sorted(find_files(root_dir, mel_query))
        mel_lengths = [mel_load_fn(f).shape[0] for f in mel_files]
        char_lengths = [charactor_load_fn(f).shape[0] for f in charactor_files]

        # assert the number of files
        assert len(mel_files) != 0, f"No mel files found in {root_dir}."
        assert (
            len(mel_files) == len(charactor_files) == len(mel_lengths)
        ), (
            f"Number of charactor and mel files are different "
            f"({len(mel_files)} vs {len(charactor_files)} vs {len(mel_lengths)})."
        )

        if ".npy" in charactor_query:
            suffix = charactor_query[1:]
            utt_ids = [
                os.path.basename(f).replace(suffix, "")
                for f in charactor_files
            ]

        # set global params
        self.utt_ids = utt_ids
        self.mel_files = mel_files
        self.charactor_files = charactor_files
        self.mel_load_fn = mel_load_fn
        self.charactor_load_fn = charactor_load_fn
        self.mel_lengths = mel_lengths
        self.char_lengths = char_lengths
        self.reduction_factor = reduction_factor
        self.mel_length_threshold = mel_length_threshold
        self.mel_pad_value = mel_pad_value
        self.char_pad_value = char_pad_value
        self.ga_pad_value = ga_pad_value
        self.g = g
        self.use_fixed_shapes = use_fixed_shapes
        self.max_char_length = np.max(char_lengths)

        if np.max(mel_lengths) % self.reduction_factor == 0:
            self.max_mel_length = np.max(mel_lengths)
        else:
            self.max_mel_length = (np.max(mel_lengths) +
                                   self.reduction_factor -
                                   np.max(mel_lengths) % self.reduction_factor)
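The final block rounds the maximum mel length up to a multiple of reduction_factor so fixed-shape targets stay divisible by it; a quick worked check of the same arithmetic:

def round_up(length, factor):
    # mirrors the branch above: pad only when length is not already a multiple.
    return length if length % factor == 0 else length + factor - length % factor

assert round_up(803, 2) == 804  # 803 % 2 == 1 -> 803 + 2 - 1
assert round_up(804, 2) == 804  # already aligned, unchanged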
Example #13
    return extract_from_mel(features)


@tf.function(input_signature=[
    tf.TensorSpec(shape=[None, speech_config['n_mels'], 1],
                  dtype=tf.float32,
                  name="signal")
])
def extract_from_mel(features):
    with tf.device('/cpu:0'):
        encoded = conformer.encoder_inference(features)
    return encoded


suffix = '.wav'
mel_query = '_mel.npy'
feature_query = '_conformer_enc16.npy'
audio_files = sorted(find_files(args.dataset, '*' + suffix))
print('files:', len(audio_files), audio_files[0])

for filename in tqdm(audio_files):
    mel = filename.replace(suffix, mel_query)
    if os.path.exists(mel):
        features = np.load(mel).reshape([-1, speech_config['n_mels'], 1])
        encoded = extract_from_mel(features)
    else:
        signal = read_raw_audio(filename)
        encoded = extract_from_audio(signal)

    np.save(filename.replace(suffix, feature_query), encoded)
Example #14
    def __init__(self,
                 root_dir,
                 audio_query="*-wave.npy",
                 mel_query="*-raw-feats.npy",
                 audio_load_fn=np.load,
                 mel_load_fn=np.load,
                 audio_length_threshold=None,
                 mel_length_threshold=None,
                 return_utt_id=False
                 ):
        """Initialize dataset.

        Args:
            root_dir (str): Root directory including dumped files.
            audio_query (str): Query to find audio files in root_dir.
            mel_query (str): Query to find feature files in root_dir.
            audio_load_fn (func): Function to load audio file.
            mel_load_fn (func): Function to load feature file.
            audio_length_threshold (int): Threshold to remove short audio files.
            mel_length_threshold (int): Threshold to remove short feature files.
            return_utt_id (bool): Whether to return the utterance id with arrays.

        """
        # find all of audio and mel files.
        audio_files = sorted(find_files(root_dir, audio_query))
        mel_files = sorted(find_files(root_dir, mel_query))

        # filter by threshold
        if audio_length_threshold is not None:
            audio_lengths = [audio_load_fn(f).shape[0] for f in audio_files]
            idxs = [idx for idx in range(len(audio_files)) if audio_lengths[idx] > audio_length_threshold]
            if len(audio_files) != len(idxs):
                logging.warning(f"Some files are filtered by audio length threshold "
                                f"({len(audio_files)} -> {len(idxs)}).")
            audio_files = [audio_files[idx] for idx in idxs]
            mel_files = [mel_files[idx] for idx in idxs]
        if mel_length_threshold is not None:
            mel_lengths = [mel_load_fn(f).shape[0] for f in mel_files]
            idxs = [idx for idx in range(len(mel_files)) if mel_lengths[idx] > mel_length_threshold]
            if len(mel_files) != len(idxs):
                logging.warning(f"Some files are filtered by mel length threshold "
                                f"({len(mel_files)} -> {len(idxs)}).")
            audio_files = [audio_files[idx] for idx in idxs]
            mel_files = [mel_files[idx] for idx in idxs]

        # assert the number of files
        assert len(audio_files) != 0, f"No audio files found in {root_dir}."
        assert len(audio_files) == len(mel_files), \
            f"Number of audio and mel files are different ({len(audio_files)} vs {len(mel_files)})."

        if ".npy" in audio_query:
            utt_ids = [os.path.basename(f).replace("-wave.npy", "") for f in audio_files]
        else:
            utt_ids = [os.path.splitext(os.path.basename(f))[0] for f in audio_files]

        # set global params
        self.utt_ids = utt_ids
        self.audio_files = audio_files
        self.mel_files = mel_files
        self.audio_load_fn = audio_load_fn
        self.mel_load_fn = mel_load_fn
        self.return_utt_id = return_utt_id
Example #15
    def __init__(
        self,
        root_dir,
        charactor_query="*-ids.npy",
        mel_query="*-norm-feats.npy",
        duration_query="*-durations.npy",
        f0_query="*-raw-f0.npy",
        energy_query="*-raw-energy.npy",
        f0_stat="./dump/stats_f0.npy",
        energy_stat="./dump/stats_energy.npy",
        charactor_load_fn=np.load,
        mel_load_fn=np.load,
        duration_load_fn=np.load,
        f0_load_fn=np.load,
        energy_load_fn=np.load,
        mel_length_threshold=0,
    ):
        """Initialize dataset.

        Args:
            root_dir (str): Root directory including dumped files.
            charactor_query (str): Query to find charactor files in root_dir.
            mel_query (str): Query to find feature files in root_dir.
            duration_query (str): Query to find duration files in root_dir.
            f0_query (str): Query to find f0 files in root_dir.
            energy_query (str): Query to find energy files in root_dir.
            f0_stat (str): str path of f0_stat.
            energy_stat (str): str path of energy_stat.
            charactor_load_fn (func): Function to load charactor file.
            mel_load_fn (func): Function to load feature file.
            duration_load_fn (func): Function to load duration file.
            f0_load_fn (func): Function to load f0 file.
            energy_load_fn (func): Function to load energy file.
            mel_length_threshold (int): Threshold to remove short feature files.

        """
        # find all of charactor and mel files.
        charactor_files = sorted(find_files(root_dir, charactor_query))
        mel_files = sorted(find_files(root_dir, mel_query))
        duration_files = sorted(find_files(root_dir, duration_query))
        f0_files = sorted(find_files(root_dir, f0_query))
        energy_files = sorted(find_files(root_dir, energy_query))

        # assert the number of files
        assert len(mel_files) != 0, f"No mel files found in {root_dir}."
        assert (
            len(mel_files)
            == len(charactor_files)
            == len(duration_files)
            == len(f0_files)
            == len(energy_files)
        ), (
            f"Number of charactor, mel, duration, f0 and energy files are different "
            f"({len(charactor_files)} vs {len(mel_files)} vs {len(duration_files)} "
            f"vs {len(f0_files)} vs {len(energy_files)})."
        )

        if ".npy" in charactor_query:
            suffix = charactor_query[1:]
            utt_ids = [os.path.basename(f).replace(suffix, "") for f in charactor_files]

        # set global params
        self.utt_ids = utt_ids
        self.mel_files = mel_files
        self.charactor_files = charactor_files
        self.duration_files = duration_files
        self.f0_files = f0_files
        self.energy_files = energy_files
        self.mel_load_fn = mel_load_fn
        self.charactor_load_fn = charactor_load_fn
        self.duration_load_fn = duration_load_fn
        self.f0_load_fn = f0_load_fn
        self.energy_load_fn = energy_load_fn
        self.mel_length_threshold = mel_length_threshold

        self.speakers_map = {}  # TODO
        sp_id = 0
        for i in self.utt_ids:
            sp_name = i.split("_")[0]
            if sp_name not in self.speakers_map:
                self.speakers_map[sp_name] = sp_id
                sp_id += 1
        self.speakers = [
            self.speakers_map[i.split("_")[0]] for i in self.utt_ids
        ]  # TODO change but at the moment mfa folder name = speaker name

        self.f0_stat = np.load(f0_stat)
        self.energy_stat = np.load(energy_stat)
Example #16
    def __init__(
        self,
        root_dir,
        charactor_query="*-ids.npy",
        mel_query="*-norm-feats.npy",
        duration_query="*-durations.npy",
        charactor_load_fn=np.load,
        mel_load_fn=np.load,
        duration_load_fn=np.load,
        mel_length_threshold=None,
        return_utt_id=False,
    ):
        """Initialize dataset.

        Args:
            root_dir (str): Root directory including dumped files.
            charactor_query (str): Query to find charactor files in root_dir.
            mel_query (str): Query to find feature files in root_dir.
            duration_query (str): Query to find duration files in root_dir.
            charactor_load_fn (func): Function to load charactor file.
            mel_load_fn (func): Function to load feature file.
            duration_load_fn (func): Function to load duration file.
            mel_length_threshold (int): Threshold to remove short feature files.
            return_utt_id (bool): Whether to return the utterance id with arrays.

        """
        # find all of charactor and mel files.
        charactor_files = sorted(find_files(root_dir, charactor_query))
        mel_files = sorted(find_files(root_dir, mel_query))
        duration_files = sorted(find_files(root_dir, duration_query))
        # filter by threshold
        if mel_length_threshold is not None:
            mel_lengths = [mel_load_fn(f).shape[0] for f in mel_files]

            idxs = [
                idx
                for idx in range(len(mel_files))
                if mel_lengths[idx] > mel_length_threshold
            ]
            if len(mel_files) != len(idxs):
                logging.warning(
                    f"Some files are filtered by mel length threshold "
                    f"({len(mel_files)} -> {len(idxs)})."
                )
            mel_files = [mel_files[idx] for idx in idxs]
            charactor_files = [charactor_files[idx] for idx in idxs]
            duration_files = [duration_files[idx] for idx in idxs]
            mel_lengths = [mel_lengths[idx] for idx in idxs]

            # bucket sequence length trick, sort based-on mel-length.
            idx_sort = np.argsort(mel_lengths)

            # sort
            mel_files = np.array(mel_files)[idx_sort]
            charactor_files = np.array(charactor_files)[idx_sort]
            duration_files = np.array(duration_files)[idx_sort]
            mel_lengths = np.array(mel_lengths)[idx_sort]

            # group
            idx_lengths = [
                [idx, length]
                for idx, length in zip(np.arange(len(mel_lengths)), mel_lengths)
            ]
            groups = [
                list(g) for _, g in itertools.groupby(idx_lengths, lambda a: a[1])
            ]

            # group shuffle
            random.shuffle(groups)

            # get idxs after group shuffle
            idxs = []
            for group in groups:
                for idx, _ in group:
                    idxs.append(idx)

            # re-arrange dataset
            mel_files = np.array(mel_files)[idxs]
            charactor_files = np.array(charactor_files)[idxs]
            duration_files = np.array(duration_files)[idxs]
            mel_lengths = np.array(mel_lengths)[idxs]

        # assert the number of files
        assert len(mel_files) != 0, f"No mel files found in {root_dir}."
        assert (
            len(mel_files) == len(charactor_files) == len(duration_files)
        ), (
            f"Number of charactor, mel and duration files are different "
            f"({len(mel_files)} vs {len(charactor_files)} vs {len(duration_files)})."
        )

        if ".npy" in charactor_query:
            suffix = charactor_query[1:]
            utt_ids = [os.path.basename(f).replace(suffix, "") for f in charactor_files]

        # set global params
        self.utt_ids = utt_ids
        self.mel_files = mel_files
        self.charactor_files = charactor_files
        self.duration_files = duration_files
        self.mel_load_fn = mel_load_fn
        self.charactor_load_fn = charactor_load_fn
        self.duration_load_fn = duration_load_fn
        self.return_utt_id = return_utt_id
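Note that itertools.groupby only merges adjacent equal keys, which is why the mel lengths are sorted before grouping; a small illustration of the sort-then-group step:

import itertools

lengths = [3, 1, 2, 1, 3]
pairs = sorted(enumerate(lengths), key=lambda a: a[1])  # sort by length
groups = [list(g) for _, g in itertools.groupby(pairs, lambda a: a[1])]
print(groups)  # [[(1, 1), (3, 1)], [(2, 2)], [(0, 3), (4, 3)]]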
Example #17
    def __init__(
        self,
        root_dir,
        charactor_query="*-ids.npy",
        mel_query="*-norm-feats.npy",
        charactor_load_fn=np.load,
        mel_load_fn=np.load,
        mel_length_threshold=None,
        return_utt_id=False,
        return_guided_attention=True,
        reduction_factor=1,
        mel_pad_value=0.0,
        char_pad_value=0,
        ga_pad_value=-1.0,
        g=0.2,
        use_fixed_shapes=False,
    ):
        """Initialize dataset.

        Args:
            root_dir (str): Root directory including dumped files.
            charactor_query (str): Query to find charactor files in root_dir.
            mel_query (str): Query to find feature files in root_dir.
            charactor_load_fn (func): Function to load charactor file.
            mel_load_fn (func): Function to load feature file.
            mel_length_threshold (int): Threshold to remove short feature files.
            return_utt_id (bool): Whether to return the utterance id with arrays.
            return_guided_attention (bool): Whether to return guided attention matrices.
            reduction_factor (int): Reduction factor on Tacotron-2 paper.
            mel_pad_value (float): Padding value for mel-spectrogram.
            char_pad_value (int): Padding value for charactor.
            ga_pad_value (float): Padding value for guided attention.
            g (float): G value for guided attention.
            use_fixed_shapes (bool): Use fixed shape for mel targets or not.
            max_char_length (int): maximum charactor length if use_fixed_shapes=True.
            max_mel_length (int): maximum mel length if use_fixed_shapes=True

        """
        # find all of charactor and mel files.
        charactor_files = sorted(find_files(root_dir, charactor_query))
        mel_files = sorted(find_files(root_dir, mel_query))
        mel_lengths = [mel_load_fn(f).shape[0] for f in mel_files]
        char_lengths = [charactor_load_fn(f).shape[0] for f in charactor_files]

        # filter by threshold
        if mel_length_threshold is not None:
            idxs = [
                idx for idx in range(len(mel_files))
                if mel_lengths[idx] > mel_length_threshold
            ]
            if len(mel_files) != len(idxs):
                logging.warning(
                    f"Some files are filtered by mel length threshold "
                    f"({len(mel_files)} -> {len(idxs)}).")
            mel_files = [mel_files[idx] for idx in idxs]
            charactor_files = [charactor_files[idx] for idx in idxs]
            mel_lengths = [mel_lengths[idx] for idx in idxs]
            char_lengths = [char_lengths[idx] for idx in idxs]

            # bucket sequence length trick, sort based-on mel-length.
            idx_sort = np.argsort(mel_lengths)

            # sort
            mel_files = np.array(mel_files)[idx_sort]
            charactor_files = np.array(charactor_files)[idx_sort]
            mel_lengths = np.array(mel_lengths)[idx_sort]
            char_lengths = np.array(char_lengths)[idx_sort]

            # group
            idx_lengths = [[
                idx, length
            ] for idx, length in zip(np.arange(len(mel_lengths)), mel_lengths)]
            groups = [
                list(g)
                for _, g in itertools.groupby(idx_lengths, lambda a: a[1])
            ]

            # group shuffle
            random.shuffle(groups)

            # get idxs after group shuffle
            idxs = []
            for group in groups:
                for idx, _ in group:
                    idxs.append(idx)

            # re-arrange dataset
            mel_files = np.array(mel_files)[idxs]
            charactor_files = np.array(charactor_files)[idxs]
            mel_lengths = np.array(mel_lengths)[idxs]
            char_lengths = np.array(char_lengths)[idxs]

        # assert the number of files
        assert len(mel_files) != 0, f"No mel files found in {root_dir}."
        assert len(mel_files) == len(charactor_files) == len(mel_lengths), (
            f"Number of charactor and mel files are different "
            f"({len(mel_files)} vs {len(charactor_files)} vs {len(mel_lengths)})."
        )

        if ".npy" in charactor_query:
            suffix = charactor_query[1:]
            utt_ids = [
                os.path.basename(f).replace(suffix, "")
                for f in charactor_files
            ]

        # set global params
        self.utt_ids = utt_ids
        self.mel_files = mel_files
        self.charactor_files = charactor_files
        self.mel_load_fn = mel_load_fn
        self.charactor_load_fn = charactor_load_fn
        self.return_utt_id = return_utt_id
        self.return_guided_attention = return_guided_attention
        self.mel_lengths = mel_lengths
        self.char_lengths = char_lengths
        self.reduction_factor = reduction_factor
        self.mel_pad_value = mel_pad_value
        self.char_pad_value = char_pad_value
        self.ga_pad_value = ga_pad_value
        self.g = g
        self.use_fixed_shapes = use_fixed_shapes
        self.max_char_length = np.max(char_lengths) + 1  # +1 for eos
        # pad max mel length up to a multiple of the reduction factor.
        if np.max(mel_lengths) % self.reduction_factor == 0:
            self.max_mel_length = np.max(mel_lengths)
        else:
            self.max_mel_length = (np.max(mel_lengths) + self.reduction_factor
                                   - np.max(mel_lengths) % self.reduction_factor)
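The g parameter sets the width of the guided-attention band. One common formulation of the guided-attention weight matrix (from the guided-attention loss literature; a sketch consistent with these parameters, not necessarily this repository's exact code):

import numpy as np

def guided_attention(char_len, mel_len, max_char_len, max_mel_len, g=0.2):
    """Sketch: W[n, t] penalizes attention far from the diagonal."""
    w = np.full((max_char_len, max_mel_len), -1.0, dtype=np.float32)  # ga_pad_value
    for n in range(char_len):
        for t in range(mel_len):
            w[n, t] = 1.0 - np.exp(-((n / char_len - t / mel_len) ** 2) / (2 * g * g))
    return w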