import os
from pathlib import Path

import ruamel.yaml
# Pipeline comes from the project's text-preprocessing module;
# its exact import path depends on the repo layout.

yaml = ruamel.yaml.YAML()
with open(str(Path(args.CONFIG) / 'data_config.yaml'), 'rb') as conf_yaml:
    config = yaml.load(conf_yaml)
args.DATA_DIR = config['data_directory']
args.META_FILE = os.path.join(args.DATA_DIR, config['metadata_filename'])
args.WAV_DIR = os.path.join(args.DATA_DIR, config['wav_subdir_name'])
args.TARGET_DIR = config['train_data_directory']
if args.TARGET_DIR is None:
    args.TARGET_DIR = args.DATA_DIR
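
# For context, a minimal data_config.yaml satisfying the keys read above
# might look like this (the values are placeholders, not from the source):
#
#   data_directory: /data/my_dataset
#   metadata_filename: metadata.csv
#   wav_subdir_name: wavs
#   train_data_directory: null   # null falls back to data_directory
#   phoneme_language: en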

mel_dir = os.path.join(args.TARGET_DIR, 'mels')
os.makedirs(mel_dir, exist_ok=True)

phon_path = os.path.join(args.TARGET_DIR, 'phonemes.npy')
text_proc = Pipeline.default_training_pipeline(config['phoneme_language'],
                                               add_start_end=True)
# if os.path.exists(phon_path) and not args.RECOMPUTE_PHON:
#     print('Using cached phonemes.')
#     audio_data = np.load(phon_path)
# else:
print('\nLoading and cleaning text')

audio_data = []
with open(args.META_FILE, 'r', encoding='utf-8') as f:
    for line in f:
        # strip the trailing newline so it does not end up in the cleaned text
        l_split = line.rstrip('\n').split(args.COLUMN_SEP)
        filename, text = l_split[0], l_split[1]
        if filename.endswith('.wav'):
            # splitext is safer than split('.') for names containing dots
            filename = os.path.splitext(filename)[0]
        text = text_proc.cleaner(text)
        audio_data.append((filename, text))
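
For reference, one line of the metadata file this loop parses might look like the following, assuming args.COLUMN_SEP is '|' (the filename and transcript are illustrative):

LJ001-0001.wav|Printing, in the only sense with which we are at present concerned, differs from most if not from all the arts.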
Example #2
    train_len = metadata_len - test_len
    print(f'\nReading metadata from {metadatareader.metadata_path}')
    print(f'\nRemoving {len(remove_files)} lines out of {metadata_len_tot}.')
    print(f'\nMetadata contains {metadata_len} lines.')
    print(f'Files will be stored under {cm.data_dir}')
    print(f' - all: {phonemized_metadata_path}')
    print(f' - {train_len} training lines: {train_metadata_path}')
    print(f' - {test_len} validation lines: {test_metadata_path}')

    print('\nMetadata samples:')
    for i in sample_items:
        print(f'{i}:{metadatareader.text_dict[i]}')

    # run cleaner on raw text
    text_proc = Pipeline.default_training_pipeline(
        cm.config['phoneme_language'],
        add_start_end=False,
        with_stress=cm.config['with_stress'])
    texts = [metadatareader.text_dict[k] for k in metadatareader.filenames]
    clean_texts = text_proc.cleaner(texts)
    clean_texts = dict(zip(metadatareader.filenames, clean_texts))
    key_list = list(clean_texts.keys())
    print('\nCleaned metadata samples:')
    for i in sample_items:
        print(f'{i}:{clean_texts[i]}')

    print('\nPHONEMIZING')
    batch_size = args.phonemizer_batch_size
    failed_files = []
    phonemized_data = {}
    # walk the keys in steps of batch_size; ranging past len(key_list)
    # would only yield an empty final batch
    for i in tqdm.tqdm(range(0, len(key_list), batch_size)):
        batch_keys = key_list[i:i + batch_size]
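        # The example is truncated here; a plausible continuation is
        # sketched below. The batch phonemizer call and the error
        # handling are assumptions, not the verbatim source.
        try:
            phonemized_batch = text_proc.phonemizer([clean_texts[k] for k in batch_keys])
            phonemized_data.update(dict(zip(batch_keys, phonemized_batch)))
        except Exception:
            failed_files.extend(batch_keys)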
Example #3
import csv
import os

import numpy as np

args.META_FILE = os.path.join(args.DATA_DIR, config['metadata_filename'])
args.WAV_DIR = os.path.join(args.DATA_DIR, config['wav_subdir_name'])
args.TARGET_DIR = config['train_data_directory']
if args.TARGET_DIR is None:
    args.TARGET_DIR = args.DATA_DIR

mel_dir = os.path.join(args.TARGET_DIR, 'mels')
print(mel_dir)
print(args.TARGET_DIR)

os.makedirs(mel_dir, exist_ok=True)

phon_path = os.path.join(args.TARGET_DIR, 'phonemes.npy')
print('phon_path:', phon_path)
text_proc = Pipeline.default_training_pipeline(config['phoneme_language'],
                                               add_start_end=True,
                                               with_stress=False)  # a real boolean: the string 'False' is truthy
if os.path.exists(phon_path) and not args.RECOMPUTE_PHON:
    print('Using cached phonemes.')
    audio_data = np.load(phon_path)
else:
    print('\nLoading and cleaning text')
    
    audio_data = []
    with open(args.META_FILE, 'r', encoding='utf-8') as f:
        # csv.reader defaults to comma-delimited rows; pass delimiter='|'
        # if the metadata file is pipe-separated
        csv_reader = csv.reader(f)
        for row in csv_reader:
            filename, text = row[0], row[1]
            text = text_proc.cleaner(text)
            audio_data.append((filename, text))
    audio_data = np.array(audio_data)
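    # Assumption (not shown in the source): persist the freshly computed
    # phonemes so the os.path.exists(phon_path) cache check above
    # succeeds on the next run.
    np.save(phon_path, audio_data)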