# Load the data-layout configuration, resolve input/output paths, and read
# the raw metadata file, running the text cleaner over every entry.
yaml = ruamel.yaml.YAML()
with open(str(Path(args.CONFIG) / 'data_config.yaml'), 'rb') as conf_yaml:
    config = yaml.load(conf_yaml)
args.DATA_DIR = config['data_directory']
args.META_FILE = os.path.join(args.DATA_DIR, config['metadata_filename'])
args.WAV_DIR = os.path.join(args.DATA_DIR, config['wav_subdir_name'])
# Fall back to the data directory when no dedicated training dir is configured.
args.TARGET_DIR = config['train_data_directory']
if args.TARGET_DIR is None:
    args.TARGET_DIR = args.DATA_DIR
mel_dir = os.path.join(args.TARGET_DIR, 'mels')
# exist_ok avoids the check-then-create race of exists() + makedirs().
os.makedirs(mel_dir, exist_ok=True)
phon_path = os.path.join(args.TARGET_DIR, 'phonemes.npy')
text_proc = Pipeline.default_training_pipeline(config['phoneme_language'],
                                               add_start_end=True)
print('\nLoading and cleaning text')
audio_data = []
with open(args.META_FILE, 'r', encoding='utf-8') as f:
    for line in f:  # iterate lazily instead of materializing via readlines()
        l_split = line.split(args.COLUMN_SEP)
        filename, text = l_split[0], l_split[1]
        if filename.endswith('.wav'):
            # splitext strips only the final '.wav'; the original
            # split('.')[0] truncated names containing extra dots
            # (e.g. 'a.b.wav' -> 'a' instead of 'a.b').
            filename = os.path.splitext(filename)[0]
        text = text_proc.cleaner(text)
        audio_data.append((filename, text))
# Report the train/validation split and destination paths, run the text
# cleaner over all metadata entries, then begin batched phonemization.
train_len = metadata_len - test_len
print(f'\nReading metadata from {metadatareader.metadata_path}')
print(f'\nRemoving {len(remove_files)} lines out of {metadata_len_tot}.')
print(f'\nMetadata contains {metadata_len} lines.')
print(f'Files will be stored under {cm.data_dir}')
print(f' - all: {phonemized_metadata_path}')
print(f' - {train_len} training lines: {train_metadata_path}')
print(f' - {test_len} validation lines: {test_metadata_path}')
print('\nMetadata samples:')
for i in sample_items:
    print(f'{i}:{metadatareader.text_dict[i]}')
# run cleaner on raw text
text_proc = Pipeline.default_training_pipeline(
    cm.config['phoneme_language'],
    add_start_end=False,
    with_stress=cm.config['with_stress'])
# Clean every text in one call, then map filename -> cleaned text.
texts = [metadatareader.text_dict[k] for k in metadatareader.filenames]
clean_texts = text_proc.cleaner(list(texts))
clean_texts = dict(zip(metadatareader.filenames, clean_texts))
key_list = list(clean_texts.keys())
print('\nCleaned metadata samples:')
for i in sample_items:
    print(f'{i}:{clean_texts[i]}')
print('\nPHONEMIZING')
batch_size = args.phonemizer_batch_size
failed_files = []
phonemized_data = {}
# NOTE(review): the stop bound len(key_list) + batch_size produces one extra
# iteration whose slice is empty — presumably harmless downstream, but verify.
# The loop body continues beyond this chunk.
for i in tqdm.tqdm(range(0, len(key_list) + batch_size, batch_size)):
    batch_keys = key_list[i:i + batch_size]
# Resolve paths from the loaded config, then either reuse the cached phoneme
# array or read and clean the raw metadata (CSV) from scratch.
args.META_FILE = os.path.join(args.DATA_DIR, config['metadata_filename'])
args.WAV_DIR = os.path.join(args.DATA_DIR, config['wav_subdir_name'])
# Fall back to the data directory when no dedicated training dir is configured.
args.TARGET_DIR = config['train_data_directory']
if args.TARGET_DIR is None:
    args.TARGET_DIR = args.DATA_DIR
mel_dir = os.path.join(args.TARGET_DIR, 'mels')
# exist_ok avoids the check-then-create race of exists() + makedirs().
os.makedirs(mel_dir, exist_ok=True)
phon_path = os.path.join(args.TARGET_DIR, 'phonemes.npy')
# BUG FIX: the original passed with_stress='False' — a non-empty string is
# truthy, so stress marks were silently ENABLED. Pass the boolean False.
text_proc = Pipeline.default_training_pipeline(config['phoneme_language'],
                                               add_start_end=True,
                                               with_stress=False)
if os.path.exists(phon_path) and not args.RECOMPUTE_PHON:
    print('Using cached phonemes.')
    audio_data = np.load(phon_path)
else:
    print('\nLoading and cleaning text')
    audio_data = []
    with open(args.META_FILE, 'r', encoding='utf-8') as f:
        for row in csv.reader(f):
            filename, text = row[0], row[1]
            text = text_proc.cleaner(text)
            audio_data.append((filename, text))
    # (N, 2) unicode array of (filename, cleaned_text) pairs.
    audio_data = np.array(audio_data)