from data.audio import Audio

# Fixed seed for reproducible preprocessing runs.
# NOTE(review): `np`, `argparse`, `Config`, `DataReader`, `SummaryManager`
# and `trange` are not imported in this chunk — presumably imported earlier
# in the file; confirm.
np.random.seed(42)

# CLI flags: config path plus switches to skip individual preprocessing stages.
parser = argparse.ArgumentParser()
parser.add_argument('--config', type=str, default='config/session_paths.yaml')
parser.add_argument('--skip_phonemes', action='store_true')
parser.add_argument('--skip_mels', action='store_true')
parser.add_argument('--skip_speakers', action='store_true')
args = parser.parse_args()

# Echo all parsed arguments for run provenance.
for arg in vars(args):
    print('{}: {}'.format(arg, getattr(args, arg)))

# Build session config (ASR mode) and prepare its directory layout.
cm = Config(args.config, asr=True)
cm.create_remove_dirs()

# Reader over the original (unprocessed) metadata; summaries are logged
# under a dedicated 'data_preprocessing' writer with no model attached.
metadatareader = DataReader.from_config(cm, kind='original')
summary_manager = SummaryManager(model=None,
                                 log_dir=cm.log_dir / 'data_preprocessing',
                                 config=cm.config,
                                 default_writer='data_preprocessing')
print(f'\nFound {len(metadatareader.filenames)} audio files.')

# Audio front-end (mel extraction etc.) driven by the same config.
audio = Audio(config=cm.config)

if not args.skip_mels:
    def process_file(tuples):
        # Accumulators: per-item lengths, per-speaker file mapping, and
        # files to discard. Populated in the loop below.
        # NOTE(review): function body continues beyond this chunk — the
        # loop body is not visible here.
        len_dict = {}
        spk_file_dict = {}
        remove_files = []
        for idx in trange(len(tuples), desc=''):
# CTC-segmentation utilities for aligning audio with token sequences.
from ctc_segmentation import ctc_segmentation, determine_utterance_segments
from ctc_segmentation import CtcSegmentationParameters
from ctc_segmentation import prepare_token_list
import tgt  # TextGrid tools — presumably for writing Praat TextGrid alignments; verify usage below.

# Fixed seeds for reproducibility across NumPy and TensorFlow.
# NOTE(review): `np`, `tf`, `dynamic_memory_allocation`, `basic_train_parser`,
# `Config` and `ASRDataset` are not imported in this chunk — presumably
# imported earlier in the file; confirm.
np.random.seed(42)
tf.random.set_seed(42)
# NOTE(review): likely enables TF GPU memory growth — confirm against its definition.
dynamic_memory_allocation()

# Reuse the standard training CLI (config path, clear_* flags).
parser = basic_train_parser()
args = parser.parse_args()

# Load session config in ASR mode; reset requested dirs/logs/weights and
# persist + print the effective configuration for provenance.
config = Config(config_path=args.config, asr=True)
config_dict = config.config
config.create_remove_dirs(clear_dir=args.clear_dir,
                          clear_logs=args.clear_logs,
                          clear_weights=args.clear_weights)
config.dump_config()
config.print_config()

# Build and compile the model defined by the config.
model = config.get_model()
config.compile_model(model)

# Validation split, tokenized with the model's own tokenizer; bucketed
# batching per config, unshuffled so segment extraction order is stable.
data_handler = ASRDataset.from_config(config,
                                      tokenizer=model.text_pipeline.tokenizer,
                                      kind='valid')
dataset = data_handler.get_dataset(bucket_batch_sizes=config_dict['bucket_batch_sizes'],
                                   bucket_boundaries=config_dict['bucket_boundaries'],
                                   shuffle=False)