def main(argv):
    """Fetch a TFDS dataset, persist it, then build and persist its token vocabulary.

    Args:
        argv: Command-line arguments as passed by absl's `app.run`; only the
            program name is accepted.

    Raises:
        app.UsageError: If any positional argument beyond the program name is given.
    """
    # Anything past the program name is an error for this tool.
    if argv[1:]:
        raise app.UsageError('Too many command-line arguments.')
    # Materialize the requested split and write it to the save path.
    ds = preprocessor.get_dataset_from_tfds(FLAGS.dataset, FLAGS.split)
    preprocessor.write_dataset(ds, FLAGS.save_path)
    # Build the token vocabulary from what was just written, then persist it too.
    vocab = preprocessor.get_token_vocab(FLAGS.save_path)
    preprocessor.write_token_vocab(vocab, FLAGS.save_path)
# NOTE(review): this line is a truncated fragment — it begins in the middle of a
# dict literal (presumably the hyperparameter dict `hp = {...}` whose opening,
# and the enclosing function/script header, are missing from this view — TODO
# confirm against the full file). What IS visible: it closes the hp dict, builds
# LibriSpeech train ("train-clean-100" + "train-clean-360") and dev ("dev-clean")
# datasets via preprocess_librispeech, maps preprocess.serialize_example over both
# with AUTOTUNE parallelism, and writes each to a .tfrecord under args.out_dir.
# NOTE(review): os.makedirs(args.out_dir, exist_ok=True) is called twice; the
# second call is redundant but harmless because exist_ok=True.
"hertz_high" : args.hertz_high, "normalize_mel" : args.normalize_mel, "max_duration" : args.max_duration } # Create dataset train_dataset_list = ["train-clean-100", "train-clean-360"] train_dataset = preprocess_librispeech(args.data_dir, train_dataset_list, hp) dev_dataset_list = ["dev-clean"] dev_dataset = preprocess_librispeech(args.data_dir, dev_dataset_list, hp) # Serialize dataset train_dataset = train_dataset.map( preprocess.serialize_example, num_parallel_calls=tf.data.experimental.AUTOTUNE) dev_dataset = dev_dataset.map( preprocess.serialize_example, num_parallel_calls=tf.data.experimental.AUTOTUNE) # Store dataset train_dataset_path = os.path.join(args.out_dir, "train.tfrecord") os.makedirs(args.out_dir, exist_ok=True) preprocess.write_dataset(train_dataset, train_dataset_path) print("preprocessing for train dataset is done") dev_dataset_path = os.path.join(args.out_dir, "dev.tfrecord") os.makedirs(args.out_dir, exist_ok=True) preprocess.write_dataset(dev_dataset, dev_dataset_path) print("preprocessing for dev dataset is done")