Пример #1
0
def main(argv):
    """Export a TFDS dataset split and its token vocabulary.

    Loads the dataset named by ``FLAGS.dataset`` / ``FLAGS.split``, writes it
    under ``FLAGS.save_path``, then builds a token vocabulary from that same
    path and writes the vocabulary there as well.

    Args:
        argv: Positional command-line arguments; at most one entry is
            accepted.

    Raises:
        app.UsageError: If ``argv`` contains more than one entry.
    """
    if not len(argv) <= 1:
        raise app.UsageError('Too many command-line arguments.')

    # Step 1: fetch the requested split and persist it.
    split_data = preprocessor.get_dataset_from_tfds(
        FLAGS.dataset,
        FLAGS.split,
    )
    preprocessor.write_dataset(split_data, FLAGS.save_path)

    # Step 2: the vocabulary is built from the save path, so it has to run
    # after the dataset has been written (presumably it reads the files
    # written above -- confirm against preprocessor.get_token_vocab).
    vocab = preprocessor.get_token_vocab(FLAGS.save_path)
    preprocessor.write_token_vocab(vocab, FLAGS.save_path)
        "hertz_high" : args.hertz_high,
        "normalize_mel" : args.normalize_mel,
        "max_duration" : args.max_duration
    }

    # Create dataset
    train_dataset_list = ["train-clean-100", "train-clean-360"]
    train_dataset = preprocess_librispeech(args.data_dir, train_dataset_list, hp)

    dev_dataset_list = ["dev-clean"]
    dev_dataset = preprocess_librispeech(args.data_dir, dev_dataset_list, hp)

    # Serialize dataset
    train_dataset = train_dataset.map(
        preprocess.serialize_example,
        num_parallel_calls=tf.data.experimental.AUTOTUNE)
    dev_dataset = dev_dataset.map(
        preprocess.serialize_example,
        num_parallel_calls=tf.data.experimental.AUTOTUNE)

    # Store dataset
    train_dataset_path = os.path.join(args.out_dir, "train.tfrecord")
    os.makedirs(args.out_dir, exist_ok=True)
    preprocess.write_dataset(train_dataset, train_dataset_path)
    print("preprocessing for train dataset is done")

    dev_dataset_path = os.path.join(args.out_dir, "dev.tfrecord")
    os.makedirs(args.out_dir, exist_ok=True)
    preprocess.write_dataset(dev_dataset, dev_dataset_path)
    print("preprocessing for dev dataset is done")