def create_data(config, data_path):
    """Build the training and validation data loaders over LJSpeech.

    The dataset is split with ``SliceDataset``: the validation part is
    requested with bounds ``(0, valid_size)`` and the training part with
    bounds ``(valid_size, len(dataset))``.

    Args:
        config: mapping read for the keys ``"valid_size"``,
            ``"p_pronunciation"`` and ``"batch_size"``.
        data_path: forwarded unchanged to ``LJSpeech``.

    Returns:
        A ``(train_loader, valid_loader)`` tuple.
    """
    corpus = LJSpeech(data_path)
    n_valid = config["valid_size"]

    # Training pipeline: random order, configurable batch size.
    train_part = SliceDataset(corpus, n_valid, len(corpus))
    train_collate = DataCollector(config["p_pronunciation"])
    train_order = RandomSampler(train_part)
    train_batches = DataCargo(
        train_part,
        train_collate,
        batch_size=config["batch_size"],
        sampler=train_order,
    )
    train_loader = DataLoader.from_generator(
        capacity=10, return_list=True
    ).set_batch_generator(train_batches)

    # Validation pipeline: sequential order, one example per batch, and the
    # collector is built with a fixed argument of 1.0.
    valid_part = SliceDataset(corpus, 0, n_valid)
    valid_collate = DataCollector(1.)
    valid_order = SequentialSampler(valid_part)
    valid_batches = DataCargo(
        valid_part,
        valid_collate,
        batch_size=1,
        sampler=valid_order,
    )
    valid_loader = DataLoader.from_generator(
        capacity=2, return_list=True
    ).set_batch_generator(valid_batches)

    return train_loader, valid_loader
config = ruamel.yaml.safe_load(f)  # `f` is supplied by the surrounding code

ljspeech_meta = LJSpeechMetaData(args.data)

# Audio / feature settings are read from the "data" section of the config,
# in the same key order as before.
data_config = config["data"]
sample_rate, n_fft, win_length, hop_length, n_mels = (
    data_config[key]
    for key in ("sample_rate", "n_fft", "win_length", "hop_length", "n_mels")
)
train_clip_seconds = data_config["train_clip_seconds"]

transform = Transform(sample_rate, n_fft, win_length, hop_length, n_mels)
ljspeech = TransformDataset(ljspeech_meta, transform)

# Validation is sliced with bounds (0, valid_size); training gets
# (valid_size, len(ljspeech)).
valid_size = data_config["valid_size"]
ljspeech_valid = SliceDataset(ljspeech, 0, valid_size)
ljspeech_train = SliceDataset(ljspeech, valid_size, len(ljspeech))

# Model hyper-parameters needed to size the context window.
model_config = config["model"]
n_loop, n_layer, filter_size = (
    model_config[key] for key in ("n_loop", "n_layer", "filter_size")
)

# context_size = 1 + n_layer * (filter_size**0 + ... + filter_size**(n_loop-1))
context_size = 1 + n_layer * sum(filter_size**i for i in range(n_loop))
print("context size is {} samples".format(context_size))

# Both batch functions share the same positional arguments; only the
# validation one is built with valid=True.
collector_args = (context_size, sample_rate, hop_length, train_clip_seconds)
train_batch_fn = DataCollector(*collector_args)
valid_batch_fn = DataCollector(*collector_args, valid=True)
print("{}: {}".format(k, v)) ljspeech_meta = LJSpeechMetaData(args.data) data_config = config["data"] sample_rate = data_config["sample_rate"] n_fft = data_config["n_fft"] win_length = data_config["win_length"] hop_length = data_config["hop_length"] n_mels = data_config["n_mels"] train_clip_seconds = data_config["train_clip_seconds"] transform = Transform(sample_rate, n_fft, win_length, hop_length, n_mels) ljspeech = TransformDataset(ljspeech_meta, transform) valid_size = data_config["valid_size"] ljspeech_valid = CacheDataset(SliceDataset(ljspeech, 0, valid_size)) ljspeech_train = CacheDataset( SliceDataset(ljspeech, valid_size, len(ljspeech))) model_config = config["model"] n_loop = model_config["n_loop"] n_layer = model_config["n_layer"] filter_size = model_config["filter_size"] context_size = 1 + n_layer * sum([filter_size**i for i in range(n_loop)]) print("context size is {} samples".format(context_size)) train_batch_fn = DataCollector(context_size, sample_rate, hop_length, train_clip_seconds) valid_batch_fn = DataCollector(context_size, sample_rate, hop_length, train_clip_seconds,