def create_data(config, data_path):
    """Build the training and validation data loaders over LJSpeech.

    The dataset is split by index: the first ``config["valid_size"]``
    examples are used for validation and all remaining examples are used
    for training.

    Args:
        config: mapping that provides the keys ``"valid_size"``,
            ``"p_pronunciation"`` and ``"batch_size"``.
        data_path: forwarded unchanged to ``LJSpeech``.

    Returns:
        A ``(train_loader, valid_loader)`` tuple.
    """
    corpus = LJSpeech(data_path)
    n_valid = config["valid_size"]

    # Training split: everything after the validation slice, drawn in
    # random order and batched with the configured batch size.
    train_split = SliceDataset(corpus, n_valid, len(corpus))
    train_batch_fn = DataCollector(config["p_pronunciation"])
    train_order = RandomSampler(train_split)
    train_batches = DataCargo(
        train_split,
        train_batch_fn,
        batch_size=config["batch_size"],
        sampler=train_order,
    )
    train_loader = DataLoader.from_generator(
        capacity=10, return_list=True
    ).set_batch_generator(train_batches)

    # Validation split: the leading slice, visited sequentially one example
    # per batch; its collector is built with a fixed 1. rather than
    # config["p_pronunciation"].
    valid_split = SliceDataset(corpus, 0, n_valid)
    valid_batch_fn = DataCollector(1.)
    valid_order = SequentialSampler(valid_split)
    valid_batches = DataCargo(
        valid_split,
        valid_batch_fn,
        batch_size=1,
        sampler=valid_order,
    )
    valid_loader = DataLoader.from_generator(
        capacity=2, return_list=True
    ).set_batch_generator(valid_batches)

    return train_loader, valid_loader
n_mels = data_config["n_mels"] train_clip_seconds = data_config["train_clip_seconds"] transform = Transform(sample_rate, n_fft, win_length, hop_length, n_mels) ljspeech = TransformDataset(ljspeech_meta, transform) valid_size = data_config["valid_size"] ljspeech_valid = SliceDataset(ljspeech, 0, valid_size) ljspeech_train = SliceDataset(ljspeech, valid_size, len(ljspeech)) model_config = config["model"] n_loop = model_config["n_loop"] n_layer = model_config["n_layer"] filter_size = model_config["filter_size"] context_size = 1 + n_layer * sum([filter_size**i for i in range(n_loop)]) print("context size is {} samples".format(context_size)) train_batch_fn = DataCollector(context_size, sample_rate, hop_length, train_clip_seconds) valid_batch_fn = DataCollector(context_size, sample_rate, hop_length, train_clip_seconds, valid=True) batch_size = data_config["batch_size"] train_cargo = DataCargo(ljspeech_train, train_batch_fn, batch_size, sampler=RandomSampler(ljspeech_train)) # only batch=1 for validation is enabled valid_cargo = DataCargo(ljspeech_valid, valid_batch_fn,
clip_norm) ljspeech = TransformDataset(meta, transform) # =========================dataiterator========================= # use meta data's text length as a sort key for the sampler train_config = config["train"] batch_size = train_config["batch_size"] text_lengths = [len(example[2]) for example in meta] sampler = PartialyRandomizedSimilarTimeLengthSampler(text_lengths, batch_size) # some hyperparameters affect how we process data, so create a data collector! model_config = config["model"] downsample_factor = model_config["downsample_factor"] r = model_config["outputs_per_step"] collector = DataCollector(downsample_factor=downsample_factor, r=r) ljspeech_loader = DataCargo( ljspeech, batch_fn=collector, batch_size=batch_size, sampler=sampler) # =========================model========================= if args.device == -1: place = fluid.CPUPlace() else: place = fluid.CUDAPlace(args.device) with dg.guard(place): # =========================model========================= n_speakers = model_config["n_speakers"] speaker_dim = model_config["speaker_embed_dim"] speaker_embed_std = model_config["speaker_embedding_weight_std"] n_vocab = en.n_vocab