Пример #1
0
def create_data(config, data_path):
    dataset = LJSpeech(data_path)

    train_dataset = SliceDataset(dataset, config["valid_size"], len(dataset))
    train_collator = DataCollector(config["p_pronunciation"])
    train_sampler = RandomSampler(train_dataset)
    train_cargo = DataCargo(train_dataset,
                            train_collator,
                            batch_size=config["batch_size"],
                            sampler=train_sampler)
    train_loader = DataLoader\
                 .from_generator(capacity=10, return_list=True)\
                 .set_batch_generator(train_cargo)

    valid_dataset = SliceDataset(dataset, 0, config["valid_size"])
    valid_collector = DataCollector(1.)
    valid_sampler = SequentialSampler(valid_dataset)
    valid_cargo = DataCargo(valid_dataset,
                            valid_collector,
                            batch_size=1,
                            sampler=valid_sampler)
    valid_loader = DataLoader\
                 .from_generator(capacity=2, return_list=True)\
                 .set_batch_generator(valid_cargo)
    return train_loader, valid_loader
Пример #2
0
    n_mels = data_config["n_mels"]
    train_clip_seconds = data_config["train_clip_seconds"]
    transform = Transform(sample_rate, n_fft, win_length, hop_length, n_mels)
    ljspeech = TransformDataset(ljspeech_meta, transform)

    valid_size = data_config["valid_size"]
    ljspeech_valid = SliceDataset(ljspeech, 0, valid_size)
    ljspeech_train = SliceDataset(ljspeech, valid_size, len(ljspeech))

    model_config = config["model"]
    n_loop = model_config["n_loop"]
    n_layer = model_config["n_layer"]
    filter_size = model_config["filter_size"]
    context_size = 1 + n_layer * sum([filter_size**i for i in range(n_loop)])
    print("context size is {} samples".format(context_size))
    train_batch_fn = DataCollector(context_size, sample_rate, hop_length,
                                   train_clip_seconds)
    valid_batch_fn = DataCollector(context_size,
                                   sample_rate,
                                   hop_length,
                                   train_clip_seconds,
                                   valid=True)

    batch_size = data_config["batch_size"]
    train_cargo = DataCargo(ljspeech_train,
                            train_batch_fn,
                            batch_size,
                            sampler=RandomSampler(ljspeech_train))

    # only batch=1 for validation is enabled
    valid_cargo = DataCargo(ljspeech_valid,
                            valid_batch_fn,
Пример #3
0
                          clip_norm)
    ljspeech = TransformDataset(meta, transform)

    # =========================dataiterator=========================
    # use meta data's text length as a sort key for the sampler
    train_config = config["train"]
    batch_size = train_config["batch_size"]
    text_lengths = [len(example[2]) for example in meta]
    sampler = PartialyRandomizedSimilarTimeLengthSampler(text_lengths,
                                                         batch_size)

    # some hyperparameters affect how we process data, so create a data collector!
    model_config = config["model"]
    downsample_factor = model_config["downsample_factor"]
    r = model_config["outputs_per_step"]
    collector = DataCollector(downsample_factor=downsample_factor, r=r)
    ljspeech_loader = DataCargo(
        ljspeech, batch_fn=collector, batch_size=batch_size, sampler=sampler)

    # =========================model=========================
    if args.device == -1:
        place = fluid.CPUPlace()
    else:
        place = fluid.CUDAPlace(args.device)

    with dg.guard(place):
        # =========================model=========================
        n_speakers = model_config["n_speakers"]
        speaker_dim = model_config["speaker_embed_dim"]
        speaker_embed_std = model_config["speaker_embedding_weight_std"]
        n_vocab = en.n_vocab