示例#1
0
def test_minmax():
    """Exercise ``P.minmax`` and the min-max scaling helpers end to end.

    Checks, in order: that the dataset-wide min/max are finite, that forward
    scaling with ``feature_range=(0, 0.99)`` stays within ``[0, 1]``, that
    calling the scalers without ``(min, max)`` or ``(scale_, min_)`` raises
    ``ValueError``, that both parameterizations agree, that a padded dataset
    yields the same statistics when lengths are supplied, and that the
    inverse transform round-trips.
    """
    # Pick linguistic features for testing
    X, _ = example_file_data_sources_for_acoustic_model()
    X = FileSourceDataset(X)
    # Unpadded lengths; reused below to mask the padded dataset.
    lengths = [len(x) for x in X]
    X_min, X_max = P.minmax(X)
    assert np.isfinite(X_min).all()
    assert np.isfinite(X_max).all()

    x = X[0]
    x_scaled = P.minmax_scale(x, X_min, X_max, feature_range=(0, 0.99))
    assert np.max(x_scaled) <= 1
    assert np.min(x_scaled) >= 0
    assert np.isfinite(x_scaled).all()

    # Need to specify (min, max) or (scale_, min_)
    @raises(ValueError)
    def __test_raise1(x):
        P.minmax_scale(x)

    @raises(ValueError)
    def __test_raise2(x):
        P.inv_minmax_scale(x)

    __test_raise1(x)
    __test_raise2(x)

    # Explicit scale_ and min_
    min_, scale_ = P.minmax_scale_params(X_min, X_max, feature_range=(0, 0.99))
    x_scaled_hat = P.minmax_scale(x, min_=min_, scale_=scale_)
    assert np.allclose(x_scaled, x_scaled_hat)

    # For padded dataset
    X, _ = example_file_data_sources_for_acoustic_model()
    X = PaddedFileSourceDataset(X, 1000)
    # Should get same results with padded features
    X_min_hat, X_max_hat = P.minmax(X, lengths)
    assert np.allclose(X_min, X_min_hat)
    assert np.allclose(X_max, X_max_hat)

    # Inverse transform
    x = X[0]
    x_hat = P.inv_minmax_scale(P.minmax_scale(x, X_min, X_max), X_min, X_max)
    assert np.allclose(x, x_hat)

    x_hat = P.inv_minmax_scale(P.minmax_scale(x, scale_=scale_, min_=min_),
                               scale_=scale_,
                               min_=min_)
    assert np.allclose(x, x_hat)
示例#2
0
 def _get_x_stat(self):
     """Return the result of ``minmax`` applied to ``self.xs``.

     The value produced by ``self._get_lengths(self.xs)`` is passed to
     ``minmax`` as its second argument.
     """
     data = self.xs
     data_lengths = self._get_lengths(data)
     return minmax(data, data_lengths)
示例#3
0
文件: train.py 项目: tony32769/gantts
                                        last_sample_count=last_sample_count)
        data_std = np.sqrt(data_var)

        np.save(join(data_dir, "data_mean"), data_mean)
        np.save(join(data_dir, "data_var"), data_var)

        if hp.generator_params["in_dim"] is None:
            hp.generator_params["in_dim"] = data_mean.shape[-1]
        if hp.generator_params["out_dim"] is None:
            hp.generator_params["out_dim"] = data_mean.shape[-1]

        # Dataset loaders
        dataset_loaders = get_vc_data_loaders(X, Y, data_mean, data_std)
    else:
        ty = "acoustic" if hp == hparams.tts_acoustic else "duration"
        X_data_min, X_data_max = P.minmax(X[phase])
        Y_data_mean, Y_data_var = P.meanvar(Y[phase])
        Y_data_std = np.sqrt(Y_data_var)

        np.save(join(data_dir, "X_{}_data_min".format(ty)), X_data_min)
        np.save(join(data_dir, "X_{}_data_max".format(ty)), X_data_max)
        np.save(join(data_dir, "Y_{}_data_mean".format(ty)), Y_data_mean)
        np.save(join(data_dir, "Y_{}_data_var".format(ty)), Y_data_var)

        if hp.generator_params["in_dim"] is None:
            hp.generator_params["in_dim"] = X_data_min.shape[-1]
        if hp.generator_params["out_dim"] is None:
            hp.generator_params["out_dim"] = Y_data_mean.shape[-1]
        if hp.discriminator_params["in_dim"] is None:
            sizes = get_static_stream_sizes(hp.stream_sizes,
                                            hp.has_dynamic_features,
示例#4
0
        FeatureFileSource(os.path.join(DATA_ROOT, "X_{}".format(ty)),
                          dim=x_dim))
    Y[ty] = FileSourceDataset(
        FeatureFileSource(os.path.join(DATA_ROOT, "Y_{}".format(ty)),
                          dim=y_dim))
    # this triggers file loads, but can be neglectable in terms of performance.
    utt_lengths[ty] = [len(x) for x in X[ty]]

# Normalization statistics, keyed by feature type ("acoustic" / "duration").
X_min, X_max = {}, {}
Y_mean, Y_var, Y_scale = {}, {}, {}

for typ in ("acoustic", "duration"):
    # Inputs: min/max of X[typ], computed with the recorded lengths.
    X_min[typ], X_max[typ] = minmax(X[typ], utt_lengths[typ])
    # Targets: mean/variance of Y[typ]; the scale is the square root of the
    # variance.
    Y_mean[typ], Y_var[typ] = meanvar(Y[typ], utt_lengths[typ])
    Y_scale[typ] = np.sqrt(Y_var[typ])

# Output pickle file names; the handles in `f` below follow this same order.
fname_list = [
    'X_min.pkl', 'X_max.pkl', 'Y_mean.pkl', 'Y_var.pkl', 'Y_scale.pkl'
]

# ExitStack closes every handle registered with it when the block exits.
with ExitStack() as stack:
    # Open all output files for binary writing under DATA_ROOT.
    f = [
        stack.enter_context(open(os.path.join(DATA_ROOT, fname), 'wb'))
        for fname in fname_list
    ]
    # NOTE(review): five files are opened (and therefore truncated) but only
    # the first three dumps are visible here; confirm that Y_var and Y_scale
    # are written to f[3] / f[4] as well, otherwise those files stay empty.
    pickle.dump(X_min, f[0])
    pickle.dump(X_max, f[1])
    pickle.dump(Y_mean, f[2])
示例#5
0
                             train=train))
        Y[ty][phase] = FileSourceDataset(
            BinaryFileSource(join(DATA_ROOT, "Y_{}".format(ty)),
                             dim=y_dim,
                             train=train))
        utt_lengths[ty][phase] = np.array([len(x) for x in X[ty][phase]],
                                          dtype=np.int)

# Normalization statistics, keyed by feature type; only "acoustic" is filled.
X_min, X_max = {}, {}
Y_mean, Y_var, Y_scale = {}, {}, {}

for typ in ("acoustic",):
    # Statistics are taken from the "train" split only.
    X_min[typ], X_max[typ] = minmax(
        X[typ]["train"],
        utt_lengths[typ]["train"],
    )
    Y_mean[typ], Y_var[typ] = meanvar(
        Y[typ]["train"],
        utt_lengths[typ]["train"],
    )
    # The scale is the square root of the variance.
    Y_scale[typ] = np.sqrt(Y_var[typ])

from torch.utils import data as data_utils

import torch
from torch import nn
from torch.autograd import Variable
from tqdm import tnrange, tqdm
from torch import optim
import torch.nn.functional as F

# Settings copied from `args` into module-level names.
# NOTE(review): `args` is presumably the parsed command-line options -- confirm.
z_dim = args.z_dim
dropout = args.dropout_ratio
示例#6
0
def create_loader(test=False):
    """Build in-memory train and validation loaders for the acoustic model.

    Inputs (``X``) are min-max scaled into ``(0.01, 0.99)`` using statistics
    computed on the "train" split only; targets (``Y``) are passed through
    unscaled.  Every loader entry is a three-element list
    ``[x, y, mora_index]``.

    Mora-index files are assigned by their position ``i`` in the sorted file
    list: ``(i - 1) % 20 == 0`` is held out, ``i % 20 == 0`` goes to the
    second (validation) loader, and everything else goes to the training
    loader.

    Args:
        test: When true, also return the positions of the held-out
            mora-index files.

    Returns:
        ``(train_loader, test_loader)``, or
        ``(train_loader, test_loader, test_not_valid)`` when ``test`` is true,
        where ``test_not_valid`` is the list of held-out positions.
    """
    DATA_ROOT = "./data/basic5000"
    X = {"acoustic": {}}
    Y = {"acoustic": {}}
    for ty in ["acoustic"]:
        # Feature dimensions depend on the feature type only, not the phase.
        x_dim = (duration_linguistic_dim
                 if ty == "duration" else acoustic_linguisic_dim)
        y_dim = duration_dim if ty == "duration" else acoustic_dim
        for phase in ["train", "test"]:
            train = phase == "train"
            X[ty][phase] = FileSourceDataset(
                BinaryFileSource(join(DATA_ROOT, "X_{}".format(ty)),
                                 dim=x_dim,
                                 train=train))
            Y[ty][phase] = FileSourceDataset(
                BinaryFileSource(join(DATA_ROOT, "Y_{}".format(ty)),
                                 dim=y_dim,
                                 train=train))

    X_train = X["acoustic"]["train"]
    X_test = X["acoustic"]["test"]

    # Builtin ``int`` replaces the ``np.int`` alias, which was removed in
    # NumPy 1.24 and would raise AttributeError there.
    train_lengths = np.array([len(x) for x in X_train], dtype=int)

    # Scaling statistics come from the training split only.  The mean and
    # variance of Y were computed by the original code but never used, so
    # that pass over the data is omitted.
    X_min, X_max = minmax(X_train, train_lengths)

    mora_index_paths = sorted(
        glob(join("data/basic5000/mora_index", "squeezed_*.csv")))

    train_mora_index_lists = []
    test_mora_index_lists = []
    test_not_valid = []

    for i, path in enumerate(mora_index_paths):
        mora_i = np.loadtxt(path).reshape(-1)
        if (i - 1) % 20 == 0:  # held out
            if test:
                test_not_valid.append(i)
        elif i % 20 == 0:  # valid
            test_mora_index_lists.append(mora_i)
        else:
            train_mora_index_lists.append(mora_i)

    def _scale(x):
        # Map one utterance into (0.01, 0.99) with the training statistics.
        return minmax_scale(x, X_min, X_max, feature_range=(0.01, 0.99))

    X_acoustic_train = [_scale(x) for x in X_train]
    Y_acoustic_train = list(Y["acoustic"]["train"])

    X_acoustic_test = [_scale(x) for x in X_test]
    Y_acoustic_test = list(Y["acoustic"]["test"])

    # Entries are paired by position; indexing (rather than zip) keeps the
    # IndexError if there are fewer utterances than mora-index lists.
    train_loader = [
        [X_acoustic_train[i], Y_acoustic_train[i], mora_i]
        for i, mora_i in enumerate(train_mora_index_lists)
    ]
    test_loader = [
        [X_acoustic_test[i], Y_acoustic_test[i], mora_i]
        for i, mora_i in enumerate(test_mora_index_lists)
    ]

    if test:
        # The original returned the undefined name ``test_not_valid_loader``
        # here, which always raised NameError; the collected held-out
        # positions are returned instead.
        # NOTE(review): confirm callers want positions rather than entries.
        return train_loader, test_loader, test_not_valid
    return train_loader, test_loader
# Wrap each data source in a FileSourceDataset.
Y_silenceIdx = FileSourceDataset(SilenceSampleIdxSource(data_root=DATA_ROOT, frame_shift_in_micro_sec=625))
X_linguistic = FileSourceDataset(LinguisticSource(data_root=DATA_ROOT, question_path=QUESTION_PATH))
X_pyworld = FileSourceDataset(PyworldSource(data_root=DATA_ROOT))
X_melmfcc = FileSourceDataset(MelspecMfccSource(data_root=DATA_ROOT))
# Report the second-axis size of the first item of each dataset.
print('X_lingusitc with dimension = {}'.format(X_linguistic[0].shape[1]))
print('X_pyworld with dimension = {}'.format(X_pyworld[0].shape[1]))
print('X_melmfcc with dimension = {}'.format(X_melmfcc[0].shape[1]))
# Calculate Scale factors:

print('Calculating scale factors: This process will take longer than 10 minutes...')
#wav_len = [len(y) for y in Y]
#y_min, y_max = minmax(Y, wav_len)

# All statistics are collected in one dict so they can be saved together.
scale_factors = {}
scale_factors['linguistic_len'] = [len(x) for x in X_linguistic]
scale_factors['linguistic_min'], scale_factors['linguistic_max'] = minmax(X_linguistic, scale_factors['linguistic_len'])

scale_factors['pyworld_len'] = [len(x) for x in X_pyworld]
scale_factors['pyworld_mean'], scale_factors['pyworld_var'] = meanvar(X_pyworld, scale_factors['pyworld_len'])
scale_factors['pyworld_std'] = np.sqrt(scale_factors['pyworld_var'])
scale_factors['pyworld_min'], scale_factors['pyworld_max'] = minmax(X_pyworld, scale_factors['pyworld_len'])

# NOTE(review): the melmfcc statistics reuse the pyworld lengths; this is only
# right if both datasets have the same per-item lengths -- confirm.
scale_factors['melmfcc_mean'], scale_factors['melmfcc_var'] = meanvar(X_melmfcc, scale_factors['pyworld_len'])
scale_factors['melmfcc_std'] = np.sqrt(scale_factors['melmfcc_var'])
scale_factors['melmfcc_min'], scale_factors['melmfcc_max'] = minmax(X_melmfcc, scale_factors['pyworld_len'])
# Saving a dict stores it as a pickled 0-d object array.
np.save(DST_ROOT + 'scale_factors.npy', scale_factors)

# To load scale_factors:
#     scale_factors = np.load(DST_ROOT + 'scale_factors.npy', allow_pickle=True).item()
# allow_pickle=True is required because np.load has refused pickled object
# arrays by default since NumPy 1.16.3. The file was written just above by
# this script; do not use allow_pickle=True on files from untrusted sources.
scale_factors = np.load(DST_ROOT + 'scale_factors.npy', allow_pickle=True).item()
# <wav>
示例#8
0
    if not exists(checkpoint_dir):
        os.makedirs(checkpoint_dir)

    X = []
    Y = []
    # utt_lengths = []

    X = FileSourceDataset(TextDataSource())
    Mel = FileSourceDataset(MelSpecDataSource())
    Y = FileSourceDataset(LinearSpecDataSource())

    print("Size of dataset for {}: {}".format(phase, len(X)))

    ty = "acoustic" if hp == hparams_gan.tts_acoustic else "duration"
    X_data_min, X_data_max = P.minmax( X )
    Mel_data_mean, Mel_data_var = P.meanvar( Mel )
    Mel_data_std = np.sqrt( Mel_data_var )

    np.save(join(data_dir, "X_{}_data_min".format(ty)), X_data_min)
    np.save(join(data_dir, "X_{}_data_max".format(ty)), X_data_max)
    np.save(join(data_dir, "Mel_{}_data_mean".format(ty)), Mel_data_mean)
    np.save(join(data_dir, "Mel_{}_data_var".format(ty)), Mel_data_var)

    if hp.discriminator_params["in_dim"] is None:
        sizes = get_static_stream_sizes(
            hp.stream_sizes, hp.has_dynamic_features, len(hp.windows))
        D = int(np.array(sizes[hp.adversarial_streams]).sum())
        if hp.adversarial_streams[0]:
            D -= hp.mask_nth_mgc_for_adv_loss
        if hp.discriminator_linguistic_condition: