Пример #1
0
def train_VAE(
    gpu=GPU,
    dataset_fileName=f'{DATASET_SAVE_PATH}/wsj0_normalize_{N_FFT}_{HOP_LENGTH}.pic'
):
    """Train the VAE on a pickled dataset, checkpointing after every epoch.

    If a best-model checkpoint for the current latent size already exists
    under ``MODEL_SAVE_PATH``, a message is printed and the function returns
    without training, so an earlier result is never overwritten.

    After each epoch the current model and the list of per-minibatch losses
    are written to ``MODEL_SAVE_PATH``; whenever the epoch's mean loss
    improves on the best seen so far, the model file is also copied to the
    ``model-best-...`` checkpoint.

    Args:
        gpu: Device id passed to ``cuda.get_device_from_id``.
        dataset_fileName: Path of the pickled dataset. The loaded object is
            indexed as ``dataset[:, indices]`` and ``dataset.shape[1]`` is
            used as the number of samples (presumably a
            ``(n_freq, n_frames)`` array -- confirm against the dataset
            builder).

    Returns:
        None.
    """
    file_suffix = f"normal-scale=gamma-D={N_LATENT}"

    # Build every output path once, with a real path join, so that the
    # "already trained" guard inspects exactly the file written below
    # whether or not MODEL_SAVE_PATH ends with a separator.
    model_path = os.path.join(
        MODEL_SAVE_PATH, 'model-{0}.npz'.format(file_suffix))
    best_model_path = os.path.join(
        MODEL_SAVE_PATH, 'model-best-{0}.npz'.format(file_suffix))
    loss_path = os.path.join(
        MODEL_SAVE_PATH, 'loss-{0}.pic'.format(file_suffix))

    if os.path.isfile(best_model_path):
        print(f"{best_model_path} already exist")
        # A bare `exit` expression is a no-op; actually stop here.
        return

    cuda.get_device_from_id(gpu).use()

    # Load dataset
    # NOTE(review): unpickling executes arbitrary code from the file; only
    # point dataset_fileName at trusted, locally generated data.
    with open(dataset_fileName, 'rb') as f:
        dataset = pic.load(f)
    n_data = dataset.shape[1]

    # Prepare VAE model
    model = network_VAE.VAE(n_freq=int(N_FFT / 2 + 1), n_latent=N_LATENT)
    model.to_gpu()

    # Setup Optimizer
    optimizer = optimizers.Adam(LEARNING_RATE)
    optimizer.setup(model)

    # Learning loop
    min_loss = np.inf
    loss_list = []
    for epoch in range(N_EPOCH):
        print('Epoch:', epoch + 1)

        sum_loss = 0
        perm = np.random.permutation(n_data)
        for ii in progressbar(range(0, n_data, BATCH_SIZE)):
            minibatch = dataset[:, perm[ii:ii + BATCH_SIZE]].T
            batch_len = len(minibatch)

            # Random per-sample gain drawn from Gamma(shape=2, scale=0.5).
            scales = np.random.gamma(2, 0.5, batch_len)
            minibatch = minibatch * scales[:, None]
            x = chainer.Variable(cp.asarray(minibatch, dtype=cp.float32))

            optimizer.update(model.get_loss_func(), x)

            # Weight by the real minibatch length: the final minibatch can
            # be shorter than BATCH_SIZE, and the sum is divided by n_data.
            # (Assumes model.loss is a per-sample mean -- TODO confirm.)
            batch_loss = float(model.loss.data)
            sum_loss += batch_loss * batch_len
            loss_list.append(batch_loss)

        sum_loss /= n_data
        print("Loss:", sum_loss)

        print('save the model and optimizer')
        serializers.save_npz(model_path, model)
        with open(loss_path, 'wb') as f:
            pic.dump(loss_list, f)

        if sum_loss < min_loss:
            shutil.copyfile(model_path, best_model_path)
            min_loss = sum_loss
Пример #2
0
    parser.add_argument(                '--n_fft', type= int, default=  1024, help='number of frequencies')
    parser.add_argument(              '--n_noise', type= int, default=     1, help='number of noise')
    parser.add_argument(             '--n_latent', type= int, default=    16, help='dimention of encoded vector')
    parser.add_argument(        '--n_basis_noise', type= int, default=    64, help='number of basis of noise (MODE_noise=NMF)')
    parser.add_argument(             '--init_SCM', type=  str, default="obs", help='unit, obs, ILRMA')
    parser.add_argument(          '--n_iteration', type= int, default=    30, help='number of iteration')
    parser.add_argument(        '--n_Z_iteration', type= int, default=    30, help='number of update Z iteration')
    parser.add_argument(        '--mode_update_Z', type= str, default="sampling", help='sampling, sampling2, backprop, backprop2, hybrid, hybrid2')
    parser.add_argument('--mode_update_parameter', type= str, default= "all", help='all, one_by_one')
    args = parser.parse_args()


    sys.path.append("../DeepSpeechPrior")
    import network_VAE
    model_fileName = "../DeepSpeechPrior/model-VAE-best-scale=gamma-D={}.npz".format(args.n_latent)
    speech_VAE = network_VAE.VAE(n_latent=args.n_latent)
    serializers.load_npz(model_fileName, speech_VAE)
    name_DNN = "VAE"

    if args.gpu < 0:
        import numpy as xp
    else:
        import cupy as xp
        print("Use GPU " + str(args.gpu))
        cuda.get_device_from_id(args.gpu).use()
        speech_VAE.to_gpu()

    wav, fs = sf.read(args.input_fileName)
    wav = wav.T
    M = len(wav)
    for m in range(M):