Example #1
import unittest

import torch
from torchsummary import summary

from models import VQVAE  # assumed local module that defines the model under test


class TestVQVAE(unittest.TestCase):
    def setUp(self) -> None:
        # self.model2 = VAE(3, 10)
        self.model = VQVAE(3, 64, 512)

    def test_summary(self):
        # torchsummary prints its table as a side effect, so no print() wrapper is needed
        summary(self.model, (3, 64, 64), device='cpu')
        # summary(self.model2, (3, 64, 64), device='cpu')

    def test_forward(self):
        # number of trainable parameters
        print(sum(p.numel() for p in self.model.parameters() if p.requires_grad))
        x = torch.randn(16, 3, 64, 64)
        y = self.model(x)
        print("Model Output size:", y[0].size())
        # print("Model2 Output size:", self.model2(x)[0].size())

    def test_loss(self):
        x = torch.randn(16, 3, 64, 64)

        result = self.model(x)
        loss = self.model.loss_function(*result, M_N=0.005)
        print(loss)

    @unittest.skipUnless(torch.cuda.is_available(), 'sampling requires a CUDA device')
    def test_sample(self):
        self.model.cuda()
        y = self.model.sample(8, 'cuda')
        print(y.shape)

    def test_generate(self):
        x = torch.randn(16, 3, 64, 64)
        y = self.model.generate(x)
        print(y.shape)
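# Minimal entry point for running this suite directly (test_sample is skipped
# on CPU-only machines by the skipUnless guard above):
if __name__ == '__main__':
    unittest.main()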
Example #2
# get_config, Encoder, Decoder, VectorQuantizer and VQVAE are assumed to come
# from the local models.py / config utilities this script copies below.
import datetime
import os
import shutil

import numpy as np
import torch
from torch import nn
from torch.utils.data import DataLoader
from tqdm import tqdm


def train_CIFAR10(opt):

    import torchvision.datasets as datasets
    import torchvision.transforms as transforms
    from torchvision.utils import make_grid
    from matplotlib import pyplot as plt
    params = get_config(opt.config)

    save_path = os.path.join(
        params['save_path'],
        datetime.datetime.now().strftime('%Y_%m_%d_%H_%M_%S'))
    os.makedirs(save_path, exist_ok=True)
    shutil.copy('models.py', os.path.join(save_path, 'models.py'))
    shutil.copy('train.py', os.path.join(save_path, 'train.py'))
    shutil.copy(opt.config,
                os.path.join(save_path, os.path.basename(opt.config)))

    cuda = torch.cuda.is_available()
    gpu_ids = list(range(torch.cuda.device_count()))

    TensorType = torch.cuda.FloatTensor if cuda else torch.FloatTensor

    data_path = os.path.join(params['data_root'], 'cifar10')

    os.makedirs(data_path, exist_ok=True)

    train_dataset = datasets.CIFAR10(root=data_path,
                                     train=True,
                                     download=True,
                                     transform=transforms.Compose([
                                         transforms.ToTensor(),
                                         transforms.Normalize((0.5, 0.5, 0.5),
                                                              (0.5, 0.5, 0.5))
                                     ]))

    val_dataset = datasets.CIFAR10(root=data_path,
                                   train=False,
                                   download=True,
                                   transform=transforms.Compose([
                                       transforms.ToTensor(),
                                       transforms.Normalize((0.5, 0.5, 0.5),
                                                            (0.5, 0.5, 0.5))
                                   ]))

    train_loader = DataLoader(train_dataset,
                              batch_size=params['batch_size'] * max(1, len(gpu_ids)),
                              shuffle=True,
                              num_workers=params['num_workers'],
                              pin_memory=cuda)
    val_loader = DataLoader(val_dataset,
                            batch_size=1,
                            num_workers=params['num_workers'],
                            pin_memory=cuda)

    # torchvision exposes the raw CIFAR-10 images as `.data`
    # (the old `.train_data` attribute was removed)
    data_variance = np.var(train_dataset.data / 255.0)

    encoder = Encoder(params['dim'], params['residual_channels'],
                      params['n_layers'], params['d'])
    decoder = Decoder(params['dim'], params['residual_channels'],
                      params['n_layers'], params['d'])

    vq = VectorQuantizer(params['k'], params['d'], params['beta'],
                         params['decay'], TensorType)

    if params['checkpoint'] is not None:
        checkpoint = torch.load(params['checkpoint'])

        params['start_epoch'] = checkpoint['epoch']
        encoder.load_state_dict(checkpoint['encoder'])
        decoder.load_state_dict(checkpoint['decoder'])
        vq.load_state_dict(checkpoint['vq'])

    model = VQVAE(encoder, decoder, vq)

    if cuda:
        model = nn.DataParallel(model.cuda(), device_ids=gpu_ids)

    parameters = list(model.parameters())
    # named `optimizer` so it does not shadow the `opt` argument holding the CLI options
    optimizer = torch.optim.Adam([p for p in parameters if p.requires_grad],
                                 lr=params['lr'])

    for epoch in range(params['start_epoch'], params['num_epochs']):
        train_bar = tqdm(train_loader)
        for data, _ in train_bar:
            if cuda:
                data = data.cuda()
            optimizer.zero_grad()

            vq_loss, data_recon, _ = model(data)
            recon_error = torch.mean((data_recon - data)**2) / data_variance
            loss = recon_error + vq_loss.mean()
            loss.backward()
            optimizer.step()

            train_bar.set_description('Epoch {}: loss {:.4f}'.format(
                epoch + 1, loss.item()))

        model.eval()
        data_val, _ = next(iter(val_loader))

        if cuda:
            data_val = data_val.cuda()
        with torch.no_grad():
            _, data_recon_val, _ = model(data_val)

        # undo Normalize((0.5,)*3, (0.5,)*3): x in [-1, 1] -> x * 0.5 + 0.5 in [0, 1]
        plt.imsave(os.path.join(save_path, 'latest_val_recon.png'),
                   (make_grid(data_recon_val.cpu().data) * 0.5 +
                    0.5).numpy().transpose(1, 2, 0))
        plt.imsave(os.path.join(save_path, 'latest_val_orig.png'),
                   (make_grid(data_val.cpu().data) * 0.5 +
                    0.5).numpy().transpose(1, 2, 0))

        model.train()

        torch.save(
            {
                'epoch': epoch,
                'encoder': encoder.state_dict(),
                'decoder': decoder.state_dict(),
                'vq': vq.state_dict(),
            }, os.path.join(save_path, '{}_checkpoint.pth'.format(epoch)))
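# A minimal sketch of the VectorQuantizer constructed above, assuming the
# EMA-updated codebook from the VQ-VAE papers (the `decay` argument suggests
# EMA updates). Class and variable names here are illustrative, not this
# repo's actual implementation; `tensor_type` is accepted only to mirror the
# VectorQuantizer(k, d, beta, decay, TensorType) call.
import torch
import torch.nn.functional as F
from torch import nn


class VectorQuantizerEMASketch(nn.Module):
    def __init__(self, k, d, beta, decay, tensor_type=None):
        super().__init__()
        self.k, self.d, self.beta, self.decay = k, d, beta, decay
        embed = torch.randn(k, d)
        self.register_buffer('embedding', embed)
        self.register_buffer('cluster_size', torch.zeros(k))
        self.register_buffer('embed_avg', embed.clone())

    def forward(self, z_e):
        # z_e: (B, d, H, W) -> (B*H*W, d) rows for nearest-neighbour lookup
        flat = z_e.permute(0, 2, 3, 1).reshape(-1, self.d)
        dist = (flat.pow(2).sum(1, keepdim=True)
                - 2 * flat @ self.embedding.t()
                + self.embedding.pow(2).sum(1))
        idx = dist.argmin(1)
        z_q = self.embedding[idx].view(z_e.shape[0], z_e.shape[2],
                                       z_e.shape[3], self.d).permute(0, 3, 1, 2)

        if self.training:
            with torch.no_grad():  # EMA codebook update; no gradients flow here
                onehot = F.one_hot(idx, self.k).type(flat.dtype)
                self.cluster_size.mul_(self.decay).add_(onehot.sum(0),
                                                        alpha=1 - self.decay)
                self.embed_avg.mul_(self.decay).add_(onehot.t() @ flat,
                                                     alpha=1 - self.decay)
                n = self.cluster_size.sum()
                stable = (self.cluster_size + 1e-5) / (n + self.k * 1e-5) * n
                self.embedding.copy_(self.embed_avg / stable.unsqueeze(1))

        # with EMA updates, only the beta-weighted commitment term needs gradients
        vq_loss = self.beta * F.mse_loss(z_e, z_q.detach())
        z_q = z_e + (z_q - z_e).detach()  # straight-through estimator
        return vq_loss, z_q, idx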
Example #3
# train_files, test_files = train_test_split(files, test_size=test_size, random_state=random_state)

for i, mora_i in enumerate(mora_index_lists_for_model):
    if (i - 1) % 20 == 0:  # every 20th item (offset 1): held-out real test set, skipped
        pass
    elif i % 20 == 0:  # every 20th item (offset 0): validation split (stored as "test")
        test_mora_index_lists.append(mora_i)
    else:
        train_mora_index_lists.append(mora_i)

model = VQVAE().to(device)

if args.model_path != '':
    model.load_state_dict(torch.load(args.model_path))

optimizer = optim.Adam(model.parameters(), lr=2e-3)  # lr=1e-3 was also tried

start = time.time()
beta = 0.3


# Reconstruction + vector-quantization losses summed over all elements and batch
def loss_function(recon_x, x, z, z_unquantized):
    # reconstruction term; a BCE variant was tried before:
    # F.binary_cross_entropy(recon_x.view(-1), x.view(-1), reduction='sum')
    MSE = F.mse_loss(recon_x.view(-1), x.view(-1), reduction='sum')

    # torch.no_grad() does not detach tensors assigned inside it, so the
    # stop-gradients are expressed with .detach(); the body is completed to
    # match the loss_function in Example #4 below.
    vq_loss = F.mse_loss(
        z.view(-1), z_unquantized.detach().view(-1),
        reduction='sum') + beta * F.mse_loss(
            z.detach().view(-1), z_unquantized.view(-1), reduction='sum')
    return MSE + vq_loss
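# For the commitment/codebook terms above to train the encoder, the model's
# forward pass typically routes decoder gradients around the non-differentiable
# nearest-neighbour lookup. A one-line sketch of that straight-through trick,
# not necessarily this repo's exact code:
def straight_through(z_unquantized, z):
    # forward value equals the quantized z; gradients flow back to z_unquantized
    return z_unquantized + (z - z_unquantized).detach()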
Example #4
# The globals X, Y, args, device, VQVAE, calc_lf0_rmse, lf0_start_idx and
# vuv_start_idx are defined elsewhere in the training script.
import time
from glob import glob
from os.path import join

import numpy as np
import torch
import torch.nn.functional as F
from torch import optim


def objective(trial):
    mora_index_lists = sorted(
        glob(join('data/basic5000/mora_index', "squeezed_*.csv")))
    #mora_index_lists = mora_index_lists[:len(mora_index_lists)-5] # last 5 is real testset
    mora_index_lists_for_model = [
        np.loadtxt(path).reshape(-1) for path in mora_index_lists
    ]

    train_mora_index_lists = []
    test_mora_index_lists = []
    #train_files, test_files = train_test_split(files, test_size=test_size, random_state=random_state)

    for i, mora_i in enumerate(mora_index_lists_for_model):
        if (i - 1) % 20 == 0:  # every 20th item (offset 1): held-out real test set, skipped
            pass
        elif i % 20 == 0:  # every 20th item (offset 0): validation split (stored as "test")
            test_mora_index_lists.append(mora_i)
        else:
            train_mora_index_lists.append(mora_i)

    num_lstm_layers = trial.suggest_int('num_lstm_layers', 1, 3)
    z_dim = trial.suggest_categorical('z_dim', [1, 2, 8])
    num_class = trial.suggest_int('num_class', 2, 4)

    model = VQVAE(num_class=num_class, num_layers=num_lstm_layers,
                  z_dim=z_dim).to(device)

    optimizer = optim.Adam(model.parameters(), lr=2e-3)  # lr=1e-3 was also tried

    start = time.time()

    # Reconstruction + vector-quantization losses summed over all elements and batch
    def loss_function(recon_x, x, z, z_unquantized, beta=1):
        # reconstruction term; a BCE variant was tried before:
        # F.binary_cross_entropy(recon_x.view(-1), x.view(-1), reduction='sum')
        MSE = F.mse_loss(recon_x.view(-1), x.view(-1), reduction='sum')

        # codebook term plus beta-weighted commitment term (stop-gradient via .detach())
        vq_loss = F.mse_loss(
            z.view(-1), z_unquantized.detach().view(-1),
            reduction='sum') + beta * F.mse_loss(
                z.detach().view(-1), z_unquantized.view(-1), reduction='sum')
        return MSE + vq_loss

    func_tensor = np.vectorize(torch.from_numpy)

    # number of training items to keep; args.train_ratio is a fraction of the full set
    train_ratio = int(args.train_ratio * len(train_mora_index_lists))

    X_acoustic_train = [X['acoustic']['train'][i]
                        for i in range(len(X['acoustic']['train']))][:train_ratio]
    Y_acoustic_train = [Y['acoustic']['train'][i]
                        for i in range(len(Y['acoustic']['train']))][:train_ratio]
    train_mora_index_lists = train_mora_index_lists[:train_ratio]

    train_num = len(X_acoustic_train)

    X_acoustic_test = [X['acoustic']['test'][i]
                       for i in range(len(X['acoustic']['test']))]
    Y_acoustic_test = [Y['acoustic']['test'][i]
                       for i in range(len(Y['acoustic']['test']))]

    # each "loader" here is just a list of [input, target, mora_index] triples
    train_loader = [[
        X_acoustic_train[i], Y_acoustic_train[i], train_mora_index_lists[i]
    ] for i in range(len(train_mora_index_lists))]
    test_loader = [[
        X_acoustic_test[i], Y_acoustic_test[i], test_mora_index_lists[i]
    ] for i in range(len(test_mora_index_lists))]

    def train(epoch):
        model.train()
        train_loss = 0
        for batch_idx, data in enumerate(train_loader):
            tmp = []

            for j in range(2):
                tmp.append(torch.from_numpy(data[j]).to(device))

            optimizer.zero_grad()
            recon_batch, z, z_unquantized = model(tmp[0], tmp[1], data[2])
            loss = loss_function(recon_batch, tmp[1], z, z_unquantized)
            loss.backward()
            train_loss += loss.item()
            optimizer.step()
            del tmp
            if batch_idx % len(train_loader) == 0:  # logs only the first batch of each epoch
                print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                    epoch, batch_idx, train_num, 100. * batch_idx / train_num,
                    loss.item()))

        print('====> Epoch: {} Average loss: {:.4f}'.format(
            epoch, train_loss / len(train_loader)))

        return train_loss / len(train_loader)

    def test(epoch):
        model.eval()
        test_loss = 0
        f0_loss = 0
        with torch.no_grad():
            for i, data in enumerate(test_loader):
                tmp = []

                for j in range(2):
                    tmp.append(torch.tensor(data[j]).to(device))

                recon_batch, z, z_unquantized = model(tmp[0], tmp[1], data[2])
                test_loss += loss_function(recon_batch, tmp[1], z,
                                           z_unquantized).item()
                f0_loss += calc_lf0_rmse(
                    recon_batch.cpu().numpy().reshape(-1, 199),
                    tmp[1].cpu().numpy().reshape(-1, 199), lf0_start_idx,
                    vuv_start_idx)
                del tmp

        test_loss /= len(test_loader)
        print('====> Test set loss: {:.4f}'.format(test_loss))

        # test_loss is averaged per item; f0_loss stays summed over the test set
        return test_loss, f0_loss

    loss_list = []
    test_loss_list = []
    test_f0_errors = []

    num_epochs = args.num_epoch

    for epoch in range(1, num_epochs + 1):
        loss = train(epoch)
        test_loss, f0_loss = test(epoch)

        print('epoch [{}/{}], loss: {:.4f} test_loss: {:.4f}'.format(
            epoch, num_epochs, loss, test_loss))

        # logging
        loss_list.append(loss)
        test_loss_list.append(test_loss)
        test_f0_errors.append(f0_loss)

        print(time.time() - start)  # elapsed seconds

        if epoch % 5 == 0:
            torch.save(
                model.state_dict(), '{}/{}layers_zdim{}_model_{}.pth'.format(
                    args.output_dir, num_lstm_layers, z_dim, epoch))
        np.save(
            args.output_dir +
            '/{}layers_zdim{}_loss_list.npy'.format(num_lstm_layers, z_dim),
            np.array(loss_list))
        np.save(
            args.output_dir + '/{}layers_zdim{}_test_loss_list.npy'.format(
                num_lstm_layers, z_dim), np.array(test_loss_list))
        np.save(
            args.output_dir + '/{}layers_zdim{}_test_f0_loss_list.npy'.format(
                num_lstm_layers, z_dim), np.array(test_f0_errors))

    # Optuna minimizes this: the summed F0 error from the final epoch's test pass
    return f0_loss
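# A typical way to drive this objective with Optuna (a sketch; the trial
# count is a placeholder):
import optuna

study = optuna.create_study(direction='minimize')
study.optimize(objective, n_trials=20)
print('best params:', study.best_params, 'best f0 loss:', study.best_value)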