Example #1
    def generate_animation(self, epoch):

        for i1 in range(self.code_dim):
            utils.generate_animation(
                self.save_path + '/visualization/code%02d' % i1, epoch)

        print("Animations saved")
Example #2
    def result_vary(self, epoch):
        image_num = 10
        row = 10
        k = 0
        i = 0
        for X, Y in self.valid_loader:
            # assumes the validation loader yields batches of size 1;
            # collect one sample of each class 0..9 in order
            if Y.numpy() == k:
                if i == 0:
                    images = X
                else:
                    images = torch.cat((images, X), 0)
                i += 1
                k += 1
            if i == image_num:
                break
        self.load(epoch)
        X = utils.to_var(images)

        mu, sigma = self.E(self.FC(X))

        for frame in range(100):
            images = X
            for k in range(image_num - 1):
                # reparameterization trick: z = mu + eps * exp(sigma / 2), treating sigma as a log-variance
                eps = utils.to_var(torch.randn(X.size(0), self.z_dim))
                X_rec = self.G(mu + eps * torch.exp(sigma / 2.0))
                images = torch.cat((images, X_rec), 0)


            if torch.cuda.is_available():
                images = images.cpu().data.numpy().transpose(0, 2, 3, 1)
            else:
                images = images.data.numpy().transpose(0, 2, 3, 1)
            new_images = []

            # reorder into a grid: row i holds the ten variations of sample i
            for i in range(image_num):
                k = i
                for _ in range(image_num):
                    new_images.append(images[k])
                    k += image_num

            images = np.array(new_images)
            save_dir = os.path.join(self.root, self.result_dir, self.dataset, self.model_name, str(self.args.seed_random))
            utils.save_images(images[:, :, :, :], [row, row],
                              os.path.join(save_dir, 'variational' + '_epoch%03d' % (frame + 1) + '.png'))

        utils.generate_animation(save_dir + "/variational", 100)

        # note: eval mode is set only after sampling here; switching G, E, and FC to eval before inference would be the conventional order
        self.G.eval()
        self.E.eval()
        self.FC.eval()
Example #3
    def latent_traversal(self, samples=None, epoch=0, save_path=None):
        if save_path is None:
            save_path = os.path.join(self.save_path, 'visualization')
        if not os.path.exists(save_path):
            os.makedirs(save_path)

        for i0 in tnrange(self.model.hidden_dim,
                          desc='latent traversal',
                          leave=False):
            if samples is None:
                images = self.model.latent_traversal_dim(
                    i0)  # list T, tensor B C H W
            else:
                images = self.model.latent_traversal_given_samples_dim(
                    samples, i0)  # list T, tensor B C H W

            epoch_path = os.path.join(save_path, f'latent_traversal_{epoch}')
            if not os.path.exists(epoch_path):
                os.makedirs(epoch_path)
            utils.generate_animation(images, epoch_path, f'code{i0:03}')

        return self
Example #4
    def train(self):

        self.G.apply(self.G.weights_init)
        self.D.train()

        for classe in range(10):
            self.train_hist = {}
            self.train_hist['D_loss'] = []
            self.train_hist['G_loss'] = []
            self.train_hist['per_epoch_time'] = []
            self.train_hist['total_time'] = []
            # self.G.apply(self.G.weights_init) does not work for instance

            if self.gpu_mode:
                self.y_real_, self.y_fake_ = Variable(
                    torch.ones(self.batch_size, 1).cuda()), Variable(
                        torch.zeros(self.batch_size, 1).cuda())
            else:
                self.y_real_, self.y_fake_ = Variable(
                    torch.ones(self.batch_size,
                               1)), Variable(torch.zeros(self.batch_size, 1))

            self.D.train()
            self.data_loader_train = get_iter_dataset(self.dataset_train,
                                                      self.list_class_train,
                                                      self.batch_size, classe)
            self.data_loader_valid = get_iter_dataset(self.dataset_valid,
                                                      self.list_class_valid,
                                                      self.batch_size, classe)
            print('training class : ' + str(classe))
            start_time = time.time()
            for epoch in range(self.epoch):
                self.G.train()
                epoch_start_time = time.time()
                n_batch = 0.

                for iter, (x_, t_) in enumerate(self.data_loader_train):
                    n_batch += 1
                    z_ = torch.rand((self.batch_size, self.z_dim))

                    if self.gpu_mode:
                        x_, z_ = Variable(x_.cuda()), Variable(z_.cuda())
                    else:
                        x_, z_ = Variable(x_), Variable(z_)

                    # update D network
                    self.D_optimizer.zero_grad()

                    D_real = self.D(x_)
                    D_real_err = torch.mean(torch.abs(D_real - x_))

                    G_ = self.G(z_)
                    D_fake = self.D(G_)
                    D_fake_err = torch.mean(torch.abs(D_fake - G_))

                    D_loss = D_real_err - self.k * D_fake_err
                    self.train_hist['D_loss'].append(D_loss.data[0])

                    D_loss.backward()
                    self.D_optimizer.step()

                    # update G network
                    self.G_optimizer.zero_grad()

                    G_ = self.G(z_)
                    D_fake = self.D(G_)
                    D_fake_err = torch.mean(torch.abs(D_fake - G_))

                    G_loss = D_fake_err
                    self.train_hist['G_loss'].append(G_loss.data[0])

                    G_loss.backward()
                    self.G_optimizer.step()

                    # convergence metric
                    temp_M = D_real_err + torch.abs(self.gamma * D_real_err -
                                                    D_fake_err)

                    # operation for updating k
                    temp_k = self.k + self.lambda_ * (self.gamma * D_real_err -
                                                      D_fake_err)
                    temp_k = temp_k.data[0]

                    # self.k = temp_k.data[0]
                    self.k = min(max(temp_k, 0), 1)
                    self.M = temp_M.data[0]

                    if ((iter + 1) % 100) == 0:
                        print(
                            "classe : [%1d] Epoch: [%2d] [%4d/%4d] D_loss: %.8f, G_loss: %.8f, M: %.8f, k: %.8f"
                            % (classe, (epoch + 1),
                               (iter + 1), self.size_epoch, D_loss.data[0],
                               G_loss.data[0], self.M, self.k))

                self.train_hist['per_epoch_time'].append(time.time() -
                                                         epoch_start_time)
                self.visualize_results((epoch + 1), classe)

            self.save_G(classe)

            result_dir = self.result_dir + '/' + 'classe-' + str(classe)
            utils.generate_animation(result_dir + '/' + self.model_name,
                                     epoch + 1)
            utils.loss_plot(self.train_hist, result_dir, self.model_name)

            np.savetxt(
                os.path.join(result_dir,
                             'began_training_' + self.dataset + '.txt'),
                np.transpose([self.train_hist['G_loss']]))

        self.train_hist['total_time'].append(time.time() - start_time)  # note: start_time was reset per class, so this measures only the last class
        print("Avg one epoch time: %.2f, total %d epochs time: %.2f" %
              (np.mean(self.train_hist['per_epoch_time']), self.epoch,
               self.train_hist['total_time'][0]))
        print("Training finish!... save training results")
Example #5
    def train(self):
        self.size_epoch = 1000

        if self.gpu_mode:
            self.y_real_, self.y_fake_ = Variable(torch.ones(self.batch_size, 1).cuda(self.device)), Variable(
                torch.zeros(self.batch_size, 1).cuda(self.device))
        else:
            self.y_real_, self.y_fake_ = Variable(torch.ones(self.batch_size, 1)), Variable(
                torch.zeros(self.batch_size, 1))

        self.G.apply(self.G.weights_init)
        self.D.train()

        print('training start!!')
        start_time = time.time()
        for classe in range(10):

            self.train_hist = {}
            self.train_hist['D_loss'] = []
            self.train_hist['G_loss'] = []
            self.train_hist['per_epoch_time'] = []
            self.train_hist['total_time'] = []

            #self.G.apply(self.G.weights_init) does not work for instance

            self.data_loader_train = get_iter_dataset(self.dataset_train, self.list_class_train, self.batch_size,
                                                      classe)
            self.data_loader_valid = get_iter_dataset(self.dataset_valid, self.list_class_valid, self.batch_size,
                                                      classe)

            for epoch in range(self.epoch):
                self.G.train()
                epoch_start_time = time.time()
                for iter, (x_, t_) in enumerate(self.data_loader_train):

                    if x_.shape[0] != self.batch_size:
                        break

                    z_ = torch.rand((self.batch_size, self.z_dim))

                    if self.gpu_mode:
                        x_, z_ = Variable(x_.cuda(self.device)), Variable(z_.cuda(self.device))
                    else:
                        x_, z_ = Variable(x_), Variable(z_)

                    # update D network
                    self.D_optimizer.zero_grad()

                    D_real = self.D(x_)
                    D_real_loss = self.BCELoss(D_real, self.y_real_)

                    G_ = self.G(z_)
                    D_fake = self.D(G_)
                    D_fake_loss = self.BCELoss(D_fake, self.y_fake_)

                    D_loss = D_real_loss + D_fake_loss
                    self.train_hist['D_loss'].append(D_loss.data[0])

                    D_loss.backward()
                    self.D_optimizer.step()

                    # update G network
                    self.G_optimizer.zero_grad()

                    G_ = self.G(z_)
                    D_fake = self.D(G_)
                    G_loss = self.BCELoss(D_fake, self.y_real_)
                    self.train_hist['G_loss'].append(G_loss.data[0])

                    G_loss.backward()
                    self.G_optimizer.step()

                    if ((iter + 1) % 100) == 0:
                        print("classe : [%1d] Epoch: [%2d] [%4d/%4d] D_loss: %.8f, G_loss: %.8f" %
                              (classe, (epoch + 1), (iter + 1), len(self.data_loader_train), D_loss.data[0], G_loss.data[0]))

                self.train_hist['per_epoch_time'].append(time.time() - epoch_start_time)
                self.visualize_results((epoch + 1), classe)
                self.save_G(classe)
            utils.generate_animation(
                self.result_dir + '/' + 'classe-' + str(classe) + '/' + self.model_name, self.epoch)
            utils.loss_plot(self.train_hist, self.save_dir, self.model_name)

        self.train_hist['total_time'].append(time.time() - start_time)
        print("Avg one epoch time: %.2f, total %d epochs time: %.2f" % (np.mean(self.train_hist['per_epoch_time']),
                                                                        self.epoch, self.train_hist['total_time'][0]))
        print("Training finish!... save training results")

        self.save()
Example #6
    def train(self):
        self.train_hist = {}
        self.train_hist['D_loss'] = []
        self.train_hist['G_loss'] = []
        self.train_hist['per_epoch_time'] = []
        self.train_hist['total_time'] = []

        self.y_real_, self.y_fake_ = torch.ones(self.batch_size, 1), torch.zeros(self.batch_size, 1)
        if self.gpu_mode:
            self.y_real_, self.y_fake_ = self.y_real_.cuda(), self.y_fake_.cuda()

        self.D.train()
        print('training start!!')
        start_time = time.time()
        for epoch in range(self.epoch):
            self.save()
            self.G.train()
            epoch_start_time = time.time()
            for iter, (x_, y_) in enumerate(self.data_loader):
                if iter == len(self.data_loader.dataset) // self.batch_size:
                    break
                z_ = torch.rand((self.batch_size, self.z_dim))
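                # one-hot encode the labels: scatter 1s into a zero (batch, class_num) tensor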
                y_vec_ = torch.zeros((self.batch_size, self.class_num)).scatter_(
                    1, y_.type(torch.LongTensor).unsqueeze(1), 1)
                if self.gpu_mode:
                    x_, z_, y_vec_ = x_.cuda(), z_.cuda(), y_vec_.cuda()

                # update D network
                self.D_optimizer.zero_grad()

                D_real, C_real = self.D(x_)
                D_real_loss = self.BCE_loss(D_real, self.y_real_)
                C_real_loss = self.CE_loss(C_real, torch.max(y_vec_, 1)[1])

                G_ = self.G(z_, y_vec_)
                D_fake, C_fake = self.D(G_)
                D_fake_loss = self.BCE_loss(D_fake, self.y_fake_)
                C_fake_loss = self.CE_loss(C_fake, torch.max(y_vec_, 1)[1])

                D_loss = D_real_loss + C_real_loss + D_fake_loss + C_fake_loss
                self.train_hist['D_loss'].append(D_loss.item())

                D_loss.backward()
                self.D_optimizer.step()

                # update G network
                self.G_optimizer.zero_grad()

                G_ = self.G(z_, y_vec_)
                D_fake, C_fake = self.D(G_)

                G_loss = self.BCE_loss(D_fake, self.y_real_)
                C_fake_loss = self.CE_loss(C_fake, torch.max(y_vec_, 1)[1])

                G_loss += C_fake_loss
                self.train_hist['G_loss'].append(G_loss.item())

                G_loss.backward()
                self.G_optimizer.step()

                if ((iter + 1) % 100) == 0:
                    print("Epoch: [%2d] [%4d/%4d] D_loss: %.8f, G_loss: %.8f" %
                          ((epoch + 1),
                           (iter + 1), self.data_loader.dataset.__len__() //
                           self.batch_size, D_loss.item(), G_loss.item()))

            self.train_hist['per_epoch_time'].append(time.time() -
                                                     epoch_start_time)
            with torch.no_grad():
                self.visualize_results((epoch + 1))

        self.train_hist['total_time'].append(time.time() - start_time)
        print("Avg one epoch time: %.2f, total %d epochs time: %.2f" %
              (np.mean(self.train_hist['per_epoch_time']), self.epoch,
               self.train_hist['total_time'][0]))
        print("Training finish!... save training results")

        self.save()
        utils.generate_animation(
            self.result_dir + '/' + self.dataset + '/' + self.model_name +
            '/' + self.model_name, self.epoch)
        utils.loss_plot(
            self.train_hist,
            os.path.join(self.save_dir, self.dataset, self.model_name),
            self.model_name)
Example #7
    def train(self):
        self.train_hist = {}
        self.train_hist['D_loss'] = []
        self.train_hist['G_loss'] = []
        self.train_hist['E_loss'] = []
        self.train_hist['per_epoch_time'] = []
        self.train_hist['total_time'] = []

        # if torch.cuda.is_available():
        #     self.y_real_, self.y_fake_ = Variable(torch.ones(self.batch_size, 1).cuda()), Variable(torch.zeros(self.batch_size, 1).cuda())
        # else:
        #     self.y_real_, self.y_fake_ = Variable(torch.ones(self.batch_size, 1)), Variable(torch.zeros(self.batch_size, 1))

        # self.D.train()
        print('training start!!')
        start_time = time.time()
        for epoch in trange(self.epoch, desc='epoch'):
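            # 1/sqrt(t) decay of the G and D learning rates, applied once per epoch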
            self.G_optimizer.param_groups[0]['lr'] = self.args.lrG / np.sqrt(epoch + 1)
            self.D_optimizer.param_groups[0]['lr'] = self.args.lrD / np.sqrt(epoch + 1)

            epoch_start_time = time.time()
            E_err = []
            D_err = []
            G_err = []
            # learning rate decay
            # if (epoch+1) % 20 == 0:
            #     self.G_optimizer.param_groups[0]['lr'] /= 2
            #     self.D_optimizer.param_groups[0]['lr'] /= 2
            #     self.E_optimizer.param_groups[0]['lr'] /= 2
            #     print("learning rate change!")
            # self.G_optimizer.param_groups[0]['lr'] /= np.sqrt(epoch+1)
            # self.D_optimizer.param_groups[0]['lr'] /= np.sqrt(epoch+1)
            # self.E_optimizer.param_groups[0]['lr'] /= np.sqrt(epoch+1)
            # print("learning rate change!")

            scale = min(1.0, (1.0 / 32.0) * (epoch + 1))  # linear warm-up of the E-loss weight over the first 32 epochs

            total_len = self.data_loader.dataset.__len__() // self.batch_size
            for iter, (X, _) in tqdm(enumerate(self.data_loader), total=total_len, desc='iteration'):

                X = utils.to_var(X)


                """Discriminator"""
                z = utils.to_var(torch.randn(self.batch_size, self.z_dim))
                X_hat = self.G(z)
                D_real = self.D(self.FC(X))
                D_fake = self.D(self.FC(X_hat))
                D_loss = -1.0 * torch.mean(D_real - torch.exp(D_fake - 1.0))
                self.train_hist['D_loss'].append(D_loss.data.item())
                D_err.append(D_loss.data.item())
                # Optimize
                D_loss.backward()
                # gradient clipping
                torch.nn.utils.clip_grad_value_(chain(self.D.parameters(), self.FC.parameters()), 1.0)
                # update
                self.D_optimizer.step()
                self.__reset_grad()

                """Generator"""
                # Use both Discriminator and Encoder to update Generator
                z = utils.to_var(torch.randn(self.batch_size, self.z_dim))
                X_hat = self.G(z)
                D_fake = self.D(self.FC(X_hat))
                z_mu, z_sigma = self.E(self.FC(X_hat))
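                # note: the constant 0.919 ≈ 0.5 * ln(2π), the normalization term of the Gaussian negative log-likelihood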
                E_loss = torch.mean(
                    torch.sum(0.5 * (z - z_mu) ** 2 * torch.exp(-z_sigma) +
                               0.5 * z_sigma + 0.919, dim=1))

                # G_loss = torch.mean( -1.0 * torch.exp(D_fake-1.0) )
                # G_loss = torch.mean( torch.exp(-D_fake) ) # deal with gradient vanish
                G_loss = -1.0 * torch.mean(D_fake)  # variant 2: mitigates vanishing gradients
                # G_loss = torch.mean( (2.0-D_fake)*(D_fake >= 1).float() + torch.exp(1.0-D_fake)**(D_fake < 1).float() )
                # G_loss = torch.mean( (2.0-torch.exp(D_fake-1.0))*(D_fake >= 1).float() + torch.exp(1.0-D_fake)**(D_fake < 1).float() )

                total_loss = G_loss + scale * E_loss
                self.train_hist['G_loss'].append(G_loss.data.item())
                G_err.append(G_loss.data.item())
                E_err.append(E_loss.data.item())
                # Optimize
                total_loss.backward()
                # gradient clipping
                torch.nn.utils.clip_grad_value_(chain(self.G.parameters(), self.E.parameters()), 1.0)
                # update
                self.G_optimizer.step()
                self.E_optimizer.step()
                self.__reset_grad()

                """ Plot """
                if (iter + 1) == total_len:
                    # Print and plot every epoch
                    print('Epoch-{}; D_loss: {:.4}; G_loss: {:.4}; E_loss: {:.4}\n'
                          .format(epoch, np.mean(D_err), np.mean(G_err), np.mean(E_err)))
                    for X, _ in self.valid_loader:
                        X = utils.to_var(X)
                        self.visualize_results(X, epoch + 1)
                        break

                    break

            self.train_hist['per_epoch_time'].append(time.time() - epoch_start_time)

            # Save model
            if (epoch+1) % 5 == 0:
                self.save(epoch)

        self.train_hist['total_time'].append(time.time() - start_time)
        print("Avg one epoch time: %.2f, total %d epochs time: %.2f" % (np.mean(self.train_hist['per_epoch_time']),
              self.epoch, self.train_hist['total_time'][0]))
        print("Training finish!... save training results")
        #self.save(epoch)

        # Generate animation of reconstructed plot
        utils.generate_animation(self.root + '/' + self.result_dir + '/' + self.dataset + '/' + self.model_name + '/reconstructed',
                                 self.epoch)
        utils.loss_plot(self.train_hist, os.path.join(self.root, self.save_dir, self.dataset, self.model_name), self.model_name)
Example #8
    def train(self):
        self.train_hist = {}
        self.train_hist['D_loss'] = []
        self.train_hist['G_loss'] = []
        self.train_hist['C_loss'] = []
        self.train_hist['per_epoch_time'] = []
        self.train_hist['total_time'] = []

        # self.y_real_, self.y_fake_ = torch.ones(self.batch_size, 1), torch.zeros(self.batch_size, 1)
        # if self.gpu_mode:
        #     self.y_real_, self.y_fake_ = self.y_real_.cuda(), self.y_fake_.cuda()

        self.D.train()
        print('training start!!')
        start_time = time.time()
        for epoch in range(self.epoch):
            self.G.train()
            self.C.train()
            epoch_start_time = time.time()
            for iter, (x_, y_) in enumerate(self.data_loader):
                if iter == len(self.data_loader.dataset) // self.batch_size:
                    break

                z_ = torch.rand((self.batch_size, self.z_dim))
                # convert the class labels to one-hot vectors
                y_vec_ = torch.zeros(
                    (self.batch_size, self.class_num)).scatter_(
                        1,
                        y_.type(torch.LongTensor).unsqueeze(1), 1)
                # mark minority-class samples in the last column of the label vector
                for i in range(1, self.batch_size):
                    if y_vec_[i][self.minority] == 1:
                        y_vec_[i][-1] = self.minority_label

                y_fill_ = y_vec_.unsqueeze(2).unsqueeze(3).expand(
                    self.batch_size, self.class_num, self.input_size,
                    self.input_size)

                if self.gpu_mode:
                    x_, z_, y_vec_, y_fill_ = x_.cuda(), z_.cuda(), y_vec_.cuda(), y_fill_.cuda()

                # update D network
                self.D_optimizer.zero_grad()

                D_real = self.D(x_, y_fill_)
                D_real_loss = -torch.mean(D_real)

                G_ = self.G(z_, y_vec_)
                D_G_ = self.D(G_, y_fill_)
                D_G_loss = torch.mean(D_G_)

                C_ = self.C(x_)
                C_ = C_.unsqueeze(2).unsqueeze(3).expand(
                    self.batch_size, self.class_num, self.input_size,
                    self.input_size)
                D_C_ = self.D(x_, C_)
                D_C_loss = torch.mean(D_C_)

                D_loss = 2 * D_real_loss + D_G_loss + D_C_loss

                self.train_hist['D_loss'].append(D_loss.item())

                D_loss.backward()
                self.D_optimizer.step()

                # clipping D
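                # weight clipping bounds the critic's parameters, the WGAN heuristic for enforcing a Lipschitz constraint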
                for p in self.D.parameters():
                    p.data.clamp_(-self.c, self.c)

                # update G network
                self.G_optimizer.zero_grad()

                G_ = self.G(z_, y_vec_)
                D_G_ = self.D(G_, y_fill_)
                G_loss = -torch.mean(D_G_)
                self.train_hist['G_loss'].append(G_loss.item())

                G_loss.backward()
                self.G_optimizer.step()

                # update C network
                self.C_optimizer.zero_grad()

                C_ = self.C(x_)
                C_ = C_.unsqueeze(2).unsqueeze(3).expand(
                    self.batch_size, self.class_num, self.input_size,
                    self.input_size)
                D_C_ = self.D(x_, C_)
                C_loss = -torch.mean(D_C_)
                self.train_hist['C_loss'].append(C_loss.item())

                C_loss.backward()
                self.C_optimizer.step()

                if ((iter + 1) % 100) == 0:
                    print(
                        "Epoch: [%2d] [%4d/%4d] D_loss: %.8f, G_loss: %.8f, C_loss: %.8f"
                        %
                        ((epoch + 1), (iter + 1),
                         self.data_loader.dataset.__len__() // self.batch_size,
                         D_loss.item(), G_loss.item(), C_loss.item()))

            self.train_hist['per_epoch_time'].append(time.time() -
                                                     epoch_start_time)
            with torch.no_grad():
                self.visualize_results((epoch + 1))

        self.train_hist['total_time'].append(time.time() - start_time)
        print("Avg one epoch time: %.2f, total %d epochs time: %.2f" %
              (np.mean(self.train_hist['per_epoch_time']), self.epoch,
               self.train_hist['total_time'][0]))
        print("Training finish!... save training results")

        self.save()
        utils.generate_animation(
            self.result_dir + '/' + self.dataset + '/' + self.model_name +
            '/' + self.model_name, self.epoch)
        utils.loss_plot(
            self.train_hist,
            os.path.join(self.save_dir, self.dataset, self.model_name),
            self.model_name)
Example #9
    def train(self):
        self.train_hist = {}
        self.train_hist['D_loss'] = []
        self.train_hist['G_loss'] = []
        self.train_hist['per_epoch_time'] = []
        self.train_hist['total_time'] = []

        self.y_real_, self.y_fake_ = torch.ones(self.batch_size, 1), torch.zeros(self.batch_size, 1)
        if self.gpu_mode:
            self.y_real_, self.y_fake_ = self.y_real_.cuda(), self.y_fake_.cuda()

        self.D.train()
        print('training start!!')
        start_time = time.time()
        for epoch in range(self.epoch):
            self.G.train()
            epoch_start_time = time.time()
            for iter, (x_, _) in enumerate(self.data_loader):
                if iter == len(self.data_loader.dataset) // self.batch_size:
                    break

                z_ = torch.rand((self.batch_size, self.z_dim))
                if self.gpu_mode:
                    x_, z_ = x_.cuda(), z_.cuda()

                # update D network
                self.D_optimizer.zero_grad()

                D_real = self.D(x_)
                D_real_loss = -torch.mean(D_real)
                epsilon = torch.FloatTensor(self.batch_size,
                                            self.z_dim).normal_(0, 1).cuda()
                noise = self.mu + self.sigma * epsilon
                # epsilon is created on the GPU above, so noise and weighted_noise are already CUDA tensors
                weighted_noise = torch.mul(noise, self.weight)
                G_ = self.G(weighted_noise)
                D_fake = self.D(G_)
                D_fake_loss = torch.mean(D_fake)
                mu_loss = 0.1 * (abs(0.1 - self.mu)).sum()
                sig_loss = 0.5 * (abs(1 - self.sigma)).sum()
                weight_loss = 0.01 * (abs(self.weight.sum() - 1))
                D_loss = D_real_loss + D_fake_loss  #+weight_loss+sig_loss+mu_loss

                D_loss.backward()
                self.D_optimizer.step()

                # clipping D
                for p in self.D.parameters():
                    p.data.clamp_(-self.c, self.c)

                if ((iter + 1) % self.n_critic) == 0:
                    # update G network
                    self.G_optimizer.zero_grad()
                    noise = self.mu + self.sigma * epsilon
                    weighted_noise = torch.mul(noise, self.weight)
                    G_ = self.G(weighted_noise)
                    D_fake = self.D(G_)
                    G_loss = -torch.mean(D_fake)
                    self.train_hist['G_loss'].append(G_loss.item())

                    G_loss.backward()
                    self.G_optimizer.step()

                    self.train_hist['D_loss'].append(D_loss.item())

                if ((iter + 1) % 10) == 0:
                    print(
                        "Epoch: [%2d] [%4d/%4d] D_loss: %.8f, G_loss: %.8f, weight: %.8f"
                        %
                        ((epoch + 1), (iter + 1),
                         self.data_loader.dataset.__len__() // self.batch_size,
                         D_loss.item(), G_loss.item(), self.weight.sum()))
                    print(torch.mean(self.weight))
            self.train_hist['per_epoch_time'].append(time.time() -
                                                     epoch_start_time)
            with torch.no_grad():
                self.visualize_results((epoch + 1))

        self.train_hist['total_time'].append(time.time() - start_time)
        print("Avg one epoch time: %.2f, total %d epochs time: %.2f" %
              (np.mean(self.train_hist['per_epoch_time']), self.epoch,
               self.train_hist['total_time'][0]))
        print("Training finish!... save training results")

        self.save()
        utils.generate_animation(
            self.result_dir + '/' + 'cifar10' + '/' + self.model_name + '/' +
            self.model_name, self.epoch)
        utils.loss_plot(
            self.train_hist,
            os.path.join(self.save_dir, 'cifar10', self.model_name),
            self.model_name)
Example #10
    def train(self):
        self.train_hist = {}
        self.train_hist['D_loss'] = []
        self.train_hist['G_loss'] = []
        self.train_hist['per_epoch_time'] = []
        self.train_hist['total_time'] = []

        if self.gpu_mode:
            self.y_real_, self.y_fake_ = Variable(
                torch.ones(self.batch_size, 1).cuda()), Variable(
                    torch.zeros(self.batch_size, 1).cuda())
        else:
            self.y_real_, self.y_fake_ = Variable(
                torch.ones(self.batch_size,
                           1)), Variable(torch.zeros(self.batch_size, 1))

        self.D.train()
        print('training start!!')
        start_time = time.time()
        for epoch in range(self.epoch):
            self.G.train()
            epoch_start_time = time.time()
            for iter, (x_, y_) in enumerate(self.data_loader):
                if iter == len(self.data_loader.dataset) // self.batch_size:
                    break

                if self.dataset == 'celebA':
                    y_ = self.attr[y_]
                y_ = y_.view((self.batch_size, self.y_dim))

                z_ = torch.rand((self.batch_size, self.z_dim))

                if self.gpu_mode:
                    x_, z_ = Variable(x_.cuda()), Variable(z_.cuda())
                    y_ = Variable(y_.cuda())
                else:
                    x_, z_ = Variable(x_), Variable(z_)

                # update D network
                self.D_optimizer.zero_grad()

                D_real, C_real = self.D(x_)
                D_real_loss = -torch.mean(D_real)
                # C_real_loss = self.CE_loss(C_real, y_)
                C_real_loss = self.BCE_loss(C_real, y_)

                if self.dataset == 'mnist':
                    label = torch.zeros(self.batch_size, 10).cuda()
                    label = label.scatter_(1, y_.data.view(self.batch_size, 1),
                                           1)
                    label = Variable(label)
                elif self.dataset == 'celebA':
                    label = torch.zeros(self.batch_size, self.y_dim).cuda()
                    label[:50, 0] = 1
                    # label[24:75, 1] = 1
                    label = Variable(label)
                G_ = self.G(z_, label)
                D_fake, C_fake = self.D(G_)
                D_fake_loss = torch.mean(D_fake)
                # C_fake_loss = self.CE_loss(C_fake, y_)
                C_fake_loss = self.BCE_loss(C_fake, label)

                # gradient penalty
                if self.gpu_mode:
                    alpha = torch.rand(x_.size()).cuda()
                else:
                    alpha = torch.rand(x_.size())

                x_hat = Variable(alpha * x_.data + (1 - alpha) * G_.data,
                                 requires_grad=True)

                pred_hat, _ = self.D(x_hat)
                if self.gpu_mode:
                    gradients = grad(outputs=pred_hat,
                                     inputs=x_hat,
                                     grad_outputs=torch.ones(
                                         pred_hat.size()).cuda(),
                                     create_graph=True,
                                     retain_graph=True,
                                     only_inputs=True)[0]
                else:
                    gradients = grad(outputs=pred_hat,
                                     inputs=x_hat,
                                     grad_outputs=torch.ones(pred_hat.size()),
                                     create_graph=True,
                                     retain_graph=True,
                                     only_inputs=True)[0]

                gradient_penalty = self.lambda_ * (
                    (gradients.view(gradients.size()[0], -1).norm(2, 1) - 1)**
                    2).mean()

                # note: gradient_penalty is computed above but not added to D_loss;
                # this variant relies on the weight clipping below instead
                # D_loss = D_real_loss + D_fake_loss + gradient_penalty + self.lambda_cl * (C_real_loss + C_fake_loss)
                D_loss = D_real_loss + D_fake_loss + self.lambda_cl * (
                    C_real_loss + C_fake_loss)

                D_loss.backward()
                self.D_optimizer.step()

                # clipping D
                for p in self.D.parameters():
                    p.data.clamp_(-self.c, self.c)

                if ((iter + 1) % self.n_critic) == 0:
                    # update G network
                    self.G_optimizer.zero_grad()

                    if self.dataset == 'mnist':
                        label = torch.zeros(self.batch_size, 10).cuda()
                        label = label.scatter_(
                            1, y_.data.view(self.batch_size, 1), 1)
                        label = Variable(label)
                    elif self.dataset == 'celebA':
                        label = torch.zeros(self.batch_size, self.y_dim).cuda()
                        label[:50, 0] = 1
                        # label[24:75, 1] = 1
                        label = Variable(label)
                    G_ = self.G(z_, label)
                    D_fake, C_fake = self.D(G_)
                    # G_loss = -torch.mean(D_fake) + self.lambda_cl * self.CE_loss(C_fake, y_)
                    G_loss = -torch.mean(
                        D_fake) + self.lambda_cl * self.BCE_loss(C_fake, y_)
                    self.train_hist['G_loss'].append(G_loss.data[0])

                    G_loss.backward()
                    self.G_optimizer.step()

                    self.train_hist['D_loss'].append(D_loss.data[0])

                if ((iter + 1) % 100) == 0:
                    print("Epoch: [%2d] [%4d/%4d] D_loss: %.8f, G_loss: %.8f" %
                          ((epoch + 1),
                           (iter + 1), self.data_loader.dataset.__len__() //
                           self.batch_size, D_loss.data[0], G_loss.data[0]))

            self.train_hist['per_epoch_time'].append(time.time() -
                                                     epoch_start_time)
            self.visualize_results((epoch + 1))

        self.train_hist['total_time'].append(time.time() - start_time)
        print("Avg one epoch time: %.2f, total %d epochs time: %.2f" %
              (np.mean(self.train_hist['per_epoch_time']), self.epoch,
               self.train_hist['total_time'][0]))
        print("Training finish!... save training results")

        self.save()
        utils.generate_animation(
            self.result_dir + '/' + self.dataset + '/' + self.model_name +
            '/' + self.model_name, self.epoch)
        utils.loss_plot(
            self.train_hist,
            os.path.join(self.save_dir, self.dataset, self.model_name),
            self.model_name)
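
The gradient-penalty block in Example #10 can be factored into a standalone helper. Below is a sketch under the same assumptions as that snippet (the discriminator returns a (score, class_prediction) pair); it is illustrative, not the repo's code:

    import torch
    from torch.autograd import grad

    def gradient_penalty(D, x_real, x_fake, lambda_, gpu_mode):
        # interpolate randomly between real and generated samples
        alpha = torch.rand(x_real.size())
        if gpu_mode:
            alpha = alpha.cuda()
        x_hat = (alpha * x_real.data + (1 - alpha) * x_fake.data).requires_grad_(True)
        pred_hat, _ = D(x_hat)
        grad_outputs = torch.ones(pred_hat.size())
        if gpu_mode:
            grad_outputs = grad_outputs.cuda()
        gradients = grad(outputs=pred_hat, inputs=x_hat, grad_outputs=grad_outputs,
                         create_graph=True, retain_graph=True, only_inputs=True)[0]
        # penalize the squared deviation of each sample's gradient norm from 1
        return lambda_ * ((gradients.view(gradients.size(0), -1).norm(2, 1) - 1) ** 2).mean()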
Example #11
    def train(self):
        self.train_hist = {}
        self.train_hist['D_loss'] = []
        self.train_hist['G_loss'] = []
        self.train_hist['per_epoch_time'] = []
        self.train_hist['total_time'] = []

        if self.gpu_mode:
            self.y_real_, self.y_fake_ = Variable(
                torch.ones(self.batch_size, 1).cuda()), Variable(
                    torch.zeros(self.batch_size, 1).cuda())
        else:
            self.y_real_, self.y_fake_ = Variable(
                torch.ones(self.batch_size,
                           1)), Variable(torch.zeros(self.batch_size, 1))

        self.D.train()
        print('training start!!')
        start_time = time.time()
        for epoch in range(self.epoch):
            self.G.train()
            epoch_start_time = time.time()
            for iter_, x_ in enumerate(self.data_loader):
                if iter_ == len(self.data_loader.dataset) // self.batch_size:
                    break

                #z_ = torch.rand((self.batch_size, self.z_dim))
                try:
                    z_, _ = next(self.z_loader)
                except StopIteration:
                    self.z_loader = iter(utils.get_mat_loader(self.batch_size))
                    z_, _ = next(self.z_loader)

                if z_.shape[0] != x_.shape[0]:
                    self.z_loader = iter(utils.get_mat_loader(self.batch_size))
                    z_, _ = next(self.z_loader)

                if self.gpu_mode:
                    x_, z_ = Variable(x_.cuda()), Variable(z_.cuda())
                else:
                    x_, z_ = Variable(x_), Variable(z_)

                # update D network
                self.D_optimizer.zero_grad()

                D_real = self.D(x_)
                D_real_loss = self.MSE_loss(D_real, self.y_real_)

                G_ = self.G(z_)
                D_fake = self.D(G_)
                D_fake_loss = self.MSE_loss(D_fake, self.y_fake_)

                D_loss = D_real_loss + D_fake_loss
                self.train_hist['D_loss'].append(D_loss.data[0])

                D_loss.backward()
                self.D_optimizer.step()

                # update G network
                self.G_optimizer.zero_grad()

                G_ = self.G(z_)
                D_fake = self.D(G_)
                G_loss = self.MSE_loss(D_fake, self.y_real_)
                self.train_hist['G_loss'].append(G_loss.data[0])

                G_loss.backward()
                self.G_optimizer.step()

                if ((iter_ + 1) % 100) == 0:
                    print("Epoch: [%2d] [%4d/%4d] D_loss: %.8f, G_loss: %.8f" %
                          ((epoch + 1),
                           (iter_ + 1), self.data_loader.dataset.__len__() //
                           self.batch_size, D_loss.data[0], G_loss.data[0]))

            self.train_hist['per_epoch_time'].append(time.time() -
                                                     epoch_start_time)
            self.visualize_results((epoch + 1))

            latest_g_loss = np.mean(self.train_hist['G_loss'][-200:])
            latest_d_loss = np.mean(self.train_hist['D_loss'][-200:])
            print("g_loss = %f, d_loss = %f" % (latest_g_loss, latest_d_loss))

        self.train_hist['total_time'].append(time.time() - start_time)
        print("Avg one epoch time: %.2f, total %d epochs time: %.2f" %
              (np.mean(self.train_hist['per_epoch_time']), self.epoch,
               self.train_hist['total_time'][0]))
        print("Training finish!... save training results")

        self.save()
        utils.generate_animation(
            self.result_dir + '/' + self.dataset + '/' + self.model_name +
            '/' + self.model_name, self.epoch)
Example #12
    def train(self):
        self.train_hist = {}
        self.train_hist['D_loss'] = []
        self.train_hist['G_loss'] = []
        self.train_hist['info_loss'] = []
        self.train_hist['per_epoch_time'] = []
        self.train_hist['total_time'] = []

        self.y_real_, self.y_fake_ = torch.ones(self.batch_size, 1), torch.zeros(self.batch_size, 1)
        if self.gpu_mode:
            self.y_real_, self.y_fake_ = self.y_real_.cuda(), self.y_fake_.cuda()

        self.D.train()
        print('training start!!')
        start_time = time.time()
        for epoch in range(self.epoch):
            self.G.train()
            epoch_start_time = time.time()
            for iter, (x_, y_) in enumerate(self.data_loader):
                if iter == self.data_loader.dataset.__len__() // self.batch_size:
                    break

                z_ = torch.rand((self.batch_size, self.z_dim))

                # for infogan
                if self.SUPERVISED:
                    y_disc_ = torch.zeros((self.batch_size, self.len_discrete_code)).scatter_(1, y_.type(torch.LongTensor).unsqueeze(1), 1)
                else:
                    y_disc_ = torch.from_numpy(
                        np.random.multinomial(1, self.len_discrete_code * [float(1.0 / self.len_discrete_code)],
                                              size=[self.batch_size])).type(torch.FloatTensor)

                y_cont_ = torch.from_numpy(np.random.uniform(-1, 1, size=(self.batch_size, 2))).type(torch.FloatTensor)
                

                if self.gpu_mode:
                    x_, z_, y_disc_, y_cont_ = x_.cuda(), z_.cuda(), y_disc_.cuda(), y_cont_.cuda()

                # update D network
                self.D_optimizer.zero_grad()

                D_real, _, _ = self.D(x_)
                D_real_loss = self.BCE_loss(D_real, self.y_real_)

                # G_ = self.G(z_)
                # D_fake = self.D(G_)
                # D_fake_loss = torch.mean(D_fake)

                G_ = self.G(z_, y_cont_, y_disc_)
                D_fake, _, _ = self.D(G_)
                D_fake_loss = self.BCE_loss(D_fake, self.y_fake_)

                D_loss = D_real_loss + D_fake_loss

                D_loss.backward(retain_graph=True)

                # clipping D
                for p in self.D.parameters():
                    p.data.clamp_(-self.c, self.c)

                # perturb D's gradients with noise drawn from the distribution self.m
                for p in self.D.parameters():
                    samp = self.m.sample(sample_shape=p.grad.shape).cuda()
                    p.grad += samp

                self.D_optimizer.step()


                if ((iter+1) % self.n_critic) == 0:
                    # update G network
                    self.G_optimizer.zero_grad()

                    G_ = self.G(z_, y_cont_, y_disc_)
                    D_fake, D_cont, D_disc = self.D(G_)


                    G_loss = self.BCE_loss(D_fake, self.y_real_)
                    self.train_hist['G_loss'].append(G_loss.item())

                    G_loss.backward(retain_graph=True)
                    self.G_optimizer.step()

                    self.train_hist['D_loss'].append(D_loss.item())

                    # information loss
                    disc_loss = self.CE_loss(D_disc, torch.max(y_disc_, 1)[1])
                    cont_loss = self.MSE_loss(D_cont, y_cont_)
                    info_loss = disc_loss + cont_loss
                    self.train_hist['info_loss'].append(info_loss.item())

                    info_loss.backward()
                    self.info_optimizer.step()


                if ((iter + 1) % 100) == 0:
                    print("Epoch: [%2d] [%4d/%4d] D_loss: %.8f, G_loss: %.8f, info_loss: %.8f" %
                          ((epoch + 1), (iter + 1), self.data_loader.dataset.__len__() // self.batch_size, D_loss.item(), G_loss.item(), info_loss.item()))

            self.train_hist['per_epoch_time'].append(time.time() - epoch_start_time)
            with torch.no_grad():
                self.visualize_results((epoch+1))

        self.train_hist['total_time'].append(time.time() - start_time)
        print("Avg one epoch time: %.2f, total %d epochs time: %.2f" % (np.mean(self.train_hist['per_epoch_time']),
              self.epoch, self.train_hist['total_time'][0]))
        print("Training finish!... save training results")

        self.save()
        utils.generate_animation(self.result_dir + '/' + self.dataset + '/' + self.model_name + '/' + self.model_name,
                                 self.epoch)
        utils.loss_plot(self.train_hist, os.path.join(self.save_dir, self.dataset, self.model_name), self.model_name)
Example #13
    def train(self):
        self.train_hist = {}
        self.train_hist['D_loss'] = []
        self.train_hist['G_loss'] = []
        self.train_hist['per_epoch_time'] = []
        self.train_hist['total_time'] = []

        if self.gpu_mode:
            self.y_real_, self.y_fake_ = Variable(
                torch.ones(self.batch_size, 1).cuda()), Variable(
                    torch.zeros(self.batch_size, 1).cuda())
        else:
            self.y_real_, self.y_fake_ = Variable(
                torch.ones(self.batch_size,
                           1)), Variable(torch.zeros(self.batch_size, 1))

        self.D.train()
        print('training start!!')
        start_time = time.time()
        for epoch in range(self.epoch):
            self.G.train()
            epoch_start_time = time.time()
            for iter, (x_, _) in enumerate(self.data_loader):
                if iter == len(self.data_loader.dataset) // self.batch_size:
                    break

                z_ = torch.rand((self.batch_size, self.z_dim))

                if self.gpu_mode:
                    x_, z_ = Variable(x_.cuda()), Variable(z_.cuda())
                else:
                    x_, z_ = Variable(x_), Variable(z_)

                # update D network
                self.D_optimizer.zero_grad()

                D_real = self.D(x_)
                D_real_err = torch.mean(torch.abs(D_real - x_))

                G_ = self.G(z_)
                D_fake = self.D(G_)
                D_fake_err = torch.mean(torch.abs(D_fake - G_))

                D_loss = D_real_err - self.k * D_fake_err
                self.train_hist['D_loss'].append(D_loss.data[0])

                D_loss.backward()
                self.D_optimizer.step()

                # update G network
                self.G_optimizer.zero_grad()

                G_ = self.G(z_)
                D_fake = self.D(G_)
                D_fake_err = torch.mean(torch.abs(D_fake - G_))

                G_loss = D_fake_err
                self.train_hist['G_loss'].append(G_loss.data[0])

                G_loss.backward()
                self.G_optimizer.step()

                # convergence metric
                temp_M = D_real_err + torch.abs(self.gamma * D_real_err -
                                                D_fake_err)

                # operation for updating k
                temp_k = self.k + self.lambda_ * (self.gamma * D_real_err -
                                                  D_fake_err)
                temp_k = temp_k.data[0]

                # self.k = temp_k.data[0]
                self.k = min(max(temp_k, 0), 1)
                self.M = temp_M.data[0]

                if ((iter + 1) % 100) == 0:
                    print(
                        "Epoch: [%2d] [%4d/%4d] D_loss: %.8f, G_loss: %.8f, M: %.8f, k: %.8f"
                        %
                        ((epoch + 1), (iter + 1),
                         self.data_loader.dataset.__len__() // self.batch_size,
                         D_loss.data[0], G_loss.data[0], self.M, self.k))

            self.train_hist['per_epoch_time'].append(time.time() -
                                                     epoch_start_time)
            self.visualize_results((epoch + 1))

        self.train_hist['total_time'].append(time.time() - start_time)
        print("Avg one epoch time: %.2f, total %d epochs time: %.2f" %
              (np.mean(self.train_hist['per_epoch_time']), self.epoch,
               self.train_hist['total_time'][0]))
        print("Training finish!... save training results")

        self.save()
        utils.generate_animation(
            self.result_dir + '/' + self.dataset + '/' + self.model_name +
            '/' + self.model_name, self.epoch)
        utils.loss_plot(
            self.train_hist,
            os.path.join(self.save_dir, self.dataset, self.model_name),
            self.model_name)
Example #14
    def train(self):
        self.train_hist = {}
        self.train_hist['D_loss'] = []
        self.train_hist['G_loss'] = []
        self.train_hist['C_loss'] = []
        self.train_hist['per_epoch_time'] = []
        self.train_hist['total_time'] = []

        if self.gpu_mode:
            self.y_real_, self.y_fake_ = Variable(
                torch.ones(self.batch_size, 1).cuda()), Variable(
                    torch.zeros(self.batch_size, 1).cuda())
        else:
            self.y_real_, self.y_fake_ = Variable(
                torch.ones(self.batch_size,
                           1)), Variable(torch.zeros(self.batch_size, 1))

        self.D.train()
        self.C.train()
        print('training start!!')
        start_time = time.time()
        for epoch in range(self.epoch):
            self.G.train()
            epoch_start_time = time.time()
            for iter in range(len(self.data_X) // self.batch_size):
                x_ = self.data_X[iter * self.batch_size:(iter + 1) *
                                 self.batch_size]
                z_ = torch.rand((self.batch_size, self.z_dim))
                # z_ = torch.Tensor(self.batch_size, self.z_dim).normal_(0, 1)
                y_vec_ = self.data_Y[iter * self.batch_size:(iter + 1) *
                                     self.batch_size]

                if self.gpu_mode:
                    x_, z_, y_vec_ = Variable(x_.cuda()), Variable(
                        z_.cuda()), Variable(y_vec_.cuda())
                else:
                    x_, z_, y_vec_ = Variable(x_), Variable(z_), Variable(
                        y_vec_)

                # update D network
                self.D_optimizer.zero_grad()
                self.C_optimizer.zero_grad()

                D_real = self.D(x_)
                C_real = self.C(x_)

                D_real_loss = self.BCE_loss(D_real, self.y_real_)
                C_real_loss = self.CE_loss(C_real, torch.max(y_vec_, 1)[1])

                G_ = self.G(z_, y_vec_)
                D_fake = self.D(G_)
                C_fake = self.C(G_)
                D_fake_loss = self.BCE_loss(D_fake, self.y_fake_)
                C_fake_loss = self.CE_loss(C_fake, torch.max(y_vec_, 1)[1])

                D_loss = D_real_loss + D_fake_loss
                C_loss = C_real_loss + C_fake_loss

                self.train_hist['D_loss'].append(D_loss.item())
                self.train_hist['C_loss'].append(C_loss.item())

                D_loss.backward(retain_graph=True)
                self.D_optimizer.step()

                C_loss.backward(retain_graph=True)
                self.C_optimizer.step()

                # update G network
                self.G_optimizer.zero_grad()

                G_ = self.G(z_, y_vec_)
                D_fake = self.D(G_)
                C_fake = self.C(G_)

                G_loss = self.BCE_loss(D_fake, self.y_real_)
                C_fake_loss = self.CE_loss(C_fake, torch.max(y_vec_, 1)[1])

                G_loss += C_fake_loss
                self.train_hist['G_loss'].append(G_loss.item())

                G_loss.backward(retain_graph=True)
                self.G_optimizer.step()

                if ((iter + 1) % 100) == 0:
                    print(
                        "Epoch: [%2d] [%4d/%4d] D_loss: %.8f, G_loss: %.8f, C_loss: %.8f"
                        % ((epoch + 1),
                           (iter + 1), len(self.data_X) // self.batch_size,
                           D_loss.item(), G_loss.item(), C_loss.item()))

            self.train_hist['per_epoch_time'].append(time.time() -
                                                     epoch_start_time)
            self.visualize_results((epoch + 1), fix=False)

            print('\n[INFO]: Test the classifier:')
            # self.C.eval()
            correct = 0
            nb_test = len(self.X_test)

            for iter in range(nb_test // self.batch_size):
                x_ = self.X_test[iter * self.batch_size:(iter + 1) *
                                 self.batch_size]
                y_vec_ = self.y_test_vec[iter * self.batch_size:(iter + 1) *
                                         self.batch_size]

                if self.gpu_mode:
                    x_, y_vec_ = Variable(x_.cuda()), Variable(y_vec_.cuda())
                else:
                    x_, y_vec_ = Variable(x_), Variable(y_vec_)

                outputs = self.C(x_)

                #  C_real_loss = self.CE_loss(C_real, torch.max(y_vec_, 1)[1])

                # loss = self.CE_loss(outputs, torch.max(y_vec_, 1)[1])

                # get the index of the max log-probability
                pred = outputs.data.max(1)[1]
                pred = pred.eq(torch.max(y_vec_, 1)[1].data).cpu().data.float()
                correct += pred.sum()

            print('Accuracy of the network on the test images: %.2f %%' %
                  (100. * correct / nb_test))

            print('[INFO]: Testing finish! \n')

        self.train_hist['total_time'].append(time.time() - start_time)
        print("Avg one epoch time: %.2f, total %d epochs time: %.2f" %
              (np.mean(self.train_hist['per_epoch_time']), self.epoch,
               self.train_hist['total_time'][0]))
        print("Training finish!... save training results")

        self.save()
        utils.generate_animation(
            self.result_dir + '/' + self.dataset + '/' + self.model_name +
            '/' + self.model_name, self.epoch)
        utils.loss_plot(
            self.train_hist,
            os.path.join(self.save_dir, self.dataset, self.model_name),
            self.model_name)
Example #15
    def train(self):
        self.train_hist = {}
        self.train_hist['D_loss'] = []
        self.train_hist['G_loss'] = []
        self.train_hist['per_epoch_time'] = []
        self.train_hist['total_time'] = []

        if self.gpu_mode:
            self.y_real_, self.y_fake_ = Variable(torch.ones(self.batch_size, 1).cuda()), Variable(torch.zeros(self.batch_size, 1).cuda())
        else:
            self.y_real_, self.y_fake_ = Variable(torch.ones(self.batch_size, 1)), Variable(torch.zeros(self.batch_size, 1))

        self.D.train()
        print('training start!!')
        start_time = time.time()
        for epoch in range(self.epoch):
            self.G.train()
            epoch_start_time = time.time()
            for iter, (x_, _) in enumerate(self.data_loader):
                if iter == self.data_loader.dataset.__len__() // self.batch_size:
                    break

                z_ = torch.rand((self.batch_size, self.z_dim))

                if self.gpu_mode:
                    x_, z_ = Variable(x_.cuda()), Variable(z_.cuda())
                else:
                    x_, z_ = Variable(x_), Variable(z_)

                # update D network
                self.D_optimizer.zero_grad()

                D_real, D_real_code = self.D(x_)
                D_real_err = self.MSE_loss(D_real, x_)

                G_ = self.G(z_)
                D_fake, D_fake_code = self.D(G_)
                D_fake_err = self.MSE_loss(D_fake, G_.detach())
                if (self.margin - D_fake_err).data[0] > 0:
                    D_loss = D_real_err + (self.margin - D_fake_err)
                else:
                    D_loss = D_real_err
                self.train_hist['D_loss'].append(D_loss.data[0])

                D_loss.backward()
                self.D_optimizer.step()

                # update G network
                self.G_optimizer.zero_grad()

                G_ = self.G(z_)
                D_fake, D_fake_code = self.D(G_)
                D_fake_err = self.MSE_loss(D_fake, G_.detach())
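                # the pullaway term (EBGAN) penalizes cosine similarity between embedding codes to encourage sample diversity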
                G_loss = D_fake_err + self.pt_loss_weight * self.pullaway_loss(D_fake_code)
                self.train_hist['G_loss'].append(G_loss.data[0])

                G_loss.backward()
                self.G_optimizer.step()

                if ((iter + 1) % 100) == 0:
                    print("Epoch: [%2d] [%4d/%4d] D_loss: %.8f, G_loss: %.8f" %
                          ((epoch + 1), (iter + 1), self.data_loader.dataset.__len__() // self.batch_size, D_loss.data[0], G_loss.data[0]))

            self.train_hist['per_epoch_time'].append(time.time() - epoch_start_time)
            self.visualize_results((epoch+1))

        self.train_hist['total_time'].append(time.time() - start_time)
        print("Avg one epoch time: %.2f, total %d epochs time: %.2f" % (np.mean(self.train_hist['per_epoch_time']),
              self.epoch, self.train_hist['total_time'][0]))
        print("Training finish!... save training results")

        self.save()
        utils.generate_animation(self.result_dir + '/' + self.dataset + '/' + self.model_name + '/' + self.model_name,
                                 self.epoch)
        utils.loss_plot(self.train_hist, os.path.join(self.save_dir, self.dataset, self.model_name), self.model_name)
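The margin branch in the discriminator update above is the EBGAN hinge; a minimal equivalent sketch using `clamp`, assuming the same reconstruction errors and `margin` as in the code:

import torch

def ebgan_d_loss(D_real_err, D_fake_err, margin):
    # hinge: fakes contribute only while their energy is below the margin
    return D_real_err + torch.clamp(margin - D_fake_err, min=0)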
Example #16
    def train(self):
        self.train_hist = {}
        self.train_hist['D_loss'] = []
        self.train_hist['G_loss'] = []
        self.train_hist['per_epoch_time'] = []
        self.train_hist['total_time'] = []

        if self.gpu_mode:
            self.y_real_, self.y_fake_ = Variable(torch.ones(self.batch_size, 1).cuda()), Variable(torch.zeros(self.batch_size, 1).cuda())
        else:
            self.y_real_, self.y_fake_ = Variable(torch.ones(self.batch_size, 1)), Variable(torch.zeros(self.batch_size, 1))

        self.D.train()
        print('training start!!')
        start_time = time.time()
        for epoch in range(self.epoch):
            self.G.train()
            epoch_start_time = time.time()
            for iter, (x_, _) in enumerate(self.data_loader):
                if iter == self.data_loader.dataset.__len__() // self.batch_size:
                    break

                z_ = torch.rand((self.batch_size, self.z_dim))

                if self.gpu_mode:
                    x_, z_ = Variable(x_.cuda()), Variable(z_.cuda())
                else:
                    x_, z_ = Variable(x_), Variable(z_)

                # update D network
                self.D_optimizer.zero_grad()

                D_real = self.D(x_)
                D_real_loss = -torch.mean(D_real)

                G_ = self.G(z_)
                D_fake = self.D(G_)
                D_fake_loss = torch.mean(D_fake)

                # gradient penalty
                if self.gpu_mode:
                    alpha = torch.rand(x_.size()).cuda()
                else:
                    alpha = torch.rand(x_.size())

                x_hat = Variable(alpha * x_.data + (1 - alpha) * G_.data, requires_grad=True)

                pred_hat = self.D(x_hat)
                if self.gpu_mode:
                    gradients = grad(outputs=pred_hat, inputs=x_hat, grad_outputs=torch.ones(pred_hat.size()).cuda(),
                                 create_graph=True, retain_graph=True, only_inputs=True)[0]
                else:
                    gradients = grad(outputs=pred_hat, inputs=x_hat, grad_outputs=torch.ones(pred_hat.size()),
                                     create_graph=True, retain_graph=True, only_inputs=True)[0]

                gradient_penalty = self.lambda_ * ((gradients.view(gradients.size()[0], -1).norm(2, 1) - 1) ** 2).mean()

                D_loss = D_real_loss + D_fake_loss + gradient_penalty

                D_loss.backward()
                self.D_optimizer.step()

                if ((iter+1) % self.n_critic) == 0:
                    # update G network
                    self.G_optimizer.zero_grad()

                    G_ = self.G(z_)
                    D_fake = self.D(G_)
                    G_loss = -torch.mean(D_fake)
                    self.train_hist['G_loss'].append(G_loss.data[0])

                    G_loss.backward()
                    self.G_optimizer.step()

                    self.train_hist['D_loss'].append(D_loss.data[0])

                if ((iter + 1) % 100) == 0:
                    print("Epoch: [%2d] [%4d/%4d] D_loss: %.8f, G_loss: %.8f" %
                          ((epoch + 1), (iter + 1), self.data_loader.dataset.__len__() // self.batch_size, D_loss.data[0], G_loss.data[0]))

            self.train_hist['per_epoch_time'].append(time.time() - epoch_start_time)
            self.visualize_results((epoch+1))

        self.train_hist['total_time'].append(time.time() - start_time)
        print("Avg one epoch time: %.2f, total %d epochs time: %.2f" % (np.mean(self.train_hist['per_epoch_time']),
              self.epoch, self.train_hist['total_time'][0]))
        print("Training finish!... save training results")

        self.save()
        utils.generate_animation(self.result_dir + '/' + self.dataset + '/' + self.model_name + '/' + self.model_name,
                                 self.epoch)
        utils.loss_plot(self.train_hist, os.path.join(self.save_dir, self.dataset, self.model_name), self.model_name)
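The gradient-penalty block above can be factored into a helper; a sketch under the same assumptions as this code (per-element mixing `alpha`, as written here, rather than the per-sample `alpha` of the WGAN-GP paper; modern PyTorch):

import torch
from torch.autograd import grad

def gradient_penalty(D, x_real, x_fake, lambda_):
    alpha = torch.rand_like(x_real)  # per-element mixing, as above
    x_hat = (alpha * x_real + (1 - alpha) * x_fake).detach().requires_grad_(True)
    pred = D(x_hat)
    g = grad(outputs=pred, inputs=x_hat,
             grad_outputs=torch.ones_like(pred),
             create_graph=True, retain_graph=True, only_inputs=True)[0]
    return lambda_ * ((g.view(g.size(0), -1).norm(2, dim=1) - 1) ** 2).mean()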
Example #17
    def train(self):
        self.train_hist = {}
        self.train_hist['D_loss'] = []
        self.train_hist['G_loss'] = []
        self.train_hist['per_epoch_time'] = []
        self.train_hist['total_time'] = []

        if self.gpu_mode:
            self.y_real_, self.y_fake_ = Variable(
                torch.ones(self.batch_size, 1).cuda()), Variable(
                    torch.zeros(self.batch_size, 1).cuda())
        else:
            self.y_real_, self.y_fake_ = Variable(
                torch.ones(self.batch_size,
                           1)), Variable(torch.zeros(self.batch_size, 1))

        # self.D.train()
        print('training start!!')
        start_time = time.time()

        for epoch in range(self.epoch):
            # self.G.train()
            epoch_start_time = time.time()

            for iter, (x_, _) in enumerate(self.data_loader):
                if iter == self.data_loader.dataset.__len__() // self.batch_size:
                    break

                z_ = torch.rand((self.batch_size, self.z_dim))

                if self.gpu_mode:
                    x_, z_ = Variable(x_.cuda()), Variable(z_.cuda())
                else:
                    x_, z_ = Variable(x_), Variable(z_)
                """Update D network"""
                self.D_optimizer.zero_grad()

                # train with real images
                y_hat_real = self.D(x_)  # forward pass
                D_real_loss = self.BCE_loss(y_hat_real, self.y_real_)

                generated_images_ = self.G(z_)
                y_hat_fake = self.D(generated_images_)
                D_fake_loss = self.BCE_loss(y_hat_fake, self.y_fake_)

                D_loss = D_fake_loss + D_real_loss
                self.train_hist['D_loss'].append(D_loss.data[0])

                D_loss.backward()
                self.D_optimizer.step()
                """Update generator network"""
                self.G_optimizer.zero_grad()

                generated_images_ = self.G(z_)
                y_hat_fake = self.D(generated_images_)
                G_loss = self.BCE_loss(y_hat_fake, self.y_real_)
                self.train_hist['G_loss'].append(G_loss.data[0])

                G_loss.backward()
                self.G_optimizer.step()
                if ((iter + 1) % 100) == 0:
                    print("Epoch: [%2d] [%4d/%4d] D_loss: %.8f, G_loss: %.8f" %
                          ((epoch + 1),
                           (iter + 1), self.data_loader.dataset.__len__() //
                           self.batch_size, D_loss.data[0], G_loss.data[0]))

            self.train_hist['per_epoch_time'].append(time.time() -
                                                     epoch_start_time)
            self.visualize_results((epoch + 1))

        self.train_hist['total_time'].append(time.time() - start_time)
        print("Avg one epoch time: %.2f, total %d epochs time: %.2f" %
              (np.mean(self.train_hist['per_epoch_time']), self.epoch,
               self.train_hist['total_time'][0]))
        print("Training finish!... save training results")

        self.save()
        utils.generate_animation(
            self.result_dir + '/' + self.dataset + '/' + self.model_name +
            '/' + self.model_name, self.epoch)
        utils.loss_plot(
            self.train_hist,
            os.path.join(self.save_dir, self.dataset, self.model_name),
            self.model_name)
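For reference, the two updates above reduce to the standard non-saturating GAN losses; a compact sketch, assuming `D` outputs probabilities as with the `BCE_loss` used here:

import torch
import torch.nn.functional as F

def gan_losses(D, x_real, x_fake):
    ones = torch.ones(x_real.size(0), 1, device=x_real.device)
    zeros = torch.zeros_like(ones)
    d_loss = F.binary_cross_entropy(D(x_real), ones) + \
             F.binary_cross_entropy(D(x_fake.detach()), zeros)
    g_loss = F.binary_cross_entropy(D(x_fake), ones)  # non-saturating trick
    return d_loss, g_loss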
Example #18
    def train(self):
        self.train_hist = {}
        self.train_hist['D_loss'] = []
        self.train_hist['G_loss'] = []
        self.train_hist['per_epoch_time'] = []
        self.train_hist['total_time'] = []

        self.y_real_, self.y_fake_ = torch.ones(self.batch_size, 1), torch.zeros(self.batch_size, 1)
        if self.gpu_mode:
            self.y_real_, self.y_fake_ = self.y_real_.cuda(), self.y_fake_.cuda()

        self.D.train()
        print('training start!!')
        start_time = time.time()
        for epoch in range(self.epoch):
            self.G.train()
            epoch_start_time = time.time()
            for iter, (x_, y_) in enumerate(self.data_loader):
                if iter == self.data_loader.dataset.__len__() // self.batch_size:
                    break
                z_ = torch.rand((self.batch_size, self.z_dim))
                y_vec_ = torch.zeros((self.batch_size, self.class_num)).scatter_(
                    1, y_.type(torch.LongTensor).unsqueeze(1), 1)
                if self.gpu_mode:
                    x_, z_, y_vec_ = x_.cuda(), z_.cuda(), y_vec_.cuda()

                # update D network
                self.D_optimizer.zero_grad()

                D_real, C_real = self.D(x_)
                D_real_loss = self.BCE_loss(D_real, self.y_real_)
                C_real_loss = self.CE_loss(C_real, torch.max(y_vec_, 1)[1])

                G_ = self.G(z_, y_vec_)
                D_fake, C_fake = self.D(G_)
                D_fake_loss = self.BCE_loss(D_fake, self.y_fake_)
                C_fake_loss = self.CE_loss(C_fake, torch.max(y_vec_, 1)[1])

                D_loss = D_real_loss + self.alpha * C_real_loss + D_fake_loss + self.alpha * C_fake_loss
                self.train_hist['D_loss'].append(D_loss.item())

                D_loss.backward()
                self.D_optimizer.step()

                # update G network
                self.G_optimizer.zero_grad()

                z_.requires_grad = True
                G_ = self.G(z_, y_vec_)
                D_fake, C_fake = self.D(G_)

                G_loss = self.BCE_loss(D_fake, self.y_real_)
                C_fake_loss = self.CE_loss(C_fake, torch.max(y_vec_, 1)[1])

                G_loss += self.alpha * C_fake_loss

                # penalize global Lipschitz using gradient norm
                gradients = grad(outputs=G_,
                                 inputs=z_,
                                 grad_outputs=torch.ones(G_.size()).cuda(),
                                 create_graph=True,
                                 retain_graph=True,
                                 only_inputs=True)[0]

                # gradients = torch.zeros(G_.size()).cuda()
                reg_loss = (gradients.view(gradients.size()[0],
                                           -1).norm(2, 1)**2).mean()
                G_loss += self.lambda_ * reg_loss

                # # penalize local Lipschitz
                # reg_loss = 0
                # for j in range(self.n_repeat):
                #     v = f.normalize(torch.rand(self.batch_size, self.z_dim), p=2, dim=1)
                #     u = torch.rand(self.batch_size) + 1e-12    # avoid underflow
                #     unif_noise = (u ** (1/float(self.z_dim))).unsqueeze(1)*v
                #     unif_noise = unif_noise.cuda()

                #     G_neighbor_ = self.G(z_+unif_noise, y_vec_)
                #     dist_x = torch.sqrt(torch.sum((G_neighbor_.view(self.batch_size, -1) - G_.view(self.batch_size, -1))**2, dim=1))
                #     dist_z = torch.sqrt(torch.sum(unif_noise**2, dim=1))

                #     reg_loss += torch.mean(dist_x / dist_z)

                # G_loss += self.lambda_ * reg_loss / self.n_repeat

                self.train_hist['G_loss'].append(G_loss.item())

                G_loss.backward()
                self.G_optimizer.step()

                if ((iter + 1) % 100) == 0:
                    print(
                        "Epoch: [%2d] [%4d/%4d] D_loss: %.8f, G_loss: %.8f, reg_loss: %.4f"
                        %
                        ((epoch + 1), (iter + 1),
                         self.data_loader.dataset.__len__() // self.batch_size,
                         D_loss.item(), G_loss.item(), reg_loss.item()))

            self.train_hist['per_epoch_time'].append(time.time() -
                                                     epoch_start_time)
            with torch.no_grad():
                self.visualize_results((epoch + 1))

        self.train_hist['total_time'].append(time.time() - start_time)
        print("Avg one epoch time: %.2f, total %d epochs time: %.2f" %
              (np.mean(self.train_hist['per_epoch_time']), self.epoch,
               self.train_hist['total_time'][0]))
        print("Training finish!... save training results")

        self.save()
        utils.generate_animation(
            self.result_dir + '/' + self.dataset + '/' + self.model_name +
            '/' + self.model_name, self.epoch)
        utils.loss_plot(
            self.train_hist,
            os.path.join(self.save_dir, self.dataset, self.model_name),
            self.model_name)
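The `reg_loss` term above penalizes the squared gradient norm of the generator output with respect to `z_`; the same regularizer in isolation (a sketch, with an unconditional `G` for brevity where the code above passes `y_vec_` as well):

import torch
from torch.autograd import grad

def generator_grad_penalty(G, z, lambda_):
    z = z.detach().requires_grad_(True)
    g_out = G(z)
    grads = grad(outputs=g_out, inputs=z,
                 grad_outputs=torch.ones_like(g_out),
                 create_graph=True, retain_graph=True, only_inputs=True)[0]
    return lambda_ * (grads.view(grads.size(0), -1).norm(2, dim=1) ** 2).mean()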
Example #19
    def train(self):
        self.train_hist = {}
        self.train_hist['D_loss'] = []
        self.train_hist['G_loss'] = []
        self.train_hist['per_epoch_time'] = []
        self.train_hist['total_time'] = []
        self.train_hist['D_norm'] = []

        f = open("%s/results.txt" % self.log_dir, "w")
        f.write("d_loss,g_loss,d_norm\n")
    
        if self.gpu_mode:
            self.y_real_, self.y_fake_ = Variable(torch.ones(self.batch_size, 1).cuda()), Variable(torch.zeros(self.batch_size, 1).cuda())
        else:
            self.y_real_, self.y_fake_ = Variable(torch.ones(self.batch_size, 1)), Variable(torch.zeros(self.batch_size, 1))


        self.D.train()
        print('training start!!')
        start_time = time.time()
        for epoch in range(self.epoch):
            self.G.train()
            epoch_start_time = time.time()
            for iter, (x_, _) in enumerate(self.data_loader):
                
                if iter == self.data_loader.dataset.__len__() // self.batch_size:
                    break

                z_ = torch.rand((self.batch_size, self.z_dim))

                if self.gpu_mode:
                    x_, z_ = Variable(x_.cuda(), requires_grad=True), \
                            Variable(z_.cuda())
                else:
                    x_, z_ = Variable(x_, requires_grad=True), \
                            Variable(z_)

                # update D network

                D_real = self.D(x_)
                # compute gradient penalty
                grad_wrt_x = grad(outputs=D_real, inputs=x_,
                                  grad_outputs=torch.ones(D_real.size()).cuda() if self.gpu_mode
                                  else torch.ones(D_real.size()),
                                  create_graph=True, retain_graph=True, only_inputs=True)[0]
                g_norm = ((grad_wrt_x.view(grad_wrt_x.size()[0], -1).norm(2, 1) - 1) ** 2).mean()
                self.train_hist['D_norm'].append(g_norm.data.item())

                self.D_optimizer.zero_grad()

                G_ = self.G(z_).detach()
                alpha = float(np.random.random())
                Xz = Variable(alpha*x_.data + (1.-alpha)*G_.data)
                D_Xz = self.D(Xz)
                D_loss = self.BCE_loss(D_Xz, alpha*self.y_real_)
                
                self.train_hist['D_loss'].append(D_loss.data.item())

                D_loss.backward()
                self.D_optimizer.step()

                # update G network
                self.G_optimizer.zero_grad()

                G_ = self.G(z_)
                D_fake = self.D(G_)
                G_loss = self.BCE_loss(D_fake, self.y_real_)
                self.train_hist['G_loss'].append(G_loss.data.item())

                G_loss.backward()
                self.G_optimizer.step()

                if ((iter + 1) % 100) == 0:
                    print("Epoch: [%2d] [%4d/%4d] D_loss: %.8f, G_loss: %.8f, D_norm: %.8f" %
                          ((epoch + 1),
                           (iter + 1),
                           self.data_loader.dataset.__len__() // self.batch_size,
                           D_loss.data.item(),
                           G_loss.data.item(),
                           g_norm.data.item()))
                    f.write("%.8f,%.8f,%.8f\n" % (D_loss.data.item(), G_loss.data.item(), g_norm.data.item()))
                    f.flush()

            self.train_hist['per_epoch_time'].append(time.time() - epoch_start_time)
            self.visualize_results((epoch+1))

        self.train_hist['total_time'].append(time.time() - start_time)
        print("Avg one epoch time: %.2f, total %d epochs time: %.2f" % (np.mean(self.train_hist['per_epoch_time']),
              self.epoch, self.train_hist['total_time'][0]))
        print("Training finish!... save training results")

        f.close()

        self.save()
        utils.generate_animation(self.result_dir + '/' + self.dataset + '/' + self.model_name + '/' + self.model_name,
                                 self.epoch)
        utils.loss_plot(self.train_hist, os.path.join(self.save_dir, self.dataset, self.model_name), self.model_name)
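The D update above scores a single random interpolate `Xz` against the soft target `alpha * y_real_`; a sketch of that step in isolation (one mixing weight drawn per batch, as in the code):

import numpy as np
import torch
import torch.nn.functional as F

def soft_interpolate_d_loss(D, x_real, x_fake, y_real):
    alpha = float(np.random.random())  # one mixing weight per batch
    xz = (alpha * x_real + (1. - alpha) * x_fake).detach()
    return F.binary_cross_entropy(D(xz), alpha * y_real)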
Example #20
    def train(self):
        self.train_hist = {}
        self.train_hist['D_loss'] = []
        self.train_hist['G_loss'] = []
        self.train_hist['per_epoch_time'] = []
        self.train_hist['total_time'] = []

        if self.gpu_mode:
            self.y_real_, self.y_fake_ = Variable(
                torch.ones(self.batch_size, 1).cuda()), Variable(
                    torch.zeros(self.batch_size, 1).cuda())
        else:
            self.y_real_, self.y_fake_ = Variable(
                torch.ones(self.batch_size,
                           1)), Variable(torch.zeros(self.batch_size, 1))

        self.D.train()
        print('training start!!')
        start_time = time.time()
        for epoch in range(self.epoch):
            epoch_start_time = time.time()
            self.G.train()
            for iter, (x_, _) in enumerate(self.data_loader):
                if iter == self.data_loader.dataset.__len__() // self.batch_size:
                    break

                z_ = torch.rand((self.batch_size, self.z_dim))

                if self.gpu_mode:
                    x_, z_ = Variable(x_.cuda()), Variable(z_.cuda())
                else:
                    x_, z_ = Variable(x_), Variable(z_)

                # update D network
                self.D_optimizer.zero_grad()

                D_real = self.D(x_)
                D_real_loss = self.BCE_loss(D_real, self.y_real_)

                G_ = self.G(z_)
                D_fake = self.D(G_)
                D_fake_loss = self.BCE_loss(D_fake, self.y_fake_)
                """ DRAGAN Loss (Gradient penalty) """
                # This is borrowed from https://github.com/jfsantos/dragan-pytorch/blob/master/dragan.py
                if self.gpu_mode:
                    alpha = torch.rand(x_.size()).cuda()
                    x_hat = Variable(
                        alpha * x_.data + (1 - alpha) *
                        (x_.data +
                         0.5 * x_.data.std() * torch.rand(x_.size()).cuda()),
                        requires_grad=True)
                else:
                    alpha = torch.rand(x_.size())
                    x_hat = Variable(
                        alpha * x_.data + (1 - alpha) *
                        (x_.data +
                         0.5 * x_.data.std() * torch.rand(x_.size())),
                        requires_grad=True)
                pred_hat = self.D(x_hat)
                if self.gpu_mode:
                    gradients = grad(outputs=pred_hat,
                                     inputs=x_hat,
                                     grad_outputs=torch.ones(
                                         pred_hat.size()).cuda(),
                                     create_graph=True,
                                     retain_graph=True,
                                     only_inputs=True)[0]
                else:
                    gradients = grad(outputs=pred_hat,
                                     inputs=x_hat,
                                     grad_outputs=torch.ones(pred_hat.size()),
                                     create_graph=True,
                                     retain_graph=True,
                                     only_inputs=True)[0]

                gradient_penalty = self.lambda_ * (
                    (gradients.view(gradients.size()[0], -1).norm(2, 1) - 1)**
                    2).mean()

                D_loss = D_real_loss + D_fake_loss + gradient_penalty
                self.train_hist['D_loss'].append(D_loss.data[0])
                D_loss.backward()
                self.D_optimizer.step()

                # update G network
                self.G_optimizer.zero_grad()

                G_ = self.G(z_)
                D_fake = self.D(G_)

                G_loss = self.BCE_loss(D_fake, self.y_real_)
                self.train_hist['G_loss'].append(G_loss.data[0])

                G_loss.backward()
                self.G_optimizer.step()

                if ((iter + 1) % 10) == 0:
                    print("Epoch: [%2d] [%4d/%4d] D_loss: %.8f, G_loss: %.8f" %
                          ((epoch + 1),
                           (iter + 1), self.data_loader.dataset.__len__() //
                           self.batch_size, D_loss.data[0], G_loss.data[0]))

            self.train_hist['per_epoch_time'].append(time.time() -
                                                     epoch_start_time)
            self.visualize_results((epoch + 1))

        self.train_hist['total_time'].append(time.time() - start_time)
        print("Avg one epoch time: %.2f, total %d epochs time: %.2f" %
              (np.mean(self.train_hist['per_epoch_time']), self.epoch,
               self.train_hist['total_time'][0]))
        print("Training finish!... save training results")

        self.save()
        utils.generate_animation(
            self.result_dir + '/' + self.dataset + '/' + self.model_name +
            '/' + self.model_name, self.epoch)
        utils.loss_plot(
            self.train_hist,
            os.path.join(self.save_dir, self.dataset, self.model_name),
            self.model_name)
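DRAGAN differs from WGAN-GP only in where the penalty is sampled: around perturbed copies of the real data rather than on real/fake interpolations. A sketch of just that sampling, with the same noise scaling as the block above:

import torch

def dragan_perturbed(x_real):
    # interpolate between real data and a noisy copy of it
    alpha = torch.rand_like(x_real)
    noisy = x_real + 0.5 * x_real.std() * torch.rand_like(x_real)
    return (alpha * x_real + (1 - alpha) * noisy).detach().requires_grad_(True)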
Example #21
    def train(self):
        self.train_hist = {}
        self.train_hist['D_loss'] = []
        self.train_hist['G_loss'] = []
        self.train_hist['per_epoch_time'] = []
        self.train_hist['total_time'] = []

        if self.gpu_mode:
            self.y_real_, self.y_fake_ = Variable(torch.ones(self.batch_size, 1).cuda()), Variable(torch.zeros(self.batch_size, 1).cuda())
        else:
            self.y_real_, self.y_fake_ = Variable(torch.ones(self.batch_size, 1)), Variable(torch.zeros(self.batch_size, 1))

        self.D.train()
        print('training start!!')
        start_time = time.time()
        for epoch in range(self.epoch):
            self.G.train()
            epoch_start_time = time.time()
            for iter, (x_, _) in enumerate(self.data_loader):
                if iter == self.data_loader.dataset.__len__() // self.batch_size:
                    break

                z_ = torch.rand((self.batch_size, self.z_dim))

                if self.gpu_mode:
                    x_, z_ = Variable(x_.cuda()), Variable(z_.cuda())
                else:
                    x_, z_ = Variable(x_), Variable(z_)

                # update D network
                self.D_optimizer.zero_grad()

                D_real = self.D(x_)
                D_real_loss = -torch.mean(D_real)

                G_ = self.G(z_)
                D_fake = self.D(G_)
                D_fake_loss = torch.mean(D_fake)

                D_loss = D_real_loss + D_fake_loss

                D_loss.backward()
                self.D_optimizer.step()

                # clipping D
                for p in self.D.parameters():
                    p.data.clamp_(-self.c, self.c)

                if ((iter+1) % self.n_critic) == 0:
                    # update G network
                    self.G_optimizer.zero_grad()

                    G_ = self.G(z_)
                    D_fake = self.D(G_)
                    G_loss = -torch.mean(D_fake)
                    self.train_hist['G_loss'].append(G_loss.data[0])

                    G_loss.backward()
                    self.G_optimizer.step()

                    self.train_hist['D_loss'].append(D_loss.data[0])

                if ((iter + 1) % 100) == 0:
                    print("Epoch: [%2d] [%4d/%4d] D_loss: %.8f, G_loss: %.8f" %
                          ((epoch + 1), (iter + 1), self.data_loader.dataset.__len__() // self.batch_size, D_loss.data[0], G_loss.data[0]))

            self.train_hist['per_epoch_time'].append(time.time() - epoch_start_time)
            self.visualize_results((epoch+1))

        self.train_hist['total_time'].append(time.time() - start_time)
        print("Avg one epoch time: %.2f, total %d epochs time: %.2f" % (np.mean(self.train_hist['per_epoch_time']),
              self.epoch, self.train_hist['total_time'][0]))
        print("Training finish!... save training results")

        self.save()
        utils.generate_animation(self.result_dir + '/' + self.dataset + '/' + self.model_name + '/' + self.model_name,
                                 self.epoch)
        utils.loss_plot(self.train_hist, os.path.join(self.save_dir, self.dataset, self.model_name), self.model_name)
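Weight clipping is the WGAN-specific piece above; in isolation (applied after every discriminator step, with `G` updated only every `n_critic` steps):

def clip_critic(D, c):
    # crude enforcement of the Lipschitz constraint, as in the original WGAN
    for p in D.parameters():
        p.data.clamp_(-c, c)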
Example #22
    def train(self):
        self.train_hist = {}
        self.train_hist['D_loss'] = []
        self.train_hist['G_loss'] = []
        self.train_hist['E_loss'] = []
        self.train_hist['per_epoch_time'] = []
        self.train_hist['total_time'] = []

        if torch.cuda.is_available():
            self.y_real_, self.y_fake_ = Variable(torch.ones(self.batch_size, 1).cuda()), Variable(
                torch.zeros(self.batch_size, 1).cuda())
        else:
            self.y_real_, self.y_fake_ = Variable(torch.ones(self.batch_size, 1)), Variable(
                torch.zeros(self.batch_size, 1))

        # self.D.train()
        print('training start!!')
        start_time = time.time()
        self.load(149)  # resume from the checkpoint saved at epoch 149
        for epoch in range(150, self.epoch):
            self.G_optimizer.param_groups[0]['lr'] = self.args.lrG / np.sqrt(epoch + 1)
            self.D_optimizer.param_groups[0]['lr'] = self.args.lrD / np.sqrt(epoch + 1)
            # reset training mode of G and E

            epoch_start_time = time.time()

            D_err = []
            G_err = []
            # learning rate decay
            # if (epoch+1) % 20 == 0:
            #     self.G_optimizer.param_groups[0]['lr'] /= 2
            #     self.D_optimizer.param_groups[0]['lr'] /= 2
            #     self.E_optimizer.param_groups[0]['lr'] /= 2
            #     print("learning rate change!")
            # self.G_optimizer.param_groups[0]['lr'] /= np.sqrt(epoch+1)
            # self.D_optimizer.param_groups[0]['lr'] /= np.sqrt(epoch+1)
            # self.E_optimizer.param_groups[0]['lr'] /= np.sqrt(epoch+1)
            # print("learning rate change!")


            for iter, (X, _) in enumerate(self.data_loader):

                X = utils.to_var(X)

                """Discriminator"""
                z = utils.to_var(torch.randn(self.batch_size, self.z_dim))
                X_hat = self.G(z)
                D_real = self.D(self.FC(X))
                D_fake = self.D(self.FC(X_hat))
                D_loss = self.BCE_loss(D_real, self.y_real_) + self.BCE_loss(D_fake, self.y_fake_)
                self.train_hist['D_loss'].append(D_loss.data[0])
                D_err.append(D_loss.data[0])
                # Optimize
                D_loss.backward()
                self.D_optimizer.step()
                self.__reset_grad()

                """Generator"""
                # Use both Discriminator and Encoder to update Generator
                z = utils.to_var(torch.randn(self.batch_size, self.z_dim))
                X_hat = self.G(z)
                D_fake = self.D(self.FC(X_hat))

                # E_loss = torch.mean(
                #     torch.mean(0.5 * (z - z_mu) ** 2 * torch.exp(-z_sigma) +
                #                0.5 * z_sigma + 0.919, 1))

                G_loss = self.BCE_loss(D_fake, self.y_real_)
                total_loss = G_loss
                self.train_hist['G_loss'].append(G_loss.data[0])
                G_err.append(G_loss.data[0])
                #E_err.append(E_loss.data[0])
                # Optimize
                total_loss.backward()
                self.G_optimizer.step()
                #self.E_optimizer.step()
                self.__reset_grad()

                """ Plot """
                if (iter + 1) == self.data_loader.dataset.__len__() // self.batch_size:
                    # Print and plot every epoch
                    print('Epoch-{}; D_loss: {:.4}; G_loss: {:.4}\n'
                          .format(epoch, np.mean(D_err), np.mean(G_err)))
                    for X, _ in self.valid_loader:
                        X = utils.to_var(X)
                        self.visualize_results(X, epoch + 1)
                        break

                    break

            self.train_hist['per_epoch_time'].append(time.time() - epoch_start_time)

            # Save model
            if (epoch + 1) % 5 == 0:
                self.save(epoch)

        self.train_hist['total_time'].append(time.time() - start_time)
        print("Avg one epoch time: %.2f, total %d epochs time: %.2f" % (np.mean(self.train_hist['per_epoch_time']),
                                                                        self.epoch, self.train_hist['total_time'][0]))
        print("Training finish!... save training results")
        # self.save(epoch)

        # Generate animation of reconstructed plot
        utils.generate_animation(
            self.root + '/' + self.result_dir + '/' + self.dataset + '/' + self.model_name + '/reconstructed',
            self.epoch)
        utils.loss_plot(self.train_hist, os.path.join(self.root, self.save_dir, self.dataset, self.model_name),
                        self.model_name)
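The learning-rate schedule above rescales the stored base rates each epoch; the same idea as a helper (a sketch, assuming a single param group as in this code):

import numpy as np

def decay_lr(optimizer, base_lr, epoch):
    # lr_t = lr_0 / sqrt(t + 1), recomputed from the base rate every epoch
    optimizer.param_groups[0]['lr'] = base_lr / np.sqrt(epoch + 1)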
Example #23
File: GANEM.py  Project: Jossome/GAN-EM
    def train(self):
        self.train_hist = {}
        self.train_hist['D_loss'] = []
        self.train_hist['G_loss'] = []
        self.train_hist['per_epoch_time'] = []
        self.train_hist['total_time'] = []

        self.D.train()
        self.E.train()

        print('training start!!')
        start_time = time.time()
        first_time = True
        self.accuracy_hist = []

        for epoch in range(self.epoch):
            self.G.train()  # check here!!
            epoch_start_time = time.time()
            decay = 0.98 ** epoch
            self.E_optimizer = optim.Adam(self.E.parameters(), lr=decay * 0.3 * self.args.lrD,
                                          betas=(self.args.beta1, self.args.beta2))
            self.G_optimizer = optim.Adam(self.G.parameters(), lr=decay * 3 * self.args.lrG, betas=(self.args.beta1, self.args.beta2))
            self.D_optimizer = optim.Adam(self.D.parameters(), lr=decay * self.args.lrD, betas=(self.args.beta1, self.args.beta2))
            for M_epoch in range(5):
                for iter, (batch_x, batch_y) in enumerate(self.train_loader):

                    x_ = batch_x
                    z_ = torch.rand((self.batch_size, self.z_dim))
                    x_, z_ = Variable(x_.cuda()), Variable(z_.cuda())
                    G_batch_size = batch_x.size()[0]
                    if G_batch_size < self.batch_size:
                        break
                    # x_  (batch, 1L, 28L, 28L)
                    # z_  (batch, 62L)

                    # update D network:

                    image_real = Variable(batch_x.cuda())
                    self.E.eval()
                    y_real = self.E(image_real)
                    y_real = nn.Softmax()(y_real)
                    y_real = y_real.data.cpu().numpy()


                    self.D_optimizer.zero_grad()

                    D_real = self.D(x_)
                    if first_time:
                        y_real = (1 / float(self.class_num)) * np.ones((G_batch_size, self.class_num)) # first_time

                    y_real = np.concatenate((y_real, 2 * np.ones((np.shape(y_real)[0], 1))), axis=1)

                    ones = np.ones((np.shape(y_real)[0], np.shape(y_real)[1]))
                    ones[:, -1] = 0
                    ones = Variable(torch.FloatTensor(ones)).cuda()
                    y_real = torch.FloatTensor(y_real).cuda()

                    D_real_loss = torch.nn.BCEWithLogitsLoss(weight=y_real)(D_real, ones)

                    G_input, conditional_label = self.gen_cond_label(self.batch_size)
                    G_ = self.G(G_input, 0)
                    D_fake = self.D(G_)
                    y_fake_1 = np.tile(np.zeros(self.class_num), (self.batch_size, 1))
                    y_fake_2 = np.tile(np.ones(1), (self.batch_size, 1))
                    y_fake = np.concatenate((y_fake_1, y_fake_2), axis=1)
                    y_fake = Variable(torch.FloatTensor(y_fake).cuda())
                    D_fake_loss = torch.nn.BCEWithLogitsLoss()(D_fake, y_fake)

                    D_loss = D_real_loss + D_fake_loss

                    self.train_hist['D_loss'].append(D_loss.data[0])
                    D_loss.backward()
                    self.D_optimizer.step()


                    # update G network:

                    self.G_optimizer.zero_grad()
                    G_input, conditional_label = self.gen_cond_label(self.batch_size)
                    G_ = self.G(G_input, 0)
                    D_fake = self.D(G_)

                    G_y_real = np.concatenate((conditional_label.numpy(), np.tile([0], (self.batch_size, 1))), axis=1)
                    G_y_real = Variable(torch.FloatTensor(G_y_real)).cuda()
                    G_loss = torch.nn.BCEWithLogitsLoss()(D_fake, G_y_real)


                    self.train_hist['G_loss'].append(G_loss.data[0])
                    G_loss.backward()
                    self.G_optimizer.step()

                    if ((iter + 1) % 100) == 0:
                        print("Epoch: [%2d] [%4d/%4d] D_loss: %.8f, G_loss: %.8f" %
                              ((epoch + 1), (iter + 1), len(self.data_X) // self.batch_size, D_loss.data[0], G_loss.data[0]))



            self.E_training(200)
            first_time = False
            self.visualize_results((epoch+1))
            self.compute_accuracy()
            self.train_hist['per_epoch_time'].append(time.time() - epoch_start_time)
            self.save()

        self.train_hist['total_time'].append(time.time() - start_time)
        print("Avg one epoch time: %.2f, total %d epochs time: %.2f" % (np.mean(self.train_hist['per_epoch_time']),
              self.epoch, self.train_hist['total_time'][0]))
        print("Training finish!... save training results")

        self.save()
        utils.generate_animation(self.result_dir + '/' + self.dataset + '/' + self.model_name + '/' + self.model_name,
                                 self.epoch)
        utils.loss_plot(self.train_hist, os.path.join(self.save_dir, self.dataset, self.model_name), self.model_name)
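Note that re-creating the three Adam optimizers every epoch, as above, also resets their running moment estimates. If only the decayed learning rate is wanted, adjusting the existing optimizers in place preserves that state; a sketch:

def scale_lr(optimizer, base_lr, decay, epoch):
    # keep Adam's moment estimates; only change the step size
    for group in optimizer.param_groups:
        group['lr'] = base_lr * decay ** epoch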
Example #24
    def train(self):
        vis = visdom.Visdom()
        self.train_hist = {}
        self.train_hist['D_loss'] = []
        self.train_hist['G_loss'] = []
        self.train_hist['per_epoch_time'] = []
        self.train_hist['total_time'] = []
        self.M = {}
        self.M['pre'] = []
        self.M['pre'].append(1)
        self.M['cur'] = []

        self.y_real_, self.y_fake_ = torch.ones(self.batch_size, 1), torch.zeros(self.batch_size, 1)
        if self.gpu_mode:
            self.y_real_, self.y_fake_ = self.y_real_.cuda(), self.y_fake_.cuda()

        self.D.train()
        print('training start!!')
        start_time = time.time()
        for epoch in range(self.epoch):
            self.G.train()
            epoch_start_time = time.time()
            for iter, (x_, _) in enumerate(self.data_loader):
                if iter == self.data_loader.dataset.__len__() // self.batch_size:
                    break

                z_ = torch.Tensor(self.batch_size, self.z_dim).uniform_(-1,1)

                if self.gpu_mode:
                    x_, z_ = x_.cuda(), z_.cuda()

                # update D network
                self.D_optimizer.zero_grad()

                D_real = self.D(x_)
                D_real_loss = torch.mean(torch.abs(D_real - x_))

                G_ = self.G(z_)
                D_fake = self.D(G_)
                D_fake_loss = torch.mean(torch.abs(D_fake - G_))

                D_loss = D_real_loss - self.k * D_fake_loss
                self.train_hist['D_loss'].append(D_loss.item())

                D_loss.backward()
                self.D_optimizer.step()

                # update G network
                self.G_optimizer.zero_grad()

                G_ = self.G(z_)
                D_fake = self.D(G_)
                D_fake_loss = torch.mean(torch.abs(D_fake - G_))

                G_loss = D_fake_loss
                self.train_hist['G_loss'].append(G_loss.item())

                G_loss.backward()
                self.G_optimizer.step()

                # convergence metric
                temp_M = D_real_loss + torch.abs(self.gamma * D_real_loss - G_loss)

                # operation for updating k
                temp_k = self.k + self.lambda_ * (self.gamma * D_real_loss - G_loss)
                temp_k = temp_k.item()

                self.k = min(max(temp_k, 0), 1)
                self.M['cur'] = temp_M.item()

                if (iter + 1) % 30 == 0:
                    generated = G_.cpu().data.numpy() / 2 + 0.5
                    batch_image = x_.cpu().data.numpy() / 2 + 0.5
                    print('min image ', generated.min())
                    print('max image ', generated.max())
                    vis.images(generated, nrow=8, win='generated')
                    vis.images(batch_image, nrow=8, win='original')
                    print('convergence metric ', self.M['cur'])
                if ((iter + 1) % 100) == 0:
                    print("Epoch: [%2d] [%4d/%4d] D_loss: %.8f, G_loss: %.8f, M: %.8f, k: %.8f" %
                          ((epoch + 1), (iter + 1), self.data_loader.dataset.__len__() // self.batch_size, D_loss.item(), G_loss.item(), self.M['cur'], self.k))


            if np.mean(self.M['pre']) < np.mean(self.M['cur']):
                pre_lr = self.G_optimizer.param_groups[0]['lr']
                self.G_optimizer.param_groups[0]['lr'] = max(self.G_optimizer.param_groups[0]['lr'] / 2.0,
                                                             self.lr_lower_boundary)
                self.D_optimizer.param_groups[0]['lr'] = max(self.D_optimizer.param_groups[0]['lr'] / 2.0,
                                                             self.lr_lower_boundary)
                print('M_pre: ' + str(np.mean(self.M['pre'])) + ', M_cur: ' + str(
                    np.mean(self.M['cur'])) + ', lr: ' + str(pre_lr) + ' --> ' + str(
                    self.G_optimizer.param_groups[0]['lr']))
            else:
                print('M_pre: ' + str(np.mean(self.M['pre'])) + ', M_cur: ' + str(np.mean(self.M['cur'])))
                self.M['pre'] = self.M['cur']

                self.M['cur'] = []

            self.train_hist['per_epoch_time'].append(time.time() - epoch_start_time)
            with torch.no_grad():
                self.visualize_results((epoch+1))

        self.train_hist['total_time'].append(time.time() - start_time)
        print("Avg one epoch time: %.2f, total %d epochs time: %.2f" % (np.mean(self.train_hist['per_epoch_time']),
              self.epoch, self.train_hist['total_time'][0]))
        print("Training finish!... save training results")

        self.save()
        utils.generate_animation(self.result_dir + '/' + self.dataset + '/' + self.model_name + '/' + self.model_name,
                                 self.epoch)
        utils.loss_plot(self.train_hist, os.path.join(self.save_dir, self.dataset, self.model_name), self.model_name)
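The BEGAN bookkeeping above reduces to two scalar updates per iteration; a sketch, assuming the per-batch reconstruction losses `L_real` and `L_fake` computed as above:

def began_update(k, L_real, L_fake, gamma, lambda_k):
    balance = (gamma * L_real - L_fake).item()
    M = L_real.item() + abs(balance)                 # convergence measure
    k = min(max(k + lambda_k * balance, 0.0), 1.0)   # keep k in [0, 1]
    return k, M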
Example #25
    def train(self):
        self.train_hist = {}
        self.train_hist['D_loss'] = []
        self.train_hist['G_loss'] = []
        self.train_hist['per_epoch_time'] = []
        self.train_hist['total_time'] = []

        self.y_real_, self.y_fake_ = torch.ones(self.batch_size, 1), torch.zeros(self.batch_size, 1)
        if self.gpu_mode:
            self.y_real_, self.y_fake_ = self.y_real_.cuda(), self.y_fake_.cuda()

        self.D.train()
        print('training start!!')
        start_time = time.time()
        Q_stack = []
        for epoch in range(self.epoch):
            self.G.train()
            epoch_start_time = time.time()

            for iter, (x_, _) in enumerate(self.data_loader):

                if iter == self.data_loader.dataset.__len__() // self.batch_size:
                    break

                z_ = torch.rand((self.batch_size, self.z_dim))

                if self.gpu_mode:
                    x_, z_ = x_.cuda(), z_.cuda()

                # update D network
                self.D_optimizer.zero_grad()

                D_real = self.D(x_)
                D_real_loss = self.BCE_loss(D_real, self.y_real_)

                G_ = self.G(z_)
                D_fake = self.D(G_)
                D_fake_loss = self.BCE_loss(D_fake, self.y_fake_)

                D_loss = D_real_loss + D_fake_loss
                self.train_hist['D_loss'].append(D_loss.item())

                D_loss.backward()
                self.D_optimizer.step()

                # update G network

                self.G_optimizer.zero_grad()
                G_ = self.G(z_)
                D_fake = self.D(G_)

                ######## the following is a practical implementation of the paper ##########
                perturbation = torch.randn(self.batch_size, self.z_dim).cuda()
                eps = self.eps
                perturbation = (perturbation / torch.norm(perturbation, dim=1, keepdim=True)) * eps
                z_prime = z_ + perturbation
                perturbed_images = self.G(z_) - self.G(z_prime)
                perturbed_latent = z_ - z_prime
                # per-sample estimate of ||G(z) - G(z')|| / ||z - z'||
                Q = torch.norm(perturbed_images.view(self.batch_size, -1), dim=1) / \
                    torch.norm(perturbed_latent.view(self.batch_size, -1), dim=1)
                # print(Q)  # debug: per-sample norm ratios

                L_max = 0.0
                L_min = 0.0
                count_max = 0
                count_min = 0

                for i in range(self.batch_size):
                    if Q[i] > self.eig_max:
                        L_max += (Q[i] - self.eig_max)**2
                        count_max += 1
                    if Q[i] < self.eig_min:
                        L_min += (Q[i] - self.eig_min)**2
                        count_min += 1
                L = L_max + L_min
                #################### end of implementation for the paper ####################

                G_loss = self.BCE_loss(D_fake, self.y_real_)

                self.train_hist['G_loss'].append(G_loss.item())

                G_loss.backward()

                self.G_optimizer.step()

                if ((iter + 1) % 100) == 0:
                    print("Epoch: [%2d] [%4d/%4d] D_loss: %.8f, G_loss: %.8f" %
                          ((epoch + 1),
                           (iter + 1), self.data_loader.dataset.__len__() //
                           self.batch_size, D_loss.item(), G_loss.item()))
                    print(L)
                    print(count_max)
                    print(count_min)

            self.train_hist['per_epoch_time'].append(time.time() -
                                                     epoch_start_time)
            with torch.no_grad():
                self.gen_mode(4, epoch)
                self.visualize_results((epoch + 1))

        self.train_hist['total_time'].append(time.time() - start_time)
        print("Avg one epoch time: %.2f, total %d epochs time: %.2f" %
              (np.mean(self.train_hist['per_epoch_time']), self.epoch,
               self.train_hist['total_time'][0]))
        print("Training finish!... save training results")

        self.save()
        utils.generate_animation(
            self.result_dir + '/' + self.dataset + '/' + self.model_name +
            '/' + self.model_name, self.epoch)
        utils.loss_plot(
            self.train_hist,
            os.path.join(self.save_dir, self.dataset, self.model_name),
            self.model_name)
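The per-sample Python loop above can be vectorized with `clamp`; a sketch computing the same two-sided penalty on `Q` (note that, as written, the example only logs `L` and never adds it to `G_loss`):

import torch

def eig_penalty(Q, eig_min, eig_max):
    over = torch.clamp(Q - eig_max, min=0)    # entries above the upper bound
    under = torch.clamp(eig_min - Q, min=0)   # entries below the lower bound
    return (over ** 2 + under ** 2).sum()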
Example #26
    def train_all_classes(self):
        self.train_hist = {}
        self.train_hist['D_loss'] = []
        self.train_hist['G_loss'] = []
        self.train_hist['per_epoch_time'] = []
        self.train_hist['total_time'] = []


        self.data_loader_train  = DataLoader(self.dataset_train, batch_size=self.batch_size)
        self.data_loader_valid = DataLoader(self.dataset_valid, batch_size=self.batch_size)


        print('training start!')
        start_time = time.time()
        for epoch in range(self.epoch):
            self.G.train()
            D_losses = []
            G_losses = []

            epoch_start_time = time.time()
            #for tours in range(int(50000/self.num_examples)): #we want to see always as much images
            for x_, y_ in self.data_loader_train:
                # train discriminator D
                self.D.zero_grad()

                batch_size = x_.size()[0]

                y_real_ = torch.ones(batch_size)
                y_fake_ = torch.zeros(batch_size)
                y_label_ = torch.zeros(batch_size, 10)
                y_label_.scatter_(1, y_.view(batch_size, 1), 1)

                x_ = x_.view(-1, 1 , 28, 28)
                x_, y_label_, y_real_, y_fake_ = Variable(x_.cuda()), Variable(y_label_.cuda()), Variable(y_real_.cuda()), Variable(y_fake_.cuda())
                D_result, c = self.D(x_, y_label_)
                D_real_loss = self.BCELoss(D_result, y_real_)

                z_ = torch.rand((batch_size, self.z_dim, 1, 1))
                y_ = (torch.rand(batch_size, 1) * 10).type(torch.LongTensor)

                z_ = Variable(z_.cuda())
                G_result = self.G(z_, y_label_)

                D_result, c = self.D(G_result, y_label_)
                D_fake_loss = self.BCELoss(D_result, y_fake_)
                D_fake_score = D_result.data.mean()

                D_train_loss = D_real_loss + D_fake_loss

                D_train_loss.backward()
                self.D_optimizer.step()

                D_losses.append(D_train_loss.data[0])

                # train generator G
                self.G.zero_grad()

                z_ = torch.rand((batch_size, self.z_dim, 1, 1))
                y_ = (torch.rand(batch_size, 1) * 10).type(torch.LongTensor)

                z_ = Variable(z_.cuda())

                G_result = self.G(z_, y_label_)
                D_result,c = self.D(G_result, y_label_)
                G_train_loss = self.BCELoss(D_result, y_real_)
                G_train_loss.backward()
                self.G_optimizer.step()

                G_losses.append(G_train_loss.data[0])

            epoch_end_time = time.time()
            per_epoch_ptime = epoch_end_time - epoch_start_time


            print('[%d/%d] - ptime: %.2f, loss_d: %.3f, loss_g: %.3f' % ((epoch + 1), self.epoch, per_epoch_ptime, torch.mean(torch.FloatTensor(D_losses)),
                                                                    torch.mean(torch.FloatTensor(G_losses))))
            self.train_hist['D_loss'].append(torch.mean(torch.FloatTensor(D_losses)))
            self.train_hist['G_loss'].append(torch.mean(torch.FloatTensor(G_losses)))
            self.train_hist['per_epoch_time'].append(per_epoch_ptime)

            self.save()
            self.visualize_results((epoch + 1))

        self.train_hist['total_time'].append(time.time() - start_time)
        print("Avg one epoch time: %.2f, total %d epochs time: %.2f" % (np.mean(self.train_hist['per_epoch_time']),
                                                                        self.epoch, self.train_hist['total_time'][0]))
        print("Training finish!... save training results")

        utils.generate_animation(self.result_dir + '/' + self.model_name, self.epoch)
        utils.loss_plot(self.train_hist, self.save_dir, self.model_name)
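The `scatter_` call above builds one-hot label vectors; the same construction in isolation:

import torch

def one_hot(y, num_classes=10):
    # y: LongTensor of class indices, shape (batch,)
    return torch.zeros(y.size(0), num_classes).scatter_(1, y.view(-1, 1), 1)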
Example #27
    def train(self):

        self.G.apply(self.G.weights_init)
        print(' training start!! (no conditional)')
        start_time = time.time()

        for classe in range(10):
            self.train_hist = {}
            self.train_hist['D_loss'] = []
            self.train_hist['G_loss'] = []
            self.train_hist['per_epoch_time'] = []
            self.train_hist['total_time'] = []
            # self.G.apply(self.G.weights_init) does not work for instance
            del self.E
            self.E = Encoder(self.z_dim, self.dataset, self.conditional)
            self.E_optimizer = optim.Adam(
                self.E.parameters(),
                lr=self.lr)  #, lr=args.lrD, betas=(args.beta1, args.beta2))
            if self.gpu_mode:
                self.E.cuda(self.device)

            best = 100000
            self.data_loader_train = get_iter_dataset(self.dataset_train,
                                                      self.list_class_train,
                                                      self.batch_size, classe)
            self.data_loader_valid = get_iter_dataset(self.dataset_valid,
                                                      self.list_class_valid,
                                                      self.batch_size, classe)
            early_stop = 0.
            for epoch in range(self.epoch):

                epoch_start_time = time.time()
                # print("number of batch data")
                # print(len(self.data_loader_train))
                self.E.train()
                self.G.train()
                sum_loss_train = 0.
                n_batch = 0.
                #for iter in range(self.size_epoch):
                for iter, (x_, t_) in enumerate(self.data_loader_train):
                    n_batch += 1
                    #x_ = sort_utils.get_batch(list_classes, classe, self.batch_size)
                    #x_ = torch.FloatTensor(x_)
                    x_ = Variable(x_)
                    if self.gpu_mode:
                        x_ = x_.cuda(self.device)
                    # VAE
                    z_, mu, logvar = self.E(x_)
                    recon_batch = self.G(z_)

                    # train
                    self.G_optimizer.zero_grad()
                    self.E_optimizer.zero_grad()
                    g_loss = self.loss_function(recon_batch, x_, mu, logvar)
                    g_loss.backward()  # retain_variables=True in older PyTorch
                    sum_loss_train += g_loss.data[0]
                    self.G_optimizer.step()
                    self.E_optimizer.step()

                    self.train_hist['D_loss'].append(g_loss.data[0])
                    self.train_hist['G_loss'].append(g_loss.data[0])

                    if ((iter + 1) % 100) == 0:
                        print(
                            "classe : [%1d] Epoch: [%2d] [%4d/%4d] G_loss: %.8f, E_loss: %.8f"
                            % (classe, (epoch + 1),
                               (iter + 1), self.size_epoch, g_loss.data[0],
                               g_loss.data[0]))
                sum_loss_train = sum_loss_train / float(n_batch)
                sum_loss_valid = 0.
                n_batch = 1.
                self.E.eval()
                self.G.eval()
                for iter, (x_, t_) in enumerate(self.data_loader_valid):
                    n_batch += 1
                    max_val, max_indice = torch.max(t_, 0)
                    mask_idx = torch.nonzero(t_ == classe)
                    if mask_idx.dim() == 0:
                        continue
                    x_ = torch.index_select(x_, 0, mask_idx[:, 0])
                    t_ = torch.index_select(t_, 0, mask_idx[:, 0])
                    if self.gpu_mode:
                        x_ = Variable(x_.cuda(self.device), volatile=True)
                    else:
                        x_ = Variable(x_)
                    # VAE
                    z_, mu, logvar = self.E(x_)
                    recon_batch = self.G(z_)

                    G_loss = self.loss_function(recon_batch, x_, mu, logvar)
                    sum_loss_valid += G_loss.data[0]

                sum_loss_valid = sum_loss_valid / float(n_batch)
                print(
                    "classe : [%1d] Epoch: [%2d] Train_loss: %.8f, Valid_loss: %.8f"
                    % (classe, (epoch + 1), sum_loss_train, sum_loss_valid))
                self.train_hist['per_epoch_time'].append(time.time() -
                                                         epoch_start_time)
                self.visualize_results((epoch + 1), classe)
                if sum_loss_valid < best:
                    best = sum_loss_valid
                    self.save_G(classe)
                    early_stop = 0.
                # Early stopping: stop if the validation performance hasn't
                # improved for 150 consecutive epochs
                if early_stop == 150:
                    break
                else:
                    early_stop += 1
            result_dir = self.result_dir + '/' + 'classe-' + str(classe)
            utils.generate_animation(result_dir + '/' + self.model_name,
                                     epoch + 1)
            utils.loss_plot(self.train_hist, result_dir, self.model_name)

            np.savetxt(
                os.path.join(result_dir,
                             'vae_training_' + self.dataset + '.txt'),
                np.transpose([self.train_hist['G_loss']]))

        self.train_hist['total_time'].append(time.time() - start_time)
        print("Avg one epoch time: %.2f, total %d epochs time: %.2f" %
              (np.mean(self.train_hist['per_epoch_time']), self.epoch,
               self.train_hist['total_time'][0]))
        print("Training finish!... save training results")
Example #28
    def train(self):
        self.train_hist = {}
        self.train_hist['D_loss'] = []
        self.train_hist['G_loss'] = []
        self.train_hist['info_loss'] = []
        self.train_hist['per_epoch_time'] = []
        self.train_hist['total_time'] = []

        if self.gpu_mode:
            self.y_real_, self.y_fake_ = Variable(
                torch.ones(self.batch_size, 1).cuda()), Variable(
                    torch.zeros(self.batch_size, 1).cuda())
        else:
            self.y_real_, self.y_fake_ = Variable(
                torch.ones(self.batch_size,
                           1)), Variable(torch.zeros(self.batch_size, 1))

        self.D.train()
        print('training start!!')
        start_time = time.time()
        for epoch in range(self.epoch):
            self.G.train()
            epoch_start_time = time.time()
            for iter in range(len(self.data_X) // self.batch_size):
                x_ = self.data_X[iter * self.batch_size:(iter + 1) *
                                 self.batch_size]
                z_ = torch.rand((self.batch_size, self.z_dim))
                if self.SUPERVISED == True:
                    y_disc_ = self.data_Y[iter * self.batch_size:(iter + 1) *
                                          self.batch_size]
                else:
                    y_disc_ = torch.from_numpy(
                        np.random.multinomial(
                            1,
                            self.len_discrete_code *
                            [float(1.0 / self.len_discrete_code)],
                            size=[self.batch_size])).type(torch.FloatTensor)

                y_cont_ = torch.from_numpy(
                    np.random.uniform(-1, 1, size=(self.batch_size,
                                                   2))).type(torch.FloatTensor)

                if self.gpu_mode:
                    x_, z_, y_disc_, y_cont_ = Variable(x_.cuda()), Variable(z_.cuda()), \
                                               Variable(y_disc_.cuda()), Variable(y_cont_.cuda())
                else:
                    x_, z_, y_disc_, y_cont_ = Variable(x_), Variable(
                        z_), Variable(y_disc_), Variable(y_cont_)

                # update D network
                self.D_optimizer.zero_grad()

                D_real, _, _ = self.D(x_)
                D_real_loss = self.BCE_loss(D_real, self.y_real_)

                G_ = self.G(z_, y_cont_, y_disc_)
                D_fake, _, _ = self.D(G_)
                D_fake_loss = self.BCE_loss(D_fake, self.y_fake_)

                D_loss = D_real_loss + D_fake_loss
                self.train_hist['D_loss'].append(D_loss.data[0])

                D_loss.backward(retain_graph=True)
                self.D_optimizer.step()

                # update G network
                self.G_optimizer.zero_grad()

                G_ = self.G(z_, y_cont_, y_disc_)
                D_fake, D_cont, D_disc = self.D(G_)

                G_loss = self.BCE_loss(D_fake, self.y_real_)
                self.train_hist['G_loss'].append(G_loss.data[0])

                G_loss.backward(retain_graph=True)
                self.G_optimizer.step()

                # information loss
                disc_loss = self.CE_loss(D_disc, torch.max(y_disc_, 1)[1])
                cont_loss = self.MSE_loss(D_cont, y_cont_)
                info_loss = disc_loss + cont_loss
                self.train_hist['info_loss'].append(info_loss.data[0])

                info_loss.backward()
                self.info_optimizer.step()

                if ((iter + 1) % 100) == 0:
                    print(
                        "Epoch: [%2d] [%4d/%4d] D_loss: %.8f, G_loss: %.8f, info_loss: %.8f"
                        % ((epoch + 1),
                           (iter + 1), len(self.data_X) // self.batch_size,
                           D_loss.data[0], G_loss.data[0], info_loss.data[0]))

            self.train_hist['per_epoch_time'].append(time.time() -
                                                     epoch_start_time)
            self.visualize_results((epoch + 1))

        self.train_hist['total_time'].append(time.time() - start_time)
        print("Avg one epoch time: %.2f, total %d epochs time: %.2f" %
              (np.mean(self.train_hist['per_epoch_time']), self.epoch,
               self.train_hist['total_time'][0]))
        print("Training finish!... save training results")

        self.save()
        utils.generate_animation(
            self.result_dir + '/' + self.dataset + '/' + self.model_name +
            '/' + self.model_name, self.epoch)
        utils.generate_animation(
            self.result_dir + '/' + self.dataset + '/' + self.model_name +
            '/' + self.model_name + '_cont', self.epoch)
        self.loss_plot(
            self.train_hist,
            os.path.join(self.save_dir, self.dataset, self.model_name),
            self.model_name)
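The info_loss step above relies on an info_optimizer that the snippet never constructs. A minimal sketch of a plausible setup, assuming (as in common InfoGAN implementations) that the mutual-information term updates both the generator and the discriminator's code-prediction head; the helper name and hyperparameters are illustrative, not taken from the source:

import itertools
import torch.optim as optim

def build_optimizers(self, lr=2e-4, betas=(0.5, 0.999)):
    # Hypothetical helper: chaining G and D parameters lets the information
    # loss update the generator and the code-prediction head together.
    self.G_optimizer = optim.Adam(self.G.parameters(), lr=lr, betas=betas)
    self.D_optimizer = optim.Adam(self.D.parameters(), lr=lr, betas=betas)
    self.info_optimizer = optim.Adam(
        itertools.chain(self.G.parameters(), self.D.parameters()),
        lr=lr, betas=betas)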
Example #29
    def train_all_classes(self):
        self.G.apply(self.G.weights_init)
        self.train_hist = {}
        self.train_hist['Train_loss'] = []
        self.train_hist['Valid_loss'] = []
        self.train_hist['per_epoch_time'] = []
        self.train_hist['total_time'] = []
        self.size_epoch = 1

        if self.gpu_mode:
            self.y_real_, self.y_fake_ = Variable(
                torch.ones(self.batch_size, 1).cuda(self.device)), Variable(
                    torch.zeros(self.batch_size, 1).cuda(self.device))
        else:
            self.y_real_, self.y_fake_ = Variable(
                torch.ones(self.batch_size,
                           1)), Variable(torch.zeros(self.batch_size, 1))

        self.E.train()
        print('training start!!')
        start_time = time.time()
        best = 1000000

        self.data_loader_train = DataLoader(self.dataset_train,
                                            batch_size=self.batch_size)
        self.data_loader_valid = DataLoader(self.dataset_valid,
                                            batch_size=self.batch_size)

        early_stop = 0
        for epoch in range(self.epoch):
            self.E.train()
            self.G.train()
            epoch_start_time = time.time()
            sum_loss_train = 0.
            n_batch = 0.
            for iter, (x_, t_) in enumerate(self.data_loader_train):
                y_onehot = torch.FloatTensor(t_.shape[0], 10)
                y_onehot.zero_()
                y_onehot.scatter_(1, t_[:, np.newaxis], 1.0)
                if self.gpu_mode:
                    x_ = Variable(x_.cuda(self.device))
                    if self.conditional:
                        y_onehot = Variable(y_onehot.cuda(self.device))
                else:
                    x_ = Variable(x_)
                self.E_optimizer.zero_grad()
                self.G_optimizer.zero_grad()
                # VAE
                z_, mu, logvar = self.E(x_, y_onehot)
                recon_batch = self.G(z_, y_onehot)

                G_loss = self.loss_function(recon_batch, x_, mu, logvar)
                sum_loss_train += G_loss.data[0]
                G_loss.backward()

                self.E_optimizer.step()
                self.G_optimizer.step()
                n_batch += 1
                self.train_hist['Train_loss'].append(G_loss.data[0])

            sum_loss_train = sum_loss_train / float(n_batch)
            sum_loss_valid = 0.
            n_batch = 0.
            self.E.eval()
            self.G.eval()
            for iter, (x_, t_) in enumerate(self.data_loader_valid):
                n_batch += 1
                y_onehot = torch.FloatTensor(t_.shape[0], 10)
                y_onehot.zero_()
                y_onehot.scatter_(1, t_[:, np.newaxis], 1.0)
                if self.gpu_mode:
                    x_ = Variable(x_.cuda(self.device))
                    if self.conditional:
                        y_onehot = Variable(y_onehot.cuda(self.device))
                else:
                    x_ = Variable(x_)
                # VAE
                z_, mu, logvar = self.E(x_, y_onehot)
                recon_batch = self.G(z_, y_onehot)
                G_loss = self.loss_function(recon_batch, x_, mu, logvar)
                sum_loss_valid += G_loss.data[0]
                self.train_hist['Valid_loss'].append(G_loss.data[0])
            sum_loss_valid = sum_loss_valid / float(n_batch)
            print("Epoch: [%2d] Train_loss: %.8f, Valid_loss: %.8f" %
                  ((epoch + 1), sum_loss_train, sum_loss_valid))

            self.train_hist['per_epoch_time'].append(time.time() -
                                                     epoch_start_time)
            self.visualize_results((epoch + 1))
            if sum_loss_valid < best:
                best = sum_loss_valid
                self.save()
                early_stop = 0
            else:
                early_stop += 1
            # Early stopping: the validation loss has not improved for 150
            # consecutive epochs. The break is left commented out in the
            # original, so this branch only reports that training could stop.
            if early_stop == 150:
                # break
                print("Early stopping criterion met")

        self.train_hist['total_time'].append(time.time() - start_time)
        print("Avg one epoch time: %.2f, total %d epochs time: %.2f" %
              (np.mean(self.train_hist['per_epoch_time']), self.epoch,
               self.train_hist['total_time'][0]))
        print("Training finish!... save training results")
        utils.generate_animation(self.result_dir + '/' + self.model_name,
                                 self.epoch)
        # utils.loss_plot(self.train_hist, os.path.join(self.save_dir, self.dataset, self.model_name,
        #                                               'num_examples_' + str(self.num_examples)), self.model_name)

        np.savetxt(
            os.path.join(self.result_dir,
                         'cvae_training_' + self.dataset + '.txt'),
            np.transpose([self.train_hist['Train_loss']]))
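Both VAE trainers above track a patience counter, but the counter also advances on improving epochs and, in the second trainer, the break is disabled. A self-contained sketch of the intended patience-based early stopping, with the 150-epoch patience taken from the snippets and the two callbacks left as hypothetical hooks:

def train_with_early_stopping(run_one_epoch, save_checkpoint,
                              num_epochs=500, patience=150):
    # run_one_epoch() -> validation loss; save_checkpoint() persists the
    # best model. Both are hypothetical stand-ins for the methods above.
    best, bad_epochs = float('inf'), 0
    for epoch in range(num_epochs):
        valid_loss = run_one_epoch()
        if valid_loss < best:
            best, bad_epochs = valid_loss, 0  # reset only on improvement
            save_checkpoint()
        else:
            bad_epochs += 1
            if bad_epochs >= patience:
                break  # patience exhausted
    return best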
Example #30
    def train(self):
        self.train_hist = {}
        self.train_hist['D_loss'] = []
        self.train_hist['G_loss'] = []
        self.train_hist['per_epoch_time'] = []
        self.train_hist['total_time'] = []

        if self.gpu_mode:
            self.y_real_, self.y_fake_ = Variable(torch.ones(self.batch_size, 1).cuda()), Variable(torch.zeros(self.batch_size, 1).cuda())
        else:
            self.y_real_, self.y_fake_ = Variable(torch.ones(self.batch_size, 1)), Variable(torch.zeros(self.batch_size, 1))

        self.D.train()
        print('training start!!')
        start_time = time.time()
        for epoch in range(self.epoch):
            self.G.train()
            epoch_start_time = time.time()
            for iter, (x_, _) in enumerate(self.data_loader):
                if iter == self.data_loader.dataset.__len__() // self.batch_size:
                    break

                z_ = torch.rand((self.batch_size, self.z_dim))

                if self.gpu_mode:
                    x_, z_ = Variable(x_.cuda()), Variable(z_.cuda())
                else:
                    x_, z_ = Variable(x_), Variable(z_)

                # update D network
                self.D_optimizer.zero_grad()

                D_real = self.D(x_)
                D_real_loss = self.BCE_loss(D_real, self.y_real_)

                G_ = self.G(z_)
                D_fake = self.D(G_)
                D_fake_loss = self.BCE_loss(D_fake, self.y_fake_)

                D_loss = D_real_loss + D_fake_loss
                self.train_hist['D_loss'].append(D_loss.data[0])

                D_loss.backward()
                self.D_optimizer.step()

                # update G network
                self.G_optimizer.zero_grad()

                G_ = self.G(z_)
                D_fake = self.D(G_)
                G_loss = self.BCE_loss(D_fake, self.y_real_)
                self.train_hist['G_loss'].append(G_loss.data[0])

                G_loss.backward()
                self.G_optimizer.step()

                if ((iter + 1) % 100) == 0:
                    print("Epoch: [%2d] [%4d/%4d] D_loss: %.8f, G_loss: %.8f" %
                          ((epoch + 1), (iter + 1), self.data_loader.dataset.__len__() // self.batch_size, D_loss.data[0], G_loss.data[0]))

            self.train_hist['per_epoch_time'].append(time.time() - epoch_start_time)
            self.visualize_results((epoch+1))

        self.train_hist['total_time'].append(time.time() - start_time)
        print("Avg one epoch time: %.2f, total %d epochs time: %.2f" % (np.mean(self.train_hist['per_epoch_time']),
              self.epoch, self.train_hist['total_time'][0]))
        print("Training finish!... save training results")

        self.save()
        utils.generate_animation(self.result_dir + '/' + self.dataset + '/' + self.model_name + '/' + self.model_name,
                                 self.epoch)
        utils.loss_plot(self.train_hist, os.path.join(self.save_dir, self.dataset, self.model_name), self.model_name)
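The generator update above scores fake samples against real labels, i.e. the non-saturating GAN loss: BCE(D(G(z)), 1) equals -log D(G(z)), which yields stronger gradients early in training than minimizing log(1 - D(G(z))). A tiny self-contained check of that identity, using illustrative stand-in scores:

import torch
import torch.nn.functional as F

# Stand-in discriminator outputs in (0, 1); any such values satisfy the identity.
d_fake = torch.tensor([[0.1], [0.4], [0.7], [0.9]])
ns_loss = F.binary_cross_entropy(d_fake, torch.ones_like(d_fake))
assert torch.allclose(ns_loss, -torch.log(d_fake).mean())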
Example #31
    def train(self):
        self.train_hist = {}
        self.train_hist['D_loss'] = []
        self.train_hist['G_loss'] = []
        self.train_hist['E_loss'] = []
        self.train_hist['per_epoch_time'] = []
        self.train_hist['total_time'] = []

        if torch.cuda.is_available():
            self.y_real_, self.y_fake_ = Variable(
                torch.ones(self.batch_size, 1).cuda()), Variable(
                    torch.zeros(self.batch_size, 1).cuda())
        else:
            self.y_real_, self.y_fake_ = Variable(
                torch.ones(self.batch_size,
                           1)), Variable(torch.zeros(self.batch_size, 1))

        self.D.train()
        print('training start!!')
        start_time = time.time()
        for epoch in range(self.epoch):
            # reset training mode of G and E
            self.G.train()
            self.E.train()
            epoch_start_time = time.time()

            for iter, (X, _) in enumerate(self.data_loader):
                X = utils.to_var(X)
                """Discriminator"""
                z = utils.to_var(
                    torch.randn((self.batch_size,
                                 self.z_dim)).view(-1, self.z_dim, 1, 1))
                X_hat = self.G(z)
                D_real = self.D(X).squeeze().view(-1, 1)
                D_fake = self.D(X_hat).squeeze().view(-1, 1)
                D_loss = self.BCE_loss(D_real, self.y_real_) + self.BCE_loss(
                    D_fake, self.y_fake_)
                self.train_hist['D_loss'].append(D_loss.data[0])
                # Optimize
                D_loss.backward()
                self.D_optimizer.step()
                self.__reset_grad()
                """Encoder"""
                z = utils.to_var(
                    torch.randn((self.batch_size,
                                 self.z_dim)).view(-1, self.z_dim, 1, 1))
                X_hat = self.G(z)
                z_mu, z_sigma = self.E(X_hat)
                z_mu, z_sigma = z_mu.squeeze(), z_sigma.squeeze()
                # negative log-likelihood of z under N(z_mu, exp(z_sigma))
                E_loss = torch.mean(
                    torch.mean(
                        0.5 * (z - z_mu)**2 * torch.exp(-z_sigma) +
                        0.5 * z_sigma + 0.5 * np.log(2 * np.pi), 1))
                self.train_hist['E_loss'].append(E_loss.data[0])
                # Optimize
                E_loss.backward()
                self.E_optimizer.step()
                self.__reset_grad()
                """Generator"""
                # Use both Discriminator and Encoder to update Generator
                z = utils.to_var(
                    torch.randn((self.batch_size,
                                 self.z_dim)).view(-1, self.z_dim, 1, 1))
                X_hat = self.G(z)
                D_fake = self.D(X_hat).squeeze().view(-1, 1)
                z_mu, z_sigma = self.E(X_hat)
                z_mu, z_sigma = z_mu.squeeze(), z_sigma.squeeze()
                mode_loss = torch.mean(
                    torch.mean(
                        0.5 * (z - z_mu)**2 * torch.exp(-z_sigma) +
                        0.5 * z_sigma + 0.5 * np.log(2 * np.pi), 1))
                G_loss = self.BCE_loss(D_fake, self.y_real_)
                total_loss = G_loss + mode_loss
                self.train_hist['G_loss'].append(G_loss.data[0])
                # Optimize
                total_loss.backward()
                self.G_optimizer.step()
                self.__reset_grad()
                """ Plot """
                if (iter + 1) == len(self.data_loader.dataset) // self.batch_size:
                    # Print and plot every epoch
                    print(
                        'Epoch-{}; D_loss: {:.4}; G_loss: {:.4}; E_loss: {:.4}\n'
                        .format(epoch, D_loss.data[0], G_loss.data[0],
                                E_loss.data[0]))
                    # Visualize one validation batch, then end the epoch.
                    for X_valid, _ in self.valid_loader:
                        X_valid = utils.to_var(X_valid)
                        self.visualize_results(X_valid, epoch + 1)
                        break

                    break

            self.train_hist['per_epoch_time'].append(time.time() -
                                                     epoch_start_time)

            # Save model every 5 epochs
            if epoch % 5 == 0:
                self.save()

        self.train_hist['total_time'].append(time.time() - start_time)
        print("Avg one epoch time: %.2f, total %d epochs time: %.2f" %
              (np.mean(self.train_hist['per_epoch_time']), self.epoch,
               self.train_hist['total_time'][0]))
        print("Training finish!... save final training results")
        self.save()

        # Generate animation of reconstructed plot
        utils.generate_animation(
            self.root + '/' + self.result_dir + '/' + self.dataset + '/' +
            self.model_name + '/reconstructed', self.epoch)
        utils.loss_plot(
            self.train_hist,
            os.path.join(self.root, self.save_dir, self.dataset,
                         self.model_name), self.model_name)
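The encoder loss and the mode-regularization term in Example #31 are the same expression: the negative log-likelihood of z under a diagonal Gaussian N(mu, exp(z_sigma)), averaged over latent dimensions. Factored into a helper for clarity (a sketch; the snippet inlines the formula instead):

import numpy as np
import torch

def gaussian_nll(z, mu, logvar):
    # -log N(z; mu, diag(exp(logvar))) per sample, averaged over latent dims:
    # 0.5 * (z - mu)^2 / sigma^2 + 0.5 * log sigma^2 + 0.5 * log(2*pi)
    return torch.mean(
        0.5 * (z - mu) ** 2 * torch.exp(-logvar)
        + 0.5 * logvar
        + 0.5 * np.log(2 * np.pi), dim=1)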
Example #32
File: GAN.py  Project: AIMarkov/GAN
    def train(self):
        self.train_hist = {}
        self.train_hist['D_loss'] = []
        self.train_hist['G_loss'] = []
        self.train_hist['per_epoch_time'] = []
        self.train_hist['total_time'] = []

        self.y_real_, self.y_fake_ = torch.ones(self.batch_size,
                                                1), torch.zeros(
                                                    self.batch_size, 1)
        if self.gpu_mode:
            self.y_real_, self.y_fake_ = self.y_real_.cuda(
            ), self.y_fake_.cuda()

        self.D.train()
        print('training start!!')
        print("all_iter:",
              self.data_loader.dataset.__len__() // self.batch_size)
        start_time = time.time()
        for epoch in range(self.epoch):
            self.G.train()
            epoch_start_time = time.time()
            for iter, (x_, _) in enumerate(self.data_loader):
                if iter == len(self.data_loader.dataset) // self.batch_size:
                    break

                # Sample latent noise of shape (batch_size, z_dim), e.g. (64, 62).
                z_ = torch.rand((self.batch_size, self.z_dim))
                if self.gpu_mode:
                    x_, z_ = x_.cuda(), z_.cuda()  # x_ is a batch of real examples
                # update D network
                self.D_optimizer.zero_grad()

                D_real = self.D(x_)
                D_real_loss = self.BCE_loss(D_real, self.y_real_)

                G_ = self.G(z_)
                D_fake = self.D(G_)
                D_fake_loss = self.BCE_loss(D_fake, self.y_fake_)

                D_loss = D_real_loss + D_fake_loss
                self.train_hist['D_loss'].append(D_loss.item())

                D_loss.backward()
                self.D_optimizer.step()

                # update G network
                self.G_optimizer.zero_grad()

                G_ = self.G(z_)
                D_fake = self.D(G_)
                G_loss = self.BCE_loss(D_fake, self.y_real_)
                self.train_hist['G_loss'].append(G_loss.item())

                G_loss.backward()
                self.G_optimizer.step()

                if ((iter + 1) % 100) == 0:
                    print("Epoch: [%2d] [%4d/%4d] D_loss: %.8f, G_loss: %.8f" %
                          ((epoch + 1),
                           (iter + 1), self.data_loader.dataset.__len__() //
                           self.batch_size, D_loss.item(), G_loss.item()))

            self.train_hist['per_epoch_time'].append(time.time() -
                                                     epoch_start_time)
            with torch.no_grad():
                self.visualize_results((epoch + 1))

        self.train_hist['total_time'].append(time.time() - start_time)
        print("Avg one epoch time: %.2f, total %d epochs time: %.2f" %
              (np.mean(self.train_hist['per_epoch_time']), self.epoch,
               self.train_hist['total_time'][0]))
        print("Training finish!... save training results")

        self.save()
        utils.generate_animation(
            self.result_dir + '/' + self.dataset + '/' + self.model_name +
            '/' + self.model_name, self.epoch)
        utils.loss_plot(
            self.train_hist,
            os.path.join(self.save_dir, self.dataset, self.model_name),
            self.model_name)
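Nearly every example finishes by calling utils.generate_animation on a prefix of per-epoch PNGs, but the helper itself never appears in these snippets. A hypothetical reimplementation consistent with the '_epoch%03d.png' naming used above; the filename pattern, output name, and frame rate are assumptions:

import imageio

def generate_animation(path_prefix, num_epochs):
    # Hypothetical stand-in for utils.generate_animation: collect the image
    # saved after each epoch and stitch the frames into a GIF.
    frames = [imageio.imread('%s_epoch%03d.png' % (path_prefix, e + 1))
              for e in range(num_epochs)]
    imageio.mimsave(path_prefix + '_generate_animation.gif', frames, fps=5)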