Example 1

import random

import numpy as np
from tqdm import tqdm

# Assumed project-local dependencies (not defined in this snippet): NN,
# SimpleNN, Adam, normal_sample, binary_crossentropy, binary_accuracy,
# VAE_KL, epsilon.

class GAN(object):
    """生成对抗网络
    网络结构:
        隐藏状态为z,观测数据为x,输出真假标签为y,则
        生成器:g = tanh(w1*z + b1),  x = sigmoid(u1*g + b2)
        判别器:h = tanh(w2*x + b3),  y = sigmoid(u2*h + b4)
    参数:
        z_dim: 隐状态维度
        g_dim: 生成器隐藏层维度
        x_dim: 数据的维度
        h_dim: 判别器隐藏层维度
        lr: 初始学习率
        dropout: 随机失活率
    """
    def __init__(self, z_dim, g_dim, x_dim, h_dim, lr=0.01, dropout=0.0):
        # architecture parameters
        self.z_dim = z_dim
        self.g_dim = g_dim
        self.x_dim = x_dim
        self.h_dim = h_dim

        # learning parameters
        assert (lr > 0)
        self.lr = float(lr)
        self.dropout = min(max(dropout, 0.0), 1.0)
        ## the generator and the discriminator each get their own optimizer
        self.generator_optimizer = Adam(alpha=self.lr)
        #self.discriminator_optimizer = Adam(alpha=self.lr)

        # network parameters
        self.generator = NN(input_dim=z_dim,
                            hidden_dim=g_dim,
                            output_dim=x_dim,
                            lr=self.lr * 0.5,
                            dropout=self.dropout)

        self.generator.mode = 'binary'

        self.discriminator = NN(input_dim=x_dim,
                                hidden_dim=h_dim,
                                output_dim=1,
                                lr=self.lr * 0.1,
                                dropout=self.dropout)

    def __update_discriminator__(self, x, y):
        """更新判别器参数就当做正常的二分类网络更新"""
        y_hat = self.discriminator.__update__(x, y)
        return y_hat

    def __update_generator__(self, z):
        """损失函数求导更新生成器
        模型结构:
            生成器:g = tanh(w1*z + b1),  x = sigmoid(u1*g + b2)
            判别器:h = tanh(w2*x + b3),  y_hat = sigmoid(u2*h + b4)
        对数似然损失函数(y为真伪标签):
            L = -[y*log(y_hat) + (1-y)*log(1-y_hat)]
        求导得:
            ▽y_hat = (1-y)/(1-y_hat) - y/y_hat
            ▽x = ∂L/∂y_hat * ∂y_hat/∂h * ∂h/∂x
                = ▽y_hat * y_hat*(1-y_hat)*u2 * (1-h^2)*w2
                = [(1-y)*y_hat-y*(1-y_hat)] * u2*(1-h^2)*w2
            ∂x/∂u1 = x*(1-x)*g
                令 = x_res * g
            ∂x/∂w1 = ∂x/∂g * ∂g/∂w1
                   = x_res*u1 * (1-g^2)*z
            ▽b2 = ∂L/∂x * ∂x/∂b2
                 = ▽x * x_res * 1
            ▽u1 = ∂L/∂x * ∂x/∂u1
                 = ▽x * x_res * g
            ▽b1 = ∂L/∂x * ∂x/∂g * ∂g/∂b1
                 = ▽x * x_res*u1 * (1-g^2)*1
            ▽w1 = ∂L/∂x * ∂x/∂g * ∂g/∂21
                 = ▽x * x_res*u1 * (1-g^2)*z
        此处公式不考虑batch与变量维度,具体实现时需注意维度扩展。
        """
        g = self.generator.__compute_h__(z)
        x = self.generator.__compute_y__(g)
        h = self.discriminator.__compute_h__(x)
        y_hat = self.discriminator.__compute_y__(h)
        # residual with the fake label y=0: (1-y)*y_hat - y*(1-y_hat) = y_hat
        y_res = y_hat
        #(batch, 1, 1, y_dim)
        y_res_expand = np.expand_dims(np.expand_dims(y_res, 1), 1)
        #(1, 1, h_dim, y_dim)
        u2_expand = np.expand_dims(np.expand_dims(self.discriminator.U, 0), 0)
        #(batch, 1, h_dim, 1)
        h_expand = np.expand_dims(np.expand_dims(h, 1), 3)
        #(1, x_dim, h_dim, 1)
        w2_expand = np.expand_dims(np.expand_dims(self.discriminator.W, 0), 3)
        #(batch, x_dim, h_dim, y_dim)
        x_grad = y_res_expand * u2_expand * (1 - h_expand**2) * w2_expand
        # reduce to (batch, x_dim)
        x_grad = np.mean(np.mean(x_grad, -1), -1)

        x_res = x * (1 - x)
        #(batch, 1, 1, x_dim)
        x_res_expand = np.expand_dims(np.expand_dims(x_res, 1), 2)
        #(batch, 1, g_dim, 1)
        g_expand = np.expand_dims(np.expand_dims(g, 1), 3)
        #(1, 1, g_dim, x_dim)
        u1_expand = np.expand_dims(np.expand_dims(self.generator.U, 0), 0)
        #(batch, z_dim, 1, 1)
        z_expand = np.expand_dims(np.expand_dims(z, -1), -1)
        #(batch, 1, 1, x_dim)
        x_grad_expand = np.expand_dims(np.expand_dims(x_grad, 1), 1)

        b2_ = x_grad_expand * x_res_expand
        u1_ = b2_ * g_expand
        u1_ = np.mean(np.squeeze(u1_), 0)
        b2_ = np.mean(np.squeeze(b2_), axis=0)

        b1_ = x_grad_expand * x_res_expand * u1_expand * (1 - g_expand**2)
        w1_ = b1_ * z_expand
        w1_ = np.mean(np.mean(w1_, 0), -1)
        b1_ = np.mean(np.mean(np.squeeze(b1_), 0), -1)

        if self.dropout == 0:
            mask = 1
        else:
            mask = np.random.binomial(
                1, 1 - self.dropout,
                (self.z_dim + self.x_dim + 1, self.g_dim + 1))

        grad = np.zeros((self.z_dim + self.x_dim + 1, self.g_dim + 1))
        grad[:self.z_dim, :-1] = w1_
        grad[self.z_dim, :-1] = b1_
        grad[self.z_dim + 1:, :-1] = u1_.T
        grad[self.z_dim + 1:, -1] = b2_
        # to fool the discriminator, the generator must make the loss
        # increase, hence "+=" instead of "-="
        self.generator.coefs += self.generator_optimizer.update(grad) * mask
        return y_hat

    def generate_z(self, n_samples):
        z = normal_sample(dim=self.z_dim, n=n_samples)
        return z

    def generate_x(self, z):
        fake_x = self.generator(z)
        return fake_x

    def generate(self, n_samples):
        z = normal_sample(dim=self.z_dim, n=n_samples)
        fake_x = self.generator(z)
        return fake_x

    def fit(self, x, batch_size=128, epochs=1, verbose=0):
        x = np.asarray(x)
        n_samples, n_features = x.shape
        if n_features != self.x_dim:
            raise ValueError('Data dimensions should be equal to x_dim!')
        loops = n_samples // batch_size
        NCOLS = 100
        log_interval = max(1, int(loops / 100))

        for epoch in range(1, epochs + 1):
            gen_x = self.generate(n_samples)
            if verbose:
                print("Epoch {}/{}:".format(epoch, epochs))
                desc = "Training Discriminator - loss: {:.4f} - acc: {:.4f} "
                pbar = tqdm(initial=0, leave=True, total=loops, ncols=NCOLS)
            for loop in range(loops):  # train the discriminator
                idx = random.sample(range(n_samples), batch_size)
                fake_x = gen_x[idx]
                data_x = x[idx]
                mix_x = np.vstack([data_x, fake_x])
                labels = np.array([1] * batch_size + [0] * batch_size)
                shuff_idx = list(range(2 * batch_size))
                random.shuffle(shuff_idx)
                mix_x = mix_x[shuff_idx]
                y = np.expand_dims(labels[shuff_idx], 1)
                y_pred = self.__update_discriminator__(mix_x, y)
                if verbose:
                    loss = binary_crossentropy(y, y_pred)
                    acc = binary_accuracy(y, y_pred)
                    if loop % log_interval == 0:
                        pbar.desc = desc.format(loss, acc)
                        pbar.update(max(1, int(loops / 100)))  # advance the progress bar

            gen_z = self.generate_z(n_samples)
            if verbose:
                pbar.close()
                desc = "Training Generator - loss: {:.4f} - acc: {:.4f} "
                pbar = tqdm(initial=0, leave=True, total=loops, ncols=NCOLS)
            for loop in range(loops):  # train the generator
                idx = random.sample(range(n_samples), batch_size)
                fake_z = gen_z[idx]
                data_x = x[idx]
                """
                加一点辅助监督:浅层网络生成器很难与判别器抗衡,所以加一点
                    辅助监督帮助生成器。
                """
                _ = self.generator.__update__(fake_z, data_x)
                y_hat = self.__update_generator__(fake_z)  #只训练生成数据
                if verbose:
                    y_true_pred = self.discriminator(data_x)
                    y_pred = np.vstack([y_true_pred, y_hat])
                    labels = np.array([1] * batch_size + [0] * batch_size)
                    y = np.expand_dims(labels, 1)
                    loss = binary_crossentropy(y, y_pred)
                    acc = binary_accuracy(y, y_pred)
                    if loop % log_interval == 0:
                        pbar.desc = desc.format(loss, acc)
                        pbar.update(max(1, int(loops / 100)))  # advance the progress bar
            if verbose: pbar.close()

        return self
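
# A minimal usage sketch for the GAN above (hypothetical toy data and
# hyper-parameters, shown only to illustrate the call sequence); it assumes
# the project-local NN, Adam and normal_sample dependencies imported at the
# top are available.
toy_x = (np.random.rand(1024, 16) > 0.5).astype(float)  # toy binary data
gan = GAN(z_dim=4, g_dim=32, x_dim=16, h_dim=32, lr=0.01, dropout=0.1)
gan.fit(toy_x, batch_size=128, epochs=2, verbose=1)
samples = gan.generate(n_samples=10)  # (10, 16) array of generated data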
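
# A self-contained finite-difference check (plain numpy, independent of the
# NN class) of the chain-rule formulas in the __update_generator__ docstring.
# For a scalar network with the fake label y=0, the loss is
# L = -log(1 - y_hat) and the docstring's gradient for u1 reduces to
# ▽u1 = y_hat * u2*(1-h^2)*w2 * x*(1-x) * g.
def _scalar_forward(z, w1, b1, u1, b2, w2, b3, u2, b4):
    g = np.tanh(w1 * z + b1)                      # generator hidden layer
    x = 1.0 / (1.0 + np.exp(-(u1 * g + b2)))      # generated observation
    h = np.tanh(w2 * x + b3)                      # discriminator hidden layer
    y_hat = 1.0 / (1.0 + np.exp(-(u2 * h + b4)))  # real/fake probability
    return g, x, h, y_hat

_p = dict(z=0.3, w1=0.5, b1=-0.1, u1=0.8, b2=0.2, w2=-0.4, b3=0.1, u2=0.6, b4=0.0)
_g, _x, _h, _y_hat = _scalar_forward(**_p)
_analytic = _y_hat * _p['u2'] * (1 - _h**2) * _p['w2'] * _x * (1 - _x) * _g
_loss = lambda q: -np.log(1 - _scalar_forward(**q)[-1])
_eps = 1e-6
_numeric = (_loss(dict(_p, u1=_p['u1'] + _eps)) -
            _loss(dict(_p, u1=_p['u1'] - _eps))) / (2 * _eps)
assert np.isclose(_analytic, _numeric)  # the hand derivation checks out
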
Example 2

class VAE(object):
    """变分自动编码器
    模型结构:
        编码器:h = tanh(w1*x + b1),  [u,sigma] = W*h + B
                u = w2*h + b2,    sigma = w3*h + b3, 编码器输出线性激活
        采样函数:z = u + sigma * e,  e是标准正态分布采样,记录为常量
        解码器:g = tanh(w4*z + b4),  x_hat = sigmoid(w5*g + b5)
    参数:
        x_dim: 数据的维度
        h_dim: 编码器隐藏层维度
        z_dim: 隐状态维度
        g_dim: 解码器隐藏层维度
        lr: 初始学习率
        dropout: 随机失活率
        
    """
    def __init__(self, x_dim, h_dim, z_dim, g_dim, lr=0.01, dropout=0.0):
        # architecture parameters
        self.z_dim = z_dim
        self.h_dim = h_dim
        self.g_dim = g_dim
        self.x_dim = x_dim

        # learning parameters
        assert (lr > 0)
        self.lr = float(lr)
        self.dropout = min(max(dropout, 0.0), 1.0)

        # network parameters
        self.encoder = SimpleNN(input_dim=x_dim,
                                hidden_dim=h_dim,
                                output_dim=2 * z_dim,
                                lr=self.lr,
                                dropout=self.dropout)

        self.encoder_optimizer = Adam(alpha=self.lr)

        self.decoder = NN(input_dim=z_dim,
                          hidden_dim=g_dim,
                          output_dim=x_dim,
                          lr=self.lr,
                          dropout=self.dropout)

        self.decoder.mode = 'binary'

    def __update_decoder__(self, z, x):
        """更新解码器参数就使用常规对数似然损失来更新"""
        x_hat = self.decoder.__update__(z, x)
        return x_hat

    def __update_encoder__(self, x, y=None):
        """更新编码器参数使用对数似然与KL散度损失来更新
        标准VAE的标签y用不上。
        """
        n_samples, dim = x.shape
        h = self.encoder.__compute_h__(x)
        u_sigma = self.encoder.__compute_y__(h)
        u, sigma = u_sigma[:, :self.z_dim], u_sigma[:, self.z_dim:]
        z, e = normal_sample(u, sigma, n_samples, self.z_dim)
        g = self.decoder.__compute_h__(z)
        x_hat = self.decoder.__compute_y__(g)

        KL_grad = self.__encoder_KL_grad__(x, h, u, sigma, y=y)
        Likely_grad = self.__encoder_Likely_grad__(x, h, u, sigma, e, g, x_hat)
        grad = KL_grad + Likely_grad  # KL-divergence loss + log-likelihood loss

        if self.dropout == 0:
            mask = 1
        else:
            mask = np.random.binomial(
                1, 1 - self.dropout,
                (self.x_dim + self.z_dim + 1, self.h_dim + 1))

        self.encoder.coefs -= self.encoder_optimizer.update(grad) * mask
        KL_loss = VAE_KL(u, sigma)
        return z, KL_loss

    def __encoder_KL_grad__(self, x, h, u, sigma, y=None):
        """KL散度损失函数的梯度,标准VAE不使用标签y:
        编码器:
            h = tanh(w1*x + b1),  [u,sigma] = W*h + B
            u = w2*h + b2,    sigma = w3*h + b3
        根据KL散度公式可得:
            KL = -0.5 * [1 + 2*log(sigma) - u^2 - sigma^2]
        那么求导得:
            ▽b3 = ∂KL/∂sigma * ∂sigma/∂b3
                 = (sigma - 1/sigma) * 1 = ▽sigma
            ▽w3 = ∂KL/∂sigma * ∂sigma/∂w3
                 = (sigma - 1/sigma) * h
            ▽b2 = ∂KL/∂u * ∂u/∂b2
                 = u * 1
            ▽w2 = ∂KL/∂u * ∂u/∂w2
                 = u * h
            ▽b1 = (∂KL/∂u * ∂u/∂h + ∂KL/∂sigma * ∂sigma/∂h) * ∂h/∂b1
                 = (u*w2 + ▽sigma*w3) * (1-h^2) * 1
            ▽w1 = (∂KL/∂u * ∂u/∂h + ∂KL/∂sigma * ∂sigma/∂h) * ∂h/∂w1
                 = (u*w2 + ▽sigma*w3) * (1-h^2) * x
        此处公式不考虑batch与变量维度,具体实现时需注意维度扩展。
        """
        sigma_grad = sigma - 1 / (sigma + epsilon)
        b3_ = np.mean(sigma_grad, 0)

        sigma_grad_expand = np.expand_dims(sigma_grad, 1)  #(batch, 1, z_dim)
        h_expand = np.expand_dims(h, -1)  #(batch, h_dim, 1)
        w3_ = np.mean(sigma_grad_expand * h_expand, 0)

        b2_ = np.mean(u, 0)
        u_expand = np.expand_dims(u, 1)  #(batch, 1, z_dim)
        w2_ = np.mean(u_expand * h_expand, 0)

        #(1, h_dim, z_dim)
        w2_expand = np.expand_dims(self.encoder.U[:, :self.z_dim], 0)
        #(1, h_dim, z_dim)
        w3_expand = np.expand_dims(self.encoder.U[:, self.z_dim:], 0)
        b1_ = (u_expand * w2_expand + sigma_grad_expand * w3_expand) * \
              (1 - h_expand**2)
        b1_expand = np.expand_dims(b1_, 1)  #(batch, 1, h_dim, z_dim)
        #(batch, x_dim, 1, 1)
        x_expand = np.expand_dims(np.expand_dims(x, -1), -1)
        w1_ = b1_expand * x_expand
        w1_ = np.mean(np.mean(w1_, 0), -1)
        b1_ = np.mean(np.mean(b1_, 0), -1)

        grad = np.zeros((self.x_dim + 2 * self.z_dim + 1, self.h_dim + 1))
        grad[:self.x_dim, :-1] = w1_
        grad[self.x_dim, :-1] = b1_
        grad[self.x_dim + 1:, :-1] = np.column_stack([w2_, w3_]).T
        grad[self.x_dim + 1:, -1] = np.vstack([b2_, b3_]).reshape([-1])
        return grad

    def __encoder_Likely_grad__(self, x, h, u, sigma, e, g, x_hat):
        """对数似然损失的梯度
        模型结构:
            编码器:h = tanh(w1*x + b1),  [u,sigma] = W*h + B
                    u = w2*h + b2,    sigma = w3*h + b3
            采样函数:z = u + sigma * e
            解码器:g = tanh(w4*z + b4),  x_hat = sigmoid(w5*g + b5)
        对数似然损失公式:
            L = -[x*log(x_hat) + (1-x)*log(1-x_hat)]
        那么求导得(更具体的推导参考神经网络):
            ▽x_hat = (1-x)/(1-x_hat) - x/x_hat
            ▽z = ∂L/∂x_hat * ∂x_hat/∂g * ∂g/∂z
                = ▽x_hat * x_hat*(1-x_hat)*w5 + (1-g^2)*w4
                = [(1-x)*x_hat - x(1-x_hat)] * (1-g^2) * w4 * w5
            ∂z/∂u = 1,  ∂z/∂sigma = e
            ▽b3 = ∂L/∂z * ∂z/∂sigma * ∂sigma/∂b3
                 = ▽z * e * 1
            ▽w3 = ∂L/∂z * ∂z/∂sigma * ∂sigma/∂w3
                 = ▽z * e * h
            ▽b2 = ∂L/∂z * ∂z/∂u * ∂u/∂b2
                 = ▽z * 1 * 1
            ▽w2 = ∂L/∂z * ∂z/∂u * ∂u/∂w2
                 = ▽z * 1 * h
            ▽b1 = ∂L/∂z * (∂z/∂u * ∂u/∂h + ∂z/∂sigma * ∂sigma/∂h) * ∂h/∂b1
                 = ▽z * (w2 + e*w3) * 1
            ▽w1 = ∂L/∂z * (∂z/∂u * ∂u/∂h + ∂z/∂sigma * ∂sigma/∂h) * ∂h/∂w1
                 = ▽z * (w2 + e*w3) * x
        此处公式不考虑batch与变量维度,具体实现时需注意维度扩展。
        """
        x_res = (1 - x) * x_hat - x * (1 - x_hat)
        #(batch, 1, 1, x_dim)
        x_res_expand = np.expand_dims(np.expand_dims(x_res, 1), 1)
        #(batch, 1, g_dim, 1)
        g_expand = np.expand_dims(np.expand_dims(g, 1), -1)
        #(1, 1, g_dim, x_dim)
        w5_expand = np.expand_dims(np.expand_dims(self.decoder.U, 0), 0)
        #(1, z_dim, g_dim, 1)
        w4_expand = np.expand_dims(np.expand_dims(self.decoder.W, 0), -1)
        #(batch, z_dim, g_dim, x_dim)
        z_grad = x_res_expand * (1 - g_expand**2) * w4_expand * w5_expand
        # reduce to (batch, z_dim)
        z_grad = np.mean(np.mean(z_grad, -1), -1)

        b2_ = np.mean(z_grad, 0)
        b3_ = np.mean(z_grad * e, 0)
        e_expand = np.expand_dims(e, 1)  #(batch, 1, z_dim)
        h_expand = np.expand_dims(h, -1)  #(batch, h_dim, 1)
        z_grad_expand = np.expand_dims(z_grad, 1)  #(batch, 1, z_dim)
        w2_ = np.mean(z_grad_expand * h_expand, 0)
        w3_ = np.mean(z_grad_expand * h_expand * e_expand, 0)

        #(batch, 1, 1, z_dim)
        z_grad_expand = np.expand_dims(z_grad_expand, 1)
        #(batch, x_dim, 1, 1)
        x_expand = np.expand_dims(np.expand_dims(x, -1), -1)
        #(1, 1, h_dim, z_dim)
        w2_expand = np.expand_dims(
            np.expand_dims(self.encoder.U[:, :self.z_dim], 0), 0)
        #(1, 1, h_dim, z_dim)
        w3_expand = np.expand_dims(
            np.expand_dims(self.encoder.U[:, self.z_dim:], 0), 0)
        #(batch, 1, 1, z_dim)
        e_expand = np.expand_dims(e_expand, 1)
        #(batch, 1, h_dim, 1)
        h_expand = np.expand_dims(h_expand, 1)

        # ∂h/∂b1 = (1-h^2) for h = tanh(...), as in the derivation above
        b1_ = z_grad_expand * (w2_expand + e_expand * w3_expand) * \
              (1 - h_expand**2)
        w1_ = b1_ * x_expand
        w1_ = np.mean(np.mean(w1_, 0), -1)
        b1_ = np.squeeze(b1_, 1)
        b1_ = np.mean(np.mean(b1_, 0), -1)

        grad = np.zeros((self.x_dim + 2 * self.z_dim + 1, self.h_dim + 1))
        grad[:self.x_dim, :-1] = w1_
        grad[self.x_dim, :-1] = b1_
        grad[self.x_dim + 1:, :-1] = np.column_stack([w2_, w3_]).T
        grad[self.x_dim + 1:, -1] = np.vstack([b2_, b3_]).reshape([-1])
        return grad

    def fit(self, x, batch_size=100, epochs=1, verbose=0):
        """训练方法
        标准VAE为自监督模型,所以不需要标签
        """
        x = np.asarray(x)
        n_samples, dim = x.shape
        assert (self.x_dim == dim)
        loops = n_samples // batch_size
        NCOLS = 80  #min(100, loops)
        log_interval = max(1, int(loops / 100))

        for epoch in range(1, epochs + 1):
            if verbose:
                desc = "Epoch {}/{} - loss: {:.4f} "
                pbar = tqdm(initial=0, leave=True, total=loops, ncols=NCOLS)
            for loop in range(loops):
                idx = random.sample(range(n_samples), batch_size)
                x_sample = x[idx]
                z, KL_loss = self.__update_encoder__(x_sample)
                x_hat = self.__update_decoder__(z, x_sample)
                if verbose:
                    Likely_loss = binary_crossentropy(x_sample, x_hat)
                    loss = KL_loss + Likely_loss
                    if loop % log_interval == 0:
                        pbar.desc = desc.format(epoch, epochs, loss)
                        pbar.update(max(1, int(loops / 100)))  # advance the progress bar

            if verbose: pbar.close()

        return self

    def encode(self, x):
        n_samples, dim = x.shape
        u_sigma = self.encoder(x)
        # split the encoder output into mean and std, each of width z_dim
        u, sigma = u_sigma[:, :self.z_dim], u_sigma[:, self.z_dim:]
        z, e = normal_sample(u, sigma, n_samples, self.z_dim)
        return z

    def decode(self, z):
        return self.decoder(z)

    def generate(self, n_samples=1):
        z, e = normal_sample(n_samples=n_samples, dim=self.z_dim)
        return self.decode(z)
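
# A minimal usage sketch for the VAE above (hypothetical toy data and
# hyper-parameters); it assumes the project-local SimpleNN, NN, Adam,
# normal_sample and VAE_KL dependencies imported at the top are available.
toy_x = (np.random.rand(2000, 20) > 0.5).astype(float)  # toy binary data
vae = VAE(x_dim=20, h_dim=64, z_dim=8, g_dim=64, lr=0.01)
vae.fit(toy_x, batch_size=100, epochs=2, verbose=1)
codes = vae.encode(toy_x[:5])       # (5, 8) latent codes
recon = vae.decode(codes)           # (5, 20) reconstructions
fresh = vae.generate(n_samples=5)   # (5, 20) samples drawn from the prior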
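
# A quick numeric check (plain numpy) of the KL derivatives stated in the
# __encoder_KL_grad__ docstring: for
#     KL = -0.5 * (1 + 2*log(sigma) - u^2 - sigma^2)
# the gradients should be ∂KL/∂sigma = sigma - 1/sigma and ∂KL/∂u = u.
_kl = lambda u, s: -0.5 * (1 + 2 * np.log(s) - u**2 - s**2)
_u, _s, _eps = 0.7, 1.3, 1e-6
assert np.isclose((_kl(_u, _s + _eps) - _kl(_u, _s - _eps)) / (2 * _eps), _s - 1 / _s)
assert np.isclose((_kl(_u + _eps, _s) - _kl(_u - _eps, _s)) / (2 * _eps), _u)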