Example #1
def disc_to_unthinned(ref_set, generated):
    """Computes MMD, energy, and KL between generated samples and the unthinned set."""
    ref_set_n = len(ref_set)
    n_sample = len(generated)
    # Compare the generated sample to equally sized random subsamples
    # (drawn with replacement) of the unthinned reference data.
    mmd_, _ = compute_mmd(
        generated, ref_set[np.random.choice(ref_set_n, n_sample)])
    energy_ = compute_energy(
        generated, ref_set[np.random.choice(ref_set_n, n_sample)])
    kl_ = compute_kl(generated,
                     ref_set[np.random.choice(ref_set_n, n_sample)],
                     k=5)
    return mmd_, energy_, kl_
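
For orientation, here is a minimal self-contained NumPy sketch of an unbiased two-sample MMD^2 estimate with an RBF kernel; the repo's compute_mmd is assumed to do something along these lines (the name mmd2_rbf and the single bandwidth are illustrative, not the repo's API):

import numpy as np

def mmd2_rbf(x, y, sigma=1.0):
    """Unbiased MMD^2 estimate; x is (n, d), y is (m, d)."""
    def k(a, b):
        d2 = np.sum(a**2, 1)[:, None] - 2 * a @ b.T + np.sum(b**2, 1)[None, :]
        return np.exp(-d2 / (2 * sigma**2))
    n, m = len(x), len(y)
    kxx, kyy, kxy = k(x, x), k(y, y), k(x, y)
    # Drop the diagonal of the within-sample blocks for unbiasedness.
    return ((kxx.sum() - np.trace(kxx)) / (n * (n - 1)) +
            (kyy.sum() - np.trace(kyy)) / (m * (m - 1)) -
            2 * kxy.mean())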
Example #2
def compute_discrepancies(cand_set, ref_set):
    """Computes MMD, energy, and KL discrepancies between two sets."""
    cand_set_n = len(cand_set)
    ref_set_n = len(ref_set)
    n_sample = 1000
    assert cand_set_n >= n_sample and ref_set_n >= n_sample, \
        'n_sample too high for one of the inputs.'
    # Compute MMD, energy, and KL between random subsamples
    # (drawn with replacement) of each set.
    mmd_, _ = compute_mmd(cand_set[np.random.choice(cand_set_n, n_sample)],
                          ref_set[np.random.choice(ref_set_n, n_sample)])
    energy_ = compute_energy(
        cand_set[np.random.choice(cand_set_n, n_sample)],
        ref_set[np.random.choice(ref_set_n, n_sample)])
    kl_ = compute_kl(cand_set[np.random.choice(cand_set_n, n_sample)],
                     ref_set[np.random.choice(ref_set_n, n_sample)],
                     k=5)
    return mmd_, energy_, kl_
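
Likewise, compute_energy presumably estimates the Székely–Rizzo energy distance, which has a very short NumPy form (energy_distance is a hypothetical name, not the repo's API):

import numpy as np
from scipy.spatial.distance import cdist

def energy_distance(x, y):
    """E = 2*E||X - Y|| - E||X - X'|| - E||Y - Y'||, V-statistic form."""
    return 2 * cdist(x, y).mean() - cdist(x, x).mean() - cdist(y, y).mean()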
Example #3
def get_mmds(sample_size):
    num_runs = 100

    # Set up containers for results.
    mmds_norm_gamma = np.zeros(num_runs)
    kmmds_norm_gamma = np.zeros(num_runs)
    kmmds_norm_norm = np.zeros(num_runs)
    x_moments = np.zeros((num_runs, 4))
    y_moments = np.zeros((num_runs, 4))
    z_moments = np.zeros((num_runs, 4))

    for i in range(num_runs):
        # Define distributions to sample.
        """
        x = np.random.normal(1, 1, size=sample_size)
        y = np.random.gamma(1, 1, size=sample_size)
        z = np.random.normal(1, 1, size=sample_size)
        """
        x = np.random.normal(10, 5, size=sample_size)
        y = np.random.gamma(20, 0.5, size=sample_size)
        z = np.random.normal(10, 5, size=sample_size)

        # Center and standardize the data.
        x = (x - np.mean(x)) / np.std(x)
        y = (y - np.mean(y)) / np.std(y)
        z = (z - np.mean(z)) / np.std(z)

        # Compute metrics: reference, desired metric, comparison.
        mmds_norm_gamma[i] = compute_mmd(x, y, slim_output=True)
        kmmds_norm_gamma[i] = compute_kmmd(x, y, slim_output=True)
        kmmds_norm_norm[i] = compute_kmmd(x, z, slim_output=True)

        # Store empirical moments.
        x_moments[i] = compute_moments(x)
        y_moments[i] = compute_moments(y)
        z_moments[i] = compute_moments(z)

    return (mmds_norm_gamma, kmmds_norm_gamma, kmmds_norm_norm, x_moments,
            y_moments, z_moments)
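
A hypothetical driver for the function above, assuming get_mmds and the compute_* helpers it calls are importable from this repo:

mmds_ng, kmmds_ng, kmmds_nn, x_m, y_m, z_m = get_mmds(sample_size=500)
print('MMD  norm vs gamma: {:.4f} +/- {:.4f}'.format(mmds_ng.mean(), mmds_ng.std()))
print('kMMD norm vs gamma: {:.4f} +/- {:.4f}'.format(kmmds_ng.mean(), kmmds_ng.std()))
print('kMMD norm vs norm : {:.4f} +/- {:.4f}'.format(kmmds_nn.mean(), kmmds_nn.std()))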
Example #4
            feed_dict={
                z: z_batch,
                x: x_batch
            })

    if it % log_iter == 0:
        n_sample = 10000
        z_sample = get_sample_z(n_sample, noise_dim)

        g_out = sess.run(g, feed_dict={z: z_sample})
        generated_normed = g_out
        generated = np.array(generated_normed) * data_raw_std + data_raw_mean

        # Compute MMD between simulations and unthinned (target) data.
        mmd_gen_vs_unthinned, _ = compute_mmd(
            generated[np.random.choice(n_sample, 500)],
            data_raw_unthinned[np.random.choice(data_num_original, 500)])
        # Compute energy between simulations and unthinned (target) data.
        energy_gen_vs_unthinned = compute_energy(
            generated[np.random.choice(n_sample, 500)],
            data_raw_unthinned[np.random.choice(data_num_original, 500)])
        # Compute KL between simulations and unthinned (target) data.
        kl_gen_vs_unthinned = compute_kl(
            generated[np.random.choice(n_sample, 500)],
            data_raw_unthinned[np.random.choice(data_num_original, 500)],
            k=5)

        if data_dim == 2:
            fig = plot(generated, data_raw, data_raw_unthinned,
                       data_raw_upsampled, it, mmd_gen_vs_unthinned)
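
The k=5 argument suggests compute_kl is a k-nearest-neighbor KL estimator; a self-contained sketch of that estimator family (Pérez-Cruz style), with kl_knn as an illustrative name:

import numpy as np
from scipy.spatial import cKDTree

def kl_knn(x, y, k=5):
    """Estimates KL(P||Q) from samples x ~ P of shape (n, d) and y ~ Q of shape (m, d)."""
    n, m = len(x), len(y)
    d = x.shape[1]
    r = cKDTree(x).query(x, k + 1)[0][:, -1]  # k-th neighbor within x (skip self)
    s = cKDTree(y).query(x, k)[0][:, -1]      # k-th neighbor of each x_i in y
    return d * np.mean(np.log(s / r)) + np.log(m / (n - 1.0))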
Example #5
    def build_model(self):
        self.z = tf.random_normal(shape=[self.batch_size, self.z_dim])
        #self.z = tf.truncated_normal(shape=[self.batch_size, self.z_dim])

        # Images from loader are NHWC on [0,1].
        self.x = self.data_loader['images']

        # Set up generator and autoencoder functions. Output g is on [-1, 1].
        g, self.g_var = GeneratorCNN(self.z,
                                     self.base_size,
                                     self.num_conv_filters,
                                     self.filter_size,
                                     self.channel,
                                     self.repeat_num,
                                     self.data_format,
                                     reuse=False,
                                     use_bias=self.use_bias,
                                     verbose=False)
        self.g = convert_n11_to_255(g, is_tf=True)

        # TODO: Resize for TFHub encoder.
        #if g.shape.as_list()[1] != self.scale_size:
        #    g = tf.image.resize_nearest_neighbor(
        #        g, (self.scale_size, self.scale_size))
        g = tf.image.resize_nearest_neighbor(self.g, (224, 224))
        x = tf.image.resize_nearest_neighbor(self.x, (224, 224))
        if self.channel == 1:
            g = tf.image.grayscale_to_rgb(g)
            x = tf.image.grayscale_to_rgb(x)

        # Encode both x and g. Both should start on [0,1], size (224, 224).
        tf_enc_out = tfhub_encoder(tf.concat([x, g], 0))
        self.enc_x, self.enc_g = tf.split(tf_enc_out, 2)

        #######################################################################
        # Set up several losses (e.g. discriminator, discrepancy, autoencoder).

        # Subset encoding to only non-zero columns.
        if self.dataset == 'mnist':
            nonzero_indices = np.load(
                'nonzero_indices_mnist_mobilenetv2035224.npy')
        elif self.dataset == 'birds':
            nonzero_indices = np.load(
                'nonzero_indices_birds_mobilenetv2035224.npy')
        elif self.dataset == 'celeba':
            nonzero_indices = np.load(
                'nonzero_indices_celeba_mobilenetv2035224.npy')
        self.enc_x = tf.gather(self.enc_x, nonzero_indices, axis=1)
        self.enc_g = tf.gather(self.enc_g, nonzero_indices, axis=1)
        print('\n\nUsing {} nonzero of {} total features.\n\n'.format(
            self.enc_x.get_shape().as_list()[1],
            tf_enc_out.get_shape().as_list()[1]))

        # LOSS: Maximum mean discrepancy.
        # Kernel on encodings.
        arr1 = self.enc_x
        arr2 = self.enc_g
        sigma_list = [0.0001, 0.001, 0.1]

        data_num = tf.shape(arr1)[0]
        gen_num = tf.shape(arr2)[0]
        v = tf.concat([arr1, arr2], 0)
        VVT = tf.matmul(v, tf.transpose(v))
        sqs = tf.reshape(tf.diag_part(VVT), [-1, 1])
        sqs_tiled_horiz = tf.tile(sqs, [1, tf.shape(sqs)[0]])
        exp_object = sqs_tiled_horiz - 2 * VVT + tf.transpose(sqs_tiled_horiz)
        K = 0.0
        for sigma in sigma_list:
            gamma = 1.0 / (2 * sigma**2)
            K += tf.exp(-gamma * exp_object)
        self.K = K
        K_xx = K[:data_num, :data_num]
        K_yy = K[data_num:, data_num:]
        K_xy = K[:data_num, data_num:]
        K_xx_upper = upper(K_xx)
        K_yy_upper = upper(K_yy)
        num_combos_xx = tf.to_float(data_num * (data_num - 1) / 2)
        num_combos_yy = tf.to_float(gen_num * (gen_num - 1) / 2)
        num_combos_xy = tf.to_float(data_num * gen_num)

        # Compute and choose between MMD values.
        self.mmd2 = (tf.reduce_sum(K_xx_upper) / num_combos_xx +
                     tf.reduce_sum(K_yy_upper) / num_combos_yy -
                     2 * tf.reduce_sum(K_xy) / num_combos_xy)

        # LOSS: Maximum mean discrepancy, laplace kernel.
        k_moments = self.k_moments
        do_taylor_weights = self.do_taylor_weights
        cmd_span_const = self.cmd_span_const

        self.mmd2_laplace = compute_mmd(arr1,
                                        arr2,
                                        use_tf=True,
                                        slim_output=True,
                                        sigma_list=[0.1, 0.5, 1.0, 2.0],
                                        kernel_choice='rbf_laplace')

        # LOSS: K-MMD, Taylor expansion.
        self.kmmd = compute_kmmd(arr1,
                                 arr2,
                                 k_moments=k_moments,
                                 sigma_list=[0.1, 0.5, 1.0, 2.0],
                                 use_tf=True,
                                 slim_output=True)

        # LOSS: Central moment discrepancy, with k moments.
        self.cmd_k, self.cmd_k_terms = compute_cmd(
            arr1,
            arr2,
            k_moments=k_moments,
            use_tf=True,
            cmd_span_const=cmd_span_const,
            return_terms=True,
            taylor_weights=do_taylor_weights)

        # LOSS: Noncentral moment discrepancy, with k moments.
        self.ncmd_k = compute_noncentral_moment_discrepancy(
            arr1,
            arr2,
            k_moments=k_moments,
            use_tf=True,
            cmd_span_const=cmd_span_const,
            taylor_weights=do_taylor_weights)
        _, ncmd_k_terms = compute_noncentral_moment_discrepancy(
            arr1,
            arr2,
            k_moments=k_moments,
            use_tf=True,
            return_terms=True,
            cmd_span_const=1)  # No coefs, just terms.

        # LOSS: Joint noncentral moment discrepancy, with k moments.
        self.jmd_k = compute_joint_moment_discrepancy(
            arr1,
            arr2,
            k_moments=k_moments,
            use_tf=True,
            cmd_span_const=cmd_span_const,
            taylor_weights=do_taylor_weights)

        ##############################################################
        # Assemble losses into "final" nodes, to be used by optimizer.

        self.lambda_mmd = tf.Variable(0., trainable=False, name='lambda_mmd')

        if self.dataset == 'mnist':
            #self.g_loss = self.mmd2
            self.g_loss = self.ncmd_k

        elif self.dataset == 'birds':
            #self.g_loss = self.ncmd_k
            self.g_loss = self.mmd2

        elif self.dataset == 'celeba':
            #self.g_loss = self.ncmd_k
            self.g_loss = self.mmd2

        # Optimizer nodes.
        if self.optimizer == 'adam':
            g_opt = tf.train.AdamOptimizer(self.g_lr)

        elif self.optimizer == 'rmsprop':
            g_opt = tf.train.RMSPropOptimizer(self.g_lr)

        elif self.optimizer == 'sgd':
            g_opt = tf.train.GradientDescentOptimizer(self.g_lr)

        # Set up optim nodes.
        norm_gradients = True
        clip = False
        if norm_gradients:
            # Update_ops node is due to tf.layers.batch_normalization.
            update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
            with tf.control_dependencies(update_ops):
                g_grads, g_vars = zip(
                    *g_opt.compute_gradients(self.g_loss, var_list=self.g_var))

                # Replace None with zeros.
                self.g_grads = [
                    g if g is not None else tf.zeros_like(v)
                    for g, v in zip(g_grads, g_vars)
                ]

                ## Normalize each to magnitude 1.
                #self.g_grads_normed_ = [g / tf.norm(g) for g in self.g_grads]

                # Normalize each by sum of norms.
                g_scalar = tf.maximum(
                    1., tf.reduce_sum([tf.norm(g) for g in self.g_grads]))
                self.g_grads_normed = [g / g_scalar for g in self.g_grads]

                self.g_optim = g_opt.apply_gradients(
                    zip(self.g_grads_normed, g_vars))

        elif clip:
            # Update_ops node is due to tf.layers.batch_normalization.
            update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
            with tf.control_dependencies(update_ops):

                # CLIP MMD GRADIENTS.
                g_grads, g_vars = zip(
                    *g_opt.compute_gradients(self.g_loss, var_list=self.g_var))
                self.g_grads = g_grads
                g_grads_clipped = tuple(
                    [tf.clip_by_value(g, -0.01, 0.01) for g in g_grads])
                self.g_optim = g_opt.apply_gradients(
                    zip(g_grads_clipped, g_vars))

        else:
            #self.g_optim = g_opt.minimize(
            #    self.g_loss, var_list=self.g_var, global_step=self.step)

            # Update_ops node is due to tf.layers.batch_normalization.
            update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
            with tf.control_dependencies(update_ops):
                g_grads, g_vars = zip(
                    *g_opt.compute_gradients(self.g_loss, var_list=self.g_var))
                self.g_grads = [
                    g if g is not None else tf.zeros_like(v)
                    for g, v in zip(g_grads, g_vars)
                ]
                self.g_optim = g_opt.apply_gradients(zip(self.g_grads, g_vars))

        # SUMMARY
        self.summary_op = tf.summary.merge([
            tf.summary.image("a_g", self.g, max_outputs=10),
            tf.summary.image("c_x", self.x, max_outputs=10),
            tf.summary.scalar("loss/mmd2_laplace", self.mmd2_laplace),
            tf.summary.scalar("loss/kmmd", self.kmmd),
            tf.summary.scalar("loss/cmd_k", self.cmd_k),
            tf.summary.scalar("loss/ncmd_k", self.ncmd_k),
            tf.summary.scalar("loss/jmd_k", self.jmd_k),
            tf.summary.scalar("misc/g_lr", self.g_lr),
        ])
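
For readers untangling the kernel algebra above, an equivalent NumPy mirror of the mixture-of-RBF MMD^2 block (illustrative only, not part of the model):

import numpy as np

def mixture_rbf_mmd2(x, y, sigma_list=(0.0001, 0.001, 0.1)):
    n, m = len(x), len(y)
    v = np.vstack([x, y])
    vvt = v @ v.T
    sq = np.diag(vvt)[:, None]
    d2 = sq - 2 * vvt + sq.T                   # pairwise squared distances
    K = sum(np.exp(-d2 / (2 * s**2)) for s in sigma_list)
    k_xx, k_yy, k_xy = K[:n, :n], K[n:, n:], K[:n, n:]
    return (k_xx[np.triu_indices(n, 1)].mean() +
            k_yy[np.triu_indices(m, 1)].mean() -
            2 * k_xy.mean())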
Example #6
        latent_sample = (
            (latent_sample_unnormed - data_raw_mean[:latent_dim]) /
            data_raw_std[:latent_dim])

        # Get only the label dimension, to pass to Generator.
        label_sample = latent_sample[:, :label_dim]

        # Conditionally generate new sample.
        g_out = sess.run(g, feed_dict={z: z_sample, label: label_sample})
        generated_normed = np.hstack((latent_sample, g_out))
        generated = np.array(generated_normed) * data_raw_std + data_raw_mean
        ####################################################

        # Compute MMD only between data dimensions, and not latent ones.
        mmd_gen_vs_unthinned, _ = compute_mmd(
            generated[np.random.choice(n_sample, 500), -data_dim:],
            data_raw_unthinned[np.random.choice(data_num, 500), -data_dim:])
        # Compute energy only between data dimensions, and not latent ones.
        energy_gen_vs_unthinned = compute_energy(
            generated[np.random.choice(n_sample, 500), -data_dim:],
            data_raw_unthinned[np.random.choice(data_num, 500), -data_dim:])
        # Compute KL only between data dimensions, and not latent ones.
        kl_gen_vs_unthinned = compute_kl(
            generated[np.random.choice(n_sample, 500), -data_dim:],
            data_raw_unthinned[np.random.choice(data_num, 500), -data_dim:], k=5)

        if data_dim == 2:
            fig = plot(generated, data_raw, data_raw_unthinned, it,
                       mmd_gen_vs_unthinned)

        if np.isnan(d_loss_):