def set_grads(self):
    with tf.variable_scope("G_grads"):
        self.g_optim = tf.train.AdamOptimizer(
            self.lr, beta1=self.config.beta1, beta2=self.config.beta2)
        self.g_gvs = self.g_optim.compute_gradients(
            loss=self.g_loss, var_list=self.g_vars)
        if self.config.clip_grad:
            self.g_gvs = [(tf.clip_by_norm(gg, 1.), vv)
                          for gg, vv in self.g_gvs]
        self.g_grads = self.g_optim.apply_gradients(
            self.g_gvs, global_step=self.global_step)
    with tf.variable_scope("D_grads"):
        self.d_optim = tf.train.AdamOptimizer(
            self.lr * self.config.learning_rate_D / self.config.learning_rate,
            beta1=self.config.beta1, beta2=self.config.beta2)
        self.d_gvs = self.d_optim.compute_gradients(
            loss=self.d_loss, var_list=self.d_vars)
        if self.config.clip_grad:
            self.d_gvs = [(tf.clip_by_norm(gg, 1.), vv)
                          for gg, vv in self.d_gvs]
        self.d_grads = self.d_optim.apply_gradients(self.d_gvs)
    print('[*] Gradients set')
def apply_grads(self):
    with tf.variable_scope("G_grads"):
        if len(self.g_gvs):
            self.g_grads = self.g_optim.apply_gradients(
                self.g_gvs, global_step=self.global_step)
        else:
            self.g_grads = tf.no_op()
    with tf.variable_scope("D_grads"):
        if len(self.d_gvs):
            self.d_grads = self.d_optim.apply_gradients(
                self.d_gvs, global_step=self.global_d_step)
        else:
            self.d_grads = tf.no_op()
def compute_grads(self):
    with tf.variable_scope("G_grads"):
        self.g_gvs = tf.gradients(self.g_loss, self.g_vars)
        self.g_gvs = list(zip(self.g_gvs, self.g_vars))
        if self.config.clip_grad:
            self.g_gvs = [(tf.clip_by_norm(gg, 1.), vv)
                          for gg, vv in self.g_gvs]
    with tf.variable_scope("D_grads"):
        self.d_gvs = tf.gradients(self.d_loss, self.d_vars)
        self.d_gvs = list(zip(self.d_gvs, self.d_vars))
        if self.config.clip_grad:
            self.d_gvs = [(tf.clip_by_norm(gg, 1.), vv)
                          for gg, vv in self.d_gvs]
    print('[*] Gradients set')
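# The clipping comprehensions above assume every variable receives a gradient;
# tf.gradients returns None for variables not connected to the loss, and
# tf.clip_by_norm fails on None. If that assumption does not hold for some
# architecture, a standalone guard along these lines (a sketch, not part of
# the original class) keeps the (gradient, variable) pairing intact:

import tensorflow as tf

def clip_grads_by_norm(gvs, clip_norm=1.0):
    """Clip each gradient to `clip_norm`, passing None gradients through.

    `gvs` is a list of (gradient, variable) pairs, as produced either by
    tf.gradients + zip or by Optimizer.compute_gradients.
    """
    return [(gg if gg is None else tf.clip_by_norm(gg, clip_norm), vv)
            for gg, vv in gvs]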
def add_scaling(self):
    x_hat_data = self.images
    x_hat = self.d_images
    # Squared norm of the critic's Jacobian w.r.t. its input, averaged over
    # the batch, plus the mean squared critic value.
    norm2_jac = squared_norm_jacobian(x_hat, x_hat_data)
    norm2_jac = tf.reduce_mean(norm2_jac)
    norm_discriminator = tf.reduce_mean(tf.square(x_hat))
    # NOTE: 'scale' is only defined for the 'grad' and 'value_and_grad' variants.
    if self.config.scaling_variant == 'grad':
        scale = 1. / (self.sc * norm2_jac + 1.)
    elif self.config.scaling_variant == 'value_and_grad':
        scale = 1. / (self.sc * (norm2_jac + norm_discriminator) + 1.)
    unscaled_g_loss = self.g_loss
    with tf.variable_scope('loss'):
        if self.config.with_scaling:
            print('[*] Adding scaling variant: %s' % self.config.scaling_variant)
            self.apply_scaling(scale)
            tf.summary.scalar(self.optim_name + '_non_scaled_G', unscaled_g_loss)
            tf.summary.scalar(self.optim_name + '_norm_grad_G', norm2_jac)
        tf.summary.scalar(self.optim_name + '_G', self.g_loss)
        tf.summary.scalar(self.optim_name + '_D', self.d_loss)
        tf.summary.scalar(self.optim_name + '_norm_D', norm_discriminator)
    print('[*] Scaling added')
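# squared_norm_jacobian is defined elsewhere in the repository. From its use
# above (per-example squared norm of the gradient of the critic output with
# respect to the input images, later averaged over the batch), a minimal
# sketch could look like the following; the signature and reduction axes are
# assumptions, not the repository's actual implementation:

import tensorflow as tf

def squared_norm_jacobian(outputs, inputs):
    """Per-example squared norm of d(outputs)/d(inputs).

    Assumes `outputs` holds per-example critic values and `inputs` is the
    corresponding NHWC image batch; returns a [batch_size] tensor.
    """
    grads = tf.gradients(tf.reduce_sum(outputs), [inputs])[0]
    return tf.reduce_sum(tf.square(grads), axis=[1, 2, 3])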
def add_gradient_penalty(self, kernel, fake, real):
    bs = min([self.batch_size, self.real_batch_size])
    real, fake = real[:bs], fake[:bs]
    # Random interpolation between real and generated samples, taken at the
    # discriminator-input level.
    alpha = tf.random_uniform(shape=[bs, 1, 1, 1])
    real_data = self.images[:bs]  # discriminator input level
    fake_data = self.G[:bs]       # discriminator input level
    x_hat_data = (1. - alpha) * real_data + alpha * fake_data
    x_hat = self.discriminator(x_hat_data, bs, update_collection="NO_OPS")
    # MMD witness function evaluated at the interpolates.
    Ekx = lambda yy: tf.reduce_mean(kernel(x_hat, yy, K_XY_only=True), axis=1)
    Ekxr, Ekxf = Ekx(real), Ekx(fake)
    witness = Ekxr - Ekxf
    gradients = tf.gradients(witness, [x_hat_data])[0]
    penalty = tf.reduce_mean(
        tf.square(safer_norm(gradients, axis=1) - 1.0))
    with tf.variable_scope('loss'):
        if self.config.gradient_penalty > 0:
            self.d_loss += penalty * self.gp
            self.optim_name += '_(gp %.1f)' % self.config.gradient_penalty
            tf.summary.scalar('dx_penalty', penalty)
            print('[*] Gradient penalty added')
        tf.summary.scalar(self.optim_name + '_G', self.g_loss)
        tf.summary.scalar(self.optim_name + '_D', self.d_loss)
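# safer_norm is a helper imported from elsewhere in the repository. The usual
# motivation for such a helper is that the gradient of tf.norm is NaN at zero,
# so an epsilon is kept inside the square root. A plausible sketch (the
# epsilon value and signature are assumptions):

import tensorflow as tf

def safer_norm(tensor, axis=None, keepdims=False, epsilon=1e-10):
    """L2 norm with an epsilon under the sqrt so the gradient is finite at 0."""
    squared = tf.reduce_sum(tf.square(tensor), axis=axis, keepdims=keepdims)
    return tf.sqrt(squared + epsilon)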
def set_loss(self, G, images):
    kernel = getattr(mmd, '_%s_kernel' % 'rbf')
    kerGI = kernel(G, images)
    with tf.variable_scope('loss'):
        self.g_loss = mmd.mmd2(kerGI)
        self.d_loss = -self.g_loss
        self.optim_name = 'kernel_loss'
    self.add_scaling()
    print('[*] Loss set')
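# The kernel is looked up on the mmd module as mmd._rbf_kernel; its real
# implementation lives there. As a rough illustration of the interface the
# call sites imply (a full kernel tuple by default, and a K_XY-only mode used
# by add_gradient_penalty), a sketch might look like this. The bandwidth
# mixture and the exact return convention are assumptions:

import tensorflow as tf

def _rbf_kernel(X, Y, sigmas=(1., 2., 4., 8., 16.), K_XY_only=False):
    """Mixture-of-bandwidths RBF kernel between flattened batches X and Y."""
    X = tf.reshape(X, [tf.shape(X)[0], -1])
    Y = tf.reshape(Y, [tf.shape(Y)[0], -1])

    def sq_dists(A, B):
        a2 = tf.reduce_sum(tf.square(A), axis=1, keepdims=True)
        b2 = tf.reduce_sum(tf.square(B), axis=1, keepdims=True)
        return a2 - 2. * tf.matmul(A, B, transpose_b=True) + tf.transpose(b2)

    def mix(D):
        return tf.add_n([tf.exp(-D / (2. * s ** 2)) for s in sigmas])

    K_XY = mix(sq_dists(X, Y))
    if K_XY_only:
        return K_XY
    return mix(sq_dists(X, X)), K_XY, mix(sq_dists(Y, Y)), float(len(sigmas))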
def set_loss(self, G, images):
    kernel = getattr(mmd, '_%s_kernel' % 'rbf')  # i.e. mmd._rbf_kernel
    kerGI = kernel(G, images)
    with tf.variable_scope('loss'):
        self.g_loss = mmd.mmd2(kerGI)
        self.d_loss = -self.g_loss
        self.optim_name = 'kernel_loss'
    self.add_gradient_penalty(kernel, G, images)
    self.add_l2_penalty()
    print('[*] Loss set')
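# mmd.mmd2 turns the kernel result into the MMD^2 estimate that serves as the
# generator loss (and, negated, as the discriminator loss). For reference,
# the standard unbiased estimator has the following form; the tuple layout
# matches the kernel sketch above and is an assumption about the mmd module:

import tensorflow as tf

def mmd2(kernel_tuple):
    """Unbiased MMD^2 estimate from (K_XX, K_XY, K_YY, wts)."""
    K_XX, K_XY, K_YY, _ = kernel_tuple
    m = tf.cast(tf.shape(K_XX)[0], tf.float32)
    n = tf.cast(tf.shape(K_YY)[0], tf.float32)
    # Diagonals are removed so each expectation runs over distinct pairs.
    e_xx = (tf.reduce_sum(K_XX) - tf.trace(K_XX)) / (m * (m - 1.))
    e_yy = (tf.reduce_sum(K_YY) - tf.trace(K_YY)) / (n * (n - 1.))
    e_xy = tf.reduce_sum(K_XY) / (m * n)
    return e_xx + e_yy - 2. * e_xy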
def build_model(self):
    is_cpu_ps = False
    self.consolidation_device = '/gpu:0'
    self.config.num_gpus = 1  # forces a single tower regardless of config
    cpu_master_worker = '/cpu:1'
    cpu_data_processor = '/cpu:0'

    # Input pipeline on its own CPU device, with prefetching so the GPU
    # towers do not wait on data loading.
    with tf.device(cpu_data_processor):
        self.set_pipeline()
        self.batch_queue = tf.contrib.slim.prefetch_queue.prefetch_queue(
            [self.image_batch], capacity=4 * self.config.num_gpus)

    # Step counters, learning rate and penalty coefficients live on the
    # master CPU device.
    with tf.device(cpu_master_worker):
        self.global_step = tf.Variable(0, name="global_step", trainable=False)
        self.global_d_step = tf.Variable(0, name="global_d_step", trainable=False)
        self.lr = tf.Variable(self.config.learning_rate, name='lr',
                              trainable=False, dtype=tf.float32)
        self.lr_decay_op = self.lr.assign(
            tf.maximum(self.lr * self.config.decay_rate, 1.e-6))
        with tf.variable_scope('loss'):
            if self.config.is_train and (self.config.gradient_penalty > 0):
                self.gp = tf.Variable(self.config.gradient_penalty,
                                      name='gradient_penalty',
                                      trainable=False, dtype=tf.float32)
                self.gp_decay_op = self.gp.assign(
                    self.gp * self.config.gp_decay_rate)
            if self.config.is_train and self.config.with_scaling:
                self.sc = tf.Variable(self.config.scaling_coeff,
                                      name='scaling_coeff',
                                      trainable=False, dtype=tf.float32)
                self.sc_decay_op = self.sc.assign(
                    self.sc * self.config.sc_decay_rate)
        self.sample_z = tf.constant(
            np.random.uniform(-1, 1,
                              size=(self.sample_size, self.z_dim)).astype(np.float32),
            dtype=tf.float32, name='sample_z')

    Generator, Discriminator = get_networks(self.config.architecture)

    # One tower per GPU: each tower dequeues its own batch, builds the loss
    # and computes its gradients; variables are shared via reuse.
    losses = []
    self.towers_g_grads = []
    self.towers_d_grads = []
    self.update_ops = []
    with tf.variable_scope(tf.get_variable_scope()):
        for i in range(self.config.num_gpus):
            worker = '/gpu:%d' % i
            device_setter = misc._create_device_setter(
                is_cpu_ps, worker, self.config.num_gpus,
                ps_device=self.consolidation_device)
            with tf.device(device_setter):
                images = self.batch_queue.dequeue()
                self.set_tower_loss('', images, Generator, Discriminator)
                tf.get_variable_scope().reuse_variables()
                with tf.name_scope('%s_%d' % ('tower', i)) as scope:
                    self.update_ops.extend(
                        tf.get_collection(tf.GraphKeys.UPDATE_OPS, scope))
                if self.config.is_train:
                    losses.append([self.g_loss, self.d_loss])
                    if i == 0:
                        t_vars = tf.trainable_variables()
                        self.d_vars = [var for var in t_vars if 'd_' in var.name]
                        self.g_vars = [var for var in t_vars if 'g_' in var.name]
                    self.compute_grads()
                    self.towers_g_grads.append(self.g_gvs)
                    self.towers_d_grads.append(self.d_gvs)

    summaries = tf.get_collection(tf.GraphKeys.SUMMARIES, scope)
    if self.config.is_train:
        self.set_optimizer()
    block = min(8, int(np.sqrt(self.real_batch_size)),
                int(np.sqrt(self.batch_size)))
    summaries.append(tf.summary.image(
        "train/input_image",
        self.imageRearrange(tf.clip_by_value(self.images, 0, 1), block)))
    summaries.append(tf.summary.image(
        "train/gen_image",
        self.imageRearrange(tf.clip_by_value(self.G_NHWC, 0, 1), block)))
    # self.TrainSummary = tf.summary.merge(summaries)
    self.saver = tf.train.Saver(max_to_keep=self.max_to_keep)
    print('[*] Model built.')
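# towers_g_grads / towers_d_grads collect one (grad, var) list per GPU, but
# the cross-tower averaging happens in set_optimizer, which is not shown here.
# The standard multi-tower pattern (adapted from the TF1 multi-GPU examples;
# the function name and its placement are assumptions) averages each
# variable's gradient over the towers before apply_gradients:

import tensorflow as tf

def average_gradients(tower_gvs):
    """Average (gradient, variable) lists collected from several towers.

    `tower_gvs` is a list over GPUs; each element is a list of (grad, var)
    pairs in the same variable order.
    """
    averaged = []
    for pairs in zip(*tower_gvs):            # one (grad, var) pair per tower
        grads = [g for g, _ in pairs if g is not None]
        var = pairs[0][1]
        if not grads:
            averaged.append((None, var))     # variable untouched by the loss
            continue
        grad = tf.reduce_mean(tf.stack(grads, axis=0), axis=0)
        averaged.append((grad, var))
    return averaged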