def build_model(self):
  all_d_grads = []
  all_g_grads = []
  config = self.config
  d_opt = tf.train.AdamOptimizer(config.discriminator_learning_rate,
                                 beta1=config.beta1)
  g_opt = tf.train.AdamOptimizer(config.generator_learning_rate,
                                 beta1=config.beta1)
  with tf.variable_scope(tf.get_variable_scope()):
    for idx, device in enumerate(self.devices):
      with tf.device("/%s" % device):
        with tf.name_scope("device_%s" % idx):
          with variables_on_gpu0():
            build_model_single_gpu(self, idx)
            d_grads = d_opt.compute_gradients(self.d_losses[-1],
                                              var_list=self.d_vars)
            g_grads = g_opt.compute_gradients(self.g_losses[-1],
                                              var_list=self.g_vars)
            all_d_grads.append(d_grads)
            all_g_grads.append(g_grads)
            tf.get_variable_scope().reuse_variables()
  avg_d_grads = avg_grads(all_d_grads)
  avg_g_grads = avg_grads(all_g_grads)
  self.d_optim = d_opt.apply_gradients(avg_d_grads)
  self.g_optim = g_opt.apply_gradients(avg_g_grads)

def build_model(self):
  all_d_grads = []
  all_g_grads = []
  config = self.config
  d_opt = tf.train.AdamOptimizer(config.discriminator_learning_rate,
                                 beta1=config.beta1)
  g_opt = tf.train.AdamOptimizer(config.generator_learning_rate,
                                 beta1=config.beta1)
  for idx, device in enumerate(self.devices):
    with tf.device("/%s" % device):
      with tf.name_scope("device_%s" % idx):
        with variables_on_gpu0():
          build_model_single_gpu(self, idx)
          d_grads = d_opt.compute_gradients(self.d_losses[-1],
                                            var_list=self.d_vars)
          g_grads = g_opt.compute_gradients(self.g_losses[-1],
                                            var_list=self.g_vars)
          all_d_grads.append(d_grads)
          all_g_grads.append(g_grads)
          tf.get_variable_scope().reuse_variables()
  avg_d_grads = avg_grads(all_d_grads)
  avg_g_grads = avg_grads(all_g_grads)
  self.d_optim = d_opt.apply_gradients(avg_d_grads)
  self.g_optim = g_opt.apply_gradients(avg_g_grads)

def build_model(self):
  config = self.config
  self.d_opt = tf.train.AdamOptimizer(FLAGS.discriminator_learning_rate,
                                      beta1=FLAGS.beta1)
  self.g_opt = tf.train.AdamOptimizer(FLAGS.generator_learning_rate,
                                      beta1=FLAGS.beta1)
  with tf.variable_scope('model') as model_scope:
    if config.num_towers > 1:
      all_d_grads = []
      all_g_grads = []
      for idx, device in enumerate(self.devices):
        with tf.device('/%s' % device):
          with tf.name_scope('device_%s' % idx):
            with ops.variables_on_gpu0():
              self.build_model_single_gpu(
                  gpu_idx=idx,
                  batch_size=config.batch_size,
                  num_towers=config.num_towers)
              d_grads = self.d_opt.compute_gradients(
                  self.d_losses[-1], var_list=self.d_vars)
              g_grads = self.g_opt.compute_gradients(
                  self.g_losses[-1], var_list=self.g_vars)
              all_d_grads.append(d_grads)
              all_g_grads.append(g_grads)
              model_scope.reuse_variables()
      d_grads = ops.avg_grads(all_d_grads)
      g_grads = ops.avg_grads(all_g_grads)
    else:
      self.build_model_single_gpu(batch_size=config.batch_size,
                                  num_towers=config.num_towers)
      d_grads = self.d_opt.compute_gradients(self.d_losses[-1],
                                             var_list=self.d_vars)
      g_grads = self.g_opt.compute_gradients(self.g_losses[-1],
                                             var_list=self.g_vars)

  d_step = tf.get_variable('d_step', initializer=0, trainable=False)
  self.d_optim = self.d_opt.apply_gradients(d_grads, global_step=d_step)
  g_step = tf.get_variable('g_step', initializer=0, trainable=False)
  self.g_optim = self.g_opt.apply_gradients(g_grads, global_step=g_step)

def build_model(self): """Builds a model.""" config = self.config # If ps_tasks is zero, the local device is used. When using multiple # (non-local) replicas, the ReplicaDeviceSetter distributes the variables # across the different devices. current_step = tf.cast(self.global_step, tf.float32) # g_ratio = (1.0 + 2e-5 * tf.maximum((current_step - 100000.0), 0.0)) # g_ratio = tf.minimum(g_ratio, 4.0) self.d_learning_rate = FLAGS.discriminator_learning_rate self.g_learning_rate = FLAGS.generator_learning_rate # self.g_learning_rate = FLAGS.generator_learning_rate / (1.0 + 2e-5 * tf.cast(self.global_step, tf.float32)) # self.g_learning_rate = FLAGS.generator_learning_rate / g_ratio with tf.device(tf.train.replica_device_setter(config.ps_tasks)): self.d_opt = tf.train.AdamOptimizer(self.d_learning_rate, beta1=FLAGS.beta1) self.g_opt = tf.train.AdamOptimizer(self.g_learning_rate, beta1=FLAGS.beta1) if config.sync_replicas and config.num_workers > 1: self.d_opt = tf.train.SyncReplicasOptimizer( opt=self.d_opt, replicas_to_aggregate=config.replicas_to_aggregate) self.g_opt = tf.train.SyncReplicasOptimizer( opt=self.g_opt, replicas_to_aggregate=config.replicas_to_aggregate) if config.num_towers > 1: all_d_grads = [] all_g_grads = [] for idx, device in enumerate(self.devices): with tf.device('/%s' % device): with tf.name_scope('device_%s' % idx): with ops.variables_on_gpu0(): self.build_model_single_gpu( gpu_idx=idx, batch_size=config.batch_size, num_towers=config.num_towers) d_grads = self.d_opt.compute_gradients( self.d_losses[-1], var_list=self.d_vars) g_grads = self.g_opt.compute_gradients( self.g_losses[-1], var_list=self.g_vars) all_d_grads.append(d_grads) all_g_grads.append(g_grads) d_grads = ops.avg_grads(all_d_grads) g_grads = ops.avg_grads(all_g_grads) else: with tf.device(tf.train.replica_device_setter(config.ps_tasks)): # TODO(olganw): reusing virtual batchnorm doesn't work in the multi- # replica case. self.build_model_single_gpu(batch_size=config.batch_size, num_towers=config.num_towers) d_grads = self.d_opt.compute_gradients(self.d_losses[-1], var_list=self.d_vars) g_grads = self.g_opt.compute_gradients(self.g_losses[-1], var_list=self.g_vars) with tf.device(tf.train.replica_device_setter(config.ps_tasks)): update_moving_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS) print('update_moving_ops', update_moving_ops) if config.sync_replicas: with tf.control_dependencies(update_moving_ops): d_step = tf.get_variable('d_step', initializer=0, trainable=False) self.d_optim = self.d_opt.apply_gradients( d_grads, global_step=d_step) g_step = tf.get_variable('g_step', initializer=0, trainable=False) self.g_optim = self.g_opt.apply_gradients( g_grads, global_step=g_step) else: # Don't create any additional counters, and don't update the global step with tf.control_dependencies(update_moving_ops): self.d_optim = self.d_opt.apply_gradients(d_grads) self.g_optim = self.g_opt.apply_gradients(g_grads)