def make_summary_ops(self, scope, detailed_logs):
    """Create a merged summary op for losses and gradient norms.

    Args:
      scope: Variable scope whose trainable variables are summarized.
      detailed_logs: If True, additionally emit histogram summaries of
        gradients, optimizer state, activations and weights.

    Returns:
      A merged summary tensor.
    """
    tvars = tf.trainable_variables(scope)
    policy_grads = tf.gradients(self.policy_loss, tvars)
    value_grads = tf.gradients(self.value_loss, tvars)
    combined_grads = tf.gradients(self.loss, tvars)
    # Scalar diagnostics: RL stats, the three loss terms, and the norms of
    # the three gradient sets (plus the post-clipping norm kept on self).
    named_scalars = [
        ('rl/policy_entropy', self.policy_entropy),
        ('rl/advantage_mean', tf.reduce_mean(self.advantage)),
        ('loss/loss_policy', self.policy_loss),
        ('loss/loss_value', self.value_loss),
        ('loss/loss_combined', self.loss),
        ('loss/grads_norm_policy', tf.global_norm(policy_grads)),
        ('loss/grads_norm_value', tf.global_norm(value_grads)),
        ('loss/grads_norm_combined', tf.global_norm(combined_grads)),
        ('loss/grads_norm_combined_clipped', self.grads_norm),
    ]
    summaries = [tf.summary.scalar(name, tensor)
                 for name, tensor in named_scalars]
    if detailed_logs:
        summaries += make_grad_histograms(tvars, combined_grads)
        summaries += make_rmsprop_histograms(self.optimizer)
        summaries += make_histograms(self.layers, 'activations')
        summaries += make_histograms(tvars, 'weights')
    return tf.summary.merge(summaries)
def _update_step(self, observ, action, old_mean, old_logstd, reward, advantage, length):
    """Compute the current combined loss and perform a gradient update step.

    Args:
      observ: Sequences of observations.
      action: Sequences of actions.
      old_mean: Sequences of action means of the behavioral policy.
      old_logstd: Sequences of action log stddevs of the behavioral policy.
      reward: Sequences of reward.
      advantage: Sequences of advantages.
      length: Batch of sequence lengths.

    Returns:
      Tuple of value loss, policy loss, and summary tensor.
    """
    value_loss, value_summary = self._value_loss(observ, reward, length)
    network = self._network(observ, length)
    policy_loss, policy_summary = self._policy_loss(
        network.mean, network.logstd, old_mean, old_logstd, action,
        advantage, length)
    # Compute both gradient sets separately, then concatenate them so a
    # single apply_gradients call performs one combined update step.
    value_gradients, value_variables = (
        zip(*self._optimizer.compute_gradients(value_loss)))
    policy_gradients, policy_variables = (
        zip(*self._optimizer.compute_gradients(policy_loss)))
    all_gradients = value_gradients + policy_gradients
    all_variables = value_variables + policy_variables
    optimize = self._optimizer.apply_gradients(
        zip(all_gradients, all_variables))
    summary = tf.summary.merge([
        value_summary, policy_summary,
        tf.summary.scalar('value_gradient_norm', tf.global_norm(value_gradients)),
        tf.summary.scalar('policy_gradient_norm', tf.global_norm(policy_gradients)),
        utility.gradient_summaries(zip(value_gradients, value_variables),
                                   dict(value=r'.*')),
        utility.gradient_summaries(zip(policy_gradients, policy_variables),
                                   dict(policy=r'.*'))
    ])
    # tf.identity under the control dependency forces the optimizer step to
    # run before the returned tensors can be evaluated.
    with tf.control_dependencies([optimize]):
        return [tf.identity(x) for x in (value_loss, policy_loss, summary)]
def update(self, samples, contexts, dev_samples, dev_contexts):
    """Perform one meta-gradient update: a policy step on `samples` followed
    by a score-variable step driven by gradients from the dev batch.

    Args:
      samples: Training samples (each carrying a `.traj` trajectory).
      contexts: Contexts matching `samples`.
      dev_samples: Held-out samples used for the meta (score) gradient.
      dev_contexts: Contexts matching `dev_samples`.
    """
    if self._counter % 20 == 0:
        # To prevent memory leaks in tf eager
        tf.set_random_seed(self._seed)
    actions, rews, weights, kwargs = self.create_batch(
        samples, contexts=contexts)
    dev_actions, dev_rews, dev_weights, dev_kwargs = self.create_batch(
        dev_samples, contexts=dev_contexts)
    trajs = (s.traj for s in samples)
    with tf.GradientTape(
            watch_accessed_variables=False, persistent=True) as tape0:
        tape0.watch(self._score_vars)
        scores = self.compute_scores(trajs, return_tensors=True)
        scores = [
            tf.nn.softmax(x)
            for x in tf.split(scores, len(actions) // 10, axis=0)
        ]
        scores = tf.concat(scores, axis=0)
        rews = rews * tf.expand_dims(scores, axis=-1)
        grads = self._compute_gradients(actions, rews, weights, **kwargs)
        grads, _ = tf.clip_by_global_norm(grads, self.max_grad_norm)
        # Bug fix: materialize as a list. In Python 3 `zip` is a one-shot
        # iterator; the original was exhausted by the `new_vars`
        # comprehension, so `apply_gradients` and the summary logging below
        # saw an empty sequence.
        grads_and_vars = list(zip(grads, self.trainable_variables))
        new_vars = [v - self.learning_rate * g for g, v in grads_and_vars]
        self.optimizer.apply_gradients(grads_and_vars)
        grads_loss = self._compute_gradients(
            dev_actions, dev_rews, dev_weights, loss_str='dev',
            use_entropy_regularization=False, **dev_kwargs)
    # Differentiate the (manually computed) updated variables w.r.t. the
    # score variables, chaining in the dev-batch gradients.
    score_grads = tape0.gradient(
        new_vars, self._score_vars, output_gradients=grads_loss)
    del tape0
    # Bug fix: materialize so the pairs can be both applied and logged.
    score_grads_and_vars = list(self._score_grad_clipping(
        zip(score_grads, self._score_vars)))
    self.score_optimizer.apply_gradients(
        score_grads_and_vars, global_step=self.global_step)
    if self.log_summaries:
        # Bug fix: `zip(*pairs)[0]` is invalid in Python 3 (zip objects are
        # not subscriptable); unpack with comprehensions instead.
        grads = [g for g, _ in grads_and_vars]
        score_grads = [g for g, _ in score_grads_and_vars]
        contrib_summary.scalar('global_norm/train_grad', tf.global_norm(grads))
        contrib_summary.scalar('global_norm/meta_grad',
                               tf.global_norm(score_grads))
    if self._debug and (self._counter % self.log_every == 0):
        tf.print(
            'Epoch {} scores='.format(self._counter),
            scores[:20], summarize=10, output_stream=sys.stdout)
    self._counter += 1
def CreateTrainOp(total_loss, optimizer, global_step, variables_to_train, transform_grads_fn):
    """Build a train op: compute (optionally transformed) gradients,
    summarize them, apply them, and gate the returned loss on the update.

    Args:
      total_loss: Scalar loss tensor to minimize.
      optimizer: A tf.train.Optimizer instance.
      global_step: Step counter passed to apply_gradients.
      variables_to_train: Variables to differentiate with respect to.
      transform_grads_fn: Optional callable mapping grads_and_vars to
        transformed grads_and_vars (e.g. clipping).

    Returns:
      The loss tensor, wrapped in a numerics check that only runs after the
      gradient update has been applied.
    """
    grads_and_vars = optimizer.compute_gradients(total_loss, variables_to_train)
    if transform_grads_fn:
        grads_and_vars = transform_grads_fn(grads_and_vars)
    with tf.name_scope("summarize_grads"):
        for grad, var in grads_and_vars:
            if grad is None:
                logging.info("Var %s has no gradient", var.op.name)
                continue
            grad_values = grad.values if isinstance(grad, tf.IndexedSlices) else grad
            tf.summary.histogram(var.op.name + "_gradient", grad_values)
            tf.summary.scalar(var.op.name + "_gradient_norm",
                              tf.global_norm([grad_values]))
    grad_updates = optimizer.apply_gradients(grads_and_vars,
                                             global_step=global_step)
    with tf.name_scope("train_op"):
        with tf.control_dependencies([grad_updates]):
            return tf.check_numerics(total_loss, "LossTensor is inf or nan")
def ProcessGradients(grads_and_vars, global_gradient_clip=0.0, sanitize_gradients=False, normalize_gradients=False):
    """Optionally sanitize, normalize, and globally clip gradients.

    Args:
      grads_and_vars: List of (gradient, variable) pairs.
      global_gradient_clip: If > 0, clip all gradients to this global norm.
      sanitize_gradients: If True, replace non-finite gradient entries with 0.
      normalize_gradients: If True, rescale each gradient to ~unit L2 norm.

    Returns:
      The processed list of (gradient, variable) pairs.
    """
    tf.logging.info("Processing gradients")  # typo fix: was "Prcessing"
    grads, vars_ = list(zip(*grads_and_vars))
    if sanitize_gradients:
        # Zero out NaN/Inf entries; None gradients pass through unchanged.
        new_grads = []
        for g in grads:
            if g is not None:
                g = tf.where(tf.is_finite(g), g, tf.zeros_like(g))
            new_grads.append(g)
        grads = new_grads
    if normalize_gradients:
        new_grads = []
        for g in grads:
            if g is not None:
                # The 1e-12 floor guards against division by zero.
                g *= tf.rsqrt(tf.maximum(1e-12, tf.reduce_sum(tf.square(g))))
            new_grads.append(g)
        grads = new_grads
    if global_gradient_clip > 0:
        grads, grad_norm = tf.clip_by_global_norm(grads, global_gradient_clip)
    else:
        grad_norm = tf.global_norm(grads)
    # Bug fix: rebuild the pairs unconditionally. Previously this happened
    # only in the clipping branch, so sanitized/normalized gradients were
    # silently discarded whenever global_gradient_clip == 0.
    grads_and_vars = list(zip(grads, vars_))
    tf.summary.scalar("global_grad_norm", grad_norm)
    return grads_and_vars
def gradient_summaries(gvs, suppress_inf_and_nans=False):
    """Creates summaries for norm, mean and var of gradients.

    Args:
      gvs: Iterable of (gradient, variable) pairs, e.g. the output of
        `optimizer.compute_gradients` or a `zip(...)`.
      suppress_inf_and_nans: If True, report -1 instead of a non-finite
        global norm.

    Returns:
      Dict containing the global-norm tensor under 'grad_global_norm'.
    """
    # Bug fix: materialize the pairs first. Callers pass `zip(...)` objects,
    # which in Python 3 are one-shot iterators — the second iteration below
    # would otherwise see an empty sequence and all stats would stay zero.
    gvs = list(gvs)
    gs = [gv[0] for gv in gvs]
    grad_global_norm = tf.global_norm(gs, 'gradient_global_norm')
    if suppress_inf_and_nans:
        # Replace a NaN/Inf norm by -1 so dashboards remain readable.
        is_nan_or_inf = tf.logical_or(tf.is_nan(grad_global_norm),
                                      tf.is_inf(grad_global_norm))
        grad_global_norm = tf.where(is_nan_or_inf,
                                    tf.zeros_like(grad_global_norm) - 1.,
                                    grad_global_norm)
    grad_abs_max, grad_abs_mean, grad_mean, grad_var = [0.] * 4
    n_grads = 1e-8  # tiny epsilon so empty gradient lists don't divide by 0
    for g, _ in gvs:
        if isinstance(g, tf.IndexedSlices):
            g = g.values
        if g is not None:
            current_n_grads = np.prod(g.shape.as_list())
            abs_g = abs(g)
            # Moments over all axes give per-tensor mean and variance.
            mean, var = tf.nn.moments(g, list(range(len(g.shape))))
            grad_abs_max = tf.maximum(grad_abs_max, tf.reduce_max(abs_g))
            grad_abs_mean += tf.reduce_sum(abs_g)
            grad_mean += mean * current_n_grads
            grad_var += var
            n_grads += current_n_grads
    tf.summary.scalar('grad/abs_max', grad_abs_max)
    tf.summary.scalar('grad/abs_mean', grad_abs_mean / n_grads)
    tf.summary.scalar('grad/mean', grad_mean / n_grads)
    tf.summary.scalar('grad/var', grad_var / n_grads)
    return dict(grad_global_norm=grad_global_norm)
def build_train(self, initial_lr):
    """Create the decayed learning rate, clipped gradients and train op.

    Args:
      initial_lr: Initial learning rate before decay.
    """
    # The rate decays by a factor of 0.8 each time `decay_step` is
    # manually incremented (staircase with a period of 1).
    decay_step = tf.Variable(0.0, name='decay_step', trainable=False)
    learning_rate = tf.train.exponential_decay(
        initial_lr, decay_step, 1, 0.8, staircase=True,
        name="learning_rate")
    tvars = tf.trainable_variables()
    optimizer = tf.train.RMSPropOptimizer(learning_rate, decay=0.9)
    # Clip gradients to a global norm of 1.0 before applying them.
    gradients = tf.gradients(self.loss, tvars)
    gradients, _ = tf.clip_by_global_norm(
        gradients, 1.0, use_norm=tf.global_norm(gradients))
    self.decay_step = decay_step
    self.learning_rate = learning_rate
    self.train_op = optimizer.apply_gradients(zip(gradients, tvars))
def __init__(self, mdp, n_input, lr, n_h1=400, n_h2=300, l2=10, name='deep_irl_fc'):
    """Build the deep-IRL reward network, its gradient ops and optimizer.

    Args:
      mdp: Environment/MDP handed to the base class.
      n_input: Input feature dimension.
      lr: Learning rate for the Adam optimizer.
      n_h1: First hidden layer size.
      n_h2: Second hidden layer size.
      l2: Weight on the L2 regularization gradient.
      name: Variable-scope name for the network.
    """
    super(DeepIRLFC, self).__init__(mdp, lr)
    self.n_input = n_input
    self.lr = lr
    self.n_h1 = n_h1
    self.n_h2 = n_h2
    self.name = name
    self.sess = tf.compat.v1.Session()
    self.input_s, self.reward, self.theta = self._build_network(self.name)
    self.optimizer = tf.train.AdamOptimizer(lr)
    # Gradient of the external IRL objective w.r.t. the predicted reward,
    # fed in at run time.
    self.grad_r = tf.placeholder(tf.float32, [None, 1])
    self.l2_loss = tf.add_n([tf.nn.l2_loss(v) for v in self.theta])
    self.grad_l2 = tf.gradients(self.l2_loss, self.theta)
    # Backprop -grad_r through the reward net, then add the scaled L2 term.
    self.grad_theta = tf.gradients(self.reward, self.theta, -self.grad_r)
    self.grad_theta = [
        l2 * g_l2 + g for g, g_l2 in zip(self.grad_theta, self.grad_l2)
    ]
    self.grad_theta, _ = tf.clip_by_global_norm(self.grad_theta, 100.0)
    self.grad_norms = tf.global_norm(self.grad_theta)
    self.optimize = self.optimizer.apply_gradients(
        zip(self.grad_theta, self.theta))
    self.sess.run(tf.compat.v1.global_variables_initializer())
def _update_policy_step(self, observ, action, old_mean, old_logstd, advantage, length):
    """Compute the current policy loss and perform a gradient update step.

    Args:
      observ: Sequences of observations.
      action: Sequences of actions.
      old_mean: Sequences of action means of the behavioral policy.
      old_logstd: Sequences of action log stddevs of the behavioral policy.
      advantage: Sequences of advantages.
      length: Batch of sequence lengths.

    Returns:
      Tuple of loss tensor and summary tensor.
    """
    network = self._network(observ, length)
    loss, summary = self._policy_loss(
        network.mean, network.logstd, old_mean, old_logstd, action,
        advantage, length)
    grads_and_vars = self._policy_optimizer.compute_gradients(loss)
    gradients, variables = zip(*grads_and_vars)
    optimize = self._policy_optimizer.apply_gradients(grads_and_vars)
    # Attach gradient diagnostics to the policy summary.
    summary = tf.summary.merge([
        summary,
        tf.summary.scalar('gradient_norm', tf.global_norm(gradients)),
        utility.gradient_summaries(zip(gradients, variables),
                                   dict(policy=r'.*'))
    ])
    # Force the optimizer step to run before the results are readable.
    with tf.control_dependencies([optimize]):
        return [tf.identity(loss), tf.identity(summary)]
def _add_gradients_summaries(grads_and_vars):
    """Add histogram summaries to gradients.

    Note: The summaries are also added to the SUMMARIES collection.

    Args:
      grads_and_vars: A list of gradient to variable pairs (tuples).

    Returns:
      The _list_ of the added summaries for grads_and_vars.
    """
    summaries = []
    for grad, var in grads_and_vars:
        if grad is None:
            tf.logging.info('Var %s has no gradient', var.op.name)
            continue
        # IndexedSlices carry their data in `.values`; dense grads are used
        # directly.
        grad_values = grad.values if isinstance(grad, tf.IndexedSlices) else grad
        summaries.append(
            tf.summary.histogram(var.op.name + ':gradient', grad_values))
        summaries.append(
            tf.summary.histogram(var.op.name + ':gradient_norm',
                                 tf.global_norm([grad_values])))
    return summaries
def _build_train_op(loss, tf_vars, learning_rate, train_step, num_aggregate):
    """Build training ops from `loss` tensor.

    Args:
      loss: Scalar loss to minimize.
      tf_vars: Variables to differentiate with respect to.
      learning_rate: Learning rate for Adam.
      train_step: Global step variable passed to apply_gradients.
      num_aggregate: Number of replica gradients to aggregate per update.

    Returns:
      Tuple of (train_op, wrapped optimizer, gradient global norm).
    """
    base_optimizer = tf.train.AdamOptimizer(learning_rate)
    # Wrap in SyncReplicasOptimizer so gradients from `num_aggregate`
    # replicas are averaged before each update.
    optim = tf.train.SyncReplicasOptimizer(
        base_optimizer, replicas_to_aggregate=num_aggregate,
        total_num_replicas=1)
    grads = tf.gradients(loss, tf_vars)
    grad_norm = tf.global_norm(grads)
    train_op = optim.apply_gradients(zip(grads, tf_vars),
                                     global_step=train_step)
    return train_op, optim, grad_norm
def build_train_graph(self, hparams, length):
    """Build the NMT training graph under the shared "nmt" variable scope.

    Args:
      hparams: Hyperparameters for embedding/graph construction.
      length: Sequence length stored on the instance.

    Returns:
      Tuple of (gradients, global gradient norm) in TRAIN/EVAL mode.
      NOTE(review): in INFER mode the function falls off the end and
      implicitly returns None — callers must handle that case.
    """
    # Train graph
    self.length = length
    with tf.variable_scope("nmt", reuse=tf.AUTO_REUSE):
        self.init_embeddings(hparams)
        res = self.build_graph(hparams)
        # Side effect: sets self.gradients (among others) based on mode.
        self._set_train_or_infer(res)
        if self.mode != contrib_learn.ModeKeys.INFER:
            return self.gradients, tf.global_norm(self.gradients)
def gradient_clip(gradients, max_gradient_norm):
    """Clipping gradients of a model.

    Args:
      gradients: List of gradient tensors.
      max_gradient_norm: Global norm to clip to.

    Returns:
      Tuple of (clipped gradients, list of norm summary ops).
    """
    clipped_gradients, gradient_norm = tf.clip_by_global_norm(
        gradients, max_gradient_norm)
    # Record both the pre-clip and post-clip global norms.
    gradient_norm_summary = [
        tf.summary.scalar("grad_norm", gradient_norm),
        tf.summary.scalar("clipped_gradient",
                          tf.global_norm(clipped_gradients)),
    ]
    return clipped_gradients, gradient_norm_summary
def get_train_op(config, variables, gradients, optimizer, clip_norm=0):
    """Build the apply-gradients op and a gradient-norm tensor.

    Args:
      config: Unused here; kept for interface compatibility.
      variables: Variables matching `gradients`.
      gradients: Gradient tensors (entries may be None).
      optimizer: Optimizer whose apply_gradients is called.
      clip_norm: If > 0, clip gradients to this global norm.

    Returns:
      Tuple of (train op, gradient global-norm tensor).
    """
    if clip_norm > 0:
        gradients, gradients_norm = tf.clip_by_global_norm(
            gradients, clip_norm=clip_norm)
    else:
        gradients_norm = tf.global_norm(gradients)
    # If every gradient is None (or the list is empty) there is nothing to
    # apply; emit a no-op with the same name so downstream code is uniform.
    if gradients and any(grad is not None for grad in gradients):
        train_op = optimizer.apply_gradients(zip(gradients, variables),
                                             name="apply_gradients")
    else:
        train_op = tf.no_op(name="apply_gradients")
    return train_op, gradients_norm
def get_train_op_and_metrics(loss, params):
    """Generate training op and metrics to save in TensorBoard.

    Args:
      loss: Scalar loss tensor to minimize.
      params: Dict of hyperparameters — learning-rate settings, Adam betas /
        epsilon, hidden_size, dtype, and TPU flags are read here.

    Returns:
      Tuple of (train_op, dict of scalar metric tensors for TensorBoard).
    """
    with tf.variable_scope("get_train_op"):
        learning_rate = get_learning_rate(
            learning_rate=params["learning_rate"],
            hidden_size=params["hidden_size"],
            learning_rate_warmup_steps=params["learning_rate_warmup_steps"])
        # Create optimizer. Use LazyAdamOptimizer from TF contrib, which is faster
        # than the TF core Adam optimizer.
        from tensorflow.contrib import opt as contrib_opt  # pylint: disable=g-import-not-at-top
        optimizer = contrib_opt.LazyAdamOptimizer(
            learning_rate,
            beta1=params["optimizer_adam_beta1"],
            beta2=params["optimizer_adam_beta2"],
            epsilon=params["optimizer_adam_epsilon"])
        if params["use_tpu"] and params["tpu"] != tpu_util.LOCAL:
            # Average gradients across TPU shards each step.
            optimizer = tf.compat.v1.tpu.CrossShardOptimizer(optimizer)
        # Uses automatic mixed precision FP16 training if on GPU.
        if params["dtype"] == "fp16":
            optimizer = tf.train.experimental.enable_mixed_precision_graph_rewrite(
                optimizer)
        # Calculate and apply gradients using LazyAdamOptimizer.
        global_step = tf.train.get_global_step()
        tvars = tf.trainable_variables()
        gradients = optimizer.compute_gradients(
            loss, tvars, colocate_gradients_with_ops=True)
        minimize_op = optimizer.apply_gradients(
            gradients, global_step=global_step, name="train")
        # Group with UPDATE_OPS so batch-norm style updates run each step.
        update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
        train_op = tf.group(minimize_op, update_ops)
        train_metrics = {"learning_rate": learning_rate}
        if not params["use_tpu"]:
            # gradient norm is not included as a summary when running on TPU, as
            # it can cause instability between the TPU and the host controller.
            gradient_norm = tf.global_norm(list(zip(*gradients))[0])
            train_metrics["global_norm/gradient_norm"] = gradient_norm
        return train_op, train_metrics
def add_optimizer_op(self, scope): """ Set self.train_op and self.grad_norm Args: scope: (string) scope name, that specifies if target network or not """ ############################################################## """ TODO: 1. get Adam Optimizer 2. compute grads with respect to variables in scope for self.loss 3. if self.config.grad_clip is True, then clip the grads by norm using self.config.clip_val 4. apply the gradients and store the train op in self.train_op (sess.run(train_op) must update the variables) 5. compute the global norm of the gradients (which are not None) and store this scalar in self.grad_norm HINT: you may find the following functions useful - tf.get_collection - optimizer.compute_gradients - tf.clip_by_norm - optimizer.apply_gradients - tf.global_norm you can access config variables by writing self.config.variable_name """ ############################################################## #################### YOUR CODE HERE - 8-12 lines ############# opt = tf.train.AdamOptimizer(learning_rate=self.lr) scope_vars = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope=scope) # print(tf.GraphKeys.GLOBAL_VARIABLES) # print(scope_vars) # print(scope) grads = opt.compute_gradients(self.loss, scope_vars) if self.config.grad_clip: grads=[(tf.clip_by_norm(grad, self.config.clip_val), var) for grad, var in grads] self.train_op = opt.apply_gradients(grads) self.grad_norm = tf.global_norm([grad[0] for grad in grads])
def add_optimizer_op(self, scope): """ Set self.train_op and self.grad_norm Args: scope: (string) name of the scope whose variables we are differentiating with respect to """ ############################################################## """ TODO: 1. get Adam Optimizer 2. compute grads with respect to variables in scope for self.loss 3. if self.config.grad_clip is True, then clip the grads by norm using self.config.clip_val 4. apply the gradients and store the train op in self.train_op (sess.run(train_op) must update the variables) 5. compute the global norm of the gradients (which are not None) and store this scalar in self.grad_norm HINT: you may find the following functions useful - tf.get_collection - optimizer.compute_gradients - tf.clip_by_norm - optimizer.apply_gradients - tf.global_norm you can access config variables by writing self.config.variable_name """ ############################################################## #################### YOUR CODE HERE - 8-12 lines ############# optimizer = tf.train.AdamOptimizer(learning_rate=self.lr) var = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope=scope) gradient, variable = list(zip(*optimizer.compute_gradients(self.loss, var))) if self.config.grad_clip: gradient, _ = tf.clip_by_global_norm(gradient, self.config.clip_val) self.train_op = optimizer.apply_gradients(list(zip(gradient, variable))) self.grad_norm = tf.global_norm(gradient)
def make_train_op(compute_scope_loss, optimizer, compute_scope, apply_scope, max_grad_norm=None):
    """Compute gradients in one scope and apply them to the matching
    variables of another scope.

    Args:
      compute_scope_loss: Loss differentiated w.r.t. compute_scope variables.
      optimizer: Optimizer used to apply the gradients.
      compute_scope: the scope in which to calculate gradients
      apply_scope: the scope in which to apply the gradients
      max_grad_norm: Optional global-norm clip value.

    Returns:
      Tuple of (train op, global norm of the non-None gradients).
    """
    compute_vars = tf.trainable_variables(compute_scope)
    compute_grads = tf.gradients(compute_scope_loss, compute_vars)
    if max_grad_norm is not None:
        compute_grads, _ = tf.clip_by_global_norm(compute_grads, max_grad_norm)
    # Stripped variable name -> gradient computed in compute_scope
    # (None gradients are dropped).
    grads_by_name = {
        strip_var_name(var.name): grad
        for grad, var in zip(compute_grads, compute_vars)
        if grad is not None
    }
    grads_norm = tf.global_norm(list(grads_by_name.values()))
    # Stripped variable name -> corresponding variable in apply_scope.
    apply_vars_by_name = {
        strip_var_name(var.name): var
        for var in tf.trainable_variables(apply_scope)
    }
    # Pair each compute-scope gradient with its apply-scope twin.
    pairs = [(grad, apply_vars_by_name[name])
             for name, grad in grads_by_name.items()]
    train_op = optimizer.apply_gradients(pairs)
    return train_op, grads_norm
def get_train_step(model, dataset, optimizer):
    """Build one training step with clipped gradients and summaries.

    Args:
      model: Callable model; also provides get_overview_images.
      dataset: Callable returning input tensors for a named split.
      optimizer: Optimizer used for compute/apply gradients.

    Returns:
      Tuple of (global step tensor, scalar dict, train op).
    """
    loss, scalars, _ = model(dataset("train"))
    global_step = tf.train.get_or_create_global_step()
    gradients, variables = zip(*optimizer.compute_gradients(loss))
    global_norm = tf.global_norm(gradients)
    # Reuse the already-computed norm when clipping to 5.0.
    gradients, global_norm = tf.clip_by_global_norm(
        gradients, 5.0, use_norm=global_norm)
    train_op = optimizer.apply_gradients(zip(gradients, variables),
                                         global_step=global_step)
    # Build the overview/summary ops only after the update has run.
    with tf.control_dependencies([train_op]):
        overview = model.get_overview_images(dataset("summary"))
        scalars["debug/global_grad_norm"] = global_norm
        summaries = {
            key: tf.summary.scalar(key, value)
            for key, value in scalars.items()
        }
        summaries.update({
            key: tf.summary.image(key, value)
            for key, value in overview.items()
        })
        return tf.identity(global_step), scalars, train_op
def _update_value_step(self, observ, reward, length):
    """Compute the current value loss and perform a gradient update step.

    Args:
      observ: Sequences of observations.
      reward: Sequences of reward.
      length: Batch of sequence lengths.

    Returns:
      Tuple of loss tensor and summary tensor.
    """
    loss, summary = self._value_loss(observ, reward, length)
    grads_and_vars = self._value_optimizer.compute_gradients(loss)
    gradients, variables = zip(*grads_and_vars)
    optimize = self._value_optimizer.apply_gradients(grads_and_vars)
    # Attach gradient diagnostics to the value summary.
    summary = tf.summary.merge([
        summary,
        tf.summary.scalar('gradient_norm', tf.global_norm(gradients)),
        utility.gradient_summaries(zip(gradients, variables),
                                   dict(value=r'.*'))
    ])
    # Force the optimizer step to run before the results are readable.
    with tf.control_dependencies([optimize]):
        return [tf.identity(loss), tf.identity(summary)]
def get_train_ops(loss, tf_variables, train_step, clip_mode=None, grad_bound=None, l2_reg=1e-4,
                  lr_warmup_val=None, lr_warmup_steps=100, lr_init=0.1, lr_dec_start=0,
                  lr_dec_every=10000, lr_dec_rate=0.1, lr_dec_min=None, lr_cosine=False,
                  lr_max=None, lr_min=None, lr_T_0=None, lr_T_mul=None,
                  num_train_batches=None, optim_algo=None, sync_replicas=False,
                  num_aggregate=None, num_replicas=None, get_grad_norms=False,
                  moving_average=None):
    """Build the training op with L2 regularization, gradient clipping and a
    learning-rate schedule.

    Args:
      clip_mode: "global", "norm", or None.
      moving_average: store the moving average of parameters

    Returns:
      (train_op, learning_rate, grad_norm, opt) — plus the per-variable
      grad-norms dict as a fifth element when `get_grad_norms` is True.
    """
    if l2_reg > 0:
        # Explicit L2 penalty over all trainable variables.
        l2_losses = [tf.reduce_sum(var**2) for var in tf_variables]
        loss += l2_reg * tf.add_n(l2_losses)

    grads = tf.gradients(loss, tf_variables)
    # Norms are taken on the *unclipped* gradients.
    grad_norm = tf.global_norm(grads)
    grad_norms = {}
    for v, g in zip(tf_variables, grads):
        if v is None or g is None:
            continue
        if isinstance(g, tf.IndexedSlices):
            grad_norms[v.name] = tf.sqrt(tf.reduce_sum(g.values**2))
        else:
            grad_norms[v.name] = tf.sqrt(tf.reduce_sum(g**2))

    if clip_mode is not None:
        assert grad_bound is not None, "Need grad_bound to clip gradients."
        if clip_mode == "global":
            grads, _ = tf.clip_by_global_norm(grads, grad_bound)
        elif clip_mode == "norm":
            clipped = []
            for g in grads:
                if isinstance(g, tf.IndexedSlices):
                    c_g = tf.clip_by_norm(g.values, grad_bound)
                    # Bug fix: tf.IndexedSlices takes (values, indices); the
                    # original passed them in swapped order.
                    c_g = tf.IndexedSlices(c_g, g.indices)
                else:
                    c_g = tf.clip_by_norm(g, grad_bound)
                # Bug fix: append the clipped gradient. The original appended
                # the unclipped `g`, which made "norm" clipping a no-op.
                clipped.append(c_g)
            grads = clipped
        else:
            raise NotImplementedError("Unknown clip_mode {}".format(clip_mode))

    if lr_cosine:
        # Cosine schedule with warm restarts: the period T_i grows by
        # lr_T_mul at every restart.
        assert lr_max is not None, "Need lr_max to use lr_cosine"
        assert lr_min is not None, "Need lr_min to use lr_cosine"
        assert lr_T_0 is not None, "Need lr_T_0 to use lr_cosine"
        assert lr_T_mul is not None, "Need lr_T_mul to use lr_cosine"
        assert num_train_batches is not None, ("Need num_train_batches to use"
                                               " lr_cosine")
        curr_epoch = train_step // num_train_batches  # train step will be calculated by just one batch!
        last_reset = tf.Variable(0, dtype=tf.int32, trainable=False,
                                 name="last_reset")
        T_i = tf.Variable(lr_T_0, dtype=tf.int32, trainable=False, name="T_i")
        T_curr = curr_epoch - last_reset

        def _update():
            # Restart: remember the reset epoch and grow the period first.
            update_last_reset = tf.assign(last_reset, curr_epoch,
                                          use_locking=True)
            update_T_i = tf.assign(T_i, T_i * lr_T_mul, use_locking=True)
            with tf.control_dependencies([update_last_reset, update_T_i]):
                rate = tf.to_float(T_curr) / tf.to_float(T_i) * 3.1415926
                lr = lr_min + 0.5 * (lr_max - lr_min) * (1.0 + tf.cos(rate))
            return lr

        def _no_update():
            rate = tf.to_float(T_curr) / tf.to_float(T_i) * 3.1415926
            lr = lr_min + 0.5 * (lr_max - lr_min) * (1.0 + tf.cos(rate))
            return lr

        learning_rate = tf.cond(tf.greater_equal(T_curr, T_i),
                                _update, _no_update)
    else:
        learning_rate = tf.train.exponential_decay(
            lr_init, tf.maximum(train_step - lr_dec_start, 0), lr_dec_every,
            lr_dec_rate, staircase=True)
        if lr_dec_min is not None:
            learning_rate = tf.maximum(learning_rate, lr_dec_min)

    if lr_warmup_val is not None:
        # Hold the rate at lr_warmup_val for the first lr_warmup_steps steps.
        learning_rate = tf.cond(tf.less(train_step, lr_warmup_steps),
                                lambda: lr_warmup_val,
                                lambda: learning_rate)

    if optim_algo == "momentum":
        opt = tf.train.MomentumOptimizer(learning_rate, 0.9, use_locking=True,
                                         use_nesterov=True)
    elif optim_algo == "sgd":
        opt = tf.train.GradientDescentOptimizer(learning_rate,
                                                use_locking=True)
    elif optim_algo == "adam":
        opt = tf.train.AdamOptimizer(learning_rate, beta1=0.0, epsilon=1e-3,
                                     use_locking=True)
    else:
        raise ValueError("Unknown optim_algo {}".format(optim_algo))

    if sync_replicas:
        assert num_aggregate is not None, "Need num_aggregate to sync."
        assert num_replicas is not None, "Need num_replicas to sync."
        opt = tf.train.SyncReplicasOptimizer(
            opt, replicas_to_aggregate=num_aggregate,
            total_num_replicas=num_replicas, use_locking=True)
    if moving_average is not None:
        opt = tf.contrib.opt.MovingAverageOptimizer(
            opt, average_decay=moving_average)

    train_op = opt.apply_gradients(zip(grads, tf_variables),
                                   global_step=train_step)
    if get_grad_norms:
        return train_op, learning_rate, grad_norm, opt, grad_norms
    else:
        return train_op, learning_rate, grad_norm, opt
def train(train_dir, config, dataset_fn, checkpoints_to_keep=5,
          keep_checkpoint_every_n_hours=1, num_steps=None, master='',
          num_sync_workers=0, num_ps_tasks=0, task=0):
    """Train loop.

    Args:
      train_dir: Directory where checkpoints and summaries are written.
      config: Experiment config (model, hparams, data converter, paths).
      dataset_fn: Zero-arg callable producing the input dataset.
      checkpoints_to_keep: Max checkpoints retained by the Saver.
      keep_checkpoint_every_n_hours: Extra long-interval checkpoint cadence.
      num_steps: Optional step count at which training stops.
      master: TF master address for distributed training.
      num_sync_workers: If nonzero, wrap the optimizer for sync replicas.
      num_ps_tasks: Number of parameter-server tasks.
      task: Worker task id; task 0 is chief.
    """
    tf.gfile.MakeDirs(train_dir)
    is_chief = (task == 0)
    if is_chief:
        _trial_summary(config.hparams,
                       config.train_examples_path or config.tfds_name,
                       train_dir)
    with tf.Graph().as_default():
        with tf.device(
                tf.train.replica_device_setter(num_ps_tasks,
                                               merge_devices=True)):
            model = config.model
            model.build(config.hparams, config.data_converter.output_depth,
                        encoder_train=config.encoder_train,
                        decoder_train=config.decoder_train)
            # model.train(...) returns the optimizer configured for this run.
            optimizer = model.train(**_get_input_tensors(dataset_fn(), config))
            # Select which variables are restored / kept trainable.
            restored_vars = _get_restore_vars(config.var_train_pattern)
            _set_trainable_vars(config.var_train_pattern)
            hooks = []
            if num_sync_workers:
                optimizer = tf.train.SyncReplicasOptimizer(
                    optimizer, num_sync_workers)
                hooks.append(optimizer.make_session_run_hook(is_chief))
            grads, var_list = zip(*optimizer.compute_gradients(model.loss))
            global_norm = tf.global_norm(grads)
            tf.summary.scalar('global_norm', global_norm)
            if config.hparams.clip_mode == 'value':
                g = config.hparams.grad_clip
                clipped_grads = [
                    tf.clip_by_value(grad, -g, g) for grad in grads
                ]
            elif config.hparams.clip_mode == 'global_norm':
                # Clip by global norm normally, but zero all gradients
                # entirely once the norm exceeds grad_norm_clip_to_zero.
                clipped_grads = tf.cond(
                    global_norm < config.hparams.grad_norm_clip_to_zero,
                    lambda: tf.clip_by_global_norm(
                        grads, config.hparams.grad_clip,
                        use_norm=global_norm)[0],
                    lambda: [tf.zeros(tf.shape(g)) for g in grads])
            else:
                raise ValueError('Unknown clip_mode: {}'.format(
                    config.hparams.clip_mode))
            train_op = optimizer.apply_gradients(
                zip(clipped_grads, var_list),
                global_step=model.global_step,
                name='train_step')
            logging_dict = {
                'global_step': model.global_step,
                'loss': model.loss
            }
            hooks.append(
                tf.train.LoggingTensorHook(logging_dict, every_n_iter=5))
            if num_steps:
                hooks.append(tf.train.StopAtStepHook(last_step=num_steps))
            # Restore the selected variables from the pretrained checkpoint
            # via a custom scaffold init function.
            variables_to_restore = contrib_framework.get_variables_to_restore(
                include=[v.name for v in restored_vars])
            init_assign_op, init_feed_dict = contrib_framework.assign_from_checkpoint(
                config.pretrained_path, variables_to_restore)

            def InitAssignFn(scaffold, sess):
                sess.run(init_assign_op, init_feed_dict)

            scaffold = tf.train.Scaffold(
                init_fn=InitAssignFn,
                saver=tf.train.Saver(
                    max_to_keep=checkpoints_to_keep,
                    keep_checkpoint_every_n_hours=keep_checkpoint_every_n_hours,
                ))
            contrib_training.train(train_op=train_op,
                                   logdir=train_dir,
                                   scaffold=scaffold,
                                   hooks=hooks,
                                   save_checkpoint_secs=60,
                                   master=master,
                                   is_chief=is_chief)
def create_optimizer(loss, learning_rate, num_train_steps, weight_decay_rate=0.0,
                     warmup_steps=0, warmup_proportion=0, lr_decay_power=1.0,
                     layerwise_lr_decay_power=-1, n_transformer_layers=None,
                     hvd=None, use_fp16=False, num_accumulation_steps=1,
                     allreduce_post_accumulation=False):
    """ Creates an optimizer and training op.

    Supports polynomial LR decay with linear warmup, optional layer-wise LR
    decay, Horovod distribution, fp16 loss scaling, and gradient
    accumulation over `num_accumulation_steps` micro-batches.
    """
    compression = Compression.fp16 if use_fp16 else Compression.none
    global_step = tf.train.get_or_create_global_step()
    # Polynomial decay to zero over num_train_steps...
    learning_rate = tf.train.polynomial_decay(learning_rate,
                                              global_step,
                                              num_train_steps,
                                              end_learning_rate=0.0,
                                              power=lr_decay_power,
                                              cycle=False)
    # ...multiplied by a linear warmup ramp for the first warmup_steps.
    warmup_steps = max(num_train_steps * warmup_proportion, warmup_steps)
    learning_rate *= tf.minimum(
        1.0,
        tf.cast(global_step, tf.float32) / tf.cast(warmup_steps, tf.float32))
    if layerwise_lr_decay_power > 0:
        learning_rate = _get_layer_lrs(learning_rate, layerwise_lr_decay_power,
                                       n_transformer_layers)
    optimizer = AdamWeightDecayOptimizer(
        learning_rate=learning_rate,
        weight_decay_rate=weight_decay_rate,
        beta_1=0.9,
        beta_2=0.999,
        epsilon=1e-6,
        exclude_from_weight_decay=["LayerNorm", "layer_norm", "bias"])
    # With per-step allreduce (no post-accumulation), wrap in Horovod's
    # distributed optimizer.
    if hvd is not None and (num_accumulation_steps == 1 or
                            (not allreduce_post_accumulation)):
        optimizer = hvd.DistributedOptimizer(optimizer,
                                             sparse_as_dense=True,
                                             compression=compression)
    if use_fp16:
        # Dynamic loss scaling to keep fp16 gradients in range.
        loss_scale_manager = tf_contrib.mixed_precision.ExponentialUpdateLossScaleManager(
            init_loss_scale=2**32,
            incr_every_n_steps=1000,
            decr_every_n_nan_or_inf=2,
            decr_ratio=0.5)
        optimizer = tf_contrib.mixed_precision.LossScaleOptimizer(
            optimizer, loss_scale_manager)
    tvars = tf.trainable_variables()
    # if hvd.rank() == 0:
    #     print("*****Trainable variables*****")
    #     for v in tvars:
    #         print(v)
    #     print("*****************************")
    # Scale the loss so accumulated gradients average over micro-batches.
    grads_and_vars = optimizer.compute_gradients(
        loss * 1.0 / num_accumulation_steps, tvars)
    if num_accumulation_steps > 1:
        local_step = tf.get_variable(name="local_step",
                                     shape=[],
                                     dtype=tf.int32,
                                     trainable=False,
                                     initializer=tf.zeros_initializer())
        batch_finite = tf.get_variable(name="batch_finite",
                                       shape=[],
                                       dtype=tf.bool,
                                       trainable=False,
                                       initializer=tf.ones_initializer())
        # One accumulator variable per trainable variable.
        accum_vars = [
            tf.get_variable(name=tvar.name.split(":")[0] + "/accum",
                            shape=tvar.shape.as_list(),
                            dtype=tf.float32,
                            trainable=False,
                            initializer=tf.zeros_initializer())
            for tvar in tvars
        ]
        # True at the start of each accumulation window.
        reset_step = tf.cast(tf.math.equal(local_step % num_accumulation_steps,
                                           0),
                             dtype=tf.bool)
        local_step = tf.cond(
            reset_step, lambda: local_step.assign(tf.ones_like(local_step)),
            lambda: local_step.assign_add(1))
        # Drop None gradients, keeping grads/vars/accumulators aligned.
        grads, tvars, accum_vars = zip(
            *[(g, v, g_acc)
              for (g, v), g_acc in zip(grads_and_vars, accum_vars)
              if g is not None])
        if use_fp16:
            # NOTE (translated): this condition can be False for a large
            # number of initial steps and then remain True for the rest of
            # training.
            all_are_finite = tf.reduce_all(
                [tf.reduce_all(tf.is_finite(g)) for g in grads])
            # NOTE (translated): when resuming from a checkpoint, gradients
            # accumulate again over many initial steps, which causes a loss
            # spike; kept this way to allow training continuation.
            # all_are_finite = tf.constant(True, dtype=tf.bool)
        else:
            all_are_finite = tf.constant(True, dtype=tf.bool)
        # Track whether every micro-batch in the window was finite.
        batch_finite = tf.cond(
            reset_step,
            lambda: batch_finite.assign(
                tf.math.logical_and(tf.constant(True, dtype=tf.bool),
                                    all_are_finite)),
            lambda: batch_finite.assign(
                tf.math.logical_and(batch_finite, all_are_finite)))
        # This is how the model was pre-trained.
        # ensure global norm is a finite number
        # to prevent clip_by_global_norm from having a hizzy fit.
        (clipped_grads, _) = tf.clip_by_global_norm(
            grads,
            clip_norm=1.0,
            use_norm=tf.cond(all_are_finite,
                             lambda: tf.global_norm(grads),
                             lambda: tf.constant(1.0)))
        # Reset accumulators at the window start; otherwise add into them.
        accum_vars = tf.cond(
            reset_step,
            lambda: [v.assign(grad)
                     for v, grad in zip(accum_vars, clipped_grads)],
            lambda: [v.assign_add(grad)
                     for v, grad in zip(accum_vars, clipped_grads)])

        def update(accum_vars):
            # Optionally allreduce the accumulated gradients before applying.
            if allreduce_post_accumulation and hvd is not None:
                accum_vars = [
                    hvd.allreduce(tf.convert_to_tensor(accum_var),
                                  compression=compression)
                    if isinstance(accum_var, tf.IndexedSlices)
                    else hvd.allreduce(accum_var, compression=compression)
                    for accum_var in accum_vars
                ]
            return optimizer.apply_gradients(list(zip(accum_vars, tvars)),
                                             global_step=global_step)

        update_step = tf.identity(tf.cast(tf.math.equal(
            local_step % num_accumulation_steps, 0),
                                          dtype=tf.bool),
                                  name="update_step")
        update_op = tf.cond(update_step, lambda: update(accum_vars),
                            lambda: tf.no_op())
        # Advance global_step only on finite, window-final steps.
        new_global_step = tf.cond(
            tf.math.logical_and(
                update_step,
                tf.cast(hvd.allreduce(tf.cast(batch_finite, tf.int32)),
                        tf.bool)),
            lambda: global_step + 1,
            lambda: global_step)
        new_global_step = tf.identity(new_global_step, name='step_update')
        train_op = tf.group(update_op, [global_step.assign(new_global_step)])
    else:
        grads_and_vars = [(g, v) for g, v in grads_and_vars if g is not None]
        grads, tvars = list(zip(*grads_and_vars))
        if use_fp16:
            all_are_finite = tf.reduce_all(
                [tf.reduce_all(tf.is_finite(g)) for g in grads])
        else:
            all_are_finite = tf.constant(True, dtype=tf.bool)
        # This is how the model was pre-trained.
        # ensure global norm is a finite number
        # to prevent clip_by_global_norm from having a hizzy fit.
        (clipped_grads, _) = tf.clip_by_global_norm(
            grads,
            clip_norm=1.0,
            use_norm=tf.cond(all_are_finite,
                             lambda: tf.global_norm(grads),
                             lambda: tf.constant(1.0)))
        train_op = optimizer.apply_gradients(list(zip(clipped_grads, tvars)),
                                             global_step=global_step)
        # Skip the global-step increment when gradients were non-finite.
        new_global_step = tf.cond(all_are_finite, lambda: global_step + 1,
                                  lambda: global_step)
        new_global_step = tf.identity(new_global_step, name='step_update')
        train_op = tf.group(train_op, [global_step.assign(new_global_step)])
    return train_op
def train(self, save_dir='./tmp', transfer_dir=None, details=False, verbose=True,
          show_each_step=False, show_percentage=True, **kwargs):
    """Run the full training loop, with optional transfer-learning warm start.

    Args:
        save_dir: Directory for checkpoints, TensorBoard logs, and plots.
        transfer_dir: If not None, restore (a subset of) variables from a
            checkpoint found here before training; also controls resuming
            the learning-curve pickle.
        details: If True, return the accumulated loss/score histories.
        verbose: Print progress information.
        show_each_step: If True, record losses/scores per step; otherwise
            averaged per validation interval.
        show_percentage: Print scores with '%' formatting.
        **kwargs: Optional knobs read below: 'validation_frequency',
            'summary_frequency', 'model_to_load', 'blocks_to_load',
            'load_moving_average', 'start_epoch', 'max_to_keep',
            'log_trace', 'num_examples_to_save'. Remaining kwargs are
            forwarded to `self.model.predict`/`save_results`.

    Returns:
        dict: Empty unless `details` is True, in which case it holds
        'step_losses'/'step_scores' and, when a validation set exists,
        'eval_losses'/'eval_scores'.
    """
    train_size = self.train_set.num_examples
    num_steps_per_epoch = np.ceil(train_size / self.batch_size).astype(int)
    self.steps_per_epoch = num_steps_per_epoch
    num_steps = num_steps_per_epoch * self.num_epochs
    self.total_steps = num_steps

    # Default: validate and write summaries once per epoch.
    validation_frequency = kwargs.get('validation_frequency', None)
    summary_frequency = kwargs.get('summary_frequency', None)
    if validation_frequency is None:
        validation_frequency = num_steps_per_epoch
    if summary_frequency is None:
        summary_frequency = num_steps_per_epoch

    num_validations = num_steps // validation_frequency
    last_val_iter = num_validations * validation_frequency

    if transfer_dir is not None:  # Transfer learning setup
        model_to_load = kwargs.get('model_to_load', None)
        blocks_to_load = kwargs.get('blocks_to_load', None)
        load_moving_average = kwargs.get('load_moving_average', False)
        start_epoch = kwargs.get('start_epoch', 0)
        start_step = num_steps_per_epoch * start_epoch

        # Resolve which checkpoint file to restore from. `model_to_load`
        # may be None (latest), a filename, or an index into checkpoints.txt.
        if not os.path.isdir(transfer_dir):
            ckpt_to_load = transfer_dir
        elif model_to_load is None:  # Find a model to be transferred
            ckpt_to_load = tf.train.latest_checkpoint(transfer_dir)
        elif isinstance(model_to_load, str):
            ckpt_to_load = os.path.join(transfer_dir, model_to_load)
        else:
            fp = open(os.path.join(transfer_dir, 'checkpoints.txt'), 'r')
            ckpt_list = fp.readlines()
            fp.close()
            ckpt_to_load = os.path.join(transfer_dir, ckpt_list[model_to_load].rstrip())

        reader = pywrap_tensorflow.NewCheckpointReader(ckpt_to_load)  # Find variables to be transferred
        var_to_shape_map = reader.get_variable_to_shape_map()
        var_names = [var for var in var_to_shape_map.keys()]

        # Collect the graph variables eligible for restoring, either from
        # every block or only from the requested blocks.
        var_list = []
        if blocks_to_load is None:
            for blk in self.model.block_list:
                var_list += self.model.get_collection('block_{}/variables'.format(blk))
                var_list += self.model.get_collection('block_{}/ema_variables'.format(blk))
        else:
            for blk in blocks_to_load:
                var_list += self.model.get_collection('block_{}/variables'.format(blk))
                var_list += self.model.get_collection('block_{}/ema_variables'.format(blk))

        # Match graph variables against checkpoint entries by name and shape.
        # NOTE(review): `var.name.rstrip(':0')` strips a *character set*, not
        # the suffix ':0' — names ending in '0' or ':' lose extra characters;
        # confirm variable naming before relying on this.
        variables_not_loaded = []
        if load_moving_average:
            # Prefer the ExponentialMovingAverage shadow value when present.
            variables = {}
            for var in var_list:
                var_name = var.name.rstrip(':0')
                ema_name = var.name.rstrip(':0') + '/ExponentialMovingAverage'
                if ema_name in var_to_shape_map:
                    if var.get_shape() == var_to_shape_map[ema_name]:
                        variables[ema_name] = var
                        # NOTE(review): the guard tests `var_name` but removes
                        # `ema_name` — likely intended to be symmetric; verify.
                        if var_name in var_names:
                            var_names.remove(ema_name)
                    else:
                        print('<{}> was not loaded due to shape mismatch'.format(var_name))
                        variables_not_loaded.append(var_name)
                elif var_name in var_to_shape_map:
                    if var.get_shape() == var_to_shape_map[var_name]:
                        variables[var_name] = var
                        if var_name in var_names:
                            var_names.remove(var_name)
                    else:
                        print('<{}> was not loaded due to shape mismatch'.format(var_name))
                        variables_not_loaded.append(var_name)
                else:
                    variables_not_loaded.append(var_name)
        else:
            variables = []
            for var in var_list:
                var_name = var.name.rstrip(':0')
                if var_name in var_to_shape_map:
                    if var.get_shape() == var_to_shape_map[var_name]:
                        variables.append(var)
                        var_names.remove(var_name)
                    else:
                        print('<{}> was not loaded due to shape mismatch'.format(var_name))
                        variables_not_loaded.append(var_name)
                else:
                    variables_not_loaded.append(var_name)

        saver_transfer = tf.train.Saver(variables)

        # Initialize everything first, then overwrite matched variables
        # from the checkpoint.
        self.model.session.run(tf.global_variables_initializer())
        saver_transfer.restore(self.model.session, ckpt_to_load)

        if verbose:
            print('')
            print('Variables have been initialized using the following checkpoint:')
            print(ckpt_to_load)
            print('The following variables in the checkpoint were not used:')
            print(var_names)
            print('The following variables do not exist in the checkpoint, so they were initialized randomly:')
            print(variables_not_loaded)
            print('')

        # Resume the learning-curve history if a matching pickle exists and
        # its truncated length is consistent with the resume point.
        pkl_file = os.path.join(transfer_dir, 'learning_curve-result-1.pkl')
        pkl_loaded = False
        if os.path.exists(pkl_file):
            train_steps = start_step if show_each_step else start_step // validation_frequency
            eval_steps = start_step // validation_frequency
            with open(pkl_file, 'rb') as fo:
                prev_results = pkl.load(fo)
            prev_results[0] = prev_results[0][:train_steps]
            prev_results[1] = prev_results[1][:train_steps]
            prev_results[2] = prev_results[2][:eval_steps]
            prev_results[3] = prev_results[3][:eval_steps]
            train_len = len(prev_results[0])
            eval_len = len(prev_results[2])
            if train_len == train_steps and eval_len == eval_steps:
                train_losses, train_scores, eval_losses, eval_scores = prev_results
                pkl_loaded = True
            else:
                train_losses, train_scores, eval_losses, eval_scores = [], [], [], []
        else:
            train_losses, train_scores, eval_losses, eval_scores = [], [], [], []
    else:
        # Fresh run: no warm start.
        start_epoch = 0
        start_step = 0
        self.model.session.run(tf.global_variables_initializer())
        train_losses, train_scores, eval_losses, eval_scores = [], [], [], []
        pkl_loaded = False

    max_to_keep = kwargs.get('max_to_keep', 5)
    log_trace = kwargs.get('log_trace', False)
    saver = tf.train.Saver(max_to_keep=max_to_keep)
    saver.export_meta_graph(filename=os.path.join(save_dir, 'model.ckpt.meta'))

    kwargs['monte_carlo'] = False  # Turn off monte carlo dropout for validation

    # Build all TensorBoard summaries on the CPU device.
    with tf.device('/cpu:{}'.format(self.model.cpu_offset)):
        with tf.variable_scope('summaries'):  # TensorBoard summaries
            tf.summary.scalar('Loss', self.model.loss)
            tf.summary.scalar('Learning Rate', self.learning_rate)
            for i, val in enumerate(self.model.debug_values):
                tf.summary.scalar('Debug_{}-{}'.format(i, val.name), val)

            # Images are assumed to be in [0, 1]; scaled to uint8 for display.
            tf.summary.image('Input Images',
                             tf.cast(self.model.input_images * 255, dtype=tf.uint8),
                             max_outputs=4)
            tf.summary.image('Augmented Input Images',
                             tf.cast(self.model.X_all * 255, dtype=tf.uint8),
                             max_outputs=4)
            for i, img in enumerate(self.model.debug_images):
                tf.summary.image('Debug_{}-{}'.format(i, img.name),
                                 tf.cast(img * 255, dtype=tf.uint8),
                                 max_outputs=4)

            tf.summary.histogram('Image Histogram', self.model.X_all)
            for blk in self.model.block_list:
                weights = self.model.get_collection('block_{}/weight_variables'.format(blk))
                if len(weights) > 0:
                    tf.summary.histogram('Block {} Weight Histogram'.format(blk), weights[0])

            # Weight-norm diagnostics over all weight variables.
            weights = self.model.get_collection('weight_variables')
            with tf.variable_scope('weights_l1'):
                weights_l1 = tf.math.accumulate_n([tf.reduce_sum(tf.math.abs(w)) for w in weights])
                tf.summary.scalar('Weights L1 Norm', weights_l1)
            with tf.variable_scope('weights_l2'):
                weights_l2 = tf.global_norm(weights)
                tf.summary.scalar('Weights L2 Norm', weights_l2)

            # "Tail score": fraction of weights beyond the 5% / 1% two-sided
            # Gaussian thresholds (1.96σ / 2.58σ), normalized so that a
            # Gaussian weight distribution scores ~1.0.
            tail_scores_5 = []
            tail_scores_1 = []
            with tf.variable_scope('weights_tail_score'):
                for w in weights:
                    w_size = tf.size(w, out_type=tf.float32)
                    w_std = tf.math.reduce_std(w)
                    w_abs = tf.math.abs(w)
                    tail_threshold_5 = 1.96 * w_std
                    tail_threshold_1 = 2.58 * w_std
                    num_weights_5 = tf.math.reduce_sum(
                        tf.cast(tf.math.greater(w_abs, tail_threshold_5), dtype=tf.float32))
                    num_weights_1 = tf.math.reduce_sum(
                        tf.cast(tf.math.greater(w_abs, tail_threshold_1), dtype=tf.float32))
                    tail_scores_5.append(num_weights_5 / (0.05 * w_size))
                    tail_scores_1.append(num_weights_1 / (0.01 * w_size))
                tail_score_5 = tf.math.accumulate_n(tail_scores_5) / len(tail_scores_5)
                tail_score_1 = tf.math.accumulate_n(tail_scores_1) / len(tail_scores_1)
                tf.summary.scalar('Weights Tail Score 5p', tail_score_5)
                tf.summary.scalar('Weights Tail Score 1p', tail_score_1)

            with tf.variable_scope('gradients_l2'):
                gradients_l2 = tf.global_norm(self.avg_grads)
                tf.summary.scalar('Gradients L2 Norm', gradients_l2)

            merged = tf.summary.merge_all()
            train_writer = tf.summary.FileWriter(os.path.join(save_dir, 'logs'),
                                                 self.model.session.graph)

    train_results = dict()
    if verbose:
        print('Running training loop...')
        print('Batch size: {}'.format(self.batch_size))
        print('Number of epochs: {}'.format(self.num_epochs))
        print('Number of training iterations: {}'.format(num_steps))
        print('Number of iterations per epoch: {}'.format(num_steps_per_epoch))

    # Per-interval accumulators: lists when logging every step, running
    # sums otherwise.
    if show_each_step:
        step_losses, step_scores = [], []
    else:
        step_losses, step_scores = 0, 0
    eval_loss, eval_score = np.inf, 0
    annotations = []

    self.train_set.initialize(self.model.session)  # Initialize training iterator
    handles = self.train_set.get_string_handles(self.model.session)  # Get a string handle from training iterator
    # if self.val_set is not None:
    #     self.val_set.initialize(self.model.session)  # Initialize validation iterator

    with tf.variable_scope('calc/'):
        step_init_op = self.model.global_step.assign(start_step, name='init_global_step')
    self.model.session.run([step_init_op] + self.model.init_ops)
    # Freeze the graph so no ops are accidentally added inside the loop.
    tf.get_default_graph().finalize()
    # self._test_drive(save_dir=save_dir)  # Run test code

    self.curr_epoch += start_epoch
    self.curr_step += start_step
    step_loss, step_score = 0, 0
    start_time = time.time()
    for i in range(num_steps - start_step):  # Training iterations
        self._update_learning_rate()
        try:
            step_loss, step_Y_true, step_Y_pred = self._step(
                handles, merged=merged, writer=train_writer,
                summary=i % summary_frequency == 0,
                log_trace=log_trace and i % summary_frequency == 1)
            step_score = self.evaluator.score(step_Y_true, step_Y_pred)
        except tf.errors.OutOfRangeError:
            # Dataset exhausted mid-interval: the incomplete final batch of
            # the epoch is skipped; previous step_loss/step_score are reused.
            if verbose:
                remainder_size = train_size - (self.steps_per_epoch - 1) * self.batch_size
                print('The last iteration ({} data) has been ignored'.format(remainder_size))

        if show_each_step:
            step_losses.append(step_loss)
            step_scores.append(step_score)
        else:
            step_losses += step_loss
            step_scores += step_score
        self.curr_step += 1

        if (i + 1) % validation_frequency == 0:  # Validation every validation_frequency iterations
            if self.val_set is not None:
                _, eval_Y_true, eval_Y_pred, eval_loss = self.model.predict(
                    self.val_set, verbose=False, return_images=False,
                    run_init_ops=False, **kwargs)
                eval_score = self.evaluator.score(eval_Y_true, eval_Y_pred)
                eval_scores.append(eval_score)
                eval_losses.append(eval_loss)
                del eval_Y_true, eval_Y_pred  # free memory before saving results

                curr_score = eval_score
                self.model.save_results(self.val_set, os.path.join(save_dir, 'results'),
                                        self.curr_epoch,
                                        max_examples=kwargs.get('num_examples_to_save', None),
                                        **kwargs)
            else:
                # No validation set: score on the training interval instead.
                curr_score = np.mean(step_scores) if show_each_step else step_scores / validation_frequency

            if self.evaluator.is_better(curr_score, self.best_score, **kwargs):  # Save best model
                self.best_score = curr_score
                saver.save(self.model.session, os.path.join(save_dir, 'model.ckpt'),
                           global_step=self.model.global_step, write_meta_graph=False)
                if show_each_step:
                    annotations.append((self.curr_step, curr_score))
                else:
                    annotations.append((self.curr_step // validation_frequency, curr_score))
                annotations = annotations[-max_to_keep:]
            elif self.curr_step == last_val_iter:  # Save latest model
                saver.save(self.model.session, os.path.join(save_dir, 'model.ckpt'),
                           global_step=self.model.global_step, write_meta_graph=False)
                if show_each_step:
                    annotations.append((self.curr_step, curr_score))
                else:
                    annotations.append((self.curr_step // validation_frequency, curr_score))
                annotations = annotations[-max_to_keep:]

            # Mirror the kept checkpoints (newest first) into checkpoints.txt.
            ckpt_list = saver.last_checkpoints[::-1]
            fp = open(os.path.join(save_dir, 'checkpoints.txt'), 'w')
            for fname in ckpt_list:
                fp.write(fname.split(os.sep)[-1] + '\n')
            fp.close()

            # Flush the interval accumulators into the learning-curve history.
            if show_each_step:
                train_losses += step_losses
                train_scores += step_scores
                step_losses, step_scores = [], []
            else:
                step_loss = step_losses / validation_frequency
                step_score = step_scores / validation_frequency
                train_losses.append(step_loss)
                train_scores.append(step_score)
                step_losses, step_scores = 0, 0

        if (i + 1) % num_steps_per_epoch == 0:  # Print and plot results every epoch
            self.train_set.initialize(self.model.session)  # Initialize training iterator every epoch
            if show_each_step:
                val_freq = validation_frequency
                start = 0 if pkl_loaded else start_step
            else:
                val_freq = 1
                start = 0 if pkl_loaded else start_epoch
            if self.val_set is not None:
                if verbose:
                    if show_percentage:
                        print('[epoch {}/{}]\tTrain loss: {:.5f} |Train score: {:2.3%} '
                              '|Eval loss: {:.5f} |Eval score: {:2.3%} |LR: {:.7f} '
                              '|Elapsed time: {:5.0f} sec'.format(
                                  self.curr_epoch, self.num_epochs,
                                  step_loss, step_score, eval_loss, eval_score,
                                  self.init_learning_rate * self.curr_multiplier,
                                  time.time() - start_time))
                    else:
                        print('[epoch {}/{}]\tTrain loss: {:.5f} |Train score: {:.5f} '
                              '|Eval loss: {:.5f} |Eval score: {:.5f} |LR: {:.7f} '
                              '|Elapsed time: {:5.0f} sec'.format(
                                  self.curr_epoch, self.num_epochs,
                                  step_loss, step_score, eval_loss, eval_score,
                                  self.init_learning_rate * self.curr_multiplier,
                                  time.time() - start_time))
                if len(eval_losses) > 0:
                    # Cap the plotted loss axis; for classifiers, allow up to
                    # twice the uniform-prediction cross-entropy 2*log(C).
                    if self.model.num_classes is None:
                        loss_thres = min(eval_losses) * 2
                    else:
                        if self.model.num_classes > 1:
                            loss_thres = max([2 * np.log(self.model.num_classes),
                                              min(eval_losses) * 2])
                        else:
                            loss_thres = min(eval_losses) * 2
                    plot_learning_curve(train_losses, train_scores,
                                        eval_losses=eval_losses, eval_scores=eval_scores,
                                        name=self.evaluator.name, loss_threshold=loss_thres,
                                        mode=self.evaluator.mode, img_dir=save_dir,
                                        annotations=annotations, start_step=start,
                                        validation_frequency=val_freq)
            else:
                if verbose:
                    if show_percentage:
                        print('[epoch {}/{}]\tTrain loss: {:.5f} |Train score: {:2.3%} |LR: {:.7f} '
                              '|Elapsed time: {:5.0f} sec'.format(
                                  self.curr_epoch, self.num_epochs, step_loss, step_score,
                                  self.init_learning_rate * self.curr_multiplier,
                                  time.time() - start_time))
                    else:
                        print('[epoch {}/{}]\tTrain loss: {:.5f} |Train score: {:.5f} |LR: {:.7f} '
                              '|Elapsed time: {:5.0f} sec'.format(
                                  self.curr_epoch, self.num_epochs, step_loss, step_score,
                                  self.init_learning_rate * self.curr_multiplier,
                                  time.time() - start_time))
                if self.model.num_classes is None:
                    loss_thres = min(train_losses) * 2
                else:
                    if self.model.num_classes > 1:
                        loss_thres = max([2 * np.log(self.model.num_classes),
                                          min(train_losses) * 2])
                    else:
                        loss_thres = min(train_losses) * 2
                plot_learning_curve(train_losses, train_scores,
                                    eval_losses=None, eval_scores=None,
                                    name=self.evaluator.name, loss_threshold=loss_thres,
                                    mode=self.evaluator.mode, img_dir=save_dir,
                                    annotations=annotations, start_step=start,
                                    validation_frequency=val_freq)
            self.curr_epoch += 1
            plt.close()

    train_writer.close()
    if verbose:
        print('Total training time: {:.2f} sec'.format(time.time() - start_time))
        print('Best {} {}: {:.4f}'.format(
            'evaluation' if self.val_set is not None else 'training',
            self.evaluator.name, self.best_score))
        print('Done.')

    if details:
        train_results['step_losses'] = step_losses
        train_results['step_scores'] = step_scores
        if self.val_set is not None:
            train_results['eval_losses'] = eval_losses
            train_results['eval_scores'] = eval_scores

    return train_results
def _global_norm_with_cast(grads_and_vars):
    """Return the global norm of the gradients in `grads_and_vars`.

    Each gradient is cast to float32 before the norm is computed, so mixed
    float16/float32 gradient lists are handled uniformly.
    """
    gradients = list(zip(*grads_and_vars))[0]
    float_gradients = [tf.cast(grad, tf.float32) for grad in gradients]
    return tf.global_norm(float_gradients)
def train(train_dir,
          config,
          dataset_fn,
          checkpoints_to_keep=5,
          keep_checkpoint_every_n_hours=1,
          num_steps=None,
          master='',
          num_sync_workers=0,
          num_ps_tasks=0,
          task=0):
  """Train loop.

  Builds the model graph under a replica device setter, applies gradient
  clipping according to ``config.hparams.clip_mode``, and drives training
  with ``tf_slim.training.train``.

  Args:
    train_dir: Directory for checkpoints/summaries (created if missing).
    config: Experiment config providing ``model``, ``hparams``,
      ``data_converter`` and example paths.
    dataset_fn: Zero-argument callable returning the input dataset.
    checkpoints_to_keep: Max number of recent checkpoints to retain.
    keep_checkpoint_every_n_hours: Additionally keep one checkpoint per
      this many hours.
    num_steps: If set, stop at this global step.
    master: TensorFlow master to use.
    num_sync_workers: If non-zero, wrap the optimizer in
      ``SyncReplicasOptimizer`` with this many workers.
    num_ps_tasks: Number of parameter-server tasks.
    task: Worker task id; task 0 is chief and writes the trial summary.

  Raises:
    ValueError: If ``config.hparams.clip_mode`` is not 'value' or
      'global_norm'.
  """
  tf.gfile.MakeDirs(train_dir)
  is_chief = (task == 0)
  if is_chief:
    _trial_summary(
        config.hparams, config.train_examples_path or config.tfds_name,
        train_dir)
  with tf.Graph().as_default():
    with tf.device(tf.train.replica_device_setter(
        num_ps_tasks, merge_devices=True)):
      model = config.model
      model.build(config.hparams,
                  config.data_converter.output_depth,
                  is_training=True)

      # NOTE: model.train(...) returns the optimizer instance here (it does
      # not run training itself).
      optimizer = model.train(**_get_input_tensors(dataset_fn(), config))

      hooks = []
      if num_sync_workers:
        optimizer = tf.train.SyncReplicasOptimizer(
            optimizer,
            num_sync_workers)
        hooks.append(optimizer.make_session_run_hook(is_chief))

      grads, var_list = list(zip(*optimizer.compute_gradients(model.loss)))
      global_norm = tf.global_norm(grads)
      tf.summary.scalar('global_norm', global_norm)

      if config.hparams.clip_mode == 'value':
        # Element-wise clipping to [-grad_clip, grad_clip].
        g = config.hparams.grad_clip
        clipped_grads = [tf.clip_by_value(grad, -g, g) for grad in grads]
      elif config.hparams.clip_mode == 'global_norm':
        # Clip by global norm while the norm is below the
        # grad_norm_clip_to_zero threshold; beyond it, zero all gradients
        # (i.e. skip the update entirely).
        clipped_grads = tf.cond(
            global_norm < config.hparams.grad_norm_clip_to_zero,
            lambda: tf.clip_by_global_norm(  # pylint:disable=g-long-lambda
                grads, config.hparams.grad_clip, use_norm=global_norm)[0],
            lambda: [tf.zeros(tf.shape(g)) for g in grads])
      else:
        raise ValueError(
            'Unknown clip_mode: {}'.format(config.hparams.clip_mode))
      train_op = optimizer.apply_gradients(
          list(zip(clipped_grads, var_list)),
          global_step=model.global_step,
          name='train_step')

      logging_dict = {'global_step': model.global_step, 'loss': model.loss}

      hooks.append(tf.train.LoggingTensorHook(logging_dict, every_n_iter=100))
      if num_steps:
        hooks.append(tf.train.StopAtStepHook(last_step=num_steps))

      scaffold = tf.train.Scaffold(
          saver=tf.train.Saver(
              max_to_keep=checkpoints_to_keep,
              keep_checkpoint_every_n_hours=keep_checkpoint_every_n_hours))
      tf_slim.training.train(
          train_op=train_op,
          logdir=train_dir,
          scaffold=scaffold,
          hooks=hooks,
          save_checkpoint_secs=60,
          master=master,
          is_chief=is_chief)
def __init__(self, *, scope,
             ob_space, ac_space,
             stochpol_fn,
             nsteps, nepochs=4,
             nminibatches=1,
             gamma=0.99,
             gamma_ext=0.99,
             lam=0.95,
             ent_coef=0,
             cliprange=0.2,
             max_grad_norm=1.0,
             vf_coef=1.0,
             lr=30e-5,
             imitation_data=None,
             adam_hps=None,
             testing=False,
             comm=None, comm_train=None, use_news=False,
             update_ob_stats_every_step=True,
             int_coeff=None,
             ext_coeff=None,
             ):
    """Build the PPO agent: losses, MPI-synchronized optimizer, and state.

    Keyword-only args (selection):
        scope: Variable scope under which the policy graph is built.
        stochpol_fn: Zero-arg factory returning the stochastic policy.
        gamma / gamma_ext: Discounts for intrinsic / extrinsic returns.
        ent_coef, cliprange, vf_coef, lr: Standard PPO hyperparameters.
        max_grad_norm: Clip threshold for the gradient global norm
            (see review note below).
        comm / comm_train: MPI communicators for logging / training.
    """
    self.imitation_data=imitation_data
    # NOTE(review): imitation_flag is only assigned when imitation_data is
    # None; reading it after passing imitation_data would raise
    # AttributeError — confirm intended.
    if self.imitation_data is None:
        self.imitation_flag=False
    self.lr = lr
    self.ext_coeff = ext_coeff
    self.int_coeff = int_coeff
    self.use_news = use_news
    self.update_ob_stats_every_step = update_ob_stats_every_step
    # Absolute scope path (no leading '/') used to look up variables later.
    self.abs_scope = (tf.get_variable_scope().name + '/' + scope).lstrip('/')
    self.testing = testing
    self.comm_log = MPI.COMM_SELF
    if comm is not None and comm.Get_size() > 1:
        self.comm_log = comm
        assert not testing or comm.Get_rank() != 0, "Worker number zero can't be testing"
    if comm_train is not None:
        self.comm_train, self.comm_train_size = comm_train, comm_train.Get_size()
    else:
        self.comm_train, self.comm_train_size = self.comm_log, self.comm_log.Get_size()
    self.is_log_leader = self.comm_log.Get_rank()==0
    self.is_train_leader = self.comm_train.Get_rank()==0
    with tf.variable_scope(scope):
        self.best_ret = -np.inf
        self.local_best_ret = - np.inf
        self.rooms = []
        self.local_rooms = []
        self.scores = []
        self.ob_space = ob_space
        self.ac_space = ac_space
        self.stochpol = stochpol_fn()
        self.nepochs = nepochs
        self.cliprange = cliprange
        self.nsteps = nsteps
        self.nminibatches = nminibatches
        self.gamma = gamma
        self.gamma_ext = gamma_ext
        self.lam = lam
        self.adam_hps = adam_hps or dict()
        # Placeholders fed per update: advantages, intrinsic/extrinsic
        # returns, behavior-policy neglogp and value predictions, and
        # scalar learning-rate / cliprange knobs.
        self.ph_adv = tf.placeholder(tf.float32, [None, None])
        self.ph_ret_int = tf.placeholder(tf.float32, [None, None])
        self.ph_ret_ext = tf.placeholder(tf.float32, [None, None])
        self.ph_oldnlp = tf.placeholder(tf.float32, [None, None])
        self.ph_oldvpred = tf.placeholder(tf.float32, [None, None])
        self.ph_lr = tf.placeholder(tf.float32, [])
        self.ph_lr_pred = tf.placeholder(tf.float32, [])
        self.ph_cliprange = tf.placeholder(tf.float32, [])

        # Define loss: separate value losses for the intrinsic and
        # extrinsic value heads, plus the clipped PPO surrogate objective.
        neglogpac = self.stochpol.pd_opt.neglogp(self.stochpol.ph_ac)
        entropy = tf.reduce_mean(self.stochpol.pd_opt.entropy())
        vf_loss_int = (0.5 * vf_coef) * tf.reduce_mean(tf.square(self.stochpol.vpred_int_opt - self.ph_ret_int))
        vf_loss_ext = (0.5 * vf_coef) * tf.reduce_mean(tf.square(self.stochpol.vpred_ext_opt - self.ph_ret_ext))
        vf_loss = vf_loss_int + vf_loss_ext
        ratio = tf.exp(self.ph_oldnlp - neglogpac)  # p_new / p_old
        negadv = - self.ph_adv
        pg_losses1 = negadv * ratio
        pg_losses2 = negadv * tf.clip_by_value(ratio, 1.0 - self.ph_cliprange, 1.0 + self.ph_cliprange)
        pg_loss = tf.reduce_mean(tf.maximum(pg_losses1, pg_losses2))
        ent_loss = (- ent_coef) * entropy
        # Diagnostics: approximate / max KL to the behavior policy and the
        # fraction of ratios outside the clip range.
        approxkl = .5 * tf.reduce_mean(tf.square(neglogpac - self.ph_oldnlp))
        maxkl = .5 * tf.reduce_max(tf.square(neglogpac - self.ph_oldnlp))
        clipfrac = tf.reduce_mean(tf.to_float(tf.greater(tf.abs(ratio - 1.0), self.ph_cliprange)))
        loss = pg_loss + ent_loss + vf_loss + self.stochpol.aux_loss

        # Create optimizer (Adam synchronized across MPI workers).
        params = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope=self.abs_scope)
        logger.info("PPO: using MpiAdamOptimizer connected to %i peers" % self.comm_train_size)
        trainer = MpiAdamOptimizer(self.comm_train, learning_rate=self.ph_lr, **self.adam_hps)
        grads_and_vars = trainer.compute_gradients(loss, params)
        grads, vars = zip(*grads_and_vars)  # NOTE(review): `vars` shadows the builtin
        if max_grad_norm:
            # NOTE(review): the clipped gradients returned here are
            # DISCARDED — only `_grad_norm` is bound and never used, and
            # the unclipped `grads` are applied below. If clipping is
            # intended, this should read
            # `grads, _grad_norm = tf.clip_by_global_norm(grads, max_grad_norm)`.
            # Left as-is because fixing it changes training behavior.
            _, _grad_norm = tf.clip_by_global_norm(grads, max_grad_norm)
        global_grad_norm = tf.global_norm(grads)
        grads_and_vars = list(zip(grads, vars))
        self._train = trainer.apply_gradients(grads_and_vars)

        # Quantities for reporting; order matches loss_names below.
        self._losses = [loss, pg_loss, vf_loss, entropy, clipfrac, approxkl, maxkl,
                        self.stochpol.aux_loss, self.stochpol.feat_var,
                        self.stochpol.max_feat, global_grad_norm]
        self.loss_names = ['tot', 'pg', 'vf', 'ent', 'clipfrac', 'approxkl', 'maxkl',
                           "auxloss", "featvar", "maxfeat", "gradnorm"]
        self.I = None
        self.disable_policy_update = None
        allvars = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope=self.abs_scope)
        if self.is_log_leader:
            tf_util.display_var_info(allvars)
        tf.get_default_session().run(tf.variables_initializer(allvars))
        sync_from_root(tf.get_default_session(), allvars)  # Syncs initialization across mpi workers.
        self.t0 = time.time()
        self.global_tcount = 0
def main(argv=None):  # pylint: disable=unused-argument
    """Train a small CNN for road segmentation on image patches.

    Loads training patches and labels, balances the two classes (background
    vs road) by truncating the majority class, builds a 2-conv / 2-fc
    network, trains with momentum SGD, and finally writes per-image
    prediction/overlay visualizations for the training set.
    """
    data_dir = 'Datasets/training/'
    train_data_filename = data_dir + 'images/'
    train_labels_filename = data_dir + 'groundtruth/'

    # Extract it into numpy arrays.
    train_data = extract_data(train_data_filename, TRAINING_SIZE)
    train_labels = extract_labels(train_labels_filename, TRAINING_SIZE)

    num_epochs = NUM_EPOCHS

    # Count class frequencies; labels are one-hot with index 0 = background.
    c0 = 0  # bgrd
    c1 = 0  # road
    for i in range(len(train_labels)):
        if train_labels[i][0] == 1:
            c0 = c0 + 1
        else:
            c1 = c1 + 1
    print('Number of data points per class: c0 = ' + str(c0) + ' c1 = ' + str(c1))

    # Balance the classes by keeping min(c0, c1) samples of each.
    print('Balancing training data...')
    min_c = min(c0, c1)
    idx0 = [i for i, j in enumerate(train_labels) if j[0] == 1]
    idx1 = [i for i, j in enumerate(train_labels) if j[1] == 1]
    new_indices = idx0[0:min_c] + idx1[0:min_c]
    print(len(new_indices))
    print(train_data.shape)
    train_data = train_data[new_indices, :, :, :]
    train_labels = train_labels[new_indices]

    train_size = train_labels.shape[0]

    # Recount to confirm the balancing.
    c0 = 0
    c1 = 0
    for i in range(len(train_labels)):
        if train_labels[i][0] == 1:
            c0 = c0 + 1
        else:
            c1 = c1 + 1
    print('Number of data points per class: c0 = ' + str(c0) + ' c1 = ' + str(c1))

    # This is where training samples and labels are fed to the graph.
    # These placeholder nodes will be fed a batch of training data at each
    # training step using the {feed_dict} argument to the Run() call below.
    train_data_node = tf.placeholder(
        tf.float32,
        shape=(BATCH_SIZE, IMG_PATCH_SIZE, IMG_PATCH_SIZE, NUM_CHANNELS))
    train_labels_node = tf.placeholder(tf.float32,
                                       shape=(BATCH_SIZE, NUM_LABELS))
    train_all_data_node = tf.constant(train_data)

    # The variables below hold all the trainable weights. They are passed an
    # initial value which will be assigned when when we call:
    # {tf.initialize_all_variables().run()}
    conv1_weights = tf.Variable(
        tf.truncated_normal(
            [5, 5, NUM_CHANNELS, 32],  # 5x5 filter, depth 32.
            stddev=0.1,
            seed=SEED))
    conv1_biases = tf.Variable(tf.zeros([32]))
    conv2_weights = tf.Variable(
        tf.truncated_normal([5, 5, 32, 64], stddev=0.1, seed=SEED))
    conv2_biases = tf.Variable(tf.constant(0.1, shape=[64]))
    fc1_weights = tf.Variable(  # fully connected, depth 512.
        tf.truncated_normal(
            [int(IMG_PATCH_SIZE / 4 * IMG_PATCH_SIZE / 4 * 64), 512],
            stddev=0.1,
            seed=SEED))
    fc1_biases = tf.Variable(tf.constant(0.1, shape=[512]))
    fc2_weights = tf.Variable(
        tf.truncated_normal([512, NUM_LABELS], stddev=0.1, seed=SEED))
    fc2_biases = tf.Variable(tf.constant(0.1, shape=[NUM_LABELS]))

    # Make an image summary for 4d tensor image with index idx
    def get_image_summary(img, idx=0):
        V = tf.slice(img, (0, 0, 0, idx), (1, -1, -1, 1))
        img_w = img.get_shape().as_list()[1]
        img_h = img.get_shape().as_list()[2]
        min_value = tf.reduce_min(V)
        V = V - min_value
        max_value = tf.reduce_max(V)
        # NOTE(review): dividing by (max * PIXEL_DEPTH) leaves values in
        # [0, 1/PIXEL_DEPTH], not [0, 1] — confirm intended scaling.
        V = V / (max_value * PIXEL_DEPTH)
        V = tf.reshape(V, (img_w, img_h, 1))
        V = tf.transpose(V, (2, 0, 1))
        V = tf.reshape(V, (-1, img_w, img_h, 1))
        return V

    # Make an image summary for 3d tensor image with index idx
    def get_image_summary_3d(img):
        V = tf.slice(img, (0, 0, 0), (1, -1, -1))
        img_w = img.get_shape().as_list()[1]
        img_h = img.get_shape().as_list()[2]
        V = tf.reshape(V, (img_w, img_h, 1))
        V = tf.transpose(V, (2, 0, 1))
        V = tf.reshape(V, (-1, img_w, img_h, 1))
        return V

    # Get prediction for given input image
    # NOTE(review): builds new graph nodes (tf.constant, model(...)) on every
    # call, so the graph grows with each predicted image.
    def get_prediction(img):
        data = numpy.asarray(img_crop(img, IMG_PATCH_SIZE, IMG_PATCH_SIZE))
        data_node = tf.constant(data)
        output = tf.nn.softmax(model(data_node))
        output_prediction = s.run(output)
        img_prediction = label_to_img(img.shape[0], img.shape[1],
                                      IMG_PATCH_SIZE, IMG_PATCH_SIZE,
                                      output_prediction)
        return img_prediction

    # Get a concatenation of the prediction and groundtruth for given input file
    def get_prediction_with_groundtruth(filename, image_idx):
        imageid = "satImage_%.3d" % image_idx
        image_filename = filename + imageid + ".png"
        img = mpimg.imread(image_filename)
        img_prediction = get_prediction(img)
        cimg = concatenate_images(img, img_prediction)
        return cimg

    # Get prediction overlaid on the original image for given input file
    def get_prediction_with_overlay(filename, image_idx):
        imageid = "satImage_%.3d" % image_idx
        image_filename = filename + imageid + ".png"
        img = mpimg.imread(image_filename)
        img_prediction = get_prediction(img)
        oimg = make_img_overlay(img, img_prediction)
        return oimg

    # We will replicate the model structure for the training subgraph, as well
    # as the evaluation subgraphs, while sharing the trainable parameters.
    def model(data, train=False):
        """The Model definition."""
        # 2D convolution, with 'SAME' padding (i.e. the output feature map has
        # the same size as the input). Note that {strides} is a 4D array whose
        # shape matches the data layout: [image index, y, x, depth].
        conv = tf.nn.conv2d(data,
                            conv1_weights,
                            strides=[1, 1, 1, 1],
                            padding='SAME')
        # Bias and rectified linear non-linearity.
        relu = tf.nn.relu(tf.nn.bias_add(conv, conv1_biases))
        # Max pooling. The kernel size spec {ksize} also follows the layout of
        # the data. Here we have a pooling window of 2, and a stride of 2.
        pool = tf.nn.max_pool(relu,
                              ksize=[1, 2, 2, 1],
                              strides=[1, 2, 2, 1],
                              padding='SAME')
        conv2 = tf.nn.conv2d(pool,
                             conv2_weights,
                             strides=[1, 1, 1, 1],
                             padding='SAME')
        relu2 = tf.nn.relu(tf.nn.bias_add(conv2, conv2_biases))
        pool2 = tf.nn.max_pool(relu2,
                               ksize=[1, 2, 2, 1],
                               strides=[1, 2, 2, 1],
                               padding='SAME')

        # Uncomment these lines to check the size of each layer
        # print 'data ' + str(data.get_shape())
        # print 'conv ' + str(conv.get_shape())
        # print 'relu ' + str(relu.get_shape())
        # print 'pool ' + str(pool.get_shape())
        # print 'pool2 ' + str(pool2.get_shape())

        # Reshape the feature map cuboid into a 2D matrix to feed it to the
        # fully connected layers.
        pool_shape = pool2.get_shape().as_list()
        reshape = tf.reshape(
            pool2,
            [pool_shape[0], pool_shape[1] * pool_shape[2] * pool_shape[3]])
        # Fully connected layer. Note that the '+' operation automatically
        # broadcasts the biases.
        hidden = tf.nn.relu(tf.matmul(reshape, fc1_weights) + fc1_biases)
        # Add a 50% dropout during training only. Dropout also scales
        # activations such that no rescaling is needed at evaluation time.
        #if train:
        #    hidden = tf.nn.dropout(hidden, 0.5, seed=SEED)
        out = tf.matmul(hidden, fc2_weights) + fc2_biases

        if train:
            summary_id = '_0'
            s_data = get_image_summary(data)
            tf.summary.image('summary_data' + summary_id, s_data, max_outputs=3)
            s_conv = get_image_summary(conv)
            tf.summary.image('summary_conv' + summary_id, s_conv, max_outputs=3)
            s_pool = get_image_summary(pool)
            tf.summary.image('summary_pool' + summary_id, s_pool, max_outputs=3)
            s_conv2 = get_image_summary(conv2)
            tf.summary.image('summary_conv2' + summary_id, s_conv2, max_outputs=3)
            s_pool2 = get_image_summary(pool2)
            tf.summary.image('summary_pool2' + summary_id, s_pool2, max_outputs=3)
        return out

    # Training computation: logits + cross-entropy loss.
    logits = model(train_data_node, True)  # BATCH_SIZE*NUM_LABELS
    # print 'logits = ' + str(logits.get_shape()) + ' train_labels_node = ' + str(train_labels_node.get_shape())
    loss = tf.reduce_mean(
        tf.nn.softmax_cross_entropy_with_logits_v2(labels=train_labels_node,
                                                   logits=logits))
    tf.summary.scalar('loss', loss)

    all_params_node = [
        conv1_weights, conv1_biases, conv2_weights, conv2_biases, fc1_weights,
        fc1_biases, fc2_weights, fc2_biases
    ]
    all_params_names = [
        'conv1_weights', 'conv1_biases', 'conv2_weights', 'conv2_biases',
        'fc1_weights', 'fc1_biases', 'fc2_weights', 'fc2_biases'
    ]
    # Per-parameter gradient norms, each exported as a scalar summary.
    all_grads_node = tf.gradients(loss, all_params_node)
    all_grad_norms_node = []
    for i in range(0, len(all_grads_node)):
        norm_grad_i = tf.global_norm([all_grads_node[i]])
        all_grad_norms_node.append(norm_grad_i)
        tf.summary.scalar(all_params_names[i], norm_grad_i)

    # L2 regularization for the fully connected parameters.
    regularizers = (tf.nn.l2_loss(fc1_weights) + tf.nn.l2_loss(fc1_biases) +
                    tf.nn.l2_loss(fc2_weights) + tf.nn.l2_loss(fc2_biases))
    # Add the regularization term to the loss.
    loss += 5e-4 * regularizers

    # Optimizer: set up a variable that's incremented once per batch and
    # controls the learning rate decay.
    batch = tf.Variable(0)
    # Decay once per epoch, using an exponential schedule starting at 0.01.
    learning_rate = tf.train.exponential_decay(
        0.01,  # Base learning rate.
        batch * BATCH_SIZE,  # Current index into the dataset.
        train_size,  # Decay step.
        0.95,  # Decay rate.
        staircase=True)
    # tf.scalar_summary('learning_rate', learning_rate)
    tf.summary.scalar('learning_rate', learning_rate)

    # Use simple momentum for the optimization.
    # NOTE(review): momentum is 0.0, so this is plain SGD with decay.
    optimizer = tf.train.MomentumOptimizer(learning_rate,
                                           0.0).minimize(loss,
                                                         global_step=batch)

    # Predictions for the minibatch, validation set and test set.
    train_prediction = tf.nn.softmax(logits)
    # We'll compute them only once in a while by calling their {eval()} method.
    train_all_prediction = tf.nn.softmax(model(train_all_data_node))

    # Add ops to save and restore all the variables.
    saver = tf.train.Saver()

    # Create a local session to run this computation.
    with tf.Session() as s:
        if RESTORE_MODEL:
            # Restore variables from disk.
            saver.restore(s, FLAGS.train_dir + "/model.ckpt")
            print("Model restored.")
        else:
            # Run all the initializers to prepare the trainable parameters.
            tf.global_variables_initializer().run()

            # Build the summary operation based on the TF collection of Summaries.
            summary_op = tf.summary.merge_all()
            summary_writer = tf.summary.FileWriter(FLAGS.train_dir,
                                                   graph=s.graph)
            print('Initialized!')

            # Loop through training steps.
            print('Total number of iterations = ' +
                  str(int(num_epochs * train_size / BATCH_SIZE)))

            training_indices = range(train_size)

            for iepoch in range(num_epochs):
                # Permute training indices
                perm_indices = numpy.random.permutation(training_indices)

                steps_per_epoch = int(train_size / BATCH_SIZE)
                for step in range(steps_per_epoch):
                    offset = (step * BATCH_SIZE) % (train_size - BATCH_SIZE)
                    batch_indices = perm_indices[offset:(offset + BATCH_SIZE)]

                    # Compute the offset of the current minibatch in the data.
                    # Note that we could use better randomization across epochs.
                    batch_data = train_data[batch_indices, :, :, :]
                    batch_labels = train_labels[batch_indices]
                    # This dictionary maps the batch data (as a numpy array) to the
                    # node in the graph is should be fed to.
                    feed_dict = {
                        train_data_node: batch_data,
                        train_labels_node: batch_labels
                    }

                    if step == 0:
                        # First step of each epoch: also fetch summaries and
                        # print diagnostics.
                        summary_str, _, l, lr, predictions = s.run(
                            [
                                summary_op, optimizer, loss, learning_rate,
                                train_prediction
                            ],
                            feed_dict=feed_dict)
                        summary_writer.add_summary(summary_str,
                                                   iepoch * steps_per_epoch)
                        summary_writer.flush()

                        print('Epoch %d' % iepoch)
                        print('Minibatch loss: %.3f, learning rate: %.6f' %
                              (l, lr))
                        print('Minibatch error: %.1f%%' %
                              error_rate(predictions, batch_labels))
                        sys.stdout.flush()
                    else:
                        # Run the graph and fetch some of the nodes.
                        _, l, lr, predictions = s.run(
                            [optimizer, loss, learning_rate, train_prediction],
                            feed_dict=feed_dict)

                # Save the variables to disk.
                save_path = saver.save(s, FLAGS.train_dir + "/model.ckpt")
                print("Model saved in file: %s" % save_path)

        print("Running prediction on training set")
        prediction_training_dir = "predictions_training/"
        if not os.path.isdir(prediction_training_dir):
            os.mkdir(prediction_training_dir)
        for i in range(1, TRAINING_SIZE + 1):
            pimg = get_prediction_with_groundtruth(train_data_filename, i)
            Image.fromarray(pimg).save(prediction_training_dir + "prediction_" +
                                       str(i) + ".png")
            oimg = get_prediction_with_overlay(train_data_filename, i)
            oimg.save(prediction_training_dir + "overlay_" + str(i) + ".png")
# # This is how the model was pre-trained. #(grads, _) = tf.clip_by_global_norm(grads, clip_norm=1.0) // Change 11 clip grads grads_and_vars = [(g, v) for g, v in grads_and_vars if g is not None] grads, tvars = list(zip(*grads_and_vars)) all_are_finite = tf.reduce_all( [tf.reduce_all(tf.is_finite(g)) for g in grads]) if use_fp16 or manual_fp16 else tf.constant(True, dtype=tf.bool) # This is how the model was pre-trained. # ensure global norm is a finite number # to prevent clip_by_global_norm from having a hizzy fit. (clipped_grads, _) = tf.clip_by_global_norm( grads, clip_norm=1.0, use_norm=tf.cond( all_are_finite, lambda: tf.global_norm(grads), lambda: tf.constant(1.0))) #train_op = optimizer.apply_gradients( # list(zip(grads, tvars)), global_step=global_step) // Change 12 apply grads using the cliped grads train_op = optimizer.apply_gradients( list(zip(clipped_grads, tvars)), global_step=global_step) # Normally the global step update is done inside of `apply_gradients`. # However, neither `AdamWeightDecayOptimizer` nor `LAMBOptimizer` do this. # But if you use a different optimizer, you should probably take this line # out. new_global_step = global_step + 1 train_op = tf.group(train_op, [global_step.assign(new_global_step)]) return train_op
def separate_gradient_update(self):
    """Build separate update ops for the denoising and ranking models.

    The examination (propensity) loss trains the variables collected under
    the "denoising_model" scope, while the ranking loss — optionally
    L2-regularized — trains the variables under the "ranking_model" scope.
    Each group gets its own optimizer instance with its own learning rate.

    Side effects: sets `self.loss`, `self.norm`, and `self.updates`.
    """
    with tf.variable_scope(tf.get_variable_scope(), reuse=tf.AUTO_REUSE):
        # Trainable variables, grouped by the scope they were created under.
        propensity_vars = tf.get_collection(
            tf.GraphKeys.TRAINABLE_VARIABLES, "denoising_model")
        ranker_vars = tf.get_collection(
            tf.GraphKeys.TRAINABLE_VARIABLES, "ranking_model")

        # Apply L2 regularization to the ranking model's parameters only
        # (the propensity model is deliberately left unregularized).
        if self.hparams.l2_loss > 0:
            for var in ranker_vars:
                self.rank_loss += self.hparams.l2_loss * tf.nn.l2_loss(var)
        self.loss = self.exam_loss + self.hparams.ranker_loss_weight * self.rank_loss

        propensity_grads = tf.gradients(self.exam_loss, propensity_vars)
        ranker_grads = tf.gradients(self.rank_loss, ranker_vars)

        # Clip the two gradient groups independently; the ranking model's
        # clipping budget is scaled by the ranker loss weight.
        if self.hparams.max_gradient_norm > 0:
            propensity_grads, propensity_norm = tf.clip_by_global_norm(
                propensity_grads, self.hparams.max_gradient_norm)
            ranker_grads, ranker_norm = tf.clip_by_global_norm(
                ranker_grads,
                self.hparams.max_gradient_norm * self.hparams.ranker_loss_weight)
        # Report one combined norm over the (possibly clipped) gradients.
        self.norm = tf.global_norm(propensity_grads + ranker_grads)

        # Separate optimizer per sub-model, each with its own learning rate.
        # Only the denoising update advances the global step, so it is
        # incremented once per training iteration.
        propensity_opt = self.optimizer_func(self.propensity_learning_rate)
        ranker_opt = self.optimizer_func(self.learning_rate)
        propensity_updates = propensity_opt.apply_gradients(
            zip(propensity_grads, propensity_vars),
            global_step=self.global_step)
        ranker_updates = ranker_opt.apply_gradients(
            zip(ranker_grads, ranker_vars))
        self.updates = tf.group(propensity_updates, ranker_updates)