def _backpropagates_gradient(
        self,
        tape: tf.GradientTape,
        models: List[tf.keras.Model],
        loss: tf.float32,
        optimizer: tf.keras.optimizers.Adam,
) -> None:
    """Apply one optimizer step: push the loss gradient into every model.

    The trainable variables of all given models are flattened into a
    single list, the tape differentiates the loss with respect to that
    list, and the optimizer applies the resulting gradients in place.
    """
    # Flatten the per-model variable lists into one combined list.
    all_variables = []
    for network in models:
        all_variables.extend(network.trainable_variables)
    grads = tape.gradient(loss, all_variables)
    optimizer.apply_gradients(zip(grads, all_variables))
def _optimize(
        self,
        train_vars: List[Variable],
        loss: Tensor,
        optim: Optimizer,
        tape: tf.GradientTape,
) -> None:
    """Take one gradient step on ``train_vars`` to reduce ``loss``.

    Under mixed precision the loss is scaled before differentiation and
    the gradients are unscaled afterwards (LossScaleOptimizer protocol).
    """
    use_loss_scaling = self.mixed_precision
    effective_loss = optim.get_scaled_loss(loss) if use_loss_scaling else loss
    grads = tape.gradient(effective_loss, train_vars)
    if use_loss_scaling:
        # Undo the loss scale so the applied gradients are true-valued.
        grads = optim.get_unscaled_gradients(grads)
    optim.apply_gradients(zip(grads, train_vars))
def grads_calc(tape: tf.GradientTape, last_cnn_output: Model,
               top_class_channel: List[List[int]]):
    """Compute Grad-CAM-style channel-weighted activations.

    Differentiates the top-class score with respect to the last
    convolutional activation, global-average-pools the gradient over the
    batch/spatial axes to get one importance weight per channel, and
    scales each feature-map channel by its weight.

    Args:
        tape: tape that recorded the forward pass.
        last_cnn_output: activation tensor of the last conv layer;
            assumes shape (1, H, W, C) — TODO confirm with caller.
        top_class_channel: class score(s) to explain.

    Returns:
        np.ndarray of shape (H, W, C): channel-weighted activation map.
    """
    # d(score)/d(activation): how strongly each activation drives the class.
    grads = tape.gradient(top_class_channel, last_cnn_output)
    # Global average pooling over batch and spatial axes -> one weight
    # per channel ("ack" in the original notation).
    pooled_grads = tf.reduce_mean(grads, axis=(0, 1, 2)).numpy()
    # Drop the batch dimension and continue in numpy.
    weights = last_cnn_output.numpy()[0]
    # Scale every channel by its pooled-gradient importance in a single
    # broadcasted multiply (replaces the per-channel Python loop).
    weights *= pooled_grads
    return weights
def dpg(
    q_max: tf.Tensor,
    a_max: tf.Tensor,
    tape: tf.GradientTape,
    dqda_clipping: float = None,
    clip_norm: bool = False,
) -> tf.Tensor:
    """Deterministic policy gradient loss, similar to trfl.dpg.

    Builds a surrogate squared-error loss whose gradient with respect to
    the actor weights equals ``-dq/da * da/dw``, i.e. gradient ascent on
    the critic value through the action.
    """
    # dq/da: sensitivity of the max Q value to the chosen action.
    grad_q_wrt_a = tape.gradient([q_max], [a_max])[0]
    if grad_q_wrt_a is None:
        raise ValueError('q_max needs to be a function of a_max.')

    # Optionally clip dq/da, either by norm or element-wise by value.
    if dqda_clipping is not None:
        if dqda_clipping <= 0:
            raise ValueError(
                'dqda_clipping should be bigger than 0, {} found'.format(
                    dqda_clipping))
        if clip_norm:
            grad_q_wrt_a = tf.clip_by_norm(grad_q_wrt_a, dqda_clipping,
                                           axes=-1)
        else:
            grad_q_wrt_a = tf.clip_by_value(grad_q_wrt_a,
                                            -1. * dqda_clipping,
                                            dqda_clipping)

    # stop_gradient keeps the Q network out of the backward pass, so the
    # gradient flows only through a_max (the actor).
    target_a = tf.stop_gradient(grad_q_wrt_a + a_max)
    # 0.5 * ||target_a - a_max||^2 recovers DPG (letting w be the actor
    # network weights):
    #   d(loss)/dw = (target_a - a_max) * [d(target_a)/dw - d(a_max)/dw]
    #              = dq/da * [0 - d(a_max)/dw]   # by stop_gradient
    #              = - dq/da * da/dw
    return 0.5 * tf.reduce_sum(tf.square(target_a - a_max), axis=-1)
def optimizer_minimize(self, loss: tf.Tensor, tape: tf.GradientTape,
                       optimizer: tf.optimizers.Optimizer, model: k.Model):
    """Scale, differentiate and apply gradients for one distributed step.

    The loss is first divided by the number of in-sync replicas (so the
    cross-replica gradient sum equals the global-batch gradient). Under a
    LossScaleOptimizer it is additionally loss-scale multiplied before
    differentiation and the gradients are unscaled afterwards.

    Args:
        loss (tf.Tensor): per-replica loss.
        tape (tf.GradientTape): tape that recorded the forward pass.
        optimizer (tf.optimizers.Optimizer): optimizer, possibly a
            mixed-precision LossScaleOptimizer.
        model (k.Model): model whose trainable variables are updated.

    Returns:
        tf.Tensor: the replica-scaled loss.
    """
    uses_loss_scaling = isinstance(
        optimizer, tf.keras.mixed_precision.experimental.LossScaleOptimizer)
    with tape:
        scaled_loss = loss / self.strategy.num_replicas_in_sync
        backprop_loss = scaled_loss
        if uses_loss_scaling:
            # BUGFIX: scale the replica-scaled loss, not the raw `loss` —
            # the original passed `loss` here, silently dropping the
            # 1/num_replicas_in_sync factor in the mixed-precision path.
            backprop_loss = optimizer.get_scaled_loss(scaled_loss)
    grad = tape.gradient(backprop_loss, model.trainable_variables)
    if uses_loss_scaling:
        # Undo the loss scale so true-valued gradients are applied.
        grad = optimizer.get_unscaled_gradients(grad)
    optimizer.apply_gradients(zip(grad, model.trainable_variables))
    # Always report the replica-scaled loss; returning the loss-scale
    # multiplied value (as the original did under mixed precision) is
    # not a meaningful training metric.
    return scaled_loss
def train_complete(self, tape: tf.GradientTape, training_info,
                   valid_masks=None):
    """Finish one training iteration: reduce the loss and apply gradients.

    Args:
        tape (tf.GradientTape): the tape under which the preceding
            `train_interval` `train_step()` calls were recorded.
        training_info (nested Tensor): information collected for
            training, batched from each `info` returned by `train_step()`
        valid_masks (tf.Tensor): masks indicating which samples are
            valid. shape=(T, B), dtype=tf.float32
    Returns:
        loss_info (LossInfo): loss information
        grads_and_vars (list[tuple]): list of gradient and variable tuples
    """
    def _reduce(l):
        # Mask out invalid samples before averaging when a mask is given.
        return tf.reduce_mean(l * valid_masks if valid_masks is not None
                              else l)

    with tape:
        loss_info = tf.nest.map_structure(_reduce,
                                          self.calc_loss(training_info))
    variables = self.variables
    grads = tape.gradient(loss_info.loss, variables)
    grads_and_vars = tuple(zip(grads, variables))
    self._optimizer.apply_gradients(grads_and_vars)
    return loss_info, grads_and_vars
def update(self, tape: tf.GradientTape, loss):
    """Apply one optimizer step on ``self.net`` from ``loss``'s gradient."""
    params = self.net.trainable_variables
    self.optimizer.apply_gradients(zip(tape.gradient(loss, params), params))
def train_complete(self,
                   tape: tf.GradientTape,
                   training_info,
                   valid_masks=None,
                   weight=1.0):
    """Complete one iteration of training.

    `train_complete` should calculate gradients and update parameters
    using those gradients.

    Args:
        tape (tf.GradientTape): the tape which are used for calculating
            gradient. All the previous `train_interval` `train_step()`
            are called under the context of this tape.
        training_info (nested Tensor): information collected for training.
            It is batched from each `info` returned by `train_step()`
        valid_masks (tf.Tensor): masks indicating which samples are valid.
            shape=(T, B), dtype=tf.float32
        weight (float): weight for this batch. Loss will be multiplied with
            this weight before calculating gradient
    Returns:
        loss_info (LossInfo): loss information
        grads_and_vars (list[tuple]): list of gradient and variable tuples
    """
    # The loss reduction happens inside the tape context so that the
    # masking and weighting are themselves recorded for differentiation.
    with tape:
        loss_info = self.calc_loss(training_info)
        if valid_masks is not None:
            # Only (T, B)-shaped entries are masked and averaged; scalar
            # entries (e.g. already-reduced losses) pass through unchanged.
            loss_info = tf.nest.map_structure(
                lambda l: tf.reduce_mean(l * valid_masks)
                if len(l.shape) == 2 else l, loss_info)
        else:
            loss_info = tf.nest.map_structure(lambda l: tf.reduce_mean(l),
                                              loss_info)
        if isinstance(loss_info.scalar_loss, tf.Tensor):
            # scalar_loss must already be fully reduced before folding it
            # into the main loss.
            assert len(loss_info.scalar_loss.shape) == 0
            loss_info = loss_info._replace(
                loss=loss_info.loss + loss_info.scalar_loss)
        loss = weight * loss_info.loss

    opt_and_var_sets = self._get_cached_opt_and_var_sets()
    all_grads_and_vars = ()
    # One gradient computation per (optimizer, variable-set) pair.
    # NOTE(review): tape.gradient is called once per iteration, which
    # requires `tape` to be persistent — confirm at the call site.
    for i, (optimizer, vars) in enumerate(opt_and_var_sets):
        if len(vars) == 0:
            continue
        assert optimizer is not None, "optimizer needs to be provides at __init__()"
        grads = tape.gradient(loss, vars)
        grads_and_vars = tuple(zip(grads, vars))
        # The returned pairs are the *unclipped* gradients; clipping below
        # only affects what is applied, not what is reported.
        all_grads_and_vars = all_grads_and_vars + grads_and_vars
        if self._gradient_clipping is not None:
            if self._clip_by_global_norm:
                grads, global_norm = tf.clip_by_global_norm(
                    grads, self._gradient_clipping)
                grads_and_vars = tuple(zip(grads, vars))
                # NOTE(review): the lambda captures loop variables i and
                # global_norm; assumes run_if invokes it immediately —
                # confirm in alf.utils.common.
                alf.utils.common.run_if(
                    alf.utils.common.should_record_summaries(), lambda: tf.
                    summary.scalar("global_grad_norm/%s" % i, global_norm))
            else:
                # Per-gradient norm clipping instead of global-norm clipping.
                grads_and_vars = eager_utils.clip_gradient_norms(
                    grads_and_vars, self._gradient_clipping)
        optimizer.apply_gradients(grads_and_vars)
    # Post-training hook for subclasses.
    self.after_train(training_info)
    return loss_info, all_grads_and_vars
def minimize_variables(this_loss, vars: tf.Tensor,
                       optimizer: tf.optimizers.Optimizer,
                       tape: tf.GradientTape):
    """Differentiate ``this_loss`` w.r.t. ``vars`` and apply one step."""
    gradient_list = tape.gradient(this_loss, vars)
    pairs = zip(gradient_list, vars)
    optimizer.apply_gradients(pairs)
def train_complete(self,
                   tape: tf.GradientTape,
                   training_info: TrainingInfo,
                   weight=1.0):
    """Complete one iteration of training.

    `train_complete` should calculate gradients and update parameters
    using those gradients.

    Args:
        tape (tf.GradientTape): the tape which are used for calculating
            gradient. All the previous `train_interval` `train_step()`
            for are called under the context of this tape.
        training_info (TrainingInfo): information collected for training.
            training_info.info are the batched from each policy_step.info
            returned by train_step()
        weight (float): weight for this batch. Loss will be multiplied with
            this weight before calculating gradient
    Returns:
        a tuple of the following:
        loss_info (LossInfo): loss information
        grads_and_vars (list[tuple]): list of gradient and variable tuples
    """
    # Steps at episode boundaries (StepType.LAST) are masked out of the loss.
    valid_masks = tf.cast(
        tf.not_equal(training_info.step_type, StepType.LAST), tf.float32)

    # reward shaping
    if self._reward_shaping_fn is not None:
        # record unshaped extrinsic rewards given by the environment
        self.add_reward_summary("reward/raw", training_info.reward)
        training_info = training_info._replace(
            reward=self._reward_shaping_fn(training_info.reward))
        # record shaped extrinsic rewards actually used for training
        self.add_reward_summary("reward/extrinsic", training_info.reward)

    # Loss reduction and batch weighting run inside the tape context so
    # they are recorded for differentiation.
    with tape:
        loss_info = self.calc_loss(training_info)
        loss_info = tf.nest.map_structure(
            lambda l: tf.reduce_mean(l * valid_masks), loss_info)
        loss = weight * loss_info.loss

    var_sets = self._get_cached_var_sets()
    all_grads_and_vars = ()
    # One gradient computation per (variable-set, optimizer) pair.
    # NOTE(review): tape.gradient is called once per iteration, which
    # requires `tape` to be persistent — confirm at the call site.
    for i, vars, optimizer in zip(
            range(len(var_sets)), var_sets, self._optimizers):
        grads = tape.gradient(loss, vars)
        grads_and_vars = tuple(zip(grads, vars))
        # The returned pairs are the *unclipped* gradients; clipping below
        # only affects what is applied, not what is reported.
        all_grads_and_vars = all_grads_and_vars + grads_and_vars
        if self._gradient_clipping is not None:
            if self._clip_by_global_norm:
                grads, global_norm = tf.clip_by_global_norm(
                    grads, self._gradient_clipping)
                grads_and_vars = tuple(zip(grads, vars))
                # NOTE(review): the lambda captures loop variables i and
                # global_norm; assumes run_if invokes it immediately —
                # confirm in alf.utils.common.
                alf.utils.common.run_if(
                    alf.utils.common.should_record_summaries(),
                    lambda: tf.summary.scalar("global_grad_norm/%s" % i,
                                              global_norm))
            else:
                # Per-gradient norm clipping instead of global-norm clipping.
                grads_and_vars = eager_utils.clip_gradient_norms(
                    grads_and_vars, self._gradient_clipping)
        optimizer.apply_gradients(grads_and_vars)
    return loss_info, all_grads_and_vars
def _calculate_and_apply_gradients(model: tf.keras.Sequential,
                                   optimizer: tf.keras.optimizers.Optimizer,
                                   gradient_tape: tf.GradientTape,
                                   loss: tf.Tensor):
    """Differentiate ``loss`` w.r.t. the model's trainable variables and
    apply the gradients with ``optimizer``.

    Note: the ``optimizer`` annotation previously named the module
    ``tf.keras.optimizers`` and ``loss`` was annotated ``[float]`` (a
    list literal, not a type); both are corrected here.
    """
    gradients = gradient_tape.gradient(loss, model.trainable_variables)
    optimizer.apply_gradients(zip(gradients, model.trainable_variables))