def train_step( self, inputs: Tuple[NestedTensorDict, NestedTensorDict], model: ModelType, optimizer: tf.keras.optimizers.Optimizer, metrics: Optional[Sequence[tf.keras.metrics.Metric]] = None ) -> Dict[str, tf.Tensor]:
  """Does one forward and backward pass.

  Args:
    inputs: A `(features, labels)` tuple of nested tensor dicts.
    model: The model defining the forward pass.
    optimizer: The optimizer for this training step.
    metrics: Optional metrics; each metric's name must be a key of the
      loss dict returned by `model.compute_losses`.

  Returns:
    A dictionary of logs containing the (unscaled) per-replica loss.
  """
  features, labels = inputs
  input_dict = {"features": features}
  if self.task_config.model_call_needs_labels:
    input_dict["labels"] = labels
  is_mixed_precision = isinstance(
      optimizer, tf.keras.mixed_precision.LossScaleOptimizer)
  with tf.GradientTape() as tape:
    outputs = model(**input_dict, training=True)
    loss, loss_dict = model.compute_losses(labels=labels, outputs=outputs)
    # Scale for the sum-allreduce the optimizer performs across replicas.
    # BUG FIX: keep the scaled value separate so the reported loss is the
    # raw per-replica loss, not the replica-divided / loss-scaled one.
    scaled_loss = loss / tf.distribute.get_strategy().num_replicas_in_sync
    if is_mixed_precision:
      # Loss scaling for numerical stability under mixed precision.
      scaled_loss = optimizer.get_scaled_loss(scaled_loss)
  tvars = model.trainable_variables
  grads = tape.gradient(scaled_loss, tvars)
  if is_mixed_precision:
    grads = optimizer.get_unscaled_gradients(grads)
  optimizer.apply_gradients(list(zip(grads, tvars)))
  logs = {"loss": loss}
  if metrics:
    for m in metrics:
      m.update_state(loss_dict[m.name])
  return logs
def step_train(conf, data: dict, model: MoCo, opt: tf.keras.optimizers.Optimizer, step):
    """Runs one MoCo training step; logs clustering metrics every 100 steps.

    Returns the scalar loss as a numpy value.
    """
    label = data['label']
    # Evaluation is triggered only on multiples of 100; otherwise pass -1.
    eval_step = step if step % 100 == 0 else -1
    if step == 0:
        # Warm-up pass so the model can seed its queues/centroids.
        _, _, _ = model(data, step=eval_step)
        model.update_initial()
    with tf.GradientTape() as tape:
        assignment, agg_n, agg_k = model(data, step=eval_step)
        loss = model.losses[0]
    grads = tape.gradient(loss, model.trainable_scope)
    opt.apply_gradients(zip(grads, model.trainable_scope))
    model.update_queues(agg_n, agg_k)
    model.update_momentum()
    if eval_step > 0:
        # Periodic clustering evaluation on the aggregated features.
        acc, nmi, ari, sc = hook(agg_n.numpy(), label.numpy(), assignment.numpy())
        tf.summary.scalar('eval/nmi', nmi, step)
        tf.summary.scalar('eval/acc', acc, step)
        tf.summary.scalar('eval/ari', ari, step)
        tf.summary.scalar('eval/sc', sc, step)
    return loss.numpy()
def training(self, policy_model: tf.keras.Model, value_model: tf.keras.Model, policy_optimizer: tf.keras.optimizers.Optimizer, value_optimizer: tf.keras.optimizers.Optimizer, observations0: tf.Tensor, actions: tf.Tensor, returns: tf.Tensor):
    """One actor-critic update: policy gradient with a value baseline.

    Args:
        policy_model: Outputs (means, log_stds) of a Gaussian policy.
        value_model: State-value estimator used as the baseline.
        policy_optimizer: Optimizer for the policy network.
        value_optimizer: Optimizer for the value network.
        observations0: Batch of observations.
        actions: Actions taken for those observations.
        returns: Empirical returns used as regression targets.
    """
    # Persistent tape: we take two gradients (policy and value) from one pass.
    with tf.GradientTape(persistent=True) as tape:
        means, log_stds = policy_model(observations0)
        stddevs = tf.math.exp(log_stds)
        values = value_model(observations0)
        # Advantage-style baseline; only policy variables are differentiated
        # for the policy loss, so value gradients do not leak in.
        baseline = returns - values
        dists = tfp.distributions.Normal(means, stddevs)
        log_probs = dists.log_prob(actions)
        policy_loss = -tf.math.reduce_mean(log_probs * baseline)
        values_loss = self.huber_loss(returns, values)
    policy_grads = tape.gradient(policy_loss, policy_model.trainable_variables)
    policy_optimizer.apply_gradients(
        zip(policy_grads, policy_model.trainable_variables))
    value_grads = tape.gradient(values_loss, value_model.trainable_variables)
    value_optimizer.apply_gradients(
        zip(value_grads, value_model.trainable_variables))
    # BUG FIX: persistent tapes hold intermediate tensors until garbage
    # collected; release explicitly once both gradients are taken.
    del tape
def step_train(mu_hat, data: dict, model: MiCE, opt: tf.keras.optimizers.Optimizer, step):
    """Runs one MiCE training step and accumulates the mu_hat estimate.

    Returns a (loss, mu_hat) tuple of numpy values.
    """
    label = data['label']
    # Evaluation fires only on multiples of 100; -1 suppresses it.
    eval_step = step if step % 100 == 0 else -1
    if step == 0:
        # Warm-up forward pass (no training) to seed the model state.
        _, _, _ = model(data, step=-1, training=False)
        model.update_initial()
    with tf.GradientTape() as tape:
        assignment, f, v = model(data, step=eval_step)
        loss = model.losses[0]
    grads = tape.gradient(loss, model.trainable_scope)
    opt.apply_gradients(zip(grads, model.trainable_scope))
    model.update_queues(v)
    model.update_momentum()
    # Running accumulation of the cluster-mean estimate.
    mu_hat = mu_hat + model.mu_hat(assignment, v).numpy()
    if eval_step > 0:
        acc, nmi, ari, sc = hook(f.numpy(), label.numpy(), assignment.numpy())
        tf.summary.scalar('eval/nmi', nmi, step)
        tf.summary.scalar('eval/acc', acc, step)
        tf.summary.scalar('eval/ari', ari, step)
        tf.summary.scalar('eval/sc', sc, step)
    return loss.numpy(), mu_hat
def training(self, actor_model: tf.keras.Model, critic_model: tf.keras.Model, target_actor_model: tf.keras.Model, target_critic_model: tf.keras.Model, actor_optimizer: tf.keras.optimizers.Optimizer, critic_optimizer: tf.keras.optimizers.Optimizer, observations0: tf.Tensor, observations1: tf.Tensor, actions: tf.Tensor, rewards: tf.Tensor, discount_factor: float, tau: float):
    """One DDPG update: critic regression, actor ascent, target soft-update.

    Args:
        actor_model / critic_model: Online networks being trained.
        target_actor_model / target_critic_model: Slowly-tracking targets.
        actor_optimizer / critic_optimizer: Their respective optimizers.
        observations0: Observations at time t.
        observations1: Observations at time t+1.
        actions: Actions taken at time t.
        rewards: Rewards received.
        discount_factor: Discount applied to the bootstrapped target.
        tau: Soft-update rate for the target networks.
    """
    # --- Critic step: regress Q(s, a) toward the bootstrapped target y.
    with tf.GradientTape() as tape:
        target_actions = target_actor_model(observations1)
        y = rewards + discount_factor * target_critic_model(
            [observations1, target_actions])
        values = critic_model([observations0, actions])
        critic_loss = self.huber_loss(y, values)
    critic_grads = tape.gradient(critic_loss, critic_model.trainable_variables)
    critic_optimizer.apply_gradients(
        zip(critic_grads, critic_model.trainable_variables))
    # --- Actor step: maximize the critic's value of the actor's actions.
    with tf.GradientTape() as tape:
        actions = actor_model(observations0)
        values = critic_model([observations0, actions])
        actor_loss = -tf.math.reduce_mean(values)
    actor_grads = tape.gradient(actor_loss, actor_model.trainable_variables)
    actor_optimizer.apply_gradients(
        zip(actor_grads, actor_model.trainable_variables))
    # --- Track the online networks with rate tau.
    self.update_target(target_actor_model, actor_model, tau)
    self.update_target(target_critic_model, critic_model, tau)
def train_step(self, inputs: Tuple[Any, Any], model: tf.keras.Model, optimizer: tf.keras.optimizers.Optimizer, metrics: Optional[List[Any]] = None):
  """Does forward and backward.

  Args:
    inputs: A (features, labels) tuple.
    model: The model, forward pass definition.
    optimizer: The optimizer for this training step.
    metrics: A nested structure of metrics objects.

  Returns:
    A dictionary of logs.
  """
  features, labels = inputs
  partition_dims = self.task_config.train_input_partition_dims
  if partition_dims:
    # Spatial partitioning: split the input across logical devices.
    features = tf.distribute.get_strategy().experimental_split_to_logical_devices(
        features, partition_dims)
  uses_loss_scaling = isinstance(
      optimizer, tf.keras.mixed_precision.LossScaleOptimizer)
  with tf.GradientTape() as tape:
    # Cast outputs to float32 so losses are computed in full precision
    # under mixed_float16 / mixed_bfloat16 policies.
    outputs = tf.nest.map_structure(
        lambda t: tf.cast(t, tf.float32), model(features, training=True))
    # Per-replica loss.
    loss = self.build_losses(
        model_outputs=outputs, labels=labels, aux_losses=model.losses)
    # Pre-divide by replica count: the optimizer allreduce sums gradients.
    scaled_loss = loss / tf.distribute.get_strategy().num_replicas_in_sync
    if uses_loss_scaling:
      # Scale up for numerical stability; gradients are unscaled below.
      scaled_loss = optimizer.get_scaled_loss(scaled_loss)
  trainable = model.trainable_variables
  gradients = tape.gradient(scaled_loss, trainable)
  if uses_loss_scaling:
    gradients = optimizer.get_unscaled_gradients(gradients)
  optimizer.apply_gradients(list(zip(gradients, trainable)))
  logs = {self.loss: loss}
  if metrics:
    self.process_metrics(metrics, labels, outputs)
    logs.update({m.name: m.result() for m in metrics})
  return logs
def train_step(self, inputs, model: tf.keras.Model, optimizer: tf.keras.optimizers.Optimizer, metrics):
  """Does forward and backward.

  Args:
    inputs: a dictionary of input tensors (also used as the labels).
    model: the model, forward pass definition.
    optimizer: the optimizer for this training step.
    metrics: a nested structure of metrics objects.

  Returns:
    A dictionary of logs.
  """
  with tf.GradientTape() as tape:
    model_outputs = model(inputs, training=True)
    # Per-replica loss; note metrics are passed into build_losses here.
    loss = self.build_losses(
        labels=inputs,
        model_outputs=model_outputs,
        metrics=metrics,
        aux_losses=model.losses)
    # Pre-divide by replica count: the default allreduce sums gradients.
    # TODO(b/154564893): enable loss scaling.
    replicas = tf.distribute.get_strategy().num_replicas_in_sync
    scaled = loss / replicas
  trainable = model.trainable_variables
  gradients = tape.gradient(scaled, trainable)
  optimizer.apply_gradients(list(zip(gradients, trainable)))
  self.process_metrics(metrics, inputs, model_outputs)
  return {self.loss: loss}
def train_batch(images, labels, loss: list, model: tf.keras.Model, optimizer: tf.keras.optimizers.Optimizer) -> tuple:
    """Trains the model on one batch.

    :param images: the batch of input images
    :param labels: the labels for the batch (one entry per output head)
    :param loss: the list of loss functions, one per output head
    :param model: the model to train
    :param optimizer: the optimization technique
    :return: (per-head prediction losses, total loss incl. regularization)
    """
    with tf.GradientTape() as tape:
        tape.watch(model.trainable_variables)
        outputs = model(images)
        regularization_loss = tf.reduce_sum(model.losses)
        if len(loss) == 1:
            # Single-head case: wrap so zip() pairs the whole output with
            # the single loss function (the None is never consumed).
            outputs = (outputs, None)
        pred_loss = [fn(lbl, out)
                     for out, lbl, fn in zip(outputs, labels, loss)]
        total_loss = tf.reduce_sum(pred_loss) + regularization_loss
    grads = tape.gradient(total_loss, model.trainable_variables)
    optimizer.apply_gradients(zip(grads, model.trainable_variables))
    return pred_loss, total_loss
def train(model: callable, dataset: callable, optimizer: tf.keras.optimizers.Optimizer, n_epochs: int = 5):
    """Trains a model with plain gradient descent over mini-batches.

    Parameters
    ----------
    model : callable
        Model callable as f(X), where X is the feature matrix for a batch.
    dataset : callable
        Returns an iterable of (X, y) batches when called.
    optimizer : tf.keras.optimizers.Optimizer
        Optimizer supporting apply_gradients.
    n_epochs : int, optional
        Number of epochs, by default 5.
    """
    for epoch in range(n_epochs):
        print(f"epoch: {epoch+1:>4}", end='')
        running_loss = 0.0
        n_batches = 0
        for features, target in dataset():
            with tf.GradientTape() as tape:
                predicted = model(features)
                batch_loss = mean_squared_error(predicted, target)
            running_loss += np.mean(batch_loss)
            n_batches += 1
            grads = tape.gradient(batch_loss, model.trainable_variables)
            optimizer.apply_gradients(zip(grads, model.trainable_variables))
        # Report the mean per-batch loss for the epoch.
        print(f", loss={running_loss/n_batches:>9.2f}")
def train_step(initial_state: tf.Tensor, model: tf.keras.Model, optimizer: tf.keras.optimizers.Optimizer, gamma: float, max_steps_per_episode: int) -> tf.Tensor:
    """Runs one actor-critic training step and returns the episode reward."""
    with tf.GradientTape() as tape:
        # Collect one episode of training data with the current policy.
        action_probs, values, rewards = run_episode(
            initial_state, model, max_steps_per_episode)
        # Discounted expected returns for each timestep.
        returns = get_expected_return(rewards, gamma)
        # Give every sequence an explicit trailing axis of size 1.
        action_probs = tf.expand_dims(action_probs, 1)
        values = tf.expand_dims(values, 1)
        returns = tf.expand_dims(returns, 1)
        # Combined actor-critic loss.
        loss = compute_loss(action_probs, values, returns)
    grads = tape.gradient(loss, model.trainable_variables)
    optimizer.apply_gradients(zip(grads, model.trainable_variables))
    episode_reward = tf.math.reduce_sum(rewards)
    return episode_reward
def __initialize_optimizer(model: utils.PersonalizationLayersDecorator, optimizer: tf.keras.optimizers.Optimizer):
    """Forces creation of the optimizer's slot variables.

    Keras optimizers create their state lazily on first use, so apply a
    zero-gradient update to the base model's trainables.
    """
    base_vars = model.base_model.trainable_variables
    zeros = tf.nest.map_structure(tf.zeros_like, base_vars)
    optimizer.apply_gradients(zip(zeros, base_vars))
    # The zero update must have materialized the optimizer state.
    assert optimizer.variables()
def train_step(initial_state: tf.Tensor, model: tf.keras.Model, optimizer: tf.keras.optimizers.Optimizer, gamma: float, max_steps_per_episode: int) -> tf.Tensor:
    """One actor-critic update over a full episode.

    GradientTape records the episode rollout and loss so the model
    parameters can be updated by automatic differentiation.

    :return: the total (undiscounted) reward of the episode
    """
    with tf.GradientTape() as tape:
        # Roll out one episode with the current policy.
        action_probs, values, rewards = run_episode(
            initial_state, model, max_steps_per_episode)
        # Compute discounted returns.
        returns = get_expected_return(rewards, gamma)
        # Reshape each sequence to a column vector.
        action_probs = tf.expand_dims(action_probs, 1)
        values = tf.expand_dims(values, 1)
        returns = tf.expand_dims(returns, 1)
        loss = compute_loss(action_probs, values, returns)
    grads = tape.gradient(loss, model.trainable_variables)
    optimizer.apply_gradients(zip(grads, model.trainable_variables))
    return tf.math.reduce_sum(rewards)
def discriminator_train_step( generator: Generator, discriminator: Discriminator, X: tf.Tensor, Z: tf.Tensor, c: tf.Tensor, optimizer: tf.keras.optimizers.Optimizer) -> tf.Tensor:
    """Single training step of the discriminator.

    Arguments:
        generator {Generator} -- the generator instance
        discriminator {Discriminator} -- the discriminator instance
        X {tf.Tensor} -- the real sample input
        Z {tf.Tensor} -- the random noise
        c {tf.Tensor} -- the conditional information
        optimizer {tf.keras.optimizers.Optimizer} -- optimizer for the update

    Returns:
        tf.Tensor -- the discriminator loss of the current step
    """
    with tf.GradientTape() as tape:
        fake_sample = generator(Z, c)
        real_score = discriminator(X)
        fake_score = discriminator(fake_sample)
        loss = discriminator_loss(real_score, fake_score)
    # Only the discriminator's parameters are updated here.
    grads = tape.gradient(loss, discriminator.trainable_variables)
    optimizer.apply_gradients(zip(grads, discriminator.trainable_variables))
    return loss
def _initialize_optimizer_vars(model: tff.learning.Model, optimizer: tf.keras.optimizers.Optimizer):
  """Ensures variables holding the state of `optimizer` are created.

  Applies a zero-gradient update, which forces the lazily-created Keras
  optimizer slot variables (momentum, Adam moments, ...) into existence.
  """
  # Fetch the weights once; the original called _get_weights(model) twice
  # for the same value (assumed pure — confirm _get_weights has no side
  # effects before relying on this).
  model_weights = _get_weights(model)
  delta = tf.nest.map_structure(tf.zeros_like, model_weights.trainable)
  grads_and_vars = tf.nest.map_structure(lambda x, v: (x, v), delta,
                                         model_weights.trainable)
  optimizer.apply_gradients(grads_and_vars, name='server_update')
def train_actor(act: tf.keras.Model, cri: tf.keras.Model, act_opt: tf.keras.optimizers.Optimizer, states):
    """Updates the actor by ascending the critic's value estimate."""
    with tf.GradientTape() as tape:
        chosen_actions = act(states, training=True)
        q_values = cri([states, chosen_actions], training=True)
        # Negative mean Q: minimizing this maximizes the critic's value.
        actor_loss = -tf.reduce_mean(q_values)
    grads = tape.gradient(actor_loss, act.trainable_variables)
    act_opt.apply_gradients(zip(grads, act.trainable_variables))
def train_step(self, inputs, model: tf.keras.Model, optimizer: tf.keras.optimizers.Optimizer, metrics=None):
  """Does forward and backward.

  With distribution strategies, this method runs on devices.

  Args:
    inputs: a dictionary of input tensors, or a (features, labels) tuple.
    model: the model, forward pass definition.
    optimizer: the optimizer for this training step.
    metrics: a nested structure of metrics objects.

  Returns:
    A dictionary of logs.
  """
  if isinstance(inputs, tuple) and len(inputs) == 2:
    features, labels = inputs
  else:
    # Self-supervised style: the input serves as its own label.
    features, labels = inputs, inputs
  with tf.GradientTape() as tape:
    outputs = model(features, training=True)
    # Computes per-replica loss. If the model was compiled with a loss,
    # combine it with the task loss (aux losses counted exactly once).
    if model.compiled_loss:
      loss = model.compiled_loss(
          labels, outputs, regularization_losses=model.losses)
      loss += self.build_losses(
          labels=labels, model_outputs=outputs, aux_losses=None)
    else:
      loss = self.build_losses(
          labels=labels, model_outputs=outputs, aux_losses=model.losses)
    # Scales loss as the default gradients allreduce performs sum inside
    # the optimizer.
    scaled_loss = loss / tf.distribute.get_strategy().num_replicas_in_sync
    # For mixed precision, when a LossScaleOptimizer is used, the loss is
    # scaled to avoid numeric underflow.
    if isinstance(optimizer, tf.keras.mixed_precision.LossScaleOptimizer):
      scaled_loss = optimizer.get_scaled_loss(scaled_loss)
  tvars = model.trainable_variables
  grads = tape.gradient(scaled_loss, tvars)
  # Undo the loss scaling on the gradients before applying them.
  if isinstance(optimizer, tf.keras.mixed_precision.LossScaleOptimizer):
    grads = optimizer.get_unscaled_gradients(grads)
  optimizer.apply_gradients(list(zip(grads, tvars)))
  # Log the unscaled per-replica loss.
  logs = {self.loss: loss}
  if metrics:
    self.process_metrics(metrics, labels, outputs)
  if model.compiled_metrics:
    self.process_compiled_metrics(model.compiled_metrics, labels, outputs)
  # Both task metrics (if any) and all compiled-model metrics are logged.
  logs.update({m.name: m.result() for m in metrics or []})
  logs.update({m.name: m.result() for m in model.metrics})
  return logs
def train_step(self, inputs: Tuple[Any, Any], model: tf.keras.Model, optimizer: tf.keras.optimizers.Optimizer, metrics: Optional[List[Any]] = None):
  """Does forward and backward.

  Args:
    inputs: a tuple of (images, labels); labels is a dict carrying the
      detection ground truth (anchor boxes, gt boxes/classes/masks and
      image_info). Exact schema comes from the task's parser — confirm
      against the dataloader.
    model: the model, forward pass definition.
    optimizer: the optimizer for this training step.
    metrics: a nested structure of metrics objects; each metric name must
      be a key of the dict returned by build_losses.

  Returns:
    A dictionary of logs.
  """
  images, labels = inputs
  num_replicas = tf.distribute.get_strategy().num_replicas_in_sync
  with tf.GradientTape() as tape:
    outputs = model(
        images,
        # image_info row 1 holds the (possibly resized) image shape —
        # presumably [height, width]; verify against the parser.
        image_shape=labels['image_info'][:, 1, :],
        anchor_boxes=labels['anchor_boxes'],
        gt_boxes=labels['gt_boxes'],
        gt_classes=labels['gt_classes'],
        # Masks are only fed when the task is configured to include them.
        gt_masks=(labels['gt_masks'] if self.task_config.model.include_mask
                  else None),
        training=True)
    # Cast outputs to float32 so loss math runs in full precision under
    # mixed_float16 / mixed_bfloat16 policies.
    outputs = tf.nest.map_structure(lambda x: tf.cast(x, tf.float32), outputs)
    # Computes per-replica loss.
    losses = self.build_losses(outputs=outputs, labels=labels,
                               aux_losses=model.losses)
    # Pre-divide by replica count: the optimizer allreduce sums gradients.
    scaled_loss = losses['total_loss'] / num_replicas
    # For mixed_precision policy, when LossScaleOptimizer is used, loss is
    # scaled for numerical stability.
    if isinstance(optimizer, tf.keras.mixed_precision.LossScaleOptimizer):
      scaled_loss = optimizer.get_scaled_loss(scaled_loss)
  tvars = model.trainable_variables
  grads = tape.gradient(scaled_loss, tvars)
  # Scales back gradient when LossScaleOptimizer is used.
  if isinstance(optimizer, tf.keras.mixed_precision.LossScaleOptimizer):
    grads = optimizer.get_unscaled_gradients(grads)
  optimizer.apply_gradients(list(zip(grads, tvars)))
  # Log the unscaled total loss; metrics pull their values by name.
  logs = {self.loss: losses['total_loss']}
  if metrics:
    for m in metrics:
      m.update_state(losses[m.name])
  return logs
def train_step(self, inputs: Tuple[Any, Any], model: tf.keras.Model, optimizer: tf.keras.optimizers.Optimizer, metrics: Optional[List[Any]] = None) -> Mapping[str, Any]:
  """Does forward and backward.

  This example assumes input is a tuple of (features, labels), which follows
  the output from the data loader, i.e., Parser. The output from Parser is
  fed into train_step to perform one forward and backward pass. Other data
  structures, such as a dictionary, can also be used, as long as they are
  consistent between the Parser output and the input used here.

  Args:
    inputs: A tuple of input tensors of (features, labels).
    model: A tf.keras.Model instance.
    optimizer: The optimizer for this training step.
    metrics: A nested structure of metrics objects.

  Returns:
    A dictionary of logs.
  """
  features, labels = inputs
  replica_count = tf.distribute.get_strategy().num_replicas_in_sync
  loss_scaling = isinstance(
      optimizer, tf.keras.mixed_precision.LossScaleOptimizer)
  with tf.GradientTape() as tape:
    # Cast outputs to float32 so losses are computed in full precision
    # under mixed_float16 / mixed_bfloat16 policies.
    outputs = tf.nest.map_structure(
        lambda t: tf.cast(t, tf.float32), model(features, training=True))
    # Per-replica loss.
    loss = self.build_losses(
        model_outputs=outputs, labels=labels, aux_losses=model.losses)
    # Pre-divide by replica count: the default allreduce sums gradients.
    scaled_loss = loss / replica_count
    if loss_scaling:
      # Scale up for numerical stability; undone on the gradients below.
      scaled_loss = optimizer.get_scaled_loss(scaled_loss)
  trainable = model.trainable_variables
  gradients = tape.gradient(scaled_loss, trainable)
  if loss_scaling:
    gradients = optimizer.get_unscaled_gradients(gradients)
  optimizer.apply_gradients(list(zip(gradients, trainable)))
  logs = {self.loss: loss}
  if metrics:
    self.process_metrics(metrics, labels, outputs)
  return logs
def initialize_optimizer_vars(model: tf.keras.Model, optimizer: tf.keras.optimizers.Optimizer):
  """Ensures variables holding the state of `optimizer` are created.

  Keras optimizers create slot variables lazily, so apply a zero-gradient
  update to force them into existence.
  """
  # CONSISTENCY FIX: the original paired `trainable_variables` (for the
  # zeros) with `trainable_weights` (for the vars). They are aliases in
  # Keras, but using a single accessor makes the pairing obviously safe.
  tvars = model.trainable_variables
  delta = tf.nest.map_structure(tf.zeros_like, tvars)
  grads_and_vars = tf.nest.map_structure(lambda x, v: (x, v), delta, tvars)
  optimizer.apply_gradients(grads_and_vars, name='server_update')
  assert optimizer.variables()
def train_step(self, inputs: Tuple[Any, Any], model: tf.keras.Model, optimizer: tf.keras.optimizers.Optimizer, metrics: Optional[List[Any]] = None):
  """Does forward and backward.

  Args:
    inputs: A (features, labels) tuple.
    model: The model, forward pass definition.
    optimizer: The optimizer for this training step.
    metrics: A nested structure of metrics objects; each metric's name
      must be a key of the dict returned by build_losses.

  Returns:
    A dictionary of logs.
  """
  features, labels = inputs
  replica_count = tf.distribute.get_strategy().num_replicas_in_sync
  loss_scaling = isinstance(
      optimizer, tf.keras.mixed_precision.LossScaleOptimizer)
  with tf.GradientTape() as tape:
    # Cast to float32 so losses are computed in full precision under
    # mixed_float16 / mixed_bfloat16 policies.
    outputs = tf.nest.map_structure(
        lambda t: tf.cast(t, tf.float32), model(features, training=True))
    losses = self.build_losses(outputs['raw_output'], labels)
    # Pre-divide by replica count: the optimizer allreduce sums gradients.
    scaled_loss = losses['total_loss'] / replica_count
    if loss_scaling:
      # Scale up for numerical stability; undone on the gradients below.
      scaled_loss = optimizer.get_scaled_loss(scaled_loss)
  trainable = model.trainable_variables
  gradients = tape.gradient(scaled_loss, trainable)
  if loss_scaling:
    gradients = optimizer.get_unscaled_gradients(gradients)
  # Optional global-norm clipping, controlled by the task config.
  if self.task_config.gradient_clip_norm > 0.0:
    gradients, _ = tf.clip_by_global_norm(
        gradients, self.task_config.gradient_clip_norm)
  optimizer.apply_gradients(list(zip(gradients, trainable)))
  logs = {self.loss: losses['total_loss']}
  if metrics:
    for m in metrics:
      m.update_state(losses[m.name])
      logs.update({m.name: m.result()})
  return logs
def meta_train(self, task_generator: Callable[[], Generator[tuple, None, None]], n_episode: int, n_way: int, ks_shots: int, kq_shots: int, optimizer: tf.keras.optimizers.Optimizer, episode_end_callback=None):
    """Trains the model on the meta-training set.

    Args:
        task_generator (callable): A callable returning a generator of
            few-shot tasks. Each task should be a couple
            (support_set, query_set), themselves being (data, label) tuples.
        n_episode (int): Number of episodes for meta-training.
        n_way (int): Number of ways (or classes per episode).
        ks_shots (int): Number of images per class in the support set.
        kq_shots (int): Number of images per class in the query set.
        optimizer (tf.keras.optimizer): A Keras optimizer for training.
        episode_end_callback (Callable): Called at the end of each episode
            with episode index, loss and gradients.
    """
    self.encoder.compile(optimizer)
    for episode in range(n_episode):
        # Record the forward pass for autodifferentiation.
        with tf.GradientTape() as tape:
            # IDIOM FIX: use next() rather than calling __next__ directly.
            # NOTE(review): a fresh generator is built every episode and
            # only its first task is consumed — presumably intentional
            # (random task sampling); confirm with task_generator's author.
            support_set, query_set = next(task_generator())
            # Run one few-shot episode through the encoder.
            distrib, support_labels, query_labels = run_episode(
                support_set, query_set, n_way, ks_shots, kq_shots,
                self.encoder)
            distrib = tf.transpose(distrib)
            loss_value = _compute_loss(distrib, query_labels, n_way)
        # Gradients of the episode loss w.r.t. the encoder weights.
        grads = tape.gradient(loss_value, self.encoder.trainable_weights)
        # One step of gradient descent.
        optimizer.apply_gradients(
            zip(grads, self.encoder.trainable_weights))
        if episode_end_callback is not None:
            args = {
                'episode': episode,
                'episode_loss': loss_value,
                'episode_gradients': grads
            }
            episode_end_callback(**args)
def update_weights(optimizer: tf.keras.optimizers.Optimizer, network: Network, batch, weight_decay: float): """ Updates the weights of the network based on gradient optimisation. Parameters ---------- optimiser : tf.keras.optimizers.Optimizer The optimiser to use for the weight updates task. network : Network The network on which to perform the weight updates. batch : weight_decay : float Returns ------- tensorflow.python.framework.ops.EagerTensor The scaled-down version of the input tensor ??? """ #print(type(network)) #print(type(batch)) loss = 0 with tf.GradientTape() as tape: for image, actions, targets in batch: # Initial step, from the real observation. value, reward, policy_logits, hidden_state = network.initial_inference(image) predictions = [(1.0, value, reward, policy_logits)] # Recurrent steps, from action and previous hidden state. for action in actions: value, reward, policy_logits, hidden_state = network.recurrent_inference(hidden_state, action) predictions.append((1.0 / len(actions), value, reward, policy_logits)) hidden_state = scale_gradient(hidden_state, 0.5) #potential bomb!!! comparing what tree says, what we say and we want them for prediction, target in zip(predictions, targets): gradient_scale, value, reward, policy_logits = prediction target_value, target_reward, target_policy = target l = ( scalar_loss(value, target_value) + scalar_loss(reward, target_reward) + tf.nn.softmax_cross_entropy_with_logits(logits=policy_logits, labels=target_policy) ) loss += scale_gradient(l, gradient_scale) all_weights = network.get_weights() for weights in all_weights: loss += weight_decay * tf.nn.l2_loss(weights) grad = tape.gradient(loss, all_weights) optimizer.apply_gradients(zip(grad, all_weights)) network.update_steps() print(network.training_steps())
def train_step(x_batch, y_batch, model:CNN_5, OP:tf.keras.optimizers.Optimizer, metrics:list, layer_lr:list):
    """One optimization step with per-layer gradient scaling.

    metrics[0] accumulates the loss; metrics[1] accumulates accuracy-like
    statistics on the remapped labels.
    """
    with tf.GradientTape() as tape:
        predictions = model(x_batch)
        loss = model.loss(predictions, y_batch)
    raw_grads = tape.gradient(loss, model.trainable_variables)
    # Scale each variable's gradient by its layer-specific multiplier.
    scaled_grads = [tf.multiply(g, lr) for g, lr in zip(raw_grads, layer_lr)]
    OP.apply_gradients(zip(scaled_grads, model.trainable_variables))
    metrics[0](loss)
    # (y+1)/2 appears to remap labels from [-1, 1] to [0, 1] for the
    # metric — confirm against the label encoding used by the caller.
    metrics[1]((y_batch+1)/2, predictions)
def __train_step(X: tf.Tensor, y: tf.Tensor, model: tf.keras.Model, optimizer: tf.keras.optimizers.Optimizer, loss_fn: tf.keras.losses.Loss) -> None:
    """Runs a single supervised gradient step on one batch."""
    with tf.GradientTape() as tape:
        logits = model(X, training=True)
        loss_value = loss_fn(y, logits)
    grads = tape.gradient(loss_value, model.trainable_variables)
    optimizer.apply_gradients(zip(grads, model.trainable_variables))
def train_one_step(model: models.Model, optimizer: tf.keras.optimizers.Optimizer, x: tf.Tensor, y: tf.Tensor):
    """Runs one training step and returns (loss, accuracy) for the batch."""
    with tf.GradientTape() as tape:
        logits = model(x)
        loss = compute_loss(logits, y)
    gradients = tape.gradient(loss, model.trainable_weights)
    optimizer.apply_gradients(zip(gradients, model.trainable_weights))
    # Accuracy is computed on the same (pre-update) logits.
    accuracy = compute_accuracy(logits, y)
    return loss, accuracy
def train_step(self, inputs: Tuple[Any, Any], model: tf.keras.Model, optimizer: tf.keras.optimizers.Optimizer, metrics: Optional[List[Any]] = None):
  """Does forward and backward.

  Args:
    inputs: A tuple of input tensors of (features, labels).
    model: A tf.keras.Model instance.
    optimizer: The optimizer for this training step.
    metrics: A nested structure of metrics objects.

  Returns:
    A dictionary of logs.
  """
  features, labels = inputs
  # One-hot encode labels unless the task is multilabel.
  is_multilabel = self.task_config.train_data.is_multilabel
  if self.task_config.losses.one_hot and not is_multilabel:
    labels = tf.one_hot(labels, self.task_config.model.num_classes)
  replica_count = tf.distribute.get_strategy().num_replicas_in_sync
  loss_scaling = isinstance(
      optimizer, tf.keras.mixed_precision.LossScaleOptimizer)
  with tf.GradientTape() as tape:
    outputs = model(features, training=True)
    # Per-replica loss.
    loss = self.build_losses(
        model_outputs=outputs, labels=labels, aux_losses=model.losses)
    # Pre-divide by replica count: the optimizer allreduce sums gradients.
    scaled_loss = loss / replica_count
    if loss_scaling:
      # Scale up for numerical stability; undone on the gradients below.
      scaled_loss = optimizer.get_scaled_loss(scaled_loss)
  trainable = model.trainable_variables
  gradients = tape.gradient(scaled_loss, trainable)
  if loss_scaling:
    gradients = optimizer.get_unscaled_gradients(gradients)
  optimizer.apply_gradients(list(zip(gradients, trainable)))
  logs = {self.loss: loss}
  # Task metrics take precedence; otherwise fall back to compiled metrics.
  if metrics:
    self.process_metrics(metrics, labels, outputs)
  elif model.compiled_metrics:
    self.process_compiled_metrics(model.compiled_metrics, labels, outputs)
  logs.update({m.name: m.result() for m in model.metrics})
  return logs
def train_step(self, inputs: Tuple[Any, Any], model: tf.keras.Model, optimizer: tf.keras.optimizers.Optimizer, metrics: Optional[List[Any]] = None):
  """Does forward and backward.

  Args:
    inputs: A (features, labels) tuple.
    model: The model, forward pass definition.
    optimizer: The optimizer for this training step.
    metrics: A nested structure of metrics objects; metric names must be
      keys of the loss dictionary assembled below.

  Returns:
    A dictionary of logs.
  """
  features, labels = inputs
  replica_count = tf.distribute.get_strategy().num_replicas_in_sync
  loss_scaling = isinstance(
      optimizer, tf.keras.mixed_precision.LossScaleOptimizer)
  with tf.GradientTape() as tape:
    # Cast to float32 so losses are computed in full precision under
    # mixed-precision policies.
    outputs = tf.nest.map_structure(
        lambda t: tf.cast(t, tf.float32), model(features, training=True))
    # Per-replica losses: total, classification, box and model losses.
    loss, cls_loss, box_loss, model_loss = self.build_losses(
        outputs=outputs, labels=labels, aux_losses=model.losses)
    # Pre-divide by replica count: the optimizer allreduce sums gradients.
    scaled_loss = loss / replica_count
    if loss_scaling:
      # Scale up for numerical stability; undone on the gradients below.
      scaled_loss = optimizer.get_scaled_loss(scaled_loss)
  trainable = model.trainable_variables
  gradients = tape.gradient(scaled_loss, trainable)
  if loss_scaling:
    gradients = optimizer.get_unscaled_gradients(gradients)
  optimizer.apply_gradients(list(zip(gradients, trainable)))
  logs = {self.loss: loss}
  all_losses = {
      'total_loss': loss,
      'cls_loss': cls_loss,
      'box_loss': box_loss,
      'model_loss': model_loss,
  }
  if metrics:
    for m in metrics:
      m.update_state(all_losses[m.name])
      logs.update({m.name: m.result()})
  return logs
def create_optimizer_vars( model: model_lib.Model, optimizer: tf.keras.optimizers.Optimizer) -> Iterable[tf.Variable]:
  """Applies a placeholder update to optimizer to enable getting its variables.

  Keras optimizers create their slot variables lazily; a zero update forces
  them into existence so they can be captured in server state.
  """
  # Fetch the trainable structure once; the original called
  # get_global_variables(model) twice for the same value (assumed pure —
  # confirm it has no side effects before relying on this).
  trainable = get_global_variables(model).trainable
  delta = tf.nest.map_structure(tf.zeros_like, trainable)
  # Negate the (zero) delta to match the server-update convention.
  grads_and_vars = tf.nest.map_structure(
      lambda x, v: (-1.0 * x, v), tf.nest.flatten(delta),
      tf.nest.flatten(trainable))
  optimizer.apply_gradients(grads_and_vars, name='server_update')
  return optimizer.variables()
def _initialize_optimizer_vars(model: tff.learning.Model, optimizer: tf.keras.optimizers.Optimizer):
    """Creates optimizer variables to assign the optimizer's state.

    Keras optimizers lazily create their variables on first use; stateful
    optimizers (Adam, Adagrad, momentum) need slot variables shaped like
    the model weights. Applying an all-zero gradient forces eager creation
    without modifying the weights.
    """
    model_weights = tff.learning.ModelWeights.from_model(model)
    zero_grads = tf.nest.map_structure(tf.zeros_like, model_weights.trainable)
    optimizer.apply_gradients(zip(zero_grads, model_weights.trainable))
    assert optimizer.variables()
def train_critic(cri: tf.keras.Model, act_target: tf.keras.Model, cri_target: tf.keras.Model, cri_opt: tf.keras.optimizers.Optimizer, states, actions, rewards, next_states, gamma: float):
    """Updates the critic toward the bootstrapped TD target."""
    with tf.GradientTape() as tape:
        # Bellman target built from the target networks.
        next_actions = act_target(next_states, training=True)
        td_target = rewards + gamma * cri_target(
            [next_states, next_actions], training=True)
        q_values = cri([states, actions], training=True)
        critic_loss = mse(td_target, q_values)
    # Only the online critic's parameters are updated.
    grads = tape.gradient(critic_loss, cri.trainable_variables)
    cri_opt.apply_gradients(zip(grads, cri.trainable_variables))