def compile(self,
            optimizer,
            loss,
            kernel_initializer=tf.initializers.GlorotUniform,
            **kwargs):  # pylint: disable=arguments-differ
    """Compile the model, wrapping the optimizer in a BoltOn optimizer.

    See super class. Default optimizer used in BoltOn method is SGD.

    Args:
      optimizer: The optimizer to use. Unless it already is a BoltOn
        instance, it is resolved with `optimizers.get` and wrapped in BoltOn
        together with `loss`.
      loss: The loss function to use. Must be an instance of
        StrongConvexMixin.
      kernel_initializer: Zero-argument callable that produces the kernel
        initializer for the single Dense output layer.
      **kwargs: Forwarded to the super class `compile`.

    Raises:
      ValueError: If `loss` is not a StrongConvexMixin instance.
    """
    loss_is_strongly_convex = isinstance(loss, StrongConvexMixin)
    if not loss_is_strongly_convex:
        raise ValueError(
            'loss function must be a Strongly Convex and therefore '
            'extend the StrongConvexMixin.')

    # compile may be called multiple times, for instance if the
    # input/outputs are not defined until fit; the output layer is only
    # created on the first call.
    if not self._layers_instantiated:
        n_outputs = self.n_outputs
        dense_options = {
            'kernel_regularizer': loss.kernel_regularizer(),
            'kernel_initializer': kernel_initializer(),
        }
        self.output_layer = tf.keras.layers.Dense(n_outputs, **dense_options)
        self._layers_instantiated = True

    if isinstance(optimizer, BoltOn):
        wrapped_optimizer = optimizer
    else:
        wrapped_optimizer = BoltOn(optimizers.get(optimizer), loss)

    super(BoltOnModel, self).compile(wrapped_optimizer, loss=loss, **kwargs)
def valid_optimizer(optimizer):
    """Normalize an optimizer specification.

    * A non-empty dict is read as ``{'class_name': ..., 'config': ...}``:
      the class is looked up with ``get``, an instance is rebuilt through
      ``from_config`` (an absent ``'config'`` key means ``{}``) and the
      result of ``serialize`` on that instance is returned.
    * A ``DataFrame`` is replaced by the JSON-decoded ``optimizer`` attribute
      of the value returned by its ``first()`` method.
    * Anything else is returned unchanged.
    """
    # Truthiness is tested before the type, exactly as callers have relied on.
    if optimizer and isinstance(optimizer, dict):
        optimizer_cls = get(optimizer.get('class_name'))
        rebuilt = optimizer_cls.from_config(optimizer.get('config', {}))
        return serialize(rebuilt)

    if isinstance(optimizer, DataFrame):
        first_row = optimizer.first()
        return json.loads(first_row.optimizer)

    return optimizer
def testOptimizersCompatibility(self, opt_str, test_weights, test_numeric):
    """Check that a model saved with a V1 optimizer reloads with a V2 one.

    Args:
      opt_str: Optimizer identifier. 'momentum' selects
        `optimizers.SGD(momentum=0.9)`; any other value is resolved with
        `optimizers.get`.
      test_weights: If truthy, copy the first optimizer's weights into the
        reloaded optimizer through `set_weights`.
      test_numeric: If truthy, fit both models for one more epoch and compare
        their weights and loss histories.
    """
    np.random.seed(1331)
    with self.cached_session():
        train_samples = 20
        input_dim = 3
        num_classes = 2
        (x, y), _ = testing_utils.get_test_data(
            train_samples=train_samples,
            test_samples=10,
            input_shape=(input_dim,),
            num_classes=num_classes)
        y = keras.utils.to_categorical(y)

        num_hidden = 5
        model = testing_utils.get_small_sequential_mlp(
            num_hidden=num_hidden,
            num_classes=num_classes,
            input_dim=input_dim)

        old_mode = os.environ.get('TF2_BEHAVIOR', None)
        try:
            # Disable tf2 to create V1 optimizer.
            disable_tf2()
            if opt_str == 'momentum':
                opt_v1 = optimizers.SGD(momentum=0.9)
            else:
                opt_v1 = optimizers.get(opt_str)

            # Test compile and fit with v1 optimizer.
            model.compile(opt_v1, loss='categorical_crossentropy', metrics=[])
            model.fit(x, y, batch_size=5, epochs=1)
            # NOTE(review): this temporary directory is never removed.
            model_dir = tempfile.mkdtemp()
            gfile.MakeDirs(model_dir)
            file_name = os.path.join(model_dir, 'model.h5')
            model.save(file_name)

            enable_tf2()
            # Test load and fit with v2 optimizer.
            model_2 = saving.load_model(file_name)
            opt_v2 = model_2.optimizer
            self.assertIsInstance(opt_v2, optimizer_v2.OptimizerV2)
            # set_weights is called inside load_model but exception is
            # swallowed, this call checks the weights can be set correctly.
            if test_weights:
                opt_v2.set_weights(opt_v1.get_weights())
            if test_numeric:
                hist_1 = model.fit(x, y, batch_size=5, epochs=1, shuffle=False)
                hist_2 = model_2.fit(
                    x, y, batch_size=5, epochs=1, shuffle=False)
                self.assertAllClose(model.get_weights(), model_2.get_weights())
                self.assertAllClose(hist_1.history['loss'],
                                    hist_2.history['loss'])
        finally:
            # Put back the TF2_BEHAVIOR value captured above even when a step
            # or assertion in the body fails, so the saved setting is not lost.
            if old_mode is not None:
                os.environ['TF2_BEHAVIOR'] = old_mode
def testOptimizersCompatibility(self, opt_str, test_weights, test_numeric):
    """Check that a model saved with a V1 optimizer reloads with a V2 one.

    Args:
      opt_str: Optimizer identifier. 'momentum' selects
        `optimizers.SGD(momentum=0.9)`; any other value is resolved with
        `optimizers.get`.
      test_weights: If truthy, copy the first optimizer's weights into the
        reloaded optimizer through `set_weights`.
      test_numeric: If truthy, fit both models for one more epoch and compare
        their weights and loss histories.
    """
    np.random.seed(1331)
    with self.cached_session():
        train_samples = 20
        input_dim = 3
        num_classes = 2
        (x, y), _ = testing_utils.get_test_data(
            train_samples=train_samples,
            test_samples=10,
            input_shape=(input_dim,),
            num_classes=num_classes)
        y = keras.utils.to_categorical(y)

        num_hidden = 5
        model = testing_utils.get_small_sequential_mlp(
            num_hidden=num_hidden,
            num_classes=num_classes,
            input_dim=input_dim)

        old_mode = os.environ.get('TF2_BEHAVIOR', None)
        try:
            # Disable tf2 to create V1 optimizer.
            disable_tf2()
            if opt_str == 'momentum':
                opt_v1 = optimizers.SGD(momentum=0.9)
            else:
                opt_v1 = optimizers.get(opt_str)

            # Test compile and fit with v1 optimizer.
            model.compile(opt_v1, loss='categorical_crossentropy', metrics=[])
            model.fit(x, y, batch_size=5, epochs=1)
            # NOTE(review): this temporary directory is never removed.
            model_dir = tempfile.mkdtemp()
            gfile.MakeDirs(model_dir)
            file_name = os.path.join(model_dir, 'model.h5')
            model.save(file_name)

            enable_tf2()
            # Test load and fit with v2 optimizer.
            model_2 = saving.load_model(file_name)
            opt_v2 = model_2.optimizer
            self.assertIsInstance(opt_v2, optimizer_v2.OptimizerV2)
            # set_weights is called inside load_model but exception is
            # swallowed, this call checks the weights can be set correctly.
            if test_weights:
                opt_v2.set_weights(opt_v1.get_weights())
            if test_numeric:
                hist_1 = model.fit(x, y, batch_size=5, epochs=1, shuffle=False)
                hist_2 = model_2.fit(
                    x, y, batch_size=5, epochs=1, shuffle=False)
                # One weight comparison is enough; the original repeated the
                # identical assertion twice.
                self.assertAllClose(model.get_weights(), model_2.get_weights())
                self.assertAllClose(hist_1.history['loss'],
                                    hist_2.history['loss'])
        finally:
            # Put back the TF2_BEHAVIOR value captured above even when a step
            # or assertion in the body fails, so the saved setting is not lost.
            if old_mode is not None:
                os.environ['TF2_BEHAVIOR'] = old_mode
def clone_optimizer(optimizer):
    """Return a new optimizer equivalent to `optimizer`.

    Args:
        optimizer: Either an optimizer name (``str`` or subclass of it) or
            an optimizer instance exposing ``get_config()``.

    Returns:
        For a string, whatever ``optimizers.get`` returns for that name.
        Otherwise an optimizer rebuilt from the instance's class name and a
        copy of its config.
    """
    if isinstance(optimizer, str):
        return optimizers.get(optimizer)

    # Requires Keras 1.0.7 since get_config has breaking changes.
    config = {
        'class_name': optimizer.__class__.__name__,
        # Shallow copy so the config handed to Keras is not the same dict
        # object that get_config() returned.
        'config': dict(optimizer.get_config()),
    }
    if hasattr(optimizers, 'optimizer_from_config'):
        # COMPATIBILITY: Keras < 2.0
        return optimizers.optimizer_from_config(config)
    return optimizers.deserialize(config)
def compile(self, optimizer, metrics=None):
    """Compile actor, critic and their target networks for training.

    Args:
        optimizer: Either a single optimizer (name or instance), used for
            the actor and cloned with `clone_optimizer` for the critic, or a
            list/tuple of exactly two optimizers: the first for the actor,
            the second for the critic.
        metrics: Optional iterable of metrics. `mean_q` is always appended.
            The object passed in is copied, never modified.

    Raises:
        ValueError: If a list/tuple of optimizers does not contain exactly
            two entries.
    """
    # Work on a private copy: the previous `metrics=[]` default combined with
    # an in-place `+=` appended `mean_q` to the shared default (and to the
    # caller's list) on every call.
    metrics = ([] if metrics is None else list(metrics)) + [mean_q]

    if isinstance(optimizer, (list, tuple)):
        if len(optimizer) != 2:
            raise ValueError(
                'More than two optimizers provided. Please only provide a maximum of two optimizers, the first one for the actor and the second one for the critic.'
            )
        actor_optimizer, critic_optimizer = optimizer
    else:
        actor_optimizer = optimizer
        critic_optimizer = clone_optimizer(optimizer)
    if isinstance(actor_optimizer, str):
        actor_optimizer = optimizers.get(actor_optimizer)
    if isinstance(critic_optimizer, str):
        critic_optimizer = optimizers.get(critic_optimizer)
    assert actor_optimizer != critic_optimizer

    # A pair of sized objects is read as (actor metrics, critic metrics);
    # only the critic's half is used below.
    if len(metrics) == 2 and hasattr(metrics[0], '__len__') and hasattr(
            metrics[1], '__len__'):
        _, critic_metrics = metrics
    else:
        critic_metrics = metrics

    def clipped_error(y_true, y_pred):
        return K.mean(huber_loss(y_true, y_pred, self.delta_clip), axis=-1)

    # Compile target networks. We only use them in feed-forward mode, hence
    # we can pass any optimizer and loss since we never use it anyway.
    self.target_actor = clone_model(self.actor, self.custom_model_objects)
    self.target_actor.compile(optimizer='sgd', loss='mse')
    self.target_critic = clone_model(self.critic, self.custom_model_objects)
    self.target_critic.compile(optimizer='sgd', loss='mse')

    # We also compile the actor. We never optimize the actor using Keras but
    # instead compute the policy gradient ourselves. However, we need the
    # actor in feed-forward mode, hence we also compile it with any
    # optimizer and loss.
    self.actor.compile(optimizer='sgd', loss='mse')

    # Compile the critic.
    if self.target_model_update < 1.:
        # We use the `AdditionalUpdatesOptimizer` to efficiently soft-update
        # the target model.
        critic_updates = get_soft_target_model_updates(
            self.target_critic, self.critic, self.target_model_update)
        critic_optimizer = AdditionalUpdatesOptimizer(
            critic_optimizer, critic_updates)
    self.critic.compile(optimizer=critic_optimizer,
                        loss=clipped_error,
                        metrics=critic_metrics)

    # Combine actor and critic so that we can get the policy gradient.
    # Assuming critic's state inputs are the same as actor's.
    combined_inputs = []
    state_inputs = []
    for i in self.critic.input:
        if i == self.critic_action_input:
            # Placeholder slot, filled with the actor's output below.
            combined_inputs.append([])
        else:
            combined_inputs.append(i)
            state_inputs.append(i)
    combined_inputs[self.critic_action_input_idx] = self.actor(state_inputs)

    combined_output = self.critic(combined_inputs)

    updates = actor_optimizer.get_updates(
        params=self.actor.trainable_weights, loss=-K.mean(combined_output))
    if self.target_model_update < 1.:
        # Include soft target model updates.
        updates += get_soft_target_model_updates(
            self.target_actor, self.actor, self.target_model_update)
    updates += self.actor.updates  # include other updates of the actor, e.g. for BN

    # Finally, combine it all into a callable function.
    self.actor_train_fn = K.function(state_inputs + [K.learning_phase()],
                                     [self.actor(state_inputs)],
                                     updates=updates)
    self.actor_optimizer = actor_optimizer

    self.compiled = True
def compile(self,
            optimizer,
            loss=None,
            metrics=None,
            loss_weights=None,
            sample_weight_mode=None,
            weighted_metrics=None,
            target_tensors=None,
            **kwargs):
    """Configures the model for training.

    The optimizer argument is split into a linear and a DNN optimizer, both
    stored on the instance, and the pair is handed to the parent `compile`
    together with every other argument unchanged.

    Arguments:
      optimizer: A single String (name of optimizer) or optimizer instance
        if linear and dnn model share the same optimizer, or a list or tuple
        of 2 optimizers (linear first, dnn second) if not. Each entry is
        resolved with `optimizers.get`.
      loss: Passed through to the parent `compile`.
      metrics: Passed through to the parent `compile`.
      loss_weights: Passed through to the parent `compile`.
      sample_weight_mode: Passed through to the parent `compile`.
      weighted_metrics: Passed through to the parent `compile`.
      target_tensors: Passed through to the parent `compile`.
      **kwargs: Any additional arguments passed to Model.compile, including
        run_eagerly.

    Raises:
      ValueError: In case of invalid arguments for
        `optimizer`, `loss`, `metrics` or `sample_weight_mode`.
    """
    single_optimizer = not isinstance(optimizer, (tuple, list))
    if single_optimizer:
        # DNN and Linear sharing the same optimizer.
        shared_optimizer = optimizers.get(optimizer)
        self.dnn_optimizer = shared_optimizer
        self.linear_optimizer = shared_optimizer
    else:
        self.linear_optimizer = optimizers.get(optimizer[0])
        self.dnn_optimizer = optimizers.get(optimizer[1])

    # TODO(tanzheny): Make optimizer have default in compile (b/132909290)
    optimizer_pair = [self.linear_optimizer, self.dnn_optimizer]
    super(WideDeepModel, self).compile(
        optimizer=optimizer_pair,
        loss=loss,
        metrics=metrics,
        loss_weights=loss_weights,
        sample_weight_mode=sample_weight_mode,
        weighted_metrics=weighted_metrics,
        target_tensors=target_tensors,
        **kwargs)
def npu_distributed_optimizer_wrapper(optimizer):
    """Patch an optimizer so its gradients are all-reduced before use.

    Each of `compute_gradients`, `get_gradients` and `_compute_gradients`
    that the optimizer exposes is replaced, on the instance, by a wrapper
    that calls the original method and then passes every non-None gradient
    through `allreduce(grad, True)`.

    The `RANK_SIZE` environment variable is read once, when this function is
    called. If it is unset or parses to an integer <= 1, the wrappers return
    the original method's result untouched.

    Args:
        optimizer: Optimizer instance, or a string resolved with
            `optimizers.get`.

    Returns:
        The same optimizer object, patched in place.
    """
    if isinstance(optimizer, str):
        optimizer = optimizers.get(optimizer)
    rank_size = os.getenv('RANK_SIZE')

    def _single_device():
        # Parsed lazily, at gradient time, as before.
        return rank_size is None or int(rank_size) <= 1

    def _average(grad):
        return allreduce(grad, True) if grad is not None else None

    # Every wrapper captures its original method under its OWN name. The
    # previous code reused `org_compute_gradients` for both the public and
    # the private method, so after the third section ran, the public wrapper
    # silently called `_compute_gradients` instead of `compute_gradients`.
    if hasattr(optimizer, "compute_gradients"):
        org_public_compute_gradients = optimizer.compute_gradients

        def _npu_public_compute_gradients(*args, **kwargs):
            """Call compute_gradients() and allreduce the (grad, var) pairs."""
            gradients = org_public_compute_gradients(*args, **kwargs)
            if _single_device():
                return gradients
            with tf.name_scope("Npu_Distributed_optimizer_Allreduce"):
                return [(_average(grad), var) for grad, var in gradients]

        optimizer.compute_gradients = _npu_public_compute_gradients

    if hasattr(optimizer, "get_gradients"):
        org_get_gradients = optimizer.get_gradients

        def _npu_get_gradients(loss, params):
            """Call get_gradients() and allreduce each returned gradient."""
            grads = org_get_gradients(loss, params)
            if _single_device():
                return grads
            with tf.name_scope(
                    "Npu_Distributed_optimizer_get_grads_Allreduce"):
                return [_average(grad) for grad in grads]

        optimizer.get_gradients = _npu_get_gradients

    if hasattr(optimizer, "_compute_gradients"):
        org_private_compute_gradients = optimizer._compute_gradients

        def _npu_private_compute_gradients(loss, var_list, grad_loss=None,
                                           **kwargs):
            """Call _compute_gradients() and allreduce the (grad, var) pairs."""
            # NOTE(review): extra keyword arguments are forwarded untouched;
            # some TensorFlow releases appear to pass e.g. `tape=` here -
            # confirm against the targeted version.
            gradients = org_private_compute_gradients(
                loss, var_list, grad_loss, **kwargs)
            if _single_device():
                return gradients
            with tf.name_scope(
                    "Npu_Distributed_optimizer_compute_grads_Allreduce"):
                return [(_average(grad), var) for grad, var in gradients]

        optimizer._compute_gradients = _npu_private_compute_gradients

    return optimizer
def __init__(self,
             optimizer,
             loss=None,
             metrics=None,
             loss_weights=None,
             sample_weight_mode=None,
             weighted_metrics=None,
             target_tensors=None,
             **kwargs):
    """Store the training configuration; nothing is built or compiled here.

    Arguments:
      optimizer: String (name of optimizer) or optimizer instance. Resolved
        immediately with `optimizers.get` and stored as `_optimizer`.
      loss: Loss specification, stored as `_loss`; a falsy value is stored
        as an empty dict.
      metrics: Metrics specification, stored as `_metrics`; a falsy value is
        stored as an empty list.
      loss_weights: Stored unchanged as `_loss_weights`.
      sample_weight_mode: Stored unchanged as `_sample_weight_mode`.
      weighted_metrics: Stored unchanged as `_weighted_metrics`.
      target_tensors: Stored unchanged as `_target_tensors`.
      **kwargs: Additional keyword arguments, checked by
        `self._validate_kwargs` against `trainable` and `last_dim`, then
        stored as `_kwargs`.
    """
    # The allowed names must be separate set members. The previous literal
    # {'trainable, last_dim'} was one string containing a comma, so neither
    # keyword could ever match it.
    # NOTE(review): the message below mentions `name` and `trainable` while
    # the allowed set is `trainable` and `last_dim` - confirm which is meant.
    self._validate_kwargs(
        kwargs, {'trainable', 'last_dim'},
        'Functional models may only specify `name` and `trainable` keyword '
        'arguments during initialization. Got an unexpected argument:')

    self._inputs = None
    self._outputs = None
    self._model = None

    self._optimizer = optimizers.get(optimizer)
    self._loss = loss or {}
    # Assigned once; an earlier `metrics or {}` assignment was immediately
    # overwritten by this one and has been dropped.
    self._metrics = metrics or []
    self._loss_weights = loss_weights
    self._sample_weight_mode = sample_weight_mode
    self._weighted_metrics = weighted_metrics
    self._target_tensors = target_tensors
    self._kwargs = kwargs

    self.built = False
    self.compiled = False