def compile(self, memory, optimizer, policy, preprocessor):
    """Override the base compile to fan the single optimizer out into four
    independent clones (actor, target actor, critic, target critic)."""
    self.optimizer = optimizer
    # One fresh clone per network so optimizer state is never shared.
    for attr_name in ('actor_keras_optimizer',
                      'target_actor_keras_optimizer',
                      'critic_keras_optimizer',
                      'target_critic_keras_optimizer'):
        setattr(self.optimizer, attr_name,
                clone_optimizer(self.optimizer.keras_optimizer))
    # The original single optimizer is no longer needed once cloned.
    del self.optimizer.keras_optimizer
    super(DDPG, self).compile(memory, self.optimizer, policy, preprocessor)
def compile_model(self):
    """Compile both networks with MSE loss, each on its own optimizer clone."""
    # Clone so model_2 does not share optimizer state with model 1.
    self.optimizer.keras_optimizer_2 = clone_optimizer(
        self.optimizer.keras_optimizer)
    model_optimizer_pairs = (
        (self.model, self.optimizer.keras_optimizer),
        (self.model_2, self.optimizer.keras_optimizer_2),
    )
    for network, opt in model_optimizer_pairs:
        network.compile(loss='mse', optimizer=opt)
    logger.info("Models 1 and 2 compiled")
def test_clone_optimizer():
    """clone_optimizer must reproduce SGD hyperparameters and clip settings."""
    hyperparams = np.random.random(size=4)
    lr, momentum, clipnorm, clipvalue = hyperparams
    original = SGD(lr=lr, momentum=momentum,
                   clipnorm=clipnorm, clipvalue=clipvalue)
    cloned = clone_optimizer(original)
    assert isinstance(cloned, SGD)
    # Tensor-backed hyperparameters are compared via backend values.
    for attr in ('lr', 'momentum'):
        assert K.get_value(getattr(original, attr)) == K.get_value(
            getattr(cloned, attr))
    # Clipping settings are plain Python attributes.
    assert original.clipnorm == cloned.clipnorm
    assert original.clipvalue == cloned.clipvalue
def test_clone_optimizer_from_string():
    """A string identifier must resolve and clone into an SGD instance."""
    cloned = clone_optimizer('sgd')
    assert isinstance(cloned, SGD)
def compile(self, optimizer, metrics=None):
    """Compile actor, critic, and their target networks, and build the
    custom policy-gradient training function for the actor.

    Args:
        optimizer: a single Keras optimizer (cloned for the critic) or a
            ``(actor_optimizer, critic_optimizer)`` pair; string identifiers
            are resolved via ``optimizers.get``.
        metrics: extra Keras metrics for the critic; ``mean_q`` is always
            appended. Defaults to no extra metrics.

    Raises:
        ValueError: if a list/tuple of optimizers does not contain exactly
            two elements.
    """
    # Build a fresh list: never mutate the caller's list, and avoid the
    # shared-mutable-default pitfall of the previous `metrics=[]` signature.
    metrics = list(metrics) if metrics is not None else []
    metrics = metrics + [mean_q]

    if isinstance(optimizer, (list, tuple)):
        if len(optimizer) != 2:
            raise ValueError(
                'Expected exactly two optimizers, the first one for the '
                'actor and the second one for the critic, but got '
                '{}.'.format(len(optimizer)))
        actor_optimizer, critic_optimizer = optimizer
    else:
        actor_optimizer = optimizer
        # Clone so actor and critic never share optimizer state.
        critic_optimizer = clone_optimizer(optimizer)
    if isinstance(actor_optimizer, str):
        actor_optimizer = optimizers.get(actor_optimizer)
    if isinstance(critic_optimizer, str):
        critic_optimizer = optimizers.get(critic_optimizer)
    # NOTE(review): sanity check only; the two optimizers must be distinct
    # objects. Kept as an assert to preserve the original failure mode.
    assert actor_optimizer != critic_optimizer

    # Allow a ([actor_metrics], [critic_metrics]) pair; otherwise share.
    if len(metrics) == 2 and hasattr(metrics[0], '__len__') and hasattr(
            metrics[1], '__len__'):
        actor_metrics, critic_metrics = metrics
    else:
        actor_metrics = critic_metrics = metrics

    def clipped_error(y_true, y_pred):
        # Huber loss clipped at self.delta_clip keeps critic gradients bounded.
        return K.mean(huber_loss(y_true, y_pred, self.delta_clip), axis=-1)

    # Compile target networks. We only use them in feed-forward mode, hence
    # we can pass any optimizer and loss since we never use them anyway.
    self.target_actor = clone_model(self.actor, self.custom_model_objects)
    self.target_actor.compile(optimizer='sgd', loss='mse')
    self.target_critic = clone_model(self.critic, self.custom_model_objects)
    self.target_critic.compile(optimizer='sgd', loss='mse')

    # We also compile the actor. We never optimize the actor using Keras but
    # instead compute the policy gradient ourselves. However, we need the
    # actor in feed-forward mode, hence we also compile it with any optimizer
    # and loss.
    self.actor.compile(optimizer='sgd', loss='mse')

    # Compile the critic.
    if self.target_model_update < 1.:
        # We use the `AdditionalUpdatesOptimizer` to efficiently soft-update
        # the target model alongside every critic update.
        critic_updates = get_soft_target_model_updates(
            self.target_critic, self.critic, self.target_model_update)
        critic_optimizer = AdditionalUpdatesOptimizer(
            critic_optimizer, critic_updates)
    self.critic.compile(optimizer=critic_optimizer, loss=clipped_error,
                        metrics=critic_metrics)

    # Combine actor and critic so that we can get the policy gradient.
    # Assuming critic's state inputs are the same as actor's.
    combined_inputs = []
    state_inputs = []
    for i in self.critic.input:
        if i == self.critic_action_input:
            # Placeholder for the actor's output, filled in below.
            combined_inputs.append([])
        else:
            combined_inputs.append(i)
            state_inputs.append(i)
    combined_inputs[self.critic_action_input_idx] = self.actor(state_inputs)
    combined_output = self.critic(combined_inputs)

    # Ascend the critic's Q-value estimate => minimize its negation.
    updates = actor_optimizer.get_updates(
        params=self.actor.trainable_weights, loss=-K.mean(combined_output))
    if self.target_model_update < 1.:
        # Include soft target model updates.
        updates += get_soft_target_model_updates(
            self.target_actor, self.actor, self.target_model_update)
    updates += self.actor.updates  # include other updates of the actor, e.g. for BN

    # Finally, combine it all into a callable function.
    if K.backend() == 'tensorflow':
        self.actor_train_fn = K.function(
            state_inputs + [K.learning_phase()],
            [self.actor(state_inputs)], updates=updates)
    else:
        if self.uses_learning_phase:
            state_inputs += [K.learning_phase()]
        self.actor_train_fn = K.function(
            state_inputs, [self.actor(state_inputs)], updates=updates)
    self.actor_optimizer = actor_optimizer
    self.compiled = True
def compile(self, optimizer, metrics=None):
    """Compile both sub-agents.

    Each agent receives its own clone of ``optimizer`` (so optimizer state
    such as momentum is never shared) and its own deep copy of ``metrics``.

    Args:
        optimizer: the Keras optimizer (or identifier) to clone per agent.
        metrics: optional list of metrics; defaults to an empty list.
            (Previously a mutable default ``[]`` — replaced with a ``None``
            sentinel to avoid the shared-default pitfall.)
    """
    if metrics is None:
        metrics = []
    self.agent1.compile(clone_optimizer(optimizer), deepcopy(metrics))
    self.agent2.compile(clone_optimizer(optimizer), deepcopy(metrics))