def compile(self, optimizer, metrics=None):
    # Avoid a mutable default argument; copy so the caller's list is not modified.
    metrics = list(metrics or []) + [mean_q]  # register default metrics

    def clipped_masked_error(args):
        y_true, y_pred, mask = args
        loss = huber_loss(y_true, y_pred, self.delta_clip)
        loss *= mask  # apply element-wise mask
        return K.sum(loss, axis=-1)

    # Create trainable model. The problem is that we need to mask the output since we only
    # ever want to update the Q values for a certain action. The way we achieve this is by
    # using a custom Lambda layer that computes the loss. This gives us the necessary
    # flexibility to mask out certain parameters by passing in multiple inputs to the
    # Lambda layer.
    y_pred = self.model.output
    y_true = Input(name='y_true', shape=(self.nb_actions,))
    mask = Input(name='mask', shape=(self.nb_actions,))
    loss_out = Lambda(clipped_masked_error, output_shape=(1,), name='loss')([y_pred, y_true, mask])
    ins = [self.model.input] if type(self.model.input) is not list else self.model.input
    trainable_model = Model(input=ins + [y_true, mask], output=[loss_out, y_pred])
    assert len(trainable_model.output_names) == 2
    combined_metrics = {trainable_model.output_names[1]: metrics}
    losses = [
        lambda y_true, y_pred: y_pred,  # loss is computed in the Lambda layer
        lambda y_true, y_pred: K.zeros_like(y_pred),  # we only include this for the metrics
    ]
    trainable_model.compile(optimizer=optimizer, loss=losses, metrics=combined_metrics)
    self.trainable_model = trainable_model
    self.compiled = True
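# A minimal numpy sketch (not library code) of what clipped_masked_error computes for a
# single batch entry: with a one-hot mask over actions, only the Q-value of the taken
# action contributes to the loss. The Huber form below is the standard definition; all
# names and values here are illustrative assumptions, not the library's API.
import numpy as np

def huber_ref(y_true, y_pred, delta):
    # Quadratic for small errors, linear for large ones (standard Huber loss).
    err = y_true - y_pred
    quad = 0.5 * err ** 2
    lin = delta * (np.abs(err) - 0.5 * delta)
    return np.where(np.abs(err) <= delta, quad, lin)

q_pred = np.array([0.2, 1.5, -0.3])   # predicted Q-values for 3 actions
target = np.array([0.0, 2.0, 0.0])    # Bellman target placed at the taken action
mask = np.array([0.0, 1.0, 0.0])      # one-hot mask: only action 1 was taken
loss = np.sum(huber_ref(target, q_pred, 1.0) * mask)  # -> 0.125; other actions contribute 0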
def test_naf_layer_diag():
    batch_size = 2
    for nb_actions in (1, 3):
        # Construct a model with NAFLayer as the only layer. Since the layer has no
        # trainable weights (which would be randomly initialized), the model is fully
        # deterministic.
        L_flat_input = Input(shape=(nb_actions,))
        mu_input = Input(shape=(nb_actions,))
        action_input = Input(shape=(nb_actions,))
        x = NAFLayer(nb_actions, mode='diag')([L_flat_input, mu_input, action_input])
        model = Model(input=[L_flat_input, mu_input, action_input], output=x)
        model.compile(loss='mse', optimizer='sgd')

        # Create random test data.
        L_flat = np.random.random((batch_size, nb_actions)).astype('float32')
        mu = np.random.random((batch_size, nb_actions)).astype('float32')
        action = np.random.random((batch_size, nb_actions)).astype('float32')

        # Perform the reference computation in numpy since it is much easier to verify.
        # In 'diag' mode, P is simply a diagonal matrix built from L_flat.
        P = np.zeros((batch_size, nb_actions, nb_actions)).astype('float32')
        for p, l_flat in zip(P, L_flat):
            p[np.diag_indices(nb_actions)] = l_flat
        A_ref = np.array([np.dot(np.dot(a - m, p), a - m)
                          for a, m, p in zip(action, mu, P)]).astype('float32')
        A_ref *= -.5

        # Finally, compute the output of the net, which should be identical to the
        # previously computed reference.
        A_net = model.predict([L_flat, mu, action]).flatten()
        assert_allclose(A_net, A_ref, rtol=1e-5)
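# Hedged companion sketch (not the library's test) of the analogous reference computation
# for mode='full', assuming NAFLayer follows the NAF paper's parameterization: L_flat holds
# the nb_actions * (nb_actions + 1) // 2 entries of a lower-triangular L whose diagonal is
# exponentiated, and P = L . L^T. The advantage is again A = -1/2 (a - mu)^T P (a - mu).
import numpy as np

batch_size, nb_actions = 2, 3
n_tril = nb_actions * (nb_actions + 1) // 2
L_flat = np.random.random((batch_size, n_tril)).astype('float32')
mu = np.random.random((batch_size, nb_actions)).astype('float32')
action = np.random.random((batch_size, nb_actions)).astype('float32')

L = np.zeros((batch_size, nb_actions, nb_actions), dtype='float32')
for l, l_flat in zip(L, L_flat):
    l[np.tril_indices(nb_actions)] = l_flat
    # Exponentiate the diagonal so P = L . L^T is positive-definite (assumed convention).
    l[np.diag_indices(nb_actions)] = np.exp(l[np.diag_indices(nb_actions)])
P = np.array([np.dot(l, l.T) for l in L], dtype='float32')
A_ref = -.5 * np.array([np.dot(np.dot(a - m, p), a - m)
                        for a, m, p in zip(action, mu, P)]).astype('float32')
# A_net = model.predict([L_flat, mu, action]).flatten()  # would be checked via assert_allclose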
def compile(self, optimizer, metrics=None):
    # Avoid a mutable default argument; copy so the caller's list is not modified.
    metrics = list(metrics or []) + [mean_q]  # register default metrics

    # Create target V model. We don't need targets for mu or L. The target model is never
    # trained, so the optimizer and loss are arbitrary.
    self.target_V_model = clone_model(self.V_model, self.custom_model_objects)
    self.target_V_model.compile(optimizer='sgd', loss='mse')

    # Build combined model.
    a_in = Input(shape=(self.nb_actions,), name='action_input')
    if type(self.V_model.input) is list:
        observation_shapes = [i._keras_shape[1:] for i in self.V_model.input]
    else:
        observation_shapes = [self.V_model.input._keras_shape[1:]]
    os_in = [Input(shape=shape, name='observation_input_{}'.format(idx))
             for idx, shape in enumerate(observation_shapes)]
    L_out = self.L_model([a_in] + os_in)
    V_out = self.V_model(os_in)
    mu_out = self.mu_model(os_in)
    A_out = NAFLayer(self.nb_actions, mode=self.covariance_mode)([L_out, mu_out, a_in])
    combined_out = Lambda(lambda x: x[0] + x[1], output_shape=lambda x: x[0])([A_out, V_out])
    combined = Model(input=[a_in] + os_in, output=[combined_out])

    # Compile combined model.
    if self.target_model_update < 1.:
        # We use the `AdditionalUpdatesOptimizer` to efficiently soft-update the target model.
        updates = get_soft_target_model_updates(self.target_V_model, self.V_model,
                                                self.target_model_update)
        optimizer = AdditionalUpdatesOptimizer(optimizer, updates)

    def clipped_error(y_true, y_pred):
        return K.mean(huber_loss(y_true, y_pred, self.delta_clip), axis=-1)

    combined.compile(loss=clipped_error, optimizer=optimizer, metrics=metrics)
    self.combined_model = combined
    self.compiled = True
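# Minimal numpy sketch (illustrative, not the library code) of the soft update that
# get_soft_target_model_updates schedules on the optimizer: after every gradient step,
# each target weight is moved toward the online weight by a factor
# tau = self.target_model_update (< 1), so the target network slowly tracks the online one.
import numpy as np

def soft_update(target_weights, source_weights, tau):
    # target <- tau * source + (1 - tau) * target, applied per weight tensor
    return [tau * w + (1. - tau) * tw for tw, w in zip(target_weights, source_weights)]

target = [np.zeros(3)]
source = [np.ones(3)]
for _ in range(3):
    target = soft_update(target, source, tau=0.1)
print(target[0])  # -> approx [0.271 0.271 0.271]; converges geometrically toward the source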
def compile(self, optimizer, metrics=None):
    # Avoid a mutable default argument; copy so the caller's list is not modified.
    metrics = list(metrics or []) + [mean_q]  # register default metrics

    # We never train the target model, hence we can set the optimizer and loss arbitrarily.
    self.target_model = clone_model(self.model, self.custom_model_objects)
    self.target_model.compile(optimizer='sgd', loss='mse')
    self.model.compile(optimizer='sgd', loss='mse')

    # Compile model.
    if self.target_model_update < 1.:
        # We use the `AdditionalUpdatesOptimizer` to efficiently soft-update the target model.
        updates = get_soft_target_model_updates(self.target_model, self.model,
                                                self.target_model_update)
        optimizer = AdditionalUpdatesOptimizer(optimizer, updates)

    def clipped_masked_error(args):
        y_true, y_pred, mask = args
        loss = huber_loss(y_true, y_pred, self.delta_clip)
        loss *= mask  # apply element-wise mask
        return K.sum(loss, axis=-1)

    # Create trainable model. The problem is that we need to mask the output since we only
    # ever want to update the Q values for a certain action. The way we achieve this is by
    # using a custom Lambda layer that computes the loss. This gives us the necessary
    # flexibility to mask out certain parameters by passing in multiple inputs to the
    # Lambda layer.
    y_pred = self.model.output
    y_true = Input(name='y_true', shape=(self.nb_actions,))
    mask = Input(name='mask', shape=(self.nb_actions,))
    loss_out = Lambda(clipped_masked_error, output_shape=(1,), name='loss')([y_pred, y_true, mask])
    ins = [self.model.input] if type(self.model.input) is not list else self.model.input
    trainable_model = Model(input=ins + [y_true, mask], output=[loss_out, y_pred])
    assert len(trainable_model.output_names) == 2
    combined_metrics = {trainable_model.output_names[1]: metrics}
    losses = [
        lambda y_true, y_pred: y_pred,  # loss is computed in the Lambda layer
        lambda y_true, y_pred: K.zeros_like(y_pred),  # we only include this for the metrics
    ]
    trainable_model.compile(optimizer=optimizer, loss=losses, metrics=combined_metrics)
    self.trainable_model = trainable_model
    self.compiled = True
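# Hedged sketch of how this trainable model would be fed during a training step (names and
# shapes are illustrative assumptions, not the library's backward() code): the Bellman
# targets and the one-hot action masks are passed as extra *inputs*, while the y arguments
# are dummies, because the real loss is already computed inside the Lambda layer.
import numpy as np

batch_size, nb_actions = 32, 4
state_batch = np.random.random((batch_size, 8)).astype('float32')  # assumed 8-dim observations
targets = np.zeros((batch_size, nb_actions), dtype='float32')
masks = np.zeros((batch_size, nb_actions), dtype='float32')
actions = np.random.randint(nb_actions, size=batch_size)
returns = np.random.random(batch_size).astype('float32')           # stand-in Bellman targets
for i, (a, r) in enumerate(zip(actions, returns)):
    targets[i, a] = r   # only the taken action gets a target...
    masks[i, a] = 1.    # ...and only it contributes to the masked loss
dummy_targets = np.zeros((batch_size, 1), dtype='float32')         # for the 'loss' output head
# self.trainable_model.train_on_batch([state_batch, targets, masks], [dummy_targets, targets])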