def distance_matrix_logcosh(y_true, y_pred):
    # Distance matrix based on cosine distance (assuming embeddings are all normalized):
    # dm_true = K.dot(y_true, K.transpose(y_true))
    # dm_pred = K.dot(y_pred, K.transpose(y_pred))
    # L2 distance matrix:
    # dm_true = l2DM(y_true)
    dm_true = y_true
    dm_pred = l2DM(y_pred)
    loss = K_losses.logcosh(dm_true, dm_pred)
    return loss
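# l2DM is referenced above but not defined in this snippet. A minimal sketch of
# a pairwise L2 distance matrix in the Keras backend, assuming x holds one
# embedding per row, might look like this (hypothetical helper):
import tensorflow.keras.backend as K

def l2DM(x):
    # Pairwise squared distances via ||a - b||^2 = ||a||^2 - 2 a.b + ||b||^2,
    # for x of shape (n, d); broadcasting yields an (n, n) matrix.
    sq = K.sum(K.square(x), axis=1, keepdims=True)               # (n, 1)
    d2 = sq - 2.0 * K.dot(x, K.transpose(x)) + K.transpose(sq)   # (n, n)
    return K.sqrt(K.maximum(d2, K.epsilon()))  # clamp for numerical stability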
def reg_loss_func(y_true, y_pred):
    reg_true = K.slice(y_true, start_reg, size_reg)
    reg_pred = K.slice(y_pred, start_reg, size_reg)
    cls_true = K.slice(y_true, start_cls, size_cls)
    # Mask out the regression loss for entries with no assigned class.
    reg_mask = K.sum(cls_true, axis=-1, keepdims=True)
    reg_loss = logcosh(reg_true, reg_mask * reg_pred)
    return reg_loss * reg_weight
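# reg_loss_func (and loss_func below) reference slice bounds and weights that
# are not defined in this snippet. A plausible module-level setup, assuming the
# layout "8 regression channels followed by n_classes class channels" and
# hypothetical weight values:
import tensorflow.keras.backend as K
from tensorflow.keras.losses import logcosh

n_classes = 10            # hypothetical
start_reg = (0, 0, 0)     # K.slice start indices over (batch, anchor, channel)
size_reg = (-1, -1, 8)    # -1 keeps the full extent of that dimension
start_cls = (0, 0, 8)
size_cls = (-1, -1, n_classes)
reg_weight = 1.0          # hypothetical loss weights
cls_weight = 1.0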
def __init__(self, n_actions, frame_height=63, frame_width=113,
             stacked_frames=4, learning_rate=0.00001):
    self.n_actions = n_actions
    self.frame_height = frame_height
    self.frame_width = frame_width
    self.stacked_frames = stacked_frames
    self.learning_rate = learning_rate

    self.input = tf.placeholder(
        shape=[None, self.frame_height, self.frame_width, self.stacked_frames],
        dtype=tf.float32)
    # Scale pixel values to [0, 1]; keep the placeholder itself intact so it
    # can still be fed at run time.
    self.input_scaled = self.input / 255

    # Convolutional layers
    self.conv1 = self.conv_layer(self.input_scaled, 32, [8, 8], 4, 'conv1')
    self.conv2 = self.conv_layer(self.conv1, 64, [4, 4], 2, 'conv2')
    self.conv3 = self.conv_layer(self.conv2, 64, [3, 3], 1, 'conv3')
    self.flat = Flatten()(self.conv3)
    self.dense1 = self.dense_layer(self.flat, 512, 'dense1', relu)

    # Splitting into value and advantage streams (dueling architecture)
    self.v_stream, self.a_stream = tf.split(self.dense1, 2, 1)
    self.value = self.dense_layer(self.v_stream, 1, 'value')
    self.advantage = self.dense_layer(self.a_stream, self.n_actions, 'advantage')

    # Combining the streams into Q-values
    self.q_values = self.value + tf.subtract(
        self.advantage, tf.reduce_mean(self.advantage, axis=1, keepdims=True))
    self.prediction = tf.argmax(self.q_values, 1)

    # Target Q according to the Bellman equation
    self.target_q = tf.placeholder(shape=[None], dtype=tf.float32)
    self.action = tf.placeholder(shape=[None], dtype=tf.uint8)
    self.action_one_hot = tf.one_hot(self.action, self.n_actions, dtype=tf.float32)
    self.Q = tf.reduce_sum(tf.multiply(self.q_values, self.action_one_hot), axis=1)

    # Parameter updates
    self.error = logcosh(self.target_q, self.Q)
    self.loss = tf.reduce_mean(self.error)
    self.optimizer = tf.train.AdamOptimizer(learning_rate=self.learning_rate)
    self.update = self.optimizer.minimize(self.loss)
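# A minimal training-step sketch for the dueling DQN above, assuming the
# __init__ belongs to a class named DQN (hypothetical) with conv_layer and
# dense_layer defined on it; `states`, `actions`, and `targets` stand in for
# a real replay-buffer batch:
import numpy as np
import tensorflow as tf

dqn = DQN(n_actions=4)
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    states = np.zeros((32, 63, 113, 4), dtype=np.float32)   # dummy batch
    actions = np.zeros(32, dtype=np.uint8)
    targets = np.zeros(32, dtype=np.float32)
    loss, _ = sess.run([dqn.loss, dqn.update],
                       feed_dict={dqn.input: states,
                                  dqn.action: actions,
                                  dqn.target_q: targets})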
def __build_train_critic(self):
    """Build the training function for the critic network."""
    # Placeholders
    observations_placeholder = K.placeholder(shape=(None, *self.input_dim),
                                             name='model_inputs')
    rewards_placeholder = K.placeholder(shape=(None,),
                                        name='discounted_rewards')
    # Internal operations
    Q_value = self.critic_network(observations_placeholder)
    # Compute loss
    total_loss = K.mean(logcosh(rewards_placeholder, Q_value))
    # Train function
    return K.function(
        inputs=[observations_placeholder, rewards_placeholder],
        outputs=[total_loss],
        updates=Adam(lr=self.learning_rate).get_updates(
            total_loss, self.critic_network.trainable_weights))
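# Usage sketch for the K.function returned above (hypothetical names): calling
# it feeds the two placeholders, applies the Adam updates once, and returns
# the listed outputs.
#     train_critic = self.__build_train_critic()
#     loss_value, = train_critic([obs_batch, discounted_returns])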
def loss_func(y_true, y_pred):
    # y[:, :, 0:8] is the regression target; y[:, :, 8:] is the class scores.
    reg_true = K.slice(y_true, start_reg, size_reg)
    reg_pred = K.slice(y_pred, start_reg, size_reg)
    cls_true = K.slice(y_true, start_cls, size_cls)
    cls_pred = K.slice(y_pred, start_cls, size_cls)
    cls_loss = crossentropy(cls_true, cls_pred)
    # reg_mask = obj_true: only entries with an assigned class contribute
    # to the regression loss.
    reg_mask = K.sum(cls_true, axis=-1, keepdims=True)
    reg_loss = logcosh(reg_true, reg_mask * reg_pred)
    return reg_weight * reg_loss + cls_weight * cls_loss
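# Usage sketch: attach the combined detection-style loss to a hypothetical
# model whose output has shape (batch, anchors, 8 + n_classes):
#     model.compile(optimizer='adam', loss=loss_func)
# Keras calls loss_func with the full y_true and y_pred tensors, which the
# function then slices into regression and classification parts itself.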
def LogCosh_loss(y_true, y_pred):
    return logcosh(y_true, y_pred)
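# log-cosh is a smooth, Huber-like regression loss:
#     logcosh(y_true, y_pred) = mean(log(cosh(y_pred - y_true)), axis=-1)
# It behaves like x**2 / 2 for small errors and like |x| - log(2) for large
# ones, so it is MSE-like near zero yet robust to outliers like MAE.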
def _subCL(x):
    q, next_q, kldiv = x
    # kldiv, pred_q, next_q, p = x
    # Soft-sign squashes the KL divergence into (-1, 1); K.sqrt of its square
    # is its absolute value, which then scales the log-cosh TD error.
    softkldiv = kldiv / (1 + K.abs(kldiv))
    return K.sqrt(softkldiv * softkldiv) * logcosh(q, next_q)  # + K.epsilon()*kld
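# _subCL unpacks a list of tensors, the calling convention of a Keras Lambda
# layer; a usage sketch assuming q, next_q, and kldiv are same-shaped tensors
# already present in the graph:
#     from tensorflow.keras.layers import Lambda
#     weighted_error = Lambda(_subCL)([q, next_q, kldiv])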