Example #1
def clipped_masked_error(args):
    # Defined inside the agent class, so huber_loss, self.max_grad and
    # K (keras.backend) come from the enclosing scope.
    y_pred, y_true, mask = args
    loss = huber_loss(y_true=y_true,
                      y_pred=y_pred,
                      max_grad=self.max_grad)
    loss *= mask  # zero out the loss for actions that were not taken
    return K.sum(loss, axis=-1)
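The huber_loss called above is provided by the project's objectives module and is not shown here. For reference, a minimal element-wise Huber loss can be written with Keras backend ops as below; this is only a sketch that assumes max_grad plays the role of the delta threshold, not the original implementation:

from keras import backend as K

def huber_loss(y_true, y_pred, max_grad=1.0):
    # Quadratic for |error| <= max_grad, linear beyond it (branch-free form).
    abs_error = K.abs(y_true - y_pred)
    quadratic = K.minimum(abs_error, max_grad)
    linear = abs_error - quadratic
    return 0.5 * K.square(quadratic) + max_grad * linear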
Example #2
# output_Q is the sum of the 2 Q heads and is used only for action selection:
output_Q = tf.add(output_Q1, output_Q2)
# The following is just for convenience when calculating the loss.
# Since there are 2 losses in this case, 2 target/prediction pairs are needed:
index1 = tf.argmax(tf.matmul(next_x, weight1) + bias1, axis=1)
y_true1 = tf.gather_nd(tf.matmul(next_x, weight2) + bias2, index1) * gamma * (1 - terminal) + r
y_pred1 = tf.gather_nd(output_Q1, action)
index2 = tf.argmax(tf.matmul(next_x, weight2) + bias2, axis=1)
y_true2 = tf.gather_nd(tf.matmul(next_x, weight1) + bias1, index2) * gamma * (1 - terminal) + r
y_pred2 = tf.gather_nd(output_Q2, action)

# Create the loss functions and set up the training steps:
# sample and form a batch to calculate the 2 kinds of loss
# along with the 2 training steps
loss1 = tf.reduce_mean(objectives.huber_loss(y_true1, y_pred1))
loss2 = tf.reduce_mean(objectives.huber_loss(y_true2, y_pred2))

train_step1 = tf.train.AdamOptimizer(alpha).minimize(loss1)
train_step2 = tf.train.AdamOptimizer(alpha).minimize(loss2)
sess.run(tf.global_variables_initializer())
#==================DOING THE TRAINING LOOP=========================
# NOTE: if the current number of samples is less than the batch size, the batch
# is created first by performing random actions to get enough samples.
update_counter = 0
rewardOneEpisode = 0
Q4Saver(weight1,bias1,weight2,bias2, 'P0', sess)
while update_counter < num_iteration:
    env.reset()
    for j in range(batch_size, Max_TimeStep):
        stateBatch, actionBatch, rewardBatch, nextStateBatch, terminalBatch = \
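tf.gather_nd in Examples #2 and #3 expects one [row, column] index pair per sample, while tf.argmax returns only the column. The sketch below (illustrative, not part of the original snippet) shows how index1 could be paired with the batch dimension; index2 and the action indices would be built the same way:

batch_range = tf.range(tf.cast(tf.shape(next_x)[0], tf.int64))
# pair each row number with the argmax column chosen by the first head
index1 = tf.stack([batch_range,
                   tf.argmax(tf.matmul(next_x, weight1) + bias1, axis=1)],
                  axis=1)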
Example #3
x = tf.placeholder(tf.float32, shape = [None, num_frame_skip*84*84], name = 'x')
next_x = tf.placeholder(tf.float32, shape = [None, num_frame_skip*84*84], name = 'next_x')
online_weight = tf.Variable(tf.truncated_normal([num_frame_skip*84*84, output_num],stddev = 0.1), name = 'online_weight')
target_weight = tf.placeholder(dtype = tf.float32, shape = [num_frame_skip*84*84, output_num], name = 'target_weight')
online_bias = tf.Variable(tf.zeros([output_num]),dtype = tf.float32, name='online_bias')
target_bias = tf.placeholder(dtype = tf.float32, shape = [output_num], name='target_bias')

output_Q = tf.matmul(x, online_weight) + online_bias
target_Q = tf.matmul(next_x, target_weight) + target_bias

y_true = r + gamma * tf.reduce_max(target_Q, axis=1) * (1-terminal)  # per-sample max over actions
y_pred = tf.gather_nd(output_Q, action)

# Create the loss function and set up the training step:
# sample and form a batch to calculate the loss
loss = tf.reduce_mean(objectives.huber_loss(y_true, y_pred))

train_step = tf.train.AdamOptimizer(alpha).minimize(loss)
sess.run(tf.global_variables_initializer())
# Training loop:
# the batch update version is used here
update_counter = 0
Q2Saver(online_weight, online_bias, 'P0', sess)
while update_counter < num_iteration:
    env.reset()
    rewardOneEpisode = 0 #collect reward obtained in one episode.
    for j in range(batch_size, Max_TimeStep):
        target_w = online_weight.eval(session = sess)
        target_b = online_bias.eval(session = sess)
        stateBatch, actionBatch, rewardBatch, nextStateBatch, terminalBatch, recentReward, break_flag = \
            getTrainingBatch(env, batch_size, output_Q, LinearPolicy, sample_queue, sess)
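Inside the loop, target_w and target_b hold a frozen copy of the online parameters and are meant to be fed into the target_weight and target_bias placeholders when the training step runs. A hypothetical sketch of that call, assuming r, terminal and action are placeholders defined elsewhere in the original program:

        sess.run(train_step, feed_dict={x: stateBatch,
                                        next_x: nextStateBatch,
                                        r: rewardBatch,
                                        terminal: terminalBatch,
                                        action: actionBatch,
                                        target_weight: target_w,
                                        target_bias: target_b})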
Example #4
import numpy as np
import objectives
import operator
import scipy.special
import tensorflow as tf

y_true_ph = tf.placeholder(tf.float32, shape=(4,))
y_pred_ph = tf.placeholder(tf.float32, shape=(4,))
huber_loss_tensor = objectives.huber_loss(y_true_ph, y_pred_ph)
mean_huber_loss_tensor = objectives.mean_huber_loss(y_true_ph, y_pred_ph)

sess = tf.Session()
with sess.as_default():
    y_true = [1, 2, 3, 4]
    y_pred = [1, 2.5, 4, 33]
    expected = scipy.special.huber(1, list(map(operator.sub, y_true, y_pred)))
    expected_mean = np.mean(expected)

    sess.run(tf.global_variables_initializer())
    feed_dict = {y_true_ph: y_true, y_pred_ph: y_pred}
    output = sess.run(huber_loss_tensor, feed_dict=feed_dict)
    #print(output)
    #print(expected)
    print(output == expected)

    output = sess.run(mean_huber_loss_tensor, feed_dict=feed_dict)
    #print(output)
    #print(expected_mean)
    print(output == expected_mean)
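The equality checks above compare floating-point values, so they can print False from rounding alone even when the loss is computed correctly; np.allclose is a more robust check (an extra suggestion, not part of the original test):

print(np.allclose(output, expected_mean))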