import tensorflow as tf

tf.reset_default_graph()

# Inputs
X = tf.placeholder(tf.float32, shape=input_shape)
in_training_mode = tf.placeholder(tf.bool)

# Q networks
mainQ_weights, mainQ_outputs = QNetwork(X, nActions, nameScope='mainQ')
targetQ_weights, targetQ_outputs = QNetwork(X, nActions, nameScope='targetQ')
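# Two copies of the same architecture: a main network and a target
# network, the standard DQN pairing used to get stable bootstrap targets.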

# Q-value of the selected action
X_action = tf.placeholder(tf.int32, shape=(None,))
Q_action = tf.reduce_sum(targetQ_outputs * tf.one_hot(X_action, nActions),
                         axis=-1, keepdims=True)
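# The one-hot mask zeroes every Q-value except the chosen action's,
# so the reduce_sum extracts Q(s, a) for each sample in the batch.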

# Copy the target network's weights into the main network
copy_op = [
    tf.assign(main_name, targetQ_weights[var_name])
    for var_name, main_name in mainQ_weights.items()
]
copy_target_to_main = tf.group(*copy_op)
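# sess.run(copy_target_to_main) overwrites each main-network variable
# with the corresponding target-network value, keeping the two in sync.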

# Training targets
y = tf.placeholder(tf.float32, shape=(None, 1))

# Loss
loss = tf.reduce_mean(tf.square(y - Q_action))
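# Mean squared TD error; y is a placeholder, so it is treated as a
# constant target and gradients flow only through Q_action.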

# Optimizer
optimizer = tf.train.AdamOptimizer(learning_rate)
training_op = optimizer.minimize(loss)

# Logging
loss_summary = tf.summary.scalar('loss', loss)

# State preprocessor
state_preprocessor = StatePreprocessor(nFrames=nFrames, shape=frame_shape)

# TensorFlow session
saver = tf.train.Saver()
with tf.Session() as sess:
    saver.restore(sess, weights)
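    # A minimal sketch of one training iteration. sample_batch(), plus the
    # batch_size, nSteps and copy_interval hyperparameters, are assumed
    # helpers for illustration only; they are not defined in this listing.
    for step in range(nSteps):
        obs_batch, action_batch, y_batch = sample_batch(batch_size)
        sess.run(training_op, feed_dict={X: obs_batch,
                                         X_action: action_batch,
                                         y: y_batch,
                                         in_training_mode: True})
        # Periodically copy the target network's weights into the main one
        if step % copy_interval == 0:
            sess.run(copy_target_to_main)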