tf.reset_default_graph()

# --- Inputs ---------------------------------------------------------------
# X: batch of preprocessed states; input_shape is defined elsewhere in the file.
X = tf.placeholder(tf.float32, shape=input_shape)
# Flag presumably toggling dropout/batch-norm inside QNetwork — confirm there.
in_training_mode = tf.placeholder(tf.bool)

# --- Q networks -----------------------------------------------------------
# Two networks with identical architecture under separate variable scopes.
mainQ_weights, mainQ_outputs = QNetwork(X, nActions, nameScope='mainQ')
targetQ_weights, targetQ_outputs = QNetwork(X, nActions, nameScope='targetQ')

# --- Q-value of the taken action -----------------------------------------
# NOTE(review): canonical DQN gathers the per-action Q from the *online*
# (mainQ) network; here it is gathered from targetQ, and the copy op below
# runs target -> main. The two networks' roles appear swapped relative to
# convention — verify against the training loop before "fixing" either one.
X_action = tf.placeholder(tf.int32, shape=(None, ))
Q_action = tf.reduce_sum(
    targetQ_outputs * tf.one_hot(X_action, nActions),
    axis=-1,
    keep_dims=True,  # deprecated spelling; `keepdims` on TF >= 1.5
)

# --- Weight copy: targetQ variables -> mainQ variables --------------------
copy_op = [
    tf.assign(main_var, targetQ_weights[var_name])
    for var_name, main_var in mainQ_weights.items()
]
copy_target_to_main = tf.group(*copy_op)

# --- Loss -----------------------------------------------------------------
# y: externally computed TD target, shape (batch, 1); MSE against Q_action.
y = tf.placeholder(tf.float32, shape=(None, 1))
loss = tf.reduce_mean(tf.square(y - Q_action))

# --- Frame preprocessor ---------------------------------------------------
state_preprocessor = StatePreprocessor(nFrames=nFrames, shape=frame_shape)

# --- Session: restore pretrained weights ---------------------------------
saver = tf.train.Saver()
with tf.Session() as sess:
    saver.restore(sess, weights)
# Training-graph setup; duplicates the placeholder/network construction above
# (likely a separate notebook cell). Assumes tf.reset_default_graph() and the
# X placeholder were created earlier — confirm, since X is used but not
# defined in this chunk.
in_training_mode = tf.placeholder(tf.bool)

# --- Q networks -----------------------------------------------------------
mainQ_weights, mainQ_outputs = QNetwork(X, nActions, nameScope='mainQ')
targetQ_weights, targetQ_outputs = QNetwork(X, nActions, nameScope='targetQ')

# --- Q-value of the taken action -----------------------------------------
# NOTE(review): Q_action is gathered from targetQ_outputs, so the gradient of
# `loss` below flows into the *target* network and training_op updates it,
# while copy_target_to_main periodically syncs main <- target. This is the
# reverse of conventional DQN naming; it is self-consistent only if the
# training loop also treats targetQ as the online net — verify there.
X_action = tf.placeholder(tf.int32, shape=(None, ))
Q_action = tf.reduce_sum(
    targetQ_outputs * tf.one_hot(X_action, nActions),
    axis=-1,
    keep_dims=True,  # deprecated spelling; `keepdims` on TF >= 1.5
)

# --- Weight copy: targetQ variables -> mainQ variables --------------------
copy_op = [
    tf.assign(main_var, targetQ_weights[var_name])
    for var_name, main_var in mainQ_weights.items()
]
copy_target_to_main = tf.group(*copy_op)

# --- Loss -----------------------------------------------------------------
# y: externally computed TD target, shape (batch, 1); MSE against Q_action.
y = tf.placeholder(tf.float32, shape=(None, 1))
loss = tf.reduce_mean(tf.square(y - Q_action))

# --- Optimizer ------------------------------------------------------------
optimizer = tf.train.AdamOptimizer(learning_rate)
training_op = optimizer.minimize(loss)

# --- Logging --------------------------------------------------------------
loss_summary = tf.summary.scalar('loss', loss)