# --- Left-paddle DQN setup --------------------------------------------------
# Hyperparameters for the left-side player's network.
BATCH_SIZE = 32
ALPHA = 1e-4    # learning rate for the Adam optimizer used elsewhere
GAMMA = .99     # discount factor
EPSILON = .97   # presumably the epsilon-greedy exploration rate -- TODO confirm
rewards_array = list()

# Name of a saved model checkpoint for the left player (restore is currently
# commented out, so this is collected but unused while LOAD_MODEL is False).
MODEL_L_STR = input("enter the name of a model (left side player)")
#MODEL_R_STR = input ("enter the name of a model (right side player)")
LOAD_MODEL = False

session = tf.Session()

# Input: a batch of single-channel 64x64 frames (NCHW-style layout).
State_InL = tf.placeholder(tf.float32, shape=[None, 1, 64, 64])
with tf.variable_scope("paddleL"):
    Q_L = MyFunctions.DQN(State_InL, reuse=False)
#saver1 = tf.train.Saver(var_list=tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope='paddleL'))
#saver1.restore(session, MODEL_L_STR)
session.run(tf.global_variables_initializer())

Game = PONG.PongGame(320, "pixels")

# Initial one-hot action vectors for the warm-up step.
# FIX: the original `AVL = AVR = [1, 0, 0]` aliased ONE list object under both
# names, so an in-place mutation through either name would silently change the
# other player's action vector. Give each player its own independent list.
AVL = [1, 0, 0]
AVR = [1, 0, 0]
L, R, STATE = Game.Run4Frames(AVL, AVR)

# After the warm-up, the action variables are reused as plain scalars
# (rebinding to ints is safe; no aliasing concern for immutable values).
AVL = AVR = 5

NUM_ROUNDS_PLAYED = 0
time_step = 0
LRewSUM = 0   # running reward total, left player
RRewSUM = 0   # running reward total, right player
training_data = list()
clock = pygame.time.Clock()

# Log file that will receive the right-side player's reward history.
fstring = "DQN_RIGHT_REWARDS"
results_file = open("./"+fstring, 'w')

# Hyperparameters (these repeat the values defined earlier in the file).
BATCH_SIZE = 32
ALPHA = 1e-4    # learning rate
GAMMA = .99     # discount factor
EPSILON = .97   # presumably epsilon-greedy exploration rate -- TODO confirm
rewards_array = list()
LOAD_MODEL = False

# Build the right paddle's Q-network graph.
# Input: batch of single-channel 64x64 frames.
State_InR = tf.placeholder(tf.float32, shape=[None, 1, 64, 64])
with tf.variable_scope("paddleR"):
    Q_R = MyFunctions.DQN(State_InR, reuse=False)

# Loss for the right-side player: mean squared error between the target
# Q-values (GT_R) and the Q-value of the action actually taken, selected
# via an elementwise product with the one-hot action placeholder.
GT_R = tf.placeholder(tf.float32, shape=[BATCH_SIZE])
Action_Placeholder_R = tf.placeholder(tf.float32, shape=[BATCH_SIZE, 3])
approximation_R = tf.reduce_sum(tf.multiply(Action_Placeholder_R, Q_R), 1)
Loss_R = tf.reduce_mean(tf.square(GT_R - approximation_R))
train_step_R = tf.train.AdamOptimizer(ALPHA).minimize(Loss_R)

Game = PONG.PongGame(320, "pixels")
Game.PADDLE_SPEED_L = 4

saver = tf.train.Saver()
session = tf.Session()
session.run(tf.global_variables_initializer())

model_string = "DQN_RIGHT_MODEL"