# --- Left-paddle DQN setup --------------------------------------------------
# Hyperparameters for the left-side player's network.
BATCH_SIZE = 32
ALPHA = 1e-4    # learning rate for the Adam optimizer used elsewhere
GAMMA = .99     # discount factor
EPSILON = .97   # presumably the epsilon-greedy exploration rate -- TODO confirm
rewards_array = list()

# Name of a saved model checkpoint for the left player (restore is currently
# commented out, so this is collected but unused while LOAD_MODEL is False).
MODEL_L_STR = input("enter the name of a model (left side player)")
#MODEL_R_STR = input ("enter the name of a model (right side player)")
LOAD_MODEL = False

session = tf.Session()

# Input: a batch of single-channel 64x64 frames (NCHW-style layout).
State_InL = tf.placeholder(tf.float32, shape=[None, 1, 64, 64])
with tf.variable_scope("paddleL"):
    Q_L = MyFunctions.DQN(State_InL, reuse=False)
#saver1 = tf.train.Saver(var_list=tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope='paddleL'))
#saver1.restore(session, MODEL_L_STR)
session.run(tf.global_variables_initializer())

Game = PONG.PongGame(320, "pixels")

# Initial one-hot action vectors for the warm-up step.
# FIX: the original `AVL = AVR = [1, 0, 0]` aliased ONE list object under both
# names, so an in-place mutation through either name would silently change the
# other player's action vector. Give each player its own independent list.
AVL = [1, 0, 0]
AVR = [1, 0, 0]
L, R, STATE = Game.Run4Frames(AVL, AVR)

# After the warm-up, the action variables are reused as plain scalars
# (rebinding to ints is safe; no aliasing concern for immutable values).
AVL = AVR = 5

NUM_ROUNDS_PLAYED = 0
time_step = 0
LRewSUM = 0   # running reward total, left player
RRewSUM = 0   # running reward total, right player
training_data = list()
clock = pygame.time.Clock()

# Log file that will receive the right-side player's reward history.
fstring = "DQN_RIGHT_REWARDS"
results_file = open("./"+fstring, 'w')

# Hyperparameters (these repeat the values defined earlier in the file).
BATCH_SIZE = 32
ALPHA = 1e-4    # learning rate
GAMMA = .99     # discount factor
EPSILON = .97   # presumably epsilon-greedy exploration rate -- TODO confirm
rewards_array = list()
LOAD_MODEL = False

# Build the right paddle's Q-network graph.
# Input: batch of single-channel 64x64 frames.
State_InR = tf.placeholder(tf.float32, shape=[None, 1, 64, 64])
with tf.variable_scope("paddleR"):
    Q_R = MyFunctions.DQN(State_InR, reuse=False)

# Loss for the right-side player: mean squared error between the target
# Q-values (GT_R) and the Q-value of the action actually taken, selected
# via an elementwise product with the one-hot action placeholder.
GT_R = tf.placeholder(tf.float32, shape=[BATCH_SIZE])
Action_Placeholder_R = tf.placeholder(tf.float32, shape=[BATCH_SIZE, 3])
approximation_R = tf.reduce_sum(tf.multiply(Action_Placeholder_R, Q_R), 1)
Loss_R = tf.reduce_mean(tf.square(GT_R - approximation_R))
train_step_R = tf.train.AdamOptimizer(ALPHA).minimize(Loss_R)

Game = PONG.PongGame(320, "pixels")
Game.PADDLE_SPEED_L = 4

saver = tf.train.Saver()
session = tf.Session()
session.run(tf.global_variables_initializer())

model_string = "DQN_RIGHT_MODEL"