# Read the command-line options and unpack the ones used for training setup.
args = parser.parse_args()
image_size = args.image_size
number_of_games = args.number_of_games
batch_size = args.batch_size
gpu_flag = args.gpu
learning_rate = args.learning_rate

# epsilon is the decision parameter: do you use the actor's actions or pick
# them randomly? Initially you want random actions, but over time, as the
# actor learns, the actor's own actions will be better.
epsilon = 1
epsilon_decay = args.epsilon_decay
display_steps = args.display_iterations

sim = Simulator(1)

# A GPU index of -1 (or lower) selects the CPU; any other value is used as the
# GPU ordinal in the TensorFlow device string.
if gpu_flag > -1:
    device_string = '/gpu:{}'.format(gpu_flag)
else:
    device_string = "/cpu:0"

with tf.Graph().as_default(), tf.device(device_string):
    # Soft placement lets TensorFlow fall back to another device when an op
    # cannot run on the requested one; device placement logging stays off.
    sess = tf.Session(config=tf.ConfigProto(
        allow_soft_placement=True,
        log_device_placement=False))
    with sess.as_default():
        # The network's input size and number of outputs come from the
        # simulator; the layer widths come from the command line.
        learner = ActionLearner(
            image_size=sim.image_size,
            n_filters=args.number_of_filters,
            n_hidden=args.number_of_hidden,
            n_out=sim.number_of_actions
        )
# # #this will do it linearly # # previous_state[0][1] += -1*reward*np.power(discount_factor,discount_iterator) # # discount_iterator += 1 # # break return score_list #+ left_state_list class FakeActor: def __init__(self,num_actions): self.number_of_actions = num_actions self.display_output = [0,0] def return_action(self,simulator_screen): return np.random.randint(self.number_of_actions) if __name__ == "__main__": sim = Simulator(10) actor = FakeActor(3) start_time = time.time() game_state_list = make_states(sim,actor,1,200,10,winners_only=False) print(time.time() - start_time) for state_list in game_state_list: for state in state_list: print(state[0][1:4],np.mean(state[0][4])) cv2.imshow('Phong!',cv2.resize(state[0][4],(0,0),fx=2,fy=2)) np.save('phong_screen',state[0][4]) cv2.waitKey(400)
# Read the command-line options and unpack the ones used for training setup.
args = parser.parse_args()
image_size = args.image_size
number_of_games = args.number_of_games
batch_size = args.batch_size
gpu_flag = args.gpu
learning_rate = args.learning_rate

# epsilon is the decision parameter: do you use the actor's actions or pick
# them randomly? Initially you want random actions, but over time, as the
# actor learns, the actor's own actions will be better.
epsilon = 1
epsilon_decay = args.epsilon_decay
display_steps = args.display_iterations

# The simulator's screen size and state space are taken from the command line.
sim = Simulator(1, screen_size=args.image_size, state_space=args.state_space)

# A GPU index of -1 (or lower) selects the CPU; any other value is used as the
# GPU ordinal in the TensorFlow device string.
if gpu_flag > -1:
    device_string = '/gpu:{}'.format(gpu_flag)
else:
    device_string = "/cpu:0"

with tf.Graph().as_default(), tf.device(device_string):
    # Soft placement lets TensorFlow fall back to another device when an op
    # cannot run on the requested one; device placement logging stays off.
    sess = tf.Session(config=tf.ConfigProto(
        allow_soft_placement=True,
        log_device_placement=False))
    with sess.as_default():
        # The network's input size and number of outputs come from the
        # simulator; the layer widths come from the command line.
        learner = ActionLearner(
            image_size=sim.screen_size,
            n_filters=args.number_of_filters,
            n_hidden=args.number_of_hidden,
            n_out=sim.number_of_actions
        )
# Read the command-line options and unpack the ones used for training setup.
args = parser.parse_args()
image_size = args.image_size
number_of_games = args.number_of_games
batch_size = args.batch_size
gpu_flag = args.gpu
learning_rate = args.learning_rate

# epsilon is the decision parameter: do you use the actor's actions or pick
# them randomly? Initially you want random actions, but over time, as the
# actor learns, the actor's own actions will be better.
epsilon = 1
epsilon_decay = args.epsilon_decay
display_steps = args.display_iterations

# The simulator's screen size and state space are taken from the command line.
sim = Simulator(1,
                screen_size=args.image_size,
                state_space=args.state_space)

# A GPU index of -1 (or lower) selects the CPU; any other value is used as the
# GPU ordinal in the TensorFlow device string.
if gpu_flag > -1:
    device_string = '/gpu:{}'.format(gpu_flag)
else:
    device_string = "/cpu:0"

with tf.Graph().as_default(), tf.device(device_string):
    # Soft placement lets TensorFlow fall back to another device when an op
    # cannot run on the requested one; device placement logging stays off.
    sess = tf.Session(config=tf.ConfigProto(allow_soft_placement=True,
                                            log_device_placement=False))
    with sess.as_default():
        # The network's input size and number of outputs come from the
        # simulator; the layer widths come from the command line.
        learner = ActionLearner(image_size=sim.screen_size,
                                n_filters=args.number_of_filters,
                                n_hidden=args.number_of_hidden,
                                n_out=sim.number_of_actions)
        # Hand the session to the learner so it can run its own ops.
        learner.set_sess(sess)
# Read the command-line options and unpack the ones used for training setup.
args = parser.parse_args()
image_size = args.image_size
number_of_games = args.number_of_games
batch_size = args.batch_size
gpu_flag = args.gpu
learning_rate = args.learning_rate

# epsilon is the decision parameter: do you use the actor's actions or pick
# them randomly? Initially you want random actions, but over time, as the
# actor learns, the actor's own actions will be better.
epsilon = 1
epsilon_decay = args.epsilon_decay
display_steps = args.display_iterations

sim = Simulator(1)

# A GPU index of -1 (or lower) selects the CPU; any other value is used as the
# GPU ordinal in the TensorFlow device string.
if gpu_flag > -1:
    device_string = '/gpu:{}'.format(gpu_flag)
else:
    device_string = "/cpu:0"

with tf.Graph().as_default(), tf.device(device_string):
    # Soft placement lets TensorFlow fall back to another device when an op
    # cannot run on the requested one; device placement logging stays off.
    sess = tf.Session(config=tf.ConfigProto(allow_soft_placement=True,
                                            log_device_placement=False))
    with sess.as_default():
        # The network's input size and number of outputs come from the
        # simulator; the layer widths come from the command line.
        learner = ActionLearner(image_size=sim.image_size,
                                n_filters=args.number_of_filters,
                                n_hidden=args.number_of_hidden,
                                n_out=sim.number_of_actions)
        # Hand the session to the learner so it can run its own ops.
        learner.set_sess(sess)

        # Non-trainable step counter, starting at 0.
        global_step = tf.Variable(0, name="global_step", trainable=False)