# initially, you want to use random actions - but over time as the actor
# learns, the actor's actions will be better
epsilon = 1
epsilon_decay = args.epsilon_decay
display_steps = args.display_iterations

sim = Simulator(1)

device_string = "/cpu:0"

with tf.Graph().as_default(), tf.device(device_string):
    sess = tf.Session(config=tf.ConfigProto(
        allow_soft_placement=True,
        log_device_placement=False))
    with sess.as_default():
        learner = ActionLearner(
            image_size=sim.screen_size,
            n_filters=args.number_of_filters,
            n_hidden=args.number_of_hidden,
            n_out=sim.number_of_actions
        )
        learner.set_sess(sess)

        saver = tf.train.Saver(tf.all_variables())
        sess.run(tf.initialize_all_variables())
        summary_writer = tf.train.SummaryWriter(args.save_folder, sess.graph_def)

        if args.restore != "No":
            saver.restore(sess, args.save_folder + args.restore)

        def redraw_heatmap(x, y, angle):
            # convert to radians
            angle = (angle - 90) * numpy.pi / 180.0
            # load the screen
epsilon = 1
epsilon_decay = args.epsilon_decay
display_steps = args.display_iterations

sim = Simulator(1, screen_size=args.image_size, state_space=args.state_space)

if gpu_flag > -1:
    device_string = '/gpu:{}'.format(gpu_flag)
else:
    device_string = "/cpu:0"

with tf.Graph().as_default(), tf.device(device_string):
    sess = tf.Session(config=tf.ConfigProto(
        allow_soft_placement=True,
        log_device_placement=False))
    with sess.as_default():
        learner = ActionLearner(
            image_size=sim.screen_size,
            n_filters=args.number_of_filters,
            n_hidden=args.number_of_hidden,
            n_out=sim.number_of_actions
        )
        learner.set_sess(sess)

        global_step = tf.Variable(0, name="global_step", trainable=False)
        optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
        grads_and_vars = optimizer.compute_gradients(
            learner.single_action_cost
        )  # could also use learner.normal_cost
        train_op = optimizer.apply_gradients(grads_and_vars, global_step=global_step)

        loss_summary = tf.scalar_summary("cost", learner.single_action_cost)

        # visualize those first level filters
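        # A minimal sketch of how train_op would typically be driven once a
        # batch is assembled. The feed_dict keys below (learner.states_pl,
        # learner.actions_pl, learner.targets_pl) are hypothetical stand-ins
        # for ActionLearner's real placeholders, which are not shown here:
        #
        #     _, step, cost, summary = sess.run(
        #         [train_op, global_step, learner.single_action_cost, loss_summary],
        #         feed_dict={learner.states_pl: batch_states,     # hypothetical name
        #                    learner.actions_pl: batch_actions,   # hypothetical name
        #                    learner.targets_pl: batch_targets})  # hypothetical name
        #     summary_writer.add_summary(summary, step)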
# initially, you want to use random actions - but over time as the actor
# learns, the actor's actions will be better
epsilon = 1
epsilon_decay = args.epsilon_decay
display_steps = args.display_iterations

sim = Simulator(1)

if gpu_flag > -1:
    device_string = '/gpu:{}'.format(gpu_flag)
else:
    device_string = "/cpu:0"

with tf.Graph().as_default(), tf.device(device_string):
    sess = tf.Session(config=tf.ConfigProto(
        allow_soft_placement=True,
        log_device_placement=False))
    with sess.as_default():
        learner = ActionLearner(
            image_size=sim.image_size,
            n_filters=args.number_of_filters,
            n_hidden=args.number_of_hidden,
            n_out=sim.number_of_actions
        )
        learner.set_sess(sess)

        global_step = tf.Variable(0, name="global_step", trainable=False)

        saver = tf.train.Saver(tf.all_variables())
        sess.run(tf.initialize_all_variables())
        summary_writer = tf.train.SummaryWriter(args.save_folder, sess.graph_def)

        if args.restore != "No":
            saver.restore(sess, args.save_folder + args.restore)

        # just display games
        sim.reset()
        previous_state = numpy.zeros((sim.image_size, sim.image_size, 3))
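# The epsilon-greedy rule described in the comment above can be made concrete
# in isolation. This helper is a self-contained illustration, not part of the
# ActionLearner API; q_values stands in for whatever the network predicts for
# the current state.
def epsilon_greedy_action(q_values, epsilon, rng=numpy.random):
    # with probability epsilon, explore: pick a uniformly random action
    if rng.rand() < epsilon:
        return rng.randint(len(q_values))
    # otherwise exploit: pick the action with the highest predicted value
    return int(numpy.argmax(q_values))

# After each step (or game), exploration is annealed toward greedy play,
# e.g. epsilon = max(0.05, epsilon * epsilon_decay); the 0.05 floor is an
# assumption for illustration, not a value taken from this code.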