var for var in tf.global_variables() if var.name[:9] != "embedding" ]) restore_path = tf.train.latest_checkpoint(logdir) with tf.Session() as sess: if restore_path is not None: logger.info( "Restoring variables from checkpoint: {}".format(restore_path)) restoring_saver.restore(sess, restore_path) else: logger.info("Initializing brand new network parameters.") sess.run(tf.global_variables_initializer()) global_step = sess.run(ml.global_step) logger.info("Gathering initial gameplay data!") if config['training_agent'] == "random_rollout": from agents import RandomRolloutAgent agent = RandomRolloutAgent(ml.envmodel) policy = agent.policy else: policy = None ml.gather_gameplay_data(config['n_initial_games'], policy=None) logger.info("Beginning training.") logger.info("To visualize, call:\ntensorboard --logdir={}".format(logdir)) from utils import dataset while (not config['maxsteps']) or global_step < config['maxsteps']: transition_data = ml.create_transition_sequence_dataset(n=10000) if config['use_goal_boosting']: gb_data = ml.create_goals_dataset(n=1000) gb_data_batches = dataset.iterbatches( gb_data, batch_size=config['batchsize'], shuffle=True,
from agents import RandomRolloutAgent
from environments.connect4 import Connect4GameState
from runners import run_to_the_end, run_for_n_games_and_print_stats

if __name__ == "__main__":
    # Baseline sanity check: two identical random-rollout agents
    # (10 rollouts each, no verbose output) play Connect 4 head-to-head.
    game_state = Connect4GameState()
    players = [
        RandomRolloutAgent(10, False),
        RandomRolloutAgent(10, False),
    ]
    # Play 10 games and report aggregate win/draw statistics.
    run_for_n_games_and_print_stats(players, game_state, 10)
from agents import CommandLineAgent, DeepQLearningAgent, RandomRolloutAgent
from environments.connect4 import Connect4GameState
from runners import run_to_the_end, run_for_n_games_and_print_stats, run_step

if __name__ == "__main__":
    # Train a deep Q-learning agent on Connect 4 against a random-rollout
    # opponent, then evaluate it greedily and finally let a human play it.
    game_state = Connect4GameState()

    learner = DeepQLearningAgent(
        action_space_size=game_state.get_action_space_size(),
        neurons_per_hidden_layer=128,
        hidden_layers=5,
    )
    opponent = RandomRolloutAgent(100, False)

    # Training hyper-parameters: learning rate and exploration rate.
    learner.alpha = 0.1
    learner.epsilon = 0.3

    # Training phase: 100 rounds of 100 games each, stats printed per round.
    for _ in range(100):
        run_for_n_games_and_print_stats([learner, opponent], game_state, 100)

    # Evaluation phase: a negative epsilon disables exploration entirely,
    # so the agent always plays its greedy (highest-Q) action.
    learner.epsilon = -1.0
    run_for_n_games_and_print_stats([learner, opponent], game_state, 100)

    # Interactive phase: human (via command line) vs the trained agent,
    # stepping on a clone so the training state object is left untouched.
    interactive_state = game_state.clone()
    while not interactive_state.is_game_over():
        run_step([learner, CommandLineAgent()], interactive_state)
        print(interactive_state)
from agents import RandomRolloutAgent
from environments import GridWorldGameState
from runners import run_to_the_end

if __name__ == "__main__":
    # Single-agent GridWorld demo: print the initial state, let a
    # random-rollout agent (100000 rollouts, verbose) play the episode
    # to completion, then print the terminal state.
    world = GridWorldGameState()
    rollout_agent = RandomRolloutAgent(100000, True)
    print(world)
    run_to_the_end([rollout_agent], world)
    print(world)