Example #1
restoring_saver = tf.train.Saver(var_list=[
    var for var in tf.global_variables() if var.name[:9] != "embedding"
])
restore_path = tf.train.latest_checkpoint(logdir)
with tf.Session() as sess:
    if restore_path is not None:
        logger.info(
            "Restoring variables from checkpoint: {}".format(restore_path))
        restoring_saver.restore(sess, restore_path)
    else:
        logger.info("Initializing brand new network parameters.")
        sess.run(tf.global_variables_initializer())
    global_step = sess.run(ml.global_step)
    logger.info("Gathering initial gameplay data!")
    if config['training_agent'] == "random_rollout":
        from agents import RandomRolloutAgent
        agent = RandomRolloutAgent(ml.envmodel)
        policy = agent.policy
    else:
        policy = None
    ml.gather_gameplay_data(config['n_initial_games'], policy=policy)
    logger.info("Beginning training.")
    logger.info("To visualize, call:\ntensorboard --logdir={}".format(logdir))
    from utils import dataset
    while (not config['maxsteps']) or global_step < config['maxsteps']:
        transition_data = ml.create_transition_sequence_dataset(n=10000)
        if config['use_goal_boosting']:
            gb_data = ml.create_goals_dataset(n=1000)
            gb_data_batches = dataset.iterbatches(
                gb_data,
                batch_size=config['batchsize'],
                shuffle=True)
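
The idiom worth noting in this example is the partial restore: a tf.train.Saver built over a filtered var_list restores everything except the embedding* variables, which keep their fresh initialization (useful when, say, an embedding layer changed shape between runs). Below is a minimal, self-contained sketch of the same pattern using the TF 1.x graph-mode API; the variable names and checkpoint path are illustrative, not taken from the example above.

import tensorflow as tf  # TF 1.x graph-mode API, as in the example

# One variable to restore from the checkpoint, one to leave freshly
# initialized (its name starts with "embedding", so the filter skips it).
kernel = tf.get_variable("kernel", shape=[4, 4])
table = tf.get_variable("embedding_table", shape=[10, 4])

# Saver restricted to variables whose names do not start with "embedding".
restoring_saver = tf.train.Saver(var_list=[
    var for var in tf.global_variables() if var.name[:9] != "embedding"
])

with tf.Session() as sess:
    # Initialize everything first, then overwrite the non-embedding subset
    # from the checkpoint (if one exists), so no variable stays uninitialized.
    sess.run(tf.global_variables_initializer())
    restore_path = tf.train.latest_checkpoint("./logdir")  # illustrative path
    if restore_path is not None:
        restoring_saver.restore(sess, restore_path)
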
Example #2
from agents import RandomRolloutAgent
from environments.connect4 import Connect4GameState
from runners import run_to_the_end, run_for_n_games_and_print_stats

if __name__ == "__main__":
    gs = Connect4GameState()
    agent0 = RandomRolloutAgent(10, False)
    agent1 = RandomRolloutAgent(10, False)

    run_for_n_games_and_print_stats([agent0, agent1], gs, 10)
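
RandomRolloutAgent(10, False) is constructed with what appear to be a rollout count and a verbosity flag; judging by the name, the agent presumably scores each candidate move by playing random games to the end. Here is a sketch of that flat Monte Carlo idea under an assumed game-state interface; get_available_actions, clone, step, is_game_over, get_score, and get_current_player are assumptions for illustration, not the repo's actual API.

import random

class FlatMonteCarloAgent:
    """Illustrative stand-in for RandomRolloutAgent: for each legal move,
    play n random games to the end and pick the move with the best mean
    terminal score."""

    def __init__(self, n_rollouts_per_action, verbose=False):
        self.n = n_rollouts_per_action
        self.verbose = verbose

    def act(self, gs, player_id):
        best_action, best_mean = None, float("-inf")
        for action in gs.get_available_actions():
            total = 0.0
            for _ in range(self.n):
                sim = gs.clone()
                sim.step(player_id, action)
                # Finish the simulated game with uniformly random moves.
                while not sim.is_game_over():
                    a = random.choice(sim.get_available_actions())
                    sim.step(sim.get_current_player(), a)
                total += sim.get_score(player_id)
            mean = total / self.n
            if self.verbose:
                print(action, mean)
            if mean > best_mean:
                best_action, best_mean = action, mean
        return best_action
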
Example #3
from agents import CommandLineAgent, DeepQLearningAgent, RandomRolloutAgent
from environments.connect4 import Connect4GameState
from runners import run_to_the_end, run_for_n_games_and_print_stats, run_step

if __name__ == "__main__":
    gs = Connect4GameState()
    agent0 = DeepQLearningAgent(action_space_size=gs.get_action_space_size(),
                                neurons_per_hidden_layer=128,
                                hidden_layers=5)
    agent1 = RandomRolloutAgent(100, False)
    agent0.alpha = 0.1
    agent0.epsilon = 0.3

    for i in range(100):
        run_for_n_games_and_print_stats([agent0, agent1], gs, 100)

    agent0.epsilon = -1.0
    run_for_n_games_and_print_stats([agent0, agent1], gs, 100)

    gs_clone = gs.clone()
    while not gs_clone.is_game_over():
        run_step([agent0, CommandLineAgent()], gs_clone)
        print(gs_clone)
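
After training, agent0.epsilon = -1.0 switches the agent to pure exploitation: in an epsilon-greedy policy the exploration branch fires when a uniform draw falls below epsilon, which can never happen for a negative value. A minimal sketch of that selection rule (the Q-value list is illustrative):

import random

def epsilon_greedy(q_values, epsilon):
    """Pick a random action with probability epsilon, else the greedy one.
    random.random() is in [0, 1), so with epsilon <= 0.0 the random branch
    can never fire and epsilon = -1.0 yields pure exploitation."""
    if random.random() < epsilon:
        return random.randrange(len(q_values))
    return max(range(len(q_values)), key=lambda a: q_values[a])

# epsilon_greedy([0.1, 0.9, 0.4], epsilon=-1.0) always returns action 1.
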
Example #4
from agents import RandomRolloutAgent
from environments import GridWorldGameState
from runners import run_to_the_end

if __name__ == "__main__":
    gs = GridWorldGameState()
    agent = RandomRolloutAgent(100000, True)

    print(gs)
    run_to_the_end([agent], gs)
    print(gs)
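
run_to_the_end presumably just loops the game to a terminal state, asking the (single) agent for an action each turn; printing the state before and after shows the start and end of the episode. A plausible sketch under the same assumed interface as the agent sketch above:

def run_to_the_end(agents, gs):
    # Step the game until it terminates; with one agent, it plays every turn.
    while not gs.is_game_over():
        player = gs.get_current_player()
        action = agents[player % len(agents)].act(gs, player)
        gs.step(player, action)
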