示例#1
0
def main(**kargs):
    initial_weights_file, initial_i_frame = latest(kargs['weights_dir'])

    print("Continuing using weights from file: ", initial_weights_file, "from", initial_i_frame)

    if kargs['theano_verbose']:
        theano.config.compute_test_value = 'warn'
        theano.config.exception_verbosity = 'high'
        theano.config.optimizer = 'fast_compile'

    ale = ag.init(display_screen=(kargs['visualize'] == 'ale'), record_dir=kargs['record_dir'])
    game = ag.SpaceInvadersGame(ale)


    def new_game():
        game.ale.reset_game()
        game.finished = False
        game.cum_reward = 0
        game.lives = 4
        return game

    replay_memory = dqn.ReplayMemory(size=kargs['dqn.replay_memory_size']) if not kargs['dqn.no_replay'] else None
    # dqn_algo = q.ConstAlgo([3])
    dqn_algo = dqn.DQNAlgo(game.n_actions(),
                           replay_memory=replay_memory,
                           initial_weights_file=initial_weights_file,
                           build_network=kargs['dqn.network'],
                           updates=kargs['dqn.updates'])

    dqn_algo.replay_start_size = kargs['dqn.replay_start_size']
    dqn_algo.final_epsilon = kargs['dqn.final_epsilon']
    dqn_algo.initial_epsilon = kargs['dqn.initial_epsilon']
    dqn_algo.i_frames = initial_i_frame

    dqn_algo.log_frequency=kargs['dqn.log_frequency']


    import Queue
    dqn_algo.mood_q = Queue.Queue() if kargs['show_mood'] else None

    if kargs['show_mood'] is not None:
        plot = kargs['show_mood']()

        def worker():
            while True:
                item = dqn_algo.mood_q.get()
                plot.show(item)
                dqn_algo.mood_q.task_done()

        import threading
        t = threading.Thread(target=worker)
        t.daemon = True
        t.start()

    print(str(dqn_algo))

    visualizer = ag.SpaceInvadersGameCombined2Visualizer() if kargs['visualize'] == 'q' else q.GameNoVisualizer()
    teacher = q.Teacher(new_game, dqn_algo, visualizer,
                        ag.Phi(skip_every=4), repeat_action=4, sleep_seconds=0)
    teacher.teach(500000)
示例#2
0
def const_on_space_invaders():
    import teacher as q
    import ale_game as ag
    import dqn
    reload(q)
    reload(ag)
    reload(dqn)

    ale = ag.init()
    game = ag.SpaceInvadersGame(ale)

    def new_game():
        game.ale.reset_game()
        game.finished = False
        game.cum_reward = 0
        return game

    const_algo = q.ConstAlgo([2, 2, 2, 2, 2, 0, 0, 0, 0])
    teacher = q.Teacher(new_game, const_algo, ag.SpaceInvadersGameCombined2Visualizer(),
                        ag.Phi(skip_every=6), repeat_action=6)
    teacher.teach(1)