def main(**kargs):
    """Train a DQN agent on Space Invaders, resuming from the latest weights.

    All options arrive through ``kargs``. Several keys contain a dot, so they
    cannot be written as literal keyword arguments; callers must pass a dict
    via ``main(**options)``. Keys read:

    - ``'weights_dir'``: passed to ``latest()``, whose two results are used as
      the initial weights file and the initial frame counter.
    - ``'theano_verbose'``: when truthy, switches Theano to its verbose
      debugging configuration.
    - ``'visualize'``: ``'ale'`` turns on the emulator's own display,
      ``'q'`` selects ``SpaceInvadersGameCombined2Visualizer``; any other
      value uses ``GameNoVisualizer``.
    - ``'record_dir'``: forwarded to ``ag.init``.
    - ``'dqn.no_replay'`` / ``'dqn.replay_memory_size'``: when ``no_replay``
      is truthy no replay memory is created.
    - ``'dqn.network'``, ``'dqn.updates'``: forwarded to ``dqn.DQNAlgo`` as
      ``build_network`` and ``updates``.
    - ``'dqn.replay_start_size'``, ``'dqn.final_epsilon'``,
      ``'dqn.initial_epsilon'``, ``'dqn.log_frequency'``: copied onto the
      algorithm object as attributes.
    - ``'show_mood'``: falsy to disable, otherwise a zero-argument callable
      returning an object with a ``show(item)`` method.

    Raises:
        KeyError: if any of the keys above is missing from ``kargs``.
    """
    import threading

    try:
        import Queue as queue_module  # Python 2 module name
    except ImportError:
        import queue as queue_module  # Python 3 module name

    initial_weights_file, initial_i_frame = latest(kargs['weights_dir'])
    print("Continuing using weights from file: ", initial_weights_file, "from", initial_i_frame)

    if kargs['theano_verbose']:
        theano.config.compute_test_value = 'warn'
        theano.config.exception_verbosity = 'high'
        theano.config.optimizer = 'fast_compile'

    ale = ag.init(display_screen=(kargs['visualize'] == 'ale'),
                  record_dir=kargs['record_dir'])
    game = ag.SpaceInvadersGame(ale)

    def new_game():
        # The same game object is reused for every episode; only its
        # per-episode state is reset here.
        game.ale.reset_game()
        game.finished = False
        game.cum_reward = 0
        game.lives = 4
        return game

    if kargs['dqn.no_replay']:
        replay_memory = None
    else:
        replay_memory = dqn.ReplayMemory(size=kargs['dqn.replay_memory_size'])

    dqn_algo = dqn.DQNAlgo(game.n_actions(),
                           replay_memory=replay_memory,
                           initial_weights_file=initial_weights_file,
                           build_network=kargs['dqn.network'],
                           updates=kargs['dqn.updates'])

    dqn_algo.replay_start_size = kargs['dqn.replay_start_size']
    dqn_algo.final_epsilon = kargs['dqn.final_epsilon']
    dqn_algo.initial_epsilon = kargs['dqn.initial_epsilon']
    dqn_algo.i_frames = initial_i_frame
    dqn_algo.log_frequency = kargs['dqn.log_frequency']

    # One truthiness check decides both the queue and the worker. Previously
    # the queue used truthiness while the worker used ``is not None``, so a
    # falsy non-None value (e.g. False) left mood_q as None and then tried to
    # call the option as a factory.
    show_mood = kargs['show_mood']
    mood_queue = queue_module.Queue() if show_mood else None
    dqn_algo.mood_q = mood_queue

    if show_mood:
        plot = show_mood()

        def worker():
            # Forward every queued item to the plot, forever.
            while True:
                item = mood_queue.get()
                plot.show(item)
                mood_queue.task_done()

        plot_thread = threading.Thread(target=worker)
        # Daemon thread: the endless loop must not keep the process alive.
        plot_thread.daemon = True
        plot_thread.start()

    print(str(dqn_algo))

    if kargs['visualize'] == 'q':
        visualizer = ag.SpaceInvadersGameCombined2Visualizer()
    else:
        visualizer = q.GameNoVisualizer()

    teacher = q.Teacher(new_game, dqn_algo, visualizer,
                        ag.Phi(skip_every=4), repeat_action=4, sleep_seconds=0)
    teacher.teach(500000)
def const_on_space_invaders():
    """Run ``Teacher.teach(1)`` on Space Invaders with a fixed action list.

    The project modules ``teacher``, ``ale_game`` and ``dqn`` are imported
    locally and reloaded before use. The agent is a
    ``ConstAlgo([2, 2, 2, 2, 2, 0, 0, 0, 0])`` and the run is shown through
    ``SpaceInvadersGameCombined2Visualizer``, with ``skip_every=6`` and
    ``repeat_action=6``.
    """
    import teacher as q
    import ale_game as ag
    import dqn

    # Presumably reloaded so edits made during an interactive session are
    # picked up without restarting the interpreter.
    for module in (q, ag, dqn):
        reload(module)

    game = ag.SpaceInvadersGame(ag.init())

    def restart_game():
        # Reuse the single game object, resetting its per-episode state.
        game.ale.reset_game()
        game.finished = False
        game.cum_reward = 0
        return game

    fixed_algo = q.ConstAlgo([2, 2, 2, 2, 2, 0, 0, 0, 0])
    visualizer = ag.SpaceInvadersGameCombined2Visualizer()
    phi = ag.Phi(skip_every=6)

    q.Teacher(restart_game, fixed_algo, visualizer, phi, repeat_action=6).teach(1)