# Create environment
game_wrapper = GameWrapper(MAX_NOOP_STEPS)
print("The environment has the following {} actions: {}".format(
    game_wrapper.env.action_space.n,
    game_wrapper.env.unwrapped.get_action_meanings()))

# Create agent
MAIN_DQN = build_q_network(LEARNING_RATE, input_shape=INPUT_SHAPE)
TARGET_DQN = build_q_network(input_shape=INPUT_SHAPE)

replay_buffer = ReplayBuffer(size=MEM_SIZE, input_shape=INPUT_SHAPE)
agent = Agent(MAIN_DQN, TARGET_DQN, replay_buffer, input_shape=INPUT_SHAPE)

print('Loading model...')
# We only want to load the replay buffer when resuming training
agent.load('./saved_models/save-02502048/', load_replay_buffer=False)
print('Loaded.')

terminal = True
eval_rewards = []
evaluate_frame_number = 0

for frame in range(EVAL_LENGTH):
    if terminal:
        game_wrapper.reset(evaluation=True)
        life_lost = True
        episode_reward_sum = 0
        terminal = False

    # Breakout requires a "fire" action (action #1) to start the
    # game each time a life is lost.
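For context on the comment above: with the minimal ALE action set, Breakout exposes only four actions, which is why index 1 is the "fire" action. A quick sanity check (the output shown assumes the minimal action set; environments built with the full action set report 18 actions instead):

>>> game_wrapper.env.unwrapped.get_action_meanings()
['NOOP', 'FIRE', 'RIGHT', 'LEFT']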
# Build main and target networks
MAIN_DQN = build_q_network(LEARNING_RATE, input_shape=INPUT_SHAPE)
TARGET_DQN = build_q_network(input_shape=INPUT_SHAPE)

replay_buffer = ReplayBuffer(size=MEM_SIZE, input_shape=INPUT_SHAPE)
agent = Agent(MAIN_DQN, TARGET_DQN, replay_buffer, input_shape=INPUT_SHAPE,
              batch_size=BATCH_SIZE)

if LOAD_FROM is None:
    # Start training from scratch
    frame_number = 0
    rewards = []
    loss_list = []
else:
    # TODO: LOADING IS A LITTLE BROKEN AT THE MOMENT!
    # Load the agent instead
    print('Loading from', LOAD_FROM)
    meta = agent.load(LOAD_FROM, LOAD_REPLAY_BUFFER)

    # Apply information loaded from meta
    frame_number = meta['frame_number']
    rewards = meta['rewards']
    loss_list = meta['loss_list']
    print('Loaded')

# FULL TRAINING LOOP
try:
    # Allows us to write to TensorBoard
    with writer.as_default():
        while frame_number < TOTAL_FRAMES:
            epoch_frame = 0
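The `with writer.as_default():` line assumes a TensorFlow summary writer was created earlier in the script. Here is a minimal sketch of that setup, assuming TensorFlow 2.x; the `TENSORBOARD_DIR` constant is hypothetical, so substitute whatever log path your configuration defines:

import tensorflow as tf

# Hypothetical log-directory constant -- replace with your own config value
TENSORBOARD_DIR = 'tensorboard/'

# Create the summary writer once, before the training loop starts
writer = tf.summary.create_file_writer(TENSORBOARD_DIR)

Inside the `writer.as_default()` block, per-frame statistics can then be logged with calls such as `tf.summary.scalar('reward', episode_reward_sum, step=frame_number)`.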