# Module-level imports and the `checkpoints` array are not shown in this
# listing; the snippet assumes ALEInterface, cv2, numpy (np) and random are
# imported, and that `checkpoints` holds 100 encoded ALE states.
class emulator:
    def __init__(self, rom_name, vis, windowname='preview'):
        self.ale = ALEInterface()
        self.max_frames_per_episode = self.ale.getInt(
            "max_num_frames_per_episode")
        self.ale.setInt("random_seed", 123)
        self.ale.setInt("frame_skip", 4)
        self.ale.loadROM('roms/' + rom_name)
        self.legal_actions = self.ale.getMinimalActionSet()
        self.action_map = dict()
        self.windowname = windowname
        for i in range(len(self.legal_actions)):
            self.action_map[self.legal_actions[i]] = i
        self.init_frame_number = 0

        # print(self.legal_actions)
        self.screen_width, self.screen_height = self.ale.getScreenDims()
        print("width/height: " + str(self.screen_width) + "/" +
              str(self.screen_height))
        self.vis = vis
        if vis:
            cv2.startWindowThread()
            cv2.namedWindow(self.windowname)

    def get_image(self):
        numpy_surface = np.zeros(self.screen_height * self.screen_width * 3,
                                 dtype=np.uint8)
        self.ale.getScreenRGB(numpy_surface)
        image = np.reshape(numpy_surface,
                           (self.screen_height, self.screen_width, 3))
        return image

    def newGame(self):
        # Instead of resetting the game, we load a checkpoint and start from there.
        # self.ale.reset_game()
        self.ale.restoreState(
            self.ale.decodeState(checkpoints[random.randint(
                0, 99)].astype('uint8')))
        self.init_frame_number = self.ale.getFrameNumber()
        #self.ale.restoreState(self.ale.decodeState(np.reshape(checkpoint,(1009,1))))
        return self.get_image()

    def next(self, action_indx):
        reward = self.ale.act(action_indx)
        nextstate = self.get_image()
        # scipy.misc.imsave('test.png',nextstate)
        if self.vis:
            cv2.imshow(self.windowname, nextstate)
        return nextstate, reward, self.ale.game_over()

    def get_frame_number(self):
        return self.ale.getFrameNumber() - self.init_frame_number
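The class above reads a module-level `checkpoints` array that is not part of this snippet. A minimal sketch, assuming the old ale_python_interface package and a 'roms/breakout.bin' ROM, of how such an array of 100 encoded ALE states could be produced (the function name and random warm-up policy are illustrative, not from the original repo):

import random
import numpy as np
from ale_python_interface import ALEInterface

def build_checkpoints(rom_path='roms/breakout.bin', n=100, warmup_steps=200):
    ale = ALEInterface()
    ale.loadROM(rom_path)
    actions = ale.getMinimalActionSet()
    checkpoints = []
    for _ in range(n):
        ale.reset_game()
        for _ in range(warmup_steps):  # random warm-up to diversify start states
            ale.act(actions[random.randrange(len(actions))])
        # cloneState() snapshots the emulator; encodeState() turns it into a
        # byte array that decodeState() can restore later, as newGame() does.
        checkpoints.append(np.asarray(ale.encodeState(ale.cloneState()),
                                      dtype=np.uint8))
    return checkpoints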
Example #2
    # __init__ of an ALE environment wrapper class (the class statement itself
    # is not included in this listing)
    def __init__(self,
                 game,
                 seed=None,
                 use_sdl=False,
                 n_last_screens=4,
                 frame_skip=4,
                 treat_life_lost_as_terminal=True,
                 crop_or_scale='scale',
                 max_start_nullops=30,
                 record_screen_dir=None):
        self.n_last_screens = n_last_screens
        self.treat_life_lost_as_terminal = treat_life_lost_as_terminal
        self.crop_or_scale = crop_or_scale
        self.max_start_nullops = max_start_nullops

        # atari_py is used here only to locate ROM files. atari_py ships its
        # own ale_python_interface, but that copy is obsolete.
        game_path = atari_py.get_game_path(game)

        ale = ALEInterface()
        if seed is not None:
            assert seed >= 0 and seed < 2 ** 16, \
                "ALE's random seed must be represented by unsigned int"
        else:
            # Use numpy's random state
            seed = np.random.randint(0, 2**16)
        ale.setInt(b'random_seed', seed)
        ale.setFloat(b'repeat_action_probability', 0.0)
        ale.setBool(b'color_averaging', False)
        if record_screen_dir is not None:
            ale.setString(b'record_screen_dir',
                          str.encode(str(record_screen_dir)))
        self.frame_skip = frame_skip
        if use_sdl:
            if 'DISPLAY' not in os.environ:
                raise RuntimeError(
                    'Please set DISPLAY environment variable for use_sdl=True')
            # SDL settings below are from the ALE python example
            if sys.platform == 'darwin':
                import pygame
                pygame.init()
                ale.setBool(b'sound', False)  # Sound doesn't work on OSX
            elif sys.platform.startswith('linux'):
                ale.setBool(b'sound', True)
            ale.setBool(b'display_screen', True)

        ale.loadROM(str.encode(str(game_path)))

        assert ale.getFrameNumber() == 0

        self.ale = ale
        self.legal_actions = ale.getMinimalActionSet()
        self.initialize()

        self.action_space = spaces.Discrete(len(self.legal_actions))
        one_screen_observation_space = spaces.Box(low=0,
                                                  high=255,
                                                  shape=(84, 84))
        self.observation_space = spaces.Tuple([one_screen_observation_space] *
                                              n_last_screens)
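The (84, 84) observation space and the crop_or_scale option above correspond to the usual DQN screen preprocessing. A hedged sketch of what each branch typically does (cv2-based; the crop offsets are an assumption, and this is not the wrapper's own implementation, which is not shown in the listing):

import cv2
import numpy as np

def preprocess_screen(rgb_screen, crop_or_scale='scale'):
    # rgb_screen: (210, 160, 3) uint8 frame from ale.getScreenRGB()
    gray = cv2.cvtColor(rgb_screen, cv2.COLOR_RGB2GRAY)
    if crop_or_scale == 'scale':
        # resize the full frame straight to 84x84
        return cv2.resize(gray, (84, 84), interpolation=cv2.INTER_LINEAR)
    # 'crop': resize to 110x84, then cut an 84x84 window out of the play area
    resized = cv2.resize(gray, (84, 110), interpolation=cv2.INTER_LINEAR)
    return resized[18:102, :]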
Example #4
    # __init__ of an ALE environment wrapper class, nearly identical to the one
    # in Example #2 but taking a ROM path directly (class statement not shown)
    def __init__(self, rom_filename, seed=None, use_sdl=False, n_last_screens=4,
                 frame_skip=4, treat_life_lost_as_terminal=True,
                 crop_or_scale='scale', max_start_nullops=30,
                 record_screen_dir=None):
        self.n_last_screens = n_last_screens
        self.treat_life_lost_as_terminal = treat_life_lost_as_terminal
        self.crop_or_scale = crop_or_scale
        self.max_start_nullops = max_start_nullops

        ale = ALEInterface()
        if seed is not None:
            assert seed >= 0 and seed < 2 ** 16, \
                "ALE's random seed must be represented by unsigned int"
        else:
            # Use numpy's random state
            seed = np.random.randint(0, 2 ** 16)
        ale.setInt(b'random_seed', seed)
        ale.setFloat(b'repeat_action_probability', 0.0)
        ale.setBool(b'color_averaging', False)
        if record_screen_dir is not None:
            ale.setString(b'record_screen_dir', str.encode(record_screen_dir))
        self.frame_skip = frame_skip
        if use_sdl:
            if 'DISPLAY' not in os.environ:
                raise RuntimeError(
                    'Please set DISPLAY environment variable for use_sdl=True')
            # SDL settings below are from the ALE python example
            if sys.platform == 'darwin':
                import pygame
                pygame.init()
                ale.setBool(b'sound', False)  # Sound doesn't work on OSX
            elif sys.platform.startswith('linux'):
                ale.setBool(b'sound', True)
            ale.setBool(b'display_screen', True)
        ale.loadROM(str.encode(rom_filename))

        assert ale.getFrameNumber() == 0


        self.ale = ale
        self.legal_actions = ale.getMinimalActionSet()
        self.initialize()
Example #5
screen_data = None

# initialize the state: stack four copies of the preprocessed first frame
# (`impre` is the script's preprocessing helper and `tc` its torch alias,
# both defined earlier in the original file, as is `cpu_dtype`)
image = ale.getScreenGrayscale(screen_data)
image = impre(name_of_the_game, image)
state = tc.stack((image, image, image, image),
                 dim=0).unsqueeze(0).type(cpu_dtype)
del image

memory_buffer = []
# zeros = tc.zeros_like(image)
# state_m = tc.zeros(sample_num, 4, 84, 84).type(gpu_dtype)
epi_num = 0
epi_reward = 0

frame_num = ale.getFrameNumber()
action = None
# iteration loop
while frame_num < 1e7:
    # reset_game if the game is over
    if ale.game_over():
        epi_num += 1

        f = open('/home/juna/atari_project/plot/list.txt', 'a')
        f.write(str(epi_num) + ',' + str(int(epi_reward)) + '\n')
        f.close()

        print('=====epi_reward : ' + str(epi_reward) + '=====')
        epi_reward = 0
        ale.reset_game()
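The fragment above builds the initial state by stacking four copies of the first preprocessed frame. A small sketch (not from the original script) of how such a (1, 4, 84, 84) stack is usually advanced after each emulator step: drop the oldest frame and append the newest one.

import torch as tc

def advance_state(state, new_frame):
    # state: (1, 4, 84, 84) tensor; new_frame: (84, 84) tensor of the latest screen
    new_frame = new_frame.unsqueeze(0).unsqueeze(0)           # -> (1, 1, 84, 84)
    return tc.cat((state[:, 1:, :, :], new_frame), dim=1)     # keep the four newest frames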
Example #6
class AtariEnvironment:
    def __init__(self, args, outputDir):

        self.outputDir = outputDir
        self.screenCaptureFrequency = args.screen_capture_freq

        self.ale = ALEInterface()
        self.ale.setInt(b'random_seed', 123456)
        random.seed(123456)
        # Fix https://groups.google.com/forum/#!topic/deep-q-learning/p4FAIaabwlo
        self.ale.setFloat(b'repeat_action_probability', 0.0)

        # Load the ROM file
        self.ale.loadROM(args.rom)

        self.actionSet = self.ale.getMinimalActionSet()
        self.gameNumber = 0
        self.stepNumber = 0
        self.resetGame()

    def getNumActions(self):
        return len(self.actionSet)

    def getState(self):
        return self.state

    def getGameNumber(self):
        return self.gameNumber

    def getFrameNumber(self):
        return self.ale.getFrameNumber()

    def getEpisodeFrameNumber(self):
        return self.ale.getEpisodeFrameNumber()

    def getEpisodeStepNumber(self):
        return self.episodeStepNumber

    def getStepNumber(self):
        return self.stepNumber

    def getGameScore(self):
        return self.gameScore

    def isGameOver(self):
        return self.ale.game_over()

    def step(self, action):
        previousLives = self.ale.lives()
        reward = 0
        isTerminal = 0
        self.stepNumber += 1
        self.episodeStepNumber += 1

        for i in range(4):
            prevScreenRGB = self.ale.getScreenRGB()
            reward += self.ale.act(self.actionSet[action])
            screenRGB = self.ale.getScreenRGB()

            # Detect end of episode, I don't think I'm handling this right in terms
            # of the overall game loop (??)
            if self.ale.lives() < previousLives or self.ale.game_over():
                isTerminal = 1
                break

            if self.gameNumber % self.screenCaptureFrequency == 0:
                capture_dir = self.outputDir + '/screen_cap/game-%06d' % (
                    self.gameNumber)
                if not os.path.isdir(capture_dir):
                    os.makedirs(capture_dir)
                self.ale.saveScreenPNG(capture_dir + '/frame-%06d.png' %
                                       (self.getEpisodeFrameNumber()))

        maxedScreen = np.maximum(screenRGB, prevScreenRGB)
        self.state = self.state.stateByAddingScreen(maxedScreen,
                                                    self.ale.getFrameNumber())
        self.gameScore += reward
        return reward, self.state, isTerminal

    def resetGame(self):
        if self.ale.game_over():
            self.gameNumber += 1
        self.ale.reset_game()
        self.state = State().stateByAddingScreen(self.ale.getScreenRGB(),
                                                 self.ale.getFrameNumber())
        self.gameScore = 0
        self.episodeStepNumber = 0  # environment steps, not ALE frames; the episode frame count will be roughly 4x this
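A minimal driver sketch for the class above, assuming argparse-style args with `rom` and `screen_capture_freq` fields, the `State` helper used in resetGame(), and the module-level imports (os, numpy, random) the class relies on; the random policy is a placeholder, not the original training loop.

import random
from argparse import Namespace

args = Namespace(rom=b'roms/breakout.bin', screen_capture_freq=100)
env = AtariEnvironment(args, outputDir='results')

for _ in range(3):                        # a few episodes with a random policy
    env.resetGame()
    isTerminal = False
    while not isTerminal and not env.isGameOver():
        action = random.randrange(env.getNumActions())
        reward, state, isTerminal = env.step(action)
    print('game %d finished with score %d' % (env.getGameNumber(), env.getGameScore()))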
Example #8
class Agent(object):
    def __init__(self):
        self._ale = ALEInterface()
        self._ale.setInt('random_seed', 123)
        self._ale.setFloat('repeat_action_probability', 0.0)
        self._ale.setBool('color_averaging', False)
        self._ale.loadROM('roms/enduro.bin')
        self._controller = Controller(self._ale)
        self._extractor = StateExtractor(self._ale)
        self._image = None

    def run(self, learn, episodes=1, draw=False):
        """ Implements the playing/learning loop.

        Args:
            learn(bool): Whether the self.learn() function should be called.
            episodes (int): The number of episodes to run the agent for.
            draw (bool): Whether to overlay the environment state on the frame.

        Returns:
            None
        """
        for e in range(episodes):
            # Observe the environment to set the initial state
            (grid, self._image) = self._extractor.run(draw=draw, scale=4.0)
            self.initialise(grid)

            num_frames = self._ale.getFrameNumber()

            # Each episode lasts 6500 frames
            while self._ale.getFrameNumber() - num_frames < 6500:
                # Take an action
                self.act()

                # Update the environment grid
                (grid, self._image) = self._extractor.run(draw=draw, scale=4.0)
                self.sense(grid)

                # Perform learning if required
                if learn:
                    self.learn()

                self.callback(learn, e + 1,
                              self._ale.getFrameNumber() - num_frames)
            self._ale.reset_game()

    def getActionsSet(self):
        """ Returns the set of all possible actions
        """
        return [Action.ACCELERATE, Action.RIGHT, Action.LEFT, Action.BRAKE]

    def move(self, action):
        """ Executes the action and advances the game to the next state.

        Args:
            action (int): The action which should be executed. Make sure to use
                          the constants returned by self.getActionsSet()

        Returns:
           int: The obtained reward after executing the action
        """
        return self._controller.move(action)

    def initialise(self, grid):
        """ Called at the beginning of each episode, mainly used
        for state initialisation.

        Args:
            grid (np.ndarray): 11x10 array with the initial environment grid.

        Returns:
            None
        """
        raise NotImplementedError

    def act(self):
        """ Called at each loop iteration to choose and execute an action.

        Returns:
            None
        """
        raise NotImplementedError

    def sense(self, grid):
        """ Called at each loop iteration to construct the new state from
        the updated environment grid.

        Returns:
            None
        """
        raise NotImplementedError

    def learn(self):
        """ Called at each loop iteration when the agent is learning. It should
        implement the learning procedure.

        Returns:
            None
        """
        raise NotImplementedError

    def callback(self, learn, episode, iteration):
        """ Called at each loop iteration mainly for reporting purposes.

        Args:
            learn (bool): Indicates whether the agent is learning or not.
            episode (int): The number of the current episode.
            iteration (int): The number of the current iteration.

        Returns:
            None
        """

        raise NotImplementedError
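The Agent class above is abstract; subclasses fill in the hooks. A hedged sketch of a minimal concrete subclass that drives it with a uniformly random policy (illustrative only, not part of the original framework):

import random

class RandomAgent(Agent):
    def initialise(self, grid):
        self.total_reward = 0

    def act(self):
        # pick any legal Enduro action and execute it via the controller
        self.total_reward += self.move(random.choice(self.getActionsSet()))

    def sense(self, grid):
        self.grid = grid

    def learn(self):
        pass

    def callback(self, learn, episode, iteration):
        if iteration % 500 == 0:
            print('episode %d, iteration %d, reward so far %d'
                  % (episode, iteration, self.total_reward))

# RandomAgent().run(learn=False, episodes=1, draw=True)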
Example #9
    font = pygame.font.SysFont("Ubuntu Mono", 30)
    text = font.render("Total Reward: " + str(total_reward), 1, (208, 255, 255))
    screen.blit(text, (330, line_pos))

    pygame.display.flip()

    # process the pygame event queue
    exit = False
    for event in pygame.event.get():
        if event.type == pygame.QUIT:
            exit = True
            break
    if pressed[pygame.K_q]:
        exit = True
    if exit:
        logger.close()
        break

    # cap the loop at 60 fps
    clock.tick(60)

    if ale.game_over():
        episode_frame_number = ale.getEpisodeFrameNumber()
        frame_number = ale.getFrameNumber()
        print("Frame Number: " + str(frame_number) + " Episode Frame Number: " + str(episode_frame_number))
        print("Episode " + str(episode) + " ended with score: " + str(total_reward))
        ale.reset_game()
        total_reward = 0.0
        episode += 1

Example #10
class GameManager(object):
    """This class takes care of the interactions between an agent and
    a game across episodes, as well as overall logging of performance.
    """

    def __init__(
        self,
        game_name,
        agent,
        results_dir,
        n_epochs=1,
        n_episodes=None,
        n_frames=None,
        remove_old_results_dir=False,
        use_minimal_action_set=True,
        min_time_between_frames=0,
    ):
        """game_name is one of the supported games (there are many), as a string: "space_invaders.bin"
        agent is an instance of a subclass of the Agent interface
        results_dir is a string representing a directory in which results and logs are placed
            If it does not exist, it is created.
        use_minimal_action_set determines whether the agent is offered all possible actions,
            or only those (minimal) that are applicable to the specific game.
        min_time_between_frames is the minimum required time in seconds between
            frames. If 0, the game is unrestricted.
        """
        self.game_name = game_name
        self.agent = agent
        self.use_minimal_action_set = use_minimal_action_set
        self.min_time_between_frames = min_time_between_frames
        self.n_epochs = n_epochs
        self.n_episodes = n_episodes
        self.n_frames = n_frames

        if (n_episodes is None and n_frames is None) or (n_episodes is not None and n_frames is not None):
            raise ValueError("Extacly one of n_episodes and n_frames " "must be defined")

        self.initialize_results_dir(results_dir, remove_old_results_dir)

        self.log = util.logging.Logger(
            ("settings", "step", "episode", "epoch", "overall"),
            "settings",
            os.path.join(self.results_dir, "GameManager.log"),
        )

        self.stats = util.logging.CSVLogger(
            os.path.join(self.results_dir, "stats.log"),
            header="epoch,episode,total_reward,n_frames,wall_time",
            print_items=True,
        )

        self._object_cache = dict()

        self.initialize_ale()
        self.initialize_agent()

        self.dump_settings()

    def initialize_results_dir(self, results_dir, remove_existing=False):
        """Creates the whole path of directories if they do no exist.
        If they do exist, raises an error unless remove_existing is True,
        in which case the existing directory is deleted.
        """
        now = datetime.now().strftime("%Y%m%d-%H-%M")
        # drop .bin, append current time down to the minute
        results_dir = os.path.join(results_dir, self.game_name[:-4] + now)

        if remove_existing:
            if os.path.exists(results_dir):
                shutil.rmtree(results_dir)
        # Should raise an error if directory exists
        os.makedirs(results_dir)

        self.results_dir = results_dir

    def initialize_ale(self):
        self.ale = ALEInterface()
        self.ale.loadROM(os.path.join(ROM_RELATIVE_LOCATION, self.game_name))

    def initialize_agent(self):
        RSC = namedtuple("RawStateCallbacks", ["raw", "grey", "rgb", "ram"])
        raw_state_callbacks = RSC(self.get_screen, self.get_screen_grayscale, self.get_screen_RGB, self.get_RAM)

        self.agent.set_raw_state_callbacks(raw_state_callbacks)
        self.agent.set_results_dir(self.results_dir)

        if self.use_minimal_action_set:
            actions = self.ale.getMinimalActionSet()
        else:
            actions = self.ale.getLegalActionSet()

        self.agent.set_available_actions(actions)

    def rest(self, already_elapsed):
        rest_time = self.min_time_between_frames - already_elapsed
        if rest_time > 0:
            sleep(rest_time)

    def run(self):
        """Runs self.n_epochs epochs, where the agent's learning is
        reset for each new epoch.
        Each epoch lasts self.n_episodes or self.n_frames, whichever is 
            defined.
        """
        self.log.overall("Starting run")
        run_start = time()
        for epoch in xrange(self.n_epochs):
            self.agent.reset()
            self.n_epoch = epoch
            self._run_epoch()
        self.log.overall("End of run ({:.2f} s)".format(time() - run_start))

    def _run_epoch(self):
        self.n_episode = 0

        start = time()
        while not self._stop_condition_met():
            self._run_episode()
            self.n_episode += 1
        wall_time = time() - start
        frames = self.ale.getFrameNumber()

        self.log.epoch("Finished epoch after {:.2f} seconds".format(wall_time))

    def _run_episode(self):
        self.ale.reset_game()
        self.agent.on_episode_start()

        total_reward = 0
        episode_start = time()

        while (not self.ale.game_over()) and (not self._stop_condition_met()):
            timestep_start = time()

            action = self.agent.select_action()
            reward = self.ale.act(action)
            self.agent.receive_reward(reward)

            total_reward += reward

            self.rest(time() - timestep_start)

        wall_time = time() - episode_start
        self.agent.on_episode_end()

        # Stats format: CSV with epoch, episode, total_reward, n_frames, wall_time
        self.stats.write(
            self.n_epoch, self.n_episode, total_reward, self.ale.getEpisodeFrameNumber(), "{:.2f}".format(wall_time)
        )

    def _stop_condition_met(self):
        if self.n_episodes:
            return self.n_episode >= self.n_episodes
        return self.ale.getFrameNumber() >= self.n_frames

    # Methods for state perception
    def get_screen(self):
        """Returns a matrix containing the current game screen in raw pixel data,
        i.e. before conversion to RGB. Handles reuse of np.array object, so it 
        will overwrite what is in the old object"""
        return self._cached("raw", self.ale.getScreen)

    def get_screen_grayscale(self):
        """Returns an np.array with the screen grayscale colours. 
        Handles reuse of np.array object, so it will overwrite what 
        is in the old object.
        """
        return self._cached("gray", self.ale.getScreenGrayscale)

    def get_screen_RGB(self):
        """Returns a numpy array with the screen's RGB colours. 
        The first positions contain the red colours, followed by
        the green colours and then the blue colours"""
        return self._cached("rgb", self.ale.getScreenRGB)

    def get_RAM(self):
        """Returns a vector containing current RAM content (byte-level).
        Handles reuse of np.array object, so it will overwrite what 
        is in the old object"""
        return self._cached("ram", self.ale.getRAM)

    def _cached(self, key, func):
        if key in self._object_cache:
            func(self._object_cache[key])
        else:
            self._object_cache[key] = func()

        return self._object_cache[key]

    def dump_settings(self):
        import json

        settings = self.get_settings()
        path = os.path.join(self.results_dir, "settings")
        with open(path, "w") as f:
            json.dump(settings, f, indent=4)

    def get_settings(self):
        """Returns a dict representing the settings needed to 
        reproduce this object and its subobjects
        """
        return {
            "game_name": self.game_name,
            "n_epochs": self.n_epochs,
            "n_episodes": self.n_episodes,
            "n_frames": self.n_frames,
            "agent": self.agent.get_settings(),
            "results_dir": self.results_dir,
            "use_minimal_action_set": self.use_minimal_action_set,
        }
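A minimal usage sketch for GameManager, assuming an agent object that provides the callbacks the manager invokes (set_raw_state_callbacks, set_results_dir, set_available_actions, reset, on_episode_start/on_episode_end, select_action, receive_reward, get_settings), plus the module-level ROM_RELATIVE_LOCATION and util.logging helpers the class references; the random agent below is illustrative only.

import random

class RandomALEAgent(object):
    def set_raw_state_callbacks(self, callbacks): self.callbacks = callbacks
    def set_results_dir(self, results_dir): pass
    def set_available_actions(self, actions): self.actions = list(actions)
    def reset(self): pass
    def on_episode_start(self): pass
    def on_episode_end(self): pass
    def select_action(self): return random.choice(self.actions)
    def receive_reward(self, reward): pass
    def get_settings(self): return {'type': 'random'}

manager = GameManager('space_invaders.bin', RandomALEAgent(), 'results',
                      n_episodes=5)
manager.run()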
Example #13
class Agent(object):
    def __init__(self):
        self._ale = ALEInterface()
        self._ale.setInt('random_seed', 123)
        self._ale.setFloat('repeat_action_probability', 0.0)
        self._ale.setBool('color_averaging', False)
        self._ale.loadROM('roms/enduro.bin')
        self._controller = Controller(self._ale)
        self._extractor = StateExtractor(self._ale)
        self._image = None
        self._speed_range = 50

    def run(self, learn, episodes=1, draw=False):
        """ Implements the playing/learning loop.

        Args:
            learn(bool): Whether the self.learn() function should be called.
            episodes (int): The number of episodes to run the agent for.
            draw (bool): Whether to overlay the environment state on the frame.

        Returns:
            None
        """
        for e in range(episodes):
            self._relative_speed = -self._speed_range

            # Observe the environment to set the initial state
            (road, cars, grid, self._image) = self._extractor.run(draw=draw,
                                                                  scale=4.0)
            self.initialise(road, cars, self._relative_speed, grid)

            num_frames = self._ale.getFrameNumber()

            # Each episode lasts 6500 frames
            while self._ale.getFrameNumber() - num_frames < 6500:
                # Take an action
                self.act()

                # Update the environment grid
                (road, cars, grid,
                 self._image) = self._extractor.run(draw=draw, scale=4.0)

                if self.collision(cars):
                    self._relative_speed = -self._speed_range

                self.sense(road, cars, self._relative_speed, grid)

                # Perform learning if required
                if learn:
                    self.learn()

                self.callback(learn, e + 1,
                              self._ale.getFrameNumber() - num_frames)
            self._ale.reset_game()

    def collision(self, cars):
        if not cars['others']:
            return False

        x, y, _, _ = cars['self']

        min_dist = sys.float_info.max
        min_angle = 0.

        for c in cars['others']:
            cx, cy, _, _ = c
            dist = np.sqrt((cx - x)**2 + (cy - y)**2)
            if dist < min_dist:
                min_dist = dist
                min_angle = np.arctan2(y - cy, cx - x)

        return min_dist < 18. and 0.1 * np.pi < min_angle < 0.9 * np.pi

    def getActionsSet(self):
        """ Returns the set of all possible actions
        """
        return [Action.ACCELERATE, Action.RIGHT, Action.LEFT, Action.BRAKE]

    def move(self, action):
        """ Executes the action and advances the game to the next state.

        Args:
            action (int): The action which should be executed. Make sure to use
                          the constants returned by self.getActionsSet()

        Returns:
           int: The obtained reward after executing the action
        """

        if action == Action.ACCELERATE:
            self._relative_speed = min(self._relative_speed + 1,
                                       self._speed_range)
        elif action == Action.BRAKE:
            self._relative_speed = max(self._relative_speed - 1,
                                       -self._speed_range)

        return self._controller.move(action)

    def initialise(self, road, cars, speed, grid):
        """ Called at the beginning of each episode, mainly used
        for state initialisation. For more information on the arguments
        have a look at the README.md

        Args:
            road: 2-dimensional array containing [x, y] points
                  in pixel coordinates of the road grid
            cars: dictionary which contains the location and the size
                  of the agent and the opponents in pixel coordinates
            speed: the relative speed of the agent with respect to the others
            grid:  2-dimensional numpy array containing the latest grid
                   representation of the environment

        Returns:
            None
        """
        raise NotImplementedError

    def act(self):
        """ Called at each loop iteration to choose and execute an action.

        Returns:
            None
        """
        raise NotImplementedError

    def sense(self, road, cars, speed, grid):
        """ Called at each loop iteration to construct the new state from
        the update environment grid. For more information on the arguments
        have a look at the README.md

        Args:
            road: 2-dimensional array containing [x, y] points
                  in pixel coordinates of the road grid
            cars: dictionary which contains the location and the size
                  of the agent and the opponents in pixel coordinates
            speed: the relative speed of the agent with respect to the others
            grid: 2-dimensional numpy array containing the latest grid
                  representation of the environment
        Returns:
            None
        """
        raise NotImplementedError

    def learn(self):
        """ Called at each loop iteration when the agent is learning. It should
        implement the learning procedure.

        Returns:
            None
        """
        raise NotImplementedError

    def callback(self, learn, episode, iteration):
        """ Called at each loop iteration mainly for reporting purposes.

        Args:
            learn (bool): Indicates whether the agent is learning or not.
            episode (int): The number of the current episode.
            iteration (int): The number of the current iteration.

        Returns:
            None
        """

        raise NotImplementedError
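An illustrative check of the collision() geometry above, using made-up pixel coordinates. The method is called unbound (it only reads the `cars` dictionary, never `self`), which works in Python 3 and avoids loading a ROM; it relies on the module-level numpy and sys imports the snippet assumes.

# Opponent ~10 px up the road (smaller y) and almost straight ahead: collision.
cars_hit = {'self': (80, 100, 8, 8), 'others': [(82, 90, 8, 8)]}
# Opponent 40 px behind the agent: no collision (distance >= 18).
cars_miss = {'self': (80, 100, 8, 8), 'others': [(82, 140, 8, 8)]}

print(Agent.collision(None, cars_hit))    # True
print(Agent.collision(None, cars_miss))   # False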