示例#1
0
文件: envs.py 项目: afcarl/uct_atari
class Atari(AtariEnv):
    metadata = {'render.modes': ['human', 'rgb_array']}

    def __init__(self,
                 game='pong',
                 obs_type='ram',
                 frameskip=(2, 5),
                 repeat_action_probability=0.):
        """Frameskip should be either a tuple (indicating a random range to
        choose from, with the top value exclude), or an int."""

        utils.EzPickle.__init__(self, game, obs_type)
        assert obs_type in ('ram', 'image')

        self.game_path = atari_py.get_game_path(game)
        if not os.path.exists(self.game_path):
            raise IOError('You asked for game %s but path %s does not exist' %
                          (game, self.game_path))
        self._obs_type = obs_type
        self.frameskip = frameskip
        self.ale = ALEInterface()
        self.viewer = None

        # Tune (or disable) ALE's action repeat:
        # https://github.com/openai/gym/issues/349
        assert isinstance(
            repeat_action_probability,
            (float, int)), "Invalid repeat_action_probability: {!r}".format(
                repeat_action_probability)
        self.ale.setFloat('repeat_action_probability'.encode('utf-8'),
                          repeat_action_probability)

        self._seed()

        (screen_width, screen_height) = self.ale.getScreenDims()
        self._buffer = np.empty((screen_height, screen_width, 3),
                                dtype=np.uint8)

        self._action_set = self.ale.getMinimalActionSet()
        self.action_space = spaces.Discrete(len(self._action_set))

        (screen_width, screen_height) = self.ale.getScreenDims()
        if self._obs_type == 'ram':
            self.observation_space = spaces.Box(low=np.zeros(128),
                                                high=np.zeros(128) + 255)
        elif self._obs_type == 'image':
            self.observation_space = spaces.Box(low=0,
                                                high=255,
                                                shape=(screen_height,
                                                       screen_width, 3))
        else:
            raise error.Error('Unrecognized observation type: {}'.format(
                self._obs_type))

    def _get_image(self):
        return self.ale.getScreenRGB(self._buffer).copy()
示例#2
0
def main():
    arguments = docopt.docopt(__doc__, version='ALE Demo Version 1.0')

    pygame.init()

    ale = ALEInterface()
    ale.setInt(b'random_seed', 123)
    ale.setBool(b'display_screen', True)
    ale.loadROM(str.encode(arguments['<rom_file>']))

    legal_actions = ale.getLegalActionSet()
    width, height = ale.getScreenDims()
    print(width, height)
    frame = ale.getScreenRGB()
    frame = np.array(frame, dtype=float)

    rewards, num_episodes = [], int(arguments['--iters'] or 5)
    for episode in range(num_episodes):
        total_reward = 0
        while not ale.game_over():
            total_reward += ale.act(random.choice(legal_actions))
        print('Episode %d reward %d.' % (episode, total_reward))
        rewards.append(total_reward)
        ale.reset_game()

    average = sum(rewards) / len(rewards)
    print('Average for %d episodes: %d' % (num_episodes, average))
class env_atari:
    def __init__(self, params):
        self.params = params
        self.ale = ALEInterface()
        self.ale.setInt('random_seed', np.random.randint(0, 500))
        self.ale.setFloat('repeat_action_probability', params['repeat_prob'])
        self.ale.setInt(b'frame_skip', params['frameskip'])
        self.ale.setBool('color_averaging', True)
        self.ale.loadROM('roms/' + params['rom'] + '.bin')
        self.actions = self.ale.getMinimalActionSet()
        self.action_space = c_action_space(len(self.actions))
        self.screen_width, self.screen_height = self.ale.getScreenDims()

    def reset(self):
        self.ale.reset_game()
        seed = np.random.randint(0, 7)
        for i in range(seed):
            self.ale.act(0)
        return self.get_image()

    def step(self, action):
        reward = self.ale.act(self.actions[action])
        next_s = self.get_image()
        terminate = self.ale.game_over()
        return next_s, reward, float(terminate), 0

    def get_image(self):
        temp = np.zeros(self.screen_height * self.screen_width * 3,
                        dtype=np.uint8)
        self.ale.getScreenRGB(temp)
        #self.ale.getScreenGrayscale(temp)
        return temp.reshape((self.screen_height, self.screen_width, 3))
示例#4
0
class FastAtariEnv(AtariEnv):
    def __init__(self,
                 game='Breakout',
                 obs_type='image',
                 frameskip=(2, 5),
                 repeat_action_probability=0.):
        self.game_path = atari_py.get_game_path(game)
        self._obs_type = obs_type
        self.frameskip = frameskip
        self.ale = ALEInterface()
        self.viewer = None
        assert isinstance(
            repeat_action_probability,
            (float, int)), "Invalid repeat_action_probability: {!r}".format(
                repeat_action_probability)
        self.ale.setFloat('repeat_action_probability'.encode('utf-8'),
                          repeat_action_probability)
        self._seed()
        (screen_width, screen_height) = self.ale.getScreenDims()
        self._buffer = np.empty((screen_height, screen_width, 3),
                                dtype=np.uint8)

    def _get_image(self):
        # Don't reorder from rgb to bgr as we're converting to greyscale anyway
        self.ale.getScreenRGB(self._buffer)  # says rgb but actually bgr
        return self._buffer
示例#5
0
class Atari:
    def __init__(self, rom_dir):
        self.ale = ALEInterface()

        # Set settings
        self.ale.setInt("random_seed", 123)
        self.frame_skip = 4
        self.ale.setInt("frame_skip", self.frame_skip)
        self.ale.setBool("display_screen", False)
        self.ale.setBool("sound", True)
        self.record_sound_for_user = True
        self.ale.setBool("record_sound_for_user", self.record_sound_for_user)

        # NOTE recording audio to file still works. But if both file recording and
        # record_sound_for_user are enabled, then only the latter is done
        #  self.ale.setString("record_sound_filename", "")

        # Get settings
        self.ale.loadROM(rom_dir)
        self.screen_width, self.screen_height = self.ale.getScreenDims()
        self.legal_actions = self.ale.getLegalActionSet()

        # Action count across all episodes
        self.action_count = 0
        self.start_time = time.time()

        self.reset()

    def reset(self):
        self.ale.reset_game()

    def take_action(self):
        action = self.legal_actions[np.random.randint(self.legal_actions.size)]
        self.ale.act(action)
        self.action_count += 1

    def print_fps(self, delta_t=500):
        if self.action_count % delta_t == 0:
            print '[atari.py] Frames/second: %f' % (
                self.action_count / (time.time() - self.start_time))
            print '[atari.py] Overall game frame count:', atari.action_count * atari.frame_skip
            print '---------'

    def get_image_and_audio(self):
        np_data_image = np.zeros(self.screen_width * self.screen_height * 3,
                                 dtype=np.uint8)
        if self.record_sound_for_user:
            np_data_audio = np.zeros(self.ale.getAudioSize(), dtype=np.uint8)
            self.ale.getScreenRGBAndAudio(np_data_image, np_data_audio)

            # Also supports independent audio queries if user desires:
            #  self.ale.getAudio(np_data_audio)
        else:
            np_data_audio = 0
            self.ale.getScreenRGB(np_data_image)

        return np.reshape(np_data_image,
                          (self.screen_height, self.screen_width,
                           3)), np.asarray(np_data_audio)
class emulator:
	def __init__(self, rom_name, vis):
		self.ale = ALEInterface()
		self.max_frames_per_episode = self.ale.getInt("max_mum_frames_per_episode")
		self.ale.setInt("random_seed", 123)
		self.ale.setInt("frame_skip", 4)
		self.ale.loadROM('roms/' + rom_name)
		self.legal_actions = self.ale.getMinimalActionSet()
		self.action_map = dict()
		for i in range(len(self.legal_actions)):
			self.action_map[self.legal_actions[i]] = i
		
		print self.legal_actions
		self.screen_width, self.screen_height = self.ale.getScreenDims()
		print("width/height: "+ str(self.screen_width) + "/" + str(self.screen_height))
		self.vis = vis
		if vis:
			cv2.startWindowThread()
			cv2.namedWindow("preview")
			
	def get_image(self):
		# numpy_surface = np.zeros(self.screen_height*self.screen_width*3, dtype=np.uint8)
		# self.ale.getScreenRGB(numpy_surface)
		# image = np.reshape(numpy_surface, (self.screen_height, self.screen_width, 3))
		image = self.ale.getScreenRGB()
		image = np.reshape(image, (self.screen_height, self.screen_width, 3))
		return image
	
	def newGame(self):
		self.ale.reset_game()
		return self.get_image(), 0, False
	
	def next(self, action_indx):
		reward = self.ale.act(action_indx)
		nextstate = self.get_image()
		if self.vis:
			cv2.imshow('preview', nextstate)
		return nextstate, reward, self.ale.game_over()
	
	def train(self):
		for episode in range(10):
			total_reward = 0
			frame_number = 0
			while not self.ale.game_over():
				a = self.legal_actions[random.randrange(len(self.legal_actions))]
				# Apply an action and get the resulting reward
				reward = self.ale.act(a);
				total_reward += reward
				screen = self.ale.getScreenRGB()
				screen = np.array(screen).reshape([self.screen_height, self.screen_width, -1])
				frame_number = self.ale.getEpisodeFrameNumber()
				cv2.imshow("screen", screen/255.0)
				cv2.waitKey(0)
				
			self.ale.saveScreenPNG("test_"+str(frame_number)+".png")
			print('Episode %d ended with score: %d' % (episode, total_reward))
			print('Frame number is : ', frame_number)
			self.ale.reset_game()
示例#7
0
class AtariEnvironment(Environment):
    """
    Atari Environment Object
    """
    def __init__(self,
                 rom_path,
                 action_repeat=4,
                 death_end=True,
                 width_resize=84,
                 height_resize=84,
                 resize_mod='scale'):
        super(Environment, self).__init__()
        self.action_repeat = action_repeat
        self.death_end = death_end
        self.width_resize = width_resize
        self.height_resize = height_resize
        self.resize_mod = resize_mod
        self.display = False

        from ale_python_interface import ALEInterface
        self.ale = ALEInterface()
        self.ale.loadROM(rom_path)
        self.ale.setInt('random_seed', np.random.randint(1000))
        self.ale.setBool('display_screen', self.display)
        self.action_set = self.ale.getMinimalActionSet()
        self.num_actions = len(self.action_set)
        self.start_lives = self.ale.lives()
        width, height = self.ale.getScreenDims()
        self.currentScreen = np.empty((height, width), dtype=np.uint8)
        self.reset()

    def reset(self):
        self.ale.reset_game()
        self.ale.getScreenGrayscale(self.currentScreen)
        self.terminal = False

    def step(self, action, repeat=None):
        repeat = self.action_repeat if repeat is None else repeat
        reward = 0
        for _ in range(repeat):
            reward += self.ale.act(self.action_set[action])
        self.ale.getScreenGrayscale(self.currentScreen)
        self.terminal = self.death_end and self.ale.lives(
        ) < self.start_lives or self.ale.game_over()
        return reward

    def get_frame(self):
        if self.resize_mod == 'scale':
            return imresize(self.currentScreen,
                            (self.width_resize, self.height_resize),
                            interp='bilinear')
        elif self.resize_mod == 'crop':
            height, width = self.currentScreen.shape
            res = (height - width) / 2
            crop = self.currentScreen[res:(res + width), :]
            return imresize(crop, (self.width_resize, self.height_resize),
                            interp='bilinear')
示例#8
0
文件: ale.py 项目: andth80/a3c-demo
class Env():
    def __init__(self, rom_name):
        self.__initALE()
        self.__loadROM(rom_name)
        self.screen_history = []
        self.screens = []

    def __initALE(self):
        self.ale = ALEInterface()
        self.ale.setInt(b'random_seed', randrange(1000))
        self.ale.setInt(b'fragsize', 64)
        self.ale.setInt(b'frame_skip', 1)

        # qq set this back to 0.25?
        self.ale.setFloat(b'repeat_action_probability', 0)
        self.ale.setLoggerMode('error')

    def __loadROM(self, rom_name):
        self.ale.loadROM(rom_name.encode('utf-8'))
        self.actions = self.ale.getMinimalActionSet()

        (width, height) = self.ale.getScreenDims()
        self.screen_data1 = np.empty((height, width, 3), dtype=np.uint8)
        self.screen_data2 = np.empty((height, width, 3), dtype=np.uint8)

    def get_legal_action_count(self):
        return len(self.actions)

    def act(self, action_index):
        action = self.actions[action_index]
        reward = 0

        # perform the action 4 times
        reward += _clip(self.ale.act(action), -1, 1)
        reward += _clip(self.ale.act(action), -1, 1)
        reward += _clip(self.ale.act(action), -1, 1)
        self.ale.getScreenRGB(self.screen_data1)
        reward += _clip(self.ale.act(action), -1, 1)
        self.ale.getScreenRGB(self.screen_data2)

        # return the pixel-wise max of the last two frames (some games only
        # render every other frame)
        screen_data_combined = np.maximum(self.screen_data1, self.screen_data2)
        terminal = self.ale.game_over()

        self.screens.append(preprocess_screen(screen_data_combined))
        phi = get_phi(self.screens)

        return (terminal, reward, phi, self.screen_data2)

    def get_s(self):
        return get_phi(self.screens)

    def reset(self):
        self.ale.reset_game()
        self.screens = []
class emulator:
    def __init__(self, rom_name, vis, windowname='preview'):
        self.ale = ALEInterface()
        self.max_frames_per_episode = self.ale.getInt(
            "max_num_frames_per_episode")
        self.ale.setInt("random_seed", 123)
        self.ale.setInt("frame_skip", 4)
        self.ale.loadROM('roms/' + rom_name)
        self.legal_actions = self.ale.getMinimalActionSet()
        self.action_map = dict()
        self.windowname = windowname
        for i in range(len(self.legal_actions)):
            self.action_map[self.legal_actions[i]] = i
        self.init_frame_number = 0

        # print(self.legal_actions)
        self.screen_width, self.screen_height = self.ale.getScreenDims()
        print("width/height: " + str(self.screen_width) + "/" +
              str(self.screen_height))
        self.vis = vis
        if vis:
            cv2.startWindowThread()
            cv2.namedWindow(self.windowname)

    def get_image(self):
        numpy_surface = np.zeros(self.screen_height * self.screen_width * 3,
                                 dtype=np.uint8)
        self.ale.getScreenRGB(numpy_surface)
        image = np.reshape(numpy_surface,
                           (self.screen_height, self.screen_width, 3))
        return image

    def newGame(self):
        # Instead of resetting the game, we load a checkpoint and start from there.
        # self.ale.reset_game()
        self.ale.restoreState(
            self.ale.decodeState(checkpoints[random.randint(
                0, 99)].astype('uint8')))
        self.init_frame_number = self.ale.getFrameNumber()
        #self.ale.restoreState(self.ale.decodeState(np.reshape(checkpoint,(1009,1))))
        return self.get_image()

    def next(self, action_indx):
        reward = self.ale.act(action_indx)
        nextstate = self.get_image()
        # scipy.misc.imsave('test.png',nextstate)
        if self.vis:
            cv2.imshow(self.windowname, nextstate)
        return nextstate, reward, self.ale.game_over()

    def get_frame_number(self):
        return self.ale.getFrameNumber() - self.init_frame_number
示例#10
0
class Environment:
    def __init__(self, show_screen, history_length):
        self.ale = ALEInterface()
        self.ale.setInt('frame_skip', 4)
        self.history = None
        self.history_length = history_length
        if show_screen:
            self.display_screen()
        self.load_game()
        (screen_width, screen_height) = self.ale.getScreenDims()
        self.screen_data = np.empty((screen_height, screen_width, 1),
                                    dtype=np.uint8)  # 210x160 screen data
        self.dims = (84, 84)  # input size for neural network
        self.actions = [3, 0, 1, 4]  # noop, left, right, fire,

    def display_screen(self):
        self.ale.setBool("display_screen", True)

    def turn_on_sound(self):
        self.ale.setBool("sound", True)

    def restart(self):
        """reset game"""
        self.ale.reset_game()

    def act(self, action):
        """:returns reward of an action"""
        return self.ale.act(self.actions[action])

    def __get_screen(self):
        """:returns Grayscale thresholded resized screen image """
        self.ale.getScreenGrayscale(self.screen_data)
        resized = cv2.resize(self.screen_data, self.dims)
        return resized

    def get_state(self):
        binary_screen = self.__get_screen()
        if self.history is None:
            self.history = deque(maxlen=self.history_length)
            for _ in range(self.history_length - 1):
                self.history.append(binary_screen)
        self.history.append(binary_screen)
        result = np.stack(self.history, axis=0)
        return result

    def isTerminal(self):
        """checks if game is over"""
        return self.ale.game_over()

    def load_game(self):
        """load game from file"""
        self.ale.loadROM("Breakout.bin")
示例#11
0
class emulator(object):
    def __init__(self, rom_name, vis, frameskip=1, windowname='preview'):
        self.ale = ALEInterface()
        self.max_frames_per_episode = self.ale.getInt(
            "max_num_frames_per_episode")
        self.ale.setInt("random_seed", 123)
        self.ale.setInt("frame_skip", frameskip)
        romfile = str(ROM_PATH) + str(rom_name)
        if not os.path.exists(romfile):
            print('No ROM file found at "' + romfile +
                  '".\nAdjust ROM_PATH or double-check the filt exists.')
        self.ale.loadROM(romfile)
        self.legal_actions = self.ale.getMinimalActionSet()
        self.action_map = dict()
        self.windowname = windowname
        for i in range(len(self.legal_actions)):
            self.action_map[self.legal_actions[i]] = i

        # print(self.legal_actions)
        self.screen_width, self.screen_height = self.ale.getScreenDims()
        print("width/height: " + str(self.screen_width) + "/" +
              str(self.screen_height))
        self.vis = vis
        if vis:
            cv2.startWindowThread()
            cv2.namedWindow(
                self.windowname,
                flags=cv2.WINDOW_AUTOSIZE)  # permit manual resizing

    def get_image(self):
        numpy_surface = np.zeros(self.screen_height * self.screen_width * 3,
                                 dtype=np.uint8)
        self.ale.getScreenRGB(numpy_surface)
        image = np.reshape(numpy_surface,
                           (self.screen_height, self.screen_width, 3))
        return image

    def newGame(self):
        self.ale.reset_game()
        return self.get_image()

    def next(self, action_indx):
        reward = self.ale.act(action_indx)
        nextstate = self.get_image()
        # scipy.misc.imsave('test.png',nextstate)
        if self.vis:
            cv2.imshow(self.windowname, nextstate)
            if sys.platform == 'darwin':
                # if we don't do this, can hang on OS X
                cv2.waitKey(2)
        return nextstate, reward, self.ale.game_over()
示例#12
0
class Environment:
    def __init__(self, show_screen, history_length):
        self.ale = ALEInterface()
        self.ale.setInt('frame_skip', 4)
        self.history = None
        self.history_length = history_length
        if show_screen:
            self.display_screen()
        self.load_game()
        (screen_width, screen_height) = self.ale.getScreenDims()
        self.screen_data = np.empty((screen_height, screen_width, 1), dtype=np.uint8)  # 210x160 screen data
        self.dims = (84, 84)  # input size for neural network
        self.actions = [3, 0, 1, 4]  # noop, left, right, fire,

    def display_screen(self):
        self.ale.setBool("display_screen", True)

    def turn_on_sound(self):
        self.ale.setBool("sound", True)

    def restart(self):
        """reset game"""
        self.ale.reset_game()

    def act(self, action):
        """:returns reward of an action"""
        return self.ale.act(self.actions[action])

    def __get_screen(self):
        """:returns Grayscale thresholded resized screen image """
        self.ale.getScreenGrayscale(self.screen_data)
        resized = cv2.resize(self.screen_data, self.dims)
        return resized

    def get_state(self):
        binary_screen = self.__get_screen()
        if self.history is None:
            self.history = deque(maxlen=self.history_length)
            for _ in range(self.history_length - 1):
                self.history.append(binary_screen)
        self.history.append(binary_screen)
        result = np.stack(self.history, axis=0)
        return result

    def isTerminal(self):
        """checks if game is over"""
        return self.ale.game_over()

    def load_game(self):
        """load game from file"""
        self.ale.loadROM("Breakout.bin")
示例#13
0
class Atari:
    # Constructor
    def __init__(self, rom_name):
        # 1º Passo: carregamos o jogo e definimos seus parâmetros
        self.ale = ALEInterface()
        self.max_frames_per_episode = self.ale.getInt(
            b"max_num_frames_per_episode")
        self.ale.setInt(b"random_seed", 123)
        self.ale.setInt(b"frame_skip", 4)
        self.ale.loadROM(('game/' + rom_name).encode())

        self.screen_width, self.screen_height = self.ale.getScreenDims()
        self.legal_actions = self.ale.getMinimalActionSet()
        self.action_map = dict()

        for i in range(len(self.legal_actions)):
            self.action_map[self.legal_actions[i]] = i

        # 2º Passo: criamos a janela para exibição
        self.windowname = rom_name
        cv2.startWindowThread()
        cv2.namedWindow(rom_name)

    # Essa função será utilizada para receber uma imagem do emulador, já em um formato esperado
    # por nosso algoritmo de treinamento.
    def get_image(self):
        numpy_surface = np.zeros(self.screen_height * self.screen_width * 3,
                                 dtype=np.uint8)
        self.ale.getScreenRGB(numpy_surface)
        image = np.reshape(numpy_surface,
                           (self.screen_height, self.screen_width, 3))
        return image

    # Simplesmente inicializa o jogo
    def newGame(self):
        self.ale.reset_game()
        return self.get_image()

    # Essa função será responsável por retornar as informações da observação do estado após certa ação ser tomada.
    def next(self, action):
        reward = self.ale.act(self.legal_actions[np.argmax(action)])
        nextstate = self.get_image()

        cv2.imshow(self.windowname, nextstate)
        if self.ale.game_over():
            self.newGame()

        return nextstate, reward, self.ale.game_over()
class emulator:
	def __init__(self, rom_name, vis,windowname='preview'):
		self.ale = ALEInterface()
		self.max_frames_per_episode = self.ale.getInt("max_num_frames_per_episode");
		self.ale.setInt("random_seed",123)
		self.ale.setInt("frame_skip",4)
		self.ale.loadROM('roms/' + rom_name )
		self.legal_actions = self.ale.getMinimalActionSet()
		self.action_map = dict()
		self.windowname = windowname
		for i in range(len(self.legal_actions)):
			self.action_map[self.legal_actions[i]] = i
		self.init_frame_number = 0

		# print(self.legal_actions)
		self.screen_width,self.screen_height = self.ale.getScreenDims()
		print("width/height: " +str(self.screen_width) + "/" + str(self.screen_height))
		self.vis = vis
		if vis: 
			cv2.startWindowThread()
			cv2.namedWindow(self.windowname)

	def get_image(self):
		numpy_surface = np.zeros(self.screen_height*self.screen_width*3, dtype=np.uint8)
		self.ale.getScreenRGB(numpy_surface)
		image = np.reshape(numpy_surface, (self.screen_height, self.screen_width, 3))
		return image

	def newGame(self):
		# Instead of resetting the game, we load a checkpoint and start from there.
		# self.ale.reset_game()
		self.ale.restoreState(self.ale.decodeState(checkpoints[random.randint(0,99)].astype('uint8')))
		self.init_frame_number = self.ale.getFrameNumber()
		#self.ale.restoreState(self.ale.decodeState(np.reshape(checkpoint,(1009,1))))
		return self.get_image()

	def next(self, action_indx):
		reward = self.ale.act(action_indx)	
		nextstate = self.get_image()
		# scipy.misc.imsave('test.png',nextstate)
		if self.vis:
			cv2.imshow(self.windowname,nextstate)
		return nextstate, reward, self.ale.game_over()

	def get_frame_number(self):
		return self.ale.getFrameNumber() - self.init_frame_number
示例#15
0
class Emulator:
    def __init__(self, rom_name, vis):
        self.ale = ALEInterface()
        self.max_frames_per_episode = self.ale.getInt(
            "max_num_frames_per_episode")
        self.ale.setInt("random_seed", 123)
        self.ale.setInt("frame_skip", 4)
        self.ale.loadROM('roms/' + rom_name)
        self.legal_actions = self.ale.getMinimalActionSet()
        self.action_map = dict()
        for i in range(len(self.legal_actions)):
            self.action_map[self.legal_actions[i]] = i

        #print(self.legal_actions)
        self.screen_width, self.screen_height = self.ale.getScreenDims()
        print("width/height: " + str(self.screen_width) + "/" +
              str(self.screen_height))
        self.vis = vis
        if vis:
            cv2.startWindowThread()
            cv2.namedWindow("preview")

    def get_image(self):
        numpy_surface = np.zeros(self.screen_height * self.screen_width * 3,
                                 dtype=np.uint8)
        self.ale.getScreenRGB(numpy_surface)
        image = np.reshape(numpy_surface,
                           (self.screen_height, self.screen_width, 3))
        #added by ben may 2016
        print image
        print '&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&& printing'
        return image

    def newGame(self):
        self.ale.reset_game()
        return self.get_image()

    def next(self, action_indx):
        reward = self.ale.act(action_indx)
        nextstate = self.get_image()
        # scipy.misc.imsave('test.png',nextstate)
        # scipy.misc.imsave('test.png',nextstate)
        if self.vis:
            cv2.imshow('preview', nextstate)
        return nextstate, reward, self.ale.game_over()
示例#16
0
class emulator:
	def __init__(self, rom_name, vis, windowname='preview'):
  		self.ale = ALEInterface()
		# When it starts
  		self.ale.setInt("random_seed", 123)
		# Skipping 4 frames 
  		self.ale.setInt("frame_skip", 4)
  		self.ale.loadROM('roms/' + rom_name)
  		self.legal_actions = self.ale.getMinimalActionSet()
  		print('Actions : %s' % self.legal_actions)
  		self.action_map = dict()
  		self.windowname = windowname
  		# Raw atari frames, 210 * 160 pixel images 
  		self.screen_width, self.screen_height = self.ale.getScreenDims()
  		print("widht/height: " + str(self.screen_width) + "/" + str(self.screen_height))
  		# Visualize
  		self.vis = vis
  		if vis:
   			cv2.startWindowThread()
   			cv2.namedWindow(self.windowname)

 	def get_image(self):
  		# Need to specify data type as uint8
  		numpy_surface = np.zeros([self.screen_width * self.screen_height * 3], dtype=np.uint8)
  		# get RGB values
  		self.ale.getScreenRGB(numpy_surface)
  		image = np.reshape(numpy_surface, [self.screen_height, self.screen_width, 3])
  		return image

 	def new_game(self):
  		self.ale.reset_game()
  		# Reset game and getting reset image value
  		return self.get_image()

 	def next(self, action_index):
  		# Get R(s,a)
  		reward = self.ale.act(action_index)
  		# Get image pixel value after taking an action
  		next_state = self.get_image()
  		if self.vis:
   			cv2.imshow(self.windowname, next_state)
  		# self.ale.game_over() returns True when game is over
  		return next_state, reward, self.ale.game_over()
示例#17
0
class Atari:
    def __init__(self, rom_name):

        self.ale = ALEInterface()
        self.max_frames_per_episode = self.ale.getInt(
            "max_num_frames_per_episode")
        self.ale.setInt("random_seed", 123)
        self.ale.setInt("frame_skip", 4)
        self.ale.loadROM('game/' + rom_name)
        self.screen_width, self.screen_height = self.ale.getScreenDims()
        self.legal_actions = self.ale.getMinimalActionSet()
        self.action_map = dict()
        for i in range(len(self.legal_actions)):
            self.action_map[self.legal_actions[i]] = i
        # print len(self.legal_actions)
        self.windowname = rom_name
        cv2.startWindowThread()
        cv2.namedWindow(rom_name)

    def get_image(self):

        numpy_surface = np.zeros(self.screen_height * self.screen_width * 3,
                                 dtype=np.uint8)
        self.ale.getScreenRGB(numpy_surface)
        image = np.reshape(numpy_surface,
                           (self.screen_height, self.screen_width, 3))
        return image

    def newGame(self):

        self.ale.reset_game()
        return self.get_image()

    def next(self, action):

        reward = self.ale.act(self.legal_actions[np.argmax(action)])
        nextstate = self.get_image()

        cv2.imshow(self.windowname, nextstate)
        if self.ale.game_over():
            self.newGame()
        # print "reward %d" % reward
        return nextstate, reward, self.ale.game_over()
示例#18
0
class Atari:
    def __init__(self,rom_name):
        self.ale = ALEInterface()
        self.max_frames_per_episode = self.ale.getInt("max_num_frames_per_episode")
        self.ale.setInt("random_seed",123)
        self.ale.setInt("frame_skip",4)
        self.ale.loadROM(rom_name)
        self.screen_width,self.screen_height = self.ale.getScreenDims()
        self.legal_actions = self.ale.getMinimalActionSet()
        self.action_map = dict()
        for i in range(len(self.legal_actions)):
            self.action_map[self.legal_actions[i]] = i
        print len(self.legal_actions)
        self.windowname = rom_name
        cv2.startWindowThread()
        cv2.namedWindow(rom_name)

    def preprocess(self, image):
        image = cv2.cvtColor(cv2.resize(image, (84, 110)), cv2.COLOR_BGR2GRAY)
        image = image[26:110,:]
        ret, image = cv2.threshold(image,1,255,cv2.THRESH_BINARY)
        return np.reshape(image,(84,84, 1))

    def get_image(self):
        numpy_surface = np.zeros(self.screen_height*self.screen_width*3, dtype=np.uint8)
        self.ale.getScreenRGB(numpy_surface)
        image = np.reshape(numpy_surface, (self.screen_height, self.screen_width, 3))
        return self.preprocess(image)

    def newGame(self):
        self.ale.reset_game()
        return self.get_image()

    def next(self, action):
        reward = self.ale.act(self.legal_actions[np.argmax(action)])    
        nextstate = self.get_image()

        cv2.imshow(self.windowname,nextstate)
        if self.ale.game_over():
            self.newGame()
        #print "reward %d" % reward 
        return nextstate, reward, self.ale.game_over()
class emulator:
	def __init__(self, rom_name, vis):
		if vis:
			import cv2
		self.ale = ALEInterface()
		self.max_frames_per_episode = self.ale.getInt("max_num_frames_per_episode");
		self.ale.setInt("random_seed",123)
		self.ale.setInt("frame_skip",4)
		self.ale.loadROM('roms/' + rom_name )
		self.legal_actions = self.ale.getMinimalActionSet()
		self.action_map = dict()
		for i in range(len(self.legal_actions)):
			self.action_map[self.legal_actions[i]] = i

		# print(self.legal_actions)
		self.screen_width,self.screen_height = self.ale.getScreenDims()
		print("width/height: " +str(self.screen_width) + "/" + str(self.screen_height))
		self.vis = vis
		if vis: 
			cv2.startWindowThread()
			cv2.namedWindow("preview")

	def get_image(self):
		numpy_surface = np.zeros(self.screen_height*self.screen_width*3, dtype=np.uint8)
		self.ale.getScreenRGB(numpy_surface)
		image = np.reshape(numpy_surface, (self.screen_height, self.screen_width, 3))
		return image

	def newGame(self):
		self.ale.reset_game()
		return self.get_image()

	def next(self, action_indx):
		reward = self.ale.act(action_indx)	
		nextstate = self.get_image()
		# scipy.misc.imsave('test.png',nextstate)
		if self.vis:
			cv2.imshow('preview',nextstate)
		return nextstate, reward, self.ale.game_over()
class Atari:
	def __init__(self,rom_name):
		self.ale = ALEInterface()
		self.max_frames_per_episode = self.ale.getInt("max_num_frames_per_episode")
		self.ale.setInt("random_seed",123)
		self.ale.setInt("frame_skip",4)
		self.ale.loadROM('./' +rom_name)
		self.screen_width,self.screen_height = self.ale.getScreenDims()
		self.legal_actions = self.ale.getMinimalActionSet()
		self.action_map = dict()
		for i in range(len(self.legal_actions)):
			self.action_map[self.legal_actions[i]] = i
		#print len(self.legal_actions)
		self.windowname = rom_name
		#cv2.startWindowThread()
		#cv2.namedWindow(rom_name)

	def get_image(self):
		numpy_surface = np.zeros(self.screen_height*self.screen_width*3, dtype=np.uint8)
		self.ale.getScreenRGB(numpy_surface)
		image = np.reshape(numpy_surface, (self.screen_height, self.screen_width, 3))
		return image

	def newGame(self):
		self.ale.reset_game()
		return self.get_image()

	def next(self, action):
		reward = self.ale.act(self.legal_actions[np.argmax(action)])	
		nextstate = self.get_image()
		
		#cv2.imshow(self.windowname,nextstate)
		if self.ale.game_over():
			self.newGame()
		#print "reward %d" % reward 
		return nextstate, reward, self.ale.game_over()
示例#21
0
class Environment:
	"""docstring for Environment"""

	BUFFER_LEN = 2
	EPISODE_FRAMES = 18000
	EPOCH_COUNT = 200
	EPOCH_STEPS = 250000
	EVAL_EPS = 0.001
	FRAMES_SKIP = 4
	FRAME_HEIGHT = 84
	FRAME_WIDTH = 84
	MAX_NO_OP = 30
	MAX_REWARD = 1
	
	def __init__(self, rom_name, rng, display_screen = False):
		self.api = ALEInterface()
		self.api.setInt('random_seed', rng.randint(333))
		self.api.setBool('display_screen', display_screen)
		self.api.setFloat('repeat_action_probability', 0.0)
		self.rom_name = rom_name
		self.display_screen = display_screen
		self.rng = rng
		self.repeat = Environment.FRAMES_SKIP
		self.buffer_len = Environment.BUFFER_LEN
		self.height = Environment.FRAME_HEIGHT
		self.width = Environment.FRAME_WIDTH
		self.episode_steps = Environment.EPISODE_FRAMES / Environment.FRAMES_SKIP
		self.merge_id = 0
		self.max_reward = Environment.MAX_REWARD
		self.eval_eps = Environment.EVAL_EPS
		self.log_dir = ''
		self.network_dir = ''

		self.api.loadROM('../rom/' + self.rom_name)
		self.minimal_actions = self.api.getMinimalActionSet()
		original_width, original_height = self.api.getScreenDims()
		self.merge_frame = np.zeros((self.buffer_len
								, original_height
								, original_width)
								, dtype = np.uint8)

	def get_action_count(self):
		return len(self.minimal_actions)

	def train(self, agent, store_freq, folder = None, start_epoch = 0):
		self._open_log_files(agent, folder)
		obs = np.zeros((self.height, self.width), dtype = np.uint8)
		epoch_count = Environment.EPOCH_COUNT

		for epoch in xrange(start_epoch, epoch_count):
			self.need_reset = True
			steps_left = Environment.EPOCH_STEPS

			print "\n" + "=" * 50
			print "Epoch #%d" % (epoch + 1)
			episode = 0
			train_start = time.time()
			while steps_left > 0:
				num_step, _ = self._run_episode(agent, steps_left, obs)
				steps_left -= num_step
				episode += 1
				if steps_left == 0 or episode % 10 == 0:
					print "Finished episode #%d, steps_left = %d" \
						% (episode, steps_left)
			train_end = time.time()

			valid_values = agent.get_validate_values()
			eval_values = self.evaluate(agent)
			test_end = time.time()

			train_time = train_end - train_start
			test_time = test_end - train_end
			step_per_sec = Environment.EPOCH_STEPS * 1. / max(1, train_time)
			print "\tFinished epoch #%d, episode trained = %d\n" \
				"\tValidate values = %.3f, evaluate reward = %.3f\n"\
				"\tTrain time = %.0fs, test time = %.0fs, steps/sec = %.4f" \
					% (epoch + 1, episode, valid_values, eval_values\
						, train_time, test_time, step_per_sec)

			self._update_log_files(agent, epoch + 1, episode
								, valid_values, eval_values
								, train_time, test_time
								, step_per_sec, store_freq)
			gc.collect()

	def evaluate(self, agent, episodes = 30, obs = None):
		print "\n***Start evaluating"
		if obs is None:
			obs = np.zeros((self.height, self.width), dtype = np.uint8)
		sum_reward = 0.0
		sum_step = 0.0
		for episode in xrange(episodes):
			self.need_reset = True
			step, reward = self._run_episode(agent, self.episode_steps, obs
											, self.eval_eps, evaluating = True)
			sum_reward += reward
			sum_step += step
			print "Finished episode %d, reward = %d, step = %d" \
					% (episode + 1, reward, step)
		self.need_reset = True
		print "Average reward per episode = %.4f" % (sum_reward / episodes)
		print "Average step per episode = %.4f" % (sum_step / episodes)
		return sum_reward / episodes

	def _prepare_game(self):
		if self.need_reset or self.api.game_over():
			self.api.reset_game()
			self.need_reset = False
			if Environment.MAX_NO_OP > 0:
				num_no_op = self.rng.randint(Environment.MAX_NO_OP + 1) \
							+ self.buffer_len
				for _ in xrange(num_no_op):
					self.api.act(0)

		for _ in xrange(self.buffer_len):
			self._update_buffer()

	def _run_episode(self, agent, steps_left, obs
					, eps = 0.0, evaluating = False):
		self._prepare_game()

		start_lives = self.api.lives()
		step_count = 0
		sum_reward = 0
		is_terminal = False
		while step_count < steps_left and not is_terminal:
			self._get_screen(obs)
			action_id, _ = agent.get_action(obs, eps, evaluating)
			
			reward = self._repeat_action(self.minimal_actions[action_id])
			reward_clip = reward
			if self.max_reward > 0:
				reward_clip = np.clip(reward, -self.max_reward, self.max_reward)

			life_lost = not evaluating and self.api.lives() < start_lives
			is_terminal = self.api.game_over() or life_lost \
						or step_count + 1 >= steps_left

			agent.add_experience(obs, is_terminal, action_id, reward_clip
								, evaluating)
			sum_reward += reward
			step_count += 1
			
		return step_count, sum_reward

	def _update_buffer(self):
		self.api.getScreenGrayscale(self.merge_frame[self.merge_id, ...])
		self.merge_id = (self.merge_id + 1) % self.buffer_len

	def _repeat_action(self, action):
		reward = 0
		for i in xrange(self.repeat):
			reward += self.api.act(action)
			if i + self.buffer_len >= self.repeat:
				self._update_buffer()
		return reward

	def _get_screen(self, resized_frame):
		self._resize_frame(self.merge_frame.max(axis = 0), resized_frame)
				
	def _resize_frame(self, src_frame, dst_frame):
		cv2.resize(src = src_frame, dst = dst_frame,
					dsize = (self.width, self.height),
					interpolation = cv2.INTER_LINEAR)

	def _open_log_files(self, agent, folder):
		time_str = time.strftime("_%m-%d-%H-%M", time.localtime())
		base_rom_name = os.path.splitext(os.path.basename(self.rom_name))[0]


		if folder is not None:
			self.log_dir = folder
			self.network_dir = self.log_dir + '/network'
		else:
			self.log_dir = '../run_results/' + base_rom_name + time_str
			self.network_dir = self.log_dir + '/network'

		info_name = get_next_name(self.log_dir, 'info', 'txt')
		git_name = get_next_name(self.log_dir, 'git-diff', '')

		try:
			os.stat(self.log_dir)
		except OSError:
			os.makedirs(self.log_dir)

		try:
			os.stat(self.network_dir)
		except OSError:
			os.makedirs(self.network_dir)

		with open(os.path.join(self.log_dir, info_name), 'w') as f:
			f.write('Commit: ' + subprocess.check_output(['git', 'rev-parse'
														, 'HEAD']))
			f.write('Run command: ')
			f.write(' '.join(pipes.quote(x) for x in sys.argv))
			f.write('\n\n')
			f.write(agent.get_info())
			write_info(f, Environment)
			write_info(f, agent.__class__)
			write_info(f, agent.network.__class__)

		# From https://github.com/spragunr/deep_q_rl/pull/49/files
		with open(os.path.join(self.log_dir, git_name), 'w') as f:
			f.write(subprocess.check_output(['git', 'diff', 'HEAD']))

		if folder is not None:
			return

		with open(os.path.join(self.log_dir, 'results.csv'), 'w') as f:
			f.write("epoch,episode_train,validate_values,evaluate_reward"\
				",train_time,test_time,steps_per_second\n")

		mem = psutil.virtual_memory()
		with open(os.path.join(self.log_dir, 'memory.csv'), 'w') as f:
			f.write("epoch,available,free,buffers,cached"\
					",available_readable,used_percent\n")
			f.write("%d,%d,%d,%d,%d,%s,%.1f\n" % \
					(0, mem.available, mem.free, mem.buffers, mem.cached
					, bytes2human(mem.available), mem.percent))

	def _update_log_files(self, agent, epoch, episode, valid_values
						, eval_values, train_time, test_time, step_per_sec
						, store_freq):
		print "Updating log files"
		with open(self.log_dir + '/results.csv', 'a') as f:
			f.write("%d,%d,%.4f,%.4f,%d,%d,%.4f\n" % \
						(epoch, episode, valid_values, eval_values
						, train_time, test_time, step_per_sec))

		mem = psutil.virtual_memory()
		with open(self.log_dir + '/memory.csv', 'a') as f:
			f.write("%d,%d,%d,%d,%d,%s,%.1f\n" % \
					(epoch, mem.available, mem.free, mem.buffers, mem.cached
					, bytes2human(mem.available), mem.percent))

		agent.dump_network(self.network_dir + ('/%03d' % (epoch)) + '.npz')

		if (store_freq >= 0 and epoch >= Environment.EPOCH_COUNT) or \
				(store_freq > 0 and (epoch % store_freq == 0)):
			agent.dump_exp(self.network_dir + '/exp.npz')

	def _setup_record(self, network_file):
		file_name, _ = os.path.splitext(os.path.basename(network_file))
		time_str = time.strftime("_%m-%d-%H-%M", time.localtime())
		img_dir = os.path.dirname(network_file) + '/images_' \
					+ file_name + time_str
		rom_name, _ = os.path.splitext(self.rom_name)
		out_name = os.path.dirname(network_file) + '/' + rom_name + '_' \
					+ file_name + time_str + '.mov'
		print out_name

		try:
			os.stat(img_dir)
		except OSError:
			os.makedirs(img_dir)

		self.api.setString('record_screen_dir', img_dir)
		self.api.loadROM('../rom/' + self.rom_name)
		return img_dir, out_name

	def record_run(self, agent, network_file, episode_id = 1):
		if episode_id > 1:
			self.evaluate(agent, episode_id - 1)
			system_state = self.api.cloneSystemState()

		img_dir, out_name = self._setup_record(network_file)

		if episode_id > 1:
			self.api.restoreSystemState(system_state)

		self.evaluate(agent, 1)
		script = \
				"""
					{
						ffmpeg -r 60 -i %s/%%06d.png -f mov -c:v libx264 %s
					} || {
						avconv -r 60 -i %s/%%06d.png -f mov -c:v libx264 %s
					}
				""" % (img_dir, out_name, img_dir, out_name)
		os.system(script)
示例#22
0
#         ale.setBool('sound', True)
#     ale.setBool('display_screen', True)

# Load the ROM file
ale.loadROM('Breakout.bin')

# Get the list of legal actions
# legal_actions = ale.getLegalActionSet()
legal_actions = ale.getMinimalActionSet()
print legal_actions

# (screen_width,screen_height) = ale.getScreenDims()
# screen_data = np.zeros(screen_width*screen_height,dtype=np.uint32)
# ale.getScreenRGB(screen_data)

(screen_width, screen_height) = ale.getScreenDims()
screen_data = np.zeros(screen_width * screen_height, dtype=np.uint8)
print type(ale.getScreen(screen_data))

# Play 10 episodes
for episode in xrange(10):

    total_reward = 0
    while not ale.game_over():
        a = legal_actions[randrange(len(legal_actions))]
        # Apply an action and get the resulting reward
        reward = ale.act(a)
        print  reward
        total_reward += reward
    print 'Episode', episode, 'ended with score:', total_reward
    ale.reset_game()
示例#23
0
class ArcadeLearningEnvironment(Environment):
    """
    [Arcade Learning Environment](https://github.com/mgbellemare/Arcade-Learning-Environment)
    adapter (specification key: `ale`, `arcade_learning_environment`).

    May require:
    ```bash
    sudo apt-get install libsdl1.2-dev libsdl-gfx1.2-dev libsdl-image1.2-dev cmake

    git clone https://github.com/mgbellemare/Arcade-Learning-Environment.git
    cd Arcade-Learning-Environment

    mkdir build && cd build
    cmake -DUSE_SDL=ON -DUSE_RLGLUE=OFF -DBUILD_EXAMPLES=ON ..
    make -j 4
    cd ..

    pip3 install .
    ```

    Args:
        level (string): ALE rom file
            (<span style="color:#C00000"><b>required</b></span>).
        loss_of_life_termination: Signals a terminal state on loss of life
            (<span style="color:#00C000"><b>default</b></span>: false).
        loss_of_life_reward (float): Reward/Penalty on loss of life (negative values are a penalty)
            (<span style="color:#00C000"><b>default</b></span>: 0.0).
        repeat_action_probability (float): Repeats last action with given probability
            (<span style="color:#00C000"><b>default</b></span>: 0.0).
        visualize (bool): Whether to visualize interaction
            (<span style="color:#00C000"><b>default</b></span>: false).
        frame_skip (int > 0): Number of times to repeat an action without observing
            (<span style="color:#00C000"><b>default</b></span>: 1).
        seed (int): Random seed
            (<span style="color:#00C000"><b>default</b></span>: none).
    """

    def __init__(
        self, level, life_loss_terminal=False, life_loss_punishment=0.0,
        repeat_action_probability=0.0, visualize=False, frame_skip=1, seed=None
    ):
        from ale_python_interface import ALEInterface

        self.environment = ALEInterface()
        self.rom_file = level

        self.life_loss_terminal = life_loss_terminal
        self.life_loss_punishment = life_loss_punishment

        self.environment.setFloat(b'repeat_action_probability', repeat_action_probability)
        self.environment.setBool(b'display_screen', visualize)
        self.environment.setInt(b'frame_skip', frame_skip)
        if seed is not None:
            self.environment.setInt(b'random_seed', seed)

        # All set commands must be done before loading the ROM.
        self.environment.loadROM(rom_file=self.rom_file.encode())
        self.available_actions = tuple(self.environment.getLegalActionSet())

        # Full list of actions:
        # No-Op, Fire, Up, Right, Left, Down, Up Right, Up Left, Down Right, Down Left, Up Fire,
        # Right Fire, Left Fire, Down Fire, Up Right Fire, Up Left Fire, Down Right Fire, Down Left
        # Fire

    def __str__(self):
        return super().__str__() + '({})'.format(self.rom_file)

    def states(self):
        width, height = self.environment.getScreenDims()
        return dict(type='float', shape=(height, width, 3))

    def actions(self):
        return dict(type='int', num_values=len(self.available_actions))

    def close(self):
        self.environment.__del__()
        self.environment = None

    def get_states(self):
        screen = np.copy(self.environment.getScreenRGB(screen_data=self.screen))
        screen = screen.astype(dtype=np.float32) / 255.0
        return screen

    def reset(self):
        self.environment.reset_game()
        width, height = self.environment.getScreenDims()
        self.screen = np.empty((height, width, 3), dtype=np.uint8)
        self.lives = self.environment.lives()
        return self.get_states()

    def execute(self, actions):
        reward = self.environment.act(action=self.available_actions[actions])
        terminal = self.environment.game_over()
        states = self.get_states()

        next_lives = self.environment.lives()
        if next_lives < self.lives:
            if self.life_loss_terminal:
                terminal = True
            elif self.life_loss_punishment > 0.0:
                reward -= self.life_loss_punishment
            self.lives = next_lives

        return states, terminal, reward
示例#24
0
class GameEnvironment:

    def __init__(self, settings):

        self.ale = ALEInterface()
        self.ale.setBool('display_screen', settings['DISPLAY_SCREEN'])
        self.ale.setBool('sound', settings['SOUND'])
        self.ale.setBool('color_averaging', settings['COLOR_AVERAGING'])
        self.ale.setInt('random_seed', settings['RANDOM_SEED'])
        self.ale.setInt('frame_skip', settings['FRAME_SKIP'])
        self.ale.setFloat('repeat_action_probability', settings['REPEAT_ACTION_PROB'])
        roms_dir = settings['ROMS_DIR']
        rom_name = settings['ROM_NAME']
        ROM = None
        if(rom_name.endswith('.bin')):
            self.name = rom_name[:-4]
            ROM = rom_name
        else:
            self.name = rom_name
            ROM = rom_name + '.bin'

        self.ale.loadROM(os.path.join(roms_dir, ROM))
        self.random_starts = settings['RANDOM_STARTS']
        self.rng = settings['RNG']

        if(settings['MINIMAL_ACTION_SET']):
            self.actions = self.ale.getMinimalActionSet()
        else:
            self.actions = self.ale.getLegalActionSet()

        self.n_actions = len(self.actions)
        self.width, self.height = self.ale.getScreenDims()

        self.observation = np.zeros((self.height, self.width), dtype='uint8')
        self.reward = None
        self.game_over = None
        self.terminal = None
        self.total_lives = None

        self.init()

    def init(self):

        self.restartGame()
        self.reward = 0
        self.game_over = self.gameOver()
        self.terminal = self.game_over
        self.total_lives = self.lives()
        self.step(0)

    def getState(self):

        return self.observation, self.reward, self.terminal, self.game_over

    def step(self, action, training=False):

        self.reward = self.act(action)
        self.paint()
        lives = self.lives()
        self.game_over = self.gameOver()
        self.terminal = self.game_over
        if(training and (lives < self.total_lives)):
            self.terminal = True

        self.total_lives = lives
        return self.getState()

    def newGame(self):

        self.init()
        for i in xrange(self.rng.randint(1, self.random_starts)):
            self.act(0)
            terminal = self.gameOver()
            if(terminal):
                print "Warning terminal in random init"

        return self.step(0)

    def newTestGame(self):

        self.init()
        return self.getState()

    def paint(self):

        self.ale.getScreenGrayscale(self.observation)

    def getScreenRGB(self):

        return self.ale.getScreenRGB()

    def act(self, action):

        assert ((action >= 0) and (action < self.n_actions))
        return self.ale.act(self.actions[action])

    def lives(self):

        return self.ale.lives()

    def restartGame(self):

        self.ale.reset_game()

    def gameOver(self):

        return self.ale.game_over()
示例#25
0
class MyEnv(Environment):
    VALIDATION_MODE = 0

    def __init__(self, rng, rom="ale/breakout.bin", frame_skip=4, 
                 ale_options=[{"key": "random_seed", "value": 0}, 
                              {"key": "color_averaging", "value": True},
                              {"key": "repeat_action_probability", "value": 0.}]):
        self._mode = -1
        self._modeScore = 0.0
        self._modeEpisodeCount = 0

        self._frameSkip = frame_skip if frame_skip >= 1 else 1
        self._randomState = rng

        self._ale = ALEInterface()
        for option in ale_options:
            t = type(option["value"])
            if t is int:
                self._ale.setInt(option["key"], option["value"])
            elif t is float:
                self._ale.setFloat(option["key"], option["value"])
            elif t is bool:
                self._ale.setBool(option["key"], option["value"])
            else:
                raise ValueError("Option {} ({}) is not an int, bool or float.".format(option["key"], t))
        self._ale.loadROM(rom)

        w, h = self._ale.getScreenDims()
        self._screen = np.empty((h, w), dtype=np.uint8)
        self._reducedScreen = np.empty((84, 84), dtype=np.uint8)
        self._actions = self._ale.getMinimalActionSet()

                
    def reset(self, mode):
        if mode == MyEnv.VALIDATION_MODE:
            if self._mode != MyEnv.VALIDATION_MODE:
                self._mode = MyEnv.VALIDATION_MODE
                self._modeScore = 0.0
                self._modeEpisodeCount = 0
            else:
                self._modeEpisodeCount += 1
        elif self._mode != -1: # and thus mode == -1
            self._mode = -1

        self._ale.reset_game()
        for _ in range(self._randomState.randint(15)):
            self._ale.act(0)
        self._ale.getScreenGrayscale(self._screen)
        cv2.resize(self._screen, (84, 84), self._reducedScreen, interpolation=cv2.INTER_NEAREST)
        
        return [4 * [84 * [84 * [0]]]]
        
        
    def act(self, action):
        action = self._actions[action]
        
        reward = 0
        for _ in range(self._frameSkip):
            reward += self._ale.act(action)
            if self.inTerminalState():
                break
            
        self._ale.getScreenGrayscale(self._screen)
        cv2.resize(self._screen, (84, 84), self._reducedScreen, interpolation=cv2.INTER_NEAREST)
  
        self._modeScore += reward
        return np.sign(reward)

    def summarizePerformance(self, test_data_set):
        if self.inTerminalState() == False:
            self._modeEpisodeCount += 1
        print("== Mean score per episode is {} over {} episodes ==".format(self._modeScore / self._modeEpisodeCount, self._modeEpisodeCount))


    def inputDimensions(self):
        return [(4, 84, 84)]

    def observationType(self, subject):
        return np.uint8

    def nActions(self):
        return len(self._actions)

    def observe(self):
        return [np.array(self._reducedScreen)]

    def inTerminalState(self):
        return self._ale.game_over()
示例#26
0
class UpdatedAtariEnv(AtariEnv):
    def __init__(self,
                 rom_path,
                 obs_type,
                 frameskip=(2, 5),
                 repeat_action_probability=0.,
                 mode=0,
                 difficulty=0):
        """Frameskip should be either a tuple (indicating a random range to
        choose from, with the top value exclude), or an int."""

        utils.EzPickle.__init__(self, rom_path, obs_type)
        assert obs_type in ('ram', 'image')

        self.rom_path = rom_path
        if not os.path.exists(self.rom_path):
            raise IOError('You asked for ROM %s but path %s does not exist' %
                          (game, self.game_path))
        self._obs_type = obs_type
        self.frameskip = frameskip
        # Load new ALE interface, instead of atari-py
        self.ale = ALEInterface()
        self.viewer = None

        # Tune (or disable) ALE's action repeat:
        # https://github.com/openai/gym/issues/349
        assert isinstance(
            repeat_action_probability,
            (float, int)), "Invalid repeat_action_probability: {!r}".format(
                repeat_action_probability)
        self.ale.setFloat('repeat_action_probability'.encode('utf-8'),
                          repeat_action_probability)

        self.seed()

        # Set mode and difficulty
        self.ale.setMode(mode)
        self.ale.setDifficulty(difficulty)

        self._action_set = self.ale.getMinimalActionSet()
        self.action_space = spaces.Discrete(len(self._action_set))

        (screen_width, screen_height) = self.ale.getScreenDims()
        if self._obs_type == 'ram':
            self.observation_space = spaces.Box(low=0, high=255, shape=(128, ))
        elif self._obs_type == 'image':
            self.observation_space = spaces.Box(low=0,
                                                high=255,
                                                shape=(screen_height,
                                                       screen_width, 3))
        else:
            raise error.Error('Unrecognized observation type: {}'.format(
                self._obs_type))

    def seed(self, seed=None):
        self.np_random, seed1 = seeding.np_random(seed)
        # Derive a random seed. This gets passed as a uint, but gets
        # checked as an int elsewhere, so we need to keep it below
        # 2**31.
        seed2 = seeding.hash_seed(seed1 + 1) % 2**31
        # Empirically, we need to seed before loading the ROM.
        self.ale.setInt(b'random_seed', seed2)
        # Load game from ROM instead of game path
        self.ale.loadROM(self.rom_path)
        return [seed1, seed2]

    def _get_image(self):
        return self.ale.getScreenRGB()
示例#27
0
class AtariEmulator(BaseEnvironment):
    def __init__(self,
                 rom_addr,
                 random_start=False,
                 random_seed=6,
                 visualize=True,
                 single_life=False):
        self.ale = ALEInterface()

        self.ale.setInt(b"random_seed", 2 * random_seed)
        # For fuller control on explicit action repeat (>= ALE 0.5.0)
        self.ale.setFloat(b"repeat_action_probability", 0.0)
        # Disable frame_skip and color_averaging
        # See: http://is.gd/tYzVpj
        self.ale.setInt(b"frame_skip", 1)
        self.ale.setBool(b"color_averaging", False)
        full_rom_path = rom_addr
        self.ale.loadROM(str.encode(full_rom_path))
        self.legal_actions = self.ale.getMinimalActionSet()
        self.screen_width, self.screen_height = self.ale.getScreenDims()
        self.lives = self.ale.lives()
        self.writer = imageio.get_writer('breakout0.gif', fps=30)
        self.random_start = random_start
        self.single_life_episodes = single_life
        self.call_on_new_frame = visualize

        # Processed historcal frames that will be fed in to the network
        # (i.e., four 84x84 images)
        self.observation_pool = ObservationPool(
            np.zeros((84, 84, 4), dtype=np.uint8))
        self.rgb_screen = np.zeros((self.screen_height, self.screen_width, 3),
                                   dtype=np.uint8)
        self.gray_screen = np.zeros((self.screen_height, self.screen_width, 1),
                                    dtype=np.uint8)
        self.frame_pool = FramePool(
            np.empty((2, self.screen_height, self.screen_width),
                     dtype=np.uint8), self.__process_frame_pool)

    def get_legal_actions(self):
        return self.legal_actions

    def __get_screen_image(self):
        """
        Get the current frame luminance
        :return: the current frame
        """
        self.ale.getScreenGrayscale(self.gray_screen)
        if self.call_on_new_frame:
            self.ale.getScreenRGB(self.rgb_screen)
            self.on_new_frame(self.rgb_screen)
        return np.squeeze(self.gray_screen)

    def on_new_frame(self, frame):

        pass

    def __new_game(self):
        """ Restart game """
        self.ale.reset_game()
        self.lives = self.ale.lives()
        if self.random_start:
            wait = random.randint(0, MAX_START_WAIT)
            for _ in range(wait):
                self.ale.act(self.legal_actions[0])

    def __process_frame_pool(self, frame_pool):
        """ Preprocess frame pool """

        img = np.amax(frame_pool, axis=0)
        img = imresize(img, (84, 84), interp='nearest')
        img = img.astype(np.uint8)
        return img

    def __action_repeat(self, a, times=ACTION_REPEAT):
        """ Repeat action and grab screen into frame pool """
        reward = 0
        for i in range(times - FRAMES_IN_POOL):
            reward += self.ale.act(self.legal_actions[a])
        # Only need to add the last FRAMES_IN_POOL frames to the frame pool
        for i in range(FRAMES_IN_POOL):
            reward += self.ale.act(self.legal_actions[a])
            self.frame_pool.new_frame(self.__get_screen_image())
        return reward

    def get_initial_state(self):
        """ Get the initial state """
        self.__new_game()
        for step in range(4):
            _ = self.__action_repeat(0)
            self.observation_pool.new_observation(
                self.frame_pool.get_processed_frame())
        if self.__is_terminal():
            raise Exception('This should never happen.')
        return self.observation_pool.get_pooled_observations()

    def next(self, action):
        """ Get the next state, reward, and game over signal """

        reward = self.__action_repeat(np.argmax(action))
        self.observation_pool.new_observation(
            self.frame_pool.get_processed_frame())
        terminal = self.__is_terminal()
        self.lives = self.ale.lives()
        observation = self.observation_pool.get_pooled_observations()
        return observation, reward, terminal

    def __is_terminal(self):
        if self.single_life_episodes:
            return self.__is_over() or (self.lives > self.ale.lives())
        else:
            return self.__is_over()

    def __is_over(self):
        return self.ale.game_over()

    def get_noop(self):
        return [1.0, 0.0]
示例#28
0
class AtariPlayer(gym.Env):
    """
    A wrapper for ALE emulator, with configurations to mimic DeepMind DQN settings.

    Info:
        score: the accumulated reward in the current game
        gameOver: True when the current game is Over
    """

    def __init__(self, rom_file, viz=0,
                 frame_skip=4, nullop_start=30,
                 live_lost_as_eoe=True, max_num_frames=0):
        """
        Args:
            rom_file: path to the rom
            frame_skip: skip every k frames and repeat the action
            viz: visualization to be done.
                Set to 0 to disable.
                Set to a positive number to be the delay between frames to show.
                Set to a string to be a directory to store frames.
            nullop_start: start with random number of null ops.
            live_losts_as_eoe: consider lost of lives as end of episode. Useful for training.
            max_num_frames: maximum number of frames per episode.
        """
        super(AtariPlayer, self).__init__()
        if not os.path.isfile(rom_file) and '/' not in rom_file:
            rom_file = get_dataset_path('atari_rom', rom_file)
        assert os.path.isfile(rom_file), \
            "rom {} not found. Please download at {}".format(rom_file, ROM_URL)

        try:
            ALEInterface.setLoggerMode(ALEInterface.Logger.Error)
        except AttributeError:
            if execute_only_once():
                logger.warn("You're not using latest ALE")

        # avoid simulator bugs: https://github.com/mgbellemare/Arcade-Learning-Environment/issues/86
        with _ALE_LOCK:
            self.ale = ALEInterface()
            self.rng = get_rng(self)
            self.ale.setInt(b"random_seed", self.rng.randint(0, 30000))
            self.ale.setInt(b"max_num_frames_per_episode", max_num_frames)
            self.ale.setBool(b"showinfo", False)

            self.ale.setInt(b"frame_skip", 1)
            self.ale.setBool(b'color_averaging', False)
            # manual.pdf suggests otherwise.
            self.ale.setFloat(b'repeat_action_probability', 0.0)

            # viz setup
            if isinstance(viz, six.string_types):
                assert os.path.isdir(viz), viz
                self.ale.setString(b'record_screen_dir', viz)
                viz = 0
            if isinstance(viz, int):
                viz = float(viz)
            self.viz = viz
            if self.viz and isinstance(self.viz, float):
                self.windowname = os.path.basename(rom_file)
                cv2.namedWindow(self.windowname)

            self.ale.loadROM(rom_file.encode('utf-8'))
        self.width, self.height = self.ale.getScreenDims()
        self.actions = self.ale.getMinimalActionSet()

        self.live_lost_as_eoe = live_lost_as_eoe
        self.frame_skip = frame_skip
        self.nullop_start = nullop_start

        self.action_space = spaces.Discrete(len(self.actions))
        self.observation_space = spaces.Box(
            low=0, high=255, shape=(self.height, self.width, 1), dtype=np.uint8)
        self._restart_episode()

    def get_action_meanings(self):
        return [ACTION_MEANING[i] for i in self.actions]

    def _grab_raw_image(self):
        """
        :returns: the current 3-channel image
        """
        m = self.ale.getScreenRGB()
        return m.reshape((self.height, self.width, 3))

    def _current_state(self):
        """
        :returns: a gray-scale (h, w, 1) uint8 image
        """
        ret = self._grab_raw_image()
        # max-pooled over the last screen
        ret = np.maximum(ret, self.last_raw_screen)
        if self.viz:
            if isinstance(self.viz, float):
                cv2.imshow(self.windowname, ret)
                cv2.waitKey(int(self.viz * 1000))
        ret = ret.astype('float32')
        # 0.299,0.587.0.114. same as rgb2y in torch/image
        ret = cv2.cvtColor(ret, cv2.COLOR_RGB2GRAY)[:, :, np.newaxis]
        return ret.astype('uint8')  # to save some memory

    def _restart_episode(self):
        with _ALE_LOCK:
            self.ale.reset_game()

        # random null-ops start
        n = self.rng.randint(self.nullop_start)
        self.last_raw_screen = self._grab_raw_image()
        for k in range(n):
            if k == n - 1:
                self.last_raw_screen = self._grab_raw_image()
            self.ale.act(0)

    def reset(self):
        if self.ale.game_over():
            self._restart_episode()
        return self._current_state()

    def step(self, act):
        oldlives = self.ale.lives()
        r = 0
        for k in range(self.frame_skip):
            if k == self.frame_skip - 1:
                self.last_raw_screen = self._grab_raw_image()
            r += self.ale.act(self.actions[act])
            newlives = self.ale.lives()
            if self.ale.game_over() or \
                    (self.live_lost_as_eoe and newlives < oldlives):
                break

        isOver = self.ale.game_over()
        if self.live_lost_as_eoe:
            isOver = isOver or newlives < oldlives

        info = {'ale.lives': newlives}
        return self._current_state(), r, isOver, info
示例#29
0
class AtariEmulator(BaseEnvironment):
    def __init__(self,
                 emulator_id,
                 game,
                 resource_folder,
                 random_seed=3,
                 random_start=True,
                 single_life_episodes=False,
                 history_window=1,
                 visualize=False,
                 verbose=0,
                 **unknown):
        if verbose >= 2:
            logging.debug('Emulator#{} received unknown args: {}'.format(
                emulator_id, unknown))
        self.emulator_id = emulator_id
        self.ale = ALEInterface()
        self.ale.setInt(b"random_seed", random_seed * (emulator_id + 1))
        # For fuller control on explicit action repeat (>= ALE 0.5.0)
        self.ale.setFloat(b"repeat_action_probability", 0.0)
        # Disable frame_skip and color_averaging
        # See: http://is.gd/tYzVpj
        self.ale.setInt(b"frame_skip", 1)
        self.ale.setBool(b"color_averaging", False)
        self.ale.setBool(b"display_screen", visualize)

        full_rom_path = resource_folder + "/" + game + ".bin"
        self.ale.loadROM(str.encode(full_rom_path))
        self.legal_actions = self.ale.getMinimalActionSet()
        #this env is fixed until firing, so you have to...
        self._have_to_fire = ('FIRE' in [
            ACTION_MEANING[a] for a in self.legal_actions
        ])
        self.screen_width, self.screen_height = self.ale.getScreenDims()
        self.lives = self.ale.lives()

        self.random_start = random_start
        self.single_life_episodes = single_life_episodes
        self.call_on_new_frame = visualize
        self.history_window = history_window
        self.observation_shape = (self.history_window, IMG_SIZE_X, IMG_SIZE_Y)
        self.rgb_screen = np.zeros((self.screen_height, self.screen_width, 3),
                                   dtype=np.uint8)
        self.gray_screen = np.zeros((self.screen_height, self.screen_width, 1),
                                    dtype=np.uint8)
        # Processed historcal frames that will be fed in to the network (i.e., four 84x84 images)
        self.history = create_history_observation(self.history_window)
        #ObservationPool(np.zeros(self.observation_shape, dtype=np.uint8))
        self.frame_preprocessor = FramePreprocessor(self.gray_screen.shape,
                                                    FRAMES_IN_POOL)

    def get_legal_actions(self):
        return self.legal_actions

    def __get_screen_image(self):
        """
        Get the current frame luminance
        :return: the current frame
        """
        self.ale.getScreenGrayscale(self.gray_screen)
        if self.call_on_new_frame:
            self.ale.getScreenRGB(self.rgb_screen)
            self.on_new_frame(self.rgb_screen)
        return self.gray_screen

    def on_new_frame(self, frame):
        pass

    def __random_start_reset(self):
        """ Restart game """
        self.ale.reset_game()

        if self.random_start:
            wait = random.randint(0, MAX_START_WAIT + 1)
            for _ in range(wait):
                self.ale.act(self.get_noop())
                if self.__is_over():
                    self.ale.reset_game()

        self.lives = self.ale.lives()

    def __new_game(self):
        self.__random_start_reset()

        if self._have_to_fire:
            #take action on reset for environments that are fixed until firing
            self.ale.act(self.legal_actions[1])
            if self.__is_over():
                self.__random_start_reset()

            self.ale.act(self.legal_actions[2])
            if self.__is_over():
                self.__random_start_reset()

    def __action_repeat(self, a, times=ACTION_REPEAT):
        """ Repeat action and grab screen into frame pool """
        reward = 0
        for i in range(times - FRAMES_IN_POOL):
            reward += self.ale.act(self.legal_actions[a])
        # Only need to add the last FRAMES_IN_POOL frames to the frame pool
        for i in range(FRAMES_IN_POOL):
            reward += self.ale.act(self.legal_actions[a])
            self.frame_preprocessor.new_frame(self.__get_screen_image())
        return reward

    def reset(self):
        """ Get the initial state """
        self.__new_game()
        for step in range(self.history_window):
            _ = self.__action_repeat(0)
            self.history.new_observation(
                self.frame_preprocessor.get_processed())
        if self.__is_terminal():
            raise Exception('This should never happen.')
        return self.history.get_state(), None

    def next(self, action):
        """ Get the next state, reward, and game over signal """
        reward = self.__action_repeat(action)
        self.history.new_observation(self.frame_preprocessor.get_processed())
        terminal = self.__is_terminal()
        self.lives = self.ale.lives()
        return self.history.get_state(), reward, terminal, None

    def __is_terminal(self):
        if self.single_life_episodes:
            return self.__is_over() or (self.lives > self.ale.lives())
        else:
            return self.__is_over()

    def __is_over(self):
        return self.ale.game_over()

    def get_noop(self):
        return self.legal_actions[0]

    def close(self):
        del self.ale
示例#30
0
class Emulator:
    def __init__(self, rom_path, rom_name, visualize, actor_id, rseed, single_life_episodes = False):
        
        self.ale = ALEInterface()

        self.ale.setInt("random_seed", rseed * (actor_id +1))

        # For fuller control on explicit action repeat (>= ALE 0.5.0) 
        self.ale.setFloat("repeat_action_probability", 0.0)
        
        # Disable frame_skip and color_averaging
        # See: http://is.gd/tYzVpj
        self.ale.setInt("frame_skip", 1)
        self.ale.setBool("color_averaging", False)
        self.ale.loadROM(rom_path + "/" + rom_name + ".bin")
        self.legal_actions = self.ale.getMinimalActionSet()        
        self.screen_width,self.screen_height = self.ale.getScreenDims()
        #self.ale.setBool('display_screen', True)
        
        # Processed historcal frames that will be fed in to the network 
        # (i.e., four 84x84 images)
        self.screen_images_processed = np.zeros((IMG_SIZE_X, IMG_SIZE_Y, 
            NR_IMAGES)) 
        self.rgb_screen = np.zeros((self.screen_height,self.screen_width, 3), dtype=np.uint8)
        self.gray_screen = np.zeros((self.screen_height,self.screen_width,1), dtype=np.uint8)

        self.frame_pool = np.empty((2, self.screen_height, self.screen_width))
        self.current = 0
        self.lives = self.ale.lives()

        self.visualize = visualize
        self.visualize_processed = False
        self.windowname = rom_name + ' ' + str(actor_id)
        if self.visualize:
            logger.debug("Opening emulator window...")
            #from skimage import io
            #io.use_plugin('qt')
            cv2.startWindowThread()
            cv2.namedWindow(self.windowname)
            logger.debug("Emulator window opened")
            
        if self.visualize_processed:
            logger.debug("Opening processed frame window...")
            cv2.startWindowThread()
            logger.debug("Processed frame window opened")
            cv2.namedWindow(self.windowname + "_processed")
            
        self.single_life_episodes = single_life_episodes

    def get_screen_image(self):
        """ Add screen (luminance) to frame pool """
        # [screen_image, screen_image_rgb] = [self.ale.getScreenGrayscale(), 
        #     self.ale.getScreenRGB()]
        self.ale.getScreenGrayscale(self.gray_screen)
        self.ale.getScreenRGB(self.rgb_screen)
        self.frame_pool[self.current] = np.squeeze(self.gray_screen)
        self.current = (self.current + 1) % FRAMES_IN_POOL
        return self.rgb_screen

    def new_game(self):
        """ Restart game """
        self.ale.reset_game()
        self.lives = self.ale.lives()

        if MAX_START_WAIT < 0:
            logger.debug("Cannot time travel yet.")
            sys.exit()
        elif MAX_START_WAIT > 0:
            wait = random.randint(0, MAX_START_WAIT)
        else:
            wait = 0
        for _ in xrange(wait):
            self.ale.act(self.legal_actions[0])

    def process_frame_pool(self):
        """ Preprocess frame pool """
        
        img = None
        if BLEND_METHOD == "max_pool":
            img = np.amax(self.frame_pool, axis=0)
        
        #img resize(img[:210, :], (84, 84))
        img = cv2.resize(img[:210, :], (84, 84), 
            interpolation=cv2.INTER_LINEAR)
        
        img = img.astype(np.float32)
        img *= (1.0/255.0)
        
        return img
        # Reduce height to 210, if not so
        #cropped_img = img[:210, :]
        # Downsample to 110x84
        #down_sampled_img = resize(cropped_img, (84, 84))
        
        # Crop to 84x84 playing area
        #stackable_image = down_sampled_img[:, 26:110]
        #return stackable_image

    def action_repeat(self, a):
        """ Repeat action and grab screen into frame pool """
        reward = 0
        for i in xrange(ACTION_REPEAT):
            reward += self.ale.act(self.legal_actions[a])
            new_screen_image_rgb = self.get_screen_image()
        return reward, new_screen_image_rgb

    def get_reshaped_state(self, state):
        return np.reshape(state, 
            (1, IMG_SIZE_X, IMG_SIZE_Y, NR_IMAGES))
        #return np.reshape(self.screen_images_processed, 
        #    (1, IMG_SIZE_X, IMG_SIZE_Y, NR_IMAGES))

    def get_initial_state(self):
        """ Get the initial state """
        self.new_game()
        for step in xrange(NR_IMAGES):
            reward, new_screen_image_rgb = self.action_repeat(0)
            self.screen_images_processed[:, :, step] = self.process_frame_pool()
            self.show_screen(new_screen_image_rgb)
        if self.is_terminal():
            MAX_START_WAIT -= 1
            return self.get_initial_state()
        return np.copy(self.screen_images_processed) #get_reshaped_state()      

    def next(self, action):
        """ Get the next state, reward, and game over signal """
        reward, new_screen_image_rgb = self.action_repeat(np.argmax(action))
        self.screen_images_processed[:, :, 0:3] = \
            self.screen_images_processed[:, :, 1:4]
        self.screen_images_processed[:, :, 3] = self.process_frame_pool()
        self.show_screen(new_screen_image_rgb)
        terminal = self.is_terminal()
        self.lives = self.ale.lives()
        return np.copy(self.screen_images_processed), reward, terminal #get_reshaped_state(), reward, terminal
    
    def show_screen(self, image):
        """ Show visuals for raw and processed images """
        if self.visualize:
            #io.imshow(image[:210, :], fancy=True)
            cv2.imshow(self.windowname, image[:210, :])
        if self.visualize_processed:
            #io.imshow(self.screen_images_processed[:, :, 3], fancy=True)
            cv2.imshow(self.windowname + "_processed", self.screen_images_processed[:, :, 3])
            
    def is_terminal(self):
        if self.single_life_episodes:
            return (self.is_over() or (self.lives > self.ale.lives()))
        else:
            return self.is_over()

    def is_over(self):
        return self.ale.game_over()
        pygame.init()
        ale.setBool('sound', False) # Sound doesn't work on OSX
      elif sys.platform.startswith('linux'):
        ale.setBool('sound', FLAGS.sound)
      ale.setBool('display_screen', FLAGS.display)

    # Load the ROM file
    rom_file = str.encode(FLAGS.rom)
    ale.loadROM(rom_file)

    # Get the list of legal actions
    legal_actions = ale.getLegalActionSet()
    minimal_actions = ale.getMinimalActionSet()
    print "Legal Actions:", legal_actions
    print "Minimal Actions:", minimal_actions
    print "Screen size:", ale.getScreenDims()

    # Play 10 episodes
    for episode in range(10):
      total_reward = 0
      while not ale.game_over():
        a = legal_actions[randrange(len(legal_actions))]
        # Apply an action and get the resulting reward
        reward = ale.act(a);
        total_reward += reward
        #print ale.getFrameNumber(), ale.getEpisodeFrameNumber(), reward, total_reward
      print('Episode %d ended with score: %d' % (episode, total_reward))
      ale.reset_game()


if __name__ == '__main__':
示例#32
0
文件: atari.py 项目: asmith26/Chimp
class AtariSimulator(object):

    def __init__(self, settings):

        '''Initiate Arcade Learning Environment (ALE) using Python interface
        https://github.com/bbitmaster/ale_python_interface/wiki

        - Set number of frames to be skipped, random seed, ROM and title for display.
        - Retrieve a set of legal actions and their number.
        - Retrieve dimensions of the original screen (width/height), and set the dimensions
        of the cropped screen, together with the padding used to crop the screen rectangle.
        - Set dimensions of the pygame display that will show visualization of the simulation.
        (May be cropped --- showing what the learner sees, or not --- showing full Atari screen)
        - Allocate memory for generated grayscale screenshots. Accepts dims in (height/width) format
        '''

        self.ale = ALEInterface()
        self.ale.setInt("frame_skip",settings["frame_skip"])
        self.ale.setInt("random_seed",settings["seed_simulator"])
        self.ale.loadROM(settings["rom_dir"] + '/' + settings["rom"])

        self.title = "ALE Simulator: " + str(settings["rom"])
        self.actions = self.ale.getLegalActionSet()
        self.n_actions = self.actions.size

        self.screen_dims = self.ale.getScreenDims()
        self.model_dims = settings['model_dims']
        self.pad = settings['pad']

        print("Original screen width/height: " + str(self.screen_dims[0]) + "/" + str(self.screen_dims[1]))
        print("Cropped screen width/height: " + str(self.model_dims[0]) + "/" + str(self.model_dims[1]))

        self.viz_cropped = settings['viz_cropped']
        if self.viz_cropped:
            self.display_dims = (int(self.model_dims[0]*2), int(self.model_dims[1]*2))
        else:
            self.display_dims = (int(self.screen_dims[0]*2), int(self.screen_dims[1]*2))

        # preallocate an array to accept ALE screen data (height/width) !
        self.screen_data = np.empty((self.screen_dims[1],self.screen_dims[0]),dtype=np.uint8)


    def get_screenshot(self):
        '''returns a cropped snapshot of the simulator
        - store grayscale values in a preallocated array
        - cut out a square from the rectangle, using provided padding value
        - downsample to the desired size and transpose from (height/width) to (width/height)
        '''

        self.ale.getScreenGrayscale(self.screen_data)
        self.tmp = self.screen_data[(self.screen_dims[1]-self.screen_dims[0]-self.pad):(self.screen_dims[1]-self.pad),:]
        self.frame = spm.imresize(self.tmp,self.model_dims[::-1],interp='nearest').T #, interp='nearest'

        return self.frame


    def act(self,action_index):
        '''function to transition the simulator from s to s' using provided action
        the action that is provided is in form of an index
        simulator deals with translating the index into an actual action'''

        self.last_reward = self.ale.act(self.actions[action_index])


    def reward(self):
        '''return reward - has to be called after the "act" function'''

        return self.last_reward


    def episode_over(self):
        '''return a boolean indicator on whether the game is still running'''

        return self.ale.game_over()
        

    def reset_episode(self):
        '''reset the game that ended'''

        self.ale.reset_game()


    def init_viz_display(self):
        '''initialize display that will show visualization'''

        pygame.init()
        self.screen = pygame.display.set_mode(self.display_dims)
        if self.title:
            pygame.display.set_caption(self.title)


    def refresh_viz_display(self):
        '''if display is shut down, shut the game down
        else move the current simulator's frame (cropped or not cropped) into the pygame display,
        after expanding it 2x along x and y dimensions'''

        for event in pygame.event.get():
            if event.type == pygame.QUIT:
                exit

        if self.viz_cropped:
            self.surface = pygame.surfarray.make_surface(self.frame) # has already been transposed
        else:
            self.surface = pygame.surfarray.make_surface(self.screen_data.T)

        self.screen.blit(pygame.transform.scale2x(self.surface),(0,0))
        pygame.display.flip()
示例#33
0
class ALE(Environment):
    def __init__(self,
                 rom,
                 frame_skip=1,
                 repeat_action_probability=0.0,
                 loss_of_life_termination=False,
                 loss_of_life_reward=0,
                 display_screen=False,
                 seed=np.random.RandomState()):
        """
        Initialize ALE.

        Args:
            rom: Rom filename and directory.
            frame_skip: Repeat action for n frames. Default 1.
            repeat_action_probability: Repeats last action with given probability. Default 0.
            loss_of_life_termination: Signals a terminal state on loss of life. Default False.
            loss_of_life_reward: Reward/Penalty on loss of life (negative values are a penalty). Default 0.
            display_screen: Displays the emulator screen. Default False.
            seed: Random seed
        """

        self.ale = ALEInterface()
        self.rom = rom

        self.ale.setBool(b'display_screen', display_screen)
        self.ale.setInt(b'random_seed', seed.randint(0, 9999))
        self.ale.setFloat(b'repeat_action_probability',
                          repeat_action_probability)
        self.ale.setBool(b'color_averaging', False)
        self.ale.setInt(b'frame_skip', frame_skip)

        # all set commands must be done before loading the ROM
        self.ale.loadROM(rom.encode())

        # setup gamescreen object
        width, height = self.ale.getScreenDims()
        self.gamescreen = np.empty((height, width, 3), dtype=np.uint8)

        self.frame_skip = frame_skip

        # setup action converter
        # ALE returns legal action indexes, convert these to just numbers
        self.action_inds = self.ale.getMinimalActionSet()

        # setup lives
        self.loss_of_life_reward = loss_of_life_reward
        self.cur_lives = self.ale.lives()
        self.loss_of_life_termination = loss_of_life_termination
        self.life_lost = False

    def __str__(self):
        return 'ALE({})'.format(self.rom)

    def close(self):
        self.ale = None

    def reset(self):
        self.ale.reset_game()
        self.cur_lives = self.ale.lives()
        self.life_lost = False
        # clear gamescreen
        self.gamescreen = np.empty(self.gamescreen.shape, dtype=np.uint8)
        return self.current_state

    def execute(self, action):
        # convert action to ale action
        ale_action = self.action_inds[action]

        # get reward and process terminal & next state
        rew = self.ale.act(ale_action)
        if self.loss_of_life_termination or self.loss_of_life_reward != 0:
            new_lives = self.ale.lives()
            if new_lives < self.cur_lives:
                self.cur_lives = new_lives
                self.life_lost = True
                rew += self.loss_of_life_reward

        terminal = self.is_terminal
        state_tp1 = self.current_state
        return state_tp1, rew, terminal

    @property
    def states(self):
        return dict(shape=self.gamescreen.shape, type=float)

    @property
    def actions(self):
        return dict(continuous=False,
                    num_actions=len(self.action_inds),
                    names=self.action_names)

    @property
    def current_state(self):
        self.gamescreen = self.ale.getScreenRGB(self.gamescreen)
        return np.copy(self.gamescreen)

    @property
    def is_terminal(self):
        if self.loss_of_life_termination and self.life_lost:
            return True
        else:
            return self.ale.game_over()

    @property
    def action_names(self):
        action_names = [
            'No-Op', 'Fire', 'Up', 'Right', 'Left', 'Down', 'Up Right',
            'Up Left', 'Down Right', 'Down Left', 'Up Fire', 'Right Fire',
            'Left Fire', 'Down Fire', 'Up Right Fire', 'Up Left Fire',
            'Down Right Fire', 'Down Left Fire'
        ]
        return np.asarray(action_names)[self.action_inds]
示例#34
0
class AtariWrapper():
    """
    ALE wrapper that tries to mimic the options in the DQN paper including the 
    preprocessing (except resizing/cropping)
    """
    action_words = [
        'NOOP', 'UP', 'RIGHT', 'LEFT', 'DOWN', "UPRIGHT", "UPLEFT",
        "DOWNRIGHT", "DOWNLEFT"
    ]
    _action_set = [0, 2, 3, 4, 5, 6, 7, 8, 9]
    #Valid actions for ALE.
    #Possible actions are just a list from 0,num_valid_actions
    #We still need to map from the latter to the former when

    possible_actions = list(range(len(_action_set)))

    def __init__(self,
                 rom_path,
                 seed=123,
                 frameskip=4,
                 show_display=False,
                 stack_num_states=4,
                 concatenate_state_every=4):
        """

        Parameters:
            Frameskip should be either a tuple (indicating a random range to
            choose from, with the top value exclude), or an int. It's aka action repeat.

            stack_num_states: Number of dimensions/channels to have. 

            concatenate_state_every: After how many frames should one channel be appended to state.
                Number is in terms of absolute frames independent of frameskip
        """

        self.stack_num_states = stack_num_states
        self.concatenate_state_every = concatenate_state_every

        self.game_path = rom_path
        if not os.path.exists(self.game_path):
            raise IOError('You asked for game %s but path %s does not exist' %
                          (game, self.game_path))
        self.frameskip = frameskip

        try:
            self.ale = ALEInterface()
        except Exception as e:
            print(
                "ALEInterface could not be loaded. ale_python_interface import failed"
            )
            raise e

        #Set some default options
        self.ale.setInt(b'random_seed', seed)
        self.ale.setBool(b'sound', False)
        self.ale.setBool(b'display_screen', show_display)
        self.ale.setFloat(b'repeat_action_probability', 0.)

        #Load the rom
        self.ale.loadROM(self.game_path)

        (self.screen_width, self.screen_height) = self.ale.getScreenDims()
        self.latest_frame_fifo = deque(
            maxlen=2)  #Holds the two closest frames to max.
        self.state_fifo = deque(maxlen=stack_num_states)

    def _step(self, a, force_noop=False):
        """Perform one step of the environment. 
        Automatically repeats the step self.frameskip number of times
        
        parameters:
            force_noop: Force it to perform a no-op ignoring the action supplied. 
        """
        assert a in self.possible_actions + [0]

        if force_noop:
            action, num_steps = 0, 1
        else:
            action = self._action_set[a]

        if isinstance(self.frameskip, int):
            num_steps = self.frameskip
        else:
            num_steps = np.random.randint(self.frameskip[0], self.frameskip[1])

        reward = 0.0
        for i in range(num_steps):
            reward += self.ale.act(action)
            cur_frame = self.observe_raw(get_rgb=True)
            cur_frame_cropped = self.crop_frame(cur_frame)
            self.latest_frame_fifo.append(cur_frame_cropped)

            if i % self.concatenate_state_every == 0:
                curmax_frame = np.amax(self.latest_frame_fifo, axis=0)
                frame_lumi = self.convert_to_gray(curmax_frame)
                self.state_fifo.append(frame_lumi)

        #Transpose so we get HxWxC instead of CxHxW
        self.current_frame = np.array(np.transpose(self.state_fifo, (1, 2, 0)))
        return self.current_frame, reward, self.ale.game_over(), {
            "ale.lives": self.ale.lives()
        }

    def step(self, *args, **kwargs):
        """Performs one step of the environment
        """
        lives_before = self.ale.lives()
        next_state, reward, done, info = self._step(*args, **kwargs)
        lives_after = self.ale.lives()

        # End the episode when a life is lost
        if lives_before > lives_after:
            done = True

        return next_state, reward, done, info

    def observe_raw(self, get_rgb=False):
        """Observe either RGB or Gray frames. 
        Initialzing arrays forces it to not modify stale pointers
        """
        if get_rgb:
            cur_frame_rgb = np.zeros(
                (self.screen_height, self.screen_width, 3), dtype=np.uint8)
            self.ale.getScreenRGB(cur_frame_rgb)
            return cur_frame_rgb
        else:
            cur_frame_gray = np.zeros((self.screen_height, self.screen_width),
                                      dtype=np.uint8)
            self.ale.getScreenGrayscale(cur_frame_gray)
            return cur_frame_gray

    def crop_frame(self, frame):
        """Simply crops a frame. Does nothing by default.
        """
        return frame

    def convert_to_gray(self, img):
        """Get Luminescence channel 
        """
        img_f = np.float32(img)
        img_lumi = 0.299*img_f[:,:,0] + \
                    0.587*img_f[:,:,1] + \
                    0.114*img_f[:,:,2]
        return np.uint8(img_lumi)

    def reset(self):
        """Reset the game
        """
        self.ale.reset_game()
        s = self.observe_raw(get_rgb=True)
        s = self.crop_frame(s)

        #Populate missing frames with blank ones.
        for _ in range(self.stack_num_states - 1):
            self.state_fifo.append(np.zeros(shape=(s.shape[0], s.shape[1])))

        self.latest_frame_fifo.append(s)

        #Push the latest frame
        curmax_frame = s
        frame_lumi = self.convert_to_gray(s)
        self.state_fifo.append(frame_lumi)

        self.state = np.transpose(self.state_fifo, (1, 2, 0))
        return self.state

    def get_action_meanings(self):
        """Return in text what the actions correspond to.
        """
        return [ACTION_MEANING[i] for i in self._action_set]

    def save_state(self):
        """Saves the current state and returns a identifier to saved state
        """
        return self.ale.cloneSystemState()

    def restore_state(self, ident):
        """Restore game state
        Restores the saved state of the system and perform a no-op
        so a new frame can be generated incase a restore is followed
        by an observe()
        """

        self.ale.restoreSystemState(ident)
        self.step(0, force_noop=True)
示例#35
0
if USE_SDL:
  if sys.platform == 'darwin':
    import pygame
    print "!!!"
    pygame.init()
    ale.setBool('sound', False) # Sound doesn't work on OSX
  elif sys.platform.startswith('linux'):
    ale.setBool('sound', True)
  ale.setBool('display_screen', True)
ale.setInt('frame_skip',1)
# Load the ROM file
ale.loadROM('roms/breakout.bin')
ale.setInt('max_num_frames',1)
# Get the list of legal actions
legal_actions = ale.getMinimalActionSet()
a1,a2 =  ale.getScreenDims()
cnt = 0
# Play 10 episodes
import  numpy as  np
d = np.empty((a1,a2),dtype=np.uint8)
for episode in xrange(10):
  total_reward = 0
  while not ale.game_over():
    a = legal_actions[randrange(len(legal_actions))]
    #print legal_actions
    # Apply an action and get the resulting reward
    reward = ale.act(a);
    ale.getScreenGrayscale(d)

    io.imshow(d)
    io.show()
示例#36
0
class Emulator:
    def __init__(self, rom_path, rom_name, visualize, actor_id, rseed, single_life_episodes = False):
        
        self.ale = ALEInterface()

        self.ale.setInt("random_seed", rseed * (actor_id +1))

        # For fuller control on explicit action repeat (>= ALE 0.5.0) 
        self.ale.setFloat("repeat_action_probability", 0.0)
        
        # Disable frame_skip and color_averaging
        # See: http://is.gd/tYzVpj
        self.ale.setInt("frame_skip", 1)
        self.ale.setBool("color_averaging", False)
        self.ale.loadROM(rom_path + "/" + rom_name + ".bin")
        self.legal_actions = self.ale.getMinimalActionSet()        
        self.screen_width,self.screen_height = self.ale.getScreenDims()
        #self.ale.setBool('display_screen', True)
        
        # Processed historcal frames that will be fed in to the network 
        # (i.e., four 84x84 images)
        self.screen_images_processed = np.zeros((IMG_SIZE_X, IMG_SIZE_Y, 
            NR_IMAGES)) 
        self.rgb_screen = np.zeros((self.screen_height,self.screen_width, 3), dtype=np.uint8)
        self.gray_screen = np.zeros((self.screen_height,self.screen_width,1), dtype=np.uint8)

        self.frame_pool = np.empty((2, self.screen_height, self.screen_width))
        self.current = 0
        self.lives = self.ale.lives()

        self.visualize = visualize
        self.visualize_processed = False
        self.windowname = rom_name + ' ' + str(actor_id)
        if self.visualize:
            logger.debug("Opening emulator window...")
            #from skimage import io
            #io.use_plugin('qt')
            cv2.startWindowThread()
            cv2.namedWindow(self.windowname)
            logger.debug("Emulator window opened")
            
        if self.visualize_processed:
            logger.debug("Opening processed frame window...")
            cv2.startWindowThread()
            logger.debug("Processed frame window opened")
            cv2.namedWindow(self.windowname + "_processed")
            
        self.single_life_episodes = single_life_episodes

    def get_screen_image(self):
        """ Add screen (luminance) to frame pool """
        # [screen_image, screen_image_rgb] = [self.ale.getScreenGrayscale(), 
        #     self.ale.getScreenRGB()]
        self.ale.getScreenGrayscale(self.gray_screen)
        self.ale.getScreenRGB(self.rgb_screen)
        self.frame_pool[self.current] = np.squeeze(self.gray_screen)
        self.current = (self.current + 1) % FRAMES_IN_POOL
        return self.rgb_screen

    def new_game(self):
        """ Restart game """
        self.ale.reset_game()
        self.lives = self.ale.lives()

        if MAX_START_WAIT < 0:
            logger.debug("Cannot time travel yet.")
            sys.exit()
        elif MAX_START_WAIT > 0:
            wait = random.randint(0, MAX_START_WAIT)
        else:
            wait = 0
        for _ in xrange(wait):
            self.ale.act(self.legal_actions[0])

    def process_frame_pool(self):
        """ Preprocess frame pool """
        
        img = None
        if BLEND_METHOD == "max_pool":
            img = np.amax(self.frame_pool, axis=0)
        
        #img resize(img[:210, :], (84, 84))
        img = cv2.resize(img[:210, :], (84, 84), 
            interpolation=cv2.INTER_LINEAR)
        
        img = img.astype(np.float32)
        img *= (1.0/255.0)
        
        return img
        # Reduce height to 210, if not so
        #cropped_img = img[:210, :]
        # Downsample to 110x84
        #down_sampled_img = resize(cropped_img, (84, 84))
        
        # Crop to 84x84 playing area
        #stackable_image = down_sampled_img[:, 26:110]
        #return stackable_image

    def action_repeat(self, a):
        """ Repeat action and grab screen into frame pool """
        reward = 0
        for i in xrange(ACTION_REPEAT):
            reward += self.ale.act(self.legal_actions[a])
            new_screen_image_rgb = self.get_screen_image()
        return reward, new_screen_image_rgb

    def get_reshaped_state(self, state):
        return np.reshape(state, 
            (1, IMG_SIZE_X, IMG_SIZE_Y, NR_IMAGES))
        #return np.reshape(self.screen_images_processed, 
        #    (1, IMG_SIZE_X, IMG_SIZE_Y, NR_IMAGES))

    def get_initial_state(self):
        """ Get the initial state """
        self.new_game()
        for step in xrange(NR_IMAGES):
            reward, new_screen_image_rgb = self.action_repeat(0)
            self.screen_images_processed[:, :, step] = self.process_frame_pool()
            self.show_screen(new_screen_image_rgb)
        if self.is_terminal():
            MAX_START_WAIT -= 1
            return self.get_initial_state()
        return np.copy(self.screen_images_processed) #get_reshaped_state()      

    def next(self, action):
        """ Get the next state, reward, and game over signal """
        reward, new_screen_image_rgb = self.action_repeat(np.argmax(action))
        self.screen_images_processed[:, :, 0:3] = \
            self.screen_images_processed[:, :, 1:4]
        self.screen_images_processed[:, :, 3] = self.process_frame_pool()
        self.show_screen(new_screen_image_rgb)
        terminal = self.is_terminal()
        self.lives = self.ale.lives()
        return np.copy(self.screen_images_processed), reward, terminal #get_reshaped_state(), reward, terminal
    
    def show_screen(self, image):
        """ Show visuals for raw and processed images """
        if self.visualize:
            #io.imshow(image[:210, :], fancy=True)
            cv2.imshow(self.windowname, image[:210, :])
        if self.visualize_processed:
            #io.imshow(self.screen_images_processed[:, :, 3], fancy=True)
            cv2.imshow(self.windowname + "_processed", self.screen_images_processed[:, :, 3])
            
    def is_terminal(self):
        if self.single_life_episodes:
            return (self.is_over() or (self.lives > self.ale.lives()))
        else:
            return self.is_over()

    def is_over(self):
        return self.ale.game_over()
示例#37
0
class AleAgent:
    ##
    # @param processing_cls Class for processing game visual unput
    def __init__(self, processing_cls, game_rom=None, encoder_model=None, encoder_weights=None, NFQ_model=None, NFQ_weights=None):
        assert game_rom is not None
        self.game = ALEInterface()
        if encoder_weights is not None and encoder_model is not None:
            self.encoder = Encoder(path_to_model=encoder_model, path_to_weights=encoder_weights)
        else:
            self.encoder = Encoder()

        self.processor = processing_cls()

        # Get & Set the desired settings
        self.game.setInt('random_seed', 0)
        self.game.setInt('frame_skip', 4)

        # Set USE_SDL to true to display the screen. ALE must be compilied
        # with SDL enabled for this to work. On OSX, pygame init is used to
        # proxy-call SDL_main.
        USE_SDL = True

        if USE_SDL:
            if sys.platform == 'darwin':
                pygame.init()
                self.game.setBool('sound', False)   # Sound doesn't work on OSX
            elif sys.platform.startswith('linux'):
                self.game.setBool('sound', False)   # no sound

            self.game.setBool('display_screen', True)

        # Load the ROM file
        self.game.loadROM(game_rom)

        # Get the list of legal actions
        self.legal_actions = self.game.getLegalActionSet()

        # Get actions applicable in current game
        self.minimal_actions = self.game.getMinimalActionSet()

        if NFQ_model is not None and NFQ_weights is not None:
            self.NFQ = NFQ(
                self.encoder.out_dim,
                len(self.minimal_actions),
                model_path=NFQ_model,
                weights_path=NFQ_weights
            )
        else:
            self.NFQ = NFQ(self.encoder.out_dim, len(self.minimal_actions))

        (self.screen_width, self.screen_height) = self.game.getScreenDims()
        self.screen_data = np.zeros(
            (self.screen_height, self.screen_width),
            dtype=np.uint8
        )

    ##
    # Initialize the reinforcement learning
    def train(self, num_of_episodes=1500, eps=0.995, key_binding=None):
        pygame.init()
        for episode in xrange(num_of_episodes):
            total_reward = 0
            moves = 0
            hits = 0
            print 'Starting episode: ', episode+1

            if key_binding:
                eps = 0.05
            else:
                eps -= 2/num_of_episodes

            self.game.getScreenGrayscale(self.screen_data)
            pooled_data = self.processor.process(self.screen_data)
            next_state = self.encoder.encode(pooled_data)
            while not self.game.game_over():
                current_state = next_state
                x = None

                if key_binding:
                    key_pressed = pygame.key.get_pressed()
                    x = key_binding(key_pressed)

                if x is None:
                    r = np.random.rand()
                    if r < eps:
                        x = np.random.randint(self.minimal_actions.size)
                    else:
                        x = self.NFQ.predict_action(current_state)

                a = self.minimal_actions[x]
                # Apply an action and get the resulting reward
                reward = self.game.act(a)

                # record only every 3 frames
                # if not moves % 3:
                self.game.getScreenGrayscale(self.screen_data)
                pooled_data = self.processor.process(self.screen_data)
                next_state = self.encoder.encode(pooled_data)
                transition = np.append(current_state, x)
                transition = np.append(transition, next_state)
                transition = np.append(transition, reward)
                self.NFQ.add_transition(transition)

                total_reward += reward
                if reward > 0:
                    hits += 1

                moves += 1
                if eps > 0.1:
                    eps -= 0.00001
            # end while

            print 'Epsilon: ', eps
            print 'Episode', episode+1, 'ended with score:', total_reward
            print 'Hits: ', hits
            self.game.reset_game()
            self.NFQ.train()
            hits = 0
            moves = 0
            self.NFQ.save_net()
        # end for

    ##
    # Play the game!
    def play(self):
        total_reward = 0
        moves = 1
        while not self.game.game_over():
            self.game.getScreenGrayscale(self.screen_data)
            pooled_data = self.processor.process(self.screen_data)
            current_state = self.encoder.encode(pooled_data)

            x = self.NFQ.predict_action(current_state)
            a = self.minimal_actions[x]
            reward = self.game.act(a)
            total_reward += reward
            moves += 1

        print 'The game ended with score:', total_reward, ' after: ', moves, ' moves'
示例#38
0
def main():
    if len(sys.argv) < 2:
        dir_rom = '/Users/lguan/Documents/Study/Research/Atari-2600-Roms/T-Z/Tennis.bin'
    else:
        dir_rom = sys.argv[1]

    ale = ALEInterface()

    # Get & Set the desired settings
    ale.setInt(b'random_seed', 123)

    # Set USE_SDL to true to display the screen. ALE must be compilied
    # with SDL enabled for this to work. On OSX, pygame init is used to
    # proxy-call SDL_main.
    USE_SDL = False
    if USE_SDL:
        # mac OS
        if sys.platform == 'darwin':
            pygame.init()
            ale.setBool('sound', False)  # Sound doesn't work on OSX
        elif sys.platform.startswith('linux'):
            ale.setBool('sound', True)

        ale.setBool('display_screen', True)

    # Load the ROM file
    rom_file = str.encode(dir_rom)
    print('- Loading ROM - %s' % dir_rom)
    ale.loadROM(rom_file)
    print('- Complete loading ROM')

    (game_surface_width, game_surface_height) = ale.getScreenDims()
    print("game surface width/height: "
          + str(game_surface_width) + "/"
          + str(game_surface_height))

    (display_width, display_height) = (800, 640)
    print 'display width/height', (display_width, display_height)

    # init pygame
    pygame.init()
    display_screen = pygame.display.set_mode((display_width, display_height))
    pygame.display.set_caption("Arcade Learning Environment Player Agent Display")

    # init clock
    clock = pygame.time.Clock()
    is_exit = False

    # Play 10 episodes
    for episode in range(10):
        if is_exit:
            break

        total_reward = 0

        while not ale.game_over() and not is_exit:
            a = getActionFromKeyboard()
            # Apply an action and get the resulting reward
            reward = ale.act(a)
            total_reward += reward
            # clear screen
            display_screen.fill((0, 0, 0))
            # render game surface
            renderGameSurface(ale, display_screen, (game_surface_width, game_surface_height))
            # display related info
            displayRelatedInfo(display_screen, a, total_reward)

            pygame.display.flip()

            # process pygame event queue
            for event in pygame.event.get():
                if event.type == pygame.QUIT:
                    is_exit = True
                    break
                if event.type == pygame.KEYDOWN and event.key == pygame.K_q:
                    is_exit = True
                    break

            # delay to 60fps
            clock.tick(60.)

        print('Episode %d ended with score: %d' % (episode, total_reward))
        ale.reset_game()
示例#39
0
class AtariPlayer(RLEnvironment):
    """
    A wrapper for atari emulator.
    NOTE: will automatically restart when a real episode ends
    """
    def __init__(self, rom_file, viz=0, height_range=(None,None),
            frame_skip=4, image_shape=(84, 84), nullop_start=30,
            live_lost_as_eoe=True):
        """
        :param rom_file: path to the rom
        :param frame_skip: skip every k frames and repeat the action
        :param image_shape: (w, h)
        :param height_range: (h1, h2) to cut
        :param viz: visualization to be done.
            Set to 0 to disable.
            Set to a positive number to be the delay between frames to show.
            Set to a string to be a directory to store frames.
        :param nullop_start: start with random number of null ops
        :param live_losts_as_eoe: consider lost of lives as end of episode.  useful for training.
        """
        super(AtariPlayer, self).__init__()
        if not os.path.isfile(rom_file) and '/' not in rom_file:
            rom_file = get_dataset_dir('atari_rom', rom_file)
        assert os.path.isfile(rom_file), \
                "rom {} not found. Please download at {}".format(rom_file, ROM_URL)

        try:
            ALEInterface.setLoggerMode(ALEInterface.Logger.Warning)
        except AttributeError:
            log_once()

        # avoid simulator bugs: https://github.com/mgbellemare/Arcade-Learning-Environment/issues/86
        with _ALE_LOCK:
            self.ale = ALEInterface()
            self.rng = get_rng(self)

            self.ale.setInt(b"random_seed", self.rng.randint(0, 10000))
            self.ale.setBool(b"showinfo", False)

            self.ale.setInt(b"frame_skip", 1)
            self.ale.setBool(b'color_averaging', False)
            # manual.pdf suggests otherwise.
            self.ale.setFloat(b'repeat_action_probability', 0.0)

            # viz setup
            if isinstance(viz, six.string_types):
                assert os.path.isdir(viz), viz
                self.ale.setString(b'record_screen_dir', viz)
                viz = 0
            if isinstance(viz, int):
                viz = float(viz)
            self.viz = viz
            if self.viz and isinstance(self.viz, float):
                self.windowname = os.path.basename(rom_file)
                cv2.startWindowThread()
                cv2.namedWindow(self.windowname)

            self.ale.loadROM(rom_file.encode('utf-8'))
        self.width, self.height = self.ale.getScreenDims()
        self.actions = self.ale.getMinimalActionSet()


        self.live_lost_as_eoe = live_lost_as_eoe
        self.frame_skip = frame_skip
        self.nullop_start = nullop_start
        self.height_range = height_range
        self.image_shape = image_shape

        self.current_episode_score = StatCounter()
        self.restart_episode()

    def _grab_raw_image(self):
        """
        :returns: the current 3-channel image
        """
        m = self.ale.getScreenRGB()
        return m.reshape((self.height, self.width, 3))

    def current_state(self):
        """
        :returns: a gray-scale (h, w, 1) float32 image
        """
        ret = self._grab_raw_image()
        # max-pooled over the last screen
        ret = np.maximum(ret, self.last_raw_screen)
        if self.viz:
            if isinstance(self.viz, float):
                #m = cv2.resize(ret, (1920,1200))
                cv2.imshow(self.windowname, ret)
                time.sleep(self.viz)
        ret = ret[self.height_range[0]:self.height_range[1],:].astype('float32')
        # 0.299,0.587.0.114. same as rgb2y in torch/image
        ret = cv2.cvtColor(ret, cv2.COLOR_RGB2GRAY)
        ret = cv2.resize(ret, self.image_shape)
        ret = np.expand_dims(ret, axis=2)
        return ret

    def get_action_space(self):
        return DiscreteActionSpace(len(self.actions))

    def restart_episode(self):
        if self.current_episode_score.count > 0:
            self.stats['score'].append(self.current_episode_score.sum)
        self.current_episode_score.reset()
        self.ale.reset_game()

        # random null-ops start
        n = self.rng.randint(self.nullop_start)
        self.last_raw_screen = self._grab_raw_image()
        for k in range(n):
            if k == n - 1:
                self.last_raw_screen = self._grab_raw_image()
            self.ale.act(0)

    def action(self, act):
        """
        :param act: an index of the action
        :returns: (reward, isOver)
        """
        oldlives = self.ale.lives()
        r = 0
        for k in range(self.frame_skip):
            if k == self.frame_skip - 1:
                self.last_raw_screen = self._grab_raw_image()
            r += self.ale.act(self.actions[act])
            newlives = self.ale.lives()
            if self.ale.game_over() or \
                    (self.live_lost_as_eoe and newlives < oldlives):
                break

        self.current_episode_score.feed(r)
        isOver = self.ale.game_over()
        if isOver:
            self.restart_episode()
        if self.live_lost_as_eoe:
            isOver = isOver or newlives < oldlives
        return (r, isOver)
示例#40
0
class AtariEnvironment(interfaces.Environment):
    def __init__(self,
                 atari_rom,
                 frame_skip=4,
                 noop_max=30,
                 terminate_on_end_life=False,
                 random_seed=123,
                 frame_history_length=4,
                 use_gui=False,
                 max_num_frames=500000,
                 repeat_action_probability=0.0,
                 record_screen_dir=None):
        self.ale = ALEInterface()
        self.ale.setInt('random_seed', random_seed)
        self.ale.setInt('frame_skip', 1)
        self.ale.setFloat('repeat_action_probability', 0.0)
        self.ale.setInt('max_num_frames_per_episode', max_num_frames)
        if record_screen_dir is not None:
            self.ale.setString('record_screen_dir', record_screen_dir)
        self.ale.loadROM(atari_rom)
        self.frame_skip = frame_skip
        self.repeat_action_probability = repeat_action_probability
        self.noop_max = noop_max
        self.terminate_on_end_life = terminate_on_end_life
        self.current_lives = self.ale.lives()
        self.is_terminal = False
        self.previous_action = 0
        self.num_actions = len(self.ale.getMinimalActionSet())

        w, h = self.ale.getScreenDims()
        self.screen_width = w
        self.screen_height = h
        self.zero_last_frames = [
            np.zeros((84, 84), dtype=np.uint8),
            np.zeros((84, 84), dtype=np.uint8)
        ]
        self.last_two_frames = copy.copy(self.zero_last_frames)
        self.zero_history_frames = [
            np.zeros((84, 84), dtype=np.uint8)
            for i in range(0, frame_history_length)
        ]
        self.frame_history = copy.copy(self.zero_history_frames)
        atari_actions = self.ale.getMinimalActionSet()
        self.atari_to_onehot = dict(
            list(zip(atari_actions, list(range(len(atari_actions))))))
        self.onehot_to_atari = dict(
            list(zip(list(range(len(atari_actions))), atari_actions)))
        self.screen_image = np.zeros(self.screen_height * self.screen_width,
                                     dtype=np.uint8)

        self.use_gui = use_gui
        self.original_frame = np.zeros((h, w), dtype=np.uint8)
        self.refresh_time = datetime.timedelta(milliseconds=1000 / 60)
        self.last_refresh = datetime.datetime.now()
        if (self.use_gui):
            self.gui_screen = pygame.display.set_mode((w, h))

    def getRAM(self, ram=None):
        return self.ale.getRAM(ram)

    def _get_frame(self):
        self.ale.getScreenGrayscale(self.screen_image)
        image = self.screen_image.reshape(
            [self.screen_height, self.screen_width, 1])
        self.original_frame = image
        image = cv2.resize(image, (84, 84))
        return image

    def perform_action(self, onehot_index_action):
        if self.repeat_action_probability > 0:
            if np.random.uniform() < self.repeat_action_probability:
                onehot_index_action = self.previous_action
            self.previous_action = onehot_index_action
        action = self.onehot_to_atari[onehot_index_action]
        state, action, reward, next_state, self.is_terminal = self.perform_atari_action(
            action)
        return state, onehot_index_action, reward, next_state, self.is_terminal

    def perform_atari_action(self, atari_action):
        state = self.get_current_state()
        reward = self._act(atari_action, self.frame_skip)

        if self.use_gui:
            self.refresh_gui()

        self.frame_history[:-1] = self.frame_history[1:]
        self.frame_history[-1] = np.max(self.last_two_frames, axis=0)
        next_state = self.get_current_state()

        return state, atari_action, reward, next_state, self.is_terminal

    def _act(self, ale_action, repeat):
        reward = 0
        for i in range(repeat):
            reward += self.ale.act(ale_action)
            if i >= repeat - 2:
                self.last_two_frames = [
                    self.last_two_frames[1],
                    self._get_frame()
                ]

        self.is_terminal = self.ale.game_over()

        # terminate the episode if current_lives has decreased
        lives = self.ale.lives()
        if self.current_lives != lives:
            if self.current_lives > lives and self.terminate_on_end_life:
                self.is_terminal = True
            self.current_lives = lives

        return reward

    def get_current_state(self):
        #return copy.copy(self.frame_history)
        return [x.copy() for x in self.frame_history]

    def get_actions_for_state(self, state):
        return [
            self.atari_to_onehot[a] for a in self.ale.getMinimalActionSet()
        ]

    def reset_environment(self):
        self.last_two_frames = [self.zero_history_frames[0], self._get_frame()]

        if self.terminate_on_end_life:
            if self.ale.game_over():
                self.ale.reset_game()
        else:
            self.ale.reset_game()

        self.current_lives = self.ale.lives()

        if self.noop_max > 0:
            num_noops = np.random.randint(self.noop_max + 1)
            self._act(0, num_noops)

        self.previous_action = 0
        self.frame_history = copy.copy(self.zero_history_frames)
        self.frame_history[-1] = np.max(self.last_two_frames, axis=0)

        if self.use_gui:
            self.refresh_gui()

    def is_current_state_terminal(self):
        return self.is_terminal

    def refresh_gui(self):
        current_time = datetime.datetime.now()
        if (current_time - self.last_refresh) > self.refresh_time:
            self.last_refresh = current_time

            gui_image = np.tile(
                np.transpose(self.original_frame, axes=(1, 0, 2)), [1, 1, 3])
            # gui_image = np.zeros((self.screen_width, self.screen_height, 3), dtype=np.uint8)
            # channel = np.random.randint(3)
            # gui_image[:,:,channel] = np.transpose(self.original_frame, axes=(1, 0, 2))[:,:,0]

            pygame.surfarray.blit_array(self.gui_screen, gui_image)
            pygame.display.update()
示例#41
0
from visualize import Plotter

# Create the ale interface and load rom files
ale = ALEInterface()

#ale.setBool('display_screen', True)
#pygame.init()

# Load the rom
ale.loadROM('/Users/shashwat/Downloads/pong.bin')

# These are the set of valid actions in the game
legal_actions = ale.getMinimalActionSet()

# How to get screen rgb?
width, height = ale.getScreenDims()
screen_buffer = np.empty((height, width), dtype=np.uint8)

# Some common settings
HISTORY_LENGTH = 4
MAX_STEPS = 1000000
MAX_EPOCHS = 10
MINIBATCH_SIZE = 32
LONG_PRESS_TIMES = 4
GAMMA  = 0.9
EPSILON = 0.1
UPDATE_FREQUENCY = 4
MAX_LIVES = ale.lives()

episode_sum = 0
episode_sums = []
示例#42
0
class AtariPlayer(RLEnvironment):
    """
    A wrapper for atari emulator.
    NOTE: will automatically restart when a real episode ends
    """
    def __init__(self,
                 rom_file,
                 viz=0,
                 height_range=(None, None),
                 frame_skip=4,
                 image_shape=(84, 84),
                 nullop_start=30,
                 live_lost_as_eoe=True):
        """
        :param rom_file: path to the rom
        :param frame_skip: skip every k frames and repeat the action
        :param image_shape: (w, h)
        :param height_range: (h1, h2) to cut
        :param viz: visualization to be done.
            Set to 0 to disable.
            Set to a positive number to be the delay between frames to show.
            Set to a string to be a directory to store frames.
        :param nullop_start: start with random number of null ops
        :param live_losts_as_eoe: consider lost of lives as end of episode.  useful for training.
        """
        super(AtariPlayer, self).__init__()
        if not os.path.isfile(rom_file) and '/' not in rom_file:
            rom_file = os.path.join(get_dataset_dir('atari_rom'), rom_file)
        assert os.path.isfile(rom_file), \
                "rom {} not found. Please download at {}".format(rom_file, ROM_URL)

        try:
            ALEInterface.setLoggerMode(ALEInterface.Logger.Warning)
        except AttributeError:
            log_once()

        # avoid simulator bugs: https://github.com/mgbellemare/Arcade-Learning-Environment/issues/86
        with _ALE_LOCK:
            self.ale = ALEInterface()
            self.rng = get_rng(self)

            self.ale.setInt("random_seed", self.rng.randint(0, 10000))
            self.ale.setBool("showinfo", False)

            self.ale.setInt("frame_skip", 1)
            self.ale.setBool('color_averaging', False)
            # manual.pdf suggests otherwise.
            self.ale.setFloat('repeat_action_probability', 0.0)

            # viz setup
            if isinstance(viz, six.string_types):
                assert os.path.isdir(viz), viz
                self.ale.setString('record_screen_dir', viz)
                viz = 0
            if isinstance(viz, int):
                viz = float(viz)
            self.viz = viz
            if self.viz and isinstance(self.viz, float):
                self.windowname = os.path.basename(rom_file)
                cv2.startWindowThread()
                cv2.namedWindow(self.windowname)

            self.ale.loadROM(rom_file)
        self.width, self.height = self.ale.getScreenDims()
        self.actions = self.ale.getMinimalActionSet()

        self.live_lost_as_eoe = live_lost_as_eoe
        self.frame_skip = frame_skip
        self.nullop_start = nullop_start
        self.height_range = height_range
        self.image_shape = image_shape

        self.current_episode_score = StatCounter()
        self.restart_episode()

    def _grab_raw_image(self):
        """
        :returns: the current 3-channel image
        """
        m = self.ale.getScreenRGB()
        return m.reshape((self.height, self.width, 3))

    def current_state(self):
        """
        :returns: a gray-scale (h, w, 1) float32 image
        """
        ret = self._grab_raw_image()
        # max-pooled over the last screen
        ret = np.maximum(ret, self.last_raw_screen)
        if self.viz:
            if isinstance(self.viz, float):
                #m = cv2.resize(ret, (1920,1200))
                cv2.imshow(self.windowname, ret)
                time.sleep(self.viz)
        ret = ret[self.height_range[0]:self.height_range[1], :].astype(
            'float32')
        # 0.299,0.587.0.114. same as rgb2y in torch/image
        ret = cv2.cvtColor(ret, cv2.COLOR_RGB2GRAY)
        ret = cv2.resize(ret, self.image_shape)
        ret = np.expand_dims(ret, axis=2)
        return ret

    def get_action_space(self):
        return DiscreteActionSpace(len(self.actions))

    def restart_episode(self):
        if self.current_episode_score.count > 0:
            self.stats['score'].append(self.current_episode_score.sum)
        self.current_episode_score.reset()
        self.ale.reset_game()

        # random null-ops start
        n = self.rng.randint(self.nullop_start)
        self.last_raw_screen = self._grab_raw_image()
        for k in range(n):
            if k == n - 1:
                self.last_raw_screen = self._grab_raw_image()
            self.ale.act(0)

    def action(self, act):
        """
        :param act: an index of the action
        :returns: (reward, isOver)
        """
        oldlives = self.ale.lives()
        r = 0
        for k in range(self.frame_skip):
            if k == self.frame_skip - 1:
                self.last_raw_screen = self._grab_raw_image()
            r += self.ale.act(self.actions[act])
            newlives = self.ale.lives()
            if self.ale.game_over() or \
                    (self.live_lost_as_eoe and newlives < oldlives):
                break

        self.current_episode_score.feed(r)
        isOver = self.ale.game_over()
        if isOver:
            self.restart_episode()
        if self.live_lost_as_eoe:
            isOver = isOver or newlives < oldlives
        return (r, isOver)
示例#43
0
class AtariPlayer(gym.Env):
    """
    A wrapper for ALE emulator, with configurations to mimic DeepMind DQN settings.

    Info:
        score: the accumulated reward in the current game
        gameOver: True when the current game is Over
    """

    def __init__(self, rom_file, viz=0,
                 frame_skip=4, nullop_start=30,
                 live_lost_as_eoe=True, max_num_frames=0):
        """
        Args:
            rom_file: path to the rom
            frame_skip: skip every k frames and repeat the action
            viz: visualization to be done.
                Set to 0 to disable.
                Set to a positive number to be the delay between frames to show.
                Set to a string to be a directory to store frames.
            nullop_start: start with random number of null ops.
            live_losts_as_eoe: consider lost of lives as end of episode. Useful for training.
            max_num_frames: maximum number of frames per episode.
        """
        super(AtariPlayer, self).__init__()
        if not os.path.isfile(rom_file) and '/' not in rom_file:
            rom_file = get_dataset_path('atari_rom', rom_file)
        assert os.path.isfile(rom_file), \
            "rom {} not found. Please download at {}".format(rom_file, ROM_URL)

        try:
            ALEInterface.setLoggerMode(ALEInterface.Logger.Error)
        except AttributeError:
            if execute_only_once():
                logger.warn("You're not using latest ALE")

        # avoid simulator bugs: https://github.com/mgbellemare/Arcade-Learning-Environment/issues/86
        with _ALE_LOCK:
            self.ale = ALEInterface()
            self.rng = get_rng(self)
            self.ale.setInt(b"random_seed", self.rng.randint(0, 30000))
            self.ale.setInt(b"max_num_frames_per_episode", max_num_frames)
            self.ale.setBool(b"showinfo", False)

            self.ale.setInt(b"frame_skip", 1)
            self.ale.setBool(b'color_averaging', False)
            # manual.pdf suggests otherwise.
            self.ale.setFloat(b'repeat_action_probability', 0.0)

            # viz setup
            if isinstance(viz, six.string_types):
                assert os.path.isdir(viz), viz
                self.ale.setString(b'record_screen_dir', viz)
                viz = 0
            if isinstance(viz, int):
                viz = float(viz)
            self.viz = viz
            if self.viz and isinstance(self.viz, float):
                self.windowname = os.path.basename(rom_file)
                cv2.startWindowThread()
                cv2.namedWindow(self.windowname)

            self.ale.loadROM(rom_file.encode('utf-8'))
        self.width, self.height = self.ale.getScreenDims()
        self.actions = self.ale.getMinimalActionSet()

        self.live_lost_as_eoe = live_lost_as_eoe
        self.frame_skip = frame_skip
        self.nullop_start = nullop_start

        self.action_space = spaces.Discrete(len(self.actions))
        self.observation_space = spaces.Box(
            low=0, high=255, shape=(self.height, self.width))
        self._restart_episode()

    def get_action_meanings(self):
        return [ACTION_MEANING[i] for i in self.actions]

    def _grab_raw_image(self):
        """
        :returns: the current 3-channel image
        """
        m = self.ale.getScreenRGB()
        return m.reshape((self.height, self.width, 3))

    def _current_state(self):
        """
        :returns: a gray-scale (h, w) uint8 image
        """
        ret = self._grab_raw_image()
        # max-pooled over the last screen
        ret = np.maximum(ret, self.last_raw_screen)
        if self.viz:
            if isinstance(self.viz, float):
                cv2.imshow(self.windowname, ret)
                cv2.waitKey(int(self.viz * 1000))
        ret = ret.astype('float32')
        # 0.299,0.587.0.114. same as rgb2y in torch/image
        ret = cv2.cvtColor(ret, cv2.COLOR_RGB2GRAY)
        return ret.astype('uint8')  # to save some memory

    def _restart_episode(self):
        with _ALE_LOCK:
            self.ale.reset_game()

        # random null-ops start
        n = self.rng.randint(self.nullop_start)
        self.last_raw_screen = self._grab_raw_image()
        for k in range(n):
            if k == n - 1:
                self.last_raw_screen = self._grab_raw_image()
            self.ale.act(0)

    def _reset(self):
        if self.ale.game_over():
            self._restart_episode()
        return self._current_state()

    def _step(self, act):
        oldlives = self.ale.lives()
        r = 0
        for k in range(self.frame_skip):
            if k == self.frame_skip - 1:
                self.last_raw_screen = self._grab_raw_image()
            r += self.ale.act(self.actions[act])
            newlives = self.ale.lives()
            if self.ale.game_over() or \
                    (self.live_lost_as_eoe and newlives < oldlives):
                break

        isOver = self.ale.game_over()
        if self.live_lost_as_eoe:
            isOver = isOver or newlives < oldlives

        info = {'ale.lives': newlives}
        return self._current_state(), r, isOver, info
示例#44
0
 def getScreenDims(self):
     w, h = ALEInterface.getScreenDims(self)
     return h, w  #dp
示例#45
0
class AtariDriver(object):
    """
    A wrapper for atari emulator.
    """
    def __init__(self, rom_file, frame_skip=1, viz=0):
        """
        :param rom_file: path to the rom
        :param frame_skip: skip every k frames
        :param viz: the delay. visualize the game while running. 0 to disable
        """
        self.ale = ALEInterface()
        self.rng = get_rng(self)

        self.ale.setInt("random_seed", self.rng.randint(self.rng.randint(0, 1000)))
        self.ale.setInt("frame_skip", frame_skip)
        self.ale.loadROM(rom_file)
        self.width, self.height = self.ale.getScreenDims()
        self.actions = self.ale.getMinimalActionSet()

        if isinstance(viz, int):
            viz = float(viz)
        self.viz = viz
        self.romname = os.path.basename(rom_file)
        if self.viz and isinstance(self.viz, float):
            cv2.startWindowThread()
            cv2.namedWindow(self.romname)

        self._reset()
        self.last_image = self._grab_raw_image()
        self.framenum = 0

    def _grab_raw_image(self):
        """
        :returns: the current 3-channel image
        """
        m = np.zeros(self.height * self.width * 3, dtype=np.uint8)
        self.ale.getScreenRGB(m)
        return m.reshape((self.height, self.width, 3))

    def grab_image(self):
        """
        :returns: a gray-scale image, max-pooled over the last frame.
        """
        now = self._grab_raw_image()
        ret = np.maximum(now, self.last_image)
        self.last_image = now
        if self.viz and isinstance(self.viz, float):
            cv2.imshow(self.romname, ret)
            time.sleep(self.viz)
        elif self.viz:
            cv2.imwrite("{}/{:06d}.jpg".format(self.viz, self.framenum), ret)
            self.framenum += 1
        ret = cv2.cvtColor(ret, cv2.COLOR_BGR2YUV)[:,:,0]
        ret = ret[36:204,:]   # several online repos all use this
        return ret

    def get_num_actions(self):
        """
        :returns: the number of legal actions
        """
        return len(self.actions)

    def _reset(self):
        self.ale.reset_game()

    def next(self, act):
        """
        :param act: an index of the action
        :returns: (next_image, reward, isOver)
        """
        r = self.ale.act(self.actions[act])
        s = self.grab_image()
        isOver = self.ale.game_over()
        if isOver:
            self._reset()
        return (s, r, isOver)
示例#46
0
class AtariEnvironment:
    num_actions = 18  # Use full action set

    def __init__(self, frame_shape, frame_postprocess=lambda x: x):
        self.ale = ALEInterface()
        self.ale.setBool(b"display_screen", cfg.display_screen)
        self.ale.setInt(b"frame_skip", 1)
        self.ale.setBool(b"color_averaging", False)
        self.ale.setInt(b"random_seed", cfg.random_seed)
        self.ale.setFloat(b"repeat_action_probability", cfg.sticky_prob)

        self.ale.loadROM(str.encode(cfg.rom))

        self.ale.setMode(cfg.mode)
        self.ale.setDifficulty(cfg.difficulty)

        self.action_set = self.ale.getLegalActionSet()
        assert len(self.action_set) == AtariEnvironment.num_actions

        screen_dims = tuple(reversed(self.ale.getScreenDims())) + (1,)
        self._frame_buffer = CircularBuffer(
            cfg.frame_buffer_size, screen_dims, np.uint8
        )
        self._frame_stack = CircularBuffer(
            cfg.frame_history_size, frame_shape, np.uint8
        )
        self._frame_postprocess = frame_postprocess

        self._episode_count = 0
        self.reset(inc_episode_count=False)

    def _is_terminal(self):
        return self.ale.game_over()

    def _get_single_frame(self):
        stacked_frames = np.concatenate(self._frame_buffer, axis=2)
        maxed_frame = np.amax(stacked_frames, axis=2)
        expanded_frame = np.expand_dims(maxed_frame, 3)
        frame = self._frame_postprocess(expanded_frame)

        return frame

    def reset(self, inc_episode_count=True):
        self._episode_frames = 0
        self._episode_reward = 0
        if inc_episode_count:
            self._episode_count += 1

        self.ale.reset_game()
        for _ in range(cfg.frame_buffer_size):
            self._frame_buffer.append(self.ale.getScreenGrayscale())
        for _ in range(cfg.frame_history_size):
            self._frame_stack.append(self._get_single_frame())

    def act(self, action):
        assert not self._is_terminal()

        cum_reward = 0
        for _ in range(cfg.frame_skip):
            cum_reward += self.ale.act(self.action_set[action])
            self._frame_buffer.append(self.ale.getScreenGrayscale())

        self._frame_stack.append(self._get_single_frame())
        self._episode_frames += cfg.frame_skip
        self._episode_reward += cum_reward
        cum_reward = np.clip(cum_reward, -1, 1)

        return cum_reward, self.state, self._is_terminal()

    @property
    def state(self):
        assert len(self._frame_buffer) == cfg.frame_buffer_size
        assert len(self._frame_stack) == cfg.frame_history_size
        return np.concatenate(self._frame_stack, axis=-1)

    @property
    def episode_reward(self):
        return self._episode_reward

    @property
    def episode_frames(self):
        return self._episode_frames

    @property
    def episode_steps(self):
        return self._episode_frames // cfg.frame_skip

    @property
    def episode_count(self):
        return self._episode_count
示例#47
0
class ALEEnvironment(BaseEnvironment):
    """
    The :class:`MinimalGameHandler` class takes care of the interface to the ALE and tries to do nothing else. It's
    meant for advanced users who need fine control over every aspect of the process. It has many functions that are simply
    wrappers of the underlying ALE but with pythonic names/usage.

    Parameters
    ----------
    rom : byte string
        Specifies the directory to load the rom from. Must be a byte string: b'dir_for_rom/rom.bin'
    display_screen : boolean
        Default False. Whether or not to show the game. True takes longer to run but can be fun to watch
    step_cap: int
        Default None. Maximum number of steps to run in an episode. Breakout can sometimes not return terminal
        even when game is ended. This fixes that and will return terminal after stepping above this count
    """
    def __init__(self, rom, resize_shape=(84, 84), skip_frame=1, repeat_action_probability=0.0,
                 step_cap=None, loss_of_life_termination=False, loss_of_life_negative_reward=False,
                 grayscale=True, display_screen=False, seed=np.random.RandomState()):
        # set up emulator
        self.ale = ALEInterface()

        if display_screen:
            self.ale.setBool(b'display_screen', True)

        self.ale.setInt(b'frame_skip', skip_frame)
        self.ale.setInt(b'random_seed', seed.randint(0, 9999))
        self.ale.setFloat(b'repeat_action_probability', repeat_action_probability)
        self.ale.setBool(b'color_averaging', False)

        self.ale.loadROM(rom.encode())

        # setup gamescreen object. I think this is faster than recreating an empty each time
        width, height = self.ale.getScreenDims()
        channels = 1 if grayscale else 3
        self.grayscale = grayscale
        self.gamescreen = np.empty((height, width, 1), dtype=np.uint8)

        self.resize_shape = resize_shape
        self.skip_frame = skip_frame
        self.step_cap = step_cap
        self.curr_step_count = 0

        # setup action converter
        # ALE returns legal action indexes, convert these to just numbers
        self.action_inds = self.ale.getMinimalActionSet()

        # setup lives
        self.loss_of_life_negative_reward = loss_of_life_negative_reward
        self.cur_lives = self.ale.lives()
        self.loss_of_life_termination = loss_of_life_termination
        self.life_lost = False

    def reset(self):
        self.ale.reset_game()
        self.cur_lives = self.ale.lives()
        self.life_lost = False
        self.curr_step_count = 0

    def step(self, action):
        self.curr_step_count += 1
        ale_action = self.action_inds[action]
        return self._step(ale_action)

    def _step(self, ale_action):
        if not self.loss_of_life_termination and not self.loss_of_life_negative_reward:
            return self.ale.act(ale_action)
        else:
            rew = self.ale.act(ale_action)
            new_lives = self.ale.lives()
            if new_lives < self.cur_lives:
                # if loss of life is negative reward subtract 1 from reward
                if self.loss_of_life_negative_reward:
                    rew -= 1
                self.cur_lives = new_lives
                self.life_lost = True
            return rew

    def get_state(self):
        if self.grayscale:
            self.gamescreen = self.ale.getScreenGrayscale(self.gamescreen)
        else:
            self.gamescreen = self.ale.getScreenRGB(self.gamescreen)
        # if resize_shape is none then don't resize
        if self.resize_shape is not None:
            # if grayscale we remove the last dimmension (channel)
            if self.grayscale:
                processedImg = imresize(self.gamescreen[:, :, 0], self.resize_shape)
            else:
                processedImg = imresize(self.gamescreen, self.resize_shape)
        return processedImg

    def get_state_shape(self):
        return self.resize_shape

    def get_terminal(self):
        if self.loss_of_life_termination and self.life_lost:
            return True
        elif self.step_cap is not None and self.curr_step_count > self.step_cap:
            return True
        else:
            return self.ale.game_over()

    def get_num_actions(self):
        return len(self.action_inds)
示例#48
0
class ALEEnvironment(BaseEnvironment):
    """
    A wrapper of Arcade Learning Environment, which inherits all members of ``BaseEnvironment``.
    """
    # 63 games
    ADVENTURE = "adventure"
    AIR_RAID = "air_raid"
    ALIEN = "alien"
    AMIDAR = "amidar"
    ASSAULT = "assault"
    ASTERIX = "asterix"
    ASTEROIDS = "asteroids"
    ATLANTIS = "aslantis"
    BANK_HEIST = "bank_heist"
    BATTLE_ZONE = "battle_zone"
    BEAM_RIDER = "beam_rider"
    BERZERK = "berzerk"
    BOWLING = "bowling"
    BOXING = "boxing"
    BREAKOUT = "breakout"
    CARNIVAL = "carnival"
    CENTIPEDE = "centipede"
    CHOPPER_COMMAND = "chopper_command"
    CRAZY_CLIMBER = "crazy_climber"
    DEFENDER = "defender"
    DEMON_ATTACK = "demon_attack"
    DOUBLE_DUNK = "double_dunk"
    ELEVATOR_ACTION = "elevator_action"
    ENDURO = "enduro"
    FISHING_DERBY = "fishing_derby"
    FREEWAY = "freeway"
    FROSTBITE = "frostbite"
    GOPHER = "gopher"
    GRAVITAR = "gravitar"
    HERO = "hero"
    ICE_HOCKEY = "ice_hockey"
    JAMESBOND = "jamesbond"
    JOURNEY_ESCAPE = "journey_escape"
    KABOOM = "kaboom"
    KANGAROO = "kangaroo"
    KRULL = "krull"
    KUNGFU_MASTER = "kung_fu_master"
    MONTEZUMA = "montezuma_revenge"
    MS_PACMAN = "ms_pacman"
    UNKNOWN = "name_this_game"
    PHOENIX = "phoenix"
    PITFALL = "pitfall"
    PONG = "pong"
    POOYAN = "pooyan"
    PRIVATE_EYE = "private_eye"
    QBERT = "qbert"
    RIVERRAID = "riverraid"
    ROAD_RUNNER = "road_runner"
    ROBOTANK = "robotank"
    SEAQUEST = "seaquest"
    SKIING = "skiing"
    SOLARIS = "solaris"
    SPACE_INVADERS = "space_invaders"
    STAR_GUNNER = "star_gunner"
    TENNIS = "tennis"
    TIME_PILOT = "time_pilot"
    TUTANKHAM = "tutankham"
    UP_N_DOWN = "up_n_down"
    VENTURE = "venture"
    VIDEO_PINBALL = "video_pinball"
    WIZARD_OF_WOR = "wizard_of_wor"
    YARS_REVENGE = "yars_revenge"
    ZAXXON = "zaxxon"

    def __init__(self,
                 rom_name,
                 frame_skip=4,
                 repeat_action_probability=0.,
                 max_episode_steps=10000,
                 loss_of_life_termination=False,
                 loss_of_life_negative_reward=False,
                 bitwise_max_on_two_consecutive_frames=False,
                 is_render=False,
                 seed=None,
                 startup_policy=None,
                 disable_actions=None,
                 num_of_sub_actions=-1,
                 state_processor=AtariProcessor(resize_shape=(84, 84),
                                                convert_to_grayscale=True)):

        os.environ['SDL_VIDEO_CENTERED'] = '1'

        file_exist = isfile(ALEEnvironment.get_rom_path(rom_name))
        if not file_exist:
            raise ValueError("Rom not found ! Please put rom " + rom_name +
                             ".bin into: " + ALEEnvironment.get_rom_path())

        self.__rom_name = rom_name
        self.__ale = ALEInterface()

        if frame_skip < 0:
            print("Invalid frame_skip param ! Set default frame_skip = 4")
            self.__frame_skip = 4
        else:
            self.__frame_skip = frame_skip

        if repeat_action_probability < 0 or repeat_action_probability > 1:
            raise ValueError("Invalid repeat_action_probability")
        else:
            self.__repeat_action_probability = repeat_action_probability

        self.__max_episode_steps = max_episode_steps
        self.__loss_of_life_termination = loss_of_life_termination
        self.__loss_of_life_negative_reward = loss_of_life_negative_reward
        self.__max_2_frames = bitwise_max_on_two_consecutive_frames

        # Max 2 frames only work with grayscale
        self.__grayscale = False
        if state_processor is not None and type(
                state_processor
        ) is AtariProcessor and state_processor.get_grayscale():
            self.__grayscale = True

        if self.__max_2_frames and self.__frame_skip > 1 and self.__grayscale:
            self.__max_2_frames = True
        else:
            self.__max_2_frames = False

        self.__is_render = is_render
        self.__processor = state_processor

        if seed is None or seed <= 0 or seed >= 9999:
            if seed is not None and (seed < 0 or seed >= 9999):
                print("Invalid seed ! Default seed = randint(0, 9999")
            self.__seed = np.random.randint(0, 9999)
            self.__random_seed = True
        else:
            self.__random_seed = False
            self.__seed = seed

        self.__current_steps = 0
        self.__is_life_lost = False
        self.__is_terminal = False
        self.__current_lives = 0
        self.__action_reduction = num_of_sub_actions
        self.__scr_width, self.__scr_height, self.__action_set = self.__init_ale(
        )
        self.__prev_buffer = np.empty((self.__scr_height, self.__scr_width, 3),
                                      dtype=np.uint8)
        self.__current_buffer = np.empty(
            (self.__scr_height, self.__scr_width, 3), dtype=np.uint8)
        self.__current_state = None
        self.__prev_state = None
        self.__startup_policy = startup_policy
        if disable_actions is None:
            self.__dis_act = []
        else:
            self.__dis_act = disable_actions

        if self.__processor is not None and self.__processor.get_number_of_objectives(
        ) > 1:
            self.__multi_objs = True
        else:
            self.__multi_objs = False

    def get_processor(self):
        return self.__processor

    def __init_ale(self):

        self.__ale.setBool(b'display_screen', self.__is_render)

        if self.__max_2_frames and self.__frame_skip > 1:
            self.__ale.setInt(b'frame_skip', 1)
        else:
            self.__ale.setInt(b'frame_skip', self.__frame_skip)

        self.__ale.setInt(b'random_seed', self.__seed)
        self.__ale.setFloat(b'repeat_action_probability',
                            self.__repeat_action_probability)
        self.__ale.setBool(b'color_averaging', False)

        self.__ale.loadROM(
            ALEEnvironment.get_rom_path(self.__rom_name).encode())

        width, height = self.__ale.getScreenDims()
        return width, height, self.__ale.getMinimalActionSet()

    def clone(self):
        if self.__random_seed:
            seed = np.random.randint(0, 9999)
        else:
            seed = self.__seed

        return ALEEnvironment(self.__rom_name, self.__frame_skip,
                              self.__repeat_action_probability,
                              self.__max_episode_steps,
                              self.__loss_of_life_termination,
                              self.__loss_of_life_negative_reward,
                              self.__max_2_frames, self.__is_render, seed,
                              self.__startup_policy,
                              self.__dis_act, self.__action_reduction,
                              self.__processor.clone())

    def step_all(self, a):
        if isinstance(a, (list, np.ndarray)):
            if len(a) <= 0:
                raise ValueError('Empty action list !')
            a = a[0]
        self.__current_steps += 1
        act = self.__action_set[a]
        rew = self._step(act)
        next_state = self.get_state()
        _is_terminal = self.is_terminal()
        return next_state, rew, _is_terminal, self.__current_steps

    def reset(self):
        self.__ale.reset_game()
        self.__current_lives = self.__ale.lives()
        self.__is_life_lost = False
        self.__is_terminal = False
        self.__current_state = None
        self.__prev_state = None

        action_space = self.get_action_space()
        v_range, is_range = action_space.get_range()
        if len(v_range) > 1:
            self.step(1)

        # No op steps
        if self.__startup_policy is not None:
            max_steps = int(self.__startup_policy.get_max_steps())
            for _ in range(max_steps):
                act = self.__startup_policy.step(self.get_state(),
                                                 action_space)
                self.step(act)

        # Start training from this point
        self.__current_steps = 0

        # Reset processor
        if self.__processor is not None:
            self.__processor.reset()

        return self.get_state()

    def _pre_step(self, act):
        if self.__max_2_frames and self.__frame_skip > 1:
            rew = 0
            for i in range(self.__frame_skip - 2):
                rew += self.__ale.act(act)
                self.__prev_buffer = self.__ale.getScreenRGB(
                    self.__prev_buffer)

            self.__prev_buffer = self.__ale.getScreenRGB(self.__prev_buffer)

            rew += self.__ale.act(act)

            self.__current_buffer = self.__ale.getScreenRGB(
                self.__current_buffer)

            self.__is_terminal = self.__ale.game_over()

            if self.__processor is not None:
                self.__prev_state = self.__processor.process(
                    self.__prev_buffer)
                self.__current_state = self.__processor.process(
                    self.__current_buffer)
            else:
                self.__prev_state = self.__prev_buffer
                self.__current_state = self.__current_buffer

            self.__current_state = np.maximum.reduce(
                [self.__prev_state, self.__current_state])
        else:
            rew = self.__ale.act(act)
            self.__current_buffer = self.__ale.getScreenRGB(
                self.__current_buffer)
            self.__is_terminal = self.__ale.game_over()

            if self.__processor is not None:
                self.__current_state = self.__processor.process(
                    self.__current_buffer)

        if self.__multi_objs and self.__processor is not None:
            all_rewards = self.__processor.get_rewards(rew)
            return all_rewards
        else:
            return rew

    def _step(self, act):
        for i in range(len(self.__dis_act)):
            if act == self.__dis_act[i]:
                act = 0

        if not self.__loss_of_life_termination and not self.__loss_of_life_negative_reward:
            if not self.__is_terminal:
                next_lives = self.__ale.lives()
                if next_lives < self.__current_lives:
                    act = 1
                    self.__current_lives = next_lives
            return self._pre_step(act)
        else:
            rew = self._pre_step(act)
            next_lives = self.__ale.lives()
            if next_lives < self.__current_lives:
                if self.__loss_of_life_negative_reward:
                    rew -= 1
                self.__current_lives = next_lives
                self.__is_life_lost = True

            return rew

    def get_state(self):
        if not self.__max_2_frames:
            if self.__processor is not None:
                return self.__current_state
            else:
                return self.__current_buffer
        else:
            return self.__current_state

    def is_terminal(self):
        if self.__loss_of_life_termination and self.__is_life_lost:
            return True
        elif self.__max_episode_steps is not None and self.__current_steps > self.__max_episode_steps:
            return True
        else:
            return self.__is_terminal

    @staticmethod
    def get_rom_path(rom=None):
        if rom is None:
            return os.path.dirname(os.path.abspath(__file__)) + "/roms/"
        else:
            return os.path.dirname(
                os.path.abspath(__file__)) + "/roms/" + rom + ".bin"

    @staticmethod
    def list_all_roms():
        return [
            f for f in listdir(ALEEnvironment.get_rom_path())
            if isfile(join(ALEEnvironment.get_rom_path(), f))
        ]

    def get_state_space(self):
        if self.__processor is None:
            shape = self.__current_buffer.shape
        else:
            shape = self.__processor.process(self.__current_buffer).shape
        min_value = np.zeros(shape, dtype=np.uint8)
        max_value = np.full(shape, 255)
        return Space(min_value, max_value, True)

    def get_action_space(self):
        if self.__action_reduction >= 1:
            return Space(0, self.__action_reduction - 1, True)
        else:
            return Space(0, len(self.__action_set) - 1, True)

    def step(self, act):
        if isinstance(act, (list, np.ndarray)):
            if len(act) <= 0:
                raise ValueError('Empty action list !')
            act = act[0]
        self.__current_steps += 1
        act = self.__action_set[act]
        rew = self._step(act)
        return rew

    def get_current_steps(self):
        return self.__current_steps

    def is_atari(self):
        return True

    def is_render(self):
        return self.__is_render

    def get_number_of_objectives(self):
        if self.__processor is None:
            return 1
        else:
            return self.__processor.get_number_of_objectives()

    def get_number_of_agents(self):
        if self.__processor is None:
            return 1
        else:
            return self.__processor.get_number_of_agents()

    def get_state_processor(self):
        return self.__processor
示例#49
0
文件: ale_env.py 项目: only4hj/DeepRL
class AleEnv():
    def __init__(self, rom, display_screen, use_env_frame_skip, frame_repeat):
        self.actions = None
        self.rom = rom
        self.display_screen = display_screen
        self.use_env_frame_skip = use_env_frame_skip
        self.frame_repeat = frame_repeat
        
    def initialize(self):
        self.ale = ALEInterface()
        self.ale.setInt("random_seed", random.randint(1, 1000))
        if self.display_screen:
            self.ale.setBool('display_screen', True)

        if self.use_env_frame_skip == True:
            self.ale.setInt('frame_skip', self.frame_repeat)
            self.ale.setBool('color_averaging', True)        
 
        self.ale.setFloat('repeat_action_probability', 0)
        self.ale.loadROM(self.rom)
        self.actions = self.ale.getMinimalActionSet()
        print 'actions: %s' % self.actions
        (self.screen_width,self.screen_height) = self.ale.getScreenDims()
        print("width/height: " +str(self.screen_width) + "/" + str(self.screen_height))
        
        self.initialized = True
        
    def get_actions(self, rom=None):
        if self.actions is None and rom != None:
            ale = ALEInterface()
            ale.loadROM(rom)
            self.actions = ale.getMinimalActionSet()
        return self.actions
        
    @property
    def state_dtype(self):
        return np.uint8
        
    @property
    def continuous_action(self):
        return False
    
    def reset_game(self):
        self.ale.reset_game()
        
    def lives(self):
        return self.ale.lives()
    
    def getScreenRGB(self):
        return self.ale.getScreenRGB()
    
    def getState(self, debug_display=False, debug_input=None):
        screen = self.ale.getScreenGrayscale()
        if screen is not None and debug_display:
            debug_input.show(screen.reshape(screen.shape[0], screen.shape[1]))
        return screen.reshape(self.screen_height, self.screen_width)
    
    def act(self, action):
        return self.ale.act(action)
    
    def game_over(self):
        return self.ale.game_over()
    
    def finish(self):
        return    
示例#50
0
class MyEnv(Environment):
    VALIDATION_MODE = 0

    def __init__(self,
                 rng,
                 rom="ale/breakout.bin",
                 frame_skip=4,
                 ale_options=[{
                     "key": "random_seed",
                     "value": 0
                 }, {
                     "key": "color_averaging",
                     "value": True
                 }, {
                     "key": "repeat_action_probability",
                     "value": 0.
                 }]):
        self._mode = -1
        self._modeScore = 0.0
        self._modeEpisodeCount = 0

        self._frameSkip = frame_skip if frame_skip >= 1 else 1
        self._randomState = rng

        self._ale = ALEInterface()
        for option in ale_options:
            t = type(option["value"])
            if t is int:
                self._ale.setInt(option["key"], option["value"])
            elif t is float:
                self._ale.setFloat(option["key"], option["value"])
            elif t is bool:
                self._ale.setBool(option["key"], option["value"])
            else:
                raise ValueError(
                    "Option {} ({}) is not an int, bool or float.".format(
                        option["key"], t))
        self._ale.loadROM(rom)

        w, h = self._ale.getScreenDims()
        self._screen = np.empty((h, w), dtype=np.uint8)
        self._reducedScreen = np.empty((84, 84), dtype=np.uint8)
        self._actions = self._ale.getMinimalActionSet()

    def reset(self, mode):
        if mode == MyEnv.VALIDATION_MODE:
            if self._mode != MyEnv.VALIDATION_MODE:
                self._mode = MyEnv.VALIDATION_MODE
                self._modeScore = 0.0
                self._modeEpisodeCount = 0
            else:
                self._modeEpisodeCount += 1
        elif self._mode != -1:  # and thus mode == -1
            self._mode = -1

        self._ale.reset_game()
        for _ in range(self._randomState.randint(15)):
            self._ale.act(0)
        self._ale.getScreenGrayscale(self._screen)
        cv2.resize(self._screen, (84, 84),
                   self._reducedScreen,
                   interpolation=cv2.INTER_NEAREST)

        return [4 * [84 * [84 * [0]]]]

    def act(self, action):
        action = self._actions[action]

        reward = 0
        for _ in range(self._frameSkip):
            reward += self._ale.act(action)
            if self.inTerminalState():
                break

        self._ale.getScreenGrayscale(self._screen)
        cv2.resize(self._screen, (84, 84),
                   self._reducedScreen,
                   interpolation=cv2.INTER_NEAREST)

        self._modeScore += reward
        return np.sign(reward)

    def summarizePerformance(self, test_data_set):
        if self.inTerminalState() == False:
            self._modeEpisodeCount += 1
        print("== Mean score per episode is {} over {} episodes ==".format(
            self._modeScore / self._modeEpisodeCount, self._modeEpisodeCount))

    def inputDimensions(self):
        return [(4, 84, 84)]

    def observationType(self, subject):
        return np.uint8

    def nActions(self):
        return len(self._actions)

    def observe(self):
        return [np.array(self._reducedScreen)]

    def inTerminalState(self):
        return self._ale.game_over()
示例#51
0
class AtariPlayer(RLEnvironment):
    """
    A wrapper for atari emulator.
    """
    def __init__(self, rom_file, viz=0, height_range=(None,None),
            frame_skip=4, image_shape=(84, 84), nullop_start=30,
            live_lost_as_eoe=True):
        """
        :param rom_file: path to the rom
        :param frame_skip: skip every k frames and repeat the action
        :param image_shape: (w, h)
        :param height_range: (h1, h2) to cut
        :param viz: visualization to be done.
            Set to 0 to disable.
            Set to a positive number to be the delay between frames to show.
            Set to a string to be a directory to store frames.
        :param nullop_start: start with random number of null ops
        :param live_losts_as_eoe: consider lost of lives as end of episode.  useful for training.
        """
        super(AtariPlayer, self).__init__()
        self.ale = ALEInterface()
        self.rng = get_rng(self)

        self.ale.setInt("random_seed", self.rng.randint(0, 10000))
        self.ale.setBool("showinfo", False)

        try:
            ALEInterface.setLoggerMode(ALEInterface.Logger.Warning)
        except AttributeError:
            log_once()

        self.ale.setInt("frame_skip", 1)
        self.ale.setBool('color_averaging', False)
        # manual.pdf suggests otherwise. may need to check
        self.ale.setFloat('repeat_action_probability', 0.0)

        # viz setup
        if isinstance(viz, six.string_types):
            assert os.path.isdir(viz), viz
            self.ale.setString('record_screen_dir', viz)
            viz = 0
        if isinstance(viz, int):
            viz = float(viz)
        self.viz = viz
        if self.viz and isinstance(self.viz, float):
            self.windowname = os.path.basename(rom_file)
            cv2.startWindowThread()
            cv2.namedWindow(self.windowname)

        self.ale.loadROM(rom_file)
        self.width, self.height = self.ale.getScreenDims()
        self.actions = self.ale.getMinimalActionSet()


        self.live_lost_as_eoe = live_lost_as_eoe
        self.frame_skip = frame_skip
        self.nullop_start = nullop_start
        self.height_range = height_range
        self.image_shape = image_shape

        self.current_episode_score = StatCounter()
        self.restart_episode()

    def _grab_raw_image(self):
        """
        :returns: the current 3-channel image
        """
        m = self.ale.getScreenRGB()
        return m.reshape((self.height, self.width, 3))

    def current_state(self):
        """
        :returns: a gray-scale (h, w, 1) image
        """
        ret = self._grab_raw_image()
        # max-pooled over the last screen
        ret = np.maximum(ret, self.last_raw_screen)
        if self.viz:
            if isinstance(self.viz, float):
                cv2.imshow(self.windowname, ret)
                time.sleep(self.viz)
        ret = ret[self.height_range[0]:self.height_range[1],:]
        # 0.299,0.587.0.114. same as rgb2y in torch/image
        ret = cv2.cvtColor(ret, cv2.COLOR_RGB2GRAY)
        ret = cv2.resize(ret, self.image_shape)
        ret = np.expand_dims(ret, axis=2)
        return ret

    def get_num_actions(self):
        """
        :returns: the number of legal actions
        """
        return len(self.actions)

    def restart_episode(self):
        if self.current_episode_score.count > 0:
            self.stats['score'].append(self.current_episode_score.sum)
        self.current_episode_score.reset()
        self.ale.reset_game()

        # random null-ops start
        n = self.rng.randint(self.nullop_start)
        self.last_raw_screen = self._grab_raw_image()
        for k in range(n):
            if k == n - 1:
                self.last_raw_screen = self._grab_raw_image()
            self.ale.act(0)

    def action(self, act):
        """
        :param act: an index of the action
        :returns: (reward, isOver)
        """
        oldlives = self.ale.lives()
        r = 0
        for k in range(self.frame_skip):
            if k == self.frame_skip - 1:
                self.last_raw_screen = self._grab_raw_image()
            r += self.ale.act(self.actions[act])
            newlives = self.ale.lives()
            if self.ale.game_over() or \
                    (self.live_lost_as_eoe and newlives < oldlives):
                break

        self.current_episode_score.feed(r)
        isOver = self.ale.game_over()
        if isOver:
            self.restart_episode()
        if self.live_lost_as_eoe:
            isOver = isOver or newlives < oldlives
        return (r, isOver)

    def get_stat(self):
        try:
            return {'avg_score': np.mean(self.stats['score']),
                    'max_score': float(np.max(self.stats['score'])) }
        except ValueError:
            return {}