示例#1
0
class FastAtariEnv(AtariEnv):
    def __init__(self,
                 game='Breakout',
                 obs_type='image',
                 frameskip=(2, 5),
                 repeat_action_probability=0.):
        self.game_path = atari_py.get_game_path(game)
        self._obs_type = obs_type
        self.frameskip = frameskip
        self.ale = ALEInterface()
        self.viewer = None
        assert isinstance(
            repeat_action_probability,
            (float, int)), "Invalid repeat_action_probability: {!r}".format(
                repeat_action_probability)
        self.ale.setFloat('repeat_action_probability'.encode('utf-8'),
                          repeat_action_probability)
        self._seed()
        (screen_width, screen_height) = self.ale.getScreenDims()
        self._buffer = np.empty((screen_height, screen_width, 3),
                                dtype=np.uint8)

    def _get_image(self):
        # Don't reorder from rgb to bgr as we're converting to greyscale anyway
        self.ale.getScreenRGB(self._buffer)  # says rgb but actually bgr
        return self._buffer
class env_atari:
    def __init__(self, params):
        self.params = params
        self.ale = ALEInterface()
        self.ale.setInt('random_seed', np.random.randint(0, 500))
        self.ale.setFloat('repeat_action_probability', params['repeat_prob'])
        self.ale.setInt(b'frame_skip', params['frameskip'])
        self.ale.setBool('color_averaging', True)
        self.ale.loadROM('roms/' + params['rom'] + '.bin')
        self.actions = self.ale.getMinimalActionSet()
        self.action_space = c_action_space(len(self.actions))
        self.screen_width, self.screen_height = self.ale.getScreenDims()

    def reset(self):
        self.ale.reset_game()
        seed = np.random.randint(0, 7)
        for i in range(seed):
            self.ale.act(0)
        return self.get_image()

    def step(self, action):
        reward = self.ale.act(self.actions[action])
        next_s = self.get_image()
        terminate = self.ale.game_over()
        return next_s, reward, float(terminate), 0

    def get_image(self):
        temp = np.zeros(self.screen_height * self.screen_width * 3,
                        dtype=np.uint8)
        self.ale.getScreenRGB(temp)
        #self.ale.getScreenGrayscale(temp)
        return temp.reshape((self.screen_height, self.screen_width, 3))
示例#3
0
class Atari:
    def __init__(self, rom_dir):
        self.ale = ALEInterface()

        # Set settings
        self.ale.setInt("random_seed", 123)
        self.frame_skip = 4
        self.ale.setInt("frame_skip", self.frame_skip)
        self.ale.setBool("display_screen", False)
        self.ale.setBool("sound", True)
        self.record_sound_for_user = True
        self.ale.setBool("record_sound_for_user", self.record_sound_for_user)

        # NOTE recording audio to file still works. But if both file recording and
        # record_sound_for_user are enabled, then only the latter is done
        #  self.ale.setString("record_sound_filename", "")

        # Get settings
        self.ale.loadROM(rom_dir)
        self.screen_width, self.screen_height = self.ale.getScreenDims()
        self.legal_actions = self.ale.getLegalActionSet()

        # Action count across all episodes
        self.action_count = 0
        self.start_time = time.time()

        self.reset()

    def reset(self):
        self.ale.reset_game()

    def take_action(self):
        action = self.legal_actions[np.random.randint(self.legal_actions.size)]
        self.ale.act(action)
        self.action_count += 1

    def print_fps(self, delta_t=500):
        if self.action_count % delta_t == 0:
            print '[atari.py] Frames/second: %f' % (
                self.action_count / (time.time() - self.start_time))
            print '[atari.py] Overall game frame count:', atari.action_count * atari.frame_skip
            print '---------'

    def get_image_and_audio(self):
        np_data_image = np.zeros(self.screen_width * self.screen_height * 3,
                                 dtype=np.uint8)
        if self.record_sound_for_user:
            np_data_audio = np.zeros(self.ale.getAudioSize(), dtype=np.uint8)
            self.ale.getScreenRGBAndAudio(np_data_image, np_data_audio)

            # Also supports independent audio queries if user desires:
            #  self.ale.getAudio(np_data_audio)
        else:
            np_data_audio = 0
            self.ale.getScreenRGB(np_data_image)

        return np.reshape(np_data_image,
                          (self.screen_height, self.screen_width,
                           3)), np.asarray(np_data_audio)
示例#4
0
class ALE(object):
    def __init__(self, init_seed, init_rand):
        self.ale = ALEInterface()
        self.ale.setInt(b'random_seed', init_seed)
        self.ale.setFloat(b'repeat_action_probability', 0.0) 
        self.ale.loadROM('./breakout.bin')
        self.action_size = 4

        self.screen = None
        self.reward = 0
        self.terminal = True
        self.init_rand = init_rand

    def setSetting(self, action_repeat, screen_type):
        self.action_repeat = action_repeat
        self.screen_type = screen_type

    def _step(self, action):
        self.reward = self.ale.act(action)
        self.terminal = self.ale.game_over()

        if self.screen_type == 0:
            self.screen = self.ale.getScreenRGB()
        elif self.screen_type == 1:
            self.screen = self.ale.getScreenGrayscale()
        else:
            sys.stderr.write('screen_type error!')
            exit()


    def state(self):
        return self.reward, self.screen, self.terminal

    def act(self, action):
        cumulated_reward = 0
        for _ in range(self.action_repeat):
            self._step(action)
            cumulated_reward += self.reward
            if self.terminal:
                break
        self.reward = cumulated_reward
        return self.state()

    def new_game(self):
        if self.ale.game_over():
            self.ale.reset_game()

            if self.screen_type == 0:
                self.screen = self.ale.getScreenRGB()
            elif self.screen_type == 1:
                self.screen = self.ale.getScreenGrayscale()
            else:
                sys.stderr.write('screen_type error!')
                exit()

        for _ in range(self.init_rand):
            self._step(0)

        return self.screen
class emulator:
	def __init__(self, rom_name, vis):
		self.ale = ALEInterface()
		self.max_frames_per_episode = self.ale.getInt("max_mum_frames_per_episode")
		self.ale.setInt("random_seed", 123)
		self.ale.setInt("frame_skip", 4)
		self.ale.loadROM('roms/' + rom_name)
		self.legal_actions = self.ale.getMinimalActionSet()
		self.action_map = dict()
		for i in range(len(self.legal_actions)):
			self.action_map[self.legal_actions[i]] = i
		
		print self.legal_actions
		self.screen_width, self.screen_height = self.ale.getScreenDims()
		print("width/height: "+ str(self.screen_width) + "/" + str(self.screen_height))
		self.vis = vis
		if vis:
			cv2.startWindowThread()
			cv2.namedWindow("preview")
			
	def get_image(self):
		# numpy_surface = np.zeros(self.screen_height*self.screen_width*3, dtype=np.uint8)
		# self.ale.getScreenRGB(numpy_surface)
		# image = np.reshape(numpy_surface, (self.screen_height, self.screen_width, 3))
		image = self.ale.getScreenRGB()
		image = np.reshape(image, (self.screen_height, self.screen_width, 3))
		return image
	
	def newGame(self):
		self.ale.reset_game()
		return self.get_image(), 0, False
	
	def next(self, action_indx):
		reward = self.ale.act(action_indx)
		nextstate = self.get_image()
		if self.vis:
			cv2.imshow('preview', nextstate)
		return nextstate, reward, self.ale.game_over()
	
	def train(self):
		for episode in range(10):
			total_reward = 0
			frame_number = 0
			while not self.ale.game_over():
				a = self.legal_actions[random.randrange(len(self.legal_actions))]
				# Apply an action and get the resulting reward
				reward = self.ale.act(a);
				total_reward += reward
				screen = self.ale.getScreenRGB()
				screen = np.array(screen).reshape([self.screen_height, self.screen_width, -1])
				frame_number = self.ale.getEpisodeFrameNumber()
				cv2.imshow("screen", screen/255.0)
				cv2.waitKey(0)
				
			self.ale.saveScreenPNG("test_"+str(frame_number)+".png")
			print('Episode %d ended with score: %d' % (episode, total_reward))
			print('Frame number is : ', frame_number)
			self.ale.reset_game()
示例#6
0
文件: ale.py 项目: andth80/a3c-demo
class Env():
    def __init__(self, rom_name):
        self.__initALE()
        self.__loadROM(rom_name)
        self.screen_history = []
        self.screens = []

    def __initALE(self):
        self.ale = ALEInterface()
        self.ale.setInt(b'random_seed', randrange(1000))
        self.ale.setInt(b'fragsize', 64)
        self.ale.setInt(b'frame_skip', 1)

        # qq set this back to 0.25?
        self.ale.setFloat(b'repeat_action_probability', 0)
        self.ale.setLoggerMode('error')

    def __loadROM(self, rom_name):
        self.ale.loadROM(rom_name.encode('utf-8'))
        self.actions = self.ale.getMinimalActionSet()

        (width, height) = self.ale.getScreenDims()
        self.screen_data1 = np.empty((height, width, 3), dtype=np.uint8)
        self.screen_data2 = np.empty((height, width, 3), dtype=np.uint8)

    def get_legal_action_count(self):
        return len(self.actions)

    def act(self, action_index):
        action = self.actions[action_index]
        reward = 0

        # perform the action 4 times
        reward += _clip(self.ale.act(action), -1, 1)
        reward += _clip(self.ale.act(action), -1, 1)
        reward += _clip(self.ale.act(action), -1, 1)
        self.ale.getScreenRGB(self.screen_data1)
        reward += _clip(self.ale.act(action), -1, 1)
        self.ale.getScreenRGB(self.screen_data2)

        # return the pixel-wise max of the last two frames (some games only
        # render every other frame)
        screen_data_combined = np.maximum(self.screen_data1, self.screen_data2)
        terminal = self.ale.game_over()

        self.screens.append(preprocess_screen(screen_data_combined))
        phi = get_phi(self.screens)

        return (terminal, reward, phi, self.screen_data2)

    def get_s(self):
        return get_phi(self.screens)

    def reset(self):
        self.ale.reset_game()
        self.screens = []
class emulator:
    def __init__(self, rom_name, vis, windowname='preview'):
        self.ale = ALEInterface()
        self.max_frames_per_episode = self.ale.getInt(
            "max_num_frames_per_episode")
        self.ale.setInt("random_seed", 123)
        self.ale.setInt("frame_skip", 4)
        self.ale.loadROM('roms/' + rom_name)
        self.legal_actions = self.ale.getMinimalActionSet()
        self.action_map = dict()
        self.windowname = windowname
        for i in range(len(self.legal_actions)):
            self.action_map[self.legal_actions[i]] = i
        self.init_frame_number = 0

        # print(self.legal_actions)
        self.screen_width, self.screen_height = self.ale.getScreenDims()
        print("width/height: " + str(self.screen_width) + "/" +
              str(self.screen_height))
        self.vis = vis
        if vis:
            cv2.startWindowThread()
            cv2.namedWindow(self.windowname)

    def get_image(self):
        numpy_surface = np.zeros(self.screen_height * self.screen_width * 3,
                                 dtype=np.uint8)
        self.ale.getScreenRGB(numpy_surface)
        image = np.reshape(numpy_surface,
                           (self.screen_height, self.screen_width, 3))
        return image

    def newGame(self):
        # Instead of resetting the game, we load a checkpoint and start from there.
        # self.ale.reset_game()
        self.ale.restoreState(
            self.ale.decodeState(checkpoints[random.randint(
                0, 99)].astype('uint8')))
        self.init_frame_number = self.ale.getFrameNumber()
        #self.ale.restoreState(self.ale.decodeState(np.reshape(checkpoint,(1009,1))))
        return self.get_image()

    def next(self, action_indx):
        reward = self.ale.act(action_indx)
        nextstate = self.get_image()
        # scipy.misc.imsave('test.png',nextstate)
        if self.vis:
            cv2.imshow(self.windowname, nextstate)
        return nextstate, reward, self.ale.game_over()

    def get_frame_number(self):
        return self.ale.getFrameNumber() - self.init_frame_number
示例#8
0
class emulator(object):
    def __init__(self, rom_name, vis, frameskip=1, windowname='preview'):
        self.ale = ALEInterface()
        self.max_frames_per_episode = self.ale.getInt(
            "max_num_frames_per_episode")
        self.ale.setInt("random_seed", 123)
        self.ale.setInt("frame_skip", frameskip)
        romfile = str(ROM_PATH) + str(rom_name)
        if not os.path.exists(romfile):
            print('No ROM file found at "' + romfile +
                  '".\nAdjust ROM_PATH or double-check the filt exists.')
        self.ale.loadROM(romfile)
        self.legal_actions = self.ale.getMinimalActionSet()
        self.action_map = dict()
        self.windowname = windowname
        for i in range(len(self.legal_actions)):
            self.action_map[self.legal_actions[i]] = i

        # print(self.legal_actions)
        self.screen_width, self.screen_height = self.ale.getScreenDims()
        print("width/height: " + str(self.screen_width) + "/" +
              str(self.screen_height))
        self.vis = vis
        if vis:
            cv2.startWindowThread()
            cv2.namedWindow(
                self.windowname,
                flags=cv2.WINDOW_AUTOSIZE)  # permit manual resizing

    def get_image(self):
        numpy_surface = np.zeros(self.screen_height * self.screen_width * 3,
                                 dtype=np.uint8)
        self.ale.getScreenRGB(numpy_surface)
        image = np.reshape(numpy_surface,
                           (self.screen_height, self.screen_width, 3))
        return image

    def newGame(self):
        self.ale.reset_game()
        return self.get_image()

    def next(self, action_indx):
        reward = self.ale.act(action_indx)
        nextstate = self.get_image()
        # scipy.misc.imsave('test.png',nextstate)
        if self.vis:
            cv2.imshow(self.windowname, nextstate)
            if sys.platform == 'darwin':
                # if we don't do this, can hang on OS X
                cv2.waitKey(2)
        return nextstate, reward, self.ale.game_over()
示例#9
0
class Atari:
    # Constructor
    def __init__(self, rom_name):
        # 1º Passo: carregamos o jogo e definimos seus parâmetros
        self.ale = ALEInterface()
        self.max_frames_per_episode = self.ale.getInt(
            b"max_num_frames_per_episode")
        self.ale.setInt(b"random_seed", 123)
        self.ale.setInt(b"frame_skip", 4)
        self.ale.loadROM(('game/' + rom_name).encode())

        self.screen_width, self.screen_height = self.ale.getScreenDims()
        self.legal_actions = self.ale.getMinimalActionSet()
        self.action_map = dict()

        for i in range(len(self.legal_actions)):
            self.action_map[self.legal_actions[i]] = i

        # 2º Passo: criamos a janela para exibição
        self.windowname = rom_name
        cv2.startWindowThread()
        cv2.namedWindow(rom_name)

    # Essa função será utilizada para receber uma imagem do emulador, já em um formato esperado
    # por nosso algoritmo de treinamento.
    def get_image(self):
        numpy_surface = np.zeros(self.screen_height * self.screen_width * 3,
                                 dtype=np.uint8)
        self.ale.getScreenRGB(numpy_surface)
        image = np.reshape(numpy_surface,
                           (self.screen_height, self.screen_width, 3))
        return image

    # Simplesmente inicializa o jogo
    def newGame(self):
        self.ale.reset_game()
        return self.get_image()

    # Essa função será responsável por retornar as informações da observação do estado após certa ação ser tomada.
    def next(self, action):
        reward = self.ale.act(self.legal_actions[np.argmax(action)])
        nextstate = self.get_image()

        cv2.imshow(self.windowname, nextstate)
        if self.ale.game_over():
            self.newGame()

        return nextstate, reward, self.ale.game_over()
示例#10
0
def main():
    arguments = docopt.docopt(__doc__, version='ALE Demo Version 1.0')

    pygame.init()

    ale = ALEInterface()
    ale.setInt(b'random_seed', 123)
    ale.setBool(b'display_screen', True)
    ale.loadROM(str.encode(arguments['<rom_file>']))

    legal_actions = ale.getLegalActionSet()
    width, height = ale.getScreenDims()
    print(width, height)
    frame = ale.getScreenRGB()
    frame = np.array(frame, dtype=float)

    rewards, num_episodes = [], int(arguments['--iters'] or 5)
    for episode in range(num_episodes):
        total_reward = 0
        while not ale.game_over():
            total_reward += ale.act(random.choice(legal_actions))
        print('Episode %d reward %d.' % (episode, total_reward))
        rewards.append(total_reward)
        ale.reset_game()

    average = sum(rewards) / len(rewards)
    print('Average for %d episodes: %d' % (num_episodes, average))
class Emulate:
  def __init__(self, rom_file, display_screen=False,frame_skip=4,screen_height=84,screen_width=84,repeat_action_probability=0,color_averaging=True,random_seed=0,record_screen_path='screen_pics',record_sound_filename=None,minimal_action_set=True):
    self.ale = ALEInterface()
    if display_screen:
      if sys.platform == 'darwin':
        import pygame
        pygame.init()
        self.ale.setBool('sound', False) # Sound doesn't work on OSX
      elif sys.platform.startswith('linux'):
        self.ale.setBool('sound', True)
      self.ale.setBool('display_screen', True)

    self.ale.setInt('frame_skip', frame_skip)
    self.ale.setFloat('repeat_action_probability', repeat_action_probability)
    self.ale.setBool('color_averaging', color_averaging)

    if random_seed:
      self.ale.setInt('random_seed', random_seed)

    self.ale.loadROM(rom_file)

    if minimal_action_set:
      self.actions = self.ale.getMinimalActionSet()
    else:
      self.actions = self.ale.getLegalActionSet()

    self.dims = (screen_width,screen_height)

  def numActions(self):
    return len(self.actions)

  def getActions(self):
  	return self.actions

  def restart(self):
    self.ale.reset_game()

  def act(self, action):
    reward = self.ale.act(self.actions[action])
    return reward

  def getScreen(self):
    screen = self.ale.getScreenGrayscale()
    resized = cv2.resize(screen, self.dims)
    return resized

  def getScreenGray(self):
    screen = self.ale.getScreenGrayscale()
    resized = cv2.resize(screen, self.dims)
    rotated = np.rot90(resized,k=1)
    return rotated

  def getScreenColor(self):
    screen = self.ale.getScreenRGB()
    resized = cv2.resize(screen, self.dims)
    rotated = np.rot90(resized,k=1)
    return rotated

  def isTerminal(self):
    return self.ale.game_over()
示例#12
0
文件: emulator.py 项目: amharc/jnp3
class Emulator(object):
    def __init__(self, settings):
        self.ale = ALEInterface()
        self.ale.setInt('frame_skip', settings['frame_skip'])
        self.ale.setInt('random_seed', np.random.RandomState().randint(1000))
        self.ale.setBool('color_averaging', False)
        self.ale.loadROM('roms/' + settings['rom_name'])
        self.actions = self.ale.getMinimalActionSet()
        self.width = settings['screen_width']
        self.height = settings['screen_height']
        
    def reset(self):
        self.ale.reset_game()

    def image(self):
        screen = self.ale.getScreenGrayscale()
        screen = cv2.resize(screen, (self.height, self.width),
                interpolation=cv2.INTER_LINEAR)
        return np.reshape(screen, (self.height, self.width))

    def full_image(self):
        screen = self.ale.getScreenRGB()
        return screen

    def act(self, action):
        return self.ale.act(self.actions[action])

    def terminal(self):
        return self.ale.game_over()
class emulator:
	def __init__(self, rom_name, vis,windowname='preview'):
		self.ale = ALEInterface()
		self.max_frames_per_episode = self.ale.getInt("max_num_frames_per_episode");
		self.ale.setInt("random_seed",123)
		self.ale.setInt("frame_skip",4)
		self.ale.loadROM('roms/' + rom_name )
		self.legal_actions = self.ale.getMinimalActionSet()
		self.action_map = dict()
		self.windowname = windowname
		for i in range(len(self.legal_actions)):
			self.action_map[self.legal_actions[i]] = i
		self.init_frame_number = 0

		# print(self.legal_actions)
		self.screen_width,self.screen_height = self.ale.getScreenDims()
		print("width/height: " +str(self.screen_width) + "/" + str(self.screen_height))
		self.vis = vis
		if vis: 
			cv2.startWindowThread()
			cv2.namedWindow(self.windowname)

	def get_image(self):
		numpy_surface = np.zeros(self.screen_height*self.screen_width*3, dtype=np.uint8)
		self.ale.getScreenRGB(numpy_surface)
		image = np.reshape(numpy_surface, (self.screen_height, self.screen_width, 3))
		return image

	def newGame(self):
		# Instead of resetting the game, we load a checkpoint and start from there.
		# self.ale.reset_game()
		self.ale.restoreState(self.ale.decodeState(checkpoints[random.randint(0,99)].astype('uint8')))
		self.init_frame_number = self.ale.getFrameNumber()
		#self.ale.restoreState(self.ale.decodeState(np.reshape(checkpoint,(1009,1))))
		return self.get_image()

	def next(self, action_indx):
		reward = self.ale.act(action_indx)	
		nextstate = self.get_image()
		# scipy.misc.imsave('test.png',nextstate)
		if self.vis:
			cv2.imshow(self.windowname,nextstate)
		return nextstate, reward, self.ale.game_over()

	def get_frame_number(self):
		return self.ale.getFrameNumber() - self.init_frame_number
示例#14
0
class Emulator:
    def __init__(self, rom_name, vis):
        self.ale = ALEInterface()
        self.max_frames_per_episode = self.ale.getInt(
            "max_num_frames_per_episode")
        self.ale.setInt("random_seed", 123)
        self.ale.setInt("frame_skip", 4)
        self.ale.loadROM('roms/' + rom_name)
        self.legal_actions = self.ale.getMinimalActionSet()
        self.action_map = dict()
        for i in range(len(self.legal_actions)):
            self.action_map[self.legal_actions[i]] = i

        #print(self.legal_actions)
        self.screen_width, self.screen_height = self.ale.getScreenDims()
        print("width/height: " + str(self.screen_width) + "/" +
              str(self.screen_height))
        self.vis = vis
        if vis:
            cv2.startWindowThread()
            cv2.namedWindow("preview")

    def get_image(self):
        numpy_surface = np.zeros(self.screen_height * self.screen_width * 3,
                                 dtype=np.uint8)
        self.ale.getScreenRGB(numpy_surface)
        image = np.reshape(numpy_surface,
                           (self.screen_height, self.screen_width, 3))
        #added by ben may 2016
        print image
        print '&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&& printing'
        return image

    def newGame(self):
        self.ale.reset_game()
        return self.get_image()

    def next(self, action_indx):
        reward = self.ale.act(action_indx)
        nextstate = self.get_image()
        # scipy.misc.imsave('test.png',nextstate)
        # scipy.misc.imsave('test.png',nextstate)
        if self.vis:
            cv2.imshow('preview', nextstate)
        return nextstate, reward, self.ale.game_over()
示例#15
0
文件: envs.py 项目: afcarl/uct_atari
class Atari(AtariEnv):
    metadata = {'render.modes': ['human', 'rgb_array']}

    def __init__(self,
                 game='pong',
                 obs_type='ram',
                 frameskip=(2, 5),
                 repeat_action_probability=0.):
        """Frameskip should be either a tuple (indicating a random range to
        choose from, with the top value exclude), or an int."""

        utils.EzPickle.__init__(self, game, obs_type)
        assert obs_type in ('ram', 'image')

        self.game_path = atari_py.get_game_path(game)
        if not os.path.exists(self.game_path):
            raise IOError('You asked for game %s but path %s does not exist' %
                          (game, self.game_path))
        self._obs_type = obs_type
        self.frameskip = frameskip
        self.ale = ALEInterface()
        self.viewer = None

        # Tune (or disable) ALE's action repeat:
        # https://github.com/openai/gym/issues/349
        assert isinstance(
            repeat_action_probability,
            (float, int)), "Invalid repeat_action_probability: {!r}".format(
                repeat_action_probability)
        self.ale.setFloat('repeat_action_probability'.encode('utf-8'),
                          repeat_action_probability)

        self._seed()

        (screen_width, screen_height) = self.ale.getScreenDims()
        self._buffer = np.empty((screen_height, screen_width, 3),
                                dtype=np.uint8)

        self._action_set = self.ale.getMinimalActionSet()
        self.action_space = spaces.Discrete(len(self._action_set))

        (screen_width, screen_height) = self.ale.getScreenDims()
        if self._obs_type == 'ram':
            self.observation_space = spaces.Box(low=np.zeros(128),
                                                high=np.zeros(128) + 255)
        elif self._obs_type == 'image':
            self.observation_space = spaces.Box(low=0,
                                                high=255,
                                                shape=(screen_height,
                                                       screen_width, 3))
        else:
            raise error.Error('Unrecognized observation type: {}'.format(
                self._obs_type))

    def _get_image(self):
        return self.ale.getScreenRGB(self._buffer).copy()
示例#16
0
class emulator:
	def __init__(self, rom_name, vis, windowname='preview'):
  		self.ale = ALEInterface()
		# When it starts
  		self.ale.setInt("random_seed", 123)
		# Skipping 4 frames 
  		self.ale.setInt("frame_skip", 4)
  		self.ale.loadROM('roms/' + rom_name)
  		self.legal_actions = self.ale.getMinimalActionSet()
  		print('Actions : %s' % self.legal_actions)
  		self.action_map = dict()
  		self.windowname = windowname
  		# Raw atari frames, 210 * 160 pixel images 
  		self.screen_width, self.screen_height = self.ale.getScreenDims()
  		print("widht/height: " + str(self.screen_width) + "/" + str(self.screen_height))
  		# Visualize
  		self.vis = vis
  		if vis:
   			cv2.startWindowThread()
   			cv2.namedWindow(self.windowname)

 	def get_image(self):
  		# Need to specify data type as uint8
  		numpy_surface = np.zeros([self.screen_width * self.screen_height * 3], dtype=np.uint8)
  		# get RGB values
  		self.ale.getScreenRGB(numpy_surface)
  		image = np.reshape(numpy_surface, [self.screen_height, self.screen_width, 3])
  		return image

 	def new_game(self):
  		self.ale.reset_game()
  		# Reset game and getting reset image value
  		return self.get_image()

 	def next(self, action_index):
  		# Get R(s,a)
  		reward = self.ale.act(action_index)
  		# Get image pixel value after taking an action
  		next_state = self.get_image()
  		if self.vis:
   			cv2.imshow(self.windowname, next_state)
  		# self.ale.game_over() returns True when game is over
  		return next_state, reward, self.ale.game_over()
示例#17
0
class Atari:
    def __init__(self, rom_name):

        self.ale = ALEInterface()
        self.max_frames_per_episode = self.ale.getInt(
            "max_num_frames_per_episode")
        self.ale.setInt("random_seed", 123)
        self.ale.setInt("frame_skip", 4)
        self.ale.loadROM('game/' + rom_name)
        self.screen_width, self.screen_height = self.ale.getScreenDims()
        self.legal_actions = self.ale.getMinimalActionSet()
        self.action_map = dict()
        for i in range(len(self.legal_actions)):
            self.action_map[self.legal_actions[i]] = i
        # print len(self.legal_actions)
        self.windowname = rom_name
        cv2.startWindowThread()
        cv2.namedWindow(rom_name)

    def get_image(self):

        numpy_surface = np.zeros(self.screen_height * self.screen_width * 3,
                                 dtype=np.uint8)
        self.ale.getScreenRGB(numpy_surface)
        image = np.reshape(numpy_surface,
                           (self.screen_height, self.screen_width, 3))
        return image

    def newGame(self):

        self.ale.reset_game()
        return self.get_image()

    def next(self, action):

        reward = self.ale.act(self.legal_actions[np.argmax(action)])
        nextstate = self.get_image()

        cv2.imshow(self.windowname, nextstate)
        if self.ale.game_over():
            self.newGame()
        # print "reward %d" % reward
        return nextstate, reward, self.ale.game_over()
示例#18
0
class Atari:
    def __init__(self,rom_name):
        self.ale = ALEInterface()
        self.max_frames_per_episode = self.ale.getInt("max_num_frames_per_episode")
        self.ale.setInt("random_seed",123)
        self.ale.setInt("frame_skip",4)
        self.ale.loadROM(rom_name)
        self.screen_width,self.screen_height = self.ale.getScreenDims()
        self.legal_actions = self.ale.getMinimalActionSet()
        self.action_map = dict()
        for i in range(len(self.legal_actions)):
            self.action_map[self.legal_actions[i]] = i
        print len(self.legal_actions)
        self.windowname = rom_name
        cv2.startWindowThread()
        cv2.namedWindow(rom_name)

    def preprocess(self, image):
        image = cv2.cvtColor(cv2.resize(image, (84, 110)), cv2.COLOR_BGR2GRAY)
        image = image[26:110,:]
        ret, image = cv2.threshold(image,1,255,cv2.THRESH_BINARY)
        return np.reshape(image,(84,84, 1))

    def get_image(self):
        numpy_surface = np.zeros(self.screen_height*self.screen_width*3, dtype=np.uint8)
        self.ale.getScreenRGB(numpy_surface)
        image = np.reshape(numpy_surface, (self.screen_height, self.screen_width, 3))
        return self.preprocess(image)

    def newGame(self):
        self.ale.reset_game()
        return self.get_image()

    def next(self, action):
        reward = self.ale.act(self.legal_actions[np.argmax(action)])    
        nextstate = self.get_image()

        cv2.imshow(self.windowname,nextstate)
        if self.ale.game_over():
            self.newGame()
        #print "reward %d" % reward 
        return nextstate, reward, self.ale.game_over()
示例#19
0
class Breakout(object):
    steps_between_actions = 4

    def __init__(self):
        self.ale = ALEInterface()
        self.ale.setInt('random_seed', 123)
        self.ale.setBool("display_screen", False)
        self.ale.setBool("sound", False)
        self.ale.loadROM("%s/breakout.bin" % rom_directory)
        self.current_state = [
            self.ale.getScreenRGB(), self.ale.getScreenRGB()
        ]

    def start_episode(self):
        self.ale.reset_game()

    def take_action(self, action):
        assert not self.terminated

        def step():
            reward = self.ale.act(action)
            self.roll_state()
            return reward

        reward = sum(step() for _ in xrange(self.steps_between_actions))

        return (reward, self.current_state)

    def roll_state(self):
        assert len(self.current_state) == 2
        self.current_state = [self.current_state[1], self.ale.getScreenRGB()]
        assert len(self.current_state) == 2

    @property
    def actions(self):
        return self.ale.getMinimalActionSet()

    @property
    def terminated(self):
        return self.ale.game_over() or self.ale.lives() < 5
class emulator:
	def __init__(self, rom_name, vis):
		if vis:
			import cv2
		self.ale = ALEInterface()
		self.max_frames_per_episode = self.ale.getInt("max_num_frames_per_episode");
		self.ale.setInt("random_seed",123)
		self.ale.setInt("frame_skip",4)
		self.ale.loadROM('roms/' + rom_name )
		self.legal_actions = self.ale.getMinimalActionSet()
		self.action_map = dict()
		for i in range(len(self.legal_actions)):
			self.action_map[self.legal_actions[i]] = i

		# print(self.legal_actions)
		self.screen_width,self.screen_height = self.ale.getScreenDims()
		print("width/height: " +str(self.screen_width) + "/" + str(self.screen_height))
		self.vis = vis
		if vis: 
			cv2.startWindowThread()
			cv2.namedWindow("preview")

	def get_image(self):
		numpy_surface = np.zeros(self.screen_height*self.screen_width*3, dtype=np.uint8)
		self.ale.getScreenRGB(numpy_surface)
		image = np.reshape(numpy_surface, (self.screen_height, self.screen_width, 3))
		return image

	def newGame(self):
		self.ale.reset_game()
		return self.get_image()

	def next(self, action_indx):
		reward = self.ale.act(action_indx)	
		nextstate = self.get_image()
		# scipy.misc.imsave('test.png',nextstate)
		if self.vis:
			cv2.imshow('preview',nextstate)
		return nextstate, reward, self.ale.game_over()
示例#21
0
class ALEInterfaceWrapper:
    def __init__(self, repeat_action_probability, rng):
        self.internal_action_repeat_prob = repeat_action_probability
        self.prev_action = 0
        self.rng_source = rng
        self.rng = deepcopy(self.rng_source)
        self.ale = ALEInterface()
        '''
		This sets the probability from the default 0.25 to 0.
		It ensures deterministic actions.
		'''
        self.ale.setFloat('repeat_action_probability', 0.0)

    def getScreenRGB(self):
        return self.ale.getScreenRGB()

    def game_over(self):
        return self.ale.game_over()

    def reset_game(self):
        self.ale.reset_game()

    def lives(self):
        return self.ale.lives()

    def getMinimalActionSet(self):
        return self.ale.getMinimalActionSet()

    def setInt(self, key, value):
        self.ale.setInt(key, value)

    def setFloat(self, key, value):
        self.ale.setFloat(key, value)

    def loadROM(self, rom):
        self.ale.loadROM(rom)

    def reset_action_seed(self):
        self.rng = deepcopy(self.rng_source)

    def set_action_seed(self, seed):
        self.rng = np.random.RandomState(seed)

    def act(self, action):
        actual_action = action
        if self.internal_action_repeat_prob > 0:
            if self.rng.uniform(0, 1) < self.internal_action_repeat_prob:
                actual_action = self.prev_action
        self.prev_action = actual_action
        return self.ale.act(actual_action)
示例#22
0
class AleInterface(object):
    def __init__(self, game, args):
        self.game = game
        self.ale = ALEInterface()

        # if sys.platform == 'darwin':
        #     self.ale.setBool('sound', False)  # Sound doesn't work on OSX
        # elif sys.platform.startswith('linux'):
        #     self.ale.setBool('sound', True)
        # self.ale.setBool('display_screen', True)
        #
        self.ale.setBool('display_screen', args.display_screen)

        self.ale.setInt('frame_skip', args.frame_skip)
        self.ale.setFloat('repeat_action_probability', args.repeat_action_probability)
        self.ale.setBool('color_averaging', args.color_averaging)
        self.ale.setInt('random_seed', args.random_seed)

        #
        # if rand_seed is not None:
        #     self.ale.setInt('random_seed', rand_seed)

        rom_file = "./roms/%s.bin" % game
        if not os.path.exists(rom_file):
            print "not found rom file:", rom_file
            sys.exit(-1)
        self.ale.loadROM(rom_file)

        self.actions = self.ale.getMinimalActionSet()


    def get_actions_num(self):
        return len(self.actions)

    def act(self, action):
        reward = self.ale.act(self.actions[action])
        return reward

    def get_screen_gray(self):
        return self.ale.getScreenGrayscale()

    def get_screen_rgb(self):
        return self.ale.getScreenRGB()

    def game_over(self):
        return self.ale.game_over()

    def reset_game(self):
        return self.ale.reset_game()
class Atari:
	def __init__(self,rom_name):
		self.ale = ALEInterface()
		self.max_frames_per_episode = self.ale.getInt("max_num_frames_per_episode")
		self.ale.setInt("random_seed",123)
		self.ale.setInt("frame_skip",4)
		self.ale.loadROM('./' +rom_name)
		self.screen_width,self.screen_height = self.ale.getScreenDims()
		self.legal_actions = self.ale.getMinimalActionSet()
		self.action_map = dict()
		for i in range(len(self.legal_actions)):
			self.action_map[self.legal_actions[i]] = i
		#print len(self.legal_actions)
		self.windowname = rom_name
		#cv2.startWindowThread()
		#cv2.namedWindow(rom_name)

	def get_image(self):
		numpy_surface = np.zeros(self.screen_height*self.screen_width*3, dtype=np.uint8)
		self.ale.getScreenRGB(numpy_surface)
		image = np.reshape(numpy_surface, (self.screen_height, self.screen_width, 3))
		return image

	def newGame(self):
		self.ale.reset_game()
		return self.get_image()

	def next(self, action):
		reward = self.ale.act(self.legal_actions[np.argmax(action)])	
		nextstate = self.get_image()
		
		#cv2.imshow(self.windowname,nextstate)
		if self.ale.game_over():
			self.newGame()
		#print "reward %d" % reward 
		return nextstate, reward, self.ale.game_over()
示例#24
0
def train():
    ale = ALEInterface()
    ale.setInt('random_seed', 123)
    ale.loadROM('roms/breakout.bin')
    legal_actions = ale.getLegalActionSet()
    total_reward = 0
    while not ale.game_over():
        a = legal_actions[randrange(len(legal_actions))]
        reward = ale.act(a)
        screen = None
        screen = ale.getScreenRGB()
        print(screen)
        plt.imshow(screen)
        plt.show()

        total_reward += reward
        print(total_reward)
    print('Episode end!')
示例#25
0
        class Env:
            def __init__(self):
                self.ale = ALEInterface()
                rom_name = "roms/Breakout.bin"
                self.ale.setInt("frame_skip", 4)
                self.ale.loadROM(rom_name)
                legal_actions = self.ale.getMinimalActionSet()
                self.action_map = {}
                for i in range(len(legal_actions)):
                    self.action_map[i] = legal_actions[i]
                self.action_num = len(self.action_map)

            def reset(self):
                state = np.zeros((84, 84, 3), dtype=np.uint8)
                self.ale.reset_game()
                return state

            def step(self, action):
                reward = self.ale.act(self.action_map[action])
                state = self.ale.getScreenRGB()
                done = self.ale.game_over()
                return state, reward, done, ""
class ALEInterfaceWrapper:
    def __init__(self, repeat_action_probability):
        self.internal_action_repeat_prob = repeat_action_probability
        self.prev_action = 0
        self.ale = ALEInterface()
        '''
		This sets the probability from the default 0.25 to 0.
		It ensures deterministic actions.
		'''
        self.ale.setFloat('repeat_action_probability',
                          repeat_action_probability)

    def getScreenRGB(self):
        return self.ale.getScreenRGB()

    def game_over(self):
        return self.ale.game_over()

    def reset_game(self):
        self.ale.reset_game()

    def lives(self):
        return self.ale.lives()

    def getMinimalActionSet(self):
        return self.ale.getMinimalActionSet()

    def setInt(self, key, value):
        self.ale.setInt(key, value)

    def setFloat(self, key, value):
        self.ale.setFloat(key, value)

    def loadROM(self, rom):
        self.ale.loadROM(rom)

    def act(self, action):
        actual_action = action
        return self.ale.act(actual_action)
示例#27
0
class Game():
    """
    Wrapper around the ALEInterface class.
    """

    def __init__(self, rom_file, sdl=False):
        self.ale = ALEInterface()
        # Setup SDL
        if sdl:
            if sys.platform == 'darwin':
                import pygame
                pygame.init()
                self.ale.setBool(b'sound', False) # Sound doesn't work on OSX
            elif sys.platform.startswith('linux'):
                self.ale.setBool(b'sound', True)
            self.ale.setBool(b'display_screen', True)

        # Load rom
        self.ale.loadROM(str.encode(rom_file))

    def get_action_set(self):
        return self.ale.getLegalActionSet()

    def get_minimal_action_set(self):
        return self.ale.getMinimalActionSet()

    def game_over(self):
        return self.ale.game_over()

    def act(self, action):
        return self.ale.act(action)

    def reset_game(self):
        self.ale.reset_game()

    def get_frame(self):
        return self.ale.getScreenRGB()
示例#28
0
class GameEnvironment:

    def __init__(self, settings):

        self.ale = ALEInterface()
        self.ale.setBool('display_screen', settings['DISPLAY_SCREEN'])
        self.ale.setBool('sound', settings['SOUND'])
        self.ale.setBool('color_averaging', settings['COLOR_AVERAGING'])
        self.ale.setInt('random_seed', settings['RANDOM_SEED'])
        self.ale.setInt('frame_skip', settings['FRAME_SKIP'])
        self.ale.setFloat('repeat_action_probability', settings['REPEAT_ACTION_PROB'])
        roms_dir = settings['ROMS_DIR']
        rom_name = settings['ROM_NAME']
        ROM = None
        if(rom_name.endswith('.bin')):
            self.name = rom_name[:-4]
            ROM = rom_name
        else:
            self.name = rom_name
            ROM = rom_name + '.bin'

        self.ale.loadROM(os.path.join(roms_dir, ROM))
        self.random_starts = settings['RANDOM_STARTS']
        self.rng = settings['RNG']

        if(settings['MINIMAL_ACTION_SET']):
            self.actions = self.ale.getMinimalActionSet()
        else:
            self.actions = self.ale.getLegalActionSet()

        self.n_actions = len(self.actions)
        self.width, self.height = self.ale.getScreenDims()

        self.observation = np.zeros((self.height, self.width), dtype='uint8')
        self.reward = None
        self.game_over = None
        self.terminal = None
        self.total_lives = None

        self.init()

    def init(self):

        self.restartGame()
        self.reward = 0
        self.game_over = self.gameOver()
        self.terminal = self.game_over
        self.total_lives = self.lives()
        self.step(0)

    def getState(self):

        return self.observation, self.reward, self.terminal, self.game_over

    def step(self, action, training=False):

        self.reward = self.act(action)
        self.paint()
        lives = self.lives()
        self.game_over = self.gameOver()
        self.terminal = self.game_over
        if(training and (lives < self.total_lives)):
            self.terminal = True

        self.total_lives = lives
        return self.getState()

    def newGame(self):

        self.init()
        for i in xrange(self.rng.randint(1, self.random_starts)):
            self.act(0)
            terminal = self.gameOver()
            if(terminal):
                print "Warning terminal in random init"

        return self.step(0)

    def newTestGame(self):

        self.init()
        return self.getState()

    def paint(self):

        self.ale.getScreenGrayscale(self.observation)

    def getScreenRGB(self):

        return self.ale.getScreenRGB()

    def act(self, action):

        assert ((action >= 0) and (action < self.n_actions))
        return self.ale.act(self.actions[action])

    def lives(self):

        return self.ale.lives()

    def restartGame(self):

        self.ale.reset_game()

    def gameOver(self):

        return self.ale.game_over()
示例#29
0
class AtariDriver(object):
    """
    A wrapper for atari emulator.
    """
    def __init__(self, rom_file, frame_skip=1, viz=0):
        """
        :param rom_file: path to the rom
        :param frame_skip: skip every k frames
        :param viz: the delay. visualize the game while running. 0 to disable
        """
        self.ale = ALEInterface()
        self.rng = get_rng(self)

        self.ale.setInt("random_seed", self.rng.randint(self.rng.randint(0, 1000)))
        self.ale.setInt("frame_skip", frame_skip)
        self.ale.loadROM(rom_file)
        self.width, self.height = self.ale.getScreenDims()
        self.actions = self.ale.getMinimalActionSet()

        if isinstance(viz, int):
            viz = float(viz)
        self.viz = viz
        self.romname = os.path.basename(rom_file)
        if self.viz and isinstance(self.viz, float):
            cv2.startWindowThread()
            cv2.namedWindow(self.romname)

        self._reset()
        self.last_image = self._grab_raw_image()
        self.framenum = 0

    def _grab_raw_image(self):
        """
        :returns: the current 3-channel image
        """
        m = np.zeros(self.height * self.width * 3, dtype=np.uint8)
        self.ale.getScreenRGB(m)
        return m.reshape((self.height, self.width, 3))

    def grab_image(self):
        """
        :returns: a gray-scale image, max-pooled over the last frame.
        """
        now = self._grab_raw_image()
        ret = np.maximum(now, self.last_image)
        self.last_image = now
        if self.viz and isinstance(self.viz, float):
            cv2.imshow(self.romname, ret)
            time.sleep(self.viz)
        elif self.viz:
            cv2.imwrite("{}/{:06d}.jpg".format(self.viz, self.framenum), ret)
            self.framenum += 1
        ret = cv2.cvtColor(ret, cv2.COLOR_BGR2YUV)[:,:,0]
        ret = ret[36:204,:]   # several online repos all use this
        return ret

    def get_num_actions(self):
        """
        :returns: the number of legal actions
        """
        return len(self.actions)

    def _reset(self):
        self.ale.reset_game()

    def next(self, act):
        """
        :param act: an index of the action
        :returns: (next_image, reward, isOver)
        """
        r = self.ale.act(self.actions[act])
        s = self.grab_image()
        isOver = self.ale.game_over()
        if isOver:
            self._reset()
        return (s, r, isOver)
示例#30
0
class AtariEnvironment:
    def __init__(self, args, outputDir):

        self.outputDir = outputDir
        self.screenCaptureFrequency = args.screen_capture_freq

        self.ale = ALEInterface()
        self.ale.setInt(b'random_seed', 123456)
        random.seed(123456)
        # Fix https://groups.google.com/forum/#!topic/deep-q-learning/p4FAIaabwlo
        self.ale.setFloat(b'repeat_action_probability', 0.0)

        # Load the ROM file
        self.ale.loadROM(args.rom)

        self.actionSet = self.ale.getMinimalActionSet()
        self.gameNumber = 0
        self.stepNumber = 0
        self.resetGame()

    def getNumActions(self):
        return len(self.actionSet)

    def getState(self):
        return self.state

    def getGameNumber(self):
        return self.gameNumber

    def getFrameNumber(self):
        return self.ale.getFrameNumber()

    def getEpisodeFrameNumber(self):
        return self.ale.getEpisodeFrameNumber()

    def getEpisodeStepNumber(self):
        return self.episodeStepNumber

    def getStepNumber(self):
        return self.stepNumber

    def getGameScore(self):
        return self.gameScore

    def isGameOver(self):
        return self.ale.game_over()

    def step(self, action):
        previousLives = self.ale.lives()
        reward = 0
        isTerminal = 0
        self.stepNumber += 1
        self.episodeStepNumber += 1

        for i in range(4):
            prevScreenRGB = self.ale.getScreenRGB()
            reward += self.ale.act(self.actionSet[action])
            screenRGB = self.ale.getScreenRGB()

            # Detect end of episode, I don't think I'm handling this right in terms
            # of the overall game loop (??)
            if self.ale.lives() < previousLives or self.ale.game_over():
                isTerminal = 1
                break

            if self.gameNumber % self.screenCaptureFrequency == 0:
                dir = self.outputDir + '/screen_cap/game-%06d' % (
                    self.gameNumber)
                if not os.path.isdir(dir):
                    os.makedirs(dir)
                self.ale.saveScreenPNG(dir + '/frame-%06d.png' %
                                       (self.getEpisodeFrameNumber()))

        maxedScreen = np.maximum(screenRGB, prevScreenRGB)
        self.state = self.state.stateByAddingScreen(maxedScreen,
                                                    self.ale.getFrameNumber())
        self.gameScore += reward
        return reward, self.state, isTerminal

    def resetGame(self):
        if self.ale.game_over():
            self.gameNumber += 1
        self.ale.reset_game()
        self.state = State().stateByAddingScreen(self.ale.getScreenRGB(),
                                                 self.ale.getFrameNumber())
        self.gameScore = 0
        self.episodeStepNumber = 0  # environment steps vs ALE frames.  Will probably be 4*frame number
示例#31
0
class AtariEmulator(BaseEnvironment):
    def __init__(self,
                 emulator_id,
                 game,
                 resource_folder,
                 random_seed=3,
                 random_start=True,
                 single_life_episodes=False,
                 history_window=1,
                 visualize=False,
                 verbose=0,
                 **unknown):
        if verbose >= 2:
            logging.debug('Emulator#{} received unknown args: {}'.format(
                emulator_id, unknown))
        self.emulator_id = emulator_id
        self.ale = ALEInterface()
        self.ale.setInt(b"random_seed", random_seed * (emulator_id + 1))
        # For fuller control on explicit action repeat (>= ALE 0.5.0)
        self.ale.setFloat(b"repeat_action_probability", 0.0)
        # Disable frame_skip and color_averaging
        # See: http://is.gd/tYzVpj
        self.ale.setInt(b"frame_skip", 1)
        self.ale.setBool(b"color_averaging", False)
        self.ale.setBool(b"display_screen", visualize)

        full_rom_path = resource_folder + "/" + game + ".bin"
        self.ale.loadROM(str.encode(full_rom_path))
        self.legal_actions = self.ale.getMinimalActionSet()
        #this env is fixed until firing, so you have to...
        self._have_to_fire = ('FIRE' in [
            ACTION_MEANING[a] for a in self.legal_actions
        ])
        self.screen_width, self.screen_height = self.ale.getScreenDims()
        self.lives = self.ale.lives()

        self.random_start = random_start
        self.single_life_episodes = single_life_episodes
        self.call_on_new_frame = visualize
        self.history_window = history_window
        self.observation_shape = (self.history_window, IMG_SIZE_X, IMG_SIZE_Y)
        self.rgb_screen = np.zeros((self.screen_height, self.screen_width, 3),
                                   dtype=np.uint8)
        self.gray_screen = np.zeros((self.screen_height, self.screen_width, 1),
                                    dtype=np.uint8)
        # Processed historcal frames that will be fed in to the network (i.e., four 84x84 images)
        self.history = create_history_observation(self.history_window)
        #ObservationPool(np.zeros(self.observation_shape, dtype=np.uint8))
        self.frame_preprocessor = FramePreprocessor(self.gray_screen.shape,
                                                    FRAMES_IN_POOL)

    def get_legal_actions(self):
        return self.legal_actions

    def __get_screen_image(self):
        """
        Get the current frame luminance
        :return: the current frame
        """
        self.ale.getScreenGrayscale(self.gray_screen)
        if self.call_on_new_frame:
            self.ale.getScreenRGB(self.rgb_screen)
            self.on_new_frame(self.rgb_screen)
        return self.gray_screen

    def on_new_frame(self, frame):
        pass

    def __random_start_reset(self):
        """ Restart game """
        self.ale.reset_game()

        if self.random_start:
            wait = random.randint(0, MAX_START_WAIT + 1)
            for _ in range(wait):
                self.ale.act(self.get_noop())
                if self.__is_over():
                    self.ale.reset_game()

        self.lives = self.ale.lives()

    def __new_game(self):
        self.__random_start_reset()

        if self._have_to_fire:
            #take action on reset for environments that are fixed until firing
            self.ale.act(self.legal_actions[1])
            if self.__is_over():
                self.__random_start_reset()

            self.ale.act(self.legal_actions[2])
            if self.__is_over():
                self.__random_start_reset()

    def __action_repeat(self, a, times=ACTION_REPEAT):
        """ Repeat action and grab screen into frame pool """
        reward = 0
        for i in range(times - FRAMES_IN_POOL):
            reward += self.ale.act(self.legal_actions[a])
        # Only need to add the last FRAMES_IN_POOL frames to the frame pool
        for i in range(FRAMES_IN_POOL):
            reward += self.ale.act(self.legal_actions[a])
            self.frame_preprocessor.new_frame(self.__get_screen_image())
        return reward

    def reset(self):
        """ Get the initial state """
        self.__new_game()
        for step in range(self.history_window):
            _ = self.__action_repeat(0)
            self.history.new_observation(
                self.frame_preprocessor.get_processed())
        if self.__is_terminal():
            raise Exception('This should never happen.')
        return self.history.get_state(), None

    def next(self, action):
        """ Get the next state, reward, and game over signal """
        reward = self.__action_repeat(action)
        self.history.new_observation(self.frame_preprocessor.get_processed())
        terminal = self.__is_terminal()
        self.lives = self.ale.lives()
        return self.history.get_state(), reward, terminal, None

    def __is_terminal(self):
        if self.single_life_episodes:
            return self.__is_over() or (self.lives > self.ale.lives())
        else:
            return self.__is_over()

    def __is_over(self):
        return self.ale.game_over()

    def get_noop(self):
        return self.legal_actions[0]

    def close(self):
        del self.ale
示例#32
0
class ArcadeLearningEnvironment(Environment):
    """
    [Arcade Learning Environment](https://github.com/mgbellemare/Arcade-Learning-Environment)
    adapter (specification key: `ale`, `arcade_learning_environment`).

    May require:
    ```bash
    sudo apt-get install libsdl1.2-dev libsdl-gfx1.2-dev libsdl-image1.2-dev cmake

    git clone https://github.com/mgbellemare/Arcade-Learning-Environment.git
    cd Arcade-Learning-Environment

    mkdir build && cd build
    cmake -DUSE_SDL=ON -DUSE_RLGLUE=OFF -DBUILD_EXAMPLES=ON ..
    make -j 4
    cd ..

    pip3 install .
    ```

    Args:
        level (string): ALE rom file
            (<span style="color:#C00000"><b>required</b></span>).
        loss_of_life_termination: Signals a terminal state on loss of life
            (<span style="color:#00C000"><b>default</b></span>: false).
        loss_of_life_reward (float): Reward/Penalty on loss of life (negative values are a penalty)
            (<span style="color:#00C000"><b>default</b></span>: 0.0).
        repeat_action_probability (float): Repeats last action with given probability
            (<span style="color:#00C000"><b>default</b></span>: 0.0).
        visualize (bool): Whether to visualize interaction
            (<span style="color:#00C000"><b>default</b></span>: false).
        frame_skip (int > 0): Number of times to repeat an action without observing
            (<span style="color:#00C000"><b>default</b></span>: 1).
        seed (int): Random seed
            (<span style="color:#00C000"><b>default</b></span>: none).
    """

    def __init__(
        self, level, life_loss_terminal=False, life_loss_punishment=0.0,
        repeat_action_probability=0.0, visualize=False, frame_skip=1, seed=None
    ):
        from ale_python_interface import ALEInterface

        self.environment = ALEInterface()
        self.rom_file = level

        self.life_loss_terminal = life_loss_terminal
        self.life_loss_punishment = life_loss_punishment

        self.environment.setFloat(b'repeat_action_probability', repeat_action_probability)
        self.environment.setBool(b'display_screen', visualize)
        self.environment.setInt(b'frame_skip', frame_skip)
        if seed is not None:
            self.environment.setInt(b'random_seed', seed)

        # All set commands must be done before loading the ROM.
        self.environment.loadROM(rom_file=self.rom_file.encode())
        self.available_actions = tuple(self.environment.getLegalActionSet())

        # Full list of actions:
        # No-Op, Fire, Up, Right, Left, Down, Up Right, Up Left, Down Right, Down Left, Up Fire,
        # Right Fire, Left Fire, Down Fire, Up Right Fire, Up Left Fire, Down Right Fire, Down Left
        # Fire

    def __str__(self):
        return super().__str__() + '({})'.format(self.rom_file)

    def states(self):
        width, height = self.environment.getScreenDims()
        return dict(type='float', shape=(height, width, 3))

    def actions(self):
        return dict(type='int', num_values=len(self.available_actions))

    def close(self):
        self.environment.__del__()
        self.environment = None

    def get_states(self):
        screen = np.copy(self.environment.getScreenRGB(screen_data=self.screen))
        screen = screen.astype(dtype=np.float32) / 255.0
        return screen

    def reset(self):
        self.environment.reset_game()
        width, height = self.environment.getScreenDims()
        self.screen = np.empty((height, width, 3), dtype=np.uint8)
        self.lives = self.environment.lives()
        return self.get_states()

    def execute(self, actions):
        reward = self.environment.act(action=self.available_actions[actions])
        terminal = self.environment.game_over()
        states = self.get_states()

        next_lives = self.environment.lives()
        if next_lives < self.lives:
            if self.life_loss_terminal:
                terminal = True
            elif self.life_loss_punishment > 0.0:
                reward -= self.life_loss_punishment
            self.lives = next_lives

        return states, terminal, reward
示例#33
0
class AtariEnvironment:
    
    def __init__(self, args, outputDir):
        
        self.outputDir = outputDir
        self.screenCaptureFrequency = args.screen_capture_freq
        
        self.ale = ALEInterface()
        self.ale.setInt(b'random_seed', 123456)
        random.seed(123456)
        # Fix https://groups.google.com/forum/#!topic/deep-q-learning/p4FAIaabwlo
        self.ale.setFloat(b'repeat_action_probability', 0.0)

        # Load the ROM file
        self.ale.loadROM(args.rom)

        self.actionSet = self.ale.getMinimalActionSet()
        self.gameNumber = 0
        self.stepNumber = 0
        self.resetGame()

    def getNumActions(self):
        return len(self.actionSet)

    def getState(self):
        return self.state
    
    def getGameNumber(self):
        return self.gameNumber
    
    def getFrameNumber(self):
        return self.ale.getFrameNumber()
    
    def getEpisodeFrameNumber(self):
        return self.ale.getEpisodeFrameNumber()
    
    def getEpisodeStepNumber(self):
        return self.episodeStepNumber
    
    def getStepNumber(self):
        return self.stepNumber
    
    def getGameScore(self):
        return self.gameScore

    def isGameOver(self):
        return self.ale.game_over()

    def step(self, action):
        previousLives = self.ale.lives()
        reward = 0
        isTerminal = 0
        self.stepNumber += 1
        self.episodeStepNumber += 1
        
        for i in range(4):
            prevScreenRGB = self.ale.getScreenRGB()
            reward += self.ale.act(self.actionSet[action])
            screenRGB = self.ale.getScreenRGB()
    
            # Detect end of episode, I don't think I'm handling this right in terms
            # of the overall game loop (??)
            if self.ale.lives() < previousLives or self.ale.game_over():
                isTerminal = 1
                break

            if self.gameNumber % self.screenCaptureFrequency == 0:
                dir = self.outputDir + '/screen_cap/game-%06d' % (self.gameNumber)
                if not os.path.isdir(dir):
                    os.makedirs(dir)
                self.ale.saveScreenPNG(dir + '/frame-%06d.png' % (self.getEpisodeFrameNumber()))


        maxedScreen = np.maximum(screenRGB, prevScreenRGB)
        self.state = self.state.stateByAddingScreen(maxedScreen, self.ale.getFrameNumber())
        self.gameScore += reward
        return reward, self.state, isTerminal

    def resetGame(self):
        if self.ale.game_over():
            self.gameNumber += 1
        self.ale.reset_game()
        self.state = State().stateByAddingScreen(self.ale.getScreenRGB(), self.ale.getFrameNumber())
        self.gameScore = 0
        self.episodeStepNumber = 0 # environment steps vs ALE frames.  Will probably be 4*frame number
示例#34
0
# ale.loadROM(rom_file)
#
# # Get the list of legal actions
# legal_actions = ale.getLegalActionSet()
#
# # Play 10 episodes
# for episode in range(10):
#   total_reward = 0
#   while not ale.game_over():
#     a = legal_actions[randrange(len(legal_actions))]
#     # Apply an action and get the resulting reward
#     reward = ale.act(a);
#     total_reward += reward
#   print('Episode %d ended with score: %d' % (episode, total_reward))
#   ale.reset_game()



from ale_python_interface import ALEInterface
ale = ALEInterface()
ale.setBool('display_screen', True)
rom_file = "./roms/breakoutv.bin"
ale.loadROM(rom_file)
ale.reset_game()
ale.getScreenRGB()
ale.reset_game()
ale.act(0)



示例#35
0
class AtariPlayer(RLEnvironment):
    """
    A wrapper for atari emulator.
    NOTE: will automatically restart when a real episode ends
    """
    def __init__(self, rom_file, viz=0, height_range=(None,None),
            frame_skip=4, image_shape=(84, 84), nullop_start=30,
            live_lost_as_eoe=True):
        """
        :param rom_file: path to the rom
        :param frame_skip: skip every k frames and repeat the action
        :param image_shape: (w, h)
        :param height_range: (h1, h2) to cut
        :param viz: visualization to be done.
            Set to 0 to disable.
            Set to a positive number to be the delay between frames to show.
            Set to a string to be a directory to store frames.
        :param nullop_start: start with random number of null ops
        :param live_losts_as_eoe: consider lost of lives as end of episode.  useful for training.
        """
        super(AtariPlayer, self).__init__()
        if not os.path.isfile(rom_file) and '/' not in rom_file:
            rom_file = get_dataset_dir('atari_rom', rom_file)
        assert os.path.isfile(rom_file), \
                "rom {} not found. Please download at {}".format(rom_file, ROM_URL)

        try:
            ALEInterface.setLoggerMode(ALEInterface.Logger.Warning)
        except AttributeError:
            log_once()

        # avoid simulator bugs: https://github.com/mgbellemare/Arcade-Learning-Environment/issues/86
        with _ALE_LOCK:
            self.ale = ALEInterface()
            self.rng = get_rng(self)

            self.ale.setInt(b"random_seed", self.rng.randint(0, 10000))
            self.ale.setBool(b"showinfo", False)

            self.ale.setInt(b"frame_skip", 1)
            self.ale.setBool(b'color_averaging', False)
            # manual.pdf suggests otherwise.
            self.ale.setFloat(b'repeat_action_probability', 0.0)

            # viz setup
            if isinstance(viz, six.string_types):
                assert os.path.isdir(viz), viz
                self.ale.setString(b'record_screen_dir', viz)
                viz = 0
            if isinstance(viz, int):
                viz = float(viz)
            self.viz = viz
            if self.viz and isinstance(self.viz, float):
                self.windowname = os.path.basename(rom_file)
                cv2.startWindowThread()
                cv2.namedWindow(self.windowname)

            self.ale.loadROM(rom_file.encode('utf-8'))
        self.width, self.height = self.ale.getScreenDims()
        self.actions = self.ale.getMinimalActionSet()


        self.live_lost_as_eoe = live_lost_as_eoe
        self.frame_skip = frame_skip
        self.nullop_start = nullop_start
        self.height_range = height_range
        self.image_shape = image_shape

        self.current_episode_score = StatCounter()
        self.restart_episode()

    def _grab_raw_image(self):
        """
        :returns: the current 3-channel image
        """
        m = self.ale.getScreenRGB()
        return m.reshape((self.height, self.width, 3))

    def current_state(self):
        """
        :returns: a gray-scale (h, w, 1) float32 image
        """
        ret = self._grab_raw_image()
        # max-pooled over the last screen
        ret = np.maximum(ret, self.last_raw_screen)
        if self.viz:
            if isinstance(self.viz, float):
                #m = cv2.resize(ret, (1920,1200))
                cv2.imshow(self.windowname, ret)
                time.sleep(self.viz)
        ret = ret[self.height_range[0]:self.height_range[1],:].astype('float32')
        # 0.299,0.587.0.114. same as rgb2y in torch/image
        ret = cv2.cvtColor(ret, cv2.COLOR_RGB2GRAY)
        ret = cv2.resize(ret, self.image_shape)
        ret = np.expand_dims(ret, axis=2)
        return ret

    def get_action_space(self):
        return DiscreteActionSpace(len(self.actions))

    def restart_episode(self):
        if self.current_episode_score.count > 0:
            self.stats['score'].append(self.current_episode_score.sum)
        self.current_episode_score.reset()
        self.ale.reset_game()

        # random null-ops start
        n = self.rng.randint(self.nullop_start)
        self.last_raw_screen = self._grab_raw_image()
        for k in range(n):
            if k == n - 1:
                self.last_raw_screen = self._grab_raw_image()
            self.ale.act(0)

    def action(self, act):
        """
        :param act: an index of the action
        :returns: (reward, isOver)
        """
        oldlives = self.ale.lives()
        r = 0
        for k in range(self.frame_skip):
            if k == self.frame_skip - 1:
                self.last_raw_screen = self._grab_raw_image()
            r += self.ale.act(self.actions[act])
            newlives = self.ale.lives()
            if self.ale.game_over() or \
                    (self.live_lost_as_eoe and newlives < oldlives):
                break

        self.current_episode_score.feed(r)
        isOver = self.ale.game_over()
        if isOver:
            self.restart_episode()
        if self.live_lost_as_eoe:
            isOver = isOver or newlives < oldlives
        return (r, isOver)
示例#36
0
class ALEEnvironment(BaseEnvironment):
    """
    The :class:`MinimalGameHandler` class takes care of the interface to the ALE and tries to do nothing else. It's
    meant for advanced users who need fine control over every aspect of the process. It has many functions that are simply
    wrappers of the underlying ALE but with pythonic names/usage.

    Parameters
    ----------
    rom : byte string
        Specifies the directory to load the rom from. Must be a byte string: b'dir_for_rom/rom.bin'
    display_screen : boolean
        Default False. Whether or not to show the game. True takes longer to run but can be fun to watch
    step_cap: int
        Default None. Maximum number of steps to run in an episode. Breakout can sometimes not return terminal
        even when game is ended. This fixes that and will return terminal after stepping above this count
    """
    def __init__(self, rom, resize_shape=(84, 84), skip_frame=1, repeat_action_probability=0.0,
                 step_cap=None, loss_of_life_termination=False, loss_of_life_negative_reward=False,
                 grayscale=True, display_screen=False, seed=np.random.RandomState()):
        # set up emulator
        self.ale = ALEInterface()

        if display_screen:
            self.ale.setBool(b'display_screen', True)

        self.ale.setInt(b'frame_skip', skip_frame)
        self.ale.setInt(b'random_seed', seed.randint(0, 9999))
        self.ale.setFloat(b'repeat_action_probability', repeat_action_probability)
        self.ale.setBool(b'color_averaging', False)

        self.ale.loadROM(rom.encode())

        # setup gamescreen object. I think this is faster than recreating an empty each time
        width, height = self.ale.getScreenDims()
        channels = 1 if grayscale else 3
        self.grayscale = grayscale
        self.gamescreen = np.empty((height, width, 1), dtype=np.uint8)

        self.resize_shape = resize_shape
        self.skip_frame = skip_frame
        self.step_cap = step_cap
        self.curr_step_count = 0

        # setup action converter
        # ALE returns legal action indexes, convert these to just numbers
        self.action_inds = self.ale.getMinimalActionSet()

        # setup lives
        self.loss_of_life_negative_reward = loss_of_life_negative_reward
        self.cur_lives = self.ale.lives()
        self.loss_of_life_termination = loss_of_life_termination
        self.life_lost = False

    def reset(self):
        self.ale.reset_game()
        self.cur_lives = self.ale.lives()
        self.life_lost = False
        self.curr_step_count = 0

    def step(self, action):
        self.curr_step_count += 1
        ale_action = self.action_inds[action]
        return self._step(ale_action)

    def _step(self, ale_action):
        if not self.loss_of_life_termination and not self.loss_of_life_negative_reward:
            return self.ale.act(ale_action)
        else:
            rew = self.ale.act(ale_action)
            new_lives = self.ale.lives()
            if new_lives < self.cur_lives:
                # if loss of life is negative reward subtract 1 from reward
                if self.loss_of_life_negative_reward:
                    rew -= 1
                self.cur_lives = new_lives
                self.life_lost = True
            return rew

    def get_state(self):
        if self.grayscale:
            self.gamescreen = self.ale.getScreenGrayscale(self.gamescreen)
        else:
            self.gamescreen = self.ale.getScreenRGB(self.gamescreen)
        # if resize_shape is none then don't resize
        if self.resize_shape is not None:
            # if grayscale we remove the last dimmension (channel)
            if self.grayscale:
                processedImg = imresize(self.gamescreen[:, :, 0], self.resize_shape)
            else:
                processedImg = imresize(self.gamescreen, self.resize_shape)
        return processedImg

    def get_state_shape(self):
        return self.resize_shape

    def get_terminal(self):
        if self.loss_of_life_termination and self.life_lost:
            return True
        elif self.step_cap is not None and self.curr_step_count > self.step_cap:
            return True
        else:
            return self.ale.game_over()

    def get_num_actions(self):
        return len(self.action_inds)
示例#37
0
controller = FittedQController(numActions=len(legal_actions), numFeatures=5000, horizon=100, discountParameter=0.3, epsilon=0.2)

# Simulate episodes
numEpisodes = 100

counter = 1  # Total number of time steps, used for naming screenshots with the time index they were taken

for episode in xrange(numEpisodes):

    # Initialize reward to zero as we do not have any reward yet
    reward = 0
    total_reward = 0

    while not ale.game_over():
        # Retrieve and encode current frame
        frame = ale.getScreenRGB()
        frame = np.array(frame, dtype=float)
        frameCode = encodeFrame(frame)

        # Get action from controller, and pass in frame code and previous reward (frame code is dct of flattened frame,
        # can be replaced with deep autoencoder, etc.)
        a = legal_actions[controller.queryForActionAndUpdateExperience(features=frameCode, reward=reward)]

        # Save screen shot of game
        #image = png.from_array(frame, 'RGB;8')
        #imFile = savePath + str(counter) + '.png'
        #image.save(imFile)
        #counter += 1
        #image = []

        # Select an action and send to the ALE. Get reward and add to running tally
示例#38
0
class AtariPlayer(RLEnvironment):
    """
    A wrapper for atari emulator.
    """
    def __init__(self, rom_file, viz=0, height_range=(None,None),
            frame_skip=4, image_shape=(84, 84), nullop_start=30,
            live_lost_as_eoe=True):
        """
        :param rom_file: path to the rom
        :param frame_skip: skip every k frames and repeat the action
        :param image_shape: (w, h)
        :param height_range: (h1, h2) to cut
        :param viz: visualization to be done.
            Set to 0 to disable.
            Set to a positive number to be the delay between frames to show.
            Set to a string to be a directory to store frames.
        :param nullop_start: start with random number of null ops
        :param live_losts_as_eoe: consider lost of lives as end of episode.  useful for training.
        """
        super(AtariPlayer, self).__init__()
        self.ale = ALEInterface()
        self.rng = get_rng(self)

        self.ale.setInt("random_seed", self.rng.randint(0, 10000))
        self.ale.setBool("showinfo", False)

        try:
            ALEInterface.setLoggerMode(ALEInterface.Logger.Warning)
        except AttributeError:
            log_once()

        self.ale.setInt("frame_skip", 1)
        self.ale.setBool('color_averaging', False)
        # manual.pdf suggests otherwise. may need to check
        self.ale.setFloat('repeat_action_probability', 0.0)

        # viz setup
        if isinstance(viz, six.string_types):
            assert os.path.isdir(viz), viz
            self.ale.setString('record_screen_dir', viz)
            viz = 0
        if isinstance(viz, int):
            viz = float(viz)
        self.viz = viz
        if self.viz and isinstance(self.viz, float):
            self.windowname = os.path.basename(rom_file)
            cv2.startWindowThread()
            cv2.namedWindow(self.windowname)

        self.ale.loadROM(rom_file)
        self.width, self.height = self.ale.getScreenDims()
        self.actions = self.ale.getMinimalActionSet()


        self.live_lost_as_eoe = live_lost_as_eoe
        self.frame_skip = frame_skip
        self.nullop_start = nullop_start
        self.height_range = height_range
        self.image_shape = image_shape

        self.current_episode_score = StatCounter()
        self.restart_episode()

    def _grab_raw_image(self):
        """
        :returns: the current 3-channel image
        """
        m = self.ale.getScreenRGB()
        return m.reshape((self.height, self.width, 3))

    def current_state(self):
        """
        :returns: a gray-scale (h, w, 1) image
        """
        ret = self._grab_raw_image()
        # max-pooled over the last screen
        ret = np.maximum(ret, self.last_raw_screen)
        if self.viz:
            if isinstance(self.viz, float):
                cv2.imshow(self.windowname, ret)
                time.sleep(self.viz)
        ret = ret[self.height_range[0]:self.height_range[1],:]
        # 0.299,0.587.0.114. same as rgb2y in torch/image
        ret = cv2.cvtColor(ret, cv2.COLOR_RGB2GRAY)
        ret = cv2.resize(ret, self.image_shape)
        ret = np.expand_dims(ret, axis=2)
        return ret

    def get_num_actions(self):
        """
        :returns: the number of legal actions
        """
        return len(self.actions)

    def restart_episode(self):
        if self.current_episode_score.count > 0:
            self.stats['score'].append(self.current_episode_score.sum)
        self.current_episode_score.reset()
        self.ale.reset_game()

        # random null-ops start
        n = self.rng.randint(self.nullop_start)
        self.last_raw_screen = self._grab_raw_image()
        for k in range(n):
            if k == n - 1:
                self.last_raw_screen = self._grab_raw_image()
            self.ale.act(0)

    def action(self, act):
        """
        :param act: an index of the action
        :returns: (reward, isOver)
        """
        oldlives = self.ale.lives()
        r = 0
        for k in range(self.frame_skip):
            if k == self.frame_skip - 1:
                self.last_raw_screen = self._grab_raw_image()
            r += self.ale.act(self.actions[act])
            newlives = self.ale.lives()
            if self.ale.game_over() or \
                    (self.live_lost_as_eoe and newlives < oldlives):
                break

        self.current_episode_score.feed(r)
        isOver = self.ale.game_over()
        if isOver:
            self.restart_episode()
        if self.live_lost_as_eoe:
            isOver = isOver or newlives < oldlives
        return (r, isOver)

    def get_stat(self):
        try:
            return {'avg_score': np.mean(self.stats['score']),
                    'max_score': float(np.max(self.stats['score'])) }
        except ValueError:
            return {}
示例#39
0
regressor = skflow.TensorFlowDNNRegressor(hidden_units=[20,10, 10, 15,20],learning_rate=0.01, verbose=int(sys.argv[2]))
regressor.fit(np.random.randn(1601, 1), append([1.], np.zeros((1600, 1))))
# regressor = skflow.TensorFlowEstimator.restore('./regressor')
def Q(s, a):
    return regressor.predict(append(s, a))

def detectState(ale):
    return cv2.resize(ale.getScreenGrayscale(), (40,40))

while True:
    ale = ALEInterface()
    ale.loadROM("breakout.bin")
    actionSet = ale.getMinimalActionSet()
    while not ale.game_over():
        if sys.argv[1] == 'disp':
            cv2.imshow('', cv2.resize(ale.getScreenRGB(), (600,600)))
            cv2.waitKey(1)
        s = detectState(ale)
        qvals = []
        for action in actionSet:
            qvals.append(Q(s, action)[0])
        a = actionSet[qvals.index(max(qvals))]
        X = append(s, a)
        r = ale.act(a)
        s_ = detectState(ale)
        qvals = []
        for action in actionSet:
            qvals.append(Q(s_, action)[0])
        a_ = actionSet[qvals.index(max(qvals))]
        y = r + g*Q(s_, a_)
        regressor.fit(X, y, logdir='/tmp/regressor')
示例#40
0
文件: ale.py 项目: agajews/tfbrain
if USE_SDL:
    if sys.platform == 'darwin':
        import pygame
        pygame.init()
        ale.setBool('sound', False)  # Sound doesn't work on OSX
    elif sys.platform.startswith('linux'):
        ale.setBool('sound', True)
    ale.setBool('display_screen', True)

# Load the ROM file
rom_file = str.encode('data/roms/breakout.bin')
ale.loadROM(rom_file)

# Get the list of legal actions
legal_actions = ale.getLegalActionSet()
print(legal_actions)

# Play 10 episodes
for episode in range(10):
    total_reward = 0
    while not ale.game_over():
        a = legal_actions[randrange(len(legal_actions))]
        screen = ale.getScreenRGB()
        # Apply an action and get the resulting reward
        reward = ale.act(a)
        total_reward += reward
    print('Episode %d ended with score: %d' % (episode, total_reward))
    ale.reset_game()
    img = Image.fromarray(screen, 'RGB')
    img.show()
示例#41
0
class Emulator:
    def __init__(self, rom_path, rom_name, visualize, actor_id, rseed, single_life_episodes = False):
        
        self.ale = ALEInterface()

        self.ale.setInt("random_seed", rseed * (actor_id +1))

        # For fuller control on explicit action repeat (>= ALE 0.5.0) 
        self.ale.setFloat("repeat_action_probability", 0.0)
        
        # Disable frame_skip and color_averaging
        # See: http://is.gd/tYzVpj
        self.ale.setInt("frame_skip", 1)
        self.ale.setBool("color_averaging", False)
        self.ale.loadROM(rom_path + "/" + rom_name + ".bin")
        self.legal_actions = self.ale.getMinimalActionSet()        
        self.screen_width,self.screen_height = self.ale.getScreenDims()
        #self.ale.setBool('display_screen', True)
        
        # Processed historcal frames that will be fed in to the network 
        # (i.e., four 84x84 images)
        self.screen_images_processed = np.zeros((IMG_SIZE_X, IMG_SIZE_Y, 
            NR_IMAGES)) 
        self.rgb_screen = np.zeros((self.screen_height,self.screen_width, 3), dtype=np.uint8)
        self.gray_screen = np.zeros((self.screen_height,self.screen_width,1), dtype=np.uint8)

        self.frame_pool = np.empty((2, self.screen_height, self.screen_width))
        self.current = 0
        self.lives = self.ale.lives()

        self.visualize = visualize
        self.visualize_processed = False
        self.windowname = rom_name + ' ' + str(actor_id)
        if self.visualize:
            logger.debug("Opening emulator window...")
            #from skimage import io
            #io.use_plugin('qt')
            cv2.startWindowThread()
            cv2.namedWindow(self.windowname)
            logger.debug("Emulator window opened")
            
        if self.visualize_processed:
            logger.debug("Opening processed frame window...")
            cv2.startWindowThread()
            logger.debug("Processed frame window opened")
            cv2.namedWindow(self.windowname + "_processed")
            
        self.single_life_episodes = single_life_episodes

    def get_screen_image(self):
        """ Add screen (luminance) to frame pool """
        # [screen_image, screen_image_rgb] = [self.ale.getScreenGrayscale(), 
        #     self.ale.getScreenRGB()]
        self.ale.getScreenGrayscale(self.gray_screen)
        self.ale.getScreenRGB(self.rgb_screen)
        self.frame_pool[self.current] = np.squeeze(self.gray_screen)
        self.current = (self.current + 1) % FRAMES_IN_POOL
        return self.rgb_screen

    def new_game(self):
        """ Restart game """
        self.ale.reset_game()
        self.lives = self.ale.lives()

        if MAX_START_WAIT < 0:
            logger.debug("Cannot time travel yet.")
            sys.exit()
        elif MAX_START_WAIT > 0:
            wait = random.randint(0, MAX_START_WAIT)
        else:
            wait = 0
        for _ in xrange(wait):
            self.ale.act(self.legal_actions[0])

    def process_frame_pool(self):
        """ Preprocess frame pool """
        
        img = None
        if BLEND_METHOD == "max_pool":
            img = np.amax(self.frame_pool, axis=0)
        
        #img resize(img[:210, :], (84, 84))
        img = cv2.resize(img[:210, :], (84, 84), 
            interpolation=cv2.INTER_LINEAR)
        
        img = img.astype(np.float32)
        img *= (1.0/255.0)
        
        return img
        # Reduce height to 210, if not so
        #cropped_img = img[:210, :]
        # Downsample to 110x84
        #down_sampled_img = resize(cropped_img, (84, 84))
        
        # Crop to 84x84 playing area
        #stackable_image = down_sampled_img[:, 26:110]
        #return stackable_image

    def action_repeat(self, a):
        """ Repeat action and grab screen into frame pool """
        reward = 0
        for i in xrange(ACTION_REPEAT):
            reward += self.ale.act(self.legal_actions[a])
            new_screen_image_rgb = self.get_screen_image()
        return reward, new_screen_image_rgb

    def get_reshaped_state(self, state):
        return np.reshape(state, 
            (1, IMG_SIZE_X, IMG_SIZE_Y, NR_IMAGES))
        #return np.reshape(self.screen_images_processed, 
        #    (1, IMG_SIZE_X, IMG_SIZE_Y, NR_IMAGES))

    def get_initial_state(self):
        """ Get the initial state """
        self.new_game()
        for step in xrange(NR_IMAGES):
            reward, new_screen_image_rgb = self.action_repeat(0)
            self.screen_images_processed[:, :, step] = self.process_frame_pool()
            self.show_screen(new_screen_image_rgb)
        if self.is_terminal():
            MAX_START_WAIT -= 1
            return self.get_initial_state()
        return np.copy(self.screen_images_processed) #get_reshaped_state()      

    def next(self, action):
        """ Get the next state, reward, and game over signal """
        reward, new_screen_image_rgb = self.action_repeat(np.argmax(action))
        self.screen_images_processed[:, :, 0:3] = \
            self.screen_images_processed[:, :, 1:4]
        self.screen_images_processed[:, :, 3] = self.process_frame_pool()
        self.show_screen(new_screen_image_rgb)
        terminal = self.is_terminal()
        self.lives = self.ale.lives()
        return np.copy(self.screen_images_processed), reward, terminal #get_reshaped_state(), reward, terminal
    
    def show_screen(self, image):
        """ Show visuals for raw and processed images """
        if self.visualize:
            #io.imshow(image[:210, :], fancy=True)
            cv2.imshow(self.windowname, image[:210, :])
        if self.visualize_processed:
            #io.imshow(self.screen_images_processed[:, :, 3], fancy=True)
            cv2.imshow(self.windowname + "_processed", self.screen_images_processed[:, :, 3])
            
    def is_terminal(self):
        if self.single_life_episodes:
            return (self.is_over() or (self.lives > self.ale.lives()))
        else:
            return self.is_over()

    def is_over(self):
        return self.ale.game_over()
示例#42
0
class AtariPlayer(gym.Env):
    """
    A wrapper for ALE emulator, with configurations to mimic DeepMind DQN settings.

    Info:
        score: the accumulated reward in the current game
        gameOver: True when the current game is Over
    """

    def __init__(self, rom_file, viz=0,
                 frame_skip=4, nullop_start=30,
                 live_lost_as_eoe=True, max_num_frames=0):
        """
        Args:
            rom_file: path to the rom
            frame_skip: skip every k frames and repeat the action
            viz: visualization to be done.
                Set to 0 to disable.
                Set to a positive number to be the delay between frames to show.
                Set to a string to be a directory to store frames.
            nullop_start: start with random number of null ops.
            live_losts_as_eoe: consider lost of lives as end of episode. Useful for training.
            max_num_frames: maximum number of frames per episode.
        """
        super(AtariPlayer, self).__init__()
        if not os.path.isfile(rom_file) and '/' not in rom_file:
            rom_file = get_dataset_path('atari_rom', rom_file)
        assert os.path.isfile(rom_file), \
            "rom {} not found. Please download at {}".format(rom_file, ROM_URL)

        try:
            ALEInterface.setLoggerMode(ALEInterface.Logger.Error)
        except AttributeError:
            if execute_only_once():
                logger.warn("You're not using latest ALE")

        # avoid simulator bugs: https://github.com/mgbellemare/Arcade-Learning-Environment/issues/86
        with _ALE_LOCK:
            self.ale = ALEInterface()
            self.rng = get_rng(self)
            self.ale.setInt(b"random_seed", self.rng.randint(0, 30000))
            self.ale.setInt(b"max_num_frames_per_episode", max_num_frames)
            self.ale.setBool(b"showinfo", False)

            self.ale.setInt(b"frame_skip", 1)
            self.ale.setBool(b'color_averaging', False)
            # manual.pdf suggests otherwise.
            self.ale.setFloat(b'repeat_action_probability', 0.0)

            # viz setup
            if isinstance(viz, six.string_types):
                assert os.path.isdir(viz), viz
                self.ale.setString(b'record_screen_dir', viz)
                viz = 0
            if isinstance(viz, int):
                viz = float(viz)
            self.viz = viz
            if self.viz and isinstance(self.viz, float):
                self.windowname = os.path.basename(rom_file)
                cv2.namedWindow(self.windowname)

            self.ale.loadROM(rom_file.encode('utf-8'))
        self.width, self.height = self.ale.getScreenDims()
        self.actions = self.ale.getMinimalActionSet()

        self.live_lost_as_eoe = live_lost_as_eoe
        self.frame_skip = frame_skip
        self.nullop_start = nullop_start

        self.action_space = spaces.Discrete(len(self.actions))
        self.observation_space = spaces.Box(
            low=0, high=255, shape=(self.height, self.width, 1), dtype=np.uint8)
        self._restart_episode()

    def get_action_meanings(self):
        return [ACTION_MEANING[i] for i in self.actions]

    def _grab_raw_image(self):
        """
        :returns: the current 3-channel image
        """
        m = self.ale.getScreenRGB()
        return m.reshape((self.height, self.width, 3))

    def _current_state(self):
        """
        :returns: a gray-scale (h, w, 1) uint8 image
        """
        ret = self._grab_raw_image()
        # max-pooled over the last screen
        ret = np.maximum(ret, self.last_raw_screen)
        if self.viz:
            if isinstance(self.viz, float):
                cv2.imshow(self.windowname, ret)
                cv2.waitKey(int(self.viz * 1000))
        ret = ret.astype('float32')
        # 0.299,0.587.0.114. same as rgb2y in torch/image
        ret = cv2.cvtColor(ret, cv2.COLOR_RGB2GRAY)[:, :, np.newaxis]
        return ret.astype('uint8')  # to save some memory

    def _restart_episode(self):
        with _ALE_LOCK:
            self.ale.reset_game()

        # random null-ops start
        n = self.rng.randint(self.nullop_start)
        self.last_raw_screen = self._grab_raw_image()
        for k in range(n):
            if k == n - 1:
                self.last_raw_screen = self._grab_raw_image()
            self.ale.act(0)

    def reset(self):
        if self.ale.game_over():
            self._restart_episode()
        return self._current_state()

    def step(self, act):
        oldlives = self.ale.lives()
        r = 0
        for k in range(self.frame_skip):
            if k == self.frame_skip - 1:
                self.last_raw_screen = self._grab_raw_image()
            r += self.ale.act(self.actions[act])
            newlives = self.ale.lives()
            if self.ale.game_over() or \
                    (self.live_lost_as_eoe and newlives < oldlives):
                break

        isOver = self.ale.game_over()
        if self.live_lost_as_eoe:
            isOver = isOver or newlives < oldlives

        info = {'ale.lives': newlives}
        return self._current_state(), r, isOver, info
示例#43
0
def train_agent(gamepath, agent, n_episodes, display_screen, record_weights,
        reduce_exploration_prob_amount, n_frames_to_skip):
    """
    :description: trains an agent to play a game

    :type gamepath: string
    :param gamepath: path to the binary of the game to be played

    :type agent: subclass RLAlgorithm
    :param agent: the algorithm/agent that learns to play the game

    :type n_episodes: int
    :param n_episodes: number of episodes of the game on which to train
    """

    # load the ale interface to interact with
    ale = ALEInterface()
    ale.setInt('random_seed', 42)

    # display/recording settings, doesn't seem to work currently
    recordings_dir = './recordings/breakout/'
    # previously "USE_SDL"
    if display_screen:
        if sys.platform == 'darwin':
            import pygame
            pygame.init()
            ale.setBool('sound', False) # Sound doesn't work on OSX
            #ale.setString("record_screen_dir", recordings_dir);
        elif sys.platform.startswith('linux'):
            ale.setBool('sound', True)
        ale.setBool('display_screen', True)

    ale.loadROM(gamepath)
    ale.setInt("frame_skip", n_frames_to_skip)

    screen_preprocessor = screen_utils.RGBScreenPreprocessor()

    rewards = []
    best_reward = 0
    print('starting training...')
    for episode in xrange(n_episodes):
        action = 0
        reward = 0
        newAction = None

        total_reward = 0
        counter = 0
        lives = ale.lives()

        screen = np.zeros((32, 32, 3), dtype=np.int8)
        state = { "screen" : screen,
                "objects" : None,
                "prev_objects": None,
                "prev_action": 0,
                "action": 0 }

        while not ale.game_over():
            # if newAction is None then we are training an off-policy algorithm
            # otherwise, we are training an on policy algorithm
            if newAction is None:
                action = agent.getAction(state)
            else:
                action = newAction
            reward += ale.act(action)

            if ale.lives() < lives:
              lives = ale.lives()
              reward -= 1
            total_reward += reward

            new_screen = ale.getScreenRGB()
            new_screen = screen_preprocessor.preprocess(new_screen)
            new_state = {"screen": new_screen,
                        "objects": None,
                        "prev_objects": state["objects"],
                        "prev_action": state["action"],
                        "action": action}
            newAction = agent.incorporateFeedback(state, action, reward, new_state)

            state = new_state
            reward = 0

        rewards.append(total_reward)

        if total_reward > best_reward and record_weights:
            best_reward = total_reward
            print("Best reward: {}".format(total_reward))

        if episode % PRINT_TRAINING_INFO_PERIOD == 0:
            print '\n############################'
            print '### training information ###'
            print("Average reward: {}".format(np.mean(rewards)))
            print("Last 50: {}".format(np.mean(rewards[-NUM_EPISODES_AVERAGE_REWARD_OVER:])))
            print("Exploration probability: {}".format(agent.explorationProb))
            print('action: {}'.format(action))
            print('size of weights dict: {}'.format(len(agent.weights)))
            print('current objects: {}'.format(state['objects']))
            print('previous objects: {}'.format(state['prev_objects']))
            avg_feat_weight = np.mean([v for k,v in agent.weights.iteritems()])
            print('average feature weight: {}'.format(avg_feat_weight))
            print '############################'
            print '############################\n'

        if episode != 0 and episode % RECORD_WEIGHTS_PERIOD == 0 and record_weights:
            file_utils.save_rewards(rewards, filename='episode-{}-{}-rewards'.format(episode, type(agent).__name__))
            file_utils.save_weights(agent.weights, filename='episode-{}-{}-weights'.format(episode, type(agent).__name__))

        if agent.explorationProb > MINIMUM_EXPLORATION_EPSILON:
            agent.explorationProb -= reduce_exploration_prob_amount

        print('episode: {} ended with score: {}'.format(episode, total_reward))
        ale.reset_game()
    return rewards
示例#44
0
class Emulator:
    def __init__(self, rom_path, rom_name, visualize, actor_id, rseed, single_life_episodes = False):
        
        self.ale = ALEInterface()

        self.ale.setInt("random_seed", rseed * (actor_id +1))

        # For fuller control on explicit action repeat (>= ALE 0.5.0) 
        self.ale.setFloat("repeat_action_probability", 0.0)
        
        # Disable frame_skip and color_averaging
        # See: http://is.gd/tYzVpj
        self.ale.setInt("frame_skip", 1)
        self.ale.setBool("color_averaging", False)
        self.ale.loadROM(rom_path + "/" + rom_name + ".bin")
        self.legal_actions = self.ale.getMinimalActionSet()        
        self.screen_width,self.screen_height = self.ale.getScreenDims()
        #self.ale.setBool('display_screen', True)
        
        # Processed historcal frames that will be fed in to the network 
        # (i.e., four 84x84 images)
        self.screen_images_processed = np.zeros((IMG_SIZE_X, IMG_SIZE_Y, 
            NR_IMAGES)) 
        self.rgb_screen = np.zeros((self.screen_height,self.screen_width, 3), dtype=np.uint8)
        self.gray_screen = np.zeros((self.screen_height,self.screen_width,1), dtype=np.uint8)

        self.frame_pool = np.empty((2, self.screen_height, self.screen_width))
        self.current = 0
        self.lives = self.ale.lives()

        self.visualize = visualize
        self.visualize_processed = False
        self.windowname = rom_name + ' ' + str(actor_id)
        if self.visualize:
            logger.debug("Opening emulator window...")
            #from skimage import io
            #io.use_plugin('qt')
            cv2.startWindowThread()
            cv2.namedWindow(self.windowname)
            logger.debug("Emulator window opened")
            
        if self.visualize_processed:
            logger.debug("Opening processed frame window...")
            cv2.startWindowThread()
            logger.debug("Processed frame window opened")
            cv2.namedWindow(self.windowname + "_processed")
            
        self.single_life_episodes = single_life_episodes

    def get_screen_image(self):
        """ Add screen (luminance) to frame pool """
        # [screen_image, screen_image_rgb] = [self.ale.getScreenGrayscale(), 
        #     self.ale.getScreenRGB()]
        self.ale.getScreenGrayscale(self.gray_screen)
        self.ale.getScreenRGB(self.rgb_screen)
        self.frame_pool[self.current] = np.squeeze(self.gray_screen)
        self.current = (self.current + 1) % FRAMES_IN_POOL
        return self.rgb_screen

    def new_game(self):
        """ Restart game """
        self.ale.reset_game()
        self.lives = self.ale.lives()

        if MAX_START_WAIT < 0:
            logger.debug("Cannot time travel yet.")
            sys.exit()
        elif MAX_START_WAIT > 0:
            wait = random.randint(0, MAX_START_WAIT)
        else:
            wait = 0
        for _ in xrange(wait):
            self.ale.act(self.legal_actions[0])

    def process_frame_pool(self):
        """ Preprocess frame pool """
        
        img = None
        if BLEND_METHOD == "max_pool":
            img = np.amax(self.frame_pool, axis=0)
        
        #img resize(img[:210, :], (84, 84))
        img = cv2.resize(img[:210, :], (84, 84), 
            interpolation=cv2.INTER_LINEAR)
        
        img = img.astype(np.float32)
        img *= (1.0/255.0)
        
        return img
        # Reduce height to 210, if not so
        #cropped_img = img[:210, :]
        # Downsample to 110x84
        #down_sampled_img = resize(cropped_img, (84, 84))
        
        # Crop to 84x84 playing area
        #stackable_image = down_sampled_img[:, 26:110]
        #return stackable_image

    def action_repeat(self, a):
        """ Repeat action and grab screen into frame pool """
        reward = 0
        for i in xrange(ACTION_REPEAT):
            reward += self.ale.act(self.legal_actions[a])
            new_screen_image_rgb = self.get_screen_image()
        return reward, new_screen_image_rgb

    def get_reshaped_state(self, state):
        return np.reshape(state, 
            (1, IMG_SIZE_X, IMG_SIZE_Y, NR_IMAGES))
        #return np.reshape(self.screen_images_processed, 
        #    (1, IMG_SIZE_X, IMG_SIZE_Y, NR_IMAGES))

    def get_initial_state(self):
        """ Get the initial state """
        self.new_game()
        for step in xrange(NR_IMAGES):
            reward, new_screen_image_rgb = self.action_repeat(0)
            self.screen_images_processed[:, :, step] = self.process_frame_pool()
            self.show_screen(new_screen_image_rgb)
        if self.is_terminal():
            MAX_START_WAIT -= 1
            return self.get_initial_state()
        return np.copy(self.screen_images_processed) #get_reshaped_state()      

    def next(self, action):
        """ Get the next state, reward, and game over signal """
        reward, new_screen_image_rgb = self.action_repeat(np.argmax(action))
        self.screen_images_processed[:, :, 0:3] = \
            self.screen_images_processed[:, :, 1:4]
        self.screen_images_processed[:, :, 3] = self.process_frame_pool()
        self.show_screen(new_screen_image_rgb)
        terminal = self.is_terminal()
        self.lives = self.ale.lives()
        return np.copy(self.screen_images_processed), reward, terminal #get_reshaped_state(), reward, terminal
    
    def show_screen(self, image):
        """ Show visuals for raw and processed images """
        if self.visualize:
            #io.imshow(image[:210, :], fancy=True)
            cv2.imshow(self.windowname, image[:210, :])
        if self.visualize_processed:
            #io.imshow(self.screen_images_processed[:, :, 3], fancy=True)
            cv2.imshow(self.windowname + "_processed", self.screen_images_processed[:, :, 3])
            
    def is_terminal(self):
        if self.single_life_episodes:
            return (self.is_over() or (self.lives > self.ale.lives()))
        else:
            return self.is_over()

    def is_over(self):
        return self.ale.game_over()
示例#45
0
class AtariPlayer(RLEnvironment):
    """
    A wrapper for atari emulator.
    NOTE: will automatically restart when a real episode ends
    """
    def __init__(self,
                 rom_file,
                 viz=0,
                 height_range=(None, None),
                 frame_skip=4,
                 image_shape=(84, 84),
                 nullop_start=30,
                 live_lost_as_eoe=True):
        """
        :param rom_file: path to the rom
        :param frame_skip: skip every k frames and repeat the action
        :param image_shape: (w, h)
        :param height_range: (h1, h2) to cut
        :param viz: visualization to be done.
            Set to 0 to disable.
            Set to a positive number to be the delay between frames to show.
            Set to a string to be a directory to store frames.
        :param nullop_start: start with random number of null ops
        :param live_losts_as_eoe: consider lost of lives as end of episode.  useful for training.
        """
        super(AtariPlayer, self).__init__()
        if not os.path.isfile(rom_file) and '/' not in rom_file:
            rom_file = os.path.join(get_dataset_dir('atari_rom'), rom_file)
        assert os.path.isfile(rom_file), \
                "rom {} not found. Please download at {}".format(rom_file, ROM_URL)

        try:
            ALEInterface.setLoggerMode(ALEInterface.Logger.Warning)
        except AttributeError:
            log_once()

        # avoid simulator bugs: https://github.com/mgbellemare/Arcade-Learning-Environment/issues/86
        with _ALE_LOCK:
            self.ale = ALEInterface()
            self.rng = get_rng(self)

            self.ale.setInt("random_seed", self.rng.randint(0, 10000))
            self.ale.setBool("showinfo", False)

            self.ale.setInt("frame_skip", 1)
            self.ale.setBool('color_averaging', False)
            # manual.pdf suggests otherwise.
            self.ale.setFloat('repeat_action_probability', 0.0)

            # viz setup
            if isinstance(viz, six.string_types):
                assert os.path.isdir(viz), viz
                self.ale.setString('record_screen_dir', viz)
                viz = 0
            if isinstance(viz, int):
                viz = float(viz)
            self.viz = viz
            if self.viz and isinstance(self.viz, float):
                self.windowname = os.path.basename(rom_file)
                cv2.startWindowThread()
                cv2.namedWindow(self.windowname)

            self.ale.loadROM(rom_file)
        self.width, self.height = self.ale.getScreenDims()
        self.actions = self.ale.getMinimalActionSet()

        self.live_lost_as_eoe = live_lost_as_eoe
        self.frame_skip = frame_skip
        self.nullop_start = nullop_start
        self.height_range = height_range
        self.image_shape = image_shape

        self.current_episode_score = StatCounter()
        self.restart_episode()

    def _grab_raw_image(self):
        """
        :returns: the current 3-channel image
        """
        m = self.ale.getScreenRGB()
        return m.reshape((self.height, self.width, 3))

    def current_state(self):
        """
        :returns: a gray-scale (h, w, 1) float32 image
        """
        ret = self._grab_raw_image()
        # max-pooled over the last screen
        ret = np.maximum(ret, self.last_raw_screen)
        if self.viz:
            if isinstance(self.viz, float):
                #m = cv2.resize(ret, (1920,1200))
                cv2.imshow(self.windowname, ret)
                time.sleep(self.viz)
        ret = ret[self.height_range[0]:self.height_range[1], :].astype(
            'float32')
        # 0.299,0.587.0.114. same as rgb2y in torch/image
        ret = cv2.cvtColor(ret, cv2.COLOR_RGB2GRAY)
        ret = cv2.resize(ret, self.image_shape)
        ret = np.expand_dims(ret, axis=2)
        return ret

    def get_action_space(self):
        return DiscreteActionSpace(len(self.actions))

    def restart_episode(self):
        if self.current_episode_score.count > 0:
            self.stats['score'].append(self.current_episode_score.sum)
        self.current_episode_score.reset()
        self.ale.reset_game()

        # random null-ops start
        n = self.rng.randint(self.nullop_start)
        self.last_raw_screen = self._grab_raw_image()
        for k in range(n):
            if k == n - 1:
                self.last_raw_screen = self._grab_raw_image()
            self.ale.act(0)

    def action(self, act):
        """
        :param act: an index of the action
        :returns: (reward, isOver)
        """
        oldlives = self.ale.lives()
        r = 0
        for k in range(self.frame_skip):
            if k == self.frame_skip - 1:
                self.last_raw_screen = self._grab_raw_image()
            r += self.ale.act(self.actions[act])
            newlives = self.ale.lives()
            if self.ale.game_over() or \
                    (self.live_lost_as_eoe and newlives < oldlives):
                break

        self.current_episode_score.feed(r)
        isOver = self.ale.game_over()
        if isOver:
            self.restart_episode()
        if self.live_lost_as_eoe:
            isOver = isOver or newlives < oldlives
        return (r, isOver)
示例#46
0
    keys |= pressed[pygame.K_UP]
    keys |= pressed[pygame.K_DOWN]  <<1
    keys |= pressed[pygame.K_LEFT]  <<2
    keys |= pressed[pygame.K_RIGHT] <<3
    keys |= pressed[pygame.K_z] <<4
    a = key_action_tform_table[keys]
    reward = ale.act(a);
    cur_time += 1
    total_reward += reward

    #clear screen
    screen.fill((0,0,0))

    #get atari screen pixels and blit them
    numpy_surface = np.frombuffer(game_surface.get_buffer(),dtype=np.int32)
    ale.getScreenRGB(numpy_surface)
    
    logger.log(a, TYPE_ACTION, cur_time)
    #if cur_time %2 == 0:
    logger.log(numpy_surface, TYPE_SCREEN, cur_time)

    del numpy_surface
    screen.blit(pygame.transform.scale2x(game_surface),(0,0))

    #get RAM
    ram_size = ale.getRAMSize()
    ram = np.zeros((ram_size),dtype=np.uint8)
    ale.getRAM(ram)
    
    #Display ram bytes
    font = pygame.font.SysFont("Ubuntu Mono",32)
示例#47
0
def test(session,
         hist_len=4,
         discount=0.99,
         act_rpt=4,
         upd_freq=4,
         min_sq_grad=0.01,
         epsilon=0.05,
         no_op_max=30,
         num_tests=1,
         learning_rate=0.0025,
         momentum=0.95,
         sq_momentum=0.95):
    #Create ALE object
    if len(sys.argv) < 3:
        print('Usage: %s rom_file record_screen_dir' % sys.argv[0])
        sys.exit()

    ale = ALEInterface()

    record_path = sys.argv[2]
    ale.setString('record_screen_dir', record_path)
    ale.setString('record_sound_filename', (record_path + '/sound.wav'))
    ale.setInt('fragsize', 64)
    cmd = 'mkdir '
    cmd += record_path
    os.system(cmd)

    # Get & Set the desired settings
    ale.setInt('random_seed', 123)
    #Changes repeat action probability from default of 0.25
    ale.setFloat('repeat_action_probability', 0.0)

    # Set USE_SDL to true to display the screen. ALE must be compilied
    # with SDL enabled for this to work. On OSX, pygame init is used to
    # proxy-call SDL_main.
    USE_SDL = False
    if USE_SDL:
        if sys.platform == 'darwin':
            import pygame
            pygame.init()
            ale.setBool('sound', False)  # Sound doesn't work on OSX
        elif sys.platform.startswith('linux'):
            ale.setBool('sound', True)
        ale.setBool('display_screen', True)

    # Load the ROM file
    ale.loadROM(sys.argv[1])

    # create DQN agent
    # learning_rate and momentum are unused parameters (but needed)
    agent = DQN(ale, session, epsilon, learning_rate, momentum, sq_momentum,
                hist_len, len(ale.getMinimalActionSet()), None, discount,
                rom_name(sys.argv[1]))

    #Store the most recent two images
    preprocess_stack = deque([], 2)

    num_episodes = 0
    while num_episodes < num_tests:
        #initialize sequence with initial image
        seq = list()
        #We only have one image, we cannot combine two images
        perform_no_ops(ale, no_op_max, preprocess_stack, seq)
        #proc_seq.append(pp.preprocess(seq))
        total_reward = 0

        while not ale.game_over():
            state = get_state(seq, hist_len)
            action = agent.get_action_best_network(state, epsilon)
            #skip frames by repeating action
            reward = 0
            for i in range(act_rpt):
                reward = reward + ale.act(action)
                preprocess_stack.append(ale.getScreenRGB())
            seq.append(pp.preprocess(preprocess_stack[0], preprocess_stack[1]))
            total_reward += reward
        print('Episode ended with score: %d' % (total_reward))
        num_episodes = num_episodes + 1
        ale.reset_game()
示例#48
0
class AtariPlayer(gym.Env):
    """
    A wrapper for ALE emulator, with configurations to mimic DeepMind DQN settings.

    Info:
        score: the accumulated reward in the current game
        gameOver: True when the current game is Over
    """

    def __init__(self, rom_file, viz=0,
                 frame_skip=4, nullop_start=30,
                 live_lost_as_eoe=True, max_num_frames=0):
        """
        Args:
            rom_file: path to the rom
            frame_skip: skip every k frames and repeat the action
            viz: visualization to be done.
                Set to 0 to disable.
                Set to a positive number to be the delay between frames to show.
                Set to a string to be a directory to store frames.
            nullop_start: start with random number of null ops.
            live_losts_as_eoe: consider lost of lives as end of episode. Useful for training.
            max_num_frames: maximum number of frames per episode.
        """
        super(AtariPlayer, self).__init__()
        if not os.path.isfile(rom_file) and '/' not in rom_file:
            rom_file = get_dataset_path('atari_rom', rom_file)
        assert os.path.isfile(rom_file), \
            "rom {} not found. Please download at {}".format(rom_file, ROM_URL)

        try:
            ALEInterface.setLoggerMode(ALEInterface.Logger.Error)
        except AttributeError:
            if execute_only_once():
                logger.warn("You're not using latest ALE")

        # avoid simulator bugs: https://github.com/mgbellemare/Arcade-Learning-Environment/issues/86
        with _ALE_LOCK:
            self.ale = ALEInterface()
            self.rng = get_rng(self)
            self.ale.setInt(b"random_seed", self.rng.randint(0, 30000))
            self.ale.setInt(b"max_num_frames_per_episode", max_num_frames)
            self.ale.setBool(b"showinfo", False)

            self.ale.setInt(b"frame_skip", 1)
            self.ale.setBool(b'color_averaging', False)
            # manual.pdf suggests otherwise.
            self.ale.setFloat(b'repeat_action_probability', 0.0)

            # viz setup
            if isinstance(viz, six.string_types):
                assert os.path.isdir(viz), viz
                self.ale.setString(b'record_screen_dir', viz)
                viz = 0
            if isinstance(viz, int):
                viz = float(viz)
            self.viz = viz
            if self.viz and isinstance(self.viz, float):
                self.windowname = os.path.basename(rom_file)
                cv2.startWindowThread()
                cv2.namedWindow(self.windowname)

            self.ale.loadROM(rom_file.encode('utf-8'))
        self.width, self.height = self.ale.getScreenDims()
        self.actions = self.ale.getMinimalActionSet()

        self.live_lost_as_eoe = live_lost_as_eoe
        self.frame_skip = frame_skip
        self.nullop_start = nullop_start

        self.action_space = spaces.Discrete(len(self.actions))
        self.observation_space = spaces.Box(
            low=0, high=255, shape=(self.height, self.width))
        self._restart_episode()

    def get_action_meanings(self):
        return [ACTION_MEANING[i] for i in self.actions]

    def _grab_raw_image(self):
        """
        :returns: the current 3-channel image
        """
        m = self.ale.getScreenRGB()
        return m.reshape((self.height, self.width, 3))

    def _current_state(self):
        """
        :returns: a gray-scale (h, w) uint8 image
        """
        ret = self._grab_raw_image()
        # max-pooled over the last screen
        ret = np.maximum(ret, self.last_raw_screen)
        if self.viz:
            if isinstance(self.viz, float):
                cv2.imshow(self.windowname, ret)
                cv2.waitKey(int(self.viz * 1000))
        ret = ret.astype('float32')
        # 0.299,0.587.0.114. same as rgb2y in torch/image
        ret = cv2.cvtColor(ret, cv2.COLOR_RGB2GRAY)
        return ret.astype('uint8')  # to save some memory

    def _restart_episode(self):
        with _ALE_LOCK:
            self.ale.reset_game()

        # random null-ops start
        n = self.rng.randint(self.nullop_start)
        self.last_raw_screen = self._grab_raw_image()
        for k in range(n):
            if k == n - 1:
                self.last_raw_screen = self._grab_raw_image()
            self.ale.act(0)

    def _reset(self):
        if self.ale.game_over():
            self._restart_episode()
        return self._current_state()

    def _step(self, act):
        oldlives = self.ale.lives()
        r = 0
        for k in range(self.frame_skip):
            if k == self.frame_skip - 1:
                self.last_raw_screen = self._grab_raw_image()
            r += self.ale.act(self.actions[act])
            newlives = self.ale.lives()
            if self.ale.game_over() or \
                    (self.live_lost_as_eoe and newlives < oldlives):
                break

        isOver = self.ale.game_over()
        if self.live_lost_as_eoe:
            isOver = isOver or newlives < oldlives

        info = {'ale.lives': newlives}
        return self._current_state(), r, isOver, info
示例#49
0
文件: ale.py 项目: beegica/sith_torch
class AleEnv(object):
    '''ALE wrapper for RL training
    game_over_conditions={'points':(-1, 1)}: dict that describes all desired game over conditions
    each key corresponds to a condition that is checked; the first condition met produces a game over
        points: int or tuple of integers
            int:
                if x < 0, game ends when score is <= x
                if x >= 0, game ends when score is >= x
            tuple:
                game ends if score <= x[0] or score >= x[1]
        lives: int that ends game when lives <= x
        frames: int that ends game when total number of frames >= x
        episodes: int that ends game when num of episodes >= x
            Use max_num_frames_per_episode to set max episode length


    '''

    # will include timing and hidden functionality in future iterations

    def __init__(self,
                 rom_file,
                 display_screen=False,
                 sound=False,
                 random_seed=0,
                 game_over_conditions={},
                 frame_skip=1,
                 repeat_action_probability=0.25,
                 max_num_frames_per_episode=0,
                 min_action_set=False,
                 screen_color='gray',
                 fps=60,
                 output_buffer_size=1,
                 reduce_screen=False):

        # ALE instance and setup
        self.ale = ALEInterface()
        #TODO: check if rom file exists; will crash jupyter kernel otherwise
        self.ale.loadROM(str.encode(rom_file))

        self.ale.setBool(b'sound', sound)
        self.ale.setBool(b'display_screen', display_screen)

        if min_action_set:
            self.legal_actions = self.ale.getMinimalActionSet()
        else:
            self.legal_actions = self.ale.getLegalActionSet()

        self.ale.setInt(b'random_seed', random_seed)
        self.ale.setInt(b'frame_skip', frame_skip)
        self.frame_skip = frame_skip
        self.ale.setFloat(b'repeat_action_probability',
                          repeat_action_probability)
        self.ale.setInt(b'max_num_frames_per_episode',
                        max_num_frames_per_episode)

        self.ale.loadROM(str.encode(rom_file))

        self.game_over_conditions = game_over_conditions
        self.screen_color = screen_color
        self.reduce_screen = reduce_screen
        self.d_frame = (fps**-1) * self.frame_skip

        # set up output buffer
        self.output_buffer_size = output_buffer_size
        self.queue_size = self.output_buffer_size
        self._reset_params()

    def observe(self, flatten=False, expand_dim=False):

        if flatten is True:
            out = np.stack(self.output_queue[i]
                           for i in range(self.output_buffer_size)).flatten()

            if expand_dim is True:
                return np.expand_dims(np.expand_dims(out, axis=0), axis=1)
            else:
                return out
        else:
            out = np.stack(self.output_queue[i]
                           for i in range(self.output_buffer_size))
            out = np.squeeze(out)

            if expand_dim is True:
                return np.expand_dims(np.expand_dims(out, axis=0), axis=1)

            else:
                return out

    @property
    def width(self):
        return self.game_screen.shape[1]

    @property
    def height(self):
        return self.game_screen.shape[0]

    @property
    def game_over(self):
        return self._game_over()

    @property
    def actions(self):
        return self.legal_actions

    @property
    def lives(self):
        return self.ale.lives()

    def _reset_params(self):

        self.total_points = 0
        self.total_frames = 0
        self.curr_episode = 1
        self.prev_ep_frame_num = -float("inf")

        if self.screen_color == 'gray' or self.screen_color == 'grey':
            self.game_screen = np.squeeze(self.ale.getScreenGrayscale())
            if self.reduce_screen:
                self.game_screen = resize(self.game_screen,
                                          output_shape=(110, 84))
                self.game_screen = self.game_screen[0 + 21:-1 - 4, :]
        elif self.screen_color == 'rgb' or self.screen_color == 'color':
            self.game_screen = self.ale.getScreenRGB()
            if self.reduce_screen:
                self.game_screen = resize(self.game_screen,
                                          output_shape=(110, 84, 3))
                self.game_screen = self.game_screen[0 + 21:-1 - 4, :, :]

        self.output_queue = deque(
            np.zeros(shape=(self.queue_size - 1, self.height, self.width)),
            self.queue_size)
        self.output_queue.appendleft(self.game_screen)

    def reset(self):
        self.ale.reset_game()
        self._reset_params()

    def act(self, action):
        reward = self.ale.act(self.legal_actions[action])

        if self.screen_color == 'gray' or self.screen_color == 'grey':
            self.game_screen = np.squeeze(self.ale.getScreenGrayscale())
            if self.reduce_screen:
                self.game_screen = resize(self.game_screen,
                                          output_shape=(110, 84))
                self.game_screen = self.game_screen[0 + 21:-1 - 4, :]
        elif self.screen_color == 'rgb' or self.screen_color == 'color':
            self.game_screen = self.ale.getScreenRGB()
            if self.reduce_screen:
                self.game_screen = resize(self.game_screen,
                                          output_shape=(110, 84, 3))
                self.game_screen = self.game_screen[0 + 21:-1 - 4, :, :]

        self.output_queue.pop()
        self.output_queue.appendleft(self.game_screen)

        self.total_points += reward
        self.total_frames += self.frame_skip
        if self.ale.getEpisodeFrameNumber() <= self.prev_ep_frame_num:
            self.curr_episode += 1
        self.prev_ep_frame_num = self.ale.getEpisodeFrameNumber()

        return reward, self.d_frame, self.game_over

    def _game_over(self):
        if self.ale.game_over():
            return True

        for cond in self.game_over_conditions:
            if cond == 'points':
                if isinstance(self.game_over_conditions[cond], int):
                    if self.total_points >= self.game_over_conditions[cond]:
                        return True
                elif isinstance(self.game_over_conditions[cond], tuple):
                    if (self.total_points <= self.game_over_conditions[cond][0]
                            or self.total_points >=
                            self.game_over_conditions[cond][1]):
                        return True
            elif cond == 'lives':
                if self.lives <= self.game_over_conditions[cond]:
                    return True
            elif cond == 'frames':
                if self.total_frames >= self.game_over_conditions[cond]:
                    return True
            elif cond == 'episodes':
                if self.curr_episode >= self.game_over_conditions[cond]:
                    return True
            else:
                raise RuntimeError("ERROR: Invalid game over condition")

        return False
def train(gamepath, n_episodes,  display_screen,  record_weights,  reduce_exploration_prob_amount, n_frames_to_skip, exploration_prob, verbose, discount, learning_rate, load_weights, frozen_target_update_period, use_replay_mem):
    """
    :description: trains an agent to play a game 

    :type gamepath: string 
    :param gamepath: path to the binary of the game to be played

    :type n_episodes: int 
    :param n_episodes: number of episodes of the game on which to train

    display_screen : whether or not to display the screen of the game 
    
    record_weights : whether or not to save the weights of the nextwork
    
    reduce_exploration_prob_amount : amount to reduce exploration prob each episode
                                     to not reduce exploration_prob set to 0
    
    n_frames_to_skip : how frequently to determine a new action to use
    
    exploration_prob : probability of choosing a random action
    
    verbose : whether or not to print information about the run periodically
    
    discount : discount factor used in learning 
    
    learning_rate : the scaling factor for the sgd update
    
    load_weights : whether or not to load weights for the network (set the files directly below)
    
    frozen_target_update_period : the number of episodes between reseting the target of the network
    """

    # load the ale interface to interact with
    ale = ALEInterface()
    ale.setInt('random_seed', 42)

    # display/recording settings, doesn't seem to work currently
    recordings_dir = './recordings/breakout/'
    # previously "USE_SDL"
    if display_screen:
        if sys.platform == 'darwin':
            import pygame
            pygame.init()
            ale.setBool('sound', False) # Sound doesn't work on OSX
            #ale.setString("record_screen_dir", recordings_dir);
        elif sys.platform.startswith('linux'):
            ale.setBool('sound', True)
        ale.setBool('display_screen', True)

    ale.loadROM(gamepath)
    ale.setInt("frame_skip", n_frames_to_skip)
    # real actions for breakout are [0,1,3,4]
    real_actions = ale.getMinimalActionSet()

    # use a list of actions [0,1,2,3] to index into the array of real actions
    actions = np.arange(len(real_actions))

    # these theano variables are used to define the symbolic input of the network
    features = T.dvector('features')
    action = T.lscalar('action')
    reward = T.dscalar('reward')
    next_features = T.dvector('next_features')

    # load weights by file name
    # currently must be loaded by individual hidden layers
    if load_weights:
        hidden_layer_1 = file_utils.load_model('weights/hidden0_replay.pkl')
        hidden_layer_2 = file_utils.load_model('weights/hidden1_replay.pkl')
    else:
        # defining the hidden layer network structure
        # the n_hid of a prior layer must equal the n_vis of a subsequent layer
        # for q-learning the output layer must be of len(actions)
        hidden_layer_1 = HiddenLayer(n_vis=NNET_INPUT_DIMENSION, 
            n_hid=NNET_INPUT_DIMENSION, layer_name='hidden1', activation='relu')
        hidden_layer_2 = HiddenLayer(n_vis=NNET_INPUT_DIMENSION, 
            n_hid=NNET_INPUT_DIMENSION, layer_name='hidden2', activation='relu')
    hidden_layer_3 = HiddenLayer(n_vis=NNET_INPUT_DIMENSION, 
            n_hid=len(actions), layer_name='hidden3', activation='relu') 
    # the output layer is currently necessary when using tanh units in the
    # hidden layer in order to prevent a theano warning
    # currently the relu unit setting of the hidden and output layers is leaky w/ alpha=0.01
    output_layer = OutputLayer(layer_name='output', activation='relu')

    # pass a list of layers to the constructor of the network (here called "mlp")
    layers = [hidden_layer_1, hidden_layer_2, hidden_layer_3, output_layer]
    qnetwork = QNetwork(layers, discount=discount, learning_rate=learning_rate)

    # this call gets the symbolic output of the network
    # along with the parameter updates expected
    loss, updates = qnetwork.get_loss_and_updates(features, action, reward, next_features)

    # this defines the theano symbolic function used to train the network
    # 1st argument is a list of inputs, here the symbolic variables above
    # 2nd argument is the symbolic output expected
    # 3rd argument is the dictionary of parameter updates
    # 4th argument is the compilation mode
    train_model = theano.function(
                    [theano.Param(features, default=np.zeros(NNET_INPUT_DIMENSION)),
                    theano.Param(action, default=0),
                    theano.Param(reward, default=0),
                    theano.Param(next_features, default=np.zeros(NNET_INPUT_DIMENSION))],
                    outputs=loss,
                    updates=updates,
                    mode='FAST_RUN')

    sym_action = qnetwork.get_action(features)
    get_action = theano.function([features], sym_action)

    # some containers for collecting information about the training processes 
    rewards = []
    losses = []
    best_reward = 4
    sequence_examples = []
    sampled_examples = []

    # the preprocessor and feature extractor to use
    preprocessor = screen_utils.RGBScreenPreprocessor()
    feature_extractor = feature_extractors.NNetOpenCVBoundingBoxExtractor(max_features=MAX_FEATURES)

    if use_replay_mem:
        replay_mem = ReplayMemory()
    # main training loop, each episode is a full playthrough of the game
    for episode in xrange(n_episodes):

        # this implements the frozen target component of the network
        # by setting the frozen layers of the network to a copy of the current layers
        if episode % frozen_target_update_period == 0:
            qnetwork.frozen_layers = copy.deepcopy(qnetwork.layers)


        # some variables for collecting information about this particular run of the game
        total_reward = 0
        action = 1
        counter = 0
        reward = 0
        loss = 0
        previous_param_0 = None

        # lives here is used for the reward heuristic of subtracting 1 from the reward 
        # when we lose a life. currently commented out this functionality because
        # i think it might not be helpful.
        lives = ale.lives()

        # the initial state of the screen and state
        screen = np.zeros((preprocessor.dim, preprocessor.dim, preprocessor.channels))
        state = { "screen" : screen, "objects" : None, "prev_objects": None, "features": np.zeros(MAX_FEATURES)}
        
        # start the actual play through of the game
        while not ale.game_over():
            counter += 1

            # get the current features, which is the representation of the state provided to 
            # the "agent" (here just the network directly)
            features = state["features"]

            # epsilon greedy action selection (note that exploration_prob is reduced by
            # reduce_exploration_prob_amount after every game)
            if random.random() < exploration_prob: 
                action = random.choice(actions)
            else:
                # to choose an action from the network, we fprop 
                # the current state and take the argmax of the output
                # layer (i.e., the action that corresponds to the 
                # maximum q value)
                action = get_action(features)

            # take the action and receive the reward
            reward += ale.act(real_actions[action])

            # this is commented out because i think it might not be helpful
            if ale.lives() < lives: 
                 lives = ale.lives()
                 reward -= 1


            # get the next screen, preprocess it, initialize the next state
            next_screen = ale.getScreenRGB()
            next_screen = preprocessor.preprocess(next_screen)
            next_state = {"screen": next_screen, "objects": None, "prev_objects": state["objects"]}

            # get the features for the next state
            next_features = feature_extractor(next_state, action=None)

            if use_replay_mem:
                sars_tuple = (features, action, reward, next_features)
                replay_mem.store(sars_tuple)
                num_samples = 5 if replay_mem.isFull() else 1
                for i in range(0, num_samples):
                    random_train_tuple = replay_mem.sample()
                    loss += train_model(*random_train_tuple)

                # collect for pca
                sequence_examples.append(list(sars_tuple[0]) + [sars_tuple[1]] \
                         + [sars_tuple[2]] + sars_tuple[3])
                sequence_examples = sequence_examples[-100:]
                sampled_examples.append(list(random_train_tuple[0]) + [random_train_tuple[1]] \
                        + [random_train_tuple[2]] + random_train_tuple[3])
                sampled_examples = sampled_examples[-100:]
            else:
                # call the train model function
                loss += train_model(features, action, reward, next_features)
            # prepare for the next loop through the game
            next_state["features"] = next_features
            state = next_state
                
            # weird counter value to avoid interaction with any other counter
            # loop that might be added, not necessary right now
            if verbose and counter % PRINT_TRAINING_INFO_PERIOD == 0:
                print('*' * 15 + ' training information ' + '*' * 15) 
                print('episode: {}'.format(episode))
                print('reward: \t{}'.format(reward))
                print('avg reward: \t{}'.format(np.mean(rewards)))
                print 'avg reward (last 25): \t{}'.format(np.mean(rewards[-NUM_EPISODES_AVERAGE_REWARD_OVER:]))
                print('action: \t{}'.format(real_actions[action]))
                print('exploration prob: {}'.format(exploration_prob))
                
                param_info = [(p.eval(), p.name) for p in qnetwork.get_params()]
                for index, (val, name) in enumerate(param_info):
                    if previous_param_0 is None and index == 0:
                        previous_param_0 = val
                    print('parameter {} value: \n{}'.format(name, val))
                    if index == 0:
                        diff = val - previous_param_0
                        print('difference from previous param {}: \n{}'.format(name, diff))

                print('features: \t{}'.format(features))
                print('next_features: \t{}'.format(next_features))

                scaled_sequence = preprocessing.scale(np.array(sequence_examples))
                scaled_sampled = preprocessing.scale(np.array(sampled_examples))
                pca = PCA()
                _ = pca.fit_transform(scaled_sequence)
                print('variance explained by first component for sequence: {}%'.format(pca. \
                    explained_variance_ratio_[0] * 100))
                _ = pca.fit_transform(scaled_sampled)
                print('variance explained by first component for sampled: {}%'.format(pca. \
                    explained_variance_ratio_[0] * 100))

                print('*' * 52)
                print('\n')

            # collect info and total reward and also reset the reward to 0 if we reach this point
            total_reward += reward
            reward = 0
        # collect stats from this game run    
        losses.append(loss)
        rewards.append(total_reward)
    
        # if we got a best reward, inform the user 
        if total_reward > best_reward:
            best_reward = total_reward
            print("best reward!: {}".format(total_reward))

        # record the weights if record_weights=True
        # must record the weights of the indiviual layers
        # only save hidden layers b/c output layer does not have weights
        if episode != 0 and episode % RECORD_WEIGHTS_PERIOD == 0 and record_weights:
            file_utils.save_rewards(rewards)
            file_utils.save_model(qnetwork.layers[0], 'weights/hidden0_{}.pkl'.format(episode))
            file_utils.save_model(qnetwork.layers[1], 'weights/hidden1_{}.pkl'.format(episode))

        # reduce exploration policy over time
        if exploration_prob > MINIMUM_EXPLORATION_EPSILON:
            exploration_prob -= reduce_exploration_prob_amount
        
        # inform user of how the episode went and reset the game
        print('episode: {} ended with score: {}\tloss: {}'.format(episode, rewards[-1], losses[-1]))
        ale.reset_game()

    # return the list of rewards attained
    return rewards
示例#51
0
class ALEEnvironment(BaseEnvironment):
    """
    A wrapper of Arcade Learning Environment, which inherits all members of ``BaseEnvironment``.
    """
    # 63 games
    ADVENTURE = "adventure"
    AIR_RAID = "air_raid"
    ALIEN = "alien"
    AMIDAR = "amidar"
    ASSAULT = "assault"
    ASTERIX = "asterix"
    ASTEROIDS = "asteroids"
    ATLANTIS = "aslantis"
    BANK_HEIST = "bank_heist"
    BATTLE_ZONE = "battle_zone"
    BEAM_RIDER = "beam_rider"
    BERZERK = "berzerk"
    BOWLING = "bowling"
    BOXING = "boxing"
    BREAKOUT = "breakout"
    CARNIVAL = "carnival"
    CENTIPEDE = "centipede"
    CHOPPER_COMMAND = "chopper_command"
    CRAZY_CLIMBER = "crazy_climber"
    DEFENDER = "defender"
    DEMON_ATTACK = "demon_attack"
    DOUBLE_DUNK = "double_dunk"
    ELEVATOR_ACTION = "elevator_action"
    ENDURO = "enduro"
    FISHING_DERBY = "fishing_derby"
    FREEWAY = "freeway"
    FROSTBITE = "frostbite"
    GOPHER = "gopher"
    GRAVITAR = "gravitar"
    HERO = "hero"
    ICE_HOCKEY = "ice_hockey"
    JAMESBOND = "jamesbond"
    JOURNEY_ESCAPE = "journey_escape"
    KABOOM = "kaboom"
    KANGAROO = "kangaroo"
    KRULL = "krull"
    KUNGFU_MASTER = "kung_fu_master"
    MONTEZUMA = "montezuma_revenge"
    MS_PACMAN = "ms_pacman"
    UNKNOWN = "name_this_game"
    PHOENIX = "phoenix"
    PITFALL = "pitfall"
    PONG = "pong"
    POOYAN = "pooyan"
    PRIVATE_EYE = "private_eye"
    QBERT = "qbert"
    RIVERRAID = "riverraid"
    ROAD_RUNNER = "road_runner"
    ROBOTANK = "robotank"
    SEAQUEST = "seaquest"
    SKIING = "skiing"
    SOLARIS = "solaris"
    SPACE_INVADERS = "space_invaders"
    STAR_GUNNER = "star_gunner"
    TENNIS = "tennis"
    TIME_PILOT = "time_pilot"
    TUTANKHAM = "tutankham"
    UP_N_DOWN = "up_n_down"
    VENTURE = "venture"
    VIDEO_PINBALL = "video_pinball"
    WIZARD_OF_WOR = "wizard_of_wor"
    YARS_REVENGE = "yars_revenge"
    ZAXXON = "zaxxon"

    def __init__(self,
                 rom_name,
                 frame_skip=4,
                 repeat_action_probability=0.,
                 max_episode_steps=10000,
                 loss_of_life_termination=False,
                 loss_of_life_negative_reward=False,
                 bitwise_max_on_two_consecutive_frames=False,
                 is_render=False,
                 seed=None,
                 startup_policy=None,
                 disable_actions=None,
                 num_of_sub_actions=-1,
                 state_processor=AtariProcessor(resize_shape=(84, 84),
                                                convert_to_grayscale=True)):

        os.environ['SDL_VIDEO_CENTERED'] = '1'

        file_exist = isfile(ALEEnvironment.get_rom_path(rom_name))
        if not file_exist:
            raise ValueError("Rom not found ! Please put rom " + rom_name +
                             ".bin into: " + ALEEnvironment.get_rom_path())

        self.__rom_name = rom_name
        self.__ale = ALEInterface()

        if frame_skip < 0:
            print("Invalid frame_skip param ! Set default frame_skip = 4")
            self.__frame_skip = 4
        else:
            self.__frame_skip = frame_skip

        if repeat_action_probability < 0 or repeat_action_probability > 1:
            raise ValueError("Invalid repeat_action_probability")
        else:
            self.__repeat_action_probability = repeat_action_probability

        self.__max_episode_steps = max_episode_steps
        self.__loss_of_life_termination = loss_of_life_termination
        self.__loss_of_life_negative_reward = loss_of_life_negative_reward
        self.__max_2_frames = bitwise_max_on_two_consecutive_frames

        # Max 2 frames only work with grayscale
        self.__grayscale = False
        if state_processor is not None and type(
                state_processor
        ) is AtariProcessor and state_processor.get_grayscale():
            self.__grayscale = True

        if self.__max_2_frames and self.__frame_skip > 1 and self.__grayscale:
            self.__max_2_frames = True
        else:
            self.__max_2_frames = False

        self.__is_render = is_render
        self.__processor = state_processor

        if seed is None or seed <= 0 or seed >= 9999:
            if seed is not None and (seed < 0 or seed >= 9999):
                print("Invalid seed ! Default seed = randint(0, 9999")
            self.__seed = np.random.randint(0, 9999)
            self.__random_seed = True
        else:
            self.__random_seed = False
            self.__seed = seed

        self.__current_steps = 0
        self.__is_life_lost = False
        self.__is_terminal = False
        self.__current_lives = 0
        self.__action_reduction = num_of_sub_actions
        self.__scr_width, self.__scr_height, self.__action_set = self.__init_ale(
        )
        self.__prev_buffer = np.empty((self.__scr_height, self.__scr_width, 3),
                                      dtype=np.uint8)
        self.__current_buffer = np.empty(
            (self.__scr_height, self.__scr_width, 3), dtype=np.uint8)
        self.__current_state = None
        self.__prev_state = None
        self.__startup_policy = startup_policy
        if disable_actions is None:
            self.__dis_act = []
        else:
            self.__dis_act = disable_actions

        if self.__processor is not None and self.__processor.get_number_of_objectives(
        ) > 1:
            self.__multi_objs = True
        else:
            self.__multi_objs = False

    def get_processor(self):
        return self.__processor

    def __init_ale(self):

        self.__ale.setBool(b'display_screen', self.__is_render)

        if self.__max_2_frames and self.__frame_skip > 1:
            self.__ale.setInt(b'frame_skip', 1)
        else:
            self.__ale.setInt(b'frame_skip', self.__frame_skip)

        self.__ale.setInt(b'random_seed', self.__seed)
        self.__ale.setFloat(b'repeat_action_probability',
                            self.__repeat_action_probability)
        self.__ale.setBool(b'color_averaging', False)

        self.__ale.loadROM(
            ALEEnvironment.get_rom_path(self.__rom_name).encode())

        width, height = self.__ale.getScreenDims()
        return width, height, self.__ale.getMinimalActionSet()

    def clone(self):
        if self.__random_seed:
            seed = np.random.randint(0, 9999)
        else:
            seed = self.__seed

        return ALEEnvironment(self.__rom_name, self.__frame_skip,
                              self.__repeat_action_probability,
                              self.__max_episode_steps,
                              self.__loss_of_life_termination,
                              self.__loss_of_life_negative_reward,
                              self.__max_2_frames, self.__is_render, seed,
                              self.__startup_policy,
                              self.__dis_act, self.__action_reduction,
                              self.__processor.clone())

    def step_all(self, a):
        if isinstance(a, (list, np.ndarray)):
            if len(a) <= 0:
                raise ValueError('Empty action list !')
            a = a[0]
        self.__current_steps += 1
        act = self.__action_set[a]
        rew = self._step(act)
        next_state = self.get_state()
        _is_terminal = self.is_terminal()
        return next_state, rew, _is_terminal, self.__current_steps

    def reset(self):
        self.__ale.reset_game()
        self.__current_lives = self.__ale.lives()
        self.__is_life_lost = False
        self.__is_terminal = False
        self.__current_state = None
        self.__prev_state = None

        action_space = self.get_action_space()
        v_range, is_range = action_space.get_range()
        if len(v_range) > 1:
            self.step(1)

        # No op steps
        if self.__startup_policy is not None:
            max_steps = int(self.__startup_policy.get_max_steps())
            for _ in range(max_steps):
                act = self.__startup_policy.step(self.get_state(),
                                                 action_space)
                self.step(act)

        # Start training from this point
        self.__current_steps = 0

        # Reset processor
        if self.__processor is not None:
            self.__processor.reset()

        return self.get_state()

    def _pre_step(self, act):
        if self.__max_2_frames and self.__frame_skip > 1:
            rew = 0
            for i in range(self.__frame_skip - 2):
                rew += self.__ale.act(act)
                self.__prev_buffer = self.__ale.getScreenRGB(
                    self.__prev_buffer)

            self.__prev_buffer = self.__ale.getScreenRGB(self.__prev_buffer)

            rew += self.__ale.act(act)

            self.__current_buffer = self.__ale.getScreenRGB(
                self.__current_buffer)

            self.__is_terminal = self.__ale.game_over()

            if self.__processor is not None:
                self.__prev_state = self.__processor.process(
                    self.__prev_buffer)
                self.__current_state = self.__processor.process(
                    self.__current_buffer)
            else:
                self.__prev_state = self.__prev_buffer
                self.__current_state = self.__current_buffer

            self.__current_state = np.maximum.reduce(
                [self.__prev_state, self.__current_state])
        else:
            rew = self.__ale.act(act)
            self.__current_buffer = self.__ale.getScreenRGB(
                self.__current_buffer)
            self.__is_terminal = self.__ale.game_over()

            if self.__processor is not None:
                self.__current_state = self.__processor.process(
                    self.__current_buffer)

        if self.__multi_objs and self.__processor is not None:
            all_rewards = self.__processor.get_rewards(rew)
            return all_rewards
        else:
            return rew

    def _step(self, act):
        for i in range(len(self.__dis_act)):
            if act == self.__dis_act[i]:
                act = 0

        if not self.__loss_of_life_termination and not self.__loss_of_life_negative_reward:
            if not self.__is_terminal:
                next_lives = self.__ale.lives()
                if next_lives < self.__current_lives:
                    act = 1
                    self.__current_lives = next_lives
            return self._pre_step(act)
        else:
            rew = self._pre_step(act)
            next_lives = self.__ale.lives()
            if next_lives < self.__current_lives:
                if self.__loss_of_life_negative_reward:
                    rew -= 1
                self.__current_lives = next_lives
                self.__is_life_lost = True

            return rew

    def get_state(self):
        if not self.__max_2_frames:
            if self.__processor is not None:
                return self.__current_state
            else:
                return self.__current_buffer
        else:
            return self.__current_state

    def is_terminal(self):
        if self.__loss_of_life_termination and self.__is_life_lost:
            return True
        elif self.__max_episode_steps is not None and self.__current_steps > self.__max_episode_steps:
            return True
        else:
            return self.__is_terminal

    @staticmethod
    def get_rom_path(rom=None):
        if rom is None:
            return os.path.dirname(os.path.abspath(__file__)) + "/roms/"
        else:
            return os.path.dirname(
                os.path.abspath(__file__)) + "/roms/" + rom + ".bin"

    @staticmethod
    def list_all_roms():
        return [
            f for f in listdir(ALEEnvironment.get_rom_path())
            if isfile(join(ALEEnvironment.get_rom_path(), f))
        ]

    def get_state_space(self):
        if self.__processor is None:
            shape = self.__current_buffer.shape
        else:
            shape = self.__processor.process(self.__current_buffer).shape
        min_value = np.zeros(shape, dtype=np.uint8)
        max_value = np.full(shape, 255)
        return Space(min_value, max_value, True)

    def get_action_space(self):
        if self.__action_reduction >= 1:
            return Space(0, self.__action_reduction - 1, True)
        else:
            return Space(0, len(self.__action_set) - 1, True)

    def step(self, act):
        if isinstance(act, (list, np.ndarray)):
            if len(act) <= 0:
                raise ValueError('Empty action list !')
            act = act[0]
        self.__current_steps += 1
        act = self.__action_set[act]
        rew = self._step(act)
        return rew

    def get_current_steps(self):
        return self.__current_steps

    def is_atari(self):
        return True

    def is_render(self):
        return self.__is_render

    def get_number_of_objectives(self):
        if self.__processor is None:
            return 1
        else:
            return self.__processor.get_number_of_objectives()

    def get_number_of_agents(self):
        if self.__processor is None:
            return 1
        else:
            return self.__processor.get_number_of_agents()

    def get_state_processor(self):
        return self.__processor
示例#52
0
class AtariGame(Task):
    ''' RL task based on Arcade Game.
    '''

    def __init__(self, rom_path, num_frames=4, live=False, skip_frame=0, mode='normal'):
        self.ale = ALEInterface()
        if live:
            USE_SDL = True
            if USE_SDL:
                if sys.platform == 'darwin':
                    import pygame
                    pygame.init()
                    self.ale.setBool('sound', False) # Sound doesn't work on OSX
                elif sys.platform.startswith('linux'):
                    self.ale.setBool('sound', True)
            self.ale.setBool('display_screen', True)
        self.mode = mode
        self.live = live
        self.ale.loadROM(rom_path)
        self.num_frames = num_frames
        self.frames = []
        self.frame_id = 0
        self.cum_reward = 0
        self.skip_frame = skip_frame
        if mode == 'small':
            img = T.matrix('img')
            self.max_pool = theano.function([img], max_pool_2d(img, [4, 4]))
            self.img_shape = (16, 16)
        else:
            self.img_shape = (84, 84) # image shape according to DQN Nature paper.
        while len(self.frames) < 4:
            self.step(choice(self.valid_actions, 1)[0])
        self.reset()


    def copy(self):
        import dill as pickle
        return pickle.loads(pickle.dumps(self))


    def reset(self):
        self.ale.reset_game()
        self.frame_id = 0
        self.cum_reward = 0
        if self.skip_frame:
            for frame_i in range(self.skip_frame):
                self.step(choice(self.valid_actions, 1)[0])


    @property
    def _curr_frame(self):
        img = self.ale.getScreenRGB()
        img = rgb2yuv(img)[:, :, 0] # get Y channel, according to Nature paper.
        # print 'RAM', self.ale.getRAM()
        if self.mode == 'small':
            img = self.max_pool(img)
        img = imresize(img, self.img_shape, interp='bicubic')
        return  img


    @property
    def curr_state(self):
        '''
        return raw pixels.
        '''
        return np.array(self.frames, dtype=floatX) / floatX(255.) # normalize


    @property
    def state_shape(self):
        return self.curr_state.shape


    @property
    def num_actions(self):
        return len(self.valid_actions)


    @property
    def valid_actions(self):
        return self.ale.getLegalActionSet()


    def step(self, action):
        reward = self.ale.act(action)
        if len(self.frames) == self.num_frames:
            self.frames = self.frames[1:]
        self.frames.append(self._curr_frame)
        self.frame_id += 1
        #print 'frame_id', self.frame_id
        self.cum_reward += reward
        return reward # TODO: scale the gradient up.


    def is_end(self):
        if np.abs(self.cum_reward) > 0:
            return True
        return self.ale.game_over()


    def visualize(self, fig=1, fname=None, format='png'):
        import matplotlib.pyplot as plt
        fig = plt.figure(fig, figsize=(5,5))
        plt.clf()
        plt.axis('off')
        #res = plt.imshow(self.ale.getScreenRGB())
        res = plt.imshow(self._curr_frame, interpolation='none')
        if fname:
            plt.savefig(fname, format=format)
        else:
            plt.show()
        return res
示例#53
0
class AtariEmulator(BaseEnvironment):
    def __init__(self,
                 rom_addr,
                 random_start=False,
                 random_seed=6,
                 visualize=True,
                 single_life=False):
        self.ale = ALEInterface()

        self.ale.setInt(b"random_seed", 2 * random_seed)
        # For fuller control on explicit action repeat (>= ALE 0.5.0)
        self.ale.setFloat(b"repeat_action_probability", 0.0)
        # Disable frame_skip and color_averaging
        # See: http://is.gd/tYzVpj
        self.ale.setInt(b"frame_skip", 1)
        self.ale.setBool(b"color_averaging", False)
        full_rom_path = rom_addr
        self.ale.loadROM(str.encode(full_rom_path))
        self.legal_actions = self.ale.getMinimalActionSet()
        self.screen_width, self.screen_height = self.ale.getScreenDims()
        self.lives = self.ale.lives()
        self.writer = imageio.get_writer('breakout0.gif', fps=30)
        self.random_start = random_start
        self.single_life_episodes = single_life
        self.call_on_new_frame = visualize

        # Processed historcal frames that will be fed in to the network
        # (i.e., four 84x84 images)
        self.observation_pool = ObservationPool(
            np.zeros((84, 84, 4), dtype=np.uint8))
        self.rgb_screen = np.zeros((self.screen_height, self.screen_width, 3),
                                   dtype=np.uint8)
        self.gray_screen = np.zeros((self.screen_height, self.screen_width, 1),
                                    dtype=np.uint8)
        self.frame_pool = FramePool(
            np.empty((2, self.screen_height, self.screen_width),
                     dtype=np.uint8), self.__process_frame_pool)

    def get_legal_actions(self):
        return self.legal_actions

    def __get_screen_image(self):
        """
        Get the current frame luminance
        :return: the current frame
        """
        self.ale.getScreenGrayscale(self.gray_screen)
        if self.call_on_new_frame:
            self.ale.getScreenRGB(self.rgb_screen)
            self.on_new_frame(self.rgb_screen)
        return np.squeeze(self.gray_screen)

    def on_new_frame(self, frame):

        pass

    def __new_game(self):
        """ Restart game """
        self.ale.reset_game()
        self.lives = self.ale.lives()
        if self.random_start:
            wait = random.randint(0, MAX_START_WAIT)
            for _ in range(wait):
                self.ale.act(self.legal_actions[0])

    def __process_frame_pool(self, frame_pool):
        """ Preprocess frame pool """

        img = np.amax(frame_pool, axis=0)
        img = imresize(img, (84, 84), interp='nearest')
        img = img.astype(np.uint8)
        return img

    def __action_repeat(self, a, times=ACTION_REPEAT):
        """ Repeat action and grab screen into frame pool """
        reward = 0
        for i in range(times - FRAMES_IN_POOL):
            reward += self.ale.act(self.legal_actions[a])
        # Only need to add the last FRAMES_IN_POOL frames to the frame pool
        for i in range(FRAMES_IN_POOL):
            reward += self.ale.act(self.legal_actions[a])
            self.frame_pool.new_frame(self.__get_screen_image())
        return reward

    def get_initial_state(self):
        """ Get the initial state """
        self.__new_game()
        for step in range(4):
            _ = self.__action_repeat(0)
            self.observation_pool.new_observation(
                self.frame_pool.get_processed_frame())
        if self.__is_terminal():
            raise Exception('This should never happen.')
        return self.observation_pool.get_pooled_observations()

    def next(self, action):
        """ Get the next state, reward, and game over signal """

        reward = self.__action_repeat(np.argmax(action))
        self.observation_pool.new_observation(
            self.frame_pool.get_processed_frame())
        terminal = self.__is_terminal()
        self.lives = self.ale.lives()
        observation = self.observation_pool.get_pooled_observations()
        return observation, reward, terminal

    def __is_terminal(self):
        if self.single_life_episodes:
            return self.__is_over() or (self.lives > self.ale.lives())
        else:
            return self.__is_over()

    def __is_over(self):
        return self.ale.game_over()

    def get_noop(self):
        return [1.0, 0.0]
示例#54
0
文件: ale_env.py 项目: only4hj/DeepRL
class AleEnv():
    def __init__(self, rom, display_screen, use_env_frame_skip, frame_repeat):
        self.actions = None
        self.rom = rom
        self.display_screen = display_screen
        self.use_env_frame_skip = use_env_frame_skip
        self.frame_repeat = frame_repeat
        
    def initialize(self):
        self.ale = ALEInterface()
        self.ale.setInt("random_seed", random.randint(1, 1000))
        if self.display_screen:
            self.ale.setBool('display_screen', True)

        if self.use_env_frame_skip == True:
            self.ale.setInt('frame_skip', self.frame_repeat)
            self.ale.setBool('color_averaging', True)        
 
        self.ale.setFloat('repeat_action_probability', 0)
        self.ale.loadROM(self.rom)
        self.actions = self.ale.getMinimalActionSet()
        print 'actions: %s' % self.actions
        (self.screen_width,self.screen_height) = self.ale.getScreenDims()
        print("width/height: " +str(self.screen_width) + "/" + str(self.screen_height))
        
        self.initialized = True
        
    def get_actions(self, rom=None):
        if self.actions is None and rom != None:
            ale = ALEInterface()
            ale.loadROM(rom)
            self.actions = ale.getMinimalActionSet()
        return self.actions
        
    @property
    def state_dtype(self):
        return np.uint8
        
    @property
    def continuous_action(self):
        return False
    
    def reset_game(self):
        self.ale.reset_game()
        
    def lives(self):
        return self.ale.lives()
    
    def getScreenRGB(self):
        return self.ale.getScreenRGB()
    
    def getState(self, debug_display=False, debug_input=None):
        screen = self.ale.getScreenGrayscale()
        if screen is not None and debug_display:
            debug_input.show(screen.reshape(screen.shape[0], screen.shape[1]))
        return screen.reshape(self.screen_height, self.screen_width)
    
    def act(self, action):
        return self.ale.act(action)
    
    def game_over(self):
        return self.ale.game_over()
    
    def finish(self):
        return