class Emulate:
  def __init__(self, rom_file, display_screen=False,frame_skip=4,screen_height=84,screen_width=84,repeat_action_probability=0,color_averaging=True,random_seed=0,record_screen_path='screen_pics',record_sound_filename=None,minimal_action_set=True):
    self.ale = ALEInterface()
    if display_screen:
      if sys.platform == 'darwin':
        import pygame
        pygame.init()
        self.ale.setBool('sound', False) # Sound doesn't work on OSX
      elif sys.platform.startswith('linux'):
        self.ale.setBool('sound', True)
      self.ale.setBool('display_screen', True)

    self.ale.setInt('frame_skip', frame_skip)
    self.ale.setFloat('repeat_action_probability', repeat_action_probability)
    self.ale.setBool('color_averaging', color_averaging)

    if random_seed:
      self.ale.setInt('random_seed', random_seed)

    self.ale.loadROM(rom_file)

    if minimal_action_set:
      self.actions = self.ale.getMinimalActionSet()
    else:
      self.actions = self.ale.getLegalActionSet()

    self.dims = (screen_width,screen_height)

  def numActions(self):
    return len(self.actions)

  def getActions(self):
  	return self.actions

  def restart(self):
    self.ale.reset_game()

  def act(self, action):
    reward = self.ale.act(self.actions[action])
    return reward

  def getScreen(self):
    screen = self.ale.getScreenGrayscale()
    resized = cv2.resize(screen, self.dims)
    return resized

  def getScreenGray(self):
    screen = self.ale.getScreenGrayscale()
    resized = cv2.resize(screen, self.dims)
    rotated = np.rot90(resized,k=1)
    return rotated

  def getScreenColor(self):
    screen = self.ale.getScreenRGB()
    resized = cv2.resize(screen, self.dims)
    rotated = np.rot90(resized,k=1)
    return rotated

  def isTerminal(self):
    return self.ale.game_over()
示例#2
0
class FastAtariEnv(AtariEnv):
    def __init__(self,
                 game='Breakout',
                 obs_type='image',
                 frameskip=(2, 5),
                 repeat_action_probability=0.):
        self.game_path = atari_py.get_game_path(game)
        self._obs_type = obs_type
        self.frameskip = frameskip
        self.ale = ALEInterface()
        self.viewer = None
        assert isinstance(
            repeat_action_probability,
            (float, int)), "Invalid repeat_action_probability: {!r}".format(
                repeat_action_probability)
        self.ale.setFloat('repeat_action_probability'.encode('utf-8'),
                          repeat_action_probability)
        self._seed()
        (screen_width, screen_height) = self.ale.getScreenDims()
        self._buffer = np.empty((screen_height, screen_width, 3),
                                dtype=np.uint8)

    def _get_image(self):
        # Don't reorder from rgb to bgr as we're converting to greyscale anyway
        self.ale.getScreenRGB(self._buffer)  # says rgb but actually bgr
        return self._buffer
def initializeALE(romFile, rec_dir):
    ale = ALEInterface()

    max_frames_per_episode = ale.getInt("max_num_frames_per_episode")
    ale.setInt("random_seed", 123)
    ale.setFloat("repeat_action_probability", 0.0)
    ale.setInt("frame_skip", 5)
    # Set record flags
    ale.setString(b'record_screen_dir', rec_dir + '/')
    ale.setString("record_sound_filename", rec_dir + "/sound.wav")
    # We set fragsize to 64 to ensure proper sound sync
    ale.setInt("fragsize", 64)

    # Set USE_SDL to true to display the screen. ALE must be compilied
    # with SDL enabled for this to work. On OSX, pygame init is used to
    # proxy-call SDL_main.

    USE_SDL = False
    if USE_SDL:
        if sys.platform == 'darwin':
            import pygame
            pygame.init()
            ale.setBool('sound', False)  # Sound doesn't work on OSX
        elif sys.platform.startswith('linux'):
            ale.setBool('sound', True)
        ale.setBool('display_screen', True)

    ale.loadROM(romFile)
    actionSet = ale.getMinimalActionSet()

    return ale, actionSet
示例#4
0
class Environment:
  def __init__(self, rom_file, args):
    self.ale = ALEInterface()
    if args.display_screen:
      if sys.platform == 'darwin':
        import pygame
        pygame.init()
        self.ale.setBool('sound', False) # Sound doesn't work on OSX
      elif sys.platform.startswith('linux'):
        self.ale.setBool('sound', True)
      self.ale.setBool('display_screen', True)

    self.ale.setInt('frame_skip', args.frame_skip)
    self.ale.setFloat('repeat_action_probability', args.repeat_action_probability)
    self.ale.setBool('color_averaging', args.color_averaging)

    if args.random_seed:
      self.ale.setInt('random_seed', args.random_seed)

    if args.record_screen_path:
      if not os.path.exists(args.record_screen_path):
        logger.info("Creating folder %s" % args.record_screen_path)
        os.makedirs(args.record_screen_path)
      logger.info("Recording screens to %s", args.record_screen_path)
      self.ale.setString('record_screen_dir', args.record_screen_path)

    if args.record_sound_filename:
      logger.info("Recording sound to %s", args.record_sound_filename)
      self.ale.setBool('sound', True)
      self.ale.setString('record_sound_filename', args.record_sound_filename)

    self.ale.loadROM(rom_file)

    if args.minimal_action_set:
      self.actions = self.ale.getMinimalActionSet()
      logger.info("Using minimal action set with size %d" % len(self.actions))
    else:
      self.actions = self.ale.getLegalActionSet()
      logger.info("Using full action set with size %d" % len(self.actions))
    logger.debug("Actions: " + str(self.actions))

    self.dims = (args.screen_height, args.screen_width)

  def numActions(self):
    return len(self.actions)

  def restart(self):
    self.ale.reset_game()

  def act(self, action):
    reward = self.ale.act(self.actions[action])
    return reward

  def getScreen(self):
    screen = self.ale.getScreenGrayscale()
    resized = cv2.resize(screen, self.dims)
    return resized

  def isTerminal(self):
    return self.ale.game_over()
class env_atari:
    def __init__(self, params):
        self.params = params
        self.ale = ALEInterface()
        self.ale.setInt('random_seed', np.random.randint(0, 500))
        self.ale.setFloat('repeat_action_probability', params['repeat_prob'])
        self.ale.setInt(b'frame_skip', params['frameskip'])
        self.ale.setBool('color_averaging', True)
        self.ale.loadROM('roms/' + params['rom'] + '.bin')
        self.actions = self.ale.getMinimalActionSet()
        self.action_space = c_action_space(len(self.actions))
        self.screen_width, self.screen_height = self.ale.getScreenDims()

    def reset(self):
        self.ale.reset_game()
        seed = np.random.randint(0, 7)
        for i in range(seed):
            self.ale.act(0)
        return self.get_image()

    def step(self, action):
        reward = self.ale.act(self.actions[action])
        next_s = self.get_image()
        terminate = self.ale.game_over()
        return next_s, reward, float(terminate), 0

    def get_image(self):
        temp = np.zeros(self.screen_height * self.screen_width * 3,
                        dtype=np.uint8)
        self.ale.getScreenRGB(temp)
        #self.ale.getScreenGrayscale(temp)
        return temp.reshape((self.screen_height, self.screen_width, 3))
示例#6
0
    def __init__(self,
                 game,
                 seed=None,
                 use_sdl=False,
                 n_last_screens=4,
                 frame_skip=4,
                 treat_life_lost_as_terminal=True,
                 crop_or_scale='scale',
                 max_start_nullops=30,
                 record_screen_dir=None):
        self.n_last_screens = n_last_screens
        self.treat_life_lost_as_terminal = treat_life_lost_as_terminal
        self.crop_or_scale = crop_or_scale
        self.max_start_nullops = max_start_nullops

        # atari_py is used only to provide rom files. atari_py has its own
        # ale_python_interface, but it is obsolete.
        game_path = atari_py.get_game_path(game)

        ale = ALEInterface()
        if seed is not None:
            assert seed >= 0 and seed < 2 ** 16, \
                "ALE's random seed must be represented by unsigned int"
        else:
            # Use numpy's random state
            seed = np.random.randint(0, 2**16)
        ale.setInt(b'random_seed', seed)
        ale.setFloat(b'repeat_action_probability', 0.0)
        ale.setBool(b'color_averaging', False)
        if record_screen_dir is not None:
            ale.setString(b'record_screen_dir',
                          str.encode(str(record_screen_dir)))
        self.frame_skip = frame_skip
        if use_sdl:
            if 'DISPLAY' not in os.environ:
                raise RuntimeError(
                    'Please set DISPLAY environment variable for use_sdl=True')
            # SDL settings below are from the ALE python example
            if sys.platform == 'darwin':
                import pygame
                pygame.init()
                ale.setBool(b'sound', False)  # Sound doesn't work on OSX
            elif sys.platform.startswith('linux'):
                ale.setBool(b'sound', True)
            ale.setBool(b'display_screen', True)

        ale.loadROM(str.encode(str(game_path)))

        assert ale.getFrameNumber() == 0

        self.ale = ale
        self.legal_actions = ale.getMinimalActionSet()
        self.initialize()

        self.action_space = spaces.Discrete(len(self.legal_actions))
        one_screen_observation_space = spaces.Box(low=0,
                                                  high=255,
                                                  shape=(84, 84))
        self.observation_space = spaces.Tuple([one_screen_observation_space] *
                                              n_last_screens)
示例#7
0
def initializeALE(romFile):
    ale = ALEInterface()

    ale.setInt("max_num_frames_per_episode", 18000)
    ale.setInt("random_seed", 123)
    ale.setFloat("repeat_action_probability", 0.0)
    ale.setInt("frame_skip", 5)

    random_seed = ale.getInt("random_seed")
    print("random_seed: " + str(random_seed))

    # Set USE_SDL to true to display the screen. ALE must be compilied
    # with SDL enabled for this to work. On OSX, pygame init is used to
    # proxy-call SDL_main.

    USE_SDL = False
    if USE_SDL:
        if sys.platform == 'darwin':
            import pygame
            pygame.init()
            ale.setBool('sound', False)  # Sound doesn't work on OSX
        elif sys.platform.startswith('linux'):
            ale.setBool('sound', True)
        ale.setBool('display_screen', True)

    ale.loadROM(romFile)
    actionSet = ale.getMinimalActionSet()

    return ale, actionSet
def launch():
	logging.basicConfig(level=logging.INFO)
	myArgs = getParameters()
	rom = myArgs.game
	full_rom_path = os.path.join(myArgs.base_rom_path,rom)
	rng = np.random.RandomState()
	ale = ALEInterface()
	ale.setInt('random_seed',38)
	ale.setBool('display_screen',myArgs.display_screen)
	ale.setInt('frame_skip',myArgs.frame_skip)
	ale.setFloat('repeat_action_probability',myArgs.repeat_action_probability)

	ale.loadROM(full_rom_path)
	valid_actions = ale.getMinimalActionSet()
	'''for episode in xrange(10):
		total_reward = 0
		while not ale.game_over():
			from random import randrange
			a = valid_actions[randrange(len(valid_actions))]
			ale.act(a)
			#print reward
			#print ale.getScreenRGB()

			#total_reward += reward
			#print 'Episode', episode, 'ended with score:', total_reward
		ale.reset_game()
	'''
	memory_pool = ReplayMemory(myArgs.memory_size,rng)
	network_model = buildNetwork(myArgs.resized_height,myArgs.resized_width,myArgs.rmsp_epsilon,myArgs.rmsp_rho,myArgs.learning_rate,len(valid_actions))
	ddqn = DDQN(network_model,valid_actions,myArgs.target_nn_update_frequency,myArgs.discount,myArgs.phi_len)
	agent = Agent(myArgs,ddqn,memory_pool,valid_actions,rng)
	train_agent = TrainMyAgent(myArgs,ale,agent,valid_actions,rng)
	train_agent.run()
示例#9
0
class ALE(object):
    def __init__(self, init_seed, init_rand):
        self.ale = ALEInterface()
        self.ale.setInt(b'random_seed', init_seed)
        self.ale.setFloat(b'repeat_action_probability', 0.0) 
        self.ale.loadROM('./breakout.bin')
        self.action_size = 4

        self.screen = None
        self.reward = 0
        self.terminal = True
        self.init_rand = init_rand

    def setSetting(self, action_repeat, screen_type):
        self.action_repeat = action_repeat
        self.screen_type = screen_type

    def _step(self, action):
        self.reward = self.ale.act(action)
        self.terminal = self.ale.game_over()

        if self.screen_type == 0:
            self.screen = self.ale.getScreenRGB()
        elif self.screen_type == 1:
            self.screen = self.ale.getScreenGrayscale()
        else:
            sys.stderr.write('screen_type error!')
            exit()


    def state(self):
        return self.reward, self.screen, self.terminal

    def act(self, action):
        cumulated_reward = 0
        for _ in range(self.action_repeat):
            self._step(action)
            cumulated_reward += self.reward
            if self.terminal:
                break
        self.reward = cumulated_reward
        return self.state()

    def new_game(self):
        if self.ale.game_over():
            self.ale.reset_game()

            if self.screen_type == 0:
                self.screen = self.ale.getScreenRGB()
            elif self.screen_type == 1:
                self.screen = self.ale.getScreenGrayscale()
            else:
                sys.stderr.write('screen_type error!')
                exit()

        for _ in range(self.init_rand):
            self._step(0)

        return self.screen
示例#10
0
class ALEGame(object):
    """
    Class linked to the Arcade Learning Environment
    """
    def __init__(self, rand_seed, game_name):
        self.ale = ALEInterface()
        self.ale.setInt(b'random_seed', rand_seed)
        self.ale.setFloat(b'repeat_action_probability', 0.0)
        self.ale.setBool(b'color_averaging', True)
        self.ale.setInt(b'frame_skip', SKIPED_FRAMES)
        self.ale.loadROM(game_name.encode('ascii'))

        self.real_actions = self.ale.getMinimalActionSet()
        self.screen = np.empty((IMAGE_HEIGHT, IMAGE_WIDTH, 1), dtype=np.uint8)
        self.reset()

    def preprocess_image(self, is_to_reshape=False):
        """
        Get image from the game and reshape it
        """
        self.ale.getScreenGrayscale(self.screen)
        reshaped_screen = np.reshape(self.screen, (IMAGE_HEIGHT, IMAGE_WIDTH))
        x_t = skimage.transform.resize(reshaped_screen, (110, 84),
                                       preserve_range=True)

        x_t = x_t[18:102, :]

        if is_to_reshape:
            x_t = np.reshape(x_t, (84, 84, 1))

        x_t = x_t.astype(np.float32)
        x_t *= (1.0 / 255.0)
        return x_t

    def reset(self):
        """
        Resets the game and create the first state
        """
        self.ale.reset_game()
        self.act(0)
        x_t = self.preprocess_image()
        self.s_t = np.stack((x_t, x_t, x_t, x_t), axis=2)

    def act(self, action):
        self.reward = self.ale.act(action)
        self.is_game_over = self.ale.game_over()

    def process_to_next_image(self, action):
        """
        Acts and get new state        
        """
        real_action = self.real_actions[action]
        self.act(real_action)
        x_t1 = self.preprocess_image(True)
        self.s_t1 = np.append(self.s_t[:, :, 1:], x_t1, axis=2)

    def update(self):
        self.s_t = self.s_t1
示例#11
0
文件: ale.py 项目: andth80/a3c-demo
class Env():
    def __init__(self, rom_name):
        self.__initALE()
        self.__loadROM(rom_name)
        self.screen_history = []
        self.screens = []

    def __initALE(self):
        self.ale = ALEInterface()
        self.ale.setInt(b'random_seed', randrange(1000))
        self.ale.setInt(b'fragsize', 64)
        self.ale.setInt(b'frame_skip', 1)

        # qq set this back to 0.25?
        self.ale.setFloat(b'repeat_action_probability', 0)
        self.ale.setLoggerMode('error')

    def __loadROM(self, rom_name):
        self.ale.loadROM(rom_name.encode('utf-8'))
        self.actions = self.ale.getMinimalActionSet()

        (width, height) = self.ale.getScreenDims()
        self.screen_data1 = np.empty((height, width, 3), dtype=np.uint8)
        self.screen_data2 = np.empty((height, width, 3), dtype=np.uint8)

    def get_legal_action_count(self):
        return len(self.actions)

    def act(self, action_index):
        action = self.actions[action_index]
        reward = 0

        # perform the action 4 times
        reward += _clip(self.ale.act(action), -1, 1)
        reward += _clip(self.ale.act(action), -1, 1)
        reward += _clip(self.ale.act(action), -1, 1)
        self.ale.getScreenRGB(self.screen_data1)
        reward += _clip(self.ale.act(action), -1, 1)
        self.ale.getScreenRGB(self.screen_data2)

        # return the pixel-wise max of the last two frames (some games only
        # render every other frame)
        screen_data_combined = np.maximum(self.screen_data1, self.screen_data2)
        terminal = self.ale.game_over()

        self.screens.append(preprocess_screen(screen_data_combined))
        phi = get_phi(self.screens)

        return (terminal, reward, phi, self.screen_data2)

    def get_s(self):
        return get_phi(self.screens)

    def reset(self):
        self.ale.reset_game()
        self.screens = []
示例#12
0
文件: emulator.py 项目: hercky/a3c
class Emulator:
    def __init__(self):
    
        self.ale = ALEInterface()
        
        # turn off the sound
        self.ale.setBool('sound', False)
        
        self.ale.setBool('display_screen', EMULATOR_DISPLAY)

        self.ale.setInt('frame_skip', FRAME_SKIP)
        self.ale.setFloat('repeat_action_probability', REPEAT_ACTION_PROBABILITY)
        self.ale.setBool('color_averaging', COLOR_AVERAGING)

        self.ale.setInt('random_seed', RANDOM_SEED)

        if RECORD_SCENE_PATH:
            self.ale.setString('record_screen_dir', RECORD_SCENE_PATH)


        self.ale.loadROM(ROM_PATH)

        self.actions = self.ale.getMinimalActionSet()
        logger.info("Actions: " + str(self.actions))

        self.dims = DIMS
        #self.start_lives = self.ale.lives()

    def getActions(self):
        return self.actions

    def numActions(self):
        return len(self.actions)

    def restart(self):
        self.ale.reset_game()
        # can be omitted

    def act(self, action):
        reward = self.ale.act(self.actions[action])
        return reward

    def getScreen(self):
        # why grayscale ?
        screen = self.ale.getScreenGrayscale()
        resized = cv2.resize(screen, self.dims)
        # normalize
        #resized /= COLOR_SCALE

        return resized

    def isTerminal(self):
        # while training deepmind only ends when agent dies
        #terminate = DEATH_END and TRAIN and (self.ale.lives() < self.start_lives)

        return self.ale.game_over()
示例#13
0
class AtariEnvironment:
    def __init__(self, rom):
        self.ale = ALEInterface()
        self.ale.setFloat(b'repeat_action_probability', 0.0)
        self.ale.loadROM(rom_file=rom)
        self.action_space = self.ale.getMinimalActionSet()
        self.obs = self.reset()

        try:
            self.im = Image.fromarray(self.obs)
            self.root = Tk()
            self.tkim = ImageTk.PhotoImage(self.im)
            self.window = Label(image=self.tkim)
            self.window.image = self.tkim
            self.window.pack()

        except AttributeError:
            print("Cannot create rendering attributes")

    def step(self, action):
        reward = 0.

        # Use if you want environment to provide every 4th frame and repeat action in between
        for i in range(4):
            reward += float(self.ale.act(self.action_space[action]))
            if i == 2:
                frame1 = self.ale.getScreenGrayscale()
            if i == 3:
                frame2 = self.ale.getScreenGrayscale()

        self.obs = np.squeeze(np.maximum(frame1, frame2))

        # Use if you want to receive every frame from environment
        # reward += float(self.ale.act(self.action_space[action]))
        # self.obs = np.squeeze(self.ale.getScreenGrayscale())

        done = self.ale.game_over()
        return self.obs, reward, done

    def reset(self):
        self.ale.reset_game()
        self.obs = np.squeeze(self.ale.getScreenGrayscale())
        return self.obs

    def render(self, rate=0.1):
        self.im = Image.fromarray(self.obs)
        self.tkim = ImageTk.PhotoImage(self.im)
        self.window.configure(image=self.tkim)
        self.window.image = self.tkim
        self.window.update_idletasks()
        self.window.update()
        time.sleep(rate)

    def sample_action(self):
        action = random.choice([0, 1, 2, 3])
        return action
示例#14
0
文件: envs.py 项目: afcarl/uct_atari
class Atari(AtariEnv):
    metadata = {'render.modes': ['human', 'rgb_array']}

    def __init__(self,
                 game='pong',
                 obs_type='ram',
                 frameskip=(2, 5),
                 repeat_action_probability=0.):
        """Frameskip should be either a tuple (indicating a random range to
        choose from, with the top value exclude), or an int."""

        utils.EzPickle.__init__(self, game, obs_type)
        assert obs_type in ('ram', 'image')

        self.game_path = atari_py.get_game_path(game)
        if not os.path.exists(self.game_path):
            raise IOError('You asked for game %s but path %s does not exist' %
                          (game, self.game_path))
        self._obs_type = obs_type
        self.frameskip = frameskip
        self.ale = ALEInterface()
        self.viewer = None

        # Tune (or disable) ALE's action repeat:
        # https://github.com/openai/gym/issues/349
        assert isinstance(
            repeat_action_probability,
            (float, int)), "Invalid repeat_action_probability: {!r}".format(
                repeat_action_probability)
        self.ale.setFloat('repeat_action_probability'.encode('utf-8'),
                          repeat_action_probability)

        self._seed()

        (screen_width, screen_height) = self.ale.getScreenDims()
        self._buffer = np.empty((screen_height, screen_width, 3),
                                dtype=np.uint8)

        self._action_set = self.ale.getMinimalActionSet()
        self.action_space = spaces.Discrete(len(self._action_set))

        (screen_width, screen_height) = self.ale.getScreenDims()
        if self._obs_type == 'ram':
            self.observation_space = spaces.Box(low=np.zeros(128),
                                                high=np.zeros(128) + 255)
        elif self._obs_type == 'image':
            self.observation_space = spaces.Box(low=0,
                                                high=255,
                                                shape=(screen_height,
                                                       screen_width, 3))
        else:
            raise error.Error('Unrecognized observation type: {}'.format(
                self._obs_type))

    def _get_image(self):
        return self.ale.getScreenRGB(self._buffer).copy()
示例#15
0
class Emulator:
    def __init__(self):

        self.ale = ALEInterface()

        # turn off the sound
        self.ale.setBool('sound', False)

        self.ale.setBool('display_screen', EMULATOR_DISPLAY)

        self.ale.setInt('frame_skip', FRAME_SKIP)
        self.ale.setFloat('repeat_action_probability',
                          REPEAT_ACTION_PROBABILITY)
        self.ale.setBool('color_averaging', COLOR_AVERAGING)

        self.ale.setInt('random_seed', RANDOM_SEED)

        if RECORD_SCENE_PATH:
            self.ale.setString('record_screen_dir', RECORD_SCENE_PATH)

        self.ale.loadROM(ROM_PATH)

        self.actions = self.ale.getMinimalActionSet()
        logger.info("Actions: " + str(self.actions))

        self.dims = DIMS
        #self.start_lives = self.ale.lives()

    def getActions(self):
        return self.actions

    def numActions(self):
        return len(self.actions)

    def restart(self):
        self.ale.reset_game()
        # can be omitted

    def act(self, action):
        reward = self.ale.act(self.actions[action])
        return reward

    def getScreen(self):
        # why grayscale ?
        screen = self.ale.getScreenGrayscale()
        resized = cv2.resize(screen, self.dims)
        # normalize
        #resized /= COLOR_SCALE

        return resized

    def isTerminal(self):
        # while training deepmind only ends when agent dies
        #terminate = DEATH_END and TRAIN and (self.ale.lives() < self.start_lives)

        return self.ale.game_over()
示例#16
0
def init():

  pygame.init()
  rom_path = '/Users/maciej/Development/atari-roms'
  ale = ALEInterface()
  ale.setInt('random_seed', 123)
  ale.setBool('frame_skip', 1)
  ale.loadROM(rom_path + '/space_invaders.bin')
  ale.setFloat("repeat_action_probability", 0)
  return ale
示例#17
0
 def _init_ale(rand_seed, rom_file):
     assert os.path.exists(rom_file), '%s does not exists.'
     ale = ALEInterface()
     ale.setInt('random_seed', rand_seed)
     ale.setBool('showinfo', False)
     ale.setInt('frame_skip', 1)
     ale.setFloat('repeat_action_probability', 0.0)
     ale.setBool('color_averaging', False)
     ale.loadROM(rom_file)
     return ale
示例#18
0
 def _init_ale(rand_seed, rom_file):
     assert os.path.exists(rom_file), '%s does not exists.'
     ale = ALEInterface()
     ale.setInt('random_seed', rand_seed)
     ale.setBool('showinfo', False)
     ale.setInt('frame_skip', 1)
     ale.setFloat('repeat_action_probability', 0.0)
     ale.setBool('color_averaging', False)
     ale.loadROM(rom_file)
     return ale
示例#19
0
def init():

    pygame.init()
    rom_path = '/Users/maciej/Development/atari-roms'
    ale = ALEInterface()
    ale.setInt('random_seed', 123)
    ale.setBool('frame_skip', 1)
    ale.loadROM(rom_path + '/space_invaders.bin')
    ale.setFloat("repeat_action_probability", 0)
    return ale
示例#20
0
class Environment:
    def __init__(self, render=False):
        self.ale = ALEInterface()
        self.ale.setInt(b'random_seed', 0)
        self.ale.setFloat(b'repeat_action_probability', 0.0)
        self.ale.setBool(b'color_averaging', True)
        self.ale.setInt(b'frame_skip', 4)
        self.ale.setBool(b'display_screen', render)
        self.ale.loadROM(ENV.encode('ascii'))
        self._screen = np.empty((210, 160, 1), dtype=np.uint8)
        self._no_op_max = 7

    def set_render(self, render):
        if not render:
            self.ale.setBool(b'display_screen', render)

    def reset(self):
        self.ale.reset_game()

        # randomize initial state
        if self._no_op_max > 0:
            no_op = np.random.randint(0, self._no_op_max + 1)
            for _ in range(no_op):
                self.ale.act(0)

        self.ale.getScreenGrayscale(self._screen)
        screen = np.reshape(self._screen, (210, 160))
        screen = cv2.resize(screen, (84, 110))
        screen = screen[18:102, :]
        screen = screen.astype(np.float32)
        screen /= 255.0

        self.frame_buffer = np.stack((screen, screen, screen, screen), axis=2)
        return self.frame_buffer

    def act(self, action):

        reward = self.ale.act(4 + action)
        done = self.ale.game_over()

        self.ale.getScreenGrayscale(self._screen)
        screen = np.reshape(self._screen, (210, 160))
        screen = cv2.resize(screen, (84, 110))
        screen = np.reshape(screen[18:102, :], (84, 84, 1))
        screen = screen.astype(np.float32)
        screen *= (1 / 255.0)

        self.frame_buffer = np.append(self.frame_buffer[:, :, 1:],
                                      screen,
                                      axis=2)

        return self.frame_buffer, reward, done, ""

    def close(self):
        self.ale.setBool(b'display_screen', False)
示例#21
0
def init(game, display_screen=False, record_dir=None):
    if display_screen:
        import pygame
        pygame.init()
    ale = ALEInterface()
    ale.setBool('display_screen', display_screen)
    ale.setInt('random_seed', 123)
    if record_dir is not None:
        ale.setString("record_screen_dir", record_dir)
    ale.loadROM('{game}.bin'.format(game=game))
    ale.setFloat("repeat_action_probability", 0)

    return ale
示例#22
0
def init(display_screen=False):
    if display_screen:
        import pygame
        pygame.init()
    rom_path = '.'
    ale = ALEInterface()
    ale.setBool('display_screen', display_screen)
    ale.setInt('random_seed', 123)
    ale.setBool('frame_skip', 1)
    ale.loadROM(rom_path + '/space_invaders.bin')
    ale.setFloat("repeat_action_probability", 0)

    return ale
示例#23
0
def init(game, display_screen=False, record_dir=None):
    if display_screen:
        import pygame
        pygame.init()
    ale = ALEInterface()
    ale.setBool('display_screen', display_screen)
    ale.setInt('random_seed', 123)
    if record_dir is not None:
        ale.setString("record_screen_dir", record_dir)
    ale.loadROM('{game}.bin'.format(game=game))
    ale.setFloat("repeat_action_probability", 0)

    return ale
示例#24
0
class ALEInterfaceWrapper:
    def __init__(self, repeat_action_probability, rng):
        self.internal_action_repeat_prob = repeat_action_probability
        self.prev_action = 0
        self.rng_source = rng
        self.rng = deepcopy(self.rng_source)
        self.ale = ALEInterface()
        '''
		This sets the probability from the default 0.25 to 0.
		It ensures deterministic actions.
		'''
        self.ale.setFloat('repeat_action_probability', 0.0)

    def getScreenRGB(self):
        return self.ale.getScreenRGB()

    def game_over(self):
        return self.ale.game_over()

    def reset_game(self):
        self.ale.reset_game()

    def lives(self):
        return self.ale.lives()

    def getMinimalActionSet(self):
        return self.ale.getMinimalActionSet()

    def setInt(self, key, value):
        self.ale.setInt(key, value)

    def setFloat(self, key, value):
        self.ale.setFloat(key, value)

    def loadROM(self, rom):
        self.ale.loadROM(rom)

    def reset_action_seed(self):
        self.rng = deepcopy(self.rng_source)

    def set_action_seed(self, seed):
        self.rng = np.random.RandomState(seed)

    def act(self, action):
        actual_action = action
        if self.internal_action_repeat_prob > 0:
            if self.rng.uniform(0, 1) < self.internal_action_repeat_prob:
                actual_action = self.prev_action
        self.prev_action = actual_action
        return self.ale.act(actual_action)
示例#25
0
def init(display_screen=False, record_dir=None):
    if display_screen:
        import pygame
        pygame.init()
    rom_path = '.'
    ale = ALEInterface()
    ale.setBool('display_screen', display_screen)
    ale.setInt('random_seed', 123)
    if record_dir is not None:
        ale.setString("record_screen_dir", record_dir)
    ale.loadROM(rom_path + '/space_invaders.bin')
    ale.setFloat("repeat_action_probability", 0)

    return ale
示例#26
0
def init(display_screen=False, record_dir=None):
    if display_screen:
        import pygame
        pygame.init()
    rom_path = '.'
    ale = ALEInterface()
    ale.setBool('display_screen', display_screen)
    ale.setInt('random_seed', 123)
    if record_dir is not None:
        ale.setString("record_screen_dir", record_dir)
    ale.loadROM(rom_path + '/space_invaders.bin')
    ale.setFloat("repeat_action_probability", 0)

    return ale
示例#27
0
class AleInterface(object):
    def __init__(self, game, args):
        self.game = game
        self.ale = ALEInterface()

        # if sys.platform == 'darwin':
        #     self.ale.setBool('sound', False)  # Sound doesn't work on OSX
        # elif sys.platform.startswith('linux'):
        #     self.ale.setBool('sound', True)
        # self.ale.setBool('display_screen', True)
        #
        self.ale.setBool('display_screen', args.display_screen)

        self.ale.setInt('frame_skip', args.frame_skip)
        self.ale.setFloat('repeat_action_probability', args.repeat_action_probability)
        self.ale.setBool('color_averaging', args.color_averaging)
        self.ale.setInt('random_seed', args.random_seed)

        #
        # if rand_seed is not None:
        #     self.ale.setInt('random_seed', rand_seed)

        rom_file = "./roms/%s.bin" % game
        if not os.path.exists(rom_file):
            print "not found rom file:", rom_file
            sys.exit(-1)
        self.ale.loadROM(rom_file)

        self.actions = self.ale.getMinimalActionSet()


    def get_actions_num(self):
        return len(self.actions)

    def act(self, action):
        reward = self.ale.act(self.actions[action])
        return reward

    def get_screen_gray(self):
        return self.ale.getScreenGrayscale()

    def get_screen_rgb(self):
        return self.ale.getScreenRGB()

    def game_over(self):
        return self.ale.game_over()

    def reset_game(self):
        return self.ale.reset_game()
示例#28
0
    def __init__(self,
                 rom_path,
                 n_last_screens=4,
                 frame_skip=4,
                 treat_life_lost_as_terminal=True,
                 crop_or_scale='scale',
                 max_start_nullops=30,
                 record_screen_dir=None,
                 render=False,
                 max_episode_length=None,
                 max_time=None):
        self.frame_skip = frame_skip
        self.n_last_screens = n_last_screens
        self.treat_life_lost_as_terminal = treat_life_lost_as_terminal
        self.crop_or_scale = crop_or_scale
        self.max_start_nullops = max_start_nullops
        self.max_episode_length = max_episode_length
        self.max_time = max_time

        ale = ALEInterface()
        # Use numpy's random state
        seed = np.random.randint(0, 2**16)
        ale.setInt(b'random_seed', seed)
        ale.setFloat(b'repeat_action_probability', 0.0)
        ale.setBool(b'color_averaging', False)

        if record_screen_dir is not None:
            ale.setString(b'record_screen_dir', str.encode(record_screen_dir))

        if render:
            if sys.platform == 'darwin':
                import pygame
                pygame.init()
                ale.setBool(b'sound', False)  # Sound doesn't work on OSX
            elif sys.platform.startswith('linux'):
                ale.setBool(b'sound', True)
            ale.setBool(b'display_screen', True)

        ale.loadROM(str.encode(rom_path))

        self.ale = ale
        self.__exceed_max = False
        self.legal_actions = ale.getMinimalActionSet()
        self.reset()
示例#29
0
文件: ale.py 项目: carpedm20/async-rl
    def __init__(self, rom_filename, seed=None, use_sdl=False, n_last_screens=4,
                 frame_skip=4, treat_life_lost_as_terminal=True,
                 crop_or_scale='scale', max_start_nullops=30,
                 record_screen_dir=None):
        self.n_last_screens = n_last_screens
        self.treat_life_lost_as_terminal = treat_life_lost_as_terminal
        self.crop_or_scale = crop_or_scale
        self.max_start_nullops = max_start_nullops

        ale = ALEInterface()
        if seed is not None:
            assert seed >= 0 and seed < 2 ** 16, \
                "ALE's random seed must be represented by unsigned int"
        else:
            # Use numpy's random state
            seed = np.random.randint(0, 2 ** 16)
        ale.setInt(b'random_seed', seed)
        ale.setFloat(b'repeat_action_probability', 0.0)
        ale.setBool(b'color_averaging', False)
        if record_screen_dir is not None:
            ale.setString(b'record_screen_dir', str.encode(record_screen_dir))
        self.frame_skip = frame_skip
        if use_sdl:
            if 'DISPLAY' not in os.environ:
                raise RuntimeError(
                    'Please set DISPLAY environment variable for use_sdl=True')
            # SDL settings below are from the ALE python example
            if sys.platform == 'darwin':
                import pygame
                pygame.init()
                ale.setBool(b'sound', False)  # Sound doesn't work on OSX
            elif sys.platform.startswith('linux'):
                ale.setBool(b'sound', True)
            ale.setBool(b'display_screen', True)
        ale.loadROM(str.encode(rom_filename))

        assert ale.getFrameNumber() == 0


        self.ale = ale
        self.legal_actions = ale.getMinimalActionSet()
        self.initialize()
class ALEInterfaceWrapper:
    def __init__(self, repeat_action_probability):
        self.internal_action_repeat_prob = repeat_action_probability
        self.prev_action = 0
        self.ale = ALEInterface()
        '''
		This sets the probability from the default 0.25 to 0.
		It ensures deterministic actions.
		'''
        self.ale.setFloat('repeat_action_probability',
                          repeat_action_probability)

    def getScreenRGB(self):
        return self.ale.getScreenRGB()

    def game_over(self):
        return self.ale.game_over()

    def reset_game(self):
        self.ale.reset_game()

    def lives(self):
        return self.ale.lives()

    def getMinimalActionSet(self):
        return self.ale.getMinimalActionSet()

    def setInt(self, key, value):
        self.ale.setInt(key, value)

    def setFloat(self, key, value):
        self.ale.setFloat(key, value)

    def loadROM(self, rom):
        self.ale.loadROM(rom)

    def act(self, action):
        actual_action = action
        return self.ale.act(actual_action)
示例#31
0
    def _init_ale(self):
        ale = ALEInterface()
        ale.setBool('sound', self.play_sound)
        ale.setBool('display_screen', self.display_screen)
        ale.setInt('random_seed', self.random_seed)

        # Frame skip is implemented separately
        ale.setInt('frame_skip', 1)
        ale.setBool('color_averaging', False)
        ale.setFloat('repeat_action_probability', 0.0)
        # Somehow this repeat_action_probability has unexpected effect on game.
        # The larger this value is, the more frames games take to restart.
        # And when 1.0 games completely hang
        # We are setting the default value of 0.0 here, expecting that
        # it has no effect as frame_skip == 1
        # This action repeating is agent's concern
        # so we do not implement an equivalent in our wrapper.

        if self.record_screen_path:
            _LG.info('Recording screens: %s', self.record_screen_path)
            if not os.path.exists(self.record_screen_path):
                os.makedirs(self.record_screen_path)
            ale.setString('record_screen_dir', self.record_screen_path)

        if self.record_sound_filename:
            _LG.info('Recording sound: %s', self.record_sound_filename)
            record_sound_dir = os.path.dirname(self.record_sound_filename)
            if not os.path.exists(record_sound_dir):
                os.makedirs(record_sound_dir)
            ale.setBool('sound', True)
            ale.setString('record_sound_filename', self.record_sound_filename)

        ale.loadROM(self.rom_path)

        self._ale = ale
        self._actions = (ale.getMinimalActionSet() if self.minimal_action_set
                         else ale.getLegalActionSet())
示例#32
0
def ale_load_from_rom(rom_path, display_screen):
    rng = get_numpy_rng()
    try:
        from ale_python_interface import ALEInterface
    except ImportError as e:
        raise ImportError('Unable to import the python package of Arcade Learning Environment. ' \
                           'ALE may not have been installed correctly. Refer to ' \
                           '`https://github.com/mgbellemare/Arcade-Learning-Environment` for some' \
                           'installation guidance')

    ale = ALEInterface()
    ale.setInt(b'random_seed', rng.randint(1000))
    if display_screen:
        import sys
        if sys.platform == 'darwin':
            import pygame
            pygame.init()
            ale.setBool(b'sound', False) # Sound doesn't work on OSX
        ale.setBool(b'display_screen', True)
    else:
        ale.setBool(b'display_screen', False)
    ale.setFloat(b'repeat_action_probability', 0)
    ale.loadROM(str.encode(rom_path))
    return ale
示例#33
0
def ale_load_from_rom(rom_path, display_screen):
    rng = get_numpy_rng()
    try:
        from ale_python_interface import ALEInterface
    except ImportError as e:
        raise ImportError('Unable to import the python package of Arcade Learning Environment. ' \
                           'ALE may not have been installed correctly. Refer to ' \
                           '`https://github.com/mgbellemare/Arcade-Learning-Environment` for some' \
                           'installation guidance')

    ale = ALEInterface()
    ale.setInt('random_seed', rng.randint(1000))
    if display_screen:
        import sys
        if sys.platform == 'darwin':
            import pygame
            pygame.init()
            ale.setBool('sound', False)  # Sound doesn't work on OSX
        ale.setBool('display_screen', True)
    else:
        ale.setBool('display_screen', False)
    ale.setFloat('repeat_action_probability', 0)
    ale.loadROM(rom_path)
    return ale
示例#34
0
class AtariEnvironment:
    num_actions = 18  # Use full action set

    def __init__(self, frame_shape, frame_postprocess=lambda x: x):
        self.ale = ALEInterface()
        self.ale.setBool(b"display_screen", cfg.display_screen)
        self.ale.setInt(b"frame_skip", 1)
        self.ale.setBool(b"color_averaging", False)
        self.ale.setInt(b"random_seed", cfg.random_seed)
        self.ale.setFloat(b"repeat_action_probability", cfg.sticky_prob)

        self.ale.loadROM(str.encode(cfg.rom))

        self.ale.setMode(cfg.mode)
        self.ale.setDifficulty(cfg.difficulty)

        self.action_set = self.ale.getLegalActionSet()
        assert len(self.action_set) == AtariEnvironment.num_actions

        screen_dims = tuple(reversed(self.ale.getScreenDims())) + (1,)
        self._frame_buffer = CircularBuffer(
            cfg.frame_buffer_size, screen_dims, np.uint8
        )
        self._frame_stack = CircularBuffer(
            cfg.frame_history_size, frame_shape, np.uint8
        )
        self._frame_postprocess = frame_postprocess

        self._episode_count = 0
        self.reset(inc_episode_count=False)

    def _is_terminal(self):
        return self.ale.game_over()

    def _get_single_frame(self):
        stacked_frames = np.concatenate(self._frame_buffer, axis=2)
        maxed_frame = np.amax(stacked_frames, axis=2)
        expanded_frame = np.expand_dims(maxed_frame, 3)
        frame = self._frame_postprocess(expanded_frame)

        return frame

    def reset(self, inc_episode_count=True):
        self._episode_frames = 0
        self._episode_reward = 0
        if inc_episode_count:
            self._episode_count += 1

        self.ale.reset_game()
        for _ in range(cfg.frame_buffer_size):
            self._frame_buffer.append(self.ale.getScreenGrayscale())
        for _ in range(cfg.frame_history_size):
            self._frame_stack.append(self._get_single_frame())

    def act(self, action):
        assert not self._is_terminal()

        cum_reward = 0
        for _ in range(cfg.frame_skip):
            cum_reward += self.ale.act(self.action_set[action])
            self._frame_buffer.append(self.ale.getScreenGrayscale())

        self._frame_stack.append(self._get_single_frame())
        self._episode_frames += cfg.frame_skip
        self._episode_reward += cum_reward
        cum_reward = np.clip(cum_reward, -1, 1)

        return cum_reward, self.state, self._is_terminal()

    @property
    def state(self):
        assert len(self._frame_buffer) == cfg.frame_buffer_size
        assert len(self._frame_stack) == cfg.frame_history_size
        return np.concatenate(self._frame_stack, axis=-1)

    @property
    def episode_reward(self):
        return self._episode_reward

    @property
    def episode_frames(self):
        return self._episode_frames

    @property
    def episode_steps(self):
        return self._episode_frames // cfg.frame_skip

    @property
    def episode_count(self):
        return self._episode_count
示例#35
0
class AtariAleEnvironment(object):
    def __init__(self, env_name, display=False, no_op_max=7):
        self.ale = ALEInterface()
        self.ale.setInt(b'random_seed', 113 * np.random.randint(0, 5))
        self.ale.setFloat(b'repeat_action_probability', 0.0)
        self.ale.setBool(b'color_averaging', True)
        self.ale.setInt(b'frame_skip', 4)
        self._no_op_max = no_op_max

        if display:
            self._setup_display()

        rom_name = env_name + '.bin'
        self.ale.loadROM(rom_name.encode('ascii'))

        # collect minimal action set
        self.real_actions = self.ale.getMinimalActionSet()

        # height=210, width=160
        self._screen = np.empty((210, 160, 1), dtype=np.uint8)

    def _process_frame(self, action, reshape):
        reward = self.ale.act(action)
        terminal = self.ale.game_over()

        # screen shape is (210, 160, 1)
        self.ale.getScreenGrayscale(self._screen)

        # reshape it into (210, 160)
        reshaped_screen = np.reshape(self._screen, (210, 160))

        # resize to height=110, width=84
        resized_screen = cv2.resize(reshaped_screen, (84, 84))

        #x_t = resized_screen[18:102,:]
        x_t = resized_screen
        if reshape:
            x_t = np.reshape(x_t, (84, 84, 1))
        x_t = x_t.astype(np.float32)
        x_t *= (1.0 / 255.0)
        return reward, terminal, x_t

    def _setup_display(self):
        if sys.platform == 'darwin':
            import pygame
            pygame.init()
            self.ale.setBool(b'sound', False)
        elif sys.platform.startswith('linux'):
            self.ale.setBool(b'sound', True)
        self.ale.setBool(b'display_screen', True)

    def reset(self):
        self.ale.reset_game()

        # randomize initial state
        if self._no_op_max > 0:
            no_op = np.random.randint(0, self._no_op_max + 1)
            for _ in range(no_op):
                self.ale.act(0)

        _, _, x_t = self._process_frame(0, False)

        self.reward = 0
        self.terminal = False
        self.s_t = np.stack((x_t, x_t, x_t, x_t), axis=2)

        return self.s_t

    def step(self, action):
        # convert original 18 action index to minimal action set index
        real_action = self.real_actions[action]

        r, t, x_t1 = self._process_frame(real_action, True)

        self.reward = r
        self.terminal = t
        s_t1 = np.append(self.s_t[:, :, 1:], x_t1, axis=2)
        self.s_t = s_t1

        # 4th argument is some info from gym; consistency hack
        return self.s_t, self.reward, self.terminal, None
class AtariEmulator:

	def __init__(self, args):
		''' Initialize Atari environment '''

		# Parameters
		self.buffer_length = args.buffer_length
		self.screen_dims = args.screen_dims
		self.frame_skip = args.frame_skip
		self.blend_method = args.blend_method
		self.reward_processing = args.reward_processing
		self.max_start_wait = args.max_start_wait
		self.history_length = args.history_length
		self.start_frames_needed = self.buffer_length - 1 + ((args.history_length - 1) * self.frame_skip)

		#Initialize ALE instance
		self.ale = ALEInterface()
		self.ale.setFloat(b'repeat_action_probability', 0.0)
		if args.watch:
			self.ale.setBool(b'sound', True)
			self.ale.setBool(b'display_screen', True)
		self.ale.loadROM(str.encode(args.rom_path + '/' + args.game + '.bin'))

		self.buffer = np.empty((self.buffer_length, 210, 160))
		self.current = 0
		self.action_set = self.ale.getMinimalActionSet()
		self.lives = self.ale.lives()

		self.reset()


	def get_possible_actions(self):
		''' Return list of possible actions for game '''
		return self.action_set

	def get_screen(self):
		''' Add screen to frame buffer '''
		self.buffer[self.current] = np.squeeze(self.ale.getScreenGrayscale())
		self.current = (self.current + 1) % self.buffer_length


	def reset(self):
		self.ale.reset_game()
		self.lives = self.ale.lives()

		if self.max_start_wait < 0:
			print("ERROR: max start wait decreased beyond 0")
			sys.exit()
		elif self.max_start_wait <= self.start_frames_needed:
			wait = 0
		else:
			wait = random.randint(0, self.max_start_wait - self.start_frames_needed)
		for _ in range(wait):
			self.ale.act(self.action_set[0])

		# Fill frame buffer
		self.get_screen()
		for _ in range(self.buffer_length - 1):
			self.ale.act(self.action_set[0])
			self.get_screen()
		# get initial_states
		state = [(self.preprocess(), 0, 0, False)]
		for step in range(self.history_length - 1):
			state.append(self.run_step(0))

		# make sure agent hasn't died yet
		if self.isTerminal():
			print("Agent lost during start wait.  Decreasing max_start_wait by 1")
			self.max_start_wait -= 1
			return self.reset()

		return state


	def run_step(self, action):
		''' Apply action to game and return next screen and reward '''

		raw_reward = 0
		for step in range(self.frame_skip):
			raw_reward += self.ale.act(self.action_set[action])
			self.get_screen()

		reward = None
		if self.reward_processing == 'clip':
			reward = np.clip(raw_reward, -1, 1)
		else:
			reward = raw_reward

		terminal = self.isTerminal()
		self.lives = self.ale.lives()

		return (self.preprocess(), action, reward, terminal, raw_reward)



	def preprocess(self):
		''' Preprocess frame for agent '''

		img = None

		if self.blend_method == "max":
			img = np.amax(self.buffer, axis=0)

		return cv2.resize(img, self.screen_dims, interpolation=cv2.INTER_LINEAR)

	def isTerminal(self):
		return (self.isGameOver() or (self.lives > self.ale.lives()))


	def isGameOver(self):
		return self.ale.game_over()
示例#37
0
class AtariEmulator(BaseEnvironment):
    def __init__(self,
                 rom_addr,
                 random_start=False,
                 random_seed=6,
                 visualize=True,
                 single_life=False):
        self.ale = ALEInterface()

        self.ale.setInt(b"random_seed", 2 * random_seed)
        # For fuller control on explicit action repeat (>= ALE 0.5.0)
        self.ale.setFloat(b"repeat_action_probability", 0.0)
        # Disable frame_skip and color_averaging
        # See: http://is.gd/tYzVpj
        self.ale.setInt(b"frame_skip", 1)
        self.ale.setBool(b"color_averaging", False)
        full_rom_path = rom_addr
        self.ale.loadROM(str.encode(full_rom_path))
        self.legal_actions = self.ale.getMinimalActionSet()
        self.screen_width, self.screen_height = self.ale.getScreenDims()
        self.lives = self.ale.lives()
        self.writer = imageio.get_writer('breakout0.gif', fps=30)
        self.random_start = random_start
        self.single_life_episodes = single_life
        self.call_on_new_frame = visualize

        # Processed historcal frames that will be fed in to the network
        # (i.e., four 84x84 images)
        self.observation_pool = ObservationPool(
            np.zeros((84, 84, 4), dtype=np.uint8))
        self.rgb_screen = np.zeros((self.screen_height, self.screen_width, 3),
                                   dtype=np.uint8)
        self.gray_screen = np.zeros((self.screen_height, self.screen_width, 1),
                                    dtype=np.uint8)
        self.frame_pool = FramePool(
            np.empty((2, self.screen_height, self.screen_width),
                     dtype=np.uint8), self.__process_frame_pool)

    def get_legal_actions(self):
        return self.legal_actions

    def __get_screen_image(self):
        """
        Get the current frame luminance
        :return: the current frame
        """
        self.ale.getScreenGrayscale(self.gray_screen)
        if self.call_on_new_frame:
            self.ale.getScreenRGB(self.rgb_screen)
            self.on_new_frame(self.rgb_screen)
        return np.squeeze(self.gray_screen)

    def on_new_frame(self, frame):

        pass

    def __new_game(self):
        """ Restart game """
        self.ale.reset_game()
        self.lives = self.ale.lives()
        if self.random_start:
            wait = random.randint(0, MAX_START_WAIT)
            for _ in range(wait):
                self.ale.act(self.legal_actions[0])

    def __process_frame_pool(self, frame_pool):
        """ Preprocess frame pool """

        img = np.amax(frame_pool, axis=0)
        img = imresize(img, (84, 84), interp='nearest')
        img = img.astype(np.uint8)
        return img

    def __action_repeat(self, a, times=ACTION_REPEAT):
        """ Repeat action and grab screen into frame pool """
        reward = 0
        for i in range(times - FRAMES_IN_POOL):
            reward += self.ale.act(self.legal_actions[a])
        # Only need to add the last FRAMES_IN_POOL frames to the frame pool
        for i in range(FRAMES_IN_POOL):
            reward += self.ale.act(self.legal_actions[a])
            self.frame_pool.new_frame(self.__get_screen_image())
        return reward

    def get_initial_state(self):
        """ Get the initial state """
        self.__new_game()
        for step in range(4):
            _ = self.__action_repeat(0)
            self.observation_pool.new_observation(
                self.frame_pool.get_processed_frame())
        if self.__is_terminal():
            raise Exception('This should never happen.')
        return self.observation_pool.get_pooled_observations()

    def next(self, action):
        """ Get the next state, reward, and game over signal """

        reward = self.__action_repeat(np.argmax(action))
        self.observation_pool.new_observation(
            self.frame_pool.get_processed_frame())
        terminal = self.__is_terminal()
        self.lives = self.ale.lives()
        observation = self.observation_pool.get_pooled_observations()
        return observation, reward, terminal

    def __is_terminal(self):
        if self.single_life_episodes:
            return self.__is_over() or (self.lives > self.ale.lives())
        else:
            return self.__is_over()

    def __is_over(self):
        return self.ale.game_over()

    def get_noop(self):
        return [1.0, 0.0]
示例#38
0
class Environment:
	"""docstring for Environment"""

	BUFFER_LEN = 2
	EPISODE_FRAMES = 18000
	EPOCH_COUNT = 200
	EPOCH_STEPS = 250000
	EVAL_EPS = 0.001
	FRAMES_SKIP = 4
	FRAME_HEIGHT = 84
	FRAME_WIDTH = 84
	MAX_NO_OP = 30
	MAX_REWARD = 1
	
	def __init__(self, rom_name, rng, display_screen = False):
		self.api = ALEInterface()
		self.api.setInt('random_seed', rng.randint(333))
		self.api.setBool('display_screen', display_screen)
		self.api.setFloat('repeat_action_probability', 0.0)
		self.rom_name = rom_name
		self.display_screen = display_screen
		self.rng = rng
		self.repeat = Environment.FRAMES_SKIP
		self.buffer_len = Environment.BUFFER_LEN
		self.height = Environment.FRAME_HEIGHT
		self.width = Environment.FRAME_WIDTH
		self.episode_steps = Environment.EPISODE_FRAMES / Environment.FRAMES_SKIP
		self.merge_id = 0
		self.max_reward = Environment.MAX_REWARD
		self.eval_eps = Environment.EVAL_EPS
		self.log_dir = ''
		self.network_dir = ''

		self.api.loadROM('../rom/' + self.rom_name)
		self.minimal_actions = self.api.getMinimalActionSet()
		original_width, original_height = self.api.getScreenDims()
		self.merge_frame = np.zeros((self.buffer_len
								, original_height
								, original_width)
								, dtype = np.uint8)

	def get_action_count(self):
		return len(self.minimal_actions)

	def train(self, agent, store_freq, folder = None, start_epoch = 0):
		self._open_log_files(agent, folder)
		obs = np.zeros((self.height, self.width), dtype = np.uint8)
		epoch_count = Environment.EPOCH_COUNT

		for epoch in xrange(start_epoch, epoch_count):
			self.need_reset = True
			steps_left = Environment.EPOCH_STEPS

			print "\n" + "=" * 50
			print "Epoch #%d" % (epoch + 1)
			episode = 0
			train_start = time.time()
			while steps_left > 0:
				num_step, _ = self._run_episode(agent, steps_left, obs)
				steps_left -= num_step
				episode += 1
				if steps_left == 0 or episode % 10 == 0:
					print "Finished episode #%d, steps_left = %d" \
						% (episode, steps_left)
			train_end = time.time()

			valid_values = agent.get_validate_values()
			eval_values = self.evaluate(agent)
			test_end = time.time()

			train_time = train_end - train_start
			test_time = test_end - train_end
			step_per_sec = Environment.EPOCH_STEPS * 1. / max(1, train_time)
			print "\tFinished epoch #%d, episode trained = %d\n" \
				"\tValidate values = %.3f, evaluate reward = %.3f\n"\
				"\tTrain time = %.0fs, test time = %.0fs, steps/sec = %.4f" \
					% (epoch + 1, episode, valid_values, eval_values\
						, train_time, test_time, step_per_sec)

			self._update_log_files(agent, epoch + 1, episode
								, valid_values, eval_values
								, train_time, test_time
								, step_per_sec, store_freq)
			gc.collect()

	def evaluate(self, agent, episodes = 30, obs = None):
		print "\n***Start evaluating"
		if obs is None:
			obs = np.zeros((self.height, self.width), dtype = np.uint8)
		sum_reward = 0.0
		sum_step = 0.0
		for episode in xrange(episodes):
			self.need_reset = True
			step, reward = self._run_episode(agent, self.episode_steps, obs
											, self.eval_eps, evaluating = True)
			sum_reward += reward
			sum_step += step
			print "Finished episode %d, reward = %d, step = %d" \
					% (episode + 1, reward, step)
		self.need_reset = True
		print "Average reward per episode = %.4f" % (sum_reward / episodes)
		print "Average step per episode = %.4f" % (sum_step / episodes)
		return sum_reward / episodes

	def _prepare_game(self):
		if self.need_reset or self.api.game_over():
			self.api.reset_game()
			self.need_reset = False
			if Environment.MAX_NO_OP > 0:
				num_no_op = self.rng.randint(Environment.MAX_NO_OP + 1) \
							+ self.buffer_len
				for _ in xrange(num_no_op):
					self.api.act(0)

		for _ in xrange(self.buffer_len):
			self._update_buffer()

	def _run_episode(self, agent, steps_left, obs
					, eps = 0.0, evaluating = False):
		self._prepare_game()

		start_lives = self.api.lives()
		step_count = 0
		sum_reward = 0
		is_terminal = False
		while step_count < steps_left and not is_terminal:
			self._get_screen(obs)
			action_id, _ = agent.get_action(obs, eps, evaluating)
			
			reward = self._repeat_action(self.minimal_actions[action_id])
			reward_clip = reward
			if self.max_reward > 0:
				reward_clip = np.clip(reward, -self.max_reward, self.max_reward)

			life_lost = not evaluating and self.api.lives() < start_lives
			is_terminal = self.api.game_over() or life_lost \
						or step_count + 1 >= steps_left

			agent.add_experience(obs, is_terminal, action_id, reward_clip
								, evaluating)
			sum_reward += reward
			step_count += 1
			
		return step_count, sum_reward

	def _update_buffer(self):
		self.api.getScreenGrayscale(self.merge_frame[self.merge_id, ...])
		self.merge_id = (self.merge_id + 1) % self.buffer_len

	def _repeat_action(self, action):
		reward = 0
		for i in xrange(self.repeat):
			reward += self.api.act(action)
			if i + self.buffer_len >= self.repeat:
				self._update_buffer()
		return reward

	def _get_screen(self, resized_frame):
		self._resize_frame(self.merge_frame.max(axis = 0), resized_frame)
				
	def _resize_frame(self, src_frame, dst_frame):
		cv2.resize(src = src_frame, dst = dst_frame,
					dsize = (self.width, self.height),
					interpolation = cv2.INTER_LINEAR)

	def _open_log_files(self, agent, folder):
		time_str = time.strftime("_%m-%d-%H-%M", time.localtime())
		base_rom_name = os.path.splitext(os.path.basename(self.rom_name))[0]


		if folder is not None:
			self.log_dir = folder
			self.network_dir = self.log_dir + '/network'
		else:
			self.log_dir = '../run_results/' + base_rom_name + time_str
			self.network_dir = self.log_dir + '/network'

		info_name = get_next_name(self.log_dir, 'info', 'txt')
		git_name = get_next_name(self.log_dir, 'git-diff', '')

		try:
			os.stat(self.log_dir)
		except OSError:
			os.makedirs(self.log_dir)

		try:
			os.stat(self.network_dir)
		except OSError:
			os.makedirs(self.network_dir)

		with open(os.path.join(self.log_dir, info_name), 'w') as f:
			f.write('Commit: ' + subprocess.check_output(['git', 'rev-parse'
														, 'HEAD']))
			f.write('Run command: ')
			f.write(' '.join(pipes.quote(x) for x in sys.argv))
			f.write('\n\n')
			f.write(agent.get_info())
			write_info(f, Environment)
			write_info(f, agent.__class__)
			write_info(f, agent.network.__class__)

		# From https://github.com/spragunr/deep_q_rl/pull/49/files
		with open(os.path.join(self.log_dir, git_name), 'w') as f:
			f.write(subprocess.check_output(['git', 'diff', 'HEAD']))

		if folder is not None:
			return

		with open(os.path.join(self.log_dir, 'results.csv'), 'w') as f:
			f.write("epoch,episode_train,validate_values,evaluate_reward"\
				",train_time,test_time,steps_per_second\n")

		mem = psutil.virtual_memory()
		with open(os.path.join(self.log_dir, 'memory.csv'), 'w') as f:
			f.write("epoch,available,free,buffers,cached"\
					",available_readable,used_percent\n")
			f.write("%d,%d,%d,%d,%d,%s,%.1f\n" % \
					(0, mem.available, mem.free, mem.buffers, mem.cached
					, bytes2human(mem.available), mem.percent))

	def _update_log_files(self, agent, epoch, episode, valid_values
						, eval_values, train_time, test_time, step_per_sec
						, store_freq):
		print "Updating log files"
		with open(self.log_dir + '/results.csv', 'a') as f:
			f.write("%d,%d,%.4f,%.4f,%d,%d,%.4f\n" % \
						(epoch, episode, valid_values, eval_values
						, train_time, test_time, step_per_sec))

		mem = psutil.virtual_memory()
		with open(self.log_dir + '/memory.csv', 'a') as f:
			f.write("%d,%d,%d,%d,%d,%s,%.1f\n" % \
					(epoch, mem.available, mem.free, mem.buffers, mem.cached
					, bytes2human(mem.available), mem.percent))

		agent.dump_network(self.network_dir + ('/%03d' % (epoch)) + '.npz')

		if (store_freq >= 0 and epoch >= Environment.EPOCH_COUNT) or \
				(store_freq > 0 and (epoch % store_freq == 0)):
			agent.dump_exp(self.network_dir + '/exp.npz')

	def _setup_record(self, network_file):
		file_name, _ = os.path.splitext(os.path.basename(network_file))
		time_str = time.strftime("_%m-%d-%H-%M", time.localtime())
		img_dir = os.path.dirname(network_file) + '/images_' \
					+ file_name + time_str
		rom_name, _ = os.path.splitext(self.rom_name)
		out_name = os.path.dirname(network_file) + '/' + rom_name + '_' \
					+ file_name + time_str + '.mov'
		print out_name

		try:
			os.stat(img_dir)
		except OSError:
			os.makedirs(img_dir)

		self.api.setString('record_screen_dir', img_dir)
		self.api.loadROM('../rom/' + self.rom_name)
		return img_dir, out_name

	def record_run(self, agent, network_file, episode_id = 1):
		if episode_id > 1:
			self.evaluate(agent, episode_id - 1)
			system_state = self.api.cloneSystemState()

		img_dir, out_name = self._setup_record(network_file)

		if episode_id > 1:
			self.api.restoreSystemState(system_state)

		self.evaluate(agent, 1)
		script = \
				"""
					{
						ffmpeg -r 60 -i %s/%%06d.png -f mov -c:v libx264 %s
					} || {
						avconv -r 60 -i %s/%%06d.png -f mov -c:v libx264 %s
					}
				""" % (img_dir, out_name, img_dir, out_name)
		os.system(script)
示例#39
0
class ALEEnvironment(BaseEnvironment):
    """
    A wrapper of Arcade Learning Environment, which inherits all members of ``BaseEnvironment``.
    """
    # 63 games
    ADVENTURE = "adventure"
    AIR_RAID = "air_raid"
    ALIEN = "alien"
    AMIDAR = "amidar"
    ASSAULT = "assault"
    ASTERIX = "asterix"
    ASTEROIDS = "asteroids"
    ATLANTIS = "aslantis"
    BANK_HEIST = "bank_heist"
    BATTLE_ZONE = "battle_zone"
    BEAM_RIDER = "beam_rider"
    BERZERK = "berzerk"
    BOWLING = "bowling"
    BOXING = "boxing"
    BREAKOUT = "breakout"
    CARNIVAL = "carnival"
    CENTIPEDE = "centipede"
    CHOPPER_COMMAND = "chopper_command"
    CRAZY_CLIMBER = "crazy_climber"
    DEFENDER = "defender"
    DEMON_ATTACK = "demon_attack"
    DOUBLE_DUNK = "double_dunk"
    ELEVATOR_ACTION = "elevator_action"
    ENDURO = "enduro"
    FISHING_DERBY = "fishing_derby"
    FREEWAY = "freeway"
    FROSTBITE = "frostbite"
    GOPHER = "gopher"
    GRAVITAR = "gravitar"
    HERO = "hero"
    ICE_HOCKEY = "ice_hockey"
    JAMESBOND = "jamesbond"
    JOURNEY_ESCAPE = "journey_escape"
    KABOOM = "kaboom"
    KANGAROO = "kangaroo"
    KRULL = "krull"
    KUNGFU_MASTER = "kung_fu_master"
    MONTEZUMA = "montezuma_revenge"
    MS_PACMAN = "ms_pacman"
    UNKNOWN = "name_this_game"
    PHOENIX = "phoenix"
    PITFALL = "pitfall"
    PONG = "pong"
    POOYAN = "pooyan"
    PRIVATE_EYE = "private_eye"
    QBERT = "qbert"
    RIVERRAID = "riverraid"
    ROAD_RUNNER = "road_runner"
    ROBOTANK = "robotank"
    SEAQUEST = "seaquest"
    SKIING = "skiing"
    SOLARIS = "solaris"
    SPACE_INVADERS = "space_invaders"
    STAR_GUNNER = "star_gunner"
    TENNIS = "tennis"
    TIME_PILOT = "time_pilot"
    TUTANKHAM = "tutankham"
    UP_N_DOWN = "up_n_down"
    VENTURE = "venture"
    VIDEO_PINBALL = "video_pinball"
    WIZARD_OF_WOR = "wizard_of_wor"
    YARS_REVENGE = "yars_revenge"
    ZAXXON = "zaxxon"

    def __init__(self,
                 rom_name,
                 frame_skip=4,
                 repeat_action_probability=0.,
                 max_episode_steps=10000,
                 loss_of_life_termination=False,
                 loss_of_life_negative_reward=False,
                 bitwise_max_on_two_consecutive_frames=False,
                 is_render=False,
                 seed=None,
                 startup_policy=None,
                 disable_actions=None,
                 num_of_sub_actions=-1,
                 state_processor=AtariProcessor(resize_shape=(84, 84),
                                                convert_to_grayscale=True)):

        os.environ['SDL_VIDEO_CENTERED'] = '1'

        file_exist = isfile(ALEEnvironment.get_rom_path(rom_name))
        if not file_exist:
            raise ValueError("Rom not found ! Please put rom " + rom_name +
                             ".bin into: " + ALEEnvironment.get_rom_path())

        self.__rom_name = rom_name
        self.__ale = ALEInterface()

        if frame_skip < 0:
            print("Invalid frame_skip param ! Set default frame_skip = 4")
            self.__frame_skip = 4
        else:
            self.__frame_skip = frame_skip

        if repeat_action_probability < 0 or repeat_action_probability > 1:
            raise ValueError("Invalid repeat_action_probability")
        else:
            self.__repeat_action_probability = repeat_action_probability

        self.__max_episode_steps = max_episode_steps
        self.__loss_of_life_termination = loss_of_life_termination
        self.__loss_of_life_negative_reward = loss_of_life_negative_reward
        self.__max_2_frames = bitwise_max_on_two_consecutive_frames

        # Max 2 frames only work with grayscale
        self.__grayscale = False
        if state_processor is not None and type(
                state_processor
        ) is AtariProcessor and state_processor.get_grayscale():
            self.__grayscale = True

        if self.__max_2_frames and self.__frame_skip > 1 and self.__grayscale:
            self.__max_2_frames = True
        else:
            self.__max_2_frames = False

        self.__is_render = is_render
        self.__processor = state_processor

        if seed is None or seed <= 0 or seed >= 9999:
            if seed is not None and (seed < 0 or seed >= 9999):
                print("Invalid seed ! Default seed = randint(0, 9999")
            self.__seed = np.random.randint(0, 9999)
            self.__random_seed = True
        else:
            self.__random_seed = False
            self.__seed = seed

        self.__current_steps = 0
        self.__is_life_lost = False
        self.__is_terminal = False
        self.__current_lives = 0
        self.__action_reduction = num_of_sub_actions
        self.__scr_width, self.__scr_height, self.__action_set = self.__init_ale(
        )
        self.__prev_buffer = np.empty((self.__scr_height, self.__scr_width, 3),
                                      dtype=np.uint8)
        self.__current_buffer = np.empty(
            (self.__scr_height, self.__scr_width, 3), dtype=np.uint8)
        self.__current_state = None
        self.__prev_state = None
        self.__startup_policy = startup_policy
        if disable_actions is None:
            self.__dis_act = []
        else:
            self.__dis_act = disable_actions

        if self.__processor is not None and self.__processor.get_number_of_objectives(
        ) > 1:
            self.__multi_objs = True
        else:
            self.__multi_objs = False

    def get_processor(self):
        return self.__processor

    def __init_ale(self):

        self.__ale.setBool(b'display_screen', self.__is_render)

        if self.__max_2_frames and self.__frame_skip > 1:
            self.__ale.setInt(b'frame_skip', 1)
        else:
            self.__ale.setInt(b'frame_skip', self.__frame_skip)

        self.__ale.setInt(b'random_seed', self.__seed)
        self.__ale.setFloat(b'repeat_action_probability',
                            self.__repeat_action_probability)
        self.__ale.setBool(b'color_averaging', False)

        self.__ale.loadROM(
            ALEEnvironment.get_rom_path(self.__rom_name).encode())

        width, height = self.__ale.getScreenDims()
        return width, height, self.__ale.getMinimalActionSet()

    def clone(self):
        if self.__random_seed:
            seed = np.random.randint(0, 9999)
        else:
            seed = self.__seed

        return ALEEnvironment(self.__rom_name, self.__frame_skip,
                              self.__repeat_action_probability,
                              self.__max_episode_steps,
                              self.__loss_of_life_termination,
                              self.__loss_of_life_negative_reward,
                              self.__max_2_frames, self.__is_render, seed,
                              self.__startup_policy,
                              self.__dis_act, self.__action_reduction,
                              self.__processor.clone())

    def step_all(self, a):
        if isinstance(a, (list, np.ndarray)):
            if len(a) <= 0:
                raise ValueError('Empty action list !')
            a = a[0]
        self.__current_steps += 1
        act = self.__action_set[a]
        rew = self._step(act)
        next_state = self.get_state()
        _is_terminal = self.is_terminal()
        return next_state, rew, _is_terminal, self.__current_steps

    def reset(self):
        self.__ale.reset_game()
        self.__current_lives = self.__ale.lives()
        self.__is_life_lost = False
        self.__is_terminal = False
        self.__current_state = None
        self.__prev_state = None

        action_space = self.get_action_space()
        v_range, is_range = action_space.get_range()
        if len(v_range) > 1:
            self.step(1)

        # No op steps
        if self.__startup_policy is not None:
            max_steps = int(self.__startup_policy.get_max_steps())
            for _ in range(max_steps):
                act = self.__startup_policy.step(self.get_state(),
                                                 action_space)
                self.step(act)

        # Start training from this point
        self.__current_steps = 0

        # Reset processor
        if self.__processor is not None:
            self.__processor.reset()

        return self.get_state()

    def _pre_step(self, act):
        if self.__max_2_frames and self.__frame_skip > 1:
            rew = 0
            for i in range(self.__frame_skip - 2):
                rew += self.__ale.act(act)
                self.__prev_buffer = self.__ale.getScreenRGB(
                    self.__prev_buffer)

            self.__prev_buffer = self.__ale.getScreenRGB(self.__prev_buffer)

            rew += self.__ale.act(act)

            self.__current_buffer = self.__ale.getScreenRGB(
                self.__current_buffer)

            self.__is_terminal = self.__ale.game_over()

            if self.__processor is not None:
                self.__prev_state = self.__processor.process(
                    self.__prev_buffer)
                self.__current_state = self.__processor.process(
                    self.__current_buffer)
            else:
                self.__prev_state = self.__prev_buffer
                self.__current_state = self.__current_buffer

            self.__current_state = np.maximum.reduce(
                [self.__prev_state, self.__current_state])
        else:
            rew = self.__ale.act(act)
            self.__current_buffer = self.__ale.getScreenRGB(
                self.__current_buffer)
            self.__is_terminal = self.__ale.game_over()

            if self.__processor is not None:
                self.__current_state = self.__processor.process(
                    self.__current_buffer)

        if self.__multi_objs and self.__processor is not None:
            all_rewards = self.__processor.get_rewards(rew)
            return all_rewards
        else:
            return rew

    def _step(self, act):
        for i in range(len(self.__dis_act)):
            if act == self.__dis_act[i]:
                act = 0

        if not self.__loss_of_life_termination and not self.__loss_of_life_negative_reward:
            if not self.__is_terminal:
                next_lives = self.__ale.lives()
                if next_lives < self.__current_lives:
                    act = 1
                    self.__current_lives = next_lives
            return self._pre_step(act)
        else:
            rew = self._pre_step(act)
            next_lives = self.__ale.lives()
            if next_lives < self.__current_lives:
                if self.__loss_of_life_negative_reward:
                    rew -= 1
                self.__current_lives = next_lives
                self.__is_life_lost = True

            return rew

    def get_state(self):
        if not self.__max_2_frames:
            if self.__processor is not None:
                return self.__current_state
            else:
                return self.__current_buffer
        else:
            return self.__current_state

    def is_terminal(self):
        if self.__loss_of_life_termination and self.__is_life_lost:
            return True
        elif self.__max_episode_steps is not None and self.__current_steps > self.__max_episode_steps:
            return True
        else:
            return self.__is_terminal

    @staticmethod
    def get_rom_path(rom=None):
        if rom is None:
            return os.path.dirname(os.path.abspath(__file__)) + "/roms/"
        else:
            return os.path.dirname(
                os.path.abspath(__file__)) + "/roms/" + rom + ".bin"

    @staticmethod
    def list_all_roms():
        return [
            f for f in listdir(ALEEnvironment.get_rom_path())
            if isfile(join(ALEEnvironment.get_rom_path(), f))
        ]

    def get_state_space(self):
        if self.__processor is None:
            shape = self.__current_buffer.shape
        else:
            shape = self.__processor.process(self.__current_buffer).shape
        min_value = np.zeros(shape, dtype=np.uint8)
        max_value = np.full(shape, 255)
        return Space(min_value, max_value, True)

    def get_action_space(self):
        if self.__action_reduction >= 1:
            return Space(0, self.__action_reduction - 1, True)
        else:
            return Space(0, len(self.__action_set) - 1, True)

    def step(self, act):
        if isinstance(act, (list, np.ndarray)):
            if len(act) <= 0:
                raise ValueError('Empty action list !')
            act = act[0]
        self.__current_steps += 1
        act = self.__action_set[act]
        rew = self._step(act)
        return rew

    def get_current_steps(self):
        return self.__current_steps

    def is_atari(self):
        return True

    def is_render(self):
        return self.__is_render

    def get_number_of_objectives(self):
        if self.__processor is None:
            return 1
        else:
            return self.__processor.get_number_of_objectives()

    def get_number_of_agents(self):
        if self.__processor is None:
            return 1
        else:
            return self.__processor.get_number_of_agents()

    def get_state_processor(self):
        return self.__processor
示例#40
0
class GameEnvironment:

    def __init__(self, settings):

        self.ale = ALEInterface()
        self.ale.setBool('display_screen', settings['DISPLAY_SCREEN'])
        self.ale.setBool('sound', settings['SOUND'])
        self.ale.setBool('color_averaging', settings['COLOR_AVERAGING'])
        self.ale.setInt('random_seed', settings['RANDOM_SEED'])
        self.ale.setInt('frame_skip', settings['FRAME_SKIP'])
        self.ale.setFloat('repeat_action_probability', settings['REPEAT_ACTION_PROB'])
        roms_dir = settings['ROMS_DIR']
        rom_name = settings['ROM_NAME']
        ROM = None
        if(rom_name.endswith('.bin')):
            self.name = rom_name[:-4]
            ROM = rom_name
        else:
            self.name = rom_name
            ROM = rom_name + '.bin'

        self.ale.loadROM(os.path.join(roms_dir, ROM))
        self.random_starts = settings['RANDOM_STARTS']
        self.rng = settings['RNG']

        if(settings['MINIMAL_ACTION_SET']):
            self.actions = self.ale.getMinimalActionSet()
        else:
            self.actions = self.ale.getLegalActionSet()

        self.n_actions = len(self.actions)
        self.width, self.height = self.ale.getScreenDims()

        self.observation = np.zeros((self.height, self.width), dtype='uint8')
        self.reward = None
        self.game_over = None
        self.terminal = None
        self.total_lives = None

        self.init()

    def init(self):

        self.restartGame()
        self.reward = 0
        self.game_over = self.gameOver()
        self.terminal = self.game_over
        self.total_lives = self.lives()
        self.step(0)

    def getState(self):

        return self.observation, self.reward, self.terminal, self.game_over

    def step(self, action, training=False):

        self.reward = self.act(action)
        self.paint()
        lives = self.lives()
        self.game_over = self.gameOver()
        self.terminal = self.game_over
        if(training and (lives < self.total_lives)):
            self.terminal = True

        self.total_lives = lives
        return self.getState()

    def newGame(self):

        self.init()
        for i in xrange(self.rng.randint(1, self.random_starts)):
            self.act(0)
            terminal = self.gameOver()
            if(terminal):
                print "Warning terminal in random init"

        return self.step(0)

    def newTestGame(self):

        self.init()
        return self.getState()

    def paint(self):

        self.ale.getScreenGrayscale(self.observation)

    def getScreenRGB(self):

        return self.ale.getScreenRGB()

    def act(self, action):

        assert ((action >= 0) and (action < self.n_actions))
        return self.ale.act(self.actions[action])

    def lives(self):

        return self.ale.lives()

    def restartGame(self):

        self.ale.reset_game()

    def gameOver(self):

        return self.ale.game_over()
示例#41
0
class ALEEnvironment(BaseEnvironment):
    """
    The :class:`MinimalGameHandler` class takes care of the interface to the ALE and tries to do nothing else. It's
    meant for advanced users who need fine control over every aspect of the process. It has many functions that are simply
    wrappers of the underlying ALE but with pythonic names/usage.

    Parameters
    ----------
    rom : byte string
        Specifies the directory to load the rom from. Must be a byte string: b'dir_for_rom/rom.bin'
    display_screen : boolean
        Default False. Whether or not to show the game. True takes longer to run but can be fun to watch
    step_cap: int
        Default None. Maximum number of steps to run in an episode. Breakout can sometimes not return terminal
        even when game is ended. This fixes that and will return terminal after stepping above this count
    """
    def __init__(self, rom, resize_shape=(84, 84), skip_frame=1, repeat_action_probability=0.0,
                 step_cap=None, loss_of_life_termination=False, loss_of_life_negative_reward=False,
                 grayscale=True, display_screen=False, seed=np.random.RandomState()):
        # set up emulator
        self.ale = ALEInterface()

        if display_screen:
            self.ale.setBool(b'display_screen', True)

        self.ale.setInt(b'frame_skip', skip_frame)
        self.ale.setInt(b'random_seed', seed.randint(0, 9999))
        self.ale.setFloat(b'repeat_action_probability', repeat_action_probability)
        self.ale.setBool(b'color_averaging', False)

        self.ale.loadROM(rom.encode())

        # setup gamescreen object. I think this is faster than recreating an empty each time
        width, height = self.ale.getScreenDims()
        channels = 1 if grayscale else 3
        self.grayscale = grayscale
        self.gamescreen = np.empty((height, width, 1), dtype=np.uint8)

        self.resize_shape = resize_shape
        self.skip_frame = skip_frame
        self.step_cap = step_cap
        self.curr_step_count = 0

        # setup action converter
        # ALE returns legal action indexes, convert these to just numbers
        self.action_inds = self.ale.getMinimalActionSet()

        # setup lives
        self.loss_of_life_negative_reward = loss_of_life_negative_reward
        self.cur_lives = self.ale.lives()
        self.loss_of_life_termination = loss_of_life_termination
        self.life_lost = False

    def reset(self):
        self.ale.reset_game()
        self.cur_lives = self.ale.lives()
        self.life_lost = False
        self.curr_step_count = 0

    def step(self, action):
        self.curr_step_count += 1
        ale_action = self.action_inds[action]
        return self._step(ale_action)

    def _step(self, ale_action):
        if not self.loss_of_life_termination and not self.loss_of_life_negative_reward:
            return self.ale.act(ale_action)
        else:
            rew = self.ale.act(ale_action)
            new_lives = self.ale.lives()
            if new_lives < self.cur_lives:
                # if loss of life is negative reward subtract 1 from reward
                if self.loss_of_life_negative_reward:
                    rew -= 1
                self.cur_lives = new_lives
                self.life_lost = True
            return rew

    def get_state(self):
        if self.grayscale:
            self.gamescreen = self.ale.getScreenGrayscale(self.gamescreen)
        else:
            self.gamescreen = self.ale.getScreenRGB(self.gamescreen)
        # if resize_shape is none then don't resize
        if self.resize_shape is not None:
            # if grayscale we remove the last dimmension (channel)
            if self.grayscale:
                processedImg = imresize(self.gamescreen[:, :, 0], self.resize_shape)
            else:
                processedImg = imresize(self.gamescreen, self.resize_shape)
        return processedImg

    def get_state_shape(self):
        return self.resize_shape

    def get_terminal(self):
        if self.loss_of_life_termination and self.life_lost:
            return True
        elif self.step_cap is not None and self.curr_step_count > self.step_cap:
            return True
        else:
            return self.ale.game_over()

    def get_num_actions(self):
        return len(self.action_inds)
示例#42
0
文件: 0906.py 项目: boluoweifenda/DQN
#
#     print ''

# initialization
np.random.seed(SEED)

ale = ALEInterface()
if SEED == None:
    ale.setInt('random_seed', 0)
else:
    ale.setInt('random_seed', SEED)
ale.setInt("frame_skip",frameSkip)
ale.setBool('color_averaging', True)
ale.setBool('sound', False)
ale.setBool('display_screen', False)
ale.setFloat("repeat_action_probability", 0.0)
ale.loadROM(romPath)
legal_actions = ale.getMinimalActionSet()
n_actions = len(legal_actions)
opt.n_actions = n_actions

explorationRateDelta = (initialExplorationRate - finalExplorationRate)/(finalExplorationFrame-startLearningFrame)
explorationRate = initialExplorationRate + startLearningFrame*explorationRateDelta

if networkType == "CNN":
    width = 84
    height = 84
    Dim = [height,width]
else:
    width = 36
    height = 41
示例#43
0
class AtariPlayer(RLEnvironment):
    """
    A wrapper for atari emulator.
    NOTE: will automatically restart when a real episode ends
    """
    def __init__(self, rom_file, viz=0, height_range=(None,None),
            frame_skip=4, image_shape=(84, 84), nullop_start=30,
            live_lost_as_eoe=True):
        """
        :param rom_file: path to the rom
        :param frame_skip: skip every k frames and repeat the action
        :param image_shape: (w, h)
        :param height_range: (h1, h2) to cut
        :param viz: visualization to be done.
            Set to 0 to disable.
            Set to a positive number to be the delay between frames to show.
            Set to a string to be a directory to store frames.
        :param nullop_start: start with random number of null ops
        :param live_losts_as_eoe: consider lost of lives as end of episode.  useful for training.
        """
        super(AtariPlayer, self).__init__()
        if not os.path.isfile(rom_file) and '/' not in rom_file:
            rom_file = get_dataset_dir('atari_rom', rom_file)
        assert os.path.isfile(rom_file), \
                "rom {} not found. Please download at {}".format(rom_file, ROM_URL)

        try:
            ALEInterface.setLoggerMode(ALEInterface.Logger.Warning)
        except AttributeError:
            log_once()

        # avoid simulator bugs: https://github.com/mgbellemare/Arcade-Learning-Environment/issues/86
        with _ALE_LOCK:
            self.ale = ALEInterface()
            self.rng = get_rng(self)

            self.ale.setInt(b"random_seed", self.rng.randint(0, 10000))
            self.ale.setBool(b"showinfo", False)

            self.ale.setInt(b"frame_skip", 1)
            self.ale.setBool(b'color_averaging', False)
            # manual.pdf suggests otherwise.
            self.ale.setFloat(b'repeat_action_probability', 0.0)

            # viz setup
            if isinstance(viz, six.string_types):
                assert os.path.isdir(viz), viz
                self.ale.setString(b'record_screen_dir', viz)
                viz = 0
            if isinstance(viz, int):
                viz = float(viz)
            self.viz = viz
            if self.viz and isinstance(self.viz, float):
                self.windowname = os.path.basename(rom_file)
                cv2.startWindowThread()
                cv2.namedWindow(self.windowname)

            self.ale.loadROM(rom_file.encode('utf-8'))
        self.width, self.height = self.ale.getScreenDims()
        self.actions = self.ale.getMinimalActionSet()


        self.live_lost_as_eoe = live_lost_as_eoe
        self.frame_skip = frame_skip
        self.nullop_start = nullop_start
        self.height_range = height_range
        self.image_shape = image_shape

        self.current_episode_score = StatCounter()
        self.restart_episode()

    def _grab_raw_image(self):
        """
        :returns: the current 3-channel image
        """
        m = self.ale.getScreenRGB()
        return m.reshape((self.height, self.width, 3))

    def current_state(self):
        """
        :returns: a gray-scale (h, w, 1) float32 image
        """
        ret = self._grab_raw_image()
        # max-pooled over the last screen
        ret = np.maximum(ret, self.last_raw_screen)
        if self.viz:
            if isinstance(self.viz, float):
                #m = cv2.resize(ret, (1920,1200))
                cv2.imshow(self.windowname, ret)
                time.sleep(self.viz)
        ret = ret[self.height_range[0]:self.height_range[1],:].astype('float32')
        # 0.299,0.587.0.114. same as rgb2y in torch/image
        ret = cv2.cvtColor(ret, cv2.COLOR_RGB2GRAY)
        ret = cv2.resize(ret, self.image_shape)
        ret = np.expand_dims(ret, axis=2)
        return ret

    def get_action_space(self):
        return DiscreteActionSpace(len(self.actions))

    def restart_episode(self):
        if self.current_episode_score.count > 0:
            self.stats['score'].append(self.current_episode_score.sum)
        self.current_episode_score.reset()
        self.ale.reset_game()

        # random null-ops start
        n = self.rng.randint(self.nullop_start)
        self.last_raw_screen = self._grab_raw_image()
        for k in range(n):
            if k == n - 1:
                self.last_raw_screen = self._grab_raw_image()
            self.ale.act(0)

    def action(self, act):
        """
        :param act: an index of the action
        :returns: (reward, isOver)
        """
        oldlives = self.ale.lives()
        r = 0
        for k in range(self.frame_skip):
            if k == self.frame_skip - 1:
                self.last_raw_screen = self._grab_raw_image()
            r += self.ale.act(self.actions[act])
            newlives = self.ale.lives()
            if self.ale.game_over() or \
                    (self.live_lost_as_eoe and newlives < oldlives):
                break

        self.current_episode_score.feed(r)
        isOver = self.ale.game_over()
        if isOver:
            self.restart_episode()
        if self.live_lost_as_eoe:
            isOver = isOver or newlives < oldlives
        return (r, isOver)
示例#44
0
文件: ale_env.py 项目: only4hj/DeepRL
class AleEnv():
    def __init__(self, rom, display_screen, use_env_frame_skip, frame_repeat):
        self.actions = None
        self.rom = rom
        self.display_screen = display_screen
        self.use_env_frame_skip = use_env_frame_skip
        self.frame_repeat = frame_repeat
        
    def initialize(self):
        self.ale = ALEInterface()
        self.ale.setInt("random_seed", random.randint(1, 1000))
        if self.display_screen:
            self.ale.setBool('display_screen', True)

        if self.use_env_frame_skip == True:
            self.ale.setInt('frame_skip', self.frame_repeat)
            self.ale.setBool('color_averaging', True)        
 
        self.ale.setFloat('repeat_action_probability', 0)
        self.ale.loadROM(self.rom)
        self.actions = self.ale.getMinimalActionSet()
        print 'actions: %s' % self.actions
        (self.screen_width,self.screen_height) = self.ale.getScreenDims()
        print("width/height: " +str(self.screen_width) + "/" + str(self.screen_height))
        
        self.initialized = True
        
    def get_actions(self, rom=None):
        if self.actions is None and rom != None:
            ale = ALEInterface()
            ale.loadROM(rom)
            self.actions = ale.getMinimalActionSet()
        return self.actions
        
    @property
    def state_dtype(self):
        return np.uint8
        
    @property
    def continuous_action(self):
        return False
    
    def reset_game(self):
        self.ale.reset_game()
        
    def lives(self):
        return self.ale.lives()
    
    def getScreenRGB(self):
        return self.ale.getScreenRGB()
    
    def getState(self, debug_display=False, debug_input=None):
        screen = self.ale.getScreenGrayscale()
        if screen is not None and debug_display:
            debug_input.show(screen.reshape(screen.shape[0], screen.shape[1]))
        return screen.reshape(self.screen_height, self.screen_width)
    
    def act(self, action):
        return self.ale.act(action)
    
    def game_over(self):
        return self.ale.game_over()
    
    def finish(self):
        return    
示例#45
0
class AtariEnvironment:
    
    def __init__(self, args, outputDir):
        
        self.outputDir = outputDir
        self.screenCaptureFrequency = args.screen_capture_freq
        
        self.ale = ALEInterface()
        self.ale.setInt(b'random_seed', 123456)
        random.seed(123456)
        # Fix https://groups.google.com/forum/#!topic/deep-q-learning/p4FAIaabwlo
        self.ale.setFloat(b'repeat_action_probability', 0.0)

        # Load the ROM file
        self.ale.loadROM(args.rom)

        self.actionSet = self.ale.getMinimalActionSet()
        self.gameNumber = 0
        self.stepNumber = 0
        self.resetGame()

    def getNumActions(self):
        return len(self.actionSet)

    def getState(self):
        return self.state
    
    def getGameNumber(self):
        return self.gameNumber
    
    def getFrameNumber(self):
        return self.ale.getFrameNumber()
    
    def getEpisodeFrameNumber(self):
        return self.ale.getEpisodeFrameNumber()
    
    def getEpisodeStepNumber(self):
        return self.episodeStepNumber
    
    def getStepNumber(self):
        return self.stepNumber
    
    def getGameScore(self):
        return self.gameScore

    def isGameOver(self):
        return self.ale.game_over()

    def step(self, action):
        previousLives = self.ale.lives()
        reward = 0
        isTerminal = 0
        self.stepNumber += 1
        self.episodeStepNumber += 1
        
        for i in range(4):
            prevScreenRGB = self.ale.getScreenRGB()
            reward += self.ale.act(self.actionSet[action])
            screenRGB = self.ale.getScreenRGB()
    
            # Detect end of episode, I don't think I'm handling this right in terms
            # of the overall game loop (??)
            if self.ale.lives() < previousLives or self.ale.game_over():
                isTerminal = 1
                break

            if self.gameNumber % self.screenCaptureFrequency == 0:
                dir = self.outputDir + '/screen_cap/game-%06d' % (self.gameNumber)
                if not os.path.isdir(dir):
                    os.makedirs(dir)
                self.ale.saveScreenPNG(dir + '/frame-%06d.png' % (self.getEpisodeFrameNumber()))


        maxedScreen = np.maximum(screenRGB, prevScreenRGB)
        self.state = self.state.stateByAddingScreen(maxedScreen, self.ale.getFrameNumber())
        self.gameScore += reward
        return reward, self.state, isTerminal

    def resetGame(self):
        if self.ale.game_over():
            self.gameNumber += 1
        self.ale.reset_game()
        self.state = State().stateByAddingScreen(self.ale.getScreenRGB(), self.ale.getFrameNumber())
        self.gameScore = 0
        self.episodeStepNumber = 0 # environment steps vs ALE frames.  Will probably be 4*frame number
示例#46
0
class MyEnv(Environment):
    VALIDATION_MODE = 0

    def __init__(self, rng, rom="ale/breakout.bin", frame_skip=4, 
                 ale_options=[{"key": "random_seed", "value": 0}, 
                              {"key": "color_averaging", "value": True},
                              {"key": "repeat_action_probability", "value": 0.}]):
        self._mode = -1
        self._modeScore = 0.0
        self._modeEpisodeCount = 0

        self._frameSkip = frame_skip if frame_skip >= 1 else 1
        self._randomState = rng

        self._ale = ALEInterface()
        for option in ale_options:
            t = type(option["value"])
            if t is int:
                self._ale.setInt(option["key"], option["value"])
            elif t is float:
                self._ale.setFloat(option["key"], option["value"])
            elif t is bool:
                self._ale.setBool(option["key"], option["value"])
            else:
                raise ValueError("Option {} ({}) is not an int, bool or float.".format(option["key"], t))
        self._ale.loadROM(rom)

        w, h = self._ale.getScreenDims()
        self._screen = np.empty((h, w), dtype=np.uint8)
        self._reducedScreen = np.empty((84, 84), dtype=np.uint8)
        self._actions = self._ale.getMinimalActionSet()

                
    def reset(self, mode):
        if mode == MyEnv.VALIDATION_MODE:
            if self._mode != MyEnv.VALIDATION_MODE:
                self._mode = MyEnv.VALIDATION_MODE
                self._modeScore = 0.0
                self._modeEpisodeCount = 0
            else:
                self._modeEpisodeCount += 1
        elif self._mode != -1: # and thus mode == -1
            self._mode = -1

        self._ale.reset_game()
        for _ in range(self._randomState.randint(15)):
            self._ale.act(0)
        self._ale.getScreenGrayscale(self._screen)
        cv2.resize(self._screen, (84, 84), self._reducedScreen, interpolation=cv2.INTER_NEAREST)
        
        return [4 * [84 * [84 * [0]]]]
        
        
    def act(self, action):
        action = self._actions[action]
        
        reward = 0
        for _ in range(self._frameSkip):
            reward += self._ale.act(action)
            if self.inTerminalState():
                break
            
        self._ale.getScreenGrayscale(self._screen)
        cv2.resize(self._screen, (84, 84), self._reducedScreen, interpolation=cv2.INTER_NEAREST)
  
        self._modeScore += reward
        return np.sign(reward)

    def summarizePerformance(self, test_data_set):
        if self.inTerminalState() == False:
            self._modeEpisodeCount += 1
        print("== Mean score per episode is {} over {} episodes ==".format(self._modeScore / self._modeEpisodeCount, self._modeEpisodeCount))


    def inputDimensions(self):
        return [(4, 84, 84)]

    def observationType(self, subject):
        return np.uint8

    def nActions(self):
        return len(self._actions)

    def observe(self):
        return [np.array(self._reducedScreen)]

    def inTerminalState(self):
        return self._ale.game_over()
示例#47
0
class AtariPlayer(RLEnvironment):
    """
    A wrapper for atari emulator.
    """
    def __init__(self, rom_file, viz=0, height_range=(None,None),
            frame_skip=4, image_shape=(84, 84), nullop_start=30,
            live_lost_as_eoe=True):
        """
        :param rom_file: path to the rom
        :param frame_skip: skip every k frames and repeat the action
        :param image_shape: (w, h)
        :param height_range: (h1, h2) to cut
        :param viz: visualization to be done.
            Set to 0 to disable.
            Set to a positive number to be the delay between frames to show.
            Set to a string to be a directory to store frames.
        :param nullop_start: start with random number of null ops
        :param live_losts_as_eoe: consider lost of lives as end of episode.  useful for training.
        """
        super(AtariPlayer, self).__init__()
        self.ale = ALEInterface()
        self.rng = get_rng(self)

        self.ale.setInt("random_seed", self.rng.randint(0, 10000))
        self.ale.setBool("showinfo", False)

        try:
            ALEInterface.setLoggerMode(ALEInterface.Logger.Warning)
        except AttributeError:
            log_once()

        self.ale.setInt("frame_skip", 1)
        self.ale.setBool('color_averaging', False)
        # manual.pdf suggests otherwise. may need to check
        self.ale.setFloat('repeat_action_probability', 0.0)

        # viz setup
        if isinstance(viz, six.string_types):
            assert os.path.isdir(viz), viz
            self.ale.setString('record_screen_dir', viz)
            viz = 0
        if isinstance(viz, int):
            viz = float(viz)
        self.viz = viz
        if self.viz and isinstance(self.viz, float):
            self.windowname = os.path.basename(rom_file)
            cv2.startWindowThread()
            cv2.namedWindow(self.windowname)

        self.ale.loadROM(rom_file)
        self.width, self.height = self.ale.getScreenDims()
        self.actions = self.ale.getMinimalActionSet()


        self.live_lost_as_eoe = live_lost_as_eoe
        self.frame_skip = frame_skip
        self.nullop_start = nullop_start
        self.height_range = height_range
        self.image_shape = image_shape

        self.current_episode_score = StatCounter()
        self.restart_episode()

    def _grab_raw_image(self):
        """
        :returns: the current 3-channel image
        """
        m = self.ale.getScreenRGB()
        return m.reshape((self.height, self.width, 3))

    def current_state(self):
        """
        :returns: a gray-scale (h, w, 1) image
        """
        ret = self._grab_raw_image()
        # max-pooled over the last screen
        ret = np.maximum(ret, self.last_raw_screen)
        if self.viz:
            if isinstance(self.viz, float):
                cv2.imshow(self.windowname, ret)
                time.sleep(self.viz)
        ret = ret[self.height_range[0]:self.height_range[1],:]
        # 0.299,0.587.0.114. same as rgb2y in torch/image
        ret = cv2.cvtColor(ret, cv2.COLOR_RGB2GRAY)
        ret = cv2.resize(ret, self.image_shape)
        ret = np.expand_dims(ret, axis=2)
        return ret

    def get_num_actions(self):
        """
        :returns: the number of legal actions
        """
        return len(self.actions)

    def restart_episode(self):
        if self.current_episode_score.count > 0:
            self.stats['score'].append(self.current_episode_score.sum)
        self.current_episode_score.reset()
        self.ale.reset_game()

        # random null-ops start
        n = self.rng.randint(self.nullop_start)
        self.last_raw_screen = self._grab_raw_image()
        for k in range(n):
            if k == n - 1:
                self.last_raw_screen = self._grab_raw_image()
            self.ale.act(0)

    def action(self, act):
        """
        :param act: an index of the action
        :returns: (reward, isOver)
        """
        oldlives = self.ale.lives()
        r = 0
        for k in range(self.frame_skip):
            if k == self.frame_skip - 1:
                self.last_raw_screen = self._grab_raw_image()
            r += self.ale.act(self.actions[act])
            newlives = self.ale.lives()
            if self.ale.game_over() or \
                    (self.live_lost_as_eoe and newlives < oldlives):
                break

        self.current_episode_score.feed(r)
        isOver = self.ale.game_over()
        if isOver:
            self.restart_episode()
        if self.live_lost_as_eoe:
            isOver = isOver or newlives < oldlives
        return (r, isOver)

    def get_stat(self):
        try:
            return {'avg_score': np.mean(self.stats['score']),
                    'max_score': float(np.max(self.stats['score'])) }
        except ValueError:
            return {}
class GameState(object):
  def __init__(self, rand_seed, display=False, no_op_max=7):
    self.ale = ALEInterface()
    self.ale.setInt(b'random_seed', rand_seed)
    self.ale.setFloat(b'repeat_action_probability', 0.0)
    self.ale.setBool(b'color_averaging', True)
    self.ale.setInt(b'frame_skip', 4)
    self._no_op_max = no_op_max

    if display:
      self._setup_display()
    
    self.ale.loadROM(ROM.encode('ascii'))

    # collect minimal action set
    self.real_actions = self.ale.getMinimalActionSet()

    # height=210, width=160
    self._screen = np.empty((210, 160, 1), dtype=np.uint8)

    self.reset()

  def _process_frame(self, action, reshape):
    reward = self.ale.act(action)
    terminal = self.ale.game_over()

    # screen shape is (210, 160, 1)
    self.ale.getScreenGrayscale(self._screen)
    
    # reshape it into (210, 160)
    reshaped_screen = np.reshape(self._screen, (210, 160))
    
    # resize to height=110, width=84
    resized_screen = cv2.resize(reshaped_screen, (84, 110))
    
    x_t = resized_screen[18:102,:]
    if reshape:
      x_t = np.reshape(x_t, (84, 84, 1))
    x_t = x_t.astype(np.float32)
    x_t *= (1.0/255.0)
    return reward, terminal, x_t
    
    
  def _setup_display(self):
    if sys.platform == 'darwin':
      import pygame
      pygame.init()
      self.ale.setBool(b'sound', False)
    elif sys.platform.startswith('linux'):
      self.ale.setBool(b'sound', True)
    self.ale.setBool(b'display_screen', True)

  def reset(self):
    self.ale.reset_game()
    
    # randomize initial state
    if self._no_op_max > 0:
      no_op = np.random.randint(0, self._no_op_max + 1)
      for _ in range(no_op):
        self.ale.act(0)

    _, _, x_t = self._process_frame(0, False)
    
    self.reward = 0
    self.terminal = False
    self.s_t = np.stack((x_t, x_t, x_t, x_t), axis = 2)
    
  def process(self, action):
    # convert original 18 action index to minimal action set index
    real_action = self.real_actions[action]
    
    r, t, x_t1 = self._process_frame(real_action, True)

    self.reward = r
    self.terminal = t
    self.s_t1 = np.append(self.s_t[:,:,1:], x_t1, axis = 2)    

  def update(self):
    self.s_t = self.s_t1
示例#49
0
class Emulator:
    def __init__(self, rom_path, rom_name, visualize, actor_id, rseed, single_life_episodes = False):
        
        self.ale = ALEInterface()

        self.ale.setInt("random_seed", rseed * (actor_id +1))

        # For fuller control on explicit action repeat (>= ALE 0.5.0) 
        self.ale.setFloat("repeat_action_probability", 0.0)
        
        # Disable frame_skip and color_averaging
        # See: http://is.gd/tYzVpj
        self.ale.setInt("frame_skip", 1)
        self.ale.setBool("color_averaging", False)
        self.ale.loadROM(rom_path + "/" + rom_name + ".bin")
        self.legal_actions = self.ale.getMinimalActionSet()        
        self.screen_width,self.screen_height = self.ale.getScreenDims()
        #self.ale.setBool('display_screen', True)
        
        # Processed historcal frames that will be fed in to the network 
        # (i.e., four 84x84 images)
        self.screen_images_processed = np.zeros((IMG_SIZE_X, IMG_SIZE_Y, 
            NR_IMAGES)) 
        self.rgb_screen = np.zeros((self.screen_height,self.screen_width, 3), dtype=np.uint8)
        self.gray_screen = np.zeros((self.screen_height,self.screen_width,1), dtype=np.uint8)

        self.frame_pool = np.empty((2, self.screen_height, self.screen_width))
        self.current = 0
        self.lives = self.ale.lives()

        self.visualize = visualize
        self.visualize_processed = False
        self.windowname = rom_name + ' ' + str(actor_id)
        if self.visualize:
            logger.debug("Opening emulator window...")
            #from skimage import io
            #io.use_plugin('qt')
            cv2.startWindowThread()
            cv2.namedWindow(self.windowname)
            logger.debug("Emulator window opened")
            
        if self.visualize_processed:
            logger.debug("Opening processed frame window...")
            cv2.startWindowThread()
            logger.debug("Processed frame window opened")
            cv2.namedWindow(self.windowname + "_processed")
            
        self.single_life_episodes = single_life_episodes

    def get_screen_image(self):
        """ Add screen (luminance) to frame pool """
        # [screen_image, screen_image_rgb] = [self.ale.getScreenGrayscale(), 
        #     self.ale.getScreenRGB()]
        self.ale.getScreenGrayscale(self.gray_screen)
        self.ale.getScreenRGB(self.rgb_screen)
        self.frame_pool[self.current] = np.squeeze(self.gray_screen)
        self.current = (self.current + 1) % FRAMES_IN_POOL
        return self.rgb_screen

    def new_game(self):
        """ Restart game """
        self.ale.reset_game()
        self.lives = self.ale.lives()

        if MAX_START_WAIT < 0:
            logger.debug("Cannot time travel yet.")
            sys.exit()
        elif MAX_START_WAIT > 0:
            wait = random.randint(0, MAX_START_WAIT)
        else:
            wait = 0
        for _ in xrange(wait):
            self.ale.act(self.legal_actions[0])

    def process_frame_pool(self):
        """ Preprocess frame pool """
        
        img = None
        if BLEND_METHOD == "max_pool":
            img = np.amax(self.frame_pool, axis=0)
        
        #img resize(img[:210, :], (84, 84))
        img = cv2.resize(img[:210, :], (84, 84), 
            interpolation=cv2.INTER_LINEAR)
        
        img = img.astype(np.float32)
        img *= (1.0/255.0)
        
        return img
        # Reduce height to 210, if not so
        #cropped_img = img[:210, :]
        # Downsample to 110x84
        #down_sampled_img = resize(cropped_img, (84, 84))
        
        # Crop to 84x84 playing area
        #stackable_image = down_sampled_img[:, 26:110]
        #return stackable_image

    def action_repeat(self, a):
        """ Repeat action and grab screen into frame pool """
        reward = 0
        for i in xrange(ACTION_REPEAT):
            reward += self.ale.act(self.legal_actions[a])
            new_screen_image_rgb = self.get_screen_image()
        return reward, new_screen_image_rgb

    def get_reshaped_state(self, state):
        return np.reshape(state, 
            (1, IMG_SIZE_X, IMG_SIZE_Y, NR_IMAGES))
        #return np.reshape(self.screen_images_processed, 
        #    (1, IMG_SIZE_X, IMG_SIZE_Y, NR_IMAGES))

    def get_initial_state(self):
        """ Get the initial state """
        self.new_game()
        for step in xrange(NR_IMAGES):
            reward, new_screen_image_rgb = self.action_repeat(0)
            self.screen_images_processed[:, :, step] = self.process_frame_pool()
            self.show_screen(new_screen_image_rgb)
        if self.is_terminal():
            MAX_START_WAIT -= 1
            return self.get_initial_state()
        return np.copy(self.screen_images_processed) #get_reshaped_state()      

    def next(self, action):
        """ Get the next state, reward, and game over signal """
        reward, new_screen_image_rgb = self.action_repeat(np.argmax(action))
        self.screen_images_processed[:, :, 0:3] = \
            self.screen_images_processed[:, :, 1:4]
        self.screen_images_processed[:, :, 3] = self.process_frame_pool()
        self.show_screen(new_screen_image_rgb)
        terminal = self.is_terminal()
        self.lives = self.ale.lives()
        return np.copy(self.screen_images_processed), reward, terminal #get_reshaped_state(), reward, terminal
    
    def show_screen(self, image):
        """ Show visuals for raw and processed images """
        if self.visualize:
            #io.imshow(image[:210, :], fancy=True)
            cv2.imshow(self.windowname, image[:210, :])
        if self.visualize_processed:
            #io.imshow(self.screen_images_processed[:, :, 3], fancy=True)
            cv2.imshow(self.windowname + "_processed", self.screen_images_processed[:, :, 3])
            
    def is_terminal(self):
        if self.single_life_episodes:
            return (self.is_over() or (self.lives > self.ale.lives()))
        else:
            return self.is_over()

    def is_over(self):
        return self.ale.game_over()
示例#50
0
class AtariPlayer(gym.Env):
    """
    A wrapper for ALE emulator, with configurations to mimic DeepMind DQN settings.

    Info:
        score: the accumulated reward in the current game
        gameOver: True when the current game is Over
    """

    def __init__(self, rom_file, viz=0,
                 frame_skip=4, nullop_start=30,
                 live_lost_as_eoe=True, max_num_frames=0):
        """
        Args:
            rom_file: path to the rom
            frame_skip: skip every k frames and repeat the action
            viz: visualization to be done.
                Set to 0 to disable.
                Set to a positive number to be the delay between frames to show.
                Set to a string to be a directory to store frames.
            nullop_start: start with random number of null ops.
            live_losts_as_eoe: consider lost of lives as end of episode. Useful for training.
            max_num_frames: maximum number of frames per episode.
        """
        super(AtariPlayer, self).__init__()
        if not os.path.isfile(rom_file) and '/' not in rom_file:
            rom_file = get_dataset_path('atari_rom', rom_file)
        assert os.path.isfile(rom_file), \
            "rom {} not found. Please download at {}".format(rom_file, ROM_URL)

        try:
            ALEInterface.setLoggerMode(ALEInterface.Logger.Error)
        except AttributeError:
            if execute_only_once():
                logger.warn("You're not using latest ALE")

        # avoid simulator bugs: https://github.com/mgbellemare/Arcade-Learning-Environment/issues/86
        with _ALE_LOCK:
            self.ale = ALEInterface()
            self.rng = get_rng(self)
            self.ale.setInt(b"random_seed", self.rng.randint(0, 30000))
            self.ale.setInt(b"max_num_frames_per_episode", max_num_frames)
            self.ale.setBool(b"showinfo", False)

            self.ale.setInt(b"frame_skip", 1)
            self.ale.setBool(b'color_averaging', False)
            # manual.pdf suggests otherwise.
            self.ale.setFloat(b'repeat_action_probability', 0.0)

            # viz setup
            if isinstance(viz, six.string_types):
                assert os.path.isdir(viz), viz
                self.ale.setString(b'record_screen_dir', viz)
                viz = 0
            if isinstance(viz, int):
                viz = float(viz)
            self.viz = viz
            if self.viz and isinstance(self.viz, float):
                self.windowname = os.path.basename(rom_file)
                cv2.namedWindow(self.windowname)

            self.ale.loadROM(rom_file.encode('utf-8'))
        self.width, self.height = self.ale.getScreenDims()
        self.actions = self.ale.getMinimalActionSet()

        self.live_lost_as_eoe = live_lost_as_eoe
        self.frame_skip = frame_skip
        self.nullop_start = nullop_start

        self.action_space = spaces.Discrete(len(self.actions))
        self.observation_space = spaces.Box(
            low=0, high=255, shape=(self.height, self.width, 1), dtype=np.uint8)
        self._restart_episode()

    def get_action_meanings(self):
        return [ACTION_MEANING[i] for i in self.actions]

    def _grab_raw_image(self):
        """
        :returns: the current 3-channel image
        """
        m = self.ale.getScreenRGB()
        return m.reshape((self.height, self.width, 3))

    def _current_state(self):
        """
        :returns: a gray-scale (h, w, 1) uint8 image
        """
        ret = self._grab_raw_image()
        # max-pooled over the last screen
        ret = np.maximum(ret, self.last_raw_screen)
        if self.viz:
            if isinstance(self.viz, float):
                cv2.imshow(self.windowname, ret)
                cv2.waitKey(int(self.viz * 1000))
        ret = ret.astype('float32')
        # 0.299,0.587.0.114. same as rgb2y in torch/image
        ret = cv2.cvtColor(ret, cv2.COLOR_RGB2GRAY)[:, :, np.newaxis]
        return ret.astype('uint8')  # to save some memory

    def _restart_episode(self):
        with _ALE_LOCK:
            self.ale.reset_game()

        # random null-ops start
        n = self.rng.randint(self.nullop_start)
        self.last_raw_screen = self._grab_raw_image()
        for k in range(n):
            if k == n - 1:
                self.last_raw_screen = self._grab_raw_image()
            self.ale.act(0)

    def reset(self):
        if self.ale.game_over():
            self._restart_episode()
        return self._current_state()

    def step(self, act):
        oldlives = self.ale.lives()
        r = 0
        for k in range(self.frame_skip):
            if k == self.frame_skip - 1:
                self.last_raw_screen = self._grab_raw_image()
            r += self.ale.act(self.actions[act])
            newlives = self.ale.lives()
            if self.ale.game_over() or \
                    (self.live_lost_as_eoe and newlives < oldlives):
                break

        isOver = self.ale.game_over()
        if self.live_lost_as_eoe:
            isOver = isOver or newlives < oldlives

        info = {'ale.lives': newlives}
        return self._current_state(), r, isOver, info
示例#51
0
def test(session,
         hist_len=4,
         discount=0.99,
         act_rpt=4,
         upd_freq=4,
         min_sq_grad=0.01,
         epsilon=0.05,
         no_op_max=30,
         num_tests=1,
         learning_rate=0.0025,
         momentum=0.95,
         sq_momentum=0.95):
    #Create ALE object
    if len(sys.argv) < 3:
        print('Usage: %s rom_file record_screen_dir' % sys.argv[0])
        sys.exit()

    ale = ALEInterface()

    record_path = sys.argv[2]
    ale.setString('record_screen_dir', record_path)
    ale.setString('record_sound_filename', (record_path + '/sound.wav'))
    ale.setInt('fragsize', 64)
    cmd = 'mkdir '
    cmd += record_path
    os.system(cmd)

    # Get & Set the desired settings
    ale.setInt('random_seed', 123)
    #Changes repeat action probability from default of 0.25
    ale.setFloat('repeat_action_probability', 0.0)

    # Set USE_SDL to true to display the screen. ALE must be compilied
    # with SDL enabled for this to work. On OSX, pygame init is used to
    # proxy-call SDL_main.
    USE_SDL = False
    if USE_SDL:
        if sys.platform == 'darwin':
            import pygame
            pygame.init()
            ale.setBool('sound', False)  # Sound doesn't work on OSX
        elif sys.platform.startswith('linux'):
            ale.setBool('sound', True)
        ale.setBool('display_screen', True)

    # Load the ROM file
    ale.loadROM(sys.argv[1])

    # create DQN agent
    # learning_rate and momentum are unused parameters (but needed)
    agent = DQN(ale, session, epsilon, learning_rate, momentum, sq_momentum,
                hist_len, len(ale.getMinimalActionSet()), None, discount,
                rom_name(sys.argv[1]))

    #Store the most recent two images
    preprocess_stack = deque([], 2)

    num_episodes = 0
    while num_episodes < num_tests:
        #initialize sequence with initial image
        seq = list()
        #We only have one image, we cannot combine two images
        perform_no_ops(ale, no_op_max, preprocess_stack, seq)
        #proc_seq.append(pp.preprocess(seq))
        total_reward = 0

        while not ale.game_over():
            state = get_state(seq, hist_len)
            action = agent.get_action_best_network(state, epsilon)
            #skip frames by repeating action
            reward = 0
            for i in range(act_rpt):
                reward = reward + ale.act(action)
                preprocess_stack.append(ale.getScreenRGB())
            seq.append(pp.preprocess(preprocess_stack[0], preprocess_stack[1]))
            total_reward += reward
        print('Episode ended with score: %d' % (total_reward))
        num_episodes = num_episodes + 1
        ale.reset_game()
示例#52
0
class AtariPlayer(gym.Env):
    """
    A wrapper for ALE emulator, with configurations to mimic DeepMind DQN settings.

    Info:
        score: the accumulated reward in the current game
        gameOver: True when the current game is Over
    """

    def __init__(self, rom_file, viz=0,
                 frame_skip=4, nullop_start=30,
                 live_lost_as_eoe=True, max_num_frames=0):
        """
        Args:
            rom_file: path to the rom
            frame_skip: skip every k frames and repeat the action
            viz: visualization to be done.
                Set to 0 to disable.
                Set to a positive number to be the delay between frames to show.
                Set to a string to be a directory to store frames.
            nullop_start: start with random number of null ops.
            live_losts_as_eoe: consider lost of lives as end of episode. Useful for training.
            max_num_frames: maximum number of frames per episode.
        """
        super(AtariPlayer, self).__init__()
        if not os.path.isfile(rom_file) and '/' not in rom_file:
            rom_file = get_dataset_path('atari_rom', rom_file)
        assert os.path.isfile(rom_file), \
            "rom {} not found. Please download at {}".format(rom_file, ROM_URL)

        try:
            ALEInterface.setLoggerMode(ALEInterface.Logger.Error)
        except AttributeError:
            if execute_only_once():
                logger.warn("You're not using latest ALE")

        # avoid simulator bugs: https://github.com/mgbellemare/Arcade-Learning-Environment/issues/86
        with _ALE_LOCK:
            self.ale = ALEInterface()
            self.rng = get_rng(self)
            self.ale.setInt(b"random_seed", self.rng.randint(0, 30000))
            self.ale.setInt(b"max_num_frames_per_episode", max_num_frames)
            self.ale.setBool(b"showinfo", False)

            self.ale.setInt(b"frame_skip", 1)
            self.ale.setBool(b'color_averaging', False)
            # manual.pdf suggests otherwise.
            self.ale.setFloat(b'repeat_action_probability', 0.0)

            # viz setup
            if isinstance(viz, six.string_types):
                assert os.path.isdir(viz), viz
                self.ale.setString(b'record_screen_dir', viz)
                viz = 0
            if isinstance(viz, int):
                viz = float(viz)
            self.viz = viz
            if self.viz and isinstance(self.viz, float):
                self.windowname = os.path.basename(rom_file)
                cv2.startWindowThread()
                cv2.namedWindow(self.windowname)

            self.ale.loadROM(rom_file.encode('utf-8'))
        self.width, self.height = self.ale.getScreenDims()
        self.actions = self.ale.getMinimalActionSet()

        self.live_lost_as_eoe = live_lost_as_eoe
        self.frame_skip = frame_skip
        self.nullop_start = nullop_start

        self.action_space = spaces.Discrete(len(self.actions))
        self.observation_space = spaces.Box(
            low=0, high=255, shape=(self.height, self.width))
        self._restart_episode()

    def get_action_meanings(self):
        return [ACTION_MEANING[i] for i in self.actions]

    def _grab_raw_image(self):
        """
        :returns: the current 3-channel image
        """
        m = self.ale.getScreenRGB()
        return m.reshape((self.height, self.width, 3))

    def _current_state(self):
        """
        :returns: a gray-scale (h, w) uint8 image
        """
        ret = self._grab_raw_image()
        # max-pooled over the last screen
        ret = np.maximum(ret, self.last_raw_screen)
        if self.viz:
            if isinstance(self.viz, float):
                cv2.imshow(self.windowname, ret)
                cv2.waitKey(int(self.viz * 1000))
        ret = ret.astype('float32')
        # 0.299,0.587.0.114. same as rgb2y in torch/image
        ret = cv2.cvtColor(ret, cv2.COLOR_RGB2GRAY)
        return ret.astype('uint8')  # to save some memory

    def _restart_episode(self):
        with _ALE_LOCK:
            self.ale.reset_game()

        # random null-ops start
        n = self.rng.randint(self.nullop_start)
        self.last_raw_screen = self._grab_raw_image()
        for k in range(n):
            if k == n - 1:
                self.last_raw_screen = self._grab_raw_image()
            self.ale.act(0)

    def _reset(self):
        if self.ale.game_over():
            self._restart_episode()
        return self._current_state()

    def _step(self, act):
        oldlives = self.ale.lives()
        r = 0
        for k in range(self.frame_skip):
            if k == self.frame_skip - 1:
                self.last_raw_screen = self._grab_raw_image()
            r += self.ale.act(self.actions[act])
            newlives = self.ale.lives()
            if self.ale.game_over() or \
                    (self.live_lost_as_eoe and newlives < oldlives):
                break

        isOver = self.ale.game_over()
        if self.live_lost_as_eoe:
            isOver = isOver or newlives < oldlives

        info = {'ale.lives': newlives}
        return self._current_state(), r, isOver, info
示例#53
0
class AtariEnvironment:
    def __init__(self, args, outputDir):

        self.outputDir = outputDir
        self.screenCaptureFrequency = args.screen_capture_freq

        self.ale = ALEInterface()
        self.ale.setInt(b'random_seed', 123456)
        random.seed(123456)
        # Fix https://groups.google.com/forum/#!topic/deep-q-learning/p4FAIaabwlo
        self.ale.setFloat(b'repeat_action_probability', 0.0)

        # Load the ROM file
        self.ale.loadROM(args.rom)

        self.actionSet = self.ale.getMinimalActionSet()
        self.gameNumber = 0
        self.stepNumber = 0
        self.resetGame()

    def getNumActions(self):
        return len(self.actionSet)

    def getState(self):
        return self.state

    def getGameNumber(self):
        return self.gameNumber

    def getFrameNumber(self):
        return self.ale.getFrameNumber()

    def getEpisodeFrameNumber(self):
        return self.ale.getEpisodeFrameNumber()

    def getEpisodeStepNumber(self):
        return self.episodeStepNumber

    def getStepNumber(self):
        return self.stepNumber

    def getGameScore(self):
        return self.gameScore

    def isGameOver(self):
        return self.ale.game_over()

    def step(self, action):
        previousLives = self.ale.lives()
        reward = 0
        isTerminal = 0
        self.stepNumber += 1
        self.episodeStepNumber += 1

        for i in range(4):
            prevScreenRGB = self.ale.getScreenRGB()
            reward += self.ale.act(self.actionSet[action])
            screenRGB = self.ale.getScreenRGB()

            # Detect end of episode, I don't think I'm handling this right in terms
            # of the overall game loop (??)
            if self.ale.lives() < previousLives or self.ale.game_over():
                isTerminal = 1
                break

            if self.gameNumber % self.screenCaptureFrequency == 0:
                dir = self.outputDir + '/screen_cap/game-%06d' % (
                    self.gameNumber)
                if not os.path.isdir(dir):
                    os.makedirs(dir)
                self.ale.saveScreenPNG(dir + '/frame-%06d.png' %
                                       (self.getEpisodeFrameNumber()))

        maxedScreen = np.maximum(screenRGB, prevScreenRGB)
        self.state = self.state.stateByAddingScreen(maxedScreen,
                                                    self.ale.getFrameNumber())
        self.gameScore += reward
        return reward, self.state, isTerminal

    def resetGame(self):
        if self.ale.game_over():
            self.gameNumber += 1
        self.ale.reset_game()
        self.state = State().stateByAddingScreen(self.ale.getScreenRGB(),
                                                 self.ale.getFrameNumber())
        self.gameScore = 0
        self.episodeStepNumber = 0  # environment steps vs ALE frames.  Will probably be 4*frame number
示例#54
0
class Agent(object):
    def __init__(self):
        self._ale = ALEInterface()
        self._ale.setInt('random_seed', 123)
        self._ale.setFloat('repeat_action_probability', 0.0)
        self._ale.setBool('color_averaging', False)
        self._ale.loadROM('roms/enduro.bin')
        self._controller = Controller(self._ale)
        self._extractor = StateExtractor(self._ale)
        self._image = None

    def run(self, learn, episodes=1, draw=False):
        """ Implements the playing/learning loop.

        Args:
            learn(bool): Whether the self.learn() function should be called.
            episodes (int): The number of episodes to run the agent for.
            draw (bool): Whether to overlay the environment state on the frame.

        Returns:
            None
        """
        for e in range(episodes):
            # Observe the environment to set the initial state
            (grid, self._image) = self._extractor.run(draw=draw, scale=4.0)
            self.initialise(grid)

            num_frames = self._ale.getFrameNumber()

            # Each episode lasts 6500 frames
            while self._ale.getFrameNumber() - num_frames < 6500:
                # Take an action
                self.act()

                # Update the environment grid
                (grid, self._image) = self._extractor.run(draw=draw, scale=4.0)
                self.sense(grid)

                # Perform learning if required
                if learn:
                    self.learn()

                self.callback(learn, e + 1, self._ale.getFrameNumber() - num_frames)
            self._ale.reset_game()

    def getActionsSet(self):
        """ Returns the set of all possible actions
        """
        return [Action.ACCELERATE, Action.RIGHT, Action.LEFT, Action.BREAK]

    def move(self, action):
        """ Executes the action and advances the game to the next state.

        Args:
            action (int): The action which should executed. Make sure to use
                          the constants returned by self.getActionsSet()

        Returns:
           int: The obtained reward after executing the action
        """
        return self._controller.move(action)

    def initialise(self, grid):
        """ Called at the beginning of each episode, mainly used
        for state initialisation.

        Args:
            grid (np.ndarray): 11x10 array with the initial environment grid.

        Returns:
            None
        """
        raise NotImplementedError

    def act(self):
        """ Called at each loop iteration to choose and execute an action.

        Returns:
            None
        """
        raise NotImplementedError

    def sense(self, grid):
        """ Called at each loop iteration to construct the new state from
        the update environment grid.

        Returns:
            None
        """
        raise NotImplementedError

    def learn(self):
        """ Called at each loop iteration when the agent is learning. It should
        implement the learning procedure.

        Returns:
            None
        """
        raise NotImplementedError

    def callback(self, learn, episode, iteration):
        """ Called at each loop iteration mainly for reporting purposes.

        Args:
            learn (bool): Indicates whether the agent is learning or not.
            episode (int): The number of the current episode.
            iteration (int): The number of the current iteration.

        Returns:
            None
        """

        raise NotImplementedError
示例#55
0
文件: ale.py 项目: beegica/sith_torch
class AleEnv(object):
    '''ALE wrapper for RL training
    game_over_conditions={'points':(-1, 1)}: dict that describes all desired game over conditions
    each key corresponds to a condition that is checked; the first condition met produces a game over
        points: int or tuple of integers
            int:
                if x < 0, game ends when score is <= x
                if x >= 0, game ends when score is >= x
            tuple:
                game ends if score <= x[0] or score >= x[1]
        lives: int that ends game when lives <= x
        frames: int that ends game when total number of frames >= x
        episodes: int that ends game when num of episodes >= x
            Use max_num_frames_per_episode to set max episode length


    '''

    # will include timing and hidden functionality in future iterations

    def __init__(self,
                 rom_file,
                 display_screen=False,
                 sound=False,
                 random_seed=0,
                 game_over_conditions={},
                 frame_skip=1,
                 repeat_action_probability=0.25,
                 max_num_frames_per_episode=0,
                 min_action_set=False,
                 screen_color='gray',
                 fps=60,
                 output_buffer_size=1,
                 reduce_screen=False):

        # ALE instance and setup
        self.ale = ALEInterface()
        #TODO: check if rom file exists; will crash jupyter kernel otherwise
        self.ale.loadROM(str.encode(rom_file))

        self.ale.setBool(b'sound', sound)
        self.ale.setBool(b'display_screen', display_screen)

        if min_action_set:
            self.legal_actions = self.ale.getMinimalActionSet()
        else:
            self.legal_actions = self.ale.getLegalActionSet()

        self.ale.setInt(b'random_seed', random_seed)
        self.ale.setInt(b'frame_skip', frame_skip)
        self.frame_skip = frame_skip
        self.ale.setFloat(b'repeat_action_probability',
                          repeat_action_probability)
        self.ale.setInt(b'max_num_frames_per_episode',
                        max_num_frames_per_episode)

        self.ale.loadROM(str.encode(rom_file))

        self.game_over_conditions = game_over_conditions
        self.screen_color = screen_color
        self.reduce_screen = reduce_screen
        self.d_frame = (fps**-1) * self.frame_skip

        # set up output buffer
        self.output_buffer_size = output_buffer_size
        self.queue_size = self.output_buffer_size
        self._reset_params()

    def observe(self, flatten=False, expand_dim=False):

        if flatten is True:
            out = np.stack(self.output_queue[i]
                           for i in range(self.output_buffer_size)).flatten()

            if expand_dim is True:
                return np.expand_dims(np.expand_dims(out, axis=0), axis=1)
            else:
                return out
        else:
            out = np.stack(self.output_queue[i]
                           for i in range(self.output_buffer_size))
            out = np.squeeze(out)

            if expand_dim is True:
                return np.expand_dims(np.expand_dims(out, axis=0), axis=1)

            else:
                return out

    @property
    def width(self):
        return self.game_screen.shape[1]

    @property
    def height(self):
        return self.game_screen.shape[0]

    @property
    def game_over(self):
        return self._game_over()

    @property
    def actions(self):
        return self.legal_actions

    @property
    def lives(self):
        return self.ale.lives()

    def _reset_params(self):

        self.total_points = 0
        self.total_frames = 0
        self.curr_episode = 1
        self.prev_ep_frame_num = -float("inf")

        if self.screen_color == 'gray' or self.screen_color == 'grey':
            self.game_screen = np.squeeze(self.ale.getScreenGrayscale())
            if self.reduce_screen:
                self.game_screen = resize(self.game_screen,
                                          output_shape=(110, 84))
                self.game_screen = self.game_screen[0 + 21:-1 - 4, :]
        elif self.screen_color == 'rgb' or self.screen_color == 'color':
            self.game_screen = self.ale.getScreenRGB()
            if self.reduce_screen:
                self.game_screen = resize(self.game_screen,
                                          output_shape=(110, 84, 3))
                self.game_screen = self.game_screen[0 + 21:-1 - 4, :, :]

        self.output_queue = deque(
            np.zeros(shape=(self.queue_size - 1, self.height, self.width)),
            self.queue_size)
        self.output_queue.appendleft(self.game_screen)

    def reset(self):
        self.ale.reset_game()
        self._reset_params()

    def act(self, action):
        reward = self.ale.act(self.legal_actions[action])

        if self.screen_color == 'gray' or self.screen_color == 'grey':
            self.game_screen = np.squeeze(self.ale.getScreenGrayscale())
            if self.reduce_screen:
                self.game_screen = resize(self.game_screen,
                                          output_shape=(110, 84))
                self.game_screen = self.game_screen[0 + 21:-1 - 4, :]
        elif self.screen_color == 'rgb' or self.screen_color == 'color':
            self.game_screen = self.ale.getScreenRGB()
            if self.reduce_screen:
                self.game_screen = resize(self.game_screen,
                                          output_shape=(110, 84, 3))
                self.game_screen = self.game_screen[0 + 21:-1 - 4, :, :]

        self.output_queue.pop()
        self.output_queue.appendleft(self.game_screen)

        self.total_points += reward
        self.total_frames += self.frame_skip
        if self.ale.getEpisodeFrameNumber() <= self.prev_ep_frame_num:
            self.curr_episode += 1
        self.prev_ep_frame_num = self.ale.getEpisodeFrameNumber()

        return reward, self.d_frame, self.game_over

    def _game_over(self):
        if self.ale.game_over():
            return True

        for cond in self.game_over_conditions:
            if cond == 'points':
                if isinstance(self.game_over_conditions[cond], int):
                    if self.total_points >= self.game_over_conditions[cond]:
                        return True
                elif isinstance(self.game_over_conditions[cond], tuple):
                    if (self.total_points <= self.game_over_conditions[cond][0]
                            or self.total_points >=
                            self.game_over_conditions[cond][1]):
                        return True
            elif cond == 'lives':
                if self.lives <= self.game_over_conditions[cond]:
                    return True
            elif cond == 'frames':
                if self.total_frames >= self.game_over_conditions[cond]:
                    return True
            elif cond == 'episodes':
                if self.curr_episode >= self.game_over_conditions[cond]:
                    return True
            else:
                raise RuntimeError("ERROR: Invalid game over condition")

        return False
示例#56
0
class MyEnv(Environment):
    VALIDATION_MODE = 0

    def __init__(self,
                 rng,
                 rom="ale/breakout.bin",
                 frame_skip=4,
                 ale_options=[{
                     "key": "random_seed",
                     "value": 0
                 }, {
                     "key": "color_averaging",
                     "value": True
                 }, {
                     "key": "repeat_action_probability",
                     "value": 0.
                 }]):
        self._mode = -1
        self._modeScore = 0.0
        self._modeEpisodeCount = 0

        self._frameSkip = frame_skip if frame_skip >= 1 else 1
        self._randomState = rng

        self._ale = ALEInterface()
        for option in ale_options:
            t = type(option["value"])
            if t is int:
                self._ale.setInt(option["key"], option["value"])
            elif t is float:
                self._ale.setFloat(option["key"], option["value"])
            elif t is bool:
                self._ale.setBool(option["key"], option["value"])
            else:
                raise ValueError(
                    "Option {} ({}) is not an int, bool or float.".format(
                        option["key"], t))
        self._ale.loadROM(rom)

        w, h = self._ale.getScreenDims()
        self._screen = np.empty((h, w), dtype=np.uint8)
        self._reducedScreen = np.empty((84, 84), dtype=np.uint8)
        self._actions = self._ale.getMinimalActionSet()

    def reset(self, mode):
        if mode == MyEnv.VALIDATION_MODE:
            if self._mode != MyEnv.VALIDATION_MODE:
                self._mode = MyEnv.VALIDATION_MODE
                self._modeScore = 0.0
                self._modeEpisodeCount = 0
            else:
                self._modeEpisodeCount += 1
        elif self._mode != -1:  # and thus mode == -1
            self._mode = -1

        self._ale.reset_game()
        for _ in range(self._randomState.randint(15)):
            self._ale.act(0)
        self._ale.getScreenGrayscale(self._screen)
        cv2.resize(self._screen, (84, 84),
                   self._reducedScreen,
                   interpolation=cv2.INTER_NEAREST)

        return [4 * [84 * [84 * [0]]]]

    def act(self, action):
        action = self._actions[action]

        reward = 0
        for _ in range(self._frameSkip):
            reward += self._ale.act(action)
            if self.inTerminalState():
                break

        self._ale.getScreenGrayscale(self._screen)
        cv2.resize(self._screen, (84, 84),
                   self._reducedScreen,
                   interpolation=cv2.INTER_NEAREST)

        self._modeScore += reward
        return np.sign(reward)

    def summarizePerformance(self, test_data_set):
        if self.inTerminalState() == False:
            self._modeEpisodeCount += 1
        print("== Mean score per episode is {} over {} episodes ==".format(
            self._modeScore / self._modeEpisodeCount, self._modeEpisodeCount))

    def inputDimensions(self):
        return [(4, 84, 84)]

    def observationType(self, subject):
        return np.uint8

    def nActions(self):
        return len(self._actions)

    def observe(self):
        return [np.array(self._reducedScreen)]

    def inTerminalState(self):
        return self._ale.game_over()
示例#57
0
class ALEEnvironment(Environment):
  def __init__(self, rom_file, args):
    from ale_python_interface import ALEInterface
    self.ale = ALEInterface()
    if args.display_screen:
      if sys.platform == 'darwin':
        import pygame
        pygame.init()
        self.ale.setBool('sound', False) # Sound doesn't work on OSX
      elif sys.platform.startswith('linux'):
        self.ale.setBool('sound', True)
      self.ale.setBool('display_screen', True)

    self.ale.setInt('frame_skip', args.frame_skip)
    self.ale.setFloat('repeat_action_probability', args.repeat_action_probability)
    self.ale.setBool('color_averaging', args.color_averaging)

    if args.random_seed:
      self.ale.setInt('random_seed', args.random_seed)

    if args.record_screen_path:
      if not os.path.exists(args.record_screen_path):
        logger.info("Creating folder %s" % args.record_screen_path)
        os.makedirs(args.record_screen_path)
      logger.info("Recording screens to %s", args.record_screen_path)
      self.ale.setString('record_screen_dir', args.record_screen_path)

    if args.record_sound_filename:
      logger.info("Recording sound to %s", args.record_sound_filename)
      self.ale.setBool('sound', True)
      self.ale.setString('record_sound_filename', args.record_sound_filename)

    self.ale.loadROM(rom_file)

    if args.minimal_action_set:
      self.actions = self.ale.getMinimalActionSet()
      logger.info("Using minimal action set with size %d" % len(self.actions))
    else:
      self.actions = self.ale.getLegalActionSet()
      logger.info("Using full action set with size %d" % len(self.actions))
    logger.debug("Actions: " + str(self.actions))

    self.screen_width = args.screen_width
    self.screen_height = args.screen_height

    self.life_lost = False

  def numActions(self):
    return len(self.actions)

  def restart(self):
    # In test mode, the game is simply initialized. In train mode, if the game
    # is in terminal state due to a life loss but not yet game over, then only
    # life loss flag is reset so that the next game starts from the current
    # state. Otherwise, the game is simply initialized.
    if (
        self.mode == 'test' or
        not self.life_lost or  # `reset` called in a middle of episode
        self.ale.game_over()  # all lives are lost
    ):
      self.ale.reset_game()
    self.life_lost = False

  def act(self, action):
    lives = self.ale.lives()
    reward = self.ale.act(self.actions[action])
    self.life_lost = (not lives == self.ale.lives())
    return reward

  def getScreen(self):
    screen = self.ale.getScreenGrayscale()
    resized = cv2.resize(screen, (self.screen_width, self.screen_height))
    return resized

  def isTerminal(self):
    if self.mode == 'train':
      return self.ale.game_over() or self.life_lost
    return self.ale.game_over()