Example #1
def launch(args, defaults, description):
    """
    Execute a complete training run.
    """

    rec_screen = ""
    if "--nn-file" in args:
        temp_params = vars(load_params(args[args.index("--nn-file") + 1]))
        for p in temp_params:
            try:
                vars(defaults)[p.upper()] = temp_params[p]
            except Exception:
                print("warning: parameter", p, "from param file doesn't exist.")
        #rec_screen = args[args.index("--nn-file")+1][:-len("last_model.pkl")]+"/frames"

    parameters = process_args(args, defaults, description)

    if parameters.rom.endswith('.bin'):
        rom = parameters.rom
    else:
        rom = "%s.bin" % parameters.rom
    parameters.rom_path = os.path.join(defaults.BASE_ROM_PATH, rom)

    rng = np.random.RandomState(123456)

    folder_name = None if parameters.folder_name == "" else parameters.folder_name

    ale = ALEInterface()
    ale.setInt('random_seed', rng.randint(1000))
    ale.setBool('display_screen', parameters.display_screen)
    ale.setString('record_screen_dir', rec_screen)
    trainer = Q_Learning(model_params=parameters,
                         ale_env=ale,
                         folder_name=folder_name)
    trainer.train()
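Note that ALE applies options such as random_seed and record_screen_dir when the ROM is loaded, so the setInt/setBool/setString calls must precede loadROM. A minimal sketch of the usual ordering (the ROM path below is a placeholder, not taken from the example above):

# Minimal ALE setup-order sketch; 'roms/breakout.bin' is a placeholder path.
from ale_python_interface import ALEInterface

ale = ALEInterface()
ale.setInt('random_seed', 123)                  # configure first...
ale.setFloat('repeat_action_probability', 0.0)
ale.loadROM('roms/breakout.bin')                # ...then load the ROM
actions = ale.getMinimalActionSet()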
Example #2
File: emulator.py Project: amharc/jnp3
class Emulator(object):
    def __init__(self, settings):
        self.ale = ALEInterface()
        self.ale.setInt('frame_skip', settings['frame_skip'])
        self.ale.setInt('random_seed', np.random.RandomState().randint(1000))
        self.ale.setBool('color_averaging', False)
        self.ale.loadROM('roms/' + settings['rom_name'])
        self.actions = self.ale.getMinimalActionSet()
        self.width = settings['screen_width']
        self.height = settings['screen_height']
        
    def reset(self):
        self.ale.reset_game()

    def image(self):
        screen = self.ale.getScreenGrayscale()
        screen = cv2.resize(screen, (self.height, self.width),
                interpolation=cv2.INTER_LINEAR)
        return np.reshape(screen, (self.height, self.width))

    def full_image(self):
        screen = self.ale.getScreenRGB()
        return screen

    def act(self, action):
        return self.ale.act(self.actions[action])

    def terminal(self):
        return self.ale.game_over()
class env_atari:
    def __init__(self, params):
        self.params = params
        self.ale = ALEInterface()
        self.ale.setInt('random_seed', np.random.randint(0, 500))
        self.ale.setFloat('repeat_action_probability', params['repeat_prob'])
        self.ale.setInt(b'frame_skip', params['frameskip'])
        self.ale.setBool('color_averaging', True)
        self.ale.loadROM('roms/' + params['rom'] + '.bin')
        self.actions = self.ale.getMinimalActionSet()
        self.action_space = c_action_space(len(self.actions))
        self.screen_width, self.screen_height = self.ale.getScreenDims()

    def reset(self):
        self.ale.reset_game()
        # take a random number of no-op steps to randomize the start state
        no_ops = np.random.randint(0, 7)
        for _ in range(no_ops):
            self.ale.act(0)
        return self.get_image()

    def step(self, action):
        reward = self.ale.act(self.actions[action])
        next_s = self.get_image()
        terminate = self.ale.game_over()
        return next_s, reward, float(terminate), 0

    def get_image(self):
        temp = np.zeros(self.screen_height * self.screen_width * 3,
                        dtype=np.uint8)
        self.ale.getScreenRGB(temp)
        #self.ale.getScreenGrayscale(temp)
        return temp.reshape((self.screen_height, self.screen_width, 3))
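For context, a hypothetical driver loop for env_atari above, assuming a params dict with the keys the constructor reads (repeat_prob, frameskip, rom) and a matching ROM under roms/:

# Hypothetical usage sketch for env_atari; the params keys mirror the constructor.
params = {'repeat_prob': 0.0, 'frameskip': 4, 'rom': 'breakout'}
env = env_atari(params)
state = env.reset()
done = 0.0
while not done:
    action = np.random.randint(len(env.actions))   # random policy stub
    state, reward, done, _ = env.step(action)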
def launch():
	logging.basicConfig(level=logging.INFO)
	myArgs = getParameters()
	rom = myArgs.game
	full_rom_path = os.path.join(myArgs.base_rom_path,rom)
	rng = np.random.RandomState()
	ale = ALEInterface()
	ale.setInt('random_seed',38)
	ale.setBool('display_screen',myArgs.display_screen)
	ale.setInt('frame_skip',myArgs.frame_skip)
	ale.setFloat('repeat_action_probability',myArgs.repeat_action_probability)

	ale.loadROM(full_rom_path)
	valid_actions = ale.getMinimalActionSet()
	'''for episode in xrange(10):
		total_reward = 0
		while not ale.game_over():
			from random import randrange
			a = valid_actions[randrange(len(valid_actions))]
			ale.act(a)
			#print reward
			#print ale.getScreenRGB()

			#total_reward += reward
			#print 'Episode', episode, 'ended with score:', total_reward
		ale.reset_game()
	'''
	memory_pool = ReplayMemory(myArgs.memory_size,rng)
	network_model = buildNetwork(myArgs.resized_height,myArgs.resized_width,myArgs.rmsp_epsilon,myArgs.rmsp_rho,myArgs.learning_rate,len(valid_actions))
	ddqn = DDQN(network_model,valid_actions,myArgs.target_nn_update_frequency,myArgs.discount,myArgs.phi_len)
	agent = Agent(myArgs,ddqn,memory_pool,valid_actions,rng)
	train_agent = TrainMyAgent(myArgs,ale,agent,valid_actions,rng)
	train_agent.run()
Example #5
class Environment:
  def __init__(self, rom_file, args):
    self.ale = ALEInterface()
    if args.display_screen:
      if sys.platform == 'darwin':
        import pygame
        pygame.init()
        self.ale.setBool('sound', False) # Sound doesn't work on OSX
      elif sys.platform.startswith('linux'):
        self.ale.setBool('sound', True)
      self.ale.setBool('display_screen', True)

    self.ale.setInt('frame_skip', args.frame_skip)
    self.ale.setFloat('repeat_action_probability', args.repeat_action_probability)
    self.ale.setBool('color_averaging', args.color_averaging)

    if args.random_seed:
      self.ale.setInt('random_seed', args.random_seed)

    if args.record_screen_path:
      if not os.path.exists(args.record_screen_path):
        logger.info("Creating folder %s" % args.record_screen_path)
        os.makedirs(args.record_screen_path)
      logger.info("Recording screens to %s", args.record_screen_path)
      self.ale.setString('record_screen_dir', args.record_screen_path)

    if args.record_sound_filename:
      logger.info("Recording sound to %s", args.record_sound_filename)
      self.ale.setBool('sound', True)
      self.ale.setString('record_sound_filename', args.record_sound_filename)

    self.ale.loadROM(rom_file)

    if args.minimal_action_set:
      self.actions = self.ale.getMinimalActionSet()
      logger.info("Using minimal action set with size %d" % len(self.actions))
    else:
      self.actions = self.ale.getLegalActionSet()
      logger.info("Using full action set with size %d" % len(self.actions))
    logger.debug("Actions: " + str(self.actions))

    self.dims = (args.screen_height, args.screen_width)

  def numActions(self):
    return len(self.actions)

  def restart(self):
    self.ale.reset_game()

  def act(self, action):
    reward = self.ale.act(self.actions[action])
    return reward

  def getScreen(self):
    screen = self.ale.getScreenGrayscale()
    resized = cv2.resize(screen, self.dims)
    return resized

  def isTerminal(self):
    return self.ale.game_over()
Example #6
def main():
    arguments = docopt.docopt(__doc__, version='ALE Demo Version 1.0')

    pygame.init()

    ale = ALEInterface()
    ale.setInt(b'random_seed', 123)
    ale.setBool(b'display_screen', True)
    ale.loadROM(str.encode(arguments['<rom_file>']))

    legal_actions = ale.getLegalActionSet()
    width, height = ale.getScreenDims()
    print(width, height)
    frame = ale.getScreenRGB()
    frame = np.array(frame, dtype=float)

    rewards, num_episodes = [], int(arguments['--iters'] or 5)
    for episode in range(num_episodes):
        total_reward = 0
        while not ale.game_over():
            total_reward += ale.act(random.choice(legal_actions))
        print('Episode %d reward %d.' % (episode, total_reward))
        rewards.append(total_reward)
        ale.reset_game()

    average = sum(rewards) / len(rewards)
    print('Average for %d episodes: %d' % (num_episodes, average))
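docopt.docopt(__doc__, ...) parses the module docstring, so this snippet only runs when __doc__ declares the CLI. A minimal sketch of such a docstring, matching the two lookups above (<rom_file> and --iters):

"""ALE random-agent demo.

Usage:
  demo.py <rom_file> [--iters=<n>]

Options:
  --iters=<n>  Number of episodes to play.
"""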
Example #7
class ALEGame(object):
    """
    Class linked to the Arcade Learning Environment
    """
    def __init__(self, rand_seed, game_name):
        self.ale = ALEInterface()
        self.ale.setInt(b'random_seed', rand_seed)
        self.ale.setFloat(b'repeat_action_probability', 0.0)
        self.ale.setBool(b'color_averaging', True)
        self.ale.setInt(b'frame_skip', SKIPED_FRAMES)
        self.ale.loadROM(game_name.encode('ascii'))

        self.real_actions = self.ale.getMinimalActionSet()
        self.screen = np.empty((IMAGE_HEIGHT, IMAGE_WIDTH, 1), dtype=np.uint8)
        self.reset()

    def preprocess_image(self, is_to_reshape=False):
        """
        Get image from the game and reshape it
        """
        self.ale.getScreenGrayscale(self.screen)
        reshaped_screen = np.reshape(self.screen, (IMAGE_HEIGHT, IMAGE_WIDTH))
        x_t = skimage.transform.resize(reshaped_screen, (110, 84),
                                       preserve_range=True)

        x_t = x_t[18:102, :]

        if is_to_reshape:
            x_t = np.reshape(x_t, (84, 84, 1))

        x_t = x_t.astype(np.float32)
        x_t *= (1.0 / 255.0)
        return x_t

    def reset(self):
        """
        Resets the game and create the first state
        """
        self.ale.reset_game()
        self.act(0)
        x_t = self.preprocess_image()
        self.s_t = np.stack((x_t, x_t, x_t, x_t), axis=2)

    def act(self, action):
        self.reward = self.ale.act(action)
        self.is_game_over = self.ale.game_over()

    def process_to_next_image(self, action):
        """
        Act and get the new state
        """
        real_action = self.real_actions[action]
        self.act(real_action)
        x_t1 = self.preprocess_image(True)
        self.s_t1 = np.append(self.s_t[:, :, 1:], x_t1, axis=2)

    def update(self):
        self.s_t = self.s_t1
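A hypothetical episode loop showing how s_t, s_t1, and update() interleave; IMAGE_HEIGHT, IMAGE_WIDTH, SKIPED_FRAMES, and the ROM name are constants the class assumes:

# Hypothetical driver for ALEGame; the policy is a random stub.
game = ALEGame(rand_seed=123, game_name='roms/breakout.bin')
while not game.is_game_over:
    action = np.random.randint(len(game.real_actions))
    game.process_to_next_image(action)  # act and fill game.s_t1
    game.update()                       # slide the 4-frame stack: s_t <- s_t1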
Example #8
class AtariEnvironment(Environment):
    """
    Atari Environment Object
    """
    def __init__(self,
                 rom_path,
                 action_repeat=4,
                 death_end=True,
                 width_resize=84,
                 height_resize=84,
                 resize_mod='scale'):
        super(AtariEnvironment, self).__init__()
        self.action_repeat = action_repeat
        self.death_end = death_end
        self.width_resize = width_resize
        self.height_resize = height_resize
        self.resize_mod = resize_mod
        self.display = False

        from ale_python_interface import ALEInterface
        self.ale = ALEInterface()
        self.ale.setInt('random_seed', np.random.randint(1000))
        self.ale.setBool('display_screen', self.display)
        self.ale.loadROM(rom_path)  # load the ROM after the settings so they take effect
        self.action_set = self.ale.getMinimalActionSet()
        self.num_actions = len(self.action_set)
        self.start_lives = self.ale.lives()
        width, height = self.ale.getScreenDims()
        self.currentScreen = np.empty((height, width), dtype=np.uint8)
        self.reset()

    def reset(self):
        self.ale.reset_game()
        self.ale.getScreenGrayscale(self.currentScreen)
        self.terminal = False

    def step(self, action, repeat=None):
        repeat = self.action_repeat if repeat is None else repeat
        reward = 0
        for _ in range(repeat):
            reward += self.ale.act(self.action_set[action])
        self.ale.getScreenGrayscale(self.currentScreen)
        self.terminal = (self.death_end and self.ale.lives() < self.start_lives) \
            or self.ale.game_over()
        return reward

    def get_frame(self):
        if self.resize_mod == 'scale':
            return imresize(self.currentScreen,
                            (self.width_resize, self.height_resize),
                            interp='bilinear')
        elif self.resize_mod == 'crop':
            height, width = self.currentScreen.shape
            res = (height - width) // 2  # integer offset for a centered square crop
            crop = self.currentScreen[res:(res + width), :]
            return imresize(crop, (self.width_resize, self.height_resize),
                            interp='bilinear')
Example #9
def _init_ale(rand_seed, rom_file):
    assert os.path.exists(rom_file), '%s does not exist.' % rom_file
    ale = ALEInterface()
    ale.setInt('random_seed', rand_seed)
    ale.setBool('showinfo', False)
    ale.setInt('frame_skip', 1)
    ale.setFloat('repeat_action_probability', 0.0)
    ale.setBool('color_averaging', False)
    ale.loadROM(rom_file)
    return ale
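With frame_skip at 1 and repeat_action_probability at 0.0 as above, each act() advances exactly one emulator frame, so frame skipping becomes the caller's job. A minimal sketch of agent-side action repetition, assuming an ale built by _init_ale:

def act_with_skip(ale, action, skip=4):
    # Repeat one action for `skip` frames, accumulating the reward.
    reward = 0
    for _ in range(skip):
        reward += ale.act(action)
        if ale.game_over():
            break
    return reward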
Example #10
def init():

    pygame.init()
    rom_path = '/Users/maciej/Development/atari-roms'
    ale = ALEInterface()
    ale.setInt('random_seed', 123)
    ale.setInt('frame_skip', 1)
    ale.setFloat("repeat_action_probability", 0)
    ale.loadROM(rom_path + '/space_invaders.bin')
    return ale
Example #11
def init():

  pygame.init()
  rom_path = '/Users/maciej/Development/atari-roms'
  ale = ALEInterface()
  ale.setInt('random_seed', 123)
  ale.setInt('frame_skip', 1)
  ale.setFloat("repeat_action_probability", 0)
  ale.loadROM(rom_path + '/space_invaders.bin')
  return ale
Example #12
def _init_ale(rand_seed, rom_file):
    assert os.path.exists(rom_file), '%s does not exist.' % rom_file
    ale = ALEInterface()
    ale.setInt('random_seed', rand_seed)
    ale.setBool('showinfo', False)
    ale.setInt('frame_skip', 1)
    ale.setFloat('repeat_action_probability', 0.0)
    ale.setBool('color_averaging', False)
    ale.loadROM(rom_file)
    return ale
Example #13
class Environment:
    def __init__(self, show_screen, history_length):
        self.ale = ALEInterface()
        self.ale.setInt('frame_skip', 4)
        self.history = None
        self.history_length = history_length
        if show_screen:
            self.display_screen()
        self.load_game()
        (screen_width, screen_height) = self.ale.getScreenDims()
        self.screen_data = np.empty((screen_height, screen_width, 1),
                                    dtype=np.uint8)  # 210x160 screen data
        self.dims = (84, 84)  # input size for neural network
        self.actions = [3, 0, 1, 4]  # right, noop, fire, left

    def display_screen(self):
        self.ale.setBool("display_screen", True)

    def turn_on_sound(self):
        self.ale.setBool("sound", True)

    def restart(self):
        """reset game"""
        self.ale.reset_game()

    def act(self, action):
        """:returns reward of an action"""
        return self.ale.act(self.actions[action])

    def __get_screen(self):
        """:returns Grayscale thresholded resized screen image """
        self.ale.getScreenGrayscale(self.screen_data)
        resized = cv2.resize(self.screen_data, self.dims)
        return resized

    def get_state(self):
        binary_screen = self.__get_screen()
        if self.history is None:
            self.history = deque(maxlen=self.history_length)
            for _ in range(self.history_length - 1):
                self.history.append(binary_screen)
        self.history.append(binary_screen)
        result = np.stack(self.history, axis=0)
        return result

    def isTerminal(self):
        """checks if game is over"""
        return self.ale.game_over()

    def load_game(self):
        """load game from file"""
        self.ale.loadROM("Breakout.bin")
Example #14
def init(display_screen=False):
    if display_screen:
        import pygame
        pygame.init()
    rom_path = '.'
    ale = ALEInterface()
    ale.setBool('display_screen', display_screen)
    ale.setInt('random_seed', 123)
    ale.setInt('frame_skip', 1)
    ale.setFloat("repeat_action_probability", 0)
    ale.loadROM(rom_path + '/space_invaders.bin')

    return ale
Example #15
def init(game, display_screen=False, record_dir=None):
    if display_screen:
        import pygame
        pygame.init()
    ale = ALEInterface()
    ale.setBool('display_screen', display_screen)
    ale.setInt('random_seed', 123)
    if record_dir is not None:
        ale.setString("record_screen_dir", record_dir)
    ale.setFloat("repeat_action_probability", 0)
    ale.loadROM('{game}.bin'.format(game=game))

    return ale
Example #16
def init(game, display_screen=False, record_dir=None):
    if display_screen:
        import pygame
        pygame.init()
    ale = ALEInterface()
    ale.setBool('display_screen', display_screen)
    ale.setInt('random_seed', 123)
    if record_dir is not None:
        ale.setString("record_screen_dir", record_dir)
    ale.setFloat("repeat_action_probability", 0)
    ale.loadROM('{game}.bin'.format(game=game))

    return ale
Example #17
class Environment:
    def __init__(self, show_screen, history_length):
        self.ale = ALEInterface()
        self.ale.setInt('frame_skip', 4)
        self.history = None
        self.history_length = history_length
        if show_screen:
            self.display_screen()
        self.load_game()
        (screen_width, screen_height) = self.ale.getScreenDims()
        self.screen_data = np.empty((screen_height, screen_width, 1), dtype=np.uint8)  # 210x160 screen data
        self.dims = (84, 84)  # input size for neural network
        self.actions = [3, 0, 1, 4]  # right, noop, fire, left

    def display_screen(self):
        self.ale.setBool("display_screen", True)

    def turn_on_sound(self):
        self.ale.setBool("sound", True)

    def restart(self):
        """reset game"""
        self.ale.reset_game()

    def act(self, action):
        """:returns reward of an action"""
        return self.ale.act(self.actions[action])

    def __get_screen(self):
        """:returns Grayscale thresholded resized screen image """
        self.ale.getScreenGrayscale(self.screen_data)
        resized = cv2.resize(self.screen_data, self.dims)
        return resized

    def get_state(self):
        binary_screen = self.__get_screen()
        if self.history is None:
            self.history = deque(maxlen=self.history_length)
            for _ in range(self.history_length - 1):
                self.history.append(binary_screen)
        self.history.append(binary_screen)
        result = np.stack(self.history, axis=0)
        return result

    def isTerminal(self):
        """checks if game is over"""
        return self.ale.game_over()

    def load_game(self):
        """load game from file"""
        self.ale.loadROM("Breakout.bin")
Example #18
def init(display_screen=False, record_dir=None):
    if display_screen:
        import pygame
        pygame.init()
    rom_path = '.'
    ale = ALEInterface()
    ale.setBool('display_screen', display_screen)
    ale.setInt('random_seed', 123)
    if record_dir is not None:
        ale.setString("record_screen_dir", record_dir)
    ale.setFloat("repeat_action_probability", 0)
    ale.loadROM(rom_path + '/space_invaders.bin')

    return ale
Example #19
    def __init__(self,
                 game,
                 seed=None,
                 use_sdl=False,
                 n_last_screens=4,
                 frame_skip=4,
                 treat_life_lost_as_terminal=True,
                 crop_or_scale='scale',
                 max_start_nullops=30,
                 record_screen_dir=None):
        self.n_last_screens = n_last_screens
        self.treat_life_lost_as_terminal = treat_life_lost_as_terminal
        self.crop_or_scale = crop_or_scale
        self.max_start_nullops = max_start_nullops

        # atari_py is used only to provide rom files. atari_py has its own
        # ale_python_interface, but it is obsolete.
        game_path = atari_py.get_game_path(game)

        ale = ALEInterface()
        if seed is not None:
            assert seed >= 0 and seed < 2 ** 16, \
                "ALE's random seed must be represented by unsigned int"
        else:
            # Use numpy's random state
            seed = np.random.randint(0, 2**16)
        ale.setInt(b'random_seed', seed)
        ale.setFloat(b'repeat_action_probability', 0.0)
        ale.setBool(b'color_averaging', False)
        if record_screen_dir is not None:
            ale.setString(b'record_screen_dir',
                          str.encode(str(record_screen_dir)))
        self.frame_skip = frame_skip
        if use_sdl:
            if 'DISPLAY' not in os.environ:
                raise RuntimeError(
                    'Please set DISPLAY environment variable for use_sdl=True')
            # SDL settings below are from the ALE python example
            if sys.platform == 'darwin':
                import pygame
                pygame.init()
                ale.setBool(b'sound', False)  # Sound doesn't work on OSX
            elif sys.platform.startswith('linux'):
                ale.setBool(b'sound', True)
            ale.setBool(b'display_screen', True)

        ale.loadROM(str.encode(str(game_path)))

        assert ale.getFrameNumber() == 0

        self.ale = ale
        self.legal_actions = ale.getMinimalActionSet()
        self.initialize()

        self.action_space = spaces.Discrete(len(self.legal_actions))
        one_screen_observation_space = spaces.Box(low=0,
                                                  high=255,
                                                  shape=(84, 84))
        self.observation_space = spaces.Tuple([one_screen_observation_space] *
                                              n_last_screens)
class Emulate:
  def __init__(self, rom_file, display_screen=False, frame_skip=4,
               screen_height=84, screen_width=84, repeat_action_probability=0,
               color_averaging=True, random_seed=0,
               record_screen_path='screen_pics', record_sound_filename=None,
               minimal_action_set=True):
    self.ale = ALEInterface()
    if display_screen:
      if sys.platform == 'darwin':
        import pygame
        pygame.init()
        self.ale.setBool('sound', False) # Sound doesn't work on OSX
      elif sys.platform.startswith('linux'):
        self.ale.setBool('sound', True)
      self.ale.setBool('display_screen', True)

    self.ale.setInt('frame_skip', frame_skip)
    self.ale.setFloat('repeat_action_probability', repeat_action_probability)
    self.ale.setBool('color_averaging', color_averaging)

    if random_seed:
      self.ale.setInt('random_seed', random_seed)

    self.ale.loadROM(rom_file)

    if minimal_action_set:
      self.actions = self.ale.getMinimalActionSet()
    else:
      self.actions = self.ale.getLegalActionSet()

    self.dims = (screen_width,screen_height)

  def numActions(self):
    return len(self.actions)

  def getActions(self):
    return self.actions

  def restart(self):
    self.ale.reset_game()

  def act(self, action):
    reward = self.ale.act(self.actions[action])
    return reward

  def getScreen(self):
    screen = self.ale.getScreenGrayscale()
    resized = cv2.resize(screen, self.dims)
    return resized

  def getScreenGray(self):
    screen = self.ale.getScreenGrayscale()
    resized = cv2.resize(screen, self.dims)
    rotated = np.rot90(resized,k=1)
    return rotated

  def getScreenColor(self):
    screen = self.ale.getScreenRGB()
    resized = cv2.resize(screen, self.dims)
    rotated = np.rot90(resized,k=1)
    return rotated

  def isTerminal(self):
    return self.ale.game_over()
Example #21
def init(display_screen=False, record_dir=None):
    if display_screen:
        import pygame
        pygame.init()
    rom_path = '.'
    ale = ALEInterface()
    ale.setBool('display_screen', display_screen)
    ale.setInt('random_seed', 123)
    if record_dir is not None:
        ale.setString("record_screen_dir", record_dir)
    ale.setFloat("repeat_action_probability", 0)
    ale.loadROM(rom_path + '/space_invaders.bin')

    return ale
Example #22
class AleInterface(object):
    def __init__(self, game, args):
        self.game = game
        self.ale = ALEInterface()

        # if sys.platform == 'darwin':
        #     self.ale.setBool('sound', False)  # Sound doesn't work on OSX
        # elif sys.platform.startswith('linux'):
        #     self.ale.setBool('sound', True)
        # self.ale.setBool('display_screen', True)
        #
        self.ale.setBool('display_screen', args.display_screen)

        self.ale.setInt('frame_skip', args.frame_skip)
        self.ale.setFloat('repeat_action_probability', args.repeat_action_probability)
        self.ale.setBool('color_averaging', args.color_averaging)
        self.ale.setInt('random_seed', args.random_seed)

        #
        # if rand_seed is not None:
        #     self.ale.setInt('random_seed', rand_seed)

        rom_file = "./roms/%s.bin" % game
        if not os.path.exists(rom_file):
            print "not found rom file:", rom_file
            sys.exit(-1)
        self.ale.loadROM(rom_file)

        self.actions = self.ale.getMinimalActionSet()


    def get_actions_num(self):
        return len(self.actions)

    def act(self, action):
        reward = self.ale.act(self.actions[action])
        return reward

    def get_screen_gray(self):
        return self.ale.getScreenGrayscale()

    def get_screen_rgb(self):
        return self.ale.getScreenRGB()

    def game_over(self):
        return self.ale.game_over()

    def reset_game(self):
        return self.ale.reset_game()
Example #23
class Environment:
    def __init__(self, render=False):
        self.ale = ALEInterface()
        self.ale.setInt(b'random_seed', 0)
        self.ale.setFloat(b'repeat_action_probability', 0.0)
        self.ale.setBool(b'color_averaging', True)
        self.ale.setInt(b'frame_skip', 4)
        self.ale.setBool(b'display_screen', render)
        self.ale.loadROM(ENV.encode('ascii'))
        self._screen = np.empty((210, 160, 1), dtype=np.uint8)
        self._no_op_max = 7

    def set_render(self, render):
        if not render:
            self.ale.setBool(b'display_screen', render)

    def reset(self):
        self.ale.reset_game()

        # randomize initial state
        if self._no_op_max > 0:
            no_op = np.random.randint(0, self._no_op_max + 1)
            for _ in range(no_op):
                self.ale.act(0)

        self.ale.getScreenGrayscale(self._screen)
        screen = np.reshape(self._screen, (210, 160))
        screen = cv2.resize(screen, (84, 110))
        screen = screen[18:102, :]
        screen = screen.astype(np.float32)
        screen /= 255.0

        self.frame_buffer = np.stack((screen, screen, screen, screen), axis=2)
        return self.frame_buffer

    def act(self, action):

        reward = self.ale.act(4 + action)
        done = self.ale.game_over()

        self.ale.getScreenGrayscale(self._screen)
        screen = np.reshape(self._screen, (210, 160))
        screen = cv2.resize(screen, (84, 110))
        screen = np.reshape(screen[18:102, :], (84, 84, 1))
        screen = screen.astype(np.float32)
        screen *= (1 / 255.0)

        self.frame_buffer = np.append(self.frame_buffer[:, :, 1:],
                                      screen,
                                      axis=2)

        return self.frame_buffer, reward, done, ""

    def close(self):
        self.ale.setBool(b'display_screen', False)
Example #24
class Breakout(object):
    steps_between_actions = 4

    def __init__(self):
        self.ale = ALEInterface()
        self.ale.setInt('random_seed', 123)
        self.ale.setBool("display_screen", False)
        self.ale.setBool("sound", False)
        self.ale.loadROM("%s/breakout.bin" % rom_directory)
        self.current_state = [
            self.ale.getScreenRGB(), self.ale.getScreenRGB()
        ]

    def start_episode(self):
        self.ale.reset_game()

    def take_action(self, action):
        assert not self.terminated

        def step():
            reward = self.ale.act(action)
            self.roll_state()
            return reward

        reward = sum(step() for _ in range(self.steps_between_actions))

        return (reward, self.current_state)

    def roll_state(self):
        assert len(self.current_state) == 2
        self.current_state = [self.current_state[1], self.ale.getScreenRGB()]
        assert len(self.current_state) == 2

    @property
    def actions(self):
        return self.ale.getMinimalActionSet()

    @property
    def terminated(self):
        return self.ale.game_over() or self.ale.lives() < 5
def initializeALE(romFile, rec_dir):
    ale = ALEInterface()

    max_frames_per_episode = ale.getInt("max_num_frames_per_episode")
    ale.setInt("random_seed", 123)
    ale.setFloat("repeat_action_probability", 0.0)
    ale.setInt("frame_skip", 5)
    # Set record flags
    ale.setString(b'record_screen_dir', rec_dir + '/')
    ale.setString("record_sound_filename", rec_dir + "/sound.wav")
    # We set fragsize to 64 to ensure proper sound sync
    ale.setInt("fragsize", 64)

    # Set USE_SDL to true to display the screen. ALE must be compiled
    # with SDL enabled for this to work. On OSX, pygame init is used to
    # proxy-call SDL_main.

    USE_SDL = False
    if USE_SDL:
        if sys.platform == 'darwin':
            import pygame
            pygame.init()
            ale.setBool('sound', False)  # Sound doesn't work on OSX
        elif sys.platform.startswith('linux'):
            ale.setBool('sound', True)
        ale.setBool('display_screen', True)

    ale.loadROM(romFile)
    actionSet = ale.getMinimalActionSet()

    return ale, actionSet
Example #26
def initializeALE(romFile):
    ale = ALEInterface()

    ale.setInt("max_num_frames_per_episode", 18000)
    ale.setInt("random_seed", 123)
    ale.setFloat("repeat_action_probability", 0.0)
    ale.setInt("frame_skip", 5)

    random_seed = ale.getInt("random_seed")
    print("random_seed: " + str(random_seed))

    # Set USE_SDL to true to display the screen. ALE must be compiled
    # with SDL enabled for this to work. On OSX, pygame init is used to
    # proxy-call SDL_main.

    USE_SDL = False
    if USE_SDL:
        if sys.platform == 'darwin':
            import pygame
            pygame.init()
            ale.setBool('sound', False)  # Sound doesn't work on OSX
        elif sys.platform.startswith('linux'):
            ale.setBool('sound', True)
        ale.setBool('display_screen', True)

    ale.loadROM(romFile)
    actionSet = ale.getMinimalActionSet()

    return ale, actionSet
Example #27
class Atari:
    def __init__(self, rom_dir):
        self.ale = ALEInterface()

        # Set settings
        self.ale.setInt("random_seed", 123)
        self.frame_skip = 4
        self.ale.setInt("frame_skip", self.frame_skip)
        self.ale.setBool("display_screen", False)
        self.ale.setBool("sound", True)
        self.record_sound_for_user = True
        self.ale.setBool("record_sound_for_user", self.record_sound_for_user)

        # NOTE recording audio to file still works. But if both file recording and
        # record_sound_for_user are enabled, then only the latter is done
        #  self.ale.setString("record_sound_filename", "")

        # Get settings
        self.ale.loadROM(rom_dir)
        self.screen_width, self.screen_height = self.ale.getScreenDims()
        self.legal_actions = self.ale.getLegalActionSet()

        # Action count across all episodes
        self.action_count = 0
        self.start_time = time.time()

        self.reset()

    def reset(self):
        self.ale.reset_game()

    def take_action(self):
        action = self.legal_actions[np.random.randint(self.legal_actions.size)]
        self.ale.act(action)
        self.action_count += 1

    def print_fps(self, delta_t=500):
        if self.action_count % delta_t == 0:
            print('[atari.py] Frames/second: %f' %
                  (self.action_count / (time.time() - self.start_time)))
            print('[atari.py] Overall game frame count:',
                  self.action_count * self.frame_skip)
            print('---------')

    def get_image_and_audio(self):
        np_data_image = np.zeros(self.screen_width * self.screen_height * 3,
                                 dtype=np.uint8)
        if self.record_sound_for_user:
            np_data_audio = np.zeros(self.ale.getAudioSize(), dtype=np.uint8)
            self.ale.getScreenRGBAndAudio(np_data_image, np_data_audio)

            # Also supports independent audio queries if user desires:
            #  self.ale.getAudio(np_data_audio)
        else:
            np_data_audio = 0
            self.ale.getScreenRGB(np_data_image)

        return np.reshape(np_data_image,
                          (self.screen_height, self.screen_width,
                           3)), np.asarray(np_data_audio)
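A hypothetical driver for the Atari class above (the ROM path is a placeholder), exercising both the random-action and the image/audio query paths:

# Hypothetical usage sketch for Atari.
atari = Atari('roms/breakout.bin')
for _ in range(1000):
    atari.take_action()
    atari.print_fps()
    image, audio = atari.get_image_and_audio()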
Example #28
class Emulator:
    def __init__(self):

        self.ale = ALEInterface()

        # turn off the sound
        self.ale.setBool('sound', False)

        self.ale.setBool('display_screen', EMULATOR_DISPLAY)

        self.ale.setInt('frame_skip', FRAME_SKIP)
        self.ale.setFloat('repeat_action_probability',
                          REPEAT_ACTION_PROBABILITY)
        self.ale.setBool('color_averaging', COLOR_AVERAGING)

        self.ale.setInt('random_seed', RANDOM_SEED)

        if RECORD_SCENE_PATH:
            self.ale.setString('record_screen_dir', RECORD_SCENE_PATH)

        self.ale.loadROM(ROM_PATH)

        self.actions = self.ale.getMinimalActionSet()
        logger.info("Actions: " + str(self.actions))

        self.dims = DIMS
        #self.start_lives = self.ale.lives()

    def getActions(self):
        return self.actions

    def numActions(self):
        return len(self.actions)

    def restart(self):
        self.ale.reset_game()
        # can be omitted

    def act(self, action):
        reward = self.ale.act(self.actions[action])
        return reward

    def getScreen(self):
        # why grayscale ?
        screen = self.ale.getScreenGrayscale()
        resized = cv2.resize(screen, self.dims)
        # normalize
        #resized /= COLOR_SCALE

        return resized

    def isTerminal(self):
        # while training deepmind only ends when agent dies
        #terminate = DEATH_END and TRAIN and (self.ale.lives() < self.start_lives)

        return self.ale.game_over()
Example #29
File: emulator.py Project: hercky/a3c
class Emulator:
    def __init__(self):
    
        self.ale = ALEInterface()
        
        # turn off the sound
        self.ale.setBool('sound', False)
        
        self.ale.setBool('display_screen', EMULATOR_DISPLAY)

        self.ale.setInt('frame_skip', FRAME_SKIP)
        self.ale.setFloat('repeat_action_probability', REPEAT_ACTION_PROBABILITY)
        self.ale.setBool('color_averaging', COLOR_AVERAGING)

        self.ale.setInt('random_seed', RANDOM_SEED)

        if RECORD_SCENE_PATH:
            self.ale.setString('record_screen_dir', RECORD_SCENE_PATH)


        self.ale.loadROM(ROM_PATH)

        self.actions = self.ale.getMinimalActionSet()
        logger.info("Actions: " + str(self.actions))

        self.dims = DIMS
        #self.start_lives = self.ale.lives()

    def getActions(self):
        return self.actions

    def numActions(self):
        return len(self.actions)

    def restart(self):
        self.ale.reset_game()
        # can be omitted

    def act(self, action):
        reward = self.ale.act(self.actions[action])
        return reward

    def getScreen(self):
        # why grayscale ?
        screen = self.ale.getScreenGrayscale()
        resized = cv2.resize(screen, self.dims)
        # normalize
        #resized /= COLOR_SCALE

        return resized

    def isTerminal(self):
        # while training deepmind only ends when agent dies
        #terminate = DEATH_END and TRAIN and (self.ale.lives() < self.start_lives)

        return self.ale.game_over()
Example #30
    def __init__(self,
                 rom_path,
                 n_last_screens=4,
                 frame_skip=4,
                 treat_life_lost_as_terminal=True,
                 crop_or_scale='scale',
                 max_start_nullops=30,
                 record_screen_dir=None,
                 render=False,
                 max_episode_length=None,
                 max_time=None):
        self.frame_skip = frame_skip
        self.n_last_screens = n_last_screens
        self.treat_life_lost_as_terminal = treat_life_lost_as_terminal
        self.crop_or_scale = crop_or_scale
        self.max_start_nullops = max_start_nullops
        self.max_episode_length = max_episode_length
        self.max_time = max_time

        ale = ALEInterface()
        # Use numpy's random state
        seed = np.random.randint(0, 2**16)
        ale.setInt(b'random_seed', seed)
        ale.setFloat(b'repeat_action_probability', 0.0)
        ale.setBool(b'color_averaging', False)

        if record_screen_dir is not None:
            ale.setString(b'record_screen_dir', str.encode(record_screen_dir))

        if render:
            if sys.platform == 'darwin':
                import pygame
                pygame.init()
                ale.setBool(b'sound', False)  # Sound doesn't work on OSX
            elif sys.platform.startswith('linux'):
                ale.setBool(b'sound', True)
            ale.setBool(b'display_screen', True)

        ale.loadROM(str.encode(rom_path))

        self.ale = ale
        self.__exceed_max = False
        self.legal_actions = ale.getMinimalActionSet()
        self.reset()
Example #31
File: ale.py Project: carpedm20/async-rl
    def __init__(self, rom_filename, seed=None, use_sdl=False, n_last_screens=4,
                 frame_skip=4, treat_life_lost_as_terminal=True,
                 crop_or_scale='scale', max_start_nullops=30,
                 record_screen_dir=None):
        self.n_last_screens = n_last_screens
        self.treat_life_lost_as_terminal = treat_life_lost_as_terminal
        self.crop_or_scale = crop_or_scale
        self.max_start_nullops = max_start_nullops

        ale = ALEInterface()
        if seed is not None:
            assert seed >= 0 and seed < 2 ** 16, \
                "ALE's random seed must be represented by unsigned int"
        else:
            # Use numpy's random state
            seed = np.random.randint(0, 2 ** 16)
        ale.setInt(b'random_seed', seed)
        ale.setFloat(b'repeat_action_probability', 0.0)
        ale.setBool(b'color_averaging', False)
        if record_screen_dir is not None:
            ale.setString(b'record_screen_dir', str.encode(record_screen_dir))
        self.frame_skip = frame_skip
        if use_sdl:
            if 'DISPLAY' not in os.environ:
                raise RuntimeError(
                    'Please set DISPLAY environment variable for use_sdl=True')
            # SDL settings below are from the ALE python example
            if sys.platform == 'darwin':
                import pygame
                pygame.init()
                ale.setBool(b'sound', False)  # Sound doesn't work on OSX
            elif sys.platform.startswith('linux'):
                ale.setBool(b'sound', True)
            ale.setBool(b'display_screen', True)
        ale.loadROM(str.encode(rom_filename))

        assert ale.getFrameNumber() == 0


        self.ale = ale
        self.legal_actions = ale.getMinimalActionSet()
        self.initialize()
Example #32
    def _init_ale(self):
        ale = ALEInterface()
        ale.setBool('sound', self.play_sound)
        ale.setBool('display_screen', self.display_screen)
        ale.setInt('random_seed', self.random_seed)

        # Frame skip is implemented separately
        ale.setInt('frame_skip', 1)
        ale.setBool('color_averaging', False)
        ale.setFloat('repeat_action_probability', 0.0)
        # Somehow repeat_action_probability has an unexpected effect on the
        # game: the larger the value, the more frames games take to restart,
        # and at 1.0 games hang completely. We set the default of 0.0 here,
        # expecting it to have no effect since frame_skip == 1.
        # Action repetition is the agent's concern, so we do not implement
        # an equivalent in our wrapper.

        if self.record_screen_path:
            _LG.info('Recording screens: %s', self.record_screen_path)
            if not os.path.exists(self.record_screen_path):
                os.makedirs(self.record_screen_path)
            ale.setString('record_screen_dir', self.record_screen_path)

        if self.record_sound_filename:
            _LG.info('Recording sound: %s', self.record_sound_filename)
            record_sound_dir = os.path.dirname(self.record_sound_filename)
            if not os.path.exists(record_sound_dir):
                os.makedirs(record_sound_dir)
            ale.setBool('sound', True)
            ale.setString('record_sound_filename', self.record_sound_filename)

        ale.loadROM(self.rom_path)

        self._ale = ale
        self._actions = (ale.getMinimalActionSet() if self.minimal_action_set
                         else ale.getLegalActionSet())
def main():
    if len(sys.argv) < 2:
        dir_rom = '/Users/lguan/Documents/Study/Research/Atari-2600-Roms/T-Z/Tennis.bin'
    else:
        dir_rom = sys.argv[1]

    ale = ALEInterface()

    # Get & Set the desired settings
    ale.setInt(b'random_seed', 123)

    # Set USE_SDL to true to display the screen. ALE must be compiled
    # with SDL enabled for this to work. On OSX, pygame init is used to
    # proxy-call SDL_main.
    USE_SDL = True
    if USE_SDL:
        # mac OS
        if sys.platform == 'darwin':
            pygame.init()
            ale.setBool('sound', False)  # Sound doesn't work on OSX
        elif sys.platform.startswith('linux'):
            ale.setBool('sound', True)

        ale.setBool('display_screen', True)

    # Load the ROM file
    rom_file = str.encode(dir_rom)
    print('- Loading ROM - %s' % dir_rom)
    ale.loadROM(rom_file)
    print('- Complete loading ROM')

    legal_actions = ale.getMinimalActionSet()

    # Play 10 episodes
    for episode in range(10):
        total_reward = 0
        while not ale.game_over():
            a = legal_actions[np.random.randint(legal_actions.size)]
            # Apply an action and get the resulting reward
            reward = ale.act(a)
            total_reward += reward
        print('Episode %d ended with score: %d' % (episode, total_reward))
        ale.reset_game()
Example #34
class game(object):
    def __init__(self, display):
        self.ale = ALEInterface()

        # Get & Set the desired settings
        self.ale.setInt('random_seed', 123)

        # Set USE_SDL to true to display the screen. ALE must be compiled
        # with SDL enabled for this to work. On OSX, pygame init is used to
        # proxy-call SDL_main.
        USE_SDL = display
        if USE_SDL:
            if sys.platform == 'darwin':
                import pygame
                pygame.init()
                self.ale.setBool('sound', False)  # Sound doesn't work on OSX
            elif sys.platform.startswith('linux'):
                self.ale.setBool('sound', True)
            self.ale.setBool('display_screen', True)

        # Load the ROM file
        self.ale.loadROM("ms_pacman.bin")

    def act(self, action):
        return self.ale.act(action)

    def getState(self):
        return get_feature(self.ale.getScreen())

    def getScreen(self):
        return self.ale.getScreen()

    def reset_game(self):
        self.ale.reset_game()

    def lives(self):
        return self.ale.lives()

    def game_over(self):
        return self.ale.game_over()
Example #35
def init_ale(rom, display):
    ale = ALEInterface()
    # Get & Set the desired settings
    ale.setInt(b'random_seed', 123)

    # Set USE_SDL to true to display the screen. ALE must be compiled
    # with SDL enabled for this to work. On OSX, pygame init is used to
    # proxy-call SDL_main.
    USE_SDL = display
    if USE_SDL:
        if sys.platform == 'darwin':
            import pygame
            pygame.init()
            ale.setBool('sound', False) # Sound doesn't work on OSX
        elif sys.platform.startswith('linux'):
            ale.setBool('sound', True)
        ale.setBool('display_screen', display)

    # Load the ROM file
    ale.loadROM(rom)

    return ale
Example #36
def init_ale(rom, display):
    ale = ALEInterface()
    # Get & Set the desired settings
    ale.setInt(b'random_seed', 123)

    # Set USE_SDL to true to display the screen. ALE must be compiled
    # with SDL enabled for this to work. On OSX, pygame init is used to
    # proxy-call SDL_main.
    USE_SDL = display
    if USE_SDL:
        if sys.platform == 'darwin':
            import pygame
            pygame.init()
            ale.setBool('sound', False)  # Sound doesn't work on OSX
        elif sys.platform.startswith('linux'):
            ale.setBool('sound', True)
        ale.setBool('display_screen', display)

    # Load the ROM file
    ale.loadROM(rom)

    return ale
Example #37
class Game():
    """
    Wrapper around the ALEInterface class.
    """

    def __init__(self, rom_file, sdl=False):
        self.ale = ALEInterface()
        # Setup SDL
        if sdl:
            if sys.platform == 'darwin':
                import pygame
                pygame.init()
                self.ale.setBool(b'sound', False) # Sound doesn't work on OSX
            elif sys.platform.startswith('linux'):
                self.ale.setBool(b'sound', True)
            self.ale.setBool(b'display_screen', True)

        # Load rom
        self.ale.loadROM(str.encode(rom_file))

    def get_action_set(self):
        return self.ale.getLegalActionSet()

    def get_minimal_action_set(self):
        return self.ale.getMinimalActionSet()

    def game_over(self):
        return self.ale.game_over()

    def act(self, action):
        return self.ale.act(action)

    def reset_game(self):
        self.ale.reset_game()

    def get_frame(self):
        return self.ale.getScreenRGB()
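A hypothetical random-play loop for the Game wrapper above, assuming numpy as np and a valid ROM path:

game = Game('roms/breakout.bin')
actions = game.get_minimal_action_set()
while not game.game_over():
    reward = game.act(np.random.choice(actions))
frame = game.get_frame()   # final RGB screen
game.reset_game()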
Example #38
def ale_load_from_rom(rom_path, display_screen):
    rng = get_numpy_rng()
    try:
        from ale_python_interface import ALEInterface
    except ImportError as e:
        raise ImportError('Unable to import the python package of Arcade Learning Environment. '
                          'ALE may not have been installed correctly. Refer to '
                          '`https://github.com/mgbellemare/Arcade-Learning-Environment` for some '
                          'installation guidance.')

    ale = ALEInterface()
    ale.setInt(b'random_seed', rng.randint(1000))
    if display_screen:
        import sys
        if sys.platform == 'darwin':
            import pygame
            pygame.init()
            ale.setBool(b'sound', False) # Sound doesn't work on OSX
        ale.setBool(b'display_screen', True)
    else:
        ale.setBool(b'display_screen', False)
    ale.setFloat(b'repeat_action_probability', 0)
    ale.loadROM(str.encode(rom_path))
    return ale
def get_random_baseline(gamepath):
    ale = ALEInterface()
    ale.setInt('random_seed', 42)

    recordings_dir = './recordings/breakout/'

    USE_SDL = True
    if USE_SDL:
        if sys.platform == 'darwin':
            import pygame
            pygame.init()
            ale.setBool('sound', False) # Sound doesn't work on OSX
            #ale.setString("record_screen_dir", recordings_dir);
        elif sys.platform.startswith('linux'):
            ale.setBool('sound', True)
            ale.setBool('display_screen', True)

    # Load the ROM file
    ale.loadROM(gamepath)

    # Get the list of legal actions
    legal_actions = ale.getLegalActionSet()

    # Play 10 episodes
    rewards = []
    for episode in range(10):
        total_reward = 0
        while not ale.game_over():
            a = legal_actions[randrange(len(legal_actions))]
            reward = ale.act(a)
            total_reward += reward
        rewards.append(total_reward)
        #print 'Episode', episode, 'ended with score:', total_reward
        ale.reset_game()
    avg_reward = sum(rewards) / float(len(rewards))
    return avg_reward
Example #40
def ale_load_from_rom(rom_path, display_screen):
    rng = get_numpy_rng()
    try:
        from ale_python_interface import ALEInterface
    except ImportError as e:
        raise ImportError('Unable to import the python package of Arcade Learning Environment. '
                          'ALE may not have been installed correctly. Refer to '
                          '`https://github.com/mgbellemare/Arcade-Learning-Environment` for some '
                          'installation guidance.')

    ale = ALEInterface()
    ale.setInt('random_seed', rng.randint(1000))
    if display_screen:
        import sys
        if sys.platform == 'darwin':
            import pygame
            pygame.init()
            ale.setBool('sound', False)  # Sound doesn't work on OSX
        ale.setBool('display_screen', True)
    else:
        ale.setBool('display_screen', False)
    ale.setFloat('repeat_action_probability', 0)
    ale.loadROM(rom_path)
    return ale
Example #41
def get_random_baseline(gamepath):
    ale = ALEInterface()
    ale.setInt('random_seed', 42)

    recordings_dir = './recordings/breakout/'

    USE_SDL = True
    if USE_SDL:
        if sys.platform == 'darwin':
            import pygame
            pygame.init()
            ale.setBool('sound', False)  # Sound doesn't work on OSX
            #ale.setString("record_screen_dir", recordings_dir);
        elif sys.platform.startswith('linux'):
            ale.setBool('sound', True)
            ale.setBool('display_screen', True)

    # Load the ROM file
    ale.loadROM(gamepath)

    # Get the list of legal actions
    legal_actions = ale.getLegalActionSet()

    # Play 10 episodes
    rewards = []
    for episode in range(10):
        total_reward = 0
        while not ale.game_over():
            a = legal_actions[randrange(len(legal_actions))]
            reward = ale.act(a)
            total_reward += reward
        rewards.append(total_reward)
        #print 'Episode', episode, 'ended with score:', total_reward
        ale.reset_game()
    avg_reward = sum(rewards) / float(len(rewards))
    return avg_reward
Example #42
class GameState(object):
  def __init__(self, rand_seed, display=False):
    self.ale = ALEInterface()
    self.ale.setInt('random_seed', rand_seed)

    if display:
      self._setup_display()
    
    self.ale.loadROM(ROM)

    # height=210, width=160
    self.screen = np.empty((210, 160, 1), dtype=np.uint8)
    
    no_action = 0
    
    self.reward = self.ale.act(no_action)
    self.terminal = self.ale.game_over()

    # screen has shape (210, 160, 1)
    self.ale.getScreenGrayscale(self.screen)
    
    # reshape to (210, 160)
    reshaped_screen = np.reshape(self.screen, (210, 160))
    
    # resize to height=110, width=84
    resized_screen = cv2.resize(reshaped_screen, (84, 110))
    
    x_t = resized_screen[18:102,:]
    x_t = x_t.astype(np.float32)
    x_t *= (1.0/255.0)
    self.s_t = np.stack((x_t, x_t, x_t, x_t), axis = 2)

    # keep only the actions the game actually uses
    self.real_actions = self.ale.getMinimalActionSet()
    
  def _setup_display(self):
    if sys.platform == 'darwin':
      import pygame
      pygame.init()
      self.ale.setBool('sound', False)
    elif sys.platform.startswith('linux'):
      self.ale.setBool('sound', True)
    self.ale.setBool('display_screen', True)
    
  def process(self, action):
    # map the index to one of the actually-used actions (out of the 18 legal ones)
    real_action = self.real_actions[action]
    self.reward = self.ale.act(real_action)
    #self.reward = self.ale.act(action)
    self.terminal = self.ale.game_over()
    
    # screen has shape (210, 160, 1)
    self.ale.getScreenGrayscale(self.screen)
    
    # reshape to (210, 160)
    reshaped_screen = np.reshape(self.screen, (210, 160))
    
    # resize from height=210, width=160 to height=110, width=84
    resized_screen = cv2.resize(reshaped_screen, (84, 110))
    x_t1 = resized_screen[18:102,:]
    x_t1 = np.reshape(x_t1, (84, 84, 1))
    x_t1 = x_t1.astype(np.float32)    
    x_t1 *= (1.0/255.0)
    
    self.s_t1 = np.append(x_t1, self.s_t[:, :, 0:3], axis=2)  # newest frame first
    if self.terminal:
      self.ale.reset_game()

  def update(self):
    self.s_t = self.s_t1
Example #43
class AtariPlayer(RLEnvironment):
    """
    A wrapper for atari emulator.
    NOTE: will automatically restart when a real episode ends
    """
    def __init__(self, rom_file, viz=0, height_range=(None,None),
            frame_skip=4, image_shape=(84, 84), nullop_start=30,
            live_lost_as_eoe=True):
        """
        :param rom_file: path to the rom
        :param frame_skip: skip every k frames and repeat the action
        :param image_shape: (w, h)
        :param height_range: (h1, h2) to cut
        :param viz: visualization to be done.
            Set to 0 to disable.
            Set to a positive number to be the delay between frames to show.
            Set to a string to be a directory to store frames.
        :param nullop_start: start with random number of null ops
        :param live_lost_as_eoe: consider loss of lives as end of episode. Useful for training.
        """
        super(AtariPlayer, self).__init__()
        if not os.path.isfile(rom_file) and '/' not in rom_file:
            rom_file = get_dataset_dir('atari_rom', rom_file)
        assert os.path.isfile(rom_file), \
                "rom {} not found. Please download at {}".format(rom_file, ROM_URL)

        try:
            ALEInterface.setLoggerMode(ALEInterface.Logger.Warning)
        except AttributeError:
            log_once()

        # avoid simulator bugs: https://github.com/mgbellemare/Arcade-Learning-Environment/issues/86
        with _ALE_LOCK:
            self.ale = ALEInterface()
            self.rng = get_rng(self)

            self.ale.setInt(b"random_seed", self.rng.randint(0, 10000))
            self.ale.setBool(b"showinfo", False)

            self.ale.setInt(b"frame_skip", 1)
            self.ale.setBool(b'color_averaging', False)
            # manual.pdf suggests otherwise.
            self.ale.setFloat(b'repeat_action_probability', 0.0)

            # viz setup
            if isinstance(viz, six.string_types):
                assert os.path.isdir(viz), viz
                self.ale.setString(b'record_screen_dir', viz)
                viz = 0
            if isinstance(viz, int):
                viz = float(viz)
            self.viz = viz
            if self.viz and isinstance(self.viz, float):
                self.windowname = os.path.basename(rom_file)
                cv2.startWindowThread()
                cv2.namedWindow(self.windowname)

            self.ale.loadROM(rom_file.encode('utf-8'))
        self.width, self.height = self.ale.getScreenDims()
        self.actions = self.ale.getMinimalActionSet()


        self.live_lost_as_eoe = live_lost_as_eoe
        self.frame_skip = frame_skip
        self.nullop_start = nullop_start
        self.height_range = height_range
        self.image_shape = image_shape

        self.current_episode_score = StatCounter()
        self.restart_episode()

    def _grab_raw_image(self):
        """
        :returns: the current 3-channel image
        """
        m = self.ale.getScreenRGB()
        return m.reshape((self.height, self.width, 3))

    def current_state(self):
        """
        :returns: a gray-scale (h, w, 1) float32 image
        """
        ret = self._grab_raw_image()
        # max-pooled over the last screen
        ret = np.maximum(ret, self.last_raw_screen)
        if self.viz:
            if isinstance(self.viz, float):
                #m = cv2.resize(ret, (1920,1200))
                cv2.imshow(self.windowname, ret)
                time.sleep(self.viz)
        ret = ret[self.height_range[0]:self.height_range[1],:].astype('float32')
        # 0.299, 0.587, 0.114. Same as rgb2y in torch/image
        ret = cv2.cvtColor(ret, cv2.COLOR_RGB2GRAY)
        ret = cv2.resize(ret, self.image_shape)
        ret = np.expand_dims(ret, axis=2)
        return ret

    def get_action_space(self):
        return DiscreteActionSpace(len(self.actions))

    def restart_episode(self):
        if self.current_episode_score.count > 0:
            self.stats['score'].append(self.current_episode_score.sum)
        self.current_episode_score.reset()
        self.ale.reset_game()

        # random null-ops start
        n = self.rng.randint(self.nullop_start)
        self.last_raw_screen = self._grab_raw_image()
        for k in range(n):
            if k == n - 1:
                self.last_raw_screen = self._grab_raw_image()
            self.ale.act(0)

    def action(self, act):
        """
        :param act: an index of the action
        :returns: (reward, isOver)
        """
        oldlives = self.ale.lives()
        r = 0
        for k in range(self.frame_skip):
            if k == self.frame_skip - 1:
                self.last_raw_screen = self._grab_raw_image()
            r += self.ale.act(self.actions[act])
            newlives = self.ale.lives()
            if self.ale.game_over() or \
                    (self.live_lost_as_eoe and newlives < oldlives):
                break

        self.current_episode_score.feed(r)
        isOver = self.ale.game_over()
        if isOver:
            self.restart_episode()
        if self.live_lost_as_eoe:
            isOver = isOver or newlives < oldlives
        return (r, isOver)
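A hypothetical interaction loop for AtariPlayer, assuming a locally available ROM:

player = AtariPlayer('breakout.bin', viz=0)
is_over = False
while not is_over:
    state = player.current_state()                    # (84, 84, 1) float32 frame
    act_idx = np.random.randint(len(player.actions))  # random policy stub
    reward, is_over = player.action(act_idx)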
Example #44
class ALEEnvironment(BaseEnvironment):
    """
    A wrapper of Arcade Learning Environment, which inherits all members of ``BaseEnvironment``.
    """
    # 63 games
    ADVENTURE = "adventure"
    AIR_RAID = "air_raid"
    ALIEN = "alien"
    AMIDAR = "amidar"
    ASSAULT = "assault"
    ASTERIX = "asterix"
    ASTEROIDS = "asteroids"
    ATLANTIS = "atlantis"
    BANK_HEIST = "bank_heist"
    BATTLE_ZONE = "battle_zone"
    BEAM_RIDER = "beam_rider"
    BERZERK = "berzerk"
    BOWLING = "bowling"
    BOXING = "boxing"
    BREAKOUT = "breakout"
    CARNIVAL = "carnival"
    CENTIPEDE = "centipede"
    CHOPPER_COMMAND = "chopper_command"
    CRAZY_CLIMBER = "crazy_climber"
    DEFENDER = "defender"
    DEMON_ATTACK = "demon_attack"
    DOUBLE_DUNK = "double_dunk"
    ELEVATOR_ACTION = "elevator_action"
    ENDURO = "enduro"
    FISHING_DERBY = "fishing_derby"
    FREEWAY = "freeway"
    FROSTBITE = "frostbite"
    GOPHER = "gopher"
    GRAVITAR = "gravitar"
    HERO = "hero"
    ICE_HOCKEY = "ice_hockey"
    JAMESBOND = "jamesbond"
    JOURNEY_ESCAPE = "journey_escape"
    KABOOM = "kaboom"
    KANGAROO = "kangaroo"
    KRULL = "krull"
    KUNGFU_MASTER = "kung_fu_master"
    MONTEZUMA = "montezuma_revenge"
    MS_PACMAN = "ms_pacman"
    UNKNOWN = "name_this_game"
    PHOENIX = "phoenix"
    PITFALL = "pitfall"
    PONG = "pong"
    POOYAN = "pooyan"
    PRIVATE_EYE = "private_eye"
    QBERT = "qbert"
    RIVERRAID = "riverraid"
    ROAD_RUNNER = "road_runner"
    ROBOTANK = "robotank"
    SEAQUEST = "seaquest"
    SKIING = "skiing"
    SOLARIS = "solaris"
    SPACE_INVADERS = "space_invaders"
    STAR_GUNNER = "star_gunner"
    TENNIS = "tennis"
    TIME_PILOT = "time_pilot"
    TUTANKHAM = "tutankham"
    UP_N_DOWN = "up_n_down"
    VENTURE = "venture"
    VIDEO_PINBALL = "video_pinball"
    WIZARD_OF_WOR = "wizard_of_wor"
    YARS_REVENGE = "yars_revenge"
    ZAXXON = "zaxxon"

    def __init__(self,
                 rom_name,
                 frame_skip=4,
                 repeat_action_probability=0.,
                 max_episode_steps=10000,
                 loss_of_life_termination=False,
                 loss_of_life_negative_reward=False,
                 bitwise_max_on_two_consecutive_frames=False,
                 is_render=False,
                 seed=None,
                 startup_policy=None,
                 disable_actions=None,
                 num_of_sub_actions=-1,
                 state_processor=AtariProcessor(resize_shape=(84, 84),
                                                convert_to_grayscale=True)):

        os.environ['SDL_VIDEO_CENTERED'] = '1'

        file_exist = isfile(ALEEnvironment.get_rom_path(rom_name))
        if not file_exist:
            raise ValueError("Rom not found ! Please put rom " + rom_name +
                             ".bin into: " + ALEEnvironment.get_rom_path())

        self.__rom_name = rom_name
        self.__ale = ALEInterface()

        if frame_skip < 0:
            print("Invalid frame_skip param ! Set default frame_skip = 4")
            self.__frame_skip = 4
        else:
            self.__frame_skip = frame_skip

        if repeat_action_probability < 0 or repeat_action_probability > 1:
            raise ValueError("Invalid repeat_action_probability")
        else:
            self.__repeat_action_probability = repeat_action_probability

        self.__max_episode_steps = max_episode_steps
        self.__loss_of_life_termination = loss_of_life_termination
        self.__loss_of_life_negative_reward = loss_of_life_negative_reward
        self.__max_2_frames = bitwise_max_on_two_consecutive_frames

        # Max 2 frames only work with grayscale
        self.__grayscale = False
        if (state_processor is not None
                and type(state_processor) is AtariProcessor
                and state_processor.get_grayscale()):
            self.__grayscale = True

        if self.__max_2_frames and self.__frame_skip > 1 and self.__grayscale:
            self.__max_2_frames = True
        else:
            self.__max_2_frames = False

        self.__is_render = is_render
        self.__processor = state_processor

        if seed is None or seed <= 0 or seed >= 9999:
            if seed is not None and (seed < 0 or seed >= 9999):
                print("Invalid seed ! Default seed = randint(0, 9999")
            self.__seed = np.random.randint(0, 9999)
            self.__random_seed = True
        else:
            self.__random_seed = False
            self.__seed = seed

        self.__current_steps = 0
        self.__is_life_lost = False
        self.__is_terminal = False
        self.__current_lives = 0
        self.__action_reduction = num_of_sub_actions
        self.__scr_width, self.__scr_height, self.__action_set = self.__init_ale()
        self.__prev_buffer = np.empty((self.__scr_height, self.__scr_width, 3),
                                      dtype=np.uint8)
        self.__current_buffer = np.empty(
            (self.__scr_height, self.__scr_width, 3), dtype=np.uint8)
        self.__current_state = None
        self.__prev_state = None
        self.__startup_policy = startup_policy
        if disable_actions is None:
            self.__dis_act = []
        else:
            self.__dis_act = disable_actions

        if (self.__processor is not None
                and self.__processor.get_number_of_objectives() > 1):
            self.__multi_objs = True
        else:
            self.__multi_objs = False

    def get_processor(self):
        return self.__processor

    def __init_ale(self):

        self.__ale.setBool(b'display_screen', self.__is_render)

        if self.__max_2_frames and self.__frame_skip > 1:
            self.__ale.setInt(b'frame_skip', 1)
        else:
            self.__ale.setInt(b'frame_skip', self.__frame_skip)

        self.__ale.setInt(b'random_seed', self.__seed)
        self.__ale.setFloat(b'repeat_action_probability',
                            self.__repeat_action_probability)
        self.__ale.setBool(b'color_averaging', False)

        self.__ale.loadROM(
            ALEEnvironment.get_rom_path(self.__rom_name).encode())

        width, height = self.__ale.getScreenDims()
        return width, height, self.__ale.getMinimalActionSet()

    def clone(self):
        if self.__random_seed:
            seed = np.random.randint(0, 9999)
        else:
            seed = self.__seed

        return ALEEnvironment(self.__rom_name, self.__frame_skip,
                              self.__repeat_action_probability,
                              self.__max_episode_steps,
                              self.__loss_of_life_termination,
                              self.__loss_of_life_negative_reward,
                              self.__max_2_frames, self.__is_render, seed,
                              self.__startup_policy,
                              self.__dis_act, self.__action_reduction,
                              self.__processor.clone())

    def step_all(self, a):
        if isinstance(a, (list, np.ndarray)):
            if len(a) <= 0:
                raise ValueError('Empty action list !')
            a = a[0]
        self.__current_steps += 1
        act = self.__action_set[a]
        rew = self._step(act)
        next_state = self.get_state()
        _is_terminal = self.is_terminal()
        return next_state, rew, _is_terminal, self.__current_steps

    def reset(self):
        self.__ale.reset_game()
        self.__current_lives = self.__ale.lives()
        self.__is_life_lost = False
        self.__is_terminal = False
        self.__current_state = None
        self.__prev_state = None

        action_space = self.get_action_space()
        v_range, is_range = action_space.get_range()
        if len(v_range) > 1:
            self.step(1)

        # No op steps
        if self.__startup_policy is not None:
            max_steps = int(self.__startup_policy.get_max_steps())
            for _ in range(max_steps):
                act = self.__startup_policy.step(self.get_state(),
                                                 action_space)
                self.step(act)

        # Start training from this point
        self.__current_steps = 0

        # Reset processor
        if self.__processor is not None:
            self.__processor.reset()

        return self.get_state()

    def _pre_step(self, act):
        if self.__max_2_frames and self.__frame_skip > 1:
            rew = 0
            for i in range(self.__frame_skip - 2):
                rew += self.__ale.act(act)
                self.__prev_buffer = self.__ale.getScreenRGB(
                    self.__prev_buffer)

            self.__prev_buffer = self.__ale.getScreenRGB(self.__prev_buffer)

            rew += self.__ale.act(act)

            self.__current_buffer = self.__ale.getScreenRGB(
                self.__current_buffer)

            self.__is_terminal = self.__ale.game_over()

            if self.__processor is not None:
                self.__prev_state = self.__processor.process(
                    self.__prev_buffer)
                self.__current_state = self.__processor.process(
                    self.__current_buffer)
            else:
                self.__prev_state = self.__prev_buffer
                self.__current_state = self.__current_buffer

            self.__current_state = np.maximum.reduce(
                [self.__prev_state, self.__current_state])
        else:
            rew = self.__ale.act(act)
            self.__current_buffer = self.__ale.getScreenRGB(
                self.__current_buffer)
            self.__is_terminal = self.__ale.game_over()

            if self.__processor is not None:
                self.__current_state = self.__processor.process(
                    self.__current_buffer)

        if self.__multi_objs and self.__processor is not None:
            all_rewards = self.__processor.get_rewards(rew)
            return all_rewards
        else:
            return rew

    def _step(self, act):
        for i in range(len(self.__dis_act)):
            if act == self.__dis_act[i]:
                act = 0

        if not self.__loss_of_life_termination and not self.__loss_of_life_negative_reward:
            if not self.__is_terminal:
                next_lives = self.__ale.lives()
                if next_lives < self.__current_lives:
                    act = 1
                    self.__current_lives = next_lives
            return self._pre_step(act)
        else:
            rew = self._pre_step(act)
            next_lives = self.__ale.lives()
            if next_lives < self.__current_lives:
                if self.__loss_of_life_negative_reward:
                    rew -= 1
                self.__current_lives = next_lives
                self.__is_life_lost = True

            return rew

    def get_state(self):
        if not self.__max_2_frames:
            if self.__processor is not None:
                return self.__current_state
            else:
                return self.__current_buffer
        else:
            return self.__current_state

    def is_terminal(self):
        if self.__loss_of_life_termination and self.__is_life_lost:
            return True
        elif self.__max_episode_steps is not None and self.__current_steps > self.__max_episode_steps:
            return True
        else:
            return self.__is_terminal

    @staticmethod
    def get_rom_path(rom=None):
        if rom is None:
            return os.path.dirname(os.path.abspath(__file__)) + "/roms/"
        else:
            return os.path.dirname(
                os.path.abspath(__file__)) + "/roms/" + rom + ".bin"

    @staticmethod
    def list_all_roms():
        return [
            f for f in listdir(ALEEnvironment.get_rom_path())
            if isfile(join(ALEEnvironment.get_rom_path(), f))
        ]

    def get_state_space(self):
        if self.__processor is None:
            shape = self.__current_buffer.shape
        else:
            shape = self.__processor.process(self.__current_buffer).shape
        min_value = np.zeros(shape, dtype=np.uint8)
        max_value = np.full(shape, 255)
        return Space(min_value, max_value, True)

    def get_action_space(self):
        if self.__action_reduction >= 1:
            return Space(0, self.__action_reduction - 1, True)
        else:
            return Space(0, len(self.__action_set) - 1, True)

    def step(self, act):
        if isinstance(act, (list, np.ndarray)):
            if len(act) <= 0:
                raise ValueError('Empty action list !')
            act = act[0]
        self.__current_steps += 1
        act = self.__action_set[act]
        rew = self._step(act)
        return rew

    def get_current_steps(self):
        return self.__current_steps

    def is_atari(self):
        return True

    def is_render(self):
        return self.__is_render

    def get_number_of_objectives(self):
        if self.__processor is None:
            return 1
        else:
            return self.__processor.get_number_of_objectives()

    def get_number_of_agents(self):
        if self.__processor is None:
            return 1
        else:
            return self.__processor.get_number_of_agents()

    def get_state_processor(self):
        return self.__processor
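
A hedged usage sketch for ALEEnvironment as defined above, assuming breakout.bin has been placed under the module's roms/ directory:

env = ALEEnvironment(ALEEnvironment.BREAKOUT, frame_skip=4, is_render=False)
state = env.reset()
terminal = False
while not terminal:
    # drive the environment with a constant no-op action, for illustration
    state, reward, terminal, steps = env.step_all(0)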
Example #45
class AtariPlayer(gym.Env):
    """
    A wrapper for ALE emulator, with configurations to mimic DeepMind DQN settings.

    Info:
        score: the accumulated reward in the current game
        gameOver: True when the current game is Over
    """

    def __init__(self, rom_file, viz=0,
                 frame_skip=4, nullop_start=30,
                 live_lost_as_eoe=True, max_num_frames=0):
        """
        Args:
            rom_file: path to the rom
            frame_skip: skip every k frames and repeat the action
            viz: visualization to be done.
                Set to 0 to disable.
                Set to a positive number to be the delay between frames to show.
                Set to a string to be a directory to store frames.
            nullop_start: start with random number of null ops.
            live_lost_as_eoe: consider a lost life as end of episode. Useful for training.
            max_num_frames: maximum number of frames per episode.
        """
        super(AtariPlayer, self).__init__()
        if not os.path.isfile(rom_file) and '/' not in rom_file:
            rom_file = get_dataset_path('atari_rom', rom_file)
        assert os.path.isfile(rom_file), \
            "rom {} not found. Please download at {}".format(rom_file, ROM_URL)

        try:
            ALEInterface.setLoggerMode(ALEInterface.Logger.Error)
        except AttributeError:
            if execute_only_once():
                logger.warn("You're not using latest ALE")

        # avoid simulator bugs: https://github.com/mgbellemare/Arcade-Learning-Environment/issues/86
        with _ALE_LOCK:
            self.ale = ALEInterface()
            self.rng = get_rng(self)
            self.ale.setInt(b"random_seed", self.rng.randint(0, 30000))
            self.ale.setInt(b"max_num_frames_per_episode", max_num_frames)
            self.ale.setBool(b"showinfo", False)

            self.ale.setInt(b"frame_skip", 1)
            self.ale.setBool(b'color_averaging', False)
            # manual.pdf suggests otherwise.
            self.ale.setFloat(b'repeat_action_probability', 0.0)

            # viz setup
            if isinstance(viz, six.string_types):
                assert os.path.isdir(viz), viz
                self.ale.setString(b'record_screen_dir', viz)
                viz = 0
            if isinstance(viz, int):
                viz = float(viz)
            self.viz = viz
            if self.viz and isinstance(self.viz, float):
                self.windowname = os.path.basename(rom_file)
                cv2.namedWindow(self.windowname)

            self.ale.loadROM(rom_file.encode('utf-8'))
        self.width, self.height = self.ale.getScreenDims()
        self.actions = self.ale.getMinimalActionSet()

        self.live_lost_as_eoe = live_lost_as_eoe
        self.frame_skip = frame_skip
        self.nullop_start = nullop_start

        self.action_space = spaces.Discrete(len(self.actions))
        self.observation_space = spaces.Box(
            low=0, high=255, shape=(self.height, self.width, 1), dtype=np.uint8)
        self._restart_episode()

    def get_action_meanings(self):
        return [ACTION_MEANING[i] for i in self.actions]

    def _grab_raw_image(self):
        """
        :returns: the current 3-channel image
        """
        m = self.ale.getScreenRGB()
        return m.reshape((self.height, self.width, 3))

    def _current_state(self):
        """
        :returns: a gray-scale (h, w, 1) uint8 image
        """
        ret = self._grab_raw_image()
        # max-pooled over the last screen
        ret = np.maximum(ret, self.last_raw_screen)
        if self.viz:
            if isinstance(self.viz, float):
                cv2.imshow(self.windowname, ret)
                cv2.waitKey(int(self.viz * 1000))
        ret = ret.astype('float32')
        # luminance weights 0.299, 0.587, 0.114, same as rgb2y in torch/image
        ret = cv2.cvtColor(ret, cv2.COLOR_RGB2GRAY)[:, :, np.newaxis]
        return ret.astype('uint8')  # to save some memory

    def _restart_episode(self):
        with _ALE_LOCK:
            self.ale.reset_game()

        # random null-ops start
        n = self.rng.randint(self.nullop_start)
        self.last_raw_screen = self._grab_raw_image()
        for k in range(n):
            if k == n - 1:
                self.last_raw_screen = self._grab_raw_image()
            self.ale.act(0)

    def reset(self):
        if self.ale.game_over():
            self._restart_episode()
        return self._current_state()

    def step(self, act):
        oldlives = self.ale.lives()
        r = 0
        for k in range(self.frame_skip):
            if k == self.frame_skip - 1:
                self.last_raw_screen = self._grab_raw_image()
            r += self.ale.act(self.actions[act])
            newlives = self.ale.lives()
            if self.ale.game_over() or \
                    (self.live_lost_as_eoe and newlives < oldlives):
                break

        isOver = self.ale.game_over()
        if self.live_lost_as_eoe:
            isOver = isOver or newlives < oldlives

        info = {'ale.lives': newlives}
        return self._current_state(), r, isOver, info
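
Because this wrapper subclasses gym.Env, the standard gym rollout loop applies; a sketch, with the ROM name being an assumption:

player = AtariPlayer('breakout.bin', viz=0)
obs = player.reset()
done = False
while not done:
    a = player.action_space.sample()          # random action from the Discrete space
    obs, reward, done, info = player.step(a)  # info carries 'ale.lives'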
Example #46
ep = 1
ale = ALEInterface()
vf = nn.Neural_Net()
vf.cuda()
if load_model:
    vf.main_model.load_state_dict(tc.load(model_path))
    vf.update_model.load_state_dict(tc.load(model_path))
gpu_dtype = tc.cuda.FloatTensor
cpu_dtype = tc.FloatTensor
# device = tc.device("cuda:0" if tc.cuda.is_available() else "cpu")
# vf = nn.Neural_Net().to(device)

# get screen or not
USE_SDL = False
if USE_SDL:
    ale.setBool(b'display_screen', True)

# load game rom file
name_of_the_game = 'space_invaders'
game_path = '/home/juna/atari_project/Arcade-Learning-Environment/roms/' + name_of_the_game + '.bin'
ale.loadROM(game_path.encode())

minimal_actions = ale.getMinimalActionSet()

print('minimal_actions :\n', minimal_actions)

# preallocate the grayscale screen buffer (one 210x160 Atari frame)
screen_data = np.empty((210, 160, 1), dtype=np.uint8)

# initialize the state
image = ale.getScreenGrayscale(screen_data)
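
For reference, a sketch of both calling conventions of getScreenGrayscale: passing a preallocated uint8 buffer reuses it across frames (one fewer allocation per step), while passing nothing should allocate and return a fresh array.

reusable = np.empty((210, 160, 1), dtype=np.uint8)
ale.getScreenGrayscale(reusable)   # fills the preallocated buffer
fresh = ale.getScreenGrayscale()   # allocates and returns a new array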
Example #47
class MyEnv(Environment):
    VALIDATION_MODE = 0

    def __init__(self, rng, rom="ale/breakout.bin", frame_skip=4, 
                 ale_options=[{"key": "random_seed", "value": 0}, 
                              {"key": "color_averaging", "value": True},
                              {"key": "repeat_action_probability", "value": 0.}]):
        self._mode = -1
        self._modeScore = 0.0
        self._modeEpisodeCount = 0

        self._frameSkip = frame_skip if frame_skip >= 1 else 1
        self._randomState = rng

        self._ale = ALEInterface()
        for option in ale_options:
            t = type(option["value"])
            if t is int:
                self._ale.setInt(option["key"], option["value"])
            elif t is float:
                self._ale.setFloat(option["key"], option["value"])
            elif t is bool:
                self._ale.setBool(option["key"], option["value"])
            else:
                raise ValueError("Option {} ({}) is not an int, bool or float.".format(option["key"], t))
        self._ale.loadROM(rom)

        w, h = self._ale.getScreenDims()
        self._screen = np.empty((h, w), dtype=np.uint8)
        self._reducedScreen = np.empty((84, 84), dtype=np.uint8)
        self._actions = self._ale.getMinimalActionSet()

                
    def reset(self, mode):
        if mode == MyEnv.VALIDATION_MODE:
            if self._mode != MyEnv.VALIDATION_MODE:
                self._mode = MyEnv.VALIDATION_MODE
                self._modeScore = 0.0
                self._modeEpisodeCount = 0
            else:
                self._modeEpisodeCount += 1
        elif self._mode != -1: # and thus mode == -1
            self._mode = -1

        self._ale.reset_game()
        for _ in range(self._randomState.randint(15)):
            self._ale.act(0)
        self._ale.getScreenGrayscale(self._screen)
        cv2.resize(self._screen, (84, 84), self._reducedScreen, interpolation=cv2.INTER_NEAREST)
        
        return [4 * [84 * [84 * [0]]]]
        
        
    def act(self, action):
        action = self._actions[action]
        
        reward = 0
        for _ in range(self._frameSkip):
            reward += self._ale.act(action)
            if self.inTerminalState():
                break
            
        self._ale.getScreenGrayscale(self._screen)
        cv2.resize(self._screen, (84, 84), self._reducedScreen, interpolation=cv2.INTER_NEAREST)
  
        self._modeScore += reward
        return np.sign(reward)

    def summarizePerformance(self, test_data_set):
        if not self.inTerminalState():
            self._modeEpisodeCount += 1
        print("== Mean score per episode is {} over {} episodes ==".format(self._modeScore / self._modeEpisodeCount, self._modeEpisodeCount))


    def inputDimensions(self):
        return [(4, 84, 84)]

    def observationType(self, subject):
        return np.uint8

    def nActions(self):
        return len(self._actions)

    def observe(self):
        return [np.array(self._reducedScreen)]

    def inTerminalState(self):
        return self._ale.game_over()
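
The nested list returned by reset() above is simply a zeroed initial state matching inputDimensions(); a quick check:

import numpy as np
state = [4 * [84 * [84 * [0]]]]
assert np.array(state).shape == (1, 4, 84, 84)  # one input of shape (4, 84, 84)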
Example #48
class AtariEmulator(BaseEnvironment):
    def __init__(self,
                 rom_addr,
                 random_start=False,
                 random_seed=6,
                 visualize=True,
                 single_life=False):
        self.ale = ALEInterface()

        self.ale.setInt(b"random_seed", 2 * random_seed)
        # For fuller control on explicit action repeat (>= ALE 0.5.0)
        self.ale.setFloat(b"repeat_action_probability", 0.0)
        # Disable frame_skip and color_averaging
        # See: http://is.gd/tYzVpj
        self.ale.setInt(b"frame_skip", 1)
        self.ale.setBool(b"color_averaging", False)
        full_rom_path = rom_addr
        self.ale.loadROM(str.encode(full_rom_path))
        self.legal_actions = self.ale.getMinimalActionSet()
        self.screen_width, self.screen_height = self.ale.getScreenDims()
        self.lives = self.ale.lives()
        self.writer = imageio.get_writer('breakout0.gif', fps=30)
        self.random_start = random_start
        self.single_life_episodes = single_life
        self.call_on_new_frame = visualize

        # Processed historical frames that will be fed into the network
        # (i.e., four 84x84 images)
        self.observation_pool = ObservationPool(
            np.zeros((84, 84, 4), dtype=np.uint8))
        self.rgb_screen = np.zeros((self.screen_height, self.screen_width, 3),
                                   dtype=np.uint8)
        self.gray_screen = np.zeros((self.screen_height, self.screen_width, 1),
                                    dtype=np.uint8)
        self.frame_pool = FramePool(
            np.empty((2, self.screen_height, self.screen_width),
                     dtype=np.uint8), self.__process_frame_pool)

    def get_legal_actions(self):
        return self.legal_actions

    def __get_screen_image(self):
        """
        Get the current frame luminance
        :return: the current frame
        """
        self.ale.getScreenGrayscale(self.gray_screen)
        if self.call_on_new_frame:
            self.ale.getScreenRGB(self.rgb_screen)
            self.on_new_frame(self.rgb_screen)
        return np.squeeze(self.gray_screen)

    def on_new_frame(self, frame):

        pass

    def __new_game(self):
        """ Restart game """
        self.ale.reset_game()
        self.lives = self.ale.lives()
        if self.random_start:
            wait = random.randint(0, MAX_START_WAIT)
            for _ in range(wait):
                self.ale.act(self.legal_actions[0])

    def __process_frame_pool(self, frame_pool):
        """ Preprocess frame pool """

        img = np.amax(frame_pool, axis=0)
        img = imresize(img, (84, 84), interp='nearest')
        img = img.astype(np.uint8)
        return img

    def __action_repeat(self, a, times=ACTION_REPEAT):
        """ Repeat action and grab screen into frame pool """
        reward = 0
        for i in range(times - FRAMES_IN_POOL):
            reward += self.ale.act(self.legal_actions[a])
        # Only need to add the last FRAMES_IN_POOL frames to the frame pool
        for i in range(FRAMES_IN_POOL):
            reward += self.ale.act(self.legal_actions[a])
            self.frame_pool.new_frame(self.__get_screen_image())
        return reward

    def get_initial_state(self):
        """ Get the initial state """
        self.__new_game()
        for step in range(4):
            _ = self.__action_repeat(0)
            self.observation_pool.new_observation(
                self.frame_pool.get_processed_frame())
        if self.__is_terminal():
            raise Exception('This should never happen.')
        return self.observation_pool.get_pooled_observations()

    def next(self, action):
        """ Get the next state, reward, and game over signal """

        reward = self.__action_repeat(np.argmax(action))
        self.observation_pool.new_observation(
            self.frame_pool.get_processed_frame())
        terminal = self.__is_terminal()
        self.lives = self.ale.lives()
        observation = self.observation_pool.get_pooled_observations()
        return observation, reward, terminal

    def __is_terminal(self):
        if self.single_life_episodes:
            return self.__is_over() or (self.lives > self.ale.lives())
        else:
            return self.__is_over()

    def __is_over(self):
        return self.ale.game_over()

    def get_noop(self):
        return [1.0, 0.0]
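
The frame pool above blends the last FRAMES_IN_POOL raw frames with an element-wise max before downscaling, which hides sprites that flicker between alternate frames. A self-contained sketch of that step, using cv2.resize in place of the deprecated scipy imresize called above:

import numpy as np
import cv2

def pool_and_resize(frame_a, frame_b, size=(84, 84)):
    # max over consecutive frames removes single-frame sprite flicker
    pooled = np.maximum(frame_a, frame_b)
    return cv2.resize(pooled, size, interpolation=cv2.INTER_NEAREST)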
Example #49
File: 0720.py Project: tianlei822/DQN
testExplorationRate = 0.05
SEED = 726
np.random.seed(SEED)
loadModel = False
saveData = False
saveModel = False
gamma = .99
learningRate = 0.00025

display_screen = False
frameSkip = 4

ale = ALEInterface()
ale.setInt('random_seed', SEED)
ale.setInt("frame_skip", frameSkip)
ale.setBool('color_averaging', True)
ale.setBool('sound', False)
ale.setBool('display_screen', False)
ale.loadROM("rom/breakout.bin")
legal_actions = ale.getMinimalActionSet()

width = 84
height = 84

memorySize = 1000000
maxEpisode = 10000000
maxFrame = 50000000

historyLength = 4
batchSize = 32
Example #50
import sys
from random import randrange
from ale_python_interface import ALEInterface

if len(sys.argv) < 2:
  print('Usage: %s rom_file' % sys.argv[0])
  sys.exit()

ale = ALEInterface()

# Get & Set the desired settings
ale.setInt(b'random_seed', 123)

# Set USE_SDL to true to display the screen. ALE must be compiled
# with SDL enabled for this to work. On OSX, pygame init is used to
# proxy-call SDL_main.
USE_SDL = False
if USE_SDL:
  if sys.platform == 'darwin':
    import pygame
    pygame.init()
    ale.setBool(b'sound', False) # Sound doesn't work on OSX
  elif sys.platform.startswith('linux'):
    ale.setBool(b'sound', True)
  ale.setBool(b'display_screen', True)

# Load the ROM file
rom_file = str.encode(sys.argv[1])
ale.loadROM(rom_file)

# Get the list of legal actions
legal_actions = ale.getLegalActionSet()

# Play 10 episodes
for episode in range(10):
  total_reward = 0
  while not ale.game_over():
    a = legal_actions[randrange(len(legal_actions))]
    # Apply an action and get the resulting reward
    reward = ale.act(a)
    total_reward += reward
  print('Episode %d ended with score: %d' % (episode, total_reward))
  ale.reset_game()
Example #51
File: 0906.py Project: boluoweifenda/DQN
#     print 'Options:\n'
#     for i in dict.keys():
#         print " ",i,"=",dict[i]
#
#     print ''

# initialization
np.random.seed(SEED)

ale = ALEInterface()
if SEED is None:
    ale.setInt('random_seed', 0)
else:
    ale.setInt('random_seed', SEED)
ale.setInt("frame_skip", frameSkip)
ale.setBool('color_averaging', True)
ale.setBool('sound', False)
ale.setBool('display_screen', False)
ale.setFloat("repeat_action_probability", 0.0)
ale.loadROM(romPath)
legal_actions = ale.getMinimalActionSet()
n_actions = len(legal_actions)
opt.n_actions = n_actions

explorationRateDelta = (initialExplorationRate - finalExplorationRate)/(finalExplorationFrame-startLearningFrame)
explorationRate = initialExplorationRate + startLearningFrame*explorationRateDelta

if networkType == "CNN":
    width = 84
    height = 84
    Dim = [height,width]
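
The exploration schedule above is a linear anneal: explorationRate is initialized so that, decremented by explorationRateDelta once per frame from frame 0, it passes the initial rate exactly at startLearningFrame and reaches the final rate at finalExplorationFrame. A closed-form sketch with assumed constants (none of these values come from this file):

initialExplorationRate = 1.0      # assumed
finalExplorationRate = 0.1        # assumed
startLearningFrame = 50000        # assumed
finalExplorationFrame = 1000000   # assumed

delta = (initialExplorationRate - finalExplorationRate) / \
        (finalExplorationFrame - startLearningFrame)

def epsilon(frame):
    # closed form of "explorationRate -= explorationRateDelta each frame"
    rate = initialExplorationRate + (startLearningFrame - frame) * delta
    return min(initialExplorationRate, max(finalExplorationRate, rate))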
Example #52
# Author: Ben Goodrich
#
# This is a direct port to python of the shared library example from
# ALE provided in doc/examples/sharedLibraryInterfaceExample.cpp
import sys
import numpy as np
from random import randrange
from ale_python_interface import ALEInterface

ale = ALEInterface()

# Get & Set the desired settings
ale.setInt('random_seed', 123)

# Shows screen of the game to see what is going on
ale.setBool("display_screen", True)
ale.setBool("sound", True)

# Set USE_SDL to true to display the screen. ALE must be compiled
# with SDL enabled for this to work. On OSX, pygame init is used to
# proxy-call SDL_main.
# USE_SDL = False
# if USE_SDL:
#     if sys.platform == 'darwin':
#         import pygame
#
#         pygame.init()
#         ale.setBool('sound', False)  # Sound doesn't work on OSX
#     elif sys.platform.startswith('linux'):
#         ale.setBool('sound', True)
#     ale.setBool('display_screen', True)
Example #53
class AtariGame(Task):
    ''' RL task based on Arcade Game.
    '''

    def __init__(self, rom_path, num_frames=4, live=False, skip_frame=0, mode='normal'):
        self.ale = ALEInterface()
        if live:
            USE_SDL = True
            if USE_SDL:
                if sys.platform == 'darwin':
                    import pygame
                    pygame.init()
                    self.ale.setBool('sound', False) # Sound doesn't work on OSX
                elif sys.platform.startswith('linux'):
                    self.ale.setBool('sound', True)
            self.ale.setBool('display_screen', True)
        self.mode = mode
        self.live = live
        self.ale.loadROM(rom_path)
        self.num_frames = num_frames
        self.frames = []
        self.frame_id = 0
        self.cum_reward = 0
        self.skip_frame = skip_frame
        if mode == 'small':
            img = T.matrix('img')
            self.max_pool = theano.function([img], max_pool_2d(img, [4, 4]))
            self.img_shape = (16, 16)
        else:
            self.img_shape = (84, 84) # image shape according to DQN Nature paper.
        while len(self.frames) < self.num_frames:
            self.step(choice(self.valid_actions, 1)[0])
        self.reset()


    def copy(self):
        import dill as pickle
        return pickle.loads(pickle.dumps(self))


    def reset(self):
        self.ale.reset_game()
        self.frame_id = 0
        self.cum_reward = 0
        if self.skip_frame:
            for frame_i in range(self.skip_frame):
                self.step(choice(self.valid_actions, 1)[0])


    @property
    def _curr_frame(self):
        img = self.ale.getScreenRGB()
        img = rgb2yuv(img)[:, :, 0] # get Y channel, according to Nature paper.
        # print 'RAM', self.ale.getRAM()
        if self.mode == 'small':
            img = self.max_pool(img)
        img = imresize(img, self.img_shape, interp='bicubic')
        return  img


    @property
    def curr_state(self):
        '''
        return raw pixels.
        '''
        return np.array(self.frames, dtype=floatX) / floatX(255.) # normalize


    @property
    def state_shape(self):
        return self.curr_state.shape


    @property
    def num_actions(self):
        return len(self.valid_actions)


    @property
    def valid_actions(self):
        return self.ale.getLegalActionSet()


    def step(self, action):
        reward = self.ale.act(action)
        if len(self.frames) == self.num_frames:
            self.frames = self.frames[1:]
        self.frames.append(self._curr_frame)
        self.frame_id += 1
        #print 'frame_id', self.frame_id
        self.cum_reward += reward
        return reward # TODO: scale the gradient up.


    def is_end(self):
        if np.abs(self.cum_reward) > 0:
            return True
        return self.ale.game_over()


    def visualize(self, fig=1, fname=None, format='png'):
        import matplotlib.pyplot as plt
        fig = plt.figure(fig, figsize=(5,5))
        plt.clf()
        plt.axis('off')
        #res = plt.imshow(self.ale.getScreenRGB())
        res = plt.imshow(self._curr_frame, interpolation='none')
        if fname:
            plt.savefig(fname, format=format)
        else:
            plt.show()
        return res
Example #54
class Environment:
	"""docstring for Environment"""

	BUFFER_LEN = 2
	EPISODE_FRAMES = 18000
	EPOCH_COUNT = 200
	EPOCH_STEPS = 250000
	EVAL_EPS = 0.001
	FRAMES_SKIP = 4
	FRAME_HEIGHT = 84
	FRAME_WIDTH = 84
	MAX_NO_OP = 30
	MAX_REWARD = 1
	
	def __init__(self, rom_name, rng, display_screen = False):
		self.api = ALEInterface()
		self.api.setInt('random_seed', rng.randint(333))
		self.api.setBool('display_screen', display_screen)
		self.api.setFloat('repeat_action_probability', 0.0)
		self.rom_name = rom_name
		self.display_screen = display_screen
		self.rng = rng
		self.repeat = Environment.FRAMES_SKIP
		self.buffer_len = Environment.BUFFER_LEN
		self.height = Environment.FRAME_HEIGHT
		self.width = Environment.FRAME_WIDTH
		self.episode_steps = Environment.EPISODE_FRAMES / Environment.FRAMES_SKIP
		self.merge_id = 0
		self.max_reward = Environment.MAX_REWARD
		self.eval_eps = Environment.EVAL_EPS
		self.log_dir = ''
		self.network_dir = ''

		self.api.loadROM('../rom/' + self.rom_name)
		self.minimal_actions = self.api.getMinimalActionSet()
		original_width, original_height = self.api.getScreenDims()
		self.merge_frame = np.zeros((self.buffer_len
								, original_height
								, original_width)
								, dtype = np.uint8)

	def get_action_count(self):
		return len(self.minimal_actions)

	def train(self, agent, store_freq, folder = None, start_epoch = 0):
		self._open_log_files(agent, folder)
		obs = np.zeros((self.height, self.width), dtype = np.uint8)
		epoch_count = Environment.EPOCH_COUNT

		for epoch in xrange(start_epoch, epoch_count):
			self.need_reset = True
			steps_left = Environment.EPOCH_STEPS

			print "\n" + "=" * 50
			print "Epoch #%d" % (epoch + 1)
			episode = 0
			train_start = time.time()
			while steps_left > 0:
				num_step, _ = self._run_episode(agent, steps_left, obs)
				steps_left -= num_step
				episode += 1
				if steps_left == 0 or episode % 10 == 0:
					print "Finished episode #%d, steps_left = %d" \
						% (episode, steps_left)
			train_end = time.time()

			valid_values = agent.get_validate_values()
			eval_values = self.evaluate(agent)
			test_end = time.time()

			train_time = train_end - train_start
			test_time = test_end - train_end
			step_per_sec = Environment.EPOCH_STEPS * 1. / max(1, train_time)
			print "\tFinished epoch #%d, episode trained = %d\n" \
				"\tValidate values = %.3f, evaluate reward = %.3f\n"\
				"\tTrain time = %.0fs, test time = %.0fs, steps/sec = %.4f" \
					% (epoch + 1, episode, valid_values, eval_values\
						, train_time, test_time, step_per_sec)

			self._update_log_files(agent, epoch + 1, episode
								, valid_values, eval_values
								, train_time, test_time
								, step_per_sec, store_freq)
			gc.collect()

	def evaluate(self, agent, episodes = 30, obs = None):
		print "\n***Start evaluating"
		if obs is None:
			obs = np.zeros((self.height, self.width), dtype = np.uint8)
		sum_reward = 0.0
		sum_step = 0.0
		for episode in xrange(episodes):
			self.need_reset = True
			step, reward = self._run_episode(agent, self.episode_steps, obs
											, self.eval_eps, evaluating = True)
			sum_reward += reward
			sum_step += step
			print "Finished episode %d, reward = %d, step = %d" \
					% (episode + 1, reward, step)
		self.need_reset = True
		print "Average reward per episode = %.4f" % (sum_reward / episodes)
		print "Average step per episode = %.4f" % (sum_step / episodes)
		return sum_reward / episodes

	def _prepare_game(self):
		if self.need_reset or self.api.game_over():
			self.api.reset_game()
			self.need_reset = False
			if Environment.MAX_NO_OP > 0:
				num_no_op = self.rng.randint(Environment.MAX_NO_OP + 1) \
							+ self.buffer_len
				for _ in xrange(num_no_op):
					self.api.act(0)

		for _ in xrange(self.buffer_len):
			self._update_buffer()

	def _run_episode(self, agent, steps_left, obs
					, eps = 0.0, evaluating = False):
		self._prepare_game()

		start_lives = self.api.lives()
		step_count = 0
		sum_reward = 0
		is_terminal = False
		while step_count < steps_left and not is_terminal:
			self._get_screen(obs)
			action_id, _ = agent.get_action(obs, eps, evaluating)
			
			reward = self._repeat_action(self.minimal_actions[action_id])
			reward_clip = reward
			if self.max_reward > 0:
				reward_clip = np.clip(reward, -self.max_reward, self.max_reward)

			life_lost = not evaluating and self.api.lives() < start_lives
			is_terminal = self.api.game_over() or life_lost \
						or step_count + 1 >= steps_left

			agent.add_experience(obs, is_terminal, action_id, reward_clip
								, evaluating)
			sum_reward += reward
			step_count += 1
			
		return step_count, sum_reward

	def _update_buffer(self):
		self.api.getScreenGrayscale(self.merge_frame[self.merge_id, ...])
		self.merge_id = (self.merge_id + 1) % self.buffer_len

	def _repeat_action(self, action):
		reward = 0
		for i in xrange(self.repeat):
			reward += self.api.act(action)
			if i + self.buffer_len >= self.repeat:
				self._update_buffer()
		return reward

	def _get_screen(self, resized_frame):
		self._resize_frame(self.merge_frame.max(axis = 0), resized_frame)
				
	def _resize_frame(self, src_frame, dst_frame):
		cv2.resize(src = src_frame, dst = dst_frame,
					dsize = (self.width, self.height),
					interpolation = cv2.INTER_LINEAR)

	def _open_log_files(self, agent, folder):
		time_str = time.strftime("_%m-%d-%H-%M", time.localtime())
		base_rom_name = os.path.splitext(os.path.basename(self.rom_name))[0]


		if folder is not None:
			self.log_dir = folder
			self.network_dir = self.log_dir + '/network'
		else:
			self.log_dir = '../run_results/' + base_rom_name + time_str
			self.network_dir = self.log_dir + '/network'

		info_name = get_next_name(self.log_dir, 'info', 'txt')
		git_name = get_next_name(self.log_dir, 'git-diff', '')

		try:
			os.stat(self.log_dir)
		except OSError:
			os.makedirs(self.log_dir)

		try:
			os.stat(self.network_dir)
		except OSError:
			os.makedirs(self.network_dir)

		with open(os.path.join(self.log_dir, info_name), 'w') as f:
			f.write('Commit: ' + subprocess.check_output(['git', 'rev-parse'
														, 'HEAD']))
			f.write('Run command: ')
			f.write(' '.join(pipes.quote(x) for x in sys.argv))
			f.write('\n\n')
			f.write(agent.get_info())
			write_info(f, Environment)
			write_info(f, agent.__class__)
			write_info(f, agent.network.__class__)

		# From https://github.com/spragunr/deep_q_rl/pull/49/files
		with open(os.path.join(self.log_dir, git_name), 'w') as f:
			f.write(subprocess.check_output(['git', 'diff', 'HEAD']))

		if folder is not None:
			return

		with open(os.path.join(self.log_dir, 'results.csv'), 'w') as f:
			f.write("epoch,episode_train,validate_values,evaluate_reward"\
				",train_time,test_time,steps_per_second\n")

		mem = psutil.virtual_memory()
		with open(os.path.join(self.log_dir, 'memory.csv'), 'w') as f:
			f.write("epoch,available,free,buffers,cached"\
					",available_readable,used_percent\n")
			f.write("%d,%d,%d,%d,%d,%s,%.1f\n" % \
					(0, mem.available, mem.free, mem.buffers, mem.cached
					, bytes2human(mem.available), mem.percent))

	def _update_log_files(self, agent, epoch, episode, valid_values
						, eval_values, train_time, test_time, step_per_sec
						, store_freq):
		print "Updating log files"
		with open(self.log_dir + '/results.csv', 'a') as f:
			f.write("%d,%d,%.4f,%.4f,%d,%d,%.4f\n" % \
						(epoch, episode, valid_values, eval_values
						, train_time, test_time, step_per_sec))

		mem = psutil.virtual_memory()
		with open(self.log_dir + '/memory.csv', 'a') as f:
			f.write("%d,%d,%d,%d,%d,%s,%.1f\n" % \
					(epoch, mem.available, mem.free, mem.buffers, mem.cached
					, bytes2human(mem.available), mem.percent))

		agent.dump_network(self.network_dir + ('/%03d' % (epoch)) + '.npz')

		if (store_freq >= 0 and epoch >= Environment.EPOCH_COUNT) or \
				(store_freq > 0 and (epoch % store_freq == 0)):
			agent.dump_exp(self.network_dir + '/exp.npz')

	def _setup_record(self, network_file):
		file_name, _ = os.path.splitext(os.path.basename(network_file))
		time_str = time.strftime("_%m-%d-%H-%M", time.localtime())
		img_dir = os.path.dirname(network_file) + '/images_' \
					+ file_name + time_str
		rom_name, _ = os.path.splitext(self.rom_name)
		out_name = os.path.dirname(network_file) + '/' + rom_name + '_' \
					+ file_name + time_str + '.mov'
		print out_name

		try:
			os.stat(img_dir)
		except OSError:
			os.makedirs(img_dir)

		self.api.setString('record_screen_dir', img_dir)
		self.api.loadROM('../rom/' + self.rom_name)
		return img_dir, out_name

	def record_run(self, agent, network_file, episode_id = 1):
		if episode_id > 1:
			self.evaluate(agent, episode_id - 1)
			system_state = self.api.cloneSystemState()

		img_dir, out_name = self._setup_record(network_file)

		if episode_id > 1:
			self.api.restoreSystemState(system_state)

		self.evaluate(agent, 1)
		script = \
				"""
					{
						ffmpeg -r 60 -i %s/%%06d.png -f mov -c:v libx264 %s
					} || {
						avconv -r 60 -i %s/%%06d.png -f mov -c:v libx264 %s
					}
				""" % (img_dir, out_name, img_dir, out_name)
		os.system(script)
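
A hedged sketch of driving this trainer end to end; `agent` is an assumption standing for any object that provides get_action, add_experience, get_validate_values, get_info and dump_network as referenced in the methods above.

import numpy as np

rng = np.random.RandomState(123)
env = Environment('breakout.bin', rng, display_screen=False)
env.train(agent, store_freq=10)                # `agent` assumed, see above
mean_reward = env.evaluate(agent, episodes=5)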
Example #55
display_screen = False
frameSkip = 4  # referenced below but missing from this snippet; 4 assumed

batchSize = 50


ale = ALEInterface()
ale.setInt('random_seed', 123)
ale.setInt("frame_skip",frameSkip)

USE_SDL = True
if USE_SDL:
  if sys.platform == 'darwin':
    import pygame
    pygame.init()
    ale.setBool('sound', False) # Sound doesn't work on OSX
  elif sys.platform.startswith('linux'):
    ale.setBool('sound', False)
  ale.setBool('display_screen', display_screen)

ale.loadROM("rom/Breakout.A26")
legal_actions = ale.getMinimalActionSet()



n_senses = 82*72
n_actions = len(legal_actions)
temporal_window = 1
hiddenSize1 = 256
hiddenSize2 = 32
network_size = n_senses*(temporal_window) + n_actions*(temporal_window-1)
Example #56
class AtariEmulator:

	def __init__(self, args):
		''' Initialize Atari environment '''

		# Parameters
		self.buffer_length = args.buffer_length
		self.screen_dims = args.screen_dims
		self.frame_skip = args.frame_skip
		self.blend_method = args.blend_method
		self.reward_processing = args.reward_processing
		self.max_start_wait = args.max_start_wait
		self.history_length = args.history_length
		self.start_frames_needed = self.buffer_length - 1 + ((args.history_length - 1) * self.frame_skip)

		#Initialize ALE instance
		self.ale = ALEInterface()
		self.ale.setFloat(b'repeat_action_probability', 0.0)
		if args.watch:
			self.ale.setBool(b'sound', True)
			self.ale.setBool(b'display_screen', True)
		self.ale.loadROM(str.encode(args.rom_path + '/' + args.game + '.bin'))

		self.buffer = np.empty((self.buffer_length, 210, 160))
		self.current = 0
		self.action_set = self.ale.getMinimalActionSet()
		self.lives = self.ale.lives()

		self.reset()


	def get_possible_actions(self):
		''' Return list of possible actions for game '''
		return self.action_set

	def get_screen(self):
		''' Add screen to frame buffer '''
		self.buffer[self.current] = np.squeeze(self.ale.getScreenGrayscale())
		self.current = (self.current + 1) % self.buffer_length


	def reset(self):
		self.ale.reset_game()
		self.lives = self.ale.lives()

		if self.max_start_wait < 0:
			print("ERROR: max start wait decreased beyond 0")
			sys.exit()
		elif self.max_start_wait <= self.start_frames_needed:
			wait = 0
		else:
			wait = random.randint(0, self.max_start_wait - self.start_frames_needed)
		for _ in range(wait):
			self.ale.act(self.action_set[0])

		# Fill frame buffer
		self.get_screen()
		for _ in range(self.buffer_length - 1):
			self.ale.act(self.action_set[0])
			self.get_screen()
		# get initial states (same 5-tuple layout as run_step's return value)
		state = [(self.preprocess(), 0, 0, False, 0)]
		for step in range(self.history_length - 1):
			state.append(self.run_step(0))

		# make sure agent hasn't died yet
		if self.isTerminal():
			print("Agent lost during start wait.  Decreasing max_start_wait by 1")
			self.max_start_wait -= 1
			return self.reset()

		return state


	def run_step(self, action):
		''' Apply action to game and return next screen and reward '''

		raw_reward = 0
		for step in range(self.frame_skip):
			raw_reward += self.ale.act(self.action_set[action])
			self.get_screen()

		reward = None
		if self.reward_processing == 'clip':
			reward = np.clip(raw_reward, -1, 1)
		else:
			reward = raw_reward

		terminal = self.isTerminal()
		self.lives = self.ale.lives()

		return (self.preprocess(), action, reward, terminal, raw_reward)



	def preprocess(self):
		''' Preprocess frame for agent '''

		img = None

		if self.blend_method == "max":
			img = np.amax(self.buffer, axis=0)

		return cv2.resize(img, self.screen_dims, interpolation=cv2.INTER_LINEAR)

	def isTerminal(self):
		return (self.isGameOver() or (self.lives > self.ale.lives()))


	def isGameOver(self):
		return self.ale.game_over()
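
A sketch of the episode loop this emulator supports; the args namespace mirrors the fields __init__ reads, and every value below is an illustrative assumption.

from types import SimpleNamespace

args = SimpleNamespace(buffer_length=2, screen_dims=(84, 84), frame_skip=4,
                       blend_method='max', reward_processing='clip',
                       max_start_wait=30, history_length=4, watch=False,
                       rom_path='./roms', game='breakout')   # all values assumed
emu = AtariEmulator(args)
state = emu.reset()
while not emu.isTerminal():
    screen, action, reward, terminal, raw = emu.run_step(0)  # constant no-op policy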
Example #57
# ale.loadROM(rom_file)
#
# # Get the list of legal actions
# legal_actions = ale.getLegalActionSet()
#
# # Play 10 episodes
# for episode in range(10):
#   total_reward = 0
#   while not ale.game_over():
#     a = legal_actions[randrange(len(legal_actions))]
#     # Apply an action and get the resulting reward
#     reward = ale.act(a);
#     total_reward += reward
#   print('Episode %d ended with score: %d' % (episode, total_reward))
#   ale.reset_game()



from ale_python_interface import ALEInterface
ale = ALEInterface()
ale.setBool('display_screen', True)
rom_file = "./roms/breakoutv.bin"
ale.loadROM(rom_file)
ale.reset_game()
ale.getScreenRGB()
ale.reset_game()
ale.act(0)



Example #58
class MyEnv(Environment):
    VALIDATION_MODE = 0

    def __init__(self,
                 rng,
                 rom="ale/breakout.bin",
                 frame_skip=4,
                 ale_options=[{
                     "key": "random_seed",
                     "value": 0
                 }, {
                     "key": "color_averaging",
                     "value": True
                 }, {
                     "key": "repeat_action_probability",
                     "value": 0.
                 }]):
        self._mode = -1
        self._modeScore = 0.0
        self._modeEpisodeCount = 0

        self._frameSkip = frame_skip if frame_skip >= 1 else 1
        self._randomState = rng

        self._ale = ALEInterface()
        for option in ale_options:
            t = type(option["value"])
            if t is int:
                self._ale.setInt(option["key"], option["value"])
            elif t is float:
                self._ale.setFloat(option["key"], option["value"])
            elif t is bool:
                self._ale.setBool(option["key"], option["value"])
            else:
                raise ValueError(
                    "Option {} ({}) is not an int, bool or float.".format(
                        option["key"], t))
        self._ale.loadROM(rom)

        w, h = self._ale.getScreenDims()
        self._screen = np.empty((h, w), dtype=np.uint8)
        self._reducedScreen = np.empty((84, 84), dtype=np.uint8)
        self._actions = self._ale.getMinimalActionSet()

    def reset(self, mode):
        if mode == MyEnv.VALIDATION_MODE:
            if self._mode != MyEnv.VALIDATION_MODE:
                self._mode = MyEnv.VALIDATION_MODE
                self._modeScore = 0.0
                self._modeEpisodeCount = 0
            else:
                self._modeEpisodeCount += 1
        elif self._mode != -1:  # and thus mode == -1
            self._mode = -1

        self._ale.reset_game()
        for _ in range(self._randomState.randint(15)):
            self._ale.act(0)
        self._ale.getScreenGrayscale(self._screen)
        cv2.resize(self._screen, (84, 84),
                   self._reducedScreen,
                   interpolation=cv2.INTER_NEAREST)

        return [4 * [84 * [84 * [0]]]]

    def act(self, action):
        action = self._actions[action]

        reward = 0
        for _ in range(self._frameSkip):
            reward += self._ale.act(action)
            if self.inTerminalState():
                break

        self._ale.getScreenGrayscale(self._screen)
        cv2.resize(self._screen, (84, 84),
                   self._reducedScreen,
                   interpolation=cv2.INTER_NEAREST)

        self._modeScore += reward
        return np.sign(reward)

    def summarizePerformance(self, test_data_set):
        if not self.inTerminalState():
            self._modeEpisodeCount += 1
        print("== Mean score per episode is {} over {} episodes ==".format(
            self._modeScore / self._modeEpisodeCount, self._modeEpisodeCount))

    def inputDimensions(self):
        return [(4, 84, 84)]

    def observationType(self, subject):
        return np.uint8

    def nActions(self):
        return len(self._actions)

    def observe(self):
        return [np.array(self._reducedScreen)]

    def inTerminalState(self):
        return self._ale.game_over()
Example #59
class ALEEnvironment(Environment):
  def __init__(self, rom_file, args):
    from ale_python_interface import ALEInterface
    self.ale = ALEInterface()
    if args.display_screen:
      if sys.platform == 'darwin':
        import pygame
        pygame.init()
        self.ale.setBool('sound', False) # Sound doesn't work on OSX
      elif sys.platform.startswith('linux'):
        self.ale.setBool('sound', True)
      self.ale.setBool('display_screen', True)

    self.ale.setInt('frame_skip', args.frame_skip)
    self.ale.setFloat('repeat_action_probability', args.repeat_action_probability)
    self.ale.setBool('color_averaging', args.color_averaging)

    if args.random_seed:
      self.ale.setInt('random_seed', args.random_seed)

    if args.record_screen_path:
      if not os.path.exists(args.record_screen_path):
        logger.info("Creating folder %s" % args.record_screen_path)
        os.makedirs(args.record_screen_path)
      logger.info("Recording screens to %s", args.record_screen_path)
      self.ale.setString('record_screen_dir', args.record_screen_path)

    if args.record_sound_filename:
      logger.info("Recording sound to %s", args.record_sound_filename)
      self.ale.setBool('sound', True)
      self.ale.setString('record_sound_filename', args.record_sound_filename)

    self.ale.loadROM(rom_file)

    if args.minimal_action_set:
      self.actions = self.ale.getMinimalActionSet()
      logger.info("Using minimal action set with size %d" % len(self.actions))
    else:
      self.actions = self.ale.getLegalActionSet()
      logger.info("Using full action set with size %d" % len(self.actions))
    logger.debug("Actions: " + str(self.actions))

    self.screen_width = args.screen_width
    self.screen_height = args.screen_height

    self.life_lost = False

  def numActions(self):
    return len(self.actions)

  def restart(self):
    # In test mode, the game is simply initialized. In train mode, if the game
    # is in terminal state due to a life loss but not yet game over, then only
    # life loss flag is reset so that the next game starts from the current
    # state. Otherwise, the game is simply initialized.
    if (
        self.mode == 'test' or
        not self.life_lost or  # `reset` called in a middle of episode
        self.ale.game_over()  # all lives are lost
    ):
      self.ale.reset_game()
    self.life_lost = False

  def act(self, action):
    lives = self.ale.lives()
    reward = self.ale.act(self.actions[action])
    self.life_lost = (lives != self.ale.lives())
    return reward

  def getScreen(self):
    screen = self.ale.getScreenGrayscale()
    resized = cv2.resize(screen, (self.screen_width, self.screen_height))
    return resized

  def isTerminal(self):
    if self.mode == 'train':
      return self.ale.game_over() or self.life_lost
    return self.ale.game_over()
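
Finally, a hedged sketch of wiring up the wrapper above; the args fields mirror what __init__ consumes (all values assumed), and note that `mode` is read by restart() and isTerminal() but never set in __init__, so the caller must assign it.

from types import SimpleNamespace

args = SimpleNamespace(display_screen=False, frame_skip=4,
                       repeat_action_probability=0.0, color_averaging=True,
                       random_seed=123, record_screen_path=None,
                       record_sound_filename=None, minimal_action_set=True,
                       screen_width=84, screen_height=84)    # all values assumed
env = ALEEnvironment('breakout.bin', args)
env.mode = 'train'    # restart() and isTerminal() read this attribute
env.restart()
while not env.isTerminal():
    reward = env.act(0)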