Example #1
File: emulator.py  Project: amharc/jnp3
class Emulator(object):
    FRAME_SKIP = 4
    SCREEN_WIDTH = 84
    SCREEN_HEIGHT = 84

    def __init__(self, rom):
        self.ale = ALEInterface()
        self.max_num_frames_per_episode = 100000 #self.ale.getInt('max_num_frames_per_episode')
        self.ale.setInt('frame_skip', self.FRAME_SKIP)
        self.ale.loadROM('roms/' + rom)
        self.actions = self.ale.getMinimalActionSet()
        
    def reset(self):
        self.ale.reset_game()

    def image(self):
        screen = self.ale.getScreenGrayscale()
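        # cv2.resize takes dsize as (width, height); the swapped order below is
        # harmless here only because SCREEN_WIDTH == SCREEN_HEIGHT == 84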
        screen = cv2.resize(screen, (self.SCREEN_HEIGHT, self.SCREEN_WIDTH))
        return np.reshape(screen, (self.SCREEN_HEIGHT, self.SCREEN_WIDTH))

    def act(self, action):
        return self.ale.act(action)

    def terminal(self):
        return self.ale.game_over()
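
A minimal usage sketch for the wrapper above, assuming ale_python_interface, cv2 and numpy are importable and that roms/breakout.bin exists (the ROM name is illustrative):

import random

emu = Emulator('breakout.bin')
emu.reset()
episode_reward = 0
while not emu.terminal():
    a = random.choice(emu.actions)        # a raw ALE action from the minimal set
    episode_reward += emu.act(a)
print('episode reward:', episode_reward)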
Example #2
class Emulate:
  def __init__(self, rom_file, display_screen=False, frame_skip=4,
               screen_height=84, screen_width=84, repeat_action_probability=0,
               color_averaging=True, random_seed=0,
               record_screen_path='screen_pics', record_sound_filename=None,
               minimal_action_set=True):
    self.ale = ALEInterface()
    if display_screen:
      if sys.platform == 'darwin':
        import pygame
        pygame.init()
        self.ale.setBool('sound', False) # Sound doesn't work on OSX
      elif sys.platform.startswith('linux'):
        self.ale.setBool('sound', True)
      self.ale.setBool('display_screen', True)

    self.ale.setInt('frame_skip', frame_skip)
    self.ale.setFloat('repeat_action_probability', repeat_action_probability)
    self.ale.setBool('color_averaging', color_averaging)

    if random_seed:
      self.ale.setInt('random_seed', random_seed)

    self.ale.loadROM(rom_file)

    if minimal_action_set:
      self.actions = self.ale.getMinimalActionSet()
    else:
      self.actions = self.ale.getLegalActionSet()

    self.dims = (screen_width,screen_height)

  def numActions(self):
    return len(self.actions)

  def getActions(self):
    return self.actions

  def restart(self):
    self.ale.reset_game()

  def act(self, action):
    reward = self.ale.act(self.actions[action])
    return reward

  def getScreen(self):
    screen = self.ale.getScreenGrayscale()
    resized = cv2.resize(screen, self.dims)
    return resized

  def getScreenGray(self):
    screen = self.ale.getScreenGrayscale()
    resized = cv2.resize(screen, self.dims)
    rotated = np.rot90(resized,k=1)
    return rotated

  def getScreenColor(self):
    screen = self.ale.getScreenRGB()
    resized = cv2.resize(screen, self.dims)
    rotated = np.rot90(resized,k=1)
    return rotated

  def isTerminal(self):
    return self.ale.game_over()
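
Unlike Example #1, act() here takes an index into the action set rather than a raw ALE action. A short sketch, assuming numpy is imported as np and the ROM path exists:

env = Emulate('roms/breakout.bin')                         # illustrative path
env.restart()
score = 0
while not env.isTerminal():
    score += env.act(np.random.randint(env.numActions()))  # pass an index
print('score:', score)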
Example #3
class pyrlcade_environment(object):
    def init(self, rom_file, ale_frame_skip):

        self.ale = ALEInterface()

        self.max_frames_per_episode = self.ale.getInt("max_num_frames_per_episode")
        self.ale.setInt("random_seed", 123)
        self.ale.setBool("color_averaging", False)  # equivalent of the old 'disable_color_averaging' flag
        self.ale.setInt("frame_skip", ale_frame_skip)

        self.ale.loadROM(rom_file)
        self.legal_actions = self.ale.getMinimalActionSet()
        ram_size = self.ale.getRAMSize()
        self.ram = np.zeros((ram_size), dtype=np.uint8)
        # getRAM fills the provided buffer in place and returns it
        self.state = self.ale.getRAM(self.ram)

    def reset_state(self):
        self.ale.reset_game()

    def set_action(self,a):
        self.action = a

    def step(self):
        self.reward = self.ale.act(self.action)
        is_terminal = self.ale.game_over()
        return is_terminal

    def get_state(self):
        self.ale.getRAM(self.ram)
        return self.ram

    def get_reward(self):
        return self.reward
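
A usage sketch for the RAM-state environment above, assuming a local ROM file (the path is illustrative):

env = pyrlcade_environment()
env.init('roms/pong.bin', ale_frame_skip=4)
env.reset_state()
terminal = False
while not terminal:
    env.set_action(env.legal_actions[0])    # set_action expects a raw ALE action
    terminal = env.step()
print('final RAM state:', env.get_state())  # uint8 vector of getRAMSize() bytes
print('last reward:', env.get_reward())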
Example #4
File: emulator.py  Project: amharc/jnp3
class Emulator(object):
    def __init__(self, settings):
        self.ale = ALEInterface()
        self.ale.setInt('frame_skip', settings['frame_skip'])
        self.ale.setInt('random_seed', np.random.RandomState().randint(1000))
        self.ale.setBool('color_averaging', False)
        self.ale.loadROM('roms/' + settings['rom_name'])
        self.actions = self.ale.getMinimalActionSet()
        self.width = settings['screen_width']
        self.height = settings['screen_height']
        
    def reset(self):
        self.ale.reset_game()

    def image(self):
        screen = self.ale.getScreenGrayscale()
        screen = cv2.resize(screen, (self.height, self.width),
                interpolation=cv2.INTER_LINEAR)
        return np.reshape(screen, (self.height, self.width))

    def full_image(self):
        screen = self.ale.getScreenRGB()
        return screen

    def act(self, action):
        return self.ale.act(self.actions[action])

    def terminal(self):
        return self.ale.game_over()
Example #5
def launch():
	logging.basicConfig(level=logging.INFO)
	myArgs = getParameters()
	rom = myArgs.game
	full_rom_path = os.path.join(myArgs.base_rom_path,rom)
	rng = np.random.RandomState()
	ale = ALEInterface()
	ale.setInt('random_seed',38)
	ale.setBool('display_screen',myArgs.display_screen)
	ale.setInt('frame_skip',myArgs.frame_skip)
	ale.setFloat('repeat_action_probability',myArgs.repeat_action_probability)

	ale.loadROM(full_rom_path)
	valid_actions = ale.getMinimalActionSet()
	'''for episode in xrange(10):
		total_reward = 0
		while not ale.game_over():
			from random import randrange
			a = valid_actions[randrange(len(valid_actions))]
			ale.act(a)
			#print reward
			#print ale.getScreenRGB()

			#total_reward += reward
			#print 'Episode', episode, 'ended with score:', total_reward
		ale.reset_game()
	'''
	memory_pool = ReplayMemory(myArgs.memory_size,rng)
	network_model = buildNetwork(myArgs.resized_height,myArgs.resized_width,myArgs.rmsp_epsilon,myArgs.rmsp_rho,myArgs.learning_rate,len(valid_actions))
	ddqn = DDQN(network_model,valid_actions,myArgs.target_nn_update_frequency,myArgs.discount,myArgs.phi_len)
	agent = Agent(myArgs,ddqn,memory_pool,valid_actions,rng)
	train_agent = TrainMyAgent(myArgs,ale,agent,valid_actions,rng)
	train_agent.run()
Example #6
class Environment:
  def __init__(self, rom_file, args):
    self.ale = ALEInterface()
    if args.display_screen:
      if sys.platform == 'darwin':
        import pygame
        pygame.init()
        self.ale.setBool('sound', False) # Sound doesn't work on OSX
      elif sys.platform.startswith('linux'):
        self.ale.setBool('sound', True)
      self.ale.setBool('display_screen', True)

    self.ale.setInt('frame_skip', args.frame_skip)
    self.ale.setFloat('repeat_action_probability', args.repeat_action_probability)
    self.ale.setBool('color_averaging', args.color_averaging)

    if args.random_seed:
      self.ale.setInt('random_seed', args.random_seed)

    if args.record_screen_path:
      if not os.path.exists(args.record_screen_path):
        logger.info("Creating folder %s" % args.record_screen_path)
        os.makedirs(args.record_screen_path)
      logger.info("Recording screens to %s", args.record_screen_path)
      self.ale.setString('record_screen_dir', args.record_screen_path)

    if args.record_sound_filename:
      logger.info("Recording sound to %s", args.record_sound_filename)
      self.ale.setBool('sound', True)
      self.ale.setString('record_sound_filename', args.record_sound_filename)

    self.ale.loadROM(rom_file)

    if args.minimal_action_set:
      self.actions = self.ale.getMinimalActionSet()
      logger.info("Using minimal action set with size %d" % len(self.actions))
    else:
      self.actions = self.ale.getLegalActionSet()
      logger.info("Using full action set with size %d" % len(self.actions))
    logger.debug("Actions: " + str(self.actions))

    self.dims = (args.screen_height, args.screen_width)
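    # note: cv2.resize (see getScreen) takes dsize as (width, height); this
    # (height, width) order only works because the screens are square 84x84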

  def numActions(self):
    return len(self.actions)

  def restart(self):
    self.ale.reset_game()

  def act(self, action):
    reward = self.ale.act(self.actions[action])
    return reward

  def getScreen(self):
    screen = self.ale.getScreenGrayscale()
    resized = cv2.resize(screen, self.dims)
    return resized

  def isTerminal(self):
    return self.ale.game_over()
Example #7
def map_game_to_ALE(game_name, interactive):
    game_path = '/cvgl/u/nishith/MultiTaskRL/libs/DQN_ale/roms/' \
                + game_name + '.bin'
    print(game_path)
    game = ALEInterface()
    if interactive:
        setup_display(game)
    game.loadROM(game_path)
    return game
Example #8
class AtariMDP(MDP, Serializable):

    def __init__(self, rom_path, obs_type=OBS_RAM, frame_skip=4):
        Serializable.__init__(self, rom_path, obs_type, frame_skip)
        self.options = (rom_path, obs_type, frame_skip)
        
        self.ale = ALEInterface()
        self.ale.loadROM(rom_path)        
        self._rom_path = rom_path
        self._obs_type = obs_type
        self._action_set = self.ale.getMinimalActionSet()
        self.frame_skip = frame_skip


    def get_image(self):
        return to_rgb(self.ale)
    def get_ram(self):
        return to_ram(self.ale)
    def game_over(self):
        return self.ale.game_over()
    def reset_game(self):
        return self.ale.reset_game()

    @property
    def n_actions(self):
        return len(self.action_set)

    def get_obs(self):
        if self._obs_type == OBS_RAM:
            return self.get_ram()[None,:]
        else:
            assert self._obs_type == OBS_IMAGE
            return self.get_image()[None,:,:,:]

    def step(self, a):

        reward = 0.0
        action = self.action_set[a]
        for _ in range(self.frame_skip):
            reward += self.ale.act(action)
        ob = self.get_obs().reshape(1,-1)
        return ob, np.array([reward]), self.ale.game_over()

    # return: (states, observations)
    def reset(self):
        self.ale.reset_game()
        return self.get_obs()

    @property
    def action_set(self):
        return self._action_set

    def plot(self):
        import cv2
        cv2.imshow("atarigame",self.get_image()) #pylint: disable=E1101
        cv2.waitKey(10) #pylint: disable=E1101
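
A short rollout sketch for AtariMDP, assuming the OBS_RAM constant from the source module and an existing ROM path:

mdp = AtariMDP('roms/breakout.bin', obs_type=OBS_RAM)  # illustrative path
ob = mdp.reset()
done = False
while not done:
    a = np.random.randint(mdp.n_actions)               # random action index
    ob, reward, done = mdp.step(a)                     # reward is a 1-element array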
Example #9
File: emulator.py  Project: hercky/a3c
class Emulator:
    def __init__(self):
    
        self.ale = ALEInterface()
        
        # turn off the sound
        self.ale.setBool('sound', False)
        
        self.ale.setBool('display_screen', EMULATOR_DISPLAY)

        self.ale.setInt('frame_skip', FRAME_SKIP)
        self.ale.setFloat('repeat_action_probability', REPEAT_ACTION_PROBABILITY)
        self.ale.setBool('color_averaging', COLOR_AVERAGING)

        self.ale.setInt('random_seed', RANDOM_SEED)

        if RECORD_SCENE_PATH:
            self.ale.setString('record_screen_dir', RECORD_SCENE_PATH)


        self.ale.loadROM(ROM_PATH)

        self.actions = self.ale.getMinimalActionSet()
        logger.info("Actions: " + str(self.actions))

        self.dims = DIMS
        #self.start_lives = self.ale.lives()

    def getActions(self):
        return self.actions

    def numActions(self):
        return len(self.actions)

    def restart(self):
        self.ale.reset_game()
        # can be omitted

    def act(self, action):
        reward = self.ale.act(self.actions[action])
        return reward

    def getScreen(self):
        # why grayscale ?
        screen = self.ale.getScreenGrayscale()
        resized = cv2.resize(screen, self.dims)
        # normalize
        #resized /= COLOR_SCALE

        return resized

    def isTerminal(self):
        # while training deepmind only ends when agent dies
        #terminate = DEATH_END and TRAIN and (self.ale.lives() < self.start_lives)

        return self.ale.game_over()
Example #10
def init():

  pygame.init()
  rom_path = '/Users/maciej/Development/atari-roms'
  ale = ALEInterface()
  ale.setInt('random_seed', 123)
  ale.setInt('frame_skip', 1)  # frame_skip is an int option, not a bool
  ale.loadROM(rom_path + '/space_invaders.bin')
  ale.setFloat("repeat_action_probability", 0)
  return ale
Example #11
 def _init_ale(rand_seed, rom_file):
     assert os.path.exists(rom_file), '%s does not exist.' % rom_file
     ale = ALEInterface()
     ale.setInt('random_seed', rand_seed)
     ale.setBool('showinfo', False)
     ale.setInt('frame_skip', 1)
     ale.setFloat('repeat_action_probability', 0.0)
     ale.setBool('color_averaging', False)
     ale.loadROM(rom_file)
     return ale
Example #12
def peekActionSize(rom):
  if args.use_gym:
    import gym
    env = gym.make(args.gym_env)
    return env.action_space.n
  else:
    from ale_python_interface import ALEInterface
    ale = ALEInterface()
    ale.loadROM(rom.encode('ascii'))
    return len(ale.getMinimalActionSet())
Example #13
 def loadROM(self, rom_file):
     ALEInterface.loadROM(self, rom_file)
     if self.minimum_actions:
         self.legal_actions = self.getMinimalActionSet()
     else:
         self.legal_actions = self.getLegalActionSet()
     self.num_actions = len(self.legal_actions)
     self.setInt('frame_skip', self.frame_skip)
     if self.random_seed is not None:
         self.setInt('random_seed', self.random_seed)
     self.height, self.width = self.getScreenDims()
Example #14
def init(game, display_screen=False, record_dir=None):
    if display_screen:
        import pygame
        pygame.init()
    ale = ALEInterface()
    ale.setBool('display_screen', display_screen)
    ale.setInt('random_seed', 123)
    if record_dir is not None:
        ale.setString("record_screen_dir", record_dir)
    ale.loadROM('{game}.bin'.format(game=game))
    ale.setFloat("repeat_action_probability", 0)

    return ale
Example #15
def init(display_screen=False):
    if display_screen:
        import pygame
        pygame.init()
    rom_path = '.'
    ale = ALEInterface()
    ale.setBool('display_screen', display_screen)
    ale.setInt('random_seed', 123)
    ale.setInt('frame_skip', 1)  # frame_skip is an int option, not a bool
    ale.loadROM(rom_path + '/space_invaders.bin')
    ale.setFloat("repeat_action_probability", 0)

    return ale
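
The init() helpers in Examples #10, #14, #15 and #17 all follow the same pattern; a hedged random-play loop over the returned interface, assuming space_invaders.bin sits in the working directory:

from random import randrange

ale = init(display_screen=False)
actions = ale.getMinimalActionSet()
total = 0
while not ale.game_over():
    total += ale.act(actions[randrange(len(actions))])
ale.reset_game()
print('random episode score:', total)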
Example #16
class Environment:
    def __init__(self, show_screen, history_length):
        self.ale = ALEInterface()
        self.ale.setInt('frame_skip', 4)
        self.history = None
        self.history_length = history_length
        if show_screen:
            self.display_screen()
        self.load_game()
        (screen_width, screen_height) = self.ale.getScreenDims()
        self.screen_data = np.empty((screen_height, screen_width, 1), dtype=np.uint8)  # 210x160 screen data
        self.dims = (84, 84)  # input size for neural network
        self.actions = [3, 0, 1, 4]  # ALE action ids: right, noop, fire, left

    def display_screen(self):
        self.ale.setBool("display_screen", True)

    def turn_on_sound(self):
        self.ale.setBool("sound", True)

    def restart(self):
        """reset game"""
        self.ale.reset_game()

    def act(self, action):
        """:returns reward of an action"""
        return self.ale.act(self.actions[action])

    def __get_screen(self):
        """:returns Grayscale thresholded resized screen image """
        self.ale.getScreenGrayscale(self.screen_data)
        resized = cv2.resize(self.screen_data, self.dims)
        return resized

    def get_state(self):
        binary_screen = self.__get_screen()
        if self.history is None:
            self.history = deque(maxlen=self.history_length)
            for _ in range(self.history_length - 1):
                self.history.append(binary_screen)
        self.history.append(binary_screen)
        result = np.stack(self.history, axis=0)
        return result

    def isTerminal(self):
        """checks if game is over"""
        return self.ale.game_over()

    def load_game(self):
        """load game from file"""
        self.ale.loadROM("Breakout.bin")
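
A sketch of the frame-history behaviour of get_state(), assuming Breakout.bin is present in the working directory:

env = Environment(show_screen=False, history_length=4)
env.restart()
state = env.get_state()   # the first call pads the deque with copies of one frame
print(state.shape)        # (4, 84, 84): history_length stacked 84x84 frames
reward = env.act(0)       # executes env.actions[0] == ALE action 3 (RIGHT)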
Example #17
def init(display_screen=False, record_dir=None):
    if display_screen:
        import pygame
        pygame.init()
    rom_path = '.'
    ale = ALEInterface()
    ale.setBool('display_screen', display_screen)
    ale.setInt('random_seed', 123)
    if record_dir is not None:
        ale.setString("record_screen_dir", record_dir)
    ale.loadROM(rom_path + '/space_invaders.bin')
    ale.setFloat("repeat_action_probability", 0)

    return ale
Example #18
class AleInterface(object):
    def __init__(self, game, args):
        self.game = game
        self.ale = ALEInterface()

        # if sys.platform == 'darwin':
        #     self.ale.setBool('sound', False)  # Sound doesn't work on OSX
        # elif sys.platform.startswith('linux'):
        #     self.ale.setBool('sound', True)
        # self.ale.setBool('display_screen', True)
        #
        self.ale.setBool('display_screen', args.display_screen)

        self.ale.setInt('frame_skip', args.frame_skip)
        self.ale.setFloat('repeat_action_probability', args.repeat_action_probability)
        self.ale.setBool('color_averaging', args.color_averaging)
        self.ale.setInt('random_seed', args.random_seed)

        #
        # if rand_seed is not None:
        #     self.ale.setInt('random_seed', rand_seed)

        rom_file = "./roms/%s.bin" % game
        if not os.path.exists(rom_file):
            print "not found rom file:", rom_file
            sys.exit(-1)
        self.ale.loadROM(rom_file)

        self.actions = self.ale.getMinimalActionSet()


    def get_actions_num(self):
        return len(self.actions)

    def act(self, action):
        reward = self.ale.act(self.actions[action])
        return reward

    def get_screen_gray(self):
        return self.ale.getScreenGrayscale()

    def get_screen_rgb(self):
        return self.ale.getScreenRGB()

    def game_over(self):
        return self.ale.game_over()

    def reset_game(self):
        return self.ale.reset_game()
Example #19
class emulator:
	def __init__(self, rom_name, vis, windowname='preview'):
		self.ale = ALEInterface()
		self.max_frames_per_episode = self.ale.getInt("max_num_frames_per_episode")
		self.ale.setInt("random_seed",123)
		self.ale.setInt("frame_skip",4)
		self.ale.loadROM('roms/' + rom_name )
		self.legal_actions = self.ale.getMinimalActionSet()
		self.action_map = dict()
		self.windowname = windowname
		for i in range(len(self.legal_actions)):
			self.action_map[self.legal_actions[i]] = i
		self.init_frame_number = 0

		# print(self.legal_actions)
		self.screen_width,self.screen_height = self.ale.getScreenDims()
		print("width/height: " +str(self.screen_width) + "/" + str(self.screen_height))
		self.vis = vis
		if vis: 
			cv2.startWindowThread()
			cv2.namedWindow(self.windowname)

	def get_image(self):
		numpy_surface = np.zeros(self.screen_height*self.screen_width*3, dtype=np.uint8)
		self.ale.getScreenRGB(numpy_surface)
		image = np.reshape(numpy_surface, (self.screen_height, self.screen_width, 3))
		return image

	def newGame(self):
		# Instead of resetting the game, we load a checkpoint and start from there.
		# self.ale.reset_game()
		self.ale.restoreState(self.ale.decodeState(checkpoints[random.randint(0,99)].astype('uint8')))
		self.init_frame_number = self.ale.getFrameNumber()
		#self.ale.restoreState(self.ale.decodeState(np.reshape(checkpoint,(1009,1))))
		return self.get_image()

	def next(self, action_indx):
		reward = self.ale.act(action_indx)	
		nextstate = self.get_image()
		# scipy.misc.imsave('test.png',nextstate)
		if self.vis:
			cv2.imshow(self.windowname,nextstate)
		return nextstate, reward, self.ale.game_over()

	def get_frame_number(self):
		return self.ale.getFrameNumber() - self.init_frame_number
Example #20
File: ale.py  Project: carpedm20/async-rl
    def __init__(self, rom_filename, seed=None, use_sdl=False, n_last_screens=4,
                 frame_skip=4, treat_life_lost_as_terminal=True,
                 crop_or_scale='scale', max_start_nullops=30,
                 record_screen_dir=None):
        self.n_last_screens = n_last_screens
        self.treat_life_lost_as_terminal = treat_life_lost_as_terminal
        self.crop_or_scale = crop_or_scale
        self.max_start_nullops = max_start_nullops

        ale = ALEInterface()
        if seed is not None:
            assert seed >= 0 and seed < 2 ** 16, \
                "ALE's random seed must be represented by unsigned int"
        else:
            # Use numpy's random state
            seed = np.random.randint(0, 2 ** 16)
        ale.setInt(b'random_seed', seed)
        ale.setFloat(b'repeat_action_probability', 0.0)
        ale.setBool(b'color_averaging', False)
        if record_screen_dir is not None:
            ale.setString(b'record_screen_dir', str.encode(record_screen_dir))
        self.frame_skip = frame_skip
        if use_sdl:
            if 'DISPLAY' not in os.environ:
                raise RuntimeError(
                    'Please set DISPLAY environment variable for use_sdl=True')
            # SDL settings below are from the ALE python example
            if sys.platform == 'darwin':
                import pygame
                pygame.init()
                ale.setBool(b'sound', False)  # Sound doesn't work on OSX
            elif sys.platform.startswith('linux'):
                ale.setBool(b'sound', True)
            ale.setBool(b'display_screen', True)
        ale.loadROM(str.encode(rom_filename))

        assert ale.getFrameNumber() == 0


        self.ale = ale
        self.legal_actions = ale.getMinimalActionSet()
        self.initialize()
Example #21
class Breakout(object):
    steps_between_actions = 4

    def __init__(self):
        self.ale = ALEInterface()
        self.ale.setInt('random_seed', 123)
        self.ale.setBool("display_screen", False)
        self.ale.setBool("sound", False)
        self.ale.loadROM("%s/breakout.bin" % rom_directory)
        self.current_state = [
            self.ale.getScreenRGB(), self.ale.getScreenRGB()
        ]

    def start_episode(self):
        self.ale.reset_game()

    def take_action(self, action):
        assert not self.terminated

        def step():
            reward = self.ale.act(action)
            self.roll_state()
            return reward

        reward = sum(step() for _ in range(self.steps_between_actions))

        return (reward, self.current_state)

    def roll_state(self):
        assert len(self.current_state) == 2
        self.current_state = [self.current_state[1], self.ale.getScreenRGB()]
        assert len(self.current_state) == 2

    @property
    def actions(self):
        return self.ale.getMinimalActionSet()

    @property
    def terminated(self):
        return self.ale.game_over() or self.ale.lives() < 5
Example #22
def init_ale(rom, display):
    ale = ALEInterface()
    # Get & Set the desired settings
    ale.setInt(b'random_seed', 123)

    # Set USE_SDL to true to display the screen. ALE must be compilied
    # with SDL enabled for this to work. On OSX, pygame init is used to
    # proxy-call SDL_main.
    USE_SDL = display
    if USE_SDL:
        if sys.platform == 'darwin':
            import pygame
            pygame.init()
            ale.setBool('sound', False) # Sound doesn't work on OSX
        elif sys.platform.startswith('linux'):
            ale.setBool('sound', True)
        ale.setBool('display_screen', display)

    # Load the ROM file
    ale.loadROM(rom)

    return ale
Example #23
class emulator:
	def __init__(self, rom_name, vis):
		if vis:
			import cv2
		self.ale = ALEInterface()
		self.max_frames_per_episode = self.ale.getInt("max_num_frames_per_episode")
		self.ale.setInt("random_seed",123)
		self.ale.setInt("frame_skip",4)
		self.ale.loadROM('roms/' + rom_name )
		self.legal_actions = self.ale.getMinimalActionSet()
		self.action_map = dict()
		for i in range(len(self.legal_actions)):
			self.action_map[self.legal_actions[i]] = i

		# print(self.legal_actions)
		self.screen_width,self.screen_height = self.ale.getScreenDims()
		print("width/height: " +str(self.screen_width) + "/" + str(self.screen_height))
		self.vis = vis
		if vis: 
			cv2.startWindowThread()
			cv2.namedWindow("preview")

	def get_image(self):
		numpy_surface = np.zeros(self.screen_height*self.screen_width*3, dtype=np.uint8)
		self.ale.getScreenRGB(numpy_surface)
		image = np.reshape(numpy_surface, (self.screen_height, self.screen_width, 3))
		return image

	def newGame(self):
		self.ale.reset_game()
		return self.get_image()

	def next(self, action_indx):
		reward = self.ale.act(action_indx)	
		nextstate = self.get_image()
		# scipy.misc.imsave('test.png',nextstate)
		if self.vis:
			cv2.imshow('preview',nextstate)
		return nextstate, reward, self.ale.game_over()
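
A rollout sketch for this wrapper; note that next() expects a raw ALE action value, not an index (the ROM name is illustrative):

emu = emulator('breakout.bin', vis=False)
state = emu.newGame()                     # (height, width, 3) RGB array
terminal = False
while not terminal:
    a = emu.legal_actions[0]              # e.g. the first minimal action
    state, reward, terminal = emu.next(a)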
Example #24
class Game():
    """
    Wrapper around the ALEInterface class.
    """

    def __init__(self, rom_file, sdl=False):
        self.ale = ALEInterface()
        # Setup SDL
        if sdl:
            if sys.platform == 'darwin':
                import pygame
                pygame.init()
                self.ale.setBool(b'sound', False) # Sound doesn't work on OSX
            elif sys.platform.startswith('linux'):
                self.ale.setBool(b'sound', True)
            self.ale.setBool(b'display_screen', True)

        # Load rom
        self.ale.loadROM(str.encode(rom_file))

    def get_action_set(self):
        return self.ale.getLegalActionSet()

    def get_minimal_action_set(self):
        return self.ale.getMinimalActionSet()

    def game_over(self):
        return self.ale.game_over()

    def act(self, action):
        return self.ale.act(action)

    def reset_game(self):
        self.ale.reset_game()

    def get_frame(self):
        return self.ale.getScreenRGB()
Example #25
def get_random_baseline(gamepath):
    ale = ALEInterface()
    ale.setInt('random_seed', 42)

    recordings_dir = './recordings/breakout/'

    USE_SDL = True
    if USE_SDL:
        if sys.platform == 'darwin':
            import pygame
            pygame.init()
            ale.setBool('sound', False) # Sound doesn't work on OSX
            #ale.setString("record_screen_dir", recordings_dir);
        elif sys.platform.startswith('linux'):
            ale.setBool('sound', True)
            ale.setBool('display_screen', True)

    # Load the ROM file
    ale.loadROM(gamepath)

    # Get the list of legal actions
    legal_actions = ale.getLegalActionSet()

    # Play 10 episodes
    rewards = []
    for episode in range(10):
        total_reward = 0
        while not ale.game_over():
            a = legal_actions[randrange(len(legal_actions))]
            reward = ale.act(a)
            total_reward += reward
        rewards.append(total_reward)
        #print 'Episode', episode, 'ended with score:', total_reward
        ale.reset_game()
    avg_reward = sum(rewards) / float(len(rewards))
    return avg_reward
Example #26
def ale_load_from_rom(rom_path, display_screen):
    rng = get_numpy_rng()
    try:
        from ale_python_interface import ALEInterface
    except ImportError as e:
        raise ImportError('Unable to import the python package of Arcade Learning Environment. ' \
                           'ALE may not have been installed correctly. Refer to ' \
                           '`https://github.com/mgbellemare/Arcade-Learning-Environment` for some' \
                           'installation guidance')

    ale = ALEInterface()
    ale.setInt(b'random_seed', rng.randint(1000))
    if display_screen:
        import sys
        if sys.platform == 'darwin':
            import pygame
            pygame.init()
            ale.setBool(b'sound', False) # Sound doesn't work on OSX
        ale.setBool(b'display_screen', True)
    else:
        ale.setBool(b'display_screen', False)
    ale.setFloat(b'repeat_action_probability', 0)
    ale.loadROM(str.encode(rom_path))
    return ale
Example #27
class Atari:
	def __init__(self,rom_name):
		self.ale = ALEInterface()
		self.max_frames_per_episode = self.ale.getInt("max_num_frames_per_episode")
		self.ale.setInt("random_seed",123)
		self.ale.setInt("frame_skip",4)
		self.ale.loadROM('./' +rom_name)
		self.screen_width,self.screen_height = self.ale.getScreenDims()
		self.legal_actions = self.ale.getMinimalActionSet()
		self.action_map = dict()
		for i in range(len(self.legal_actions)):
			self.action_map[self.legal_actions[i]] = i
		#print len(self.legal_actions)
		self.windowname = rom_name
		#cv2.startWindowThread()
		#cv2.namedWindow(rom_name)

	def get_image(self):
		numpy_surface = np.zeros(self.screen_height*self.screen_width*3, dtype=np.uint8)
		self.ale.getScreenRGB(numpy_surface)
		image = np.reshape(numpy_surface, (self.screen_height, self.screen_width, 3))
		return image

	def newGame(self):
		self.ale.reset_game()
		return self.get_image()

	def next(self, action):
		reward = self.ale.act(self.legal_actions[np.argmax(action)])	
		nextstate = self.get_image()
		
		#cv2.imshow(self.windowname,nextstate)
		if self.ale.game_over():
			self.newGame()
		#print "reward %d" % reward 
		return nextstate, reward, self.ale.game_over()
Example #28
def get_num_actions(rom_path, rom_name):
    from ale_python_interface import ALEInterface
    filename = '{0}/{1}.bin'.format(rom_path, rom_name)
    ale = ALEInterface()
    ale.loadROM(filename)
    return len(ale.getMinimalActionSet())
Example #29
    def __init__(self,
                 random_seed,
                 frame_skip,
                 repeat_action_probability,
                 sound,
                 display_screen,
                 block_state_repr=None,
                 enemy_state_repr=None,
                 friendly_state_repr=None):
        ale = ALEInterface()

        # Get & Set the desired settings
        if random_seed is not None:
            ale.setInt('random_seed', random_seed)
        ale.setInt('frame_skip', frame_skip)
        ale.setFloat('repeat_action_probability', repeat_action_probability)

        if display_screen:
            if sys.platform == 'darwin':
                import pygame
                pygame.init()
            ale.setBool('sound', sound)

        ale.setBool('display_screen', display_screen)

        # Load the ROM file
        ale.loadROM('qbert.bin')

        # Get the list of legal actions
        legal_actions = ale.getLegalActionSet()
        minimal_actions = ale.getMinimalActionSet()
        logging.debug('Legal actions: {}'.format(
            [action_number_to_name(a) for a in legal_actions]))
        logging.debug('Minimal actions: {}'.format(
            [action_number_to_name(a) for a in minimal_actions]))

        width, height = ale.getScreenDims()
        rgb_screen = np.empty([height, width, 3], dtype=np.uint8)

        ram_size = ale.getRAMSize()
        ram = np.zeros(ram_size, dtype=np.uint8)

        # ALE components
        self.ale = ale
        self.lives = ale.lives()
        self.rgb_screen = rgb_screen
        self.ram_size = ale.getRAMSize()
        self.ram = ram

        # Verbose state representation
        self.desired_color = COLOR_YELLOW
        self.block_colors = INITIAL_COLORS
        self.enemies = INITIAL_ENEMY_POSITIONS
        self.friendlies = INITIAL_FRIENDLY_POSITIONS
        self.discs = INITIAL_DISCS
        self.current_row, self.current_col = 0, 0
        self.level = 1
        self.enemy_present = False
        self.friendly_present = False
        self.block_state_repr = block_state_repr
        self.enemy_state_repr = enemy_state_repr
        self.friendly_state_repr = friendly_state_repr
        self.num_colored_blocks = 0
Example #30
class Agent(object):
    def __init__(self):
        self._ale = ALEInterface()
        self._ale.setInt('random_seed', 123)
        self._ale.setFloat('repeat_action_probability', 0.0)
        self._ale.setBool('color_averaging', False)
        self._ale.loadROM('roms/enduro.bin')
        self._controller = Controller(self._ale)
        self._extractor = StateExtractor(self._ale)
        self._image = None
        self.curr_action = 0

    def run(self, learn, episodes=1, draw=False):
        """ Implements the playing/learning loop.

        Args:
            learn(bool): Whether the self.learn() function should be called.
            episodes (int): The number of episodes to run the agent for.
            draw (bool): Whether to overlay the environment state on the frame.

        Returns:
            None
        """
        if learn:
            self.init_Q()
        action = random.choice(
            self.getActionsSet())  # init_action for q_learning
        for e in range(episodes):
            # Observe the environment to set the initial state
            (grid, self._image) = self._extractor.run(draw=draw, scale=4.0)
            self.initialise(grid)

            num_frames = self._ale.getFrameNumber()
            # Each episode lasts 6500 frames
            while self._ale.getFrameNumber() - num_frames < 6500:
                # Take an action
                self.act(action)

                # Update the environment grid
                s_grid = grid
                (grid, self._image) = self._extractor.run(draw=draw, scale=4.0)
                self.sense(grid)
                s_next_grid = grid
                # Perform learning if required
                if learn:
                    # self.learn(s_grid,s_next_grid) # for q learning
                    action = self.learn(s_grid, s_next_grid, action)

                self.callback(learn, e + 1,
                              self._ale.getFrameNumber() - num_frames)
            self.end_state(e)
            self._ale.reset_game()

    def getActionsSet(self):
        """ Returns the set of all possible actions
        """
        return [Action.ACCELERATE, Action.RIGHT, Action.LEFT, Action.BREAK]

    def move(self, action):
        """ Executes the action and advances the game to the next state.

        Args:
            action (int): The action which should executed. Make sure to use
                          the constants returned by self.getActionsSet()

        Returns:
           int: The obtained reward after executing the action
        """
        return self._controller.move(action)

    def initialise(self, grid):
        """ Called at the beginning of each episode, mainly used
        for state initialisation.

        Args:
            grid (np.ndarray): 11x10 array with the initial environment grid.

        Returns:
            None
        """
        raise NotImplementedError

    def act(self, action):
        """ Called at each loop iteration to execute an action.

        Args:
            action (int): The action to execute, as chosen by learn().

        Returns:
            None
        """
        raise NotImplementedError

    def sense(self, grid):
        """ Called at each loop iteration to construct the new state from
        the update environment grid.

        Returns:
            None
        """
        raise NotImplementedError

    def learn(self):
        """ Called at each loop iteration when the agent is learning. It should
        implement the learning procedure.

        Returns:
            None
        """
        raise NotImplementedError

    def callback(self, learn, episode, iteration):
        """ Called at each loop iteration mainly for reporting purposes.

        Args:
            learn (bool): Indicates whether the agent is learning or not.
            episode (int): The number of the current episode.
            iteration (int): The number of the current iteration.

        Returns:
            None
        """

    def get_surround(self):

        raise NotImplementedError
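
Agent leaves initialise/act/sense/learn (plus the init_Q and end_state hooks used by run) abstract; a minimal concrete subclass sketch, assuming the Controller/StateExtractor wiring above and the random module imported by the source file:

class RandomAgent(Agent):
    """Drives randomly; ignores the sensed grid."""
    def initialise(self, grid):
        pass                                         # no learner state

    def act(self, action):
        self.move(action)                            # execute via the Controller

    def sense(self, grid):
        pass                                         # stateless

    def learn(self, s_grid, s_next_grid, action):
        return random.choice(self.getActionsSet())   # pick the next action at random

    def init_Q(self):
        pass

    def end_state(self, episode):
        pass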
Example #31
        default=True,
        type=bool_arg,
        help="Whether or not to start with 30 noops for each env. Default True",
        dest="random_start")
    return parser


if __name__ == "__main__":
    args = get_arg_parser().parse_args()

    from atari_emulator import AtariEmulator
    from ale_python_interface import ALEInterface

    filename = args.rom_path + "/" + args.game + ".bin"
    ale_int = ALEInterface()
    ale_int.loadROM(str.encode(filename))
    num_actions = len(ale_int.getMinimalActionSet())

    args.num_actions = num_actions
    args.random_seed = 3

    ray.init()
    create_environment = lambda i: AtariEmulator.remote(i, args)

    emulators = np.asarray([create_environment(i) for i in range(4)])
    variables = [(np.asarray([
        ray.get(emulator.get_initial_state.remote()) for emulator in emulators
    ],
                             dtype=np.uint8)), (np.zeros(4, dtype=np.float32)),
                 (np.asarray([False] * 4, dtype=np.float32)),
                 (np.zeros((4, num_actions), dtype=np.float32))]
Example #32
    vf.main_model.load_state_dict(tc.load(model_path))
    vf.update_model.load_state_dict(tc.load(model_path))
gpu_dtype = tc.cuda.FloatTensor
cpu_dtype = tc.FloatTensor
# device = tc.device("cuda:0" if tc.cuda.is_available() else "cpu")
# vf = nn.Neural_Net().to(device)

# get screen or not
USE_SDL = False
if USE_SDL:
    ale.setBool(b'display_screen', True)

# load game rom file
name_of_the_game = 'space_invaders'
game_path = '/home/juna/Documents/Projects/atari_project/Arcade-Learning-Environment/roms/' + name_of_the_game + '.bin'
ale.loadROM(game_path.encode())

minimal_actions = ale.getMinimalActionSet()

print('minimal_actions :\n', minimal_actions)

screen_data = None

#initialize the state
image = ale.getScreenGrayscale(screen_data)
image = impre(name_of_the_game, image)
state = tc.stack((image, image, image, image),
                 dim=0).unsqueeze(0).type(gpu_dtype)
del image

memory_buffer = []
Example #33
                                        episode,
                                        type(agent).__name__))

        if agent.explorationProb > MINIMUM_EXPLORATION_EPSILON:
            agent.explorationProb -= reduce_exploration_prob_amount

        print('episode: {} ended with score: {}'.format(episode, total_reward))
        ale.reset_game()
    return rewards


if __name__ == '__main__':
    game = 'asterix.bin'
    gamepath = os.path.join('roms', game)
    agent = LEARNING_ALGORITHM
    ale = ALEInterface()
    ale.loadROM(gamepath)
    actions = ale.getMinimalActionSet()
    agent.actions = actions
    print(actions)
    if LOAD_WEIGHTS:
        agent.weights = file_utils.load_weights(WEIGHTS_FILENAME)
    rewards = train_agent(
        gamepath,
        agent,
        n_episodes=NUM_EPISODES,
        display_screen=DISPLAY_SCREEN,
        record_weights=RECORD_WEIGHTS,
        reduce_exploration_prob_amount=EXPLORATION_REDUCTION_AMOUNT,
        n_frames_to_skip=NUM_FRAMES_TO_SKIP)
Example #34
def train_agent(gamepath, agent, n_episodes, display_screen, record_weights,
                reduce_exploration_prob_amount, n_frames_to_skip):
    """
    :description: trains an agent to play a game

    :type gamepath: string
    :param gamepath: path to the binary of the game to be played

    :type agent: subclass RLAlgorithm
    :param agent: the algorithm/agent that learns to play the game

    :type n_episodes: int
    :param n_episodes: number of episodes of the game on which to train
    """

    # load the ale interface to interact with
    ale = ALEInterface()
    ale.setInt('random_seed', 42)

    # display/recording settings, doesn't seem to work currently
    recordings_dir = './recordings/breakout/'
    # previously "USE_SDL"
    if display_screen:
        if sys.platform == 'darwin':
            import pygame
            pygame.init()
            ale.setBool('sound', False)  # Sound doesn't work on OSX
            #ale.setString("record_screen_dir", recordings_dir);
        elif sys.platform.startswith('linux'):
            ale.setBool('sound', True)
        ale.setBool('display_screen', True)

    ale.loadROM(gamepath)
    ale.setInt("frame_skip", n_frames_to_skip)

    screen_preprocessor = screen_utils.RGBScreenPreprocessor()

    rewards = []
    best_reward = 0
    print('starting training...')
    for episode in range(n_episodes):
        action = 0
        reward = 0
        newAction = None

        total_reward = 0
        counter = 0
        lives = ale.lives()

        screen = np.zeros((32, 32, 3), dtype=np.int8)
        state = {
            "screen": screen,
            "objects": None,
            "prev_objects": None,
            "prev_action": 0,
            "action": 0
        }

        while not ale.game_over():
            # if newAction is None then we are training an off-policy algorithm
            # otherwise, we are training an on policy algorithm
            if newAction is None:
                action = agent.getAction(state)
            else:
                action = newAction
            reward += ale.act(action)

            if ale.lives() < lives:
                lives = ale.lives()
                reward -= 1
            total_reward += reward

            new_screen = ale.getScreenRGB()
            new_screen = screen_preprocessor.preprocess(new_screen)
            new_state = {
                "screen": new_screen,
                "objects": None,
                "prev_objects": state["objects"],
                "prev_action": state["action"],
                "action": action
            }
            newAction = agent.incorporateFeedback(state, action, reward,
                                                  new_state)

            state = new_state
            reward = 0

        rewards.append(total_reward)

        if total_reward > best_reward and record_weights:
            best_reward = total_reward
            print("Best reward: {}".format(total_reward))

        if episode % PRINT_TRAINING_INFO_PERIOD == 0:
            print('\n############################')
            print('### training information ###')
            print("Average reward: {}".format(np.mean(rewards)))
            print("Last 50: {}".format(
                np.mean(rewards[-NUM_EPISODES_AVERAGE_REWARD_OVER:])))
            print("Exploration probability: {}".format(agent.explorationProb))
            print('action: {}'.format(action))
            print('size of weights dict: {}'.format(len(agent.weights)))
            print('current objects: {}'.format(state['objects']))
            print('previous objects: {}'.format(state['prev_objects']))
            avg_feat_weight = np.mean(
                [v for k, v in agent.weights.items()])
            print('average feature weight: {}'.format(avg_feat_weight))
            print('############################')
            print('############################\n')

        if episode != 0 and episode % RECORD_WEIGHTS_PERIOD == 0 and record_weights:
            file_utils.save_rewards(rewards,
                                    filename='episode-{}-{}-rewards'.format(
                                        episode,
                                        type(agent).__name__))
            file_utils.save_weights(agent.weights,
                                    filename='episode-{}-{}-weights'.format(
                                        episode,
                                        type(agent).__name__))

        if agent.explorationProb > MINIMUM_EXPLORATION_EPSILON:
            agent.explorationProb -= reduce_exploration_prob_amount

        print('episode: {} ended with score: {}'.format(episode, total_reward))
        ale.reset_game()
    return rewards
Example #35
class AtariPlayer(RLEnvironment):
    """
    A wrapper for atari emulator.
    Will automatically restart when a real episode ends (isOver might be just
    lost of lives but not game over).
    """
    def __init__(self,
                 rom_file,
                 viz=0,
                 height_range=(None, None),
                 frame_skip=4,
                 image_shape=(84, 84),
                 nullop_start=30,
                 live_lost_as_eoe=True,
                 env_name="Boxing-v0"):
        """
        :param rom_file: path to the rom
        :param frame_skip: skip every k frames and repeat the action
        :param image_shape: (w, h)
        :param height_range: (h1, h2) to cut
        :param viz: visualization to be done.
            Set to 0 to disable.
            Set to a positive number to be the delay between frames to show.
            Set to a string to be a directory to store frames.
        :param nullop_start: start with random number of null ops
        :param live_losts_as_eoe: consider lost of lives as end of episode.  useful for training.
        """
        super(AtariPlayer, self).__init__()
        if not os.path.isfile(rom_file) and '/' not in rom_file:
            rom_file = get_dataset_path('atari_rom', rom_file)
        assert os.path.isfile(rom_file), \
                "rom {} not found. Please download at {}".format(rom_file, ROM_URL)

        try:
            ALEInterface.setLoggerMode(ALEInterface.Logger.Warning)
        except AttributeError:
            if execute_only_once():
                logger.warn("You're not using latest ALE")

        # avoid simulator bugs: https://github.com/mgbellemare/Arcade-Learning-Environment/issues/86
        with _ALE_LOCK:
            self.ale = ALEInterface()
            self.rng = get_rng(self)
            self.ale.setInt(b"random_seed", self.rng.randint(0, 30000))
            self.ale.setBool(b"showinfo", False)

            self.ale.setInt(b"frame_skip", 1)
            self.ale.setBool(b'color_averaging', False)
            # manual.pdf suggests otherwise.
            self.ale.setFloat(b'repeat_action_probability', 0.0)

            # viz setup
            if isinstance(viz, six.string_types):
                assert os.path.isdir(viz), viz
                self.ale.setString(b'record_screen_dir', viz)
                viz = 0
            if isinstance(viz, int):
                viz = float(viz)
            self.viz = viz
            if self.viz and isinstance(self.viz, float):
                self.windowname = os.path.basename(rom_file)
                cv2.startWindowThread()
                cv2.namedWindow(self.windowname)

            self.ale.loadROM(rom_file.encode('utf-8'))
        self.width, self.height = self.ale.getScreenDims()
        self.actions = self.ale.getMinimalActionSet()

        self.live_lost_as_eoe = live_lost_as_eoe
        self.frame_skip = frame_skip
        self.nullop_start = nullop_start
        self.height_range = height_range
        self.image_shape = image_shape

        self.current_episode_score = StatCounter()
        self.restart_episode()

    def _grab_raw_image(self):
        """
        :returns: the current 3-channel image
        """
        m = self.ale.getScreenRGB()
        return m.reshape((self.height, self.width, 3))

    def current_state(self):
        """
        :returns: a gray-scale (h, w, 1) uint8 image
        """
        ret = self._grab_raw_image()
        # max-pooled over the last screen
        ret = np.maximum(ret, self.last_raw_screen)
        if self.viz:
            if isinstance(self.viz, float):
                cv2.imshow(self.windowname, ret)
                time.sleep(self.viz)
        ret = ret[self.height_range[0]:self.height_range[1], :].astype(
            'float32')
        # weights 0.299, 0.587, 0.114, same as rgb2y in torch/image
        ret = cv2.cvtColor(ret, cv2.COLOR_RGB2GRAY)
        ret = cv2.resize(ret, self.image_shape)
        ret = np.expand_dims(ret, axis=2)
        return ret.astype('uint8')  # to save some memory

    def get_action_space(self):
        return DiscreteActionSpace(len(self.actions))

    def finish_episode(self):
        self.stats['score'].append(self.current_episode_score.sum)

    def restart_episode(self):
        self.current_episode_score.reset()
        with _ALE_LOCK:
            self.ale.reset_game()

        # random null-ops start
        n = self.rng.randint(self.nullop_start)
        self.last_raw_screen = self._grab_raw_image()
        for k in range(n):
            if k == n - 1:
                self.last_raw_screen = self._grab_raw_image()
            self.ale.act(0)

    def action(self, act):
        """
        :param act: an index of the action
        :returns: (reward, isOver)
        """
        oldlives = self.ale.lives()
        r = 0
        for k in range(self.frame_skip):
            if k == self.frame_skip - 1:
                self.last_raw_screen = self._grab_raw_image()
            r += self.ale.act(self.actions[act])
            newlives = self.ale.lives()
            if self.ale.game_over() or \
                    (self.live_lost_as_eoe and newlives < oldlives):
                break

        self.current_episode_score.feed(r)
        isOver = self.ale.game_over()
        if self.live_lost_as_eoe:
            isOver = isOver or newlives < oldlives
        if isOver:
            self.finish_episode()
        if self.ale.game_over():
            self.restart_episode()
        return (r, isOver)
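
A hedged driving loop for AtariPlayer, assuming the tensorpack-style helpers it relies on (get_rng, StatCounter, ...) are importable and the ROM name is illustrative:

player = AtariPlayer('breakout.bin', viz=0)
for _ in range(100):
    frame = player.current_state()                  # (84, 84, 1) grayscale uint8
    act_index = np.random.randint(len(player.actions))
    reward, is_over = player.action(act_index)      # auto-restarts on game over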
Example #36
14  #11111 fire up/down/left/right (invalid)
)

if len(sys.argv) < 2:
    print("Usage ./ale_python_test_pygame_player.py <ROM_FILE_NAME>")
    sys.exit()
    
ale = ALEInterface()

max_frames_per_episode = ale.getInt("max_num_frames_per_episode")
ale.setInt("random_seed", 123)

random_seed = ale.getInt("random_seed")
print("random_seed: " + str(random_seed))

ale.loadROM(sys.argv[1])
legal_actions = ale.getMinimalActionSet()
print(legal_actions)

(screen_width,screen_height) = ale.getScreenDims()
print("width/height: " +str(screen_width) + "/" + str(screen_height))

(display_width,display_height) = (1024,420)

#init pygame
pygame.init()
screen = pygame.display.set_mode((display_width,display_height))
pygame.display.set_caption("Arcade Learning Environment Player Agent Display")

game_surface = pygame.Surface((screen_width,screen_height))
Example #37
ale = ALEInterface()
ale.setInt('random_seed', 123)
ale.setInt("frame_skip",frameSkip)

USE_SDL = True
if USE_SDL:
  if sys.platform == 'darwin':
    import pygame
    pygame.init()
    ale.setBool('sound', False) # Sound doesn't work on OSX
  elif sys.platform.startswith('linux'):
    ale.setBool('sound', False)
  ale.setBool('display_screen', display_screen)

ale.loadROM("rom/Breakout.A26")
legal_actions = ale.getMinimalActionSet()


n_senses = 41*36
n_actions = len(legal_actions)
temporal_window = 1
hiddenSize1 = 256
hiddenSize2 = 32
network_size = n_senses*(temporal_window) + n_actions*(temporal_window-1)

dataRom = np.zeros([dataRomSize,n_senses + n_actions + 1],dtype= 'uint8')

sess = tf.InteractiveSession(config=tf.ConfigProto(log_device_placement=True))

x = tf.placeholder(tf.float32, [None, network_size])
Example #38
class ALEEnvironment():
    def __init__(self, rom_file, args):

        self.ale = ALEInterface()
        self.histLen = 4

        if args.display_screen:
            if sys.platform == 'darwin':
                import pygame
                pygame.init()
                self.ale.setBool('sound', False)  # Sound doesn't work on OSX
            elif sys.platform.startswith('linux'):
                self.ale.setBool('sound', True)
            self.ale.setBool('display_screen', True)

        self.ale.setInt('frame_skip', args.frame_skip)
        self.ale.setFloat('repeat_action_probability', 0.0)
        self.ale.setBool('color_averaging', args.color_averaging)

        #if args.random_seed:
        #  self.ale.setInt('random_seed', args.random_seed)
        self.ale.setInt(
            'random_seed',
            0)  #hoang addition to fix the random seed across all environment
        self.ale.loadROM(rom_file)

        if args.minimal_action_set:
            self.actions = self.ale.getMinimalActionSet()
            logger.info("Using minimal action set with size %d" %
                        len(self.actions))
        else:
            self.actions = self.ale.getLegalActionSet()
            logger.info("Using full action set with size %d" %
                        len(self.actions))
        logger.debug("Actions: " + str(self.actions))

        self.screen_width = args.screen_width
        self.screen_height = args.screen_height

        self.mode = "train"
        self.life_lost = False
        self.initScreen = self.getScreen()
        print("size of screen is:", self.initScreen.shape)
        im = Image.fromarray(self.initScreen)
        im.save('initial_screen.jpeg')
        im = Image.open('initial_screen.jpeg')
        pix = im.load()
        #  print "devil's color", pix[13,62]
        #  print "agent's color", pix[42,33]
        #  exit()

        # draw = ImageDraw.Draw(im)
        #  draw.rectangle([(37, 29),(48, 37)], outline = 'red')
        #   draw.rectangle([(69, 68), (73, 71)], outline = 'white')
        #   draw.rectangle([(7, 41), (11, 45)], outline = 'white')
        #   draw.rectangle([(11, 58), (15, 66)], outline = 'white')
        #   draw.rectangle([(70, 20), (73, 35)], outline='white') #right door
        #   draw.rectangle([(11, 68), (15, 71)], outline='white')
        #   im.save('first_subgoal_box.jpeg')
        #  exit()
        # use this tool to get bounding box: http://nicodjimenez.github.io/boxLabel/annotate.html
        self.goalSet = []
        # goal 0
        self.goalSet.append([[69, 68], [
            73, 71
        ]])  # Lower Right Ladder. This is the box for detecting first subgoal
        # self.goalSet.append([[11, 58], [15, 66]]) # lower left ladder 3
        # self.goalSet.append([[11, 68], [15, 71]])  # lower left ladder 3
        # goal 2
        self.goalSet.append([[7, 41],
                             [11, 45]])  # Key. This will be second sub goal

        self.goalSet.append([[11, 68], [15, 71]])  # lower left ladder 3
        # goal 4
        self.goalSet.append(
            [[69, 68],
             [73,
              71]])  # Lower Right Ladder again, this will be the third subgoal
        # goal 6
        self.goalSet.append([[70, 20],
                             [73,
                              35]])  # Right Door. This will be the 4th subgoal
        self.goalCenterLoc = []
        for goal in self.goalSet:
            goalCenter = [
                float(goal[0][0] + goal[1][0]) / 2,
                float(goal[0][1] + goal[1][1]) / 2
            ]
            self.goalCenterLoc.append(goalCenter)
        self.agentOriginLoc = [42, 33]
        self.agentLastX = 42
        self.agentLastY = 33
        self.devilLastX = 0
        self.devilLastY = 0
        self.reachedGoal = [0, 0, 0, 0, 0, 0, 0]
        self.histState = self.initializeHistState()

    def initializeHistState(self):
        histState = np.concatenate((self.getState(), self.getState()), axis=2)
        histState = np.concatenate((histState, self.getState()), axis=2)
        histState = np.concatenate((histState, self.getState()), axis=2)
        return histState

    def numActions(self):
        return len(self.actions)

    def resetGoalReach(self):
        self.reachedGoal = [0, 0, 0, 0, 0, 0, 0]

    def restart(self):
        self.ale.reset_game()
        self.life_lost = False
        self.reachedGoal = [0, 0, 0, 0, 0, 0, 0]
        for i in range(19):
            self.act(0)  #wait for initialization
        self.histState = self.initializeHistState()
        self.agentLastX = self.agentOriginLoc[0]
        self.agentLastY = self.agentOriginLoc[1]

    """
  def restart(self):
    # In test mode, the game is simply initialized. In train mode, if the game
    # is in terminal state due to a life loss but not yet game over, then only
    # life loss flag is reset so that the next game starts from the current
    # state. Otherwise, the game is simply initialized.
    if (
                  self.mode == 'test' or
                not self.life_lost or  # `reset` called in a middle of episode
              self.ale.game_over()  # all lives are lost
    ):
      self.ale.reset_game()
    self.life_lost = False
    self.reachedGoal = [0, 0, 0]
    for i in range(19):
      self.act(0) #wait for initialization
    self.histState = self.initializeHistState()
    self.agentLastX = self.agentOriginLoc[0]
    self.agentLastY = self.agentOriginLoc[1]
  """

    def beginNextLife(self):
        self.life_lost = False
        self.reachedGoal = [0, 0, 0, 0, 0, 0, 0]
        for i in range(19):
            self.act(0)  #wait for initialization
        self.histState = self.initializeHistState()
        self.agentLastX = self.agentOriginLoc[0]
        self.agentLastY = self.agentOriginLoc[1]

    def act(self, action):
        lives = self.ale.lives()
        reward = self.ale.act(self.actions[action])
        self.life_lost = (lives != self.ale.lives())
        currState = self.getState()
        self.histState = np.concatenate((self.histState[:, :, 1:], currState),
                                        axis=2)
        return reward

    def getScreen(self):
        screen = self.ale.getScreenGrayscale()
        resized = cv2.resize(screen, (self.screen_width, self.screen_height))
        return resized

    def getScreenRGB(self):
        screen = self.ale.getScreenRGB()
        resized = cv2.resize(screen, (self.screen_width, self.screen_height))
        #resized = screen
        return resized

    def getAgentLoc(self, img):
        #  img = self.getScreenRGB()

        man = [200, 72, 72]
        mask = np.zeros(np.shape(img))
        mask[:, :, 0] = man[0]
        mask[:, :, 1] = man[1]
        mask[:, :, 2] = man[2]

        diff = img - mask
        indxs = np.where(diff == 0)
        diff[np.where(diff < 0)] = 0
        diff[np.where(diff > 0)] = 0
        diff[indxs] = 255
        if (np.shape(indxs[0])[0] == 0):
            mean_x = self.agentLastX
            mean_y = self.agentLastY
        else:
            mean_y = np.sum(indxs[0]) / np.shape(indxs[0])[0]
            mean_x = np.sum(indxs[1]) / np.shape(indxs[1])[0]
        self.agentLastX = mean_x
        self.agentLastY = mean_y
        return (mean_x, mean_y)

    def getDevilLoc(self, img):
        #    img = self.getScreenRGB()
        # man = [0, 16, 2]
        devilColor = [236, 236, 236]
        mask = np.zeros(np.shape(img))
        mask[:, :, 0] = devilColor[0]
        mask[:, :, 1] = devilColor[1]
        mask[:, :, 2] = devilColor[2]
        diff = img - mask
        indxs = np.where(diff == 0)
        diff[np.where(diff < 0)] = 0
        diff[np.where(diff > 0)] = 0
        diff[indxs] = 255
        if (np.shape(indxs[0])[0] == 0):
            mean_x = self.devilLastX
            mean_y = self.devilLastY
        else:
            mean_y = np.sum(indxs[0]) / np.shape(indxs[0])[0]
            mean_x = np.sum(indxs[1]) / np.shape(indxs[1])[0]
        self.devilLastX = mean_x
        self.devilLastY = mean_y
        return (mean_x, mean_y)

    def distanceReward(self, lastGoal, goal):
        if (lastGoal == -1):
            lastGoalCenter = self.agentOriginLoc
        else:
            lastGoalCenter = self.goalCenterLoc[lastGoal]
        goalCenter = self.goalCenterLoc[goal]
        agentX, agentY = self.getAgentLoc(self.getScreenRGB())
        dis = np.sqrt((goalCenter[0] - agentX) * (goalCenter[0] - agentX) +
                      (goalCenter[1] - agentY) * (goalCenter[1] - agentY))
        disLast = np.sqrt((lastGoalCenter[0] - agentX) *
                          (lastGoalCenter[0] - agentX) +
                          (lastGoalCenter[1] - agentY) *
                          (lastGoalCenter[1] - agentY))
        disGoals = np.sqrt((goalCenter[0] - lastGoalCenter[0]) *
                           (goalCenter[0] - lastGoalCenter[0]) +
                           (goalCenter[1] - lastGoalCenter[1]) *
                           (goalCenter[1] - lastGoalCenter[1]))
        return 0.001 * (disLast - dis) / disGoals

    # add a channel dimension so the frame matches the network's input shape
    def getState(self):
        screen = self.ale.getScreenGrayscale()
        resized = cv2.resize(screen, (self.screen_width, self.screen_height))
        return np.reshape(resized, (self.screen_height, self.screen_width, 1))

    def getStackedState(self):
        return self.histState

    def isTerminal(self):
        if self.mode == 'train':
            return self.ale.game_over() or self.life_lost
        return self.ale.game_over()

    def isGameOver(self):
        return self.ale.game_over()

    def isLifeLost(self):
        return self.life_lost

    def reset(self):
        self.ale.reset_game()
        self.life_lost = False

    def goalReached(self, goal):
        # Goals in `subset` are the original tasks where bounding boxes are
        # used to detect the agent's location.
        subset = [0, 2, 3, 4, 6]
        if goal in subset:
            # goal_index = goal/2
            goal_index = subset.index(goal)
            goalPosition = self.goalSet[goal_index]
            goalScreen = self.initSrcreen
            stateScreen = self.getScreen()
            count = 0
            for y in range(goalPosition[0][0], goalPosition[1][0]):
                for x in range(goalPosition[0][1], goalPosition[1][1]):
                    if goalScreen[x][y] != stateScreen[x][y]:
                        count = count + 1
            # 30 is the approximate number of pixels the agent sprite covers
            if float(count) / 30 > 0.3:
                self.reachedGoal[goal] = 1
                return True
        if goal == 1:
            # detect if agent is to the left of the devil
            #    return self.agent_left_devil()
            return self.detect_left_ladder()
        ############## -- DML modified -- ###########
        # if goal == 4:
        #     # detect if agent is to the right of the devil
        # #    return self.agent_right_devil()
        #     return self.detect_right_ladder()
        ################# -- end -- ###########
        if goal == 5:
            # detect if the agent is back to the original location
            return self.original_location_reached()
        return False

    def detect_right_ladder(self):
        goalPosition = self.goalSet[0]
        goalScreen = self.initSrcreen
        stateScreen = self.getScreen()
        count = 0
        for y in range(goalPosition[0][0], goalPosition[1][0]):
            for x in range(goalPosition[0][1], goalPosition[1][1]):
                if goalScreen[x][y] != stateScreen[x][y]:
                    count = count + 1
        # 30 is the approximate number of pixels the agent sprite covers
        if float(count) / 30 > 0.3:
            goal = 5
            self.reachedGoal[goal] = 1
            return True
        return False

    def detect_left_ladder(self):
        goalPosition = self.goalSet[2]
        goalScreen = self.initSrcreen
        stateScreen = self.getScreen()
        count = 0
        for y in range(goalPosition[0][0], goalPosition[1][0]):
            for x in range(goalPosition[0][1], goalPosition[1][1]):
                if goalScreen[x][y] != stateScreen[x][y]:
                    count = count + 1
        # 30 is the approximate number of pixels the agent sprite covers
        if float(count) / 30 > 0.3:
            goal = 5
            self.reachedGoal[goal] = 1
            return True
        return False

    def original_location_reached(self):
        img = self.getScreenRGB()
        (x, y) = self.getAgentLoc(img)
        #  print "Agent's location:",x,y
        if abs(x - 42) <= 2 and abs(y - 33) <= 2:
            return True
        else:
            return False

    def pause(self):
        os.system('read -s -n 1 -p "Press any key to continue...\n"')

    def agent_left_devil(self):
        img = self.ale.getScreenRGB()
        (x, y) = self.getAgentLoc(img)
        (a, b) = self.getDevilLoc(img)
        #  print "Agent's location:",x,y
        #  print "Devil's location:", a,b
        if (a - x > 40) and (abs(y - b) <= 40):
            return True
        else:
            return False

    def agent_right_devil(self):
        img = self.getScreenRGB()
        (x, y) = self.getAgentLoc(img)
        (a, b) = self.getDevilLoc(img)
        # print "Agent's location:",x,y
        # print "Devil's location:",a,b

        # if (x-a > 25) and (abs(y-b) <= 40):
        if (x - a > 40) and (abs(y - b) <= 40):
            return True
        else:
            return False

    def goalNotReachedBefore(self, goal):
        if (self.reachedGoal[goal] == 1):
            return False
        return True
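A minimal usage sketch for the goal-conditioned environment above (not part of the original snippet; `env` stands for an instance of the class, and the NOOP action is a placeholder for a real policy):

# Hypothetical driver loop, assuming `env` is an instance of the class above.
env.restart()
env.resetGoalReach()
goal = 0
for _ in range(10000):  # step cap so the sketch always terminates
    if env.isTerminal() or goal >= len(env.reachedGoal):
        break
    reward = env.act(0)  # NOOP; a trained policy would choose actions here
    state = env.getStackedState()  # (84, 84, 4) frame history
    if env.goalNotReachedBefore(goal) and env.goalReached(goal):
        goal += 1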
Example #39
0
class ALEEnvironment(Environment):
    def __init__(self, rom_file, args):
        from ale_python_interface import ALEInterface
        self.ale = ALEInterface()

        # Set ALE configuration
        self.ale.setInt(b'frame_skip', args.frame_skip)
        self.ale.setFloat(b'repeat_action_probability',
                          args.repeat_action_probability)
        self.ale.setBool(b'color_averaging', args.color_averaging)

        if args.random_seed:
            self.ale.setInt(b'random_seed', args.random_seed)

        if args.record_screen_path:
            if not os.path.exists(args.record_screen_path):
                os.makedirs(args.record_screen_path)
            self.ale.setString(b'record_screen_dir',
                               args.record_screen_path.encode())

        if args.record_sound_filename:
            self.ale.setBool(b'sound', True)
            self.ale.setString(b'record_sound_filename',
                               args.record_sound_filename.encode())

        # Load ROM
        self.ale.loadROM(rom_file.encode())

        # Set game difficulty and mode (after loading)
        self.ale.setDifficulty(args.game_difficulty)
        self.ale.setMode(args.game_mode)

        # Whether to use minimum set or set
        if args.minimal_action_set:
            self.actions = self.ale.getMinimalActionSet()
        else:
            self.actions = self.ale.getLegalActionSet()

        # Life lost control
        self.life_lost = False

        # Initialize base class
        super(ALEEnvironment, self).__init__(args)

    def action_dim(self):
        return len(self.actions)

    def reset(self):
        # In test mode, the game is simply initialized. In train mode, if the game
        # is in terminal state due to a life loss but not yet game over, then only
        # life loss flag is reset so that the next game starts from the current
        # state. Otherwise, the game is simply initialized.
        if (self.mode == 'test'
                or not self.life_lost  # `reset` called in the middle of an episode
                or self.ale.game_over()):  # all lives are lost
            self.ale.reset_game()
        self.life_lost = False
        screen = self._get_state(self.ale.getScreenRGB())
        return screen

    def step(self, action, action_b=0, ignore_screen=False):
        lives = self.ale.lives()
        # Act on environment
        reward = self.ale.act(self.actions[action],
                              self.actions[action_b] + 18)
        # Check if life was lost
        self.life_lost = (lives != self.ale.lives())
        # Check terminal state
        terminal = (self.ale.game_over() or self.life_lost
                    ) if self.mode == 'train' else self.ale.game_over()
        # Check if should ignore the screen (in case of RobotEnvironment)
        if ignore_screen:
            screen = None
        else:
            # Get screen from ALE
            screen = self._get_state(self.ale.getScreenRGB())
            # Wait for next frame to start
            self.fps_control.wait_next_frame()
        return screen, reward, terminal
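A hypothetical usage sketch for ALEEnvironment (not from the original source). The `args` namespace mirrors the fields read in __init__; the unseen Environment base class is assumed to provide `mode`, `_get_state` and `fps_control`, and `ignore_screen=True` sidesteps the frame-rate helper:

from types import SimpleNamespace

args = SimpleNamespace(
    frame_skip=4, repeat_action_probability=0.0, color_averaging=True,
    random_seed=123, record_screen_path='', record_sound_filename='',
    game_difficulty=0, game_mode=0, minimal_action_set=True)
env = ALEEnvironment('breakout.bin', args)  # ROM path is a placeholder
env.reset()
for _ in range(100):
    screen, reward, terminal = env.step(0, ignore_screen=True)  # repeat NOOP
    if terminal:
        env.reset()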
Example #40
0
class AtariPlayer(gym.Env):
    """
    A wrapper for ALE emulator, with configurations to mimic DeepMind DQN settings.

    Info:
        score: the accumulated reward in the current game
        gameOver: True when the current game is Over
    """
    def __init__(self,
                 rom_file,
                 viz=0,
                 frame_skip=4,
                 nullop_start=30,
                 live_lost_as_eoe=True,
                 max_num_frames=0):
        """
        Args:
            rom_file: path to the rom
            frame_skip: skip every k frames and repeat the action
            viz: visualization to be done.
                Set to 0 to disable.
                Set to a positive number to be the delay between frames to show.
                Set to a string to be a directory to store frames.
            nullop_start: start with random number of null ops.
            live_lost_as_eoe: consider loss of a life as end of episode. Useful for training.
            max_num_frames: maximum number of frames per episode.
        """
        super(AtariPlayer, self).__init__()
        if not os.path.isfile(rom_file) and '/' not in rom_file:
            rom_file = get_dataset_path('atari_rom', rom_file)
        assert os.path.isfile(rom_file), \
            "rom {} not found. Please download at {}".format(rom_file, ROM_URL)

        try:
            ALEInterface.setLoggerMode(ALEInterface.Logger.Error)
        except AttributeError:
            if execute_only_once():
                logger.warn("You're not using latest ALE")

        # avoid simulator bugs: https://github.com/mgbellemare/Arcade-Learning-Environment/issues/86
        with _ALE_LOCK:
            self.ale = ALEInterface()
            self.rng = get_rng(self)
            self.ale.setInt(b"random_seed", self.rng.randint(0, 30000))
            self.ale.setInt(b"max_num_frames_per_episode", max_num_frames)
            self.ale.setBool(b"showinfo", False)

            self.ale.setInt(b"frame_skip", 1)
            self.ale.setBool(b'color_averaging', False)
            # manual.pdf suggests otherwise.
            self.ale.setFloat(b'repeat_action_probability', 0.0)

            # viz setup
            if isinstance(viz, six.string_types):
                assert os.path.isdir(viz), viz
                self.ale.setString(b'record_screen_dir', viz)
                viz = 0
            if isinstance(viz, int):
                viz = float(viz)
            self.viz = viz
            if self.viz and isinstance(self.viz, float):
                self.windowname = os.path.basename(rom_file)
                cv2.namedWindow(self.windowname)

            self.ale.loadROM(rom_file.encode('utf-8'))
        self.width, self.height = self.ale.getScreenDims()
        self.actions = self.ale.getMinimalActionSet()

        self.live_lost_as_eoe = live_lost_as_eoe
        self.frame_skip = frame_skip
        self.nullop_start = nullop_start

        self.action_space = spaces.Discrete(len(self.actions))
        self.observation_space = spaces.Box(low=0,
                                            high=255,
                                            shape=(self.height, self.width),
                                            dtype=np.uint8)
        self._restart_episode()

    def get_action_meanings(self):
        return [ACTION_MEANING[i] for i in self.actions]

    def _grab_raw_image(self):
        """
        :returns: the current 3-channel image
        """
        m = self.ale.getScreenRGB()
        return m.reshape((self.height, self.width, 3))

    def _current_state(self):
        """
        :returns: a gray-scale (h, w) uint8 image
        """
        ret = self._grab_raw_image()
        # max-pooled over the last screen
        ret = np.maximum(ret, self.last_raw_screen)
        if self.viz:
            if isinstance(self.viz, float):
                cv2.imshow(self.windowname, ret)
                cv2.waitKey(int(self.viz * 1000))
        ret = ret.astype('float32')
        # weights 0.299, 0.587, 0.114, same as rgb2y in torch/image
        ret = cv2.cvtColor(ret, cv2.COLOR_RGB2GRAY)[:, :]
        return ret.astype('uint8')  # to save some memory

    def _restart_episode(self):
        with _ALE_LOCK:
            self.ale.reset_game()

        # random null-ops start
        n = self.rng.randint(self.nullop_start)
        self.last_raw_screen = self._grab_raw_image()
        for k in range(n):
            if k == n - 1:
                self.last_raw_screen = self._grab_raw_image()
            self.ale.act(0)

    def reset(self):
        if self.ale.game_over():
            self._restart_episode()
        return self._current_state()

    def render(self, *args, **kwargs):
        pass  # visualization for this env is through the viz= argument when creating the player

    def step(self, act):
        oldlives = self.ale.lives()
        r = 0
        for k in range(self.frame_skip):
            if k == self.frame_skip - 1:
                self.last_raw_screen = self._grab_raw_image()
            r += self.ale.act(self.actions[act])
            newlives = self.ale.lives()
            if self.ale.game_over() or \
                    (self.live_lost_as_eoe and newlives < oldlives):
                break

        isOver = self.ale.game_over()
        if self.live_lost_as_eoe:
            isOver = isOver or newlives < oldlives

        info = {'ale.lives': newlives}
        return self._current_state(), r, isOver, info
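A hypothetical usage sketch (not from the original source): AtariPlayer subclasses gym.Env, so the standard reset/step loop applies; the ROM path is a placeholder.

player = AtariPlayer('breakout.bin', viz=0, frame_skip=4)
obs = player.reset()
done, total_reward = False, 0
while not done:
    # sample a random legal action from the gym action space
    obs, reward, done, info = player.step(player.action_space.sample())
    total_reward += reward
print('episode reward:', total_reward)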
Example #41
0
class AtariEmulator:
    
    def __init__(self, rom, visualization = False, save = False, windowName = 'AtariGame'):
        self.ale = ALEInterface()
        # self.ale.setInt(b'frame_skip', 1)
        self.ale.setInt(b"random_seed", 123)
        # self.ale.setFloat(b'repeat_action_probability', 0) # default = 0.25
        self.ale.loadROM(b'roms/' + rom)
        self.legalActions = self.ale.getMinimalActionSet()
        self.life_lost = False
        self.mode = 'train'
        self.visualization = visualization and not save
        self.windowName = windowName
        self.save = save
        self.totalReward = 0
        if self.visualization:
            cv2.namedWindow(self.windowName)
        elif self.save:
            self.index = 0
            self.bestReward = 0
            self.totalReward = 0
            # recreate the output directories from scratch
            for d in ('result', 'best_result'):
                if os.path.exists(d):
                    shutil.rmtree(d)
                os.mkdir(d)


    def start(self):
        # In train mode, if a life was lost but the game is not over, don't restart the game
        if self.mode == 'test' or not self.life_lost or self.ale.game_over():
            self.ale.reset_game()
        self.life_lost = False
        return cv2.resize(self.ale.getScreenGrayscale(), (84, 110))[26:]


    def isTerminal(self):
        if self.mode == 'train':
            return self.ale.game_over() or self.life_lost
        return self.ale.game_over()


    def next(self, action):  # index of the action in legalActions
        lives = self.ale.lives() # the remaining lives
        reward = 0
        for i in range(4): # action repeat
            reward += self.ale.act(self.legalActions[action])
            self.life_lost = (lives != self.ale.lives())  # after action, judge life lost
            if self.mode == 'train' and self.life_lost:
                reward -= 1
            if self.isTerminal():
                break
        self.totalReward += reward
        state = self.ale.getScreenGrayscale()
        rgb_state = self.ale.getScreenRGB()
        if self.visualization:
            cv2.imshow(self.windowName, rgb_state)
            cv2.waitKey(10)
        elif self.save:
            cv2.imwrite(os.path.join('result', '%04d.png') % self.index, rgb_state)
            self.index += 1
            if self.isTerminal():
                print('Scores: %d, index: %d' % (self.totalReward, self.index))
                if self.totalReward > self.bestReward:
                    self.bestReward = self.totalReward
                    copyDir('result', 'best_result')
                self.index = 0
                self.totalReward = 0

        return cv2.resize(state, (84, 110))[26:], reward, self.isTerminal()


    def setMode(self, mode):
        self.mode = mode

    def randomStart(self, s_t):
        channels = s_t.shape[-1]
        self.start()
        for i in range(np.random.randint(channels, 30) + 1):
            s_t_plus_1, r_t, isTerminal = self.next(0)
            s_t[..., 0:channels-1] = s_t[..., 1:channels]
            s_t[..., -1] = s_t_plus_1
            if isTerminal:
                self.start()
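A hypothetical usage sketch (not from the original source). The ROM name must be bytes because __init__ prepends b'roms/'; np is assumed imported at module level, as in the snippet itself:

emulator = AtariEmulator(b'breakout.bin')
s_t = np.zeros((84, 84, 4))  # four-frame history, filled in place by randomStart
emulator.randomStart(s_t)
frame, reward, terminal = emulator.next(0)  # frame is the cropped 84x84 screen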
Example #42
0
saveData = False
saveModel = False
gamma = .99
learningRate = 0.00025

display_screen = False
frameSkip = 4
ale = ALEInterface()
ale.setInt('random_seed', 0)
ale.setInt("frame_skip", frameSkip)
ale.setBool('color_averaging', True)
ale.setBool('sound', False)
ale.setBool('display_screen', False)
ale.setFloat("repeat_action_probability", 0.)
t = ale.getFloat("repeat_action_probability")
ale.loadROM("rom/breakout.bin")
legal_actions = ale.getMinimalActionSet()

width = 84
height = 84

memorySize = 1000000
maxEpisode = 10000000
maxFrame = 50000000

historyLength = 4
batchSize = 32

startLearningFrame = 50000
finalExplorationFrame = 1000000
# dummy = 30
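The constants above are standard DQN hyperparameters; startLearningFrame and finalExplorationFrame typically parameterize a linear epsilon anneal. A sketch of such a schedule (an assumption, not part of the original script):

def epsilon_at(frame, eps_start=1.0, eps_end=0.1):
    # Linear anneal between startLearningFrame and finalExplorationFrame,
    # constant outside that range (assumed schedule; endpoint values are placeholders).
    if frame < startLearningFrame:
        return eps_start
    if frame > finalExplorationFrame:
        return eps_end
    frac = (frame - startLearningFrame) / float(finalExplorationFrame - startLearningFrame)
    return eps_start + frac * (eps_end - eps_start)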
Example #43
0
def main():

    pygame.init()

    ale = ALEInterface()
    ale.setInt(b'random_seed', 123)
    ale.setBool(b'display_screen', True)
    ale.setInt(b'frame_skip', 4)
    # ale.setFloat(b'repeat_action_probability', .7)
    # ale.setBool(b'color_averaging', True)

    game = 'breakout'  # ACKTR tasks: 'space_invaders', 'seaquest', 'qbert', 'pong', 'beam_rider', 'breakout'
    rom = home + '/Documents/ALE/roms/supported/' + game + '.bin'
    ale.loadROM(str.encode(rom))

    legal_actions = ale.getLegalActionSet()
    rewards, num_episodes = [], 5

    config = []
    agent = DQN_agent(config)

    for episode in range(num_episodes):
        total_reward = 0

        exp_state = []
        exp_action = 0
        exp_reward = 0
        exp_next_state = []
        while not ale.game_over():

            #Save frame
            frame = ale.getScreenGrayscale()
            frame = cv2.resize(frame, (84, 84))
            exp_next_state.append(frame)
            #Make action
            action = random.choice(legal_actions)
            reward = ale.act(action)
            total_reward += reward
            exp_reward += reward
            #Make experience
            if len(exp_next_state) == 4:
                state_ready = np.reshape(np.stack(exp_next_state),
                                         [4 * 84, 84])
                # cv2.imshow('image',state_ready)
                # cv2.waitKey(0)
                exp_action = action
                if len(exp_state) == 0:
                    exp_state = exp_next_state
                else:
                    experience = [
                        exp_state, exp_action, exp_reward, exp_next_state
                    ]
                    exp_reward = 0
                    exp_state = exp_next_state
                    exp_next_state = []

        print('Episode %d reward %d.' % (episode, total_reward))
        rewards.append(total_reward)
        ale.reset_game()

    average = sum(rewards) / len(rewards)
    print('Average for %d episodes: %d' % (num_episodes, average))
Example #44
0
class ALE(object):
    def __init__(self, init_seed, init_rand):
        self.ale = ALEInterface()
        self.ale.setInt(b'random_seed', init_seed)
        self.ale.setBool('display_screen', False)
        self.ale.setBool('sound', False)
        self.ale.setFloat(b'repeat_action_probability', 0.0)
        self.ale.loadROM('./breakout.bin')
        self.action_size = 4

        self._screen = None
        self.reward = 0
        self.terminal = True
        self.init_rand = init_rand

    #def setSetting(self, action_repeat, random_init_step, screen_type):
    def setSetting(self, action_repeat, screen_type):
        self.action_repeat = action_repeat
        self.screen_type = screen_type
        #self.random_init_step = random_init_step

    def _step(self, action):
        # debug transform
        if action == 2:
            action = 4

        self.reward = self.ale.act(action)
        self.terminal = self.ale.game_over()

        if self.screen_type == 0:
            self._screen = self.ale.getScreenRGB()
        elif self.screen_type == 1:
            self._screen = self.ale.getScreenGrayscale()
        else:
            sys.stderr.write('screen_type error!')
            exit()

    def state(self):
        return self.reward, self.screen, self.terminal

    def act(self, action):
        cumulated_reward = 0
        for _ in range(self.action_repeat):
            self._step(action)
            cumulated_reward += self.reward
            if self.terminal:
                break
        self.reward = cumulated_reward

        return self.state()

    def train_act(self, action):
        cumulated_reward = 0
        for _ in range(self.action_repeat):
            self._step(action)
            cumulated_reward += self.reward
            if self.terminal:
                break
        self.reward = cumulated_reward

        return (self._screen, self.state())

    def new_game(self):
        if self.ale.game_over():
            self.ale.reset_game()

            if self.screen_type == 0:
                self._screen = self.ale.getScreenRGB()
            elif self.screen_type == 1:
                self._screen = self.ale.getScreenGrayscale()
            else:
                sys.stderr.write('screen_type error!')
                exit()

        self._step(0)

        #for _ in range(random.randint(0, self.random_init_step - 1)):
        for _ in range(self.init_rand):
            self._step(0)

        return self.screen

    @property
    def screen(self):
        return cv2.resize(
            cv2.cvtColor(self._screen, cv2.COLOR_RGB2GRAY) / 255., (84, 84))
class ArcadeLearningEnvironment(Environment):
    """
    [Arcade Learning Environment](https://github.com/mgbellemare/Arcade-Learning-Environment)
    adapter (specification key: `ale`, `arcade_learning_environment`).

    May require:
    ```bash
    sudo apt-get install libsdl1.2-dev libsdl-gfx1.2-dev libsdl-image1.2-dev cmake

    git clone https://github.com/mgbellemare/Arcade-Learning-Environment.git
    cd Arcade-Learning-Environment

    mkdir build && cd build
    cmake -DUSE_SDL=ON -DUSE_RLGLUE=OFF -DBUILD_EXAMPLES=ON ..
    make -j 4
    cd ..

    pip3 install .
    ```

    Args:
        level (string): ALE rom file
            (<span style="color:#C00000"><b>required</b></span>).
        life_loss_terminal: Signals a terminal state on loss of a life
            (<span style="color:#00C000"><b>default</b></span>: false).
        life_loss_punishment (float): Penalty subtracted from the reward on loss of a life
            (<span style="color:#00C000"><b>default</b></span>: 0.0).
        repeat_action_probability (float): Repeats last action with given probability
            (<span style="color:#00C000"><b>default</b></span>: 0.0).
        visualize (bool): Whether to visualize interaction
            (<span style="color:#00C000"><b>default</b></span>: false).
        frame_skip (int > 0): Number of times to repeat an action without observing
            (<span style="color:#00C000"><b>default</b></span>: 1).
        seed (int): Random seed
            (<span style="color:#00C000"><b>default</b></span>: none).
    """
    def __init__(self,
                 level,
                 life_loss_terminal=False,
                 life_loss_punishment=0.0,
                 repeat_action_probability=0.0,
                 visualize=False,
                 frame_skip=1,
                 seed=None):
        super().__init__()

        from ale_python_interface import ALEInterface

        self.environment = ALEInterface()
        self.rom_file = level

        self.life_loss_terminal = life_loss_terminal
        self.life_loss_punishment = life_loss_punishment

        self.environment.setFloat(b'repeat_action_probability',
                                  repeat_action_probability)
        self.environment.setBool(b'display_screen', visualize)
        self.environment.setInt(b'frame_skip', frame_skip)
        if seed is not None:
            self.environment.setInt(b'random_seed', seed)

        # All set commands must be done before loading the ROM.
        self.environment.loadROM(rom_file=self.rom_file.encode())
        self.available_actions = tuple(self.environment.getLegalActionSet())

        # Full list of actions:
        # No-Op, Fire, Up, Right, Left, Down, Up Right, Up Left, Down Right, Down Left, Up Fire,
        # Right Fire, Left Fire, Down Fire, Up Right Fire, Up Left Fire, Down Right Fire, Down Left
        # Fire

    def __str__(self):
        return super().__str__() + '({})'.format(self.rom_file)

    def states(self):
        width, height = self.environment.getScreenDims()
        return dict(type='float', shape=(height, width, 3))

    def actions(self):
        return dict(type='int', num_values=len(self.available_actions))

    def close(self):
        self.environment.__del__()
        self.environment = None

    def get_states(self):
        screen = np.copy(
            self.environment.getScreenRGB(screen_data=self.screen))
        screen = screen.astype(dtype=np.float32) / 255.0
        return screen

    def reset(self):
        self.environment.reset_game()
        width, height = self.environment.getScreenDims()
        self.screen = np.empty((height, width, 3), dtype=np.uint8)
        self.lives = self.environment.lives()
        return self.get_states()

    def execute(self, actions):
        reward = self.environment.act(action=self.available_actions[actions])
        terminal = self.environment.game_over()
        states = self.get_states()

        next_lives = self.environment.lives()
        if next_lives < self.lives:
            if self.life_loss_terminal:
                terminal = True
            elif self.life_loss_punishment > 0.0:
                reward -= self.life_loss_punishment
            self.lives = next_lives

        return states, terminal, reward
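A hypothetical usage sketch for the adapter above (not from the original source; the ROM path is a placeholder). Note that execute() returns (states, terminal, reward) in that order:

import random

env = ArcadeLearningEnvironment(level='breakout.bin', frame_skip=4, seed=42)
states = env.reset()
num_actions = env.actions()['num_values']
for _ in range(1000):
    states, terminal, reward = env.execute(actions=random.randrange(num_actions))
    if terminal:
        states = env.reset()
env.close()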
Example #46
0
def get_num_actions(rom_path, rom_name):
    from ale_python_interface import ALEInterface
    filename = rom_path + "/" + rom_name + ".bin"
    ale = ALEInterface()
    ale.loadROM(filename)
    return len(ale.getMinimalActionSet())
Example #47
0
class KungFuMaster(object):
    def __init__(
            self,
            rom='/home/josema/AI/ALE/Arcade-Learning-Environment/Roms/kung_fu_master.bin',
            trainsessionname='test'):

        self.agent = None
        self.isAuto = True
        self.gui_visible = False
        self.userquit = False
        self.optimalPolicyUser = False  # optimal policy set by user
        self.trainsessionname = trainsessionname
        self.elapsedtime = 0  # elapsed time for this experiment

        self.keys = 0

        # Configuration
        self.pause = False  # game is paused
        self.debug = False

        self.sleeptime = 0.0
        self.command = 0
        self.iteration = 0
        self.cumreward = 0
        self.cumreward100 = 0  # cum reward for statistics
        self.cumscore100 = 0
        self.ngoalreached = 0
        self.max_level = 1

        self.hiscore = 0
        self.hireward = -1000000
        self.resfile = open("data/" + self.trainsessionname + ".dat", "a+")

        self.legal_actions = 0
        self.rom = rom
        self.key_status = []

    def init(self, agent):  # init after creation (uses args set from cli)
        self.ale = ALEInterface()
        self.ale.setInt('random_seed', 123)
        ram_size = self.ale.getRAMSize()
        self.ram = np.zeros((ram_size), dtype=np.uint8)

        if (self.gui_visible):
            os.environ['SDL_VIDEO_CENTERED'] = '1'
            if sys.platform == 'darwin':
                pygame.init()
                self.ale.setBool('sound', False)  # Sound doesn't work on OSX
            elif sys.platform.startswith('linux'):
                pygame.init()

                self.ale.setBool('sound', True)
                self.ale.setBool('display_screen', False)

        self.ale.loadROM(self.rom)
        self.legal_actions = self.ale.getLegalActionSet()

        if (self.gui_visible):
            (self.screen_width, self.screen_height) = self.ale.getScreenDims()
            print("width/height: " + str(self.screen_width) + "/" +
                  str(self.screen_height))

            (display_width, display_height) = (1024, 420)
            self.screen = pygame.display.set_mode(
                (display_width, display_height))

            pygame.display.set_caption(
                "Reinforcement Learning - Sapienza - Jose M Salas")
            self.numpy_surface = np.zeros(
                (self.screen_height, self.screen_width, 3), dtype=np.uint8)

            self.game_surface = pygame.Surface(
                (self.screen_width, self.screen_height))

            pygame.display.flip()
            #init clock
            self.clock = pygame.time.Clock()

        self.agent = agent
        self.nactions = len(
            self.legal_actions
        )  # 0: not moving, 1: left, 2: right, 3: up, 4: down
        for i in range(self.nactions):
            self.key_status.append(False)

        print(self.nactions)
        #        ns = 89999 # Number of statuses if we use enemy type ram info without level number
        #FINAL        ns = 489999 # Number of statuses if we use enemy type ram info
        ns = 4899999  # Number of statuses if we use enemy type ram info

        #        ns = 48999
        print('Number of states: %d' % ns)
        self.agent.init(ns, self.nactions)  # 1 for RA not used here

    def initScreen(self):

        if (self.gui_visible):
            if sys.platform == 'darwin':
                pygame.init()
                self.ale.setBool('sound', False)  # Sound doesn't work on OSX
            elif sys.platform.startswith('linux'):
                pygame.init()

                self.ale.setBool('sound', True)
                self.ale.setBool('display_screen', False)
        if (self.gui_visible):
            (self.screen_width, self.screen_height) = self.ale.getScreenDims()
            print("width/height: " + str(self.screen_width) + "/" +
                  str(self.screen_height))

            (display_width, display_height) = (1024, 420)
            self.screen = pygame.display.set_mode(
                (display_width, display_height))

            pygame.display.set_caption(
                "Reinforcement Learning - Sapienza - Jose M Salas")
            self.numpy_surface = np.zeros(
                (self.screen_height, self.screen_width, 3), dtype=np.uint8)

            self.game_surface = pygame.Surface(
                (self.screen_width, self.screen_height))

            pygame.display.flip()
            #init clock
            self.clock = pygame.time.Clock()

    def reset(self):
        self.pos_x = 0
        self.pos_y = 0
        # Kung fu master observations
        self.enemy_pos = 0
        self.n_enemies = 0
        self.my_pos = 0
        self.danger_pos = 0
        self.danger_type = 0
        self.enemy_type = 0  # 0, 1, 2, 3, 80, 81, 82, 40
        self.blocked = 0
        self.prev_blocked = 0
        self.hold_hit = 0
        self.time_left1 = 0
        self.time_left2 = 0
        self.my_energy = 39
        self.previous_my_energy = 39
        self.lifes = 3
        self.previous_lifes = 3
        self.got_hit = 0
        self.got_blocked = 0
        self.got_unblocked = 0
        self.still_blocked = False
        self.starting_pos = 0
        self.level = 1

        self.score = 0
        self.cumreward = 0
        self.cumscore = 0
        self.action_reward = 0

        self.current_reward = 0  # accumulate reward over all events happened during this action until next different state

        self.prev_state = None  # previous state
        self.firstAction = True  # first action of the episode
        self.finished = False  # episode finished
        self.newstate = True  # new state reached
        self.numactions = 0  # number of actions in this episode
        self.iteration += 1

        self.agent.optimal = self.optimalPolicyUser or (
            self.iteration % 100
        ) == 0  # False #(random.random() < 0.5)  # choose greedy action selection for the entire episode

    def pair_function(self):
        # Combine the number of enemies, the player-blocked flag and the danger
        # type information into 7 different states via a Cantor-style pairing.
        if self.n_enemies > 0:
            self.danger_type = 0
        # print(str(self.n_enemies) + " - " + str(self.danger_type) + ' - ' + str(self.blocked))
        pair = int(
            (0.5 * (self.n_enemies + self.danger_type) *
             (self.n_enemies + self.danger_type + 1) + self.danger_type + 1) *
            (1 - (self.blocked / 128)))
        if pair > 8:
            return 5  # game not started yet
        else:
            return pair

    def enemy_type_s(self):
        if self.enemy_type > 127:
            return (self.enemy_type - 128 + 4)
        elif self.enemy_type == 64:
            return 8
        else:
            return self.enemy_type

    def getstate(self):

        #        print ('enemy type: ' + str(self.enemy_type_s()) + 'level: ' + str(self.level -1) )
        x = (int)((self.level - 1) * 1000000 + self.pair_function() * 100000 +
                  (self.enemy_type_s() * 10000) +
                  np.rint(self.my_pos / 32) * 1000 +
                  np.rint(self.enemy_pos / 32) * 100 +
                  np.rint(self.danger_pos / 32) * 10 +
                  np.rint(self.hold_hit / 16))
        #3FINAL        x = (int)((self.enemy_type_s()*1000) + (self.level-1)*100000 + self.pair_function()*10000 + np.rint(self.enemy_pos/32)*100 + np.rint(self.danger_pos/32)*10 + np.rint(self.hold_hit/16))

        #2NO LEVEL        x = (int)((self.enemy_type_s()*1000) + self.pair_function()*10000 + np.rint(self.enemy_pos/32)*100 + np.rint(self.danger_pos/32)*10 + np.rint(self.hold_hit/16))
        #1NO ENEMY TYPE        x = (int)((self.level-1)*10000 + self.pair_function()*1000 + np.rint(self.enemy_pos/32)*100 + np.rint(self.danger_pos/32)*10 + np.rint(self.hold_hit/16))

        return x

    def goal_reached(self):

        #return (self.my_energy>0 and self.time_left1==0 and self.time_left2<5) #and self.my_energy==39)
        return (self.level == 5)

    def update(self, a):

        self.command = a
        # Update RAM
        self.ale.getRAM(self.ram)

        # Get info from RAM
        self.enemy_pos = self.ram[72]
        self.n_enemies = self.ram[91]
        self.danger_pos = self.ram[73]
        self.my_pos = self.ram[74]
        self.hold_hit = self.ram[77]

        self.enemy_type = self.ram[54]

        if self.level < self.ram[31]:
            self.starting_pos = self.ram[74]
        self.level = self.ram[31]
        self.max_level = max(self.level, self.max_level)

        # Danger/Enemy position:
        # 49 = no danger
        # 50 = danger approaching from left
        # 208 = danger approaching from right

        # ram[96] = 6, danger comes from top
        # ram[96] = 29, danger comes from bottom
        # ram[96] = 188, none
        if self.ram[96] == 6:
            self.danger_type = 0
        elif self.ram[96] == 29:
            self.danger_type = 1
        else:
            self.danger_type = 2

        self.time_left1 = self.ram[27]
        self.time_left2 = self.ram[28]

        self.previous_my_energy = self.my_energy
        self.my_energy = self.ram[75]

        if self.my_energy < self.previous_my_energy and not self.still_blocked and self.ram[
                34] == 0:
            self.got_hit = STATES['GotHit']
        else:
            self.got_hit = 0

        self.previous_lifes = self.lifes
        self.lifes = self.ram[29]
        self.prev_blocked = self.blocked
        self.blocked = self.ram[61]
        if self.blocked > self.prev_blocked and not self.still_blocked:
            self.got_blocked = STATES['GotBlocked']
            self.still_blocked = True
            self.got_unblocked = 0
        elif self.blocked < self.prev_blocked and self.still_blocked:
            self.got_unblocked = STATES['GotUnblocked']
            self.still_blocked = False
            self.got_blocked = 0
        else:
            self.got_blocked = 0
            self.got_unblocked = 0

#        print ('enemy_pos=' +str(self.enemy_pos) + ' - danger_pos=' + str(self.danger_pos) + ' - my_position='
#               + str(self.my_pos) + ' - my_energy=' + str(self.my_energy) + ' - blocked=' + str(self.blocked) + ' - danger_type=' + str(self.danger_type))

        self.prev_state = self.getstate()  # remember previous state

        # print " == Update start ",self.prev_state," action",self.command

        self.current_reward = 0  # accumulate reward over all events happened during this action until next different state
        #print('self.current_reward = 0')
        self.numactions += 1  # total number of actions executed in this episode

        # while (self.prev_state == self.getstate()):

        if (self.firstAction):
            self.starting_pos = self.ram[74]
            self.firstAction = False
            self.current_reward = self.ale.act(a)
        else:
            self.current_reward = self.ale.act(a)

        if self.ram[34] == 0:  #only when playing
            if (a == 3 and self.starting_pos < self.my_pos) or (
                    a == 4 and self.starting_pos > self.my_pos):
                self.action_reward = STATES['MoveFW']
            elif (a == 3 and self.starting_pos > self.my_pos) or (
                    a == 4 and self.starting_pos < self.my_pos):
                self.action_reward = STATES['MoveBW']
            else:
                self.action_reward = STATES['NotMoving']

        self.score += self.current_reward
        self.current_reward += self.action_reward

        #        print('score= ' + str(self.score) + ' current reward=' +str(np.rint(self.current_reward))+ ' - energy=' + str(self.my_energy/39.0) +
        #        ' - got_hot='+ str(self.got_hit) + ' - got_blocked='  + str(self.got_blocked) + ' - got_unblocked=' + str(self.got_unblocked))
        # check if episode terminated

        #self.draw_screen

        if self.goal_reached():
            self.current_reward += STATES['Alive']
            self.ngoalreached += 1
            #self.ale.reset_game()
            self.finished = True

        if (self.ale.game_over()):
            self.current_reward += STATES['Dead']
            if self.level > 1:
                print('game over in level ' + str(self.level))
            if self.my_energy > 0 and self.lifes == 3:
                print('Game over alive????')
            self.ale.reset_game()

            self.finished = True
        if self.level > 2:
            if self.gui_visible == False:
                self.gui_visible = True
                self.initScreen()
        #print " ** Update end ",self.getstate(), " prev ",self.prev_state

    def input(self):
        self.isPressed = False
        if self.gui_visible:

            for event in pygame.event.get():
                if event.type == pygame.QUIT:
                    return False

                if event.type == pygame.KEYDOWN:

                    if event.key == pygame.K_SPACE:
                        self.pause = not self.pause
                        print "Game paused: ", self.pause
                    elif event.key == pygame.K_a:
                        self.isAuto = not self.isAuto
                        self.sleeptime = int(self.isAuto) * 0.07
                    elif event.key == pygame.K_s:
                        self.sleeptime = 1.0
                        self.agent.debug = False
                    elif event.key == pygame.K_d:
                        self.sleeptime = 0.07
                        self.agent.debug = False
                    elif event.key == pygame.K_f:
                        self.sleeptime = 0.005
                        self.agent.debug = False
                    elif event.key == pygame.K_g:
                        self.sleeptime = 0.0
                        self.agent.debug = False
                    elif event.key == pygame.K_o:
                        self.optimalPolicyUser = not self.optimalPolicyUser
                        print "Best policy: ", self.optimalPolicyUser
                    elif event.key == pygame.K_q:
                        self.userquit = True
                        print "User quit !!!"
                    else:

                        pressed = pygame.key.get_pressed()

                        self.keys = 0
                        self.keys |= pressed[pygame.K_UP]
                        self.keys |= pressed[pygame.K_DOWN] << 1
                        self.keys |= pressed[pygame.K_LEFT] << 2
                        self.keys |= pressed[pygame.K_RIGHT] << 3
                        self.keys |= pressed[pygame.K_z] << 4
                        self.command = key_action_tform_table[self.keys]
                        self.key_status[self.command] = True

                if event.type == pygame.KEYUP:
                    pressed = pygame.key.get_pressed()

                    self.keys = 0
                    self.keys |= pressed[pygame.K_UP]
                    self.keys |= pressed[pygame.K_DOWN] << 1
                    self.keys |= pressed[pygame.K_LEFT] << 2
                    self.keys |= pressed[pygame.K_RIGHT] << 3
                    self.keys |= pressed[pygame.K_z] << 4
                    self.command = key_action_tform_table[self.keys]
                    self.key_status[self.command] = False
                    if not (True in self.key_status):
                        self.command = 0

        return True

    def getUserAction(self):
        return self.command

    def getreward(self):

        r = np.rint(
            self.current_reward
        ) + self.got_hit + self.got_blocked + self.got_unblocked - np.rint(
            self.blocked / 128)
        self.cumreward += r

        return r

    def print_report(self, printall=False):
        toprint = printall
        ch = ' '
        if (self.agent.optimal):
            ch = '*'
            toprint = True

        s = 'Iter %6d, sc: %3d, l: %d,  na: %4d, r: %5d %c' % (
            self.iteration, self.score, self.level, self.numactions,
            self.cumreward, ch)

        if self.score > self.hiscore:
            self.hiscore = self.score
            s += ' HISCORE '
            toprint = True
        if self.cumreward > self.hireward:
            self.hireward = self.cumreward
            s += ' HIREWARD '
            toprint = True

        if (toprint):
            print(s)

        self.cumreward100 += self.cumreward
        self.cumscore100 += self.score
        numiter = 100
        if (self.iteration % numiter == 0):
            #self.doSave()
            pgoal = float(self.ngoalreached * 100) / numiter
            print(
                '----------------------------------------------------------------------------------------------------------------------'
            )
            print(
                "%s %6d avg last 100: reward %d | score %.2f | level %d | p goals %.1f %%"
                % (self.trainsessionname, self.iteration, self.cumreward100 /
                   100, float(self.cumscore100) / 100, self.max_level, pgoal))
            print(
                '----------------------------------------------------------------------------------------------------------------------'
            )
            self.cumreward100 = 0
            self.cumscore100 = 0
            self.ngoalreached = 0

        sys.stdout.flush()

        self.resfile.write(
            "%d,%d,%d,%d\n" %
            (self.score, self.cumreward, self.goal_reached(), self.numactions))
        self.resfile.flush()

    def draw(self):
        if self.gui_visible:

            self.screen.fill((0, 0, 0))

            self.ale.getScreenRGB(self.numpy_surface)

            pygame.surfarray.blit_array(
                self.game_surface, np.transpose(self.numpy_surface, (1, 0, 2)))
            #        pygame.pixelcopy.array_to_surface(self.game_surface, np.transpose(self.numpy_surface,(1,0,2)))
            self.screen.blit(
                pygame.transform.scale2x(
                    pygame.transform.scale(
                        self.game_surface,
                        (self.screen_height, self.screen_height))), (0, 0))

            #Display ram bytes
            font = pygame.font.SysFont("Ubuntu Mono", 32)
            text = font.render("RAM: ", 1, (255, 208, 208))
            self.screen.blit(text, (430, 10))

            font = pygame.font.SysFont("Ubuntu Mono", 25)
            height = font.get_height() * 1.2

            line_pos = 40
            ram_pos = 0
            while (ram_pos < 128):
                ram_string = ''.join([
                    "%02X " % self.ram[x]
                    for x in range(ram_pos, min(ram_pos + 16, 128))
                ])
                text = font.render(ram_string, 1, (255, 255, 255))
                self.screen.blit(text, (440, line_pos))
                line_pos += height
                ram_pos += 16

            #display current action
            font = pygame.font.SysFont("Ubuntu Mono", 32)
            text = font.render("Current Action: " + str(self.command), 1,
                               (208, 208, 255))
            height = font.get_height() * 1.2
            self.screen.blit(text, (430, line_pos))
            line_pos += height

            #display reward
            font = pygame.font.SysFont("Ubuntu Mono", 30)
            text = font.render("Total Reward: " + str(self.cumreward), 1,
                               (208, 255, 255))
            self.screen.blit(text, (430, line_pos))

            pygame.display.flip()
#            clock.tick(60.)
        else:
            return 0

    def quit(self):
        self.resfile.close()
        pygame.quit()
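A hypothetical driver sketch for the class above (not from the original source). The stub agent only satisfies the attributes the class touches; the ROM path is a placeholder and a data/ directory is assumed to exist for the results file:

class StubAgent:
    optimal = False
    debug = False
    def init(self, ns, nactions):
        pass  # a real agent would allocate its value tables here

game = KungFuMaster(rom='kung_fu_master.bin', trainsessionname='demo')
game.init(StubAgent())
game.reset()
while not game.finished:
    game.update(game.getUserAction())  # command defaults to NOOP
    reward = game.getreward()
game.print_report()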
class AtariEmulator:

	def __init__(self, args):
		''' Initialize Atari environment '''

		# Parameters
		self.buffer_length = args.buffer_length
		self.screen_dims = args.screen_dims
		self.frame_skip = args.frame_skip
		self.blend_method = args.blend_method
		self.reward_processing = args.reward_processing
		self.max_start_wait = args.max_start_wait
		self.history_length = args.history_length
		self.start_frames_needed = self.buffer_length - 1 + ((args.history_length - 1) * self.frame_skip)

		#Initialize ALE instance
		self.ale = ALEInterface()
		self.ale.setFloat(b'repeat_action_probability', 0.0)
		if args.watch:
			self.ale.setBool(b'sound', True)
			self.ale.setBool(b'display_screen', True)
		self.ale.loadROM(str.encode(args.rom_path + '/' + args.game + '.bin'))

		self.buffer = np.empty((self.buffer_length, 210, 160))
		self.current = 0
		self.action_set = self.ale.getMinimalActionSet()
		self.lives = self.ale.lives()

		self.reset()


	def get_possible_actions(self):
		''' Return list of possible actions for game '''
		return self.action_set

	def get_screen(self):
		''' Add screen to frame buffer '''
		self.buffer[self.current] = np.squeeze(self.ale.getScreenGrayscale())
		self.current = (self.current + 1) % self.buffer_length


	def reset(self):
		self.ale.reset_game()
		self.lives = self.ale.lives()

		if self.max_start_wait < 0:
			print("ERROR: max start wait decreased beyond 0")
			sys.exit()
		elif self.max_start_wait <= self.start_frames_needed:
			wait = 0
		else:
			wait = random.randint(0, self.max_start_wait - self.start_frames_needed)
		for _ in range(wait):
			self.ale.act(self.action_set[0])

		# Fill frame buffer
		self.get_screen()
		for _ in range(self.buffer_length - 1):
			self.ale.act(self.action_set[0])
			self.get_screen()
		# get initial_states
		state = [(self.preprocess(), 0, 0, False)]
		for step in range(self.history_length - 1):
			state.append(self.run_step(0))

		# make sure agent hasn't died yet
		if self.isTerminal():
			print("Agent lost during start wait.  Decreasing max_start_wait by 1")
			self.max_start_wait -= 1
			return self.reset()

		return state


	def run_step(self, action):
		''' Apply action to game and return next screen and reward '''

		raw_reward = 0
		for step in range(self.frame_skip):
			raw_reward += self.ale.act(self.action_set[action])
			self.get_screen()

		reward = None
		if self.reward_processing == 'clip':
			reward = np.clip(raw_reward, -1, 1)
		else:
			reward = raw_reward

		terminal = self.isTerminal()
		self.lives = self.ale.lives()

		return (self.preprocess(), action, reward, terminal, raw_reward)



	def preprocess(self):
		''' Preprocess frame for agent '''

		img = None

		if self.blend_method == "max":
			img = np.amax(self.buffer, axis=0)

		return cv2.resize(img, self.screen_dims, interpolation=cv2.INTER_LINEAR)

	def isTerminal(self):
		return (self.isGameOver() or (self.lives > self.ale.lives()))


	def isGameOver(self):
		return self.ale.game_over()
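A hypothetical usage sketch (not from the original source). The args namespace mirrors the attributes read in __init__ above; all values are placeholders:

from types import SimpleNamespace

args = SimpleNamespace(
    buffer_length=2, screen_dims=(84, 84), frame_skip=4, blend_method='max',
    reward_processing='clip', max_start_wait=30, history_length=4,
    watch=False, rom_path='roms', game='breakout')
emulator = AtariEmulator(args)  # __init__ calls reset() internally
screen, action, reward, terminal, raw_reward = emulator.run_step(0)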
Example #49
0
File: train.py  Project: cgel/PDQN_tf
import cv2
import random
import threading
import sys
import time
import os
from ale_python_interface import ALEInterface
from replayMemory import ReplayMemory
from buildGraph import createQNetwork, build_train_op

ale = ALEInterface()
viz = False
rom_name = "roms/Breakout.bin"
ale.setBool('sound', False)
ale.setBool('display_screen', viz)
ale.setInt("frame_skip", 4)
ale.loadROM(rom_name)
legal_actions = ale.getMinimalActionSet()
action_map = {}
for i in range(len(legal_actions)):
    action_map[i] = legal_actions[i]
action_num = len(action_map)


class config:
    batch_size = args.batch_size
    action_num = action_num
    replay_memory_capacity = args.replay_memory_capacity
    steps_before_training = args.steps_before_training
    buff_size = 4
    device = args.device
    gamma = args.gamma
Example #50
0
class Emulator:
    def __init__(self,
                 rom_path,
                 rom_name,
                 visualize,
                 actor_id,
                 rseed,
                 single_life_episodes=False):

        self.ale = ALEInterface()

        self.ale.setInt("random_seed", rseed * (actor_id + 1))

        # For fuller control on explicit action repeat (>= ALE 0.5.0)
        self.ale.setFloat("repeat_action_probability", 0.0)

        # Disable frame_skip and color_averaging
        # See: http://is.gd/tYzVpj
        self.ale.setInt("frame_skip", 1)
        self.ale.setBool("color_averaging", False)
        self.ale.loadROM(rom_path + "/" + rom_name + ".bin")
        self.legal_actions = self.ale.getMinimalActionSet()
        self.screen_width, self.screen_height = self.ale.getScreenDims()
        #self.ale.setBool('display_screen', True)

        # Processed historical frames that will be fed into the network
        # (i.e., four 84x84 images)
        self.screen_images_processed = np.zeros(
            (IMG_SIZE_X, IMG_SIZE_Y, NR_IMAGES))
        self.rgb_screen = np.zeros((self.screen_height, self.screen_width, 3),
                                   dtype=np.uint8)
        self.gray_screen = np.zeros((self.screen_height, self.screen_width, 1),
                                    dtype=np.uint8)

        self.frame_pool = np.empty((2, self.screen_height, self.screen_width))
        self.current = 0
        self.lives = self.ale.lives()

        self.visualize = visualize
        self.visualize_processed = False
        self.windowname = rom_name + ' ' + str(actor_id)
        if self.visualize:
            logger.debug("Opening emulator window...")
            #from skimage import io
            #io.use_plugin('qt')
            cv2.startWindowThread()
            cv2.namedWindow(self.windowname)
            logger.debug("Emulator window opened")

        if self.visualize_processed:
            logger.debug("Opening processed frame window...")
            cv2.startWindowThread()
            logger.debug("Processed frame window opened")
            cv2.namedWindow(self.windowname + "_processed")

        self.single_life_episodes = single_life_episodes

    def get_screen_image(self):
        """ Add screen (luminance) to frame pool """
        # [screen_image, screen_image_rgb] = [self.ale.getScreenGrayscale(),
        #     self.ale.getScreenRGB()]
        self.ale.getScreenGrayscale(self.gray_screen)
        self.ale.getScreenRGB(self.rgb_screen)
        self.frame_pool[self.current] = np.squeeze(self.gray_screen)
        self.current = (self.current + 1) % FRAMES_IN_POOL
        return self.rgb_screen

    def new_game(self):
        """ Restart game """
        self.ale.reset_game()
        self.lives = self.ale.lives()

        if MAX_START_WAIT < 0:
            logger.debug("Cannot time travel yet.")
            sys.exit()
        elif MAX_START_WAIT > 0:
            wait = random.randint(0, MAX_START_WAIT)
        else:
            wait = 0
        for _ in xrange(wait):
            self.ale.act(self.legal_actions[0])

    def process_frame_pool(self):
        """ Preprocess frame pool """

        img = None
        if BLEND_METHOD == "max_pool":
            img = np.amax(self.frame_pool, axis=0)

        #img resize(img[:210, :], (84, 84))
        img = cv2.resize(img[:210, :], (84, 84),
                         interpolation=cv2.INTER_LINEAR)

        img = img.astype(np.float32)
        img *= (1.0 / 255.0)

        return img
        # Reduce height to 210, if not so
        #cropped_img = img[:210, :]
        # Downsample to 110x84
        #down_sampled_img = resize(cropped_img, (84, 84))

        # Crop to 84x84 playing area
        #stackable_image = down_sampled_img[:, 26:110]
        #return stackable_image

    def action_repeat(self, a):
        """ Repeat action and grab screen into frame pool """
        reward = 0
        for i in xrange(ACTION_REPEAT):
            reward += self.ale.act(self.legal_actions[a])
            new_screen_image_rgb = self.get_screen_image()
        return reward, new_screen_image_rgb

    def get_reshaped_state(self, state):
        return np.reshape(state, (1, IMG_SIZE_X, IMG_SIZE_Y, NR_IMAGES))
        #return np.reshape(self.screen_images_processed,
        #    (1, IMG_SIZE_X, IMG_SIZE_Y, NR_IMAGES))

    def get_initial_state(self):
        """ Get the initial state """
        self.new_game()
        for step in xrange(NR_IMAGES):
            reward, new_screen_image_rgb = self.action_repeat(0)
            self.screen_images_processed[:, :,
                                         step] = self.process_frame_pool()
            self.show_screen(new_screen_image_rgb)
        if self.is_terminal():
            # MAX_START_WAIT is a module-level constant; rebinding it needs a
            # global declaration, otherwise this raises UnboundLocalError.
            global MAX_START_WAIT
            MAX_START_WAIT -= 1
            return self.get_initial_state()
        return np.copy(self.screen_images_processed)  #get_reshaped_state()

    def next(self, action):
        """ Get the next state, reward, and game over signal """
        reward, new_screen_image_rgb = self.action_repeat(np.argmax(action))
        self.screen_images_processed[:, :, 0:3] = \
            self.screen_images_processed[:, :, 1:4]
        self.screen_images_processed[:, :, 3] = self.process_frame_pool()
        self.show_screen(new_screen_image_rgb)
        terminal = self.is_terminal()
        self.lives = self.ale.lives()
        return np.copy(self.screen_images_processed), reward, terminal  # get_reshaped_state(), reward, terminal

    def show_screen(self, image):
        """ Show visuals for raw and processed images """
        if self.visualize:
            #io.imshow(image[:210, :], fancy=True)
            cv2.imshow(self.windowname, image[:210, :])
        if self.visualize_processed:
            #io.imshow(self.screen_images_processed[:, :, 3], fancy=True)
            cv2.imshow(self.windowname + "_processed",
                       self.screen_images_processed[:, :, 3])

    def is_terminal(self):
        if self.single_life_episodes:
            return (self.is_over() or (self.lives > self.ale.lives()))
        else:
            return self.is_over()

    def is_over(self):
        return self.ale.game_over()
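For reference, a driving loop for the emulator above might look as follows -- a minimal sketch, assuming the class is named AtariEmulator with a hypothetical (rom_name, actor_id) constructor, and keeping in mind that next() expects a one-hot action vector (it calls np.argmax internally):

import numpy as np

emulator = AtariEmulator('breakout', 0)     # hypothetical constructor args
n_actions = len(emulator.legal_actions)

state = emulator.get_initial_state()        # (84, 84, 4) stacked frames
terminal = False
while not terminal:
    a = np.zeros(n_actions, dtype=np.float32)
    a[np.random.randint(n_actions)] = 1.0   # random one-hot action
    state, reward, terminal = emulator.next(a)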
Example #51
0
if frame_pooling_style == "color_averaging":
    ale.setInt('frame_skip', agent_params["frame_skip"])

# Set USE_SDL to true to display the screen. ALE must be compiled
# with SDL enabled for this to work. On OSX, pygame init is used to
# proxy-call SDL_main.
USE_SDL = False  # True
if USE_SDL:
    ale.setBool('display_screen', True)
    if sys.platform == 'darwin':
        import pygame
        pygame.init()
        ale.setBool('sound', False)  # Sound doesn't work on OSX

# Load the ROM file
ale.loadROM('../roms/' + sys.argv[1] + '.bin')

# Get the list of available modes and difficulties
avail_modes = ale.getAvailableModes()
avail_diff = ale.getAvailableDifficulties()

print 'Number of available modes: ', len(avail_modes)
print 'Number of available difficulties: ', len(avail_diff)

# Get the list of legal actions
if use_minimal_actions:
    action_set = ale.getMinimalActionSet()
else:
    action_set = ale.getLegalActionSet()

agent_params["n_actions"] = len(action_set)
Example #52
0
class Atari:
    def __init__(self, rom_dir):
        self.ale = ALEInterface()

        # Set settings
        self.ale.setInt("random_seed", 123)
        self.frame_skip = 1
        self.ale.setInt("frame_skip", self.frame_skip)
        self.ale.setBool("display_screen", False)
        self.ale.setBool("sound", True)
        self.record_sound_for_user = True
        self.ale.setBool("record_sound_for_user", self.record_sound_for_user)

        # NOTE recording audio to file still works. But if both file recording and
        # record_sound_for_user are enabled, then only the latter is done
        #  self.ale.setString("record_sound_filename", "")

        # Get settings
        self.ale.loadROM(rom_dir)
        self.action_count = 0
        self.screen_width, self.screen_height = self.ale.getScreenDims()
        self.legal_actions = self.ale.getLegalActionSet()
        self.framerate = 60  # Should read from ALE settings technically
        self.samples_per_frame = 512  # Should read from ALE SoundExporter class technically
        self.audio_freq = self.framerate * self.samples_per_frame  #/self.frame_skip
        self.all_audio = np.zeros((0, ), dtype=np.uint8)

        # Saving audio/video to disk for verification.
        self.save_to_file = True  # NOTE set to False to test actual screen/audio query speed!
        if self.save_to_file:
            self.save_dir_av = './logs_av_seq_Example'  # Save png sequence and audio wav file here
            self.save_dir_movies = './log_movies_Example'
            self.save_image_prefix = 'image_frames'
            self.save_audio_filename = 'audio_user_recorder.wav'
            self.create_save_dir(self.save_dir_av)

    def take_action(self):
        action = self.legal_actions[np.random.randint(self.legal_actions.size)]
        self.ale.act(action)

    def create_save_dir(self, directory):
        # Remove previous image/audio logs
        if os.path.exists(directory):
            shutil.rmtree(directory)
        os.makedirs(directory)

    def get_image_and_audio(self):
        np_data_image = np.zeros(self.screen_width * self.screen_height * 3,
                                 dtype=np.uint8)
        if self.record_sound_for_user:
            np_data_audio = np.zeros(self.ale.getAudioSize(), dtype=np.uint8)
            self.ale.getScreenRGBAndAudio(np_data_image, np_data_audio)

            # Also supports independent audio queries if user desires:
            #  self.ale.getAudio(np_data_audio)
        else:
            #  np_data_audio = 0
            np_data_audio = np.zeros(self.ale.getAudioSize(), dtype=np.uint8)
            self.ale.getAudio(np_data_audio)
            self.ale.getScreenRGB(np_data_image)

        return np.reshape(np_data_image,
                          (self.screen_height, self.screen_width,
                           3)), np.asarray(np_data_audio)

    def audio_to_mfcc(self, audio):
        mfcc_data = mfcc(signal=audio,
                         samplerate=self.audio_freq,
                         winlen=0.002,
                         winstep=0.0006)
        mfcc_data = np.swapaxes(mfcc_data, 0, 1)  # Time on x-axis

        # Normalization
        min_data = np.min(mfcc_data.flatten())
        max_data = np.max(mfcc_data.flatten())
        mfcc_data = (mfcc_data - min_data) / (max_data - min_data)

        return mfcc_data

    def save_image(self, image):
        number = str(self.action_count).zfill(6)
        scipy.misc.imsave(
            os.path.join(self.save_dir_av,
                         self.save_image_prefix + number + '.png'), image)

    def save_audio(self, audio):
        wavfile.write(os.path.join(self.save_dir_av, self.save_audio_filename),
                      self.audio_freq, audio)

    def save_movie(self, movie_name):
        # Use ffmpeg to convert the saved img sequences and audio to mp4

        # Video recording
        command = [
            "ffmpeg",
            '-y',  # overwrite output file if it exists
            '-r',
            str(self.framerate),  # frames per second
            '-i',
            os.path.join(self.save_dir_av, self.save_image_prefix +
                         '%6d.png')  # Video input comes from pngs
        ]

        # Audio if available
        if self.record_sound_for_user:
            command.extend([
                '-i',
                os.path.join(self.save_dir_av, self.save_audio_filename)
            ])  # Audio input comes from wav

        # Codecs and output
        command.extend([
            '-c:v',
            'libx264',  # Video codec
            '-c:a',
            'mp3',  # Audio codec
            os.path.join(self.save_dir_movies,
                         movie_name + '.mp4')  # Output dir
        ])

        # Make movie dir and write the mp4
        if not os.path.exists(self.save_dir_movies):
            os.makedirs(self.save_dir_movies)
        sp.call(command)  # NOTE: requires ffmpeg! Otherwise this fails with a 'dir doesn't exist' error.

    def concat_image_audio(self, image, audio_mfcc):
        # Concatenates image and audio to test sync'ing in saved .mp4
        audio_mfcc = scipy.misc.imresize(audio_mfcc, np.shape(
            image))  # Resize MFCC image to be same size as screen image
        cmap = plt.get_cmap('viridis')  # Apply a colormap to spectrogram
        audio_mfcc = (np.delete(cmap(audio_mfcc), 3, 2) * 255.).astype(
            np.uint8)  # Gray MFCC -> 4 channel colormap -> 3 channel colormap
        image = np.concatenate((image, audio_mfcc),
                               axis=1)  # Concat screen image and MFCC image
        return image

    def plot_mfcc(self, audio_mfcc):
        plt.clf()
        plt.imshow(audio_mfcc,
                   interpolation='bilinear',
                   cmap=plt.get_cmap('viridis'))
        plt.pause(0.001)
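A possible driver for this class, using only the methods defined above -- a sketch, assuming an ALE build with the user sound-recording support the class relies on and a hypothetical ROM path:

atari = Atari('./roms/ms_pacman.bin')
for _ in range(600):  # roughly ten seconds at 60 fps
    image, audio = atari.get_image_and_audio()
    atari.all_audio = np.concatenate((atari.all_audio, audio))
    if atari.save_to_file:
        atari.save_image(image)
    atari.take_action()
    atari.action_count += 1  # save_image() numbers frames by action_count
if atari.save_to_file:
    atari.save_audio(atari.all_audio)
    atari.save_movie('example_movie')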
Example #53
0
File: 0906.py  Project: boluoweifenda/DQN
#     print ''

# initialization
np.random.seed(SEED)

ale = ALEInterface()
if SEED is None:
    ale.setInt('random_seed', 0)
else:
    ale.setInt('random_seed', SEED)
ale.setInt("frame_skip", frameSkip)
ale.setBool('color_averaging', True)
ale.setBool('sound', False)
ale.setBool('display_screen', False)
ale.setFloat("repeat_action_probability", 0.0)
ale.loadROM(romPath)
legal_actions = ale.getMinimalActionSet()
n_actions = len(legal_actions)
opt.n_actions = n_actions

explorationRateDelta = (initialExplorationRate - finalExplorationRate) / (finalExplorationFrame - startLearningFrame)
explorationRate = initialExplorationRate + startLearningFrame * explorationRateDelta

if networkType == "CNN":
    width = 84
    height = 84
    Dim = [height,width]
else:
    width = 36
    height = 41
    Dim = [height*width]
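The two explorationRate lines above implement a linearly annealed epsilon-greedy schedule. Written out as a function over the frame counter (a sketch reusing the same variable names):

def exploration_rate(frame):
    # Anneal linearly from initialExplorationRate down to
    # finalExplorationRate between startLearningFrame and finalExplorationFrame.
    if frame <= startLearningFrame:
        return initialExplorationRate
    if frame >= finalExplorationFrame:
        return finalExplorationRate
    return initialExplorationRate - (frame - startLearningFrame) * explorationRateDelta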
Example #54
0
class AtariPlayer(RLEnvironment):
    """
    A wrapper for the Atari emulator.
    NOTE: will automatically restart when a real episode ends
    """
    def __init__(self,
                 rom_file,
                 viz=0,
                 height_range=(None, None),
                 frame_skip=4,
                 image_shape=(84, 84),
                 nullop_start=30,
                 live_lost_as_eoe=True):
        """
        :param rom_file: path to the rom
        :param frame_skip: skip every k frames and repeat the action
        :param image_shape: (w, h)
        :param height_range: (h1, h2) to cut
        :param viz: visualization to be done.
            Set to 0 to disable.
            Set to a positive number to be the delay between frames to show.
            Set to a string to be a directory to store frames.
        :param nullop_start: start with random number of null ops
        :param live_lost_as_eoe: consider loss of lives as end of episode. Useful for training.
        """
        super(AtariPlayer, self).__init__()
        self.ale = ALEInterface()
        self.rng = get_rng(self)

        self.ale.setInt("random_seed", self.rng.randint(0, 10000))
        self.ale.setBool("showinfo", False)

        try:
            ALEInterface.setLoggerMode(ALEInterface.Logger.Warning)
        except AttributeError:
            log_once()

        self.ale.setInt("frame_skip", 1)
        self.ale.setBool('color_averaging', False)
        # manual.pdf suggests otherwise.
        self.ale.setFloat('repeat_action_probability', 0.0)

        # viz setup
        if isinstance(viz, six.string_types):
            assert os.path.isdir(viz), viz
            self.ale.setString('record_screen_dir', viz)
            viz = 0
        if isinstance(viz, int):
            viz = float(viz)
        self.viz = viz
        if self.viz and isinstance(self.viz, float):
            self.windowname = os.path.basename(rom_file)
            cv2.startWindowThread()
            cv2.namedWindow(self.windowname)

        self.ale.loadROM(rom_file)
        self.width, self.height = self.ale.getScreenDims()
        self.actions = self.ale.getMinimalActionSet()

        self.live_lost_as_eoe = live_lost_as_eoe
        self.frame_skip = frame_skip
        self.nullop_start = nullop_start
        self.height_range = height_range
        self.image_shape = image_shape

        self.current_episode_score = StatCounter()
        self.restart_episode()

    def _grab_raw_image(self):
        """
        :returns: the current 3-channel image
        """
        m = self.ale.getScreenRGB()
        return m.reshape((self.height, self.width, 3))

    def current_state(self):
        """
        :returns: a gray-scale (h, w, 1) image
        """
        ret = self._grab_raw_image()
        # max-pooled over the last screen
        ret = np.maximum(ret, self.last_raw_screen)
        if self.viz:
            if isinstance(self.viz, float):
                #m = cv2.resize(ret, (1920,1200))
                cv2.imshow(self.windowname, ret)
                time.sleep(self.viz)
        ret = ret[self.height_range[0]:self.height_range[1], :]
        # 0.299, 0.587, 0.114. same as rgb2y in torch/image
        ret = cv2.cvtColor(ret, cv2.COLOR_RGB2GRAY)
        ret = cv2.resize(ret, self.image_shape)
        ret = np.expand_dims(ret, axis=2)
        return ret

    def get_action_space(self):
        return DiscreteActionSpace(len(self.actions))

    def restart_episode(self):
        if self.current_episode_score.count > 0:
            self.stats['score'].append(self.current_episode_score.sum)
        self.current_episode_score.reset()
        self.ale.reset_game()

        # random null-ops start
        n = self.rng.randint(self.nullop_start)
        self.last_raw_screen = self._grab_raw_image()
        for k in range(n):
            if k == n - 1:
                self.last_raw_screen = self._grab_raw_image()
            self.ale.act(0)

    def action(self, act):
        """
        :param act: an index of the action
        :returns: (reward, isOver)
        """
        oldlives = self.ale.lives()
        r = 0
        for k in range(self.frame_skip):
            if k == self.frame_skip - 1:
                self.last_raw_screen = self._grab_raw_image()
            r += self.ale.act(self.actions[act])
            newlives = self.ale.lives()
            if self.ale.game_over() or \
                    (self.live_lost_as_eoe and newlives < oldlives):
                break

        self.current_episode_score.feed(r)
        isOver = self.ale.game_over()
        if isOver:
            self.restart_episode()
        if self.live_lost_as_eoe:
            isOver = isOver or newlives < oldlives
        return (r, isOver)

    def get_stat(self):
        try:
            return {
                'avg_score': np.mean(self.stats['score']),
                'max_score': float(np.max(self.stats['score']))
            }
        except ValueError:
            return {}
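Typical use of this wrapper -- a sketch, assuming the tensorpack helpers it relies on (get_rng, StatCounter, DiscreteActionSpace) are importable and a breakout ROM exists at the given path:

player = AtariPlayer('breakout.bin', viz=0.03)
state = player.current_state()      # grayscale (84, 84, 1), max-pooled with the previous screen
reward, is_over = player.action(0)  # act by index into player.actions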
Example #55
0
class GameState(object):
  def __init__(self, rand_seed, display=False):
    self.ale = ALEInterface()
    self.ale.setInt('random_seed', rand_seed)

    if display:
      self._setup_display()
    
    self.ale.loadROM(ROM)

    # height=210, width=160
    self.screen = np.empty((210, 160, 1), dtype=np.uint8)
    
    no_action = 0
    
    self.reward = self.ale.act(no_action)
    self.terminal = self.ale.game_over()

    # screen shape is (210, 160, 1)
    self.ale.getScreenGrayscale(self.screen)
    
    # reshape to (210, 160)
    reshaped_screen = np.reshape(self.screen, (210, 160))
    
    # resize to height=110, width=84
    resized_screen = cv2.resize(reshaped_screen, (84, 110))
    
    x_t = resized_screen[18:102,:]
    x_t = x_t.astype(np.float32)
    x_t *= (1.0/255.0)
    self.s_t = np.stack((x_t, x_t, x_t, x_t), axis = 2)

    # collect only the actions that are actually used
    self.real_actions = self.ale.getMinimalActionSet()
    
  def _setup_display(self):
    if sys.platform == 'darwin':
      import pygame
      pygame.init()
      self.ale.setBool('sound', False)
    elif sys.platform.startswith('linux'):
      self.ale.setBool('sound', True)
    self.ale.setBool('display_screen', True)
    
  def process(self, action):
    # convert to one of the actually used actions (out of the 18 legal ones)
    real_action = self.real_actions[action]
    self.reward = self.ale.act(real_action)
    #self.reward = self.ale.act(action)
    self.terminal = self.ale.game_over()
    
    # screen shape is (210, 160, 1)
    self.ale.getScreenGrayscale(self.screen)
    
    # reshape to (210, 160)
    reshaped_screen = np.reshape(self.screen, (210, 160))
    
    # height=210, width=160
    
    # resize to height=110, width=84
    resized_screen = cv2.resize(reshaped_screen, (84, 110))
    x_t1 = resized_screen[18:102,:]
    x_t1 = np.reshape(x_t1, (84, 84, 1))
    x_t1 = x_t1.astype(np.float32)    
    x_t1 *= (1.0/255.0)
    
    self.s_t1 = np.append(x_t1, self.s_t[:,:,0:3], axis = 2)
    if self.terminal:
      self.ale.reset_game()

  def update(self):
    self.s_t = self.s_t1
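The np.append(x_t1, self.s_t[:, :, 0:3], axis=2) line implements a rolling four-frame stack with the newest frame at channel 0. In isolation (a small numpy sketch):

import numpy as np

s_t = np.zeros((84, 84, 4), dtype=np.float32)    # previous stack
x_t1 = np.ones((84, 84, 1), dtype=np.float32)    # newest frame
s_t1 = np.append(x_t1, s_t[:, :, 0:3], axis=2)   # drop the oldest channel
assert s_t1.shape == (84, 84, 4)
assert s_t1[:, :, 0].max() == 1.0                # newest frame sits at channel 0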
Example #56
0
class ALEEnvironment(BaseEnvironment):
    """
    A wrapper of Arcade Learning Environment, which inherits all members of ``BaseEnvironment``.
    """
    # 63 games
    ADVENTURE = "adventure"
    AIR_RAID = "air_raid"
    ALIEN = "alien"
    AMIDAR = "amidar"
    ASSAULT = "assault"
    ASTERIX = "asterix"
    ASTEROIDS = "asteroids"
    ATLANTIS = "aslantis"
    BANK_HEIST = "bank_heist"
    BATTLE_ZONE = "battle_zone"
    BEAM_RIDER = "beam_rider"
    BERZERK = "berzerk"
    BOWLING = "bowling"
    BOXING = "boxing"
    BREAKOUT = "breakout"
    CARNIVAL = "carnival"
    CENTIPEDE = "centipede"
    CHOPPER_COMMAND = "chopper_command"
    CRAZY_CLIMBER = "crazy_climber"
    DEFENDER = "defender"
    DEMON_ATTACK = "demon_attack"
    DOUBLE_DUNK = "double_dunk"
    ELEVATOR_ACTION = "elevator_action"
    ENDURO = "enduro"
    FISHING_DERBY = "fishing_derby"
    FREEWAY = "freeway"
    FROSTBITE = "frostbite"
    GOPHER = "gopher"
    GRAVITAR = "gravitar"
    HERO = "hero"
    ICE_HOCKEY = "ice_hockey"
    JAMESBOND = "jamesbond"
    JOURNEY_ESCAPE = "journey_escape"
    KABOOM = "kaboom"
    KANGAROO = "kangaroo"
    KRULL = "krull"
    KUNGFU_MASTER = "kung_fu_master"
    MONTEZUMA = "montezuma_revenge"
    MS_PACMAN = "ms_pacman"
    UNKNOWN = "name_this_game"
    PHOENIX = "phoenix"
    PITFALL = "pitfall"
    PONG = "pong"
    POOYAN = "pooyan"
    PRIVATE_EYE = "private_eye"
    QBERT = "qbert"
    RIVERRAID = "riverraid"
    ROAD_RUNNER = "road_runner"
    ROBOTANK = "robotank"
    SEAQUEST = "seaquest"
    SKIING = "skiing"
    SOLARIS = "solaris"
    SPACE_INVADERS = "space_invaders"
    STAR_GUNNER = "star_gunner"
    TENNIS = "tennis"
    TIME_PILOT = "time_pilot"
    TUTANKHAM = "tutankham"
    UP_N_DOWN = "up_n_down"
    VENTURE = "venture"
    VIDEO_PINBALL = "video_pinball"
    WIZARD_OF_WOR = "wizard_of_wor"
    YARS_REVENGE = "yars_revenge"
    ZAXXON = "zaxxon"

    def __init__(self,
                 rom_name,
                 frame_skip=4,
                 repeat_action_probability=0.,
                 max_episode_steps=10000,
                 loss_of_life_termination=False,
                 loss_of_life_negative_reward=False,
                 bitwise_max_on_two_consecutive_frames=False,
                 is_render=False,
                 seed=None,
                 startup_policy=None,
                 disable_actions=None,
                 num_of_sub_actions=-1,
                 state_processor=AtariProcessor(resize_shape=(84, 84),
                                                convert_to_grayscale=True)):

        os.environ['SDL_VIDEO_CENTERED'] = '1'

        file_exist = isfile(ALEEnvironment.get_rom_path(rom_name))
        if not file_exist:
            raise ValueError("Rom not found ! Please put rom " + rom_name +
                             ".bin into: " + ALEEnvironment.get_rom_path())

        self.__rom_name = rom_name
        self.__ale = ALEInterface()

        if frame_skip < 0:
            print("Invalid frame_skip param ! Set default frame_skip = 4")
            self.__frame_skip = 4
        else:
            self.__frame_skip = frame_skip

        if repeat_action_probability < 0 or repeat_action_probability > 1:
            raise ValueError("Invalid repeat_action_probability")
        else:
            self.__repeat_action_probability = repeat_action_probability

        self.__max_episode_steps = max_episode_steps
        self.__loss_of_life_termination = loss_of_life_termination
        self.__loss_of_life_negative_reward = loss_of_life_negative_reward
        self.__max_2_frames = bitwise_max_on_two_consecutive_frames

        # Max 2 frames only works with grayscale
        self.__grayscale = False
        if state_processor is not None and type(
                state_processor
        ) is AtariProcessor and state_processor.get_grayscale():
            self.__grayscale = True

        if self.__max_2_frames and self.__frame_skip > 1 and self.__grayscale:
            self.__max_2_frames = True
        else:
            self.__max_2_frames = False

        self.__is_render = is_render
        self.__processor = state_processor

        if seed is None or seed <= 0 or seed >= 9999:
            if seed is not None and (seed < 0 or seed >= 9999):
                print("Invalid seed ! Default seed = randint(0, 9999")
            self.__seed = np.random.randint(0, 9999)
            self.__random_seed = True
        else:
            self.__random_seed = False
            self.__seed = seed

        self.__current_steps = 0
        self.__is_life_lost = False
        self.__is_terminal = False
        self.__current_lives = 0
        self.__action_reduction = num_of_sub_actions
        self.__scr_width, self.__scr_height, self.__action_set = self.__init_ale(
        )
        self.__prev_buffer = np.empty((self.__scr_height, self.__scr_width, 3),
                                      dtype=np.uint8)
        self.__current_buffer = np.empty(
            (self.__scr_height, self.__scr_width, 3), dtype=np.uint8)
        self.__current_state = None
        self.__prev_state = None
        self.__startup_policy = startup_policy
        if disable_actions is None:
            self.__dis_act = []
        else:
            self.__dis_act = disable_actions

        if self.__processor.get_number_of_objectives() > 1:
            self.__multi_objs = True
        else:
            self.__multi_objs = False

    def get_processor(self):
        return self.__processor

    def __init_ale(self):

        self.__ale.setBool(b'display_screen', self.__is_render)

        if self.__max_2_frames and self.__frame_skip > 1:
            self.__ale.setInt(b'frame_skip', 1)
        else:
            self.__ale.setInt(b'frame_skip', self.__frame_skip)

        self.__ale.setInt(b'random_seed', self.__seed)
        self.__ale.setFloat(b'repeat_action_probability',
                            self.__repeat_action_probability)
        self.__ale.setBool(b'color_averaging', False)

        self.__ale.loadROM(
            ALEEnvironment.get_rom_path(self.__rom_name).encode())

        width, height = self.__ale.getScreenDims()
        return width, height, self.__ale.getMinimalActionSet()

    def clone(self):
        if self.__random_seed:
            seed = np.random.randint(0, 9999)
        else:
            seed = self.__seed

        return ALEEnvironment(self.__rom_name, self.__frame_skip,
                              self.__repeat_action_probability,
                              self.__max_episode_steps,
                              self.__loss_of_life_termination,
                              self.__loss_of_life_negative_reward,
                              self.__max_2_frames, self.__is_render, seed,
                              self.__startup_policy,
                              self.__dis_act, self.__action_reduction,
                              self.__processor.clone())

    def step_all(self, a):
        if isinstance(a, (list, np.ndarray)):
            if len(a) <= 0:
                raise ValueError('Empty action list!')
            a = a[0]
        self.__current_steps += 1
        act = self.__action_set[a]
        rew = self._step(act)
        next_state = self.get_state()
        _is_terminal = self.is_terminal()
        return next_state, rew, _is_terminal, self.__current_steps

    def reset(self):
        self.__ale.reset_game()
        self.__current_lives = self.__ale.lives()
        self.__is_life_lost = False
        self.__is_terminal = False
        self.__current_state = None
        self.__prev_state = None

        action_space = self.get_action_space()
        v_range, is_range = action_space.get_range()
        if len(v_range) > 1:
            self.step(1)

        # No op steps
        if self.__startup_policy is not None:
            max_steps = int(self.__startup_policy.get_max_steps())
            for _ in range(max_steps):
                act = self.__startup_policy.step(self.get_state(),
                                                 action_space)
                self.step(act)

        # Start training from this point
        self.__current_steps = 0

        # Reset processor
        self.__processor.reset()

        return self.get_state()

    def _pre_step(self, act):
        if self.__max_2_frames and self.__frame_skip > 1:
            rew = 0
            for i in range(self.__frame_skip - 2):
                rew += self.__ale.act(act)
                self.__prev_buffer = self.__ale.getScreenRGB(
                    self.__prev_buffer)

            self.__prev_buffer = self.__ale.getScreenRGB(self.__prev_buffer)

            rew += self.__ale.act(act)

            self.__current_buffer = self.__ale.getScreenRGB(
                self.__current_buffer)

            self.__is_terminal = self.__ale.game_over()

            self.__prev_state = self.__processor.process(self.__prev_buffer)

            self.__current_state = self.__processor.process(
                self.__current_buffer)

            self.__current_state = np.maximum.reduce(
                [self.__prev_state, self.__current_state])
        else:
            rew = self.__ale.act(act)
            self.__current_buffer = self.__ale.getScreenRGB(
                self.__current_buffer)
            self.__is_terminal = self.__ale.game_over()

            if self.__processor is not None:
                self.__current_state = self.__processor.process(
                    self.__current_buffer)

        if self.__multi_objs and self.__processor is not None:
            all_rewards = self.__processor.get_rewards(rew)
            return all_rewards
        else:
            return rew

    def _step(self, act):
        for i in range(len(self.__dis_act)):
            if act == self.__dis_act[i]:
                act = 0

        if not self.__loss_of_life_termination and not self.__loss_of_life_negative_reward:
            if not self.__is_terminal:
                next_lives = self.__ale.lives()
                if next_lives < self.__current_lives:
                    act = 1
                    self.__current_lives = next_lives
            return self._pre_step(act)
        else:
            rew = self._pre_step(act)
            next_lives = self.__ale.lives()
            if next_lives < self.__current_lives:
                if self.__loss_of_life_negative_reward:
                    rew -= 1
                self.__current_lives = next_lives
                self.__is_life_lost = True

            return rew

    def get_state(self):
        if not self.__max_2_frames:
            if self.__processor is not None:
                return self.__current_state
            else:
                return self.__current_buffer
        else:
            return self.__current_state

    def is_terminal(self):
        if self.__loss_of_life_termination and self.__is_life_lost:
            return True
        elif self.__max_episode_steps is not None and self.__current_steps > self.__max_episode_steps:
            return True
        else:
            return self.__is_terminal

    @staticmethod
    def get_rom_path(rom=None):
        if rom is None:
            return os.path.dirname(os.path.abspath(__file__)) + "/roms/"
        else:
            return os.path.dirname(
                os.path.abspath(__file__)) + "/roms/" + rom + ".bin"

    @staticmethod
    def list_all_roms():
        return [
            f for f in listdir(ALEEnvironment.get_rom_path())
            if isfile(join(ALEEnvironment.get_rom_path(), f))
        ]

    def get_state_space(self):
        if self.__processor is None:
            shape = self.__current_buffer.shape
        else:
            shape = self.__processor.process(self.__current_buffer).shape
        min_value = np.zeros(shape, dtype=np.uint8)
        max_value = np.full(shape, 255)
        return Space(min_value, max_value, True)

    def get_action_space(self):
        if self.__action_reduction >= 1:
            return Space(0, self.__action_reduction - 1, True)
        else:
            return Space(0, len(self.__action_set) - 1, True)

    def step(self, act):
        if isinstance(act, (list, np.ndarray)):
            if len(act) <= 0:
                raise ValueError('Empty action list!')
            act = act[0]
        self.__current_steps += 1
        act = self.__action_set[act]
        rew = self._step(act)
        return rew

    def get_current_steps(self):
        return self.__current_steps

    def is_atari(self):
        return True

    def is_render(self):
        return self.__is_render

    def get_number_of_objectives(self):
        if self.__processor is None:
            return 1
        else:
            return self.__processor.get_number_of_objectives()

    def get_number_of_agents(self):
        if self.__processor is None:
            return 1
        else:
            return self.__processor.get_number_of_agents()

    def get_state_processor(self):
        return self.__processor
Example #57
0
# (Tail of a truncated keymap tuple: indexed by a 5-bit mask of pressed keys
# -- fire, right, left, down, up from the most significant bit -- it maps each
# chord to an ALE action id; chords marked "(invalid)" fall back to a nearby
# valid action.)
15, #10111 fire up/down/left (invalid)
11, #11000 fire right
14, #11001 fire up/right
16, #11010 fire down/right
14, #11011 fire up/down/right (invalid)
11, #11100 fire left/right (invalid)
14, #11101 fire left/right/up (invalid)
16, #11110 fire left/right/down (invalid)
14  #11111 fire up/down/left/right (invalid)
)
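
Such a table is typically consumed once per frame by these pygame player agents. A hedged sketch (the name keymap stands in for the truncated tuple above; the bit layout is inferred from its comments):

# Hypothetical reconstruction of the key-polling step.
pressed = pygame.key.get_pressed()
keys = 0
keys |= pressed[pygame.K_UP]
keys |= pressed[pygame.K_DOWN]  << 1
keys |= pressed[pygame.K_LEFT]  << 2
keys |= pressed[pygame.K_RIGHT] << 3
keys |= pressed[pygame.K_z]     << 4   # fire
action = keymap[keys]
reward = ale.act(action)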


ale = ALEInterface()
rom = b'../roms/breakout.bin'

ale.loadROM(rom)
legal_actions = ale.getMinimalActionSet()
print(legal_actions)

(screen_width,screen_height) = ale.getScreenDims()
print("width/height: " +str(screen_width) + "/" + str(screen_height))

(display_width,display_height) = (1024,420)

# init pygame
pygame.init()
screen = pygame.display.set_mode((display_width,display_height))
pygame.display.set_caption("Arcade Learning Environment Player Agent Display")

game_surface = pygame.Surface((screen_width,screen_height))
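Each frame, the game surface is then typically filled from ALE's RGB buffer and scaled onto the display. A minimal sketch of that step, using the objects created above (blit_array expects a (width, height, 3) array, so the ALE frame is transposed):

import numpy as np

rgb = ale.getScreenRGB()  # (screen_height, screen_width, 3)
pygame.surfarray.blit_array(game_surface, np.transpose(rgb, (1, 0, 2)))
scaled = pygame.transform.scale(game_surface, (display_width, display_height))
screen.blit(scaled, (0, 0))
pygame.display.flip()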
Example #58
0
class AtariEmulator(BaseEnvironment):
    def __init__(self, actor_id, args):
        self.ale = ALEInterface()
        self.ale.setInt(b"random_seed", args.random_seed * (actor_id + 1))
        # For full control over explicit action repeat (>= ALE 0.5.0)
        self.ale.setFloat(b"repeat_action_probability", 0.0)
        # Disable frame_skip and color_averaging
        # See: http://is.gd/tYzVpj
        self.ale.setInt(b"frame_skip", 1)
        self.ale.setBool(b"color_averaging", False)
        full_rom_path = args.rom_path + "/" + args.game + ".bin"
        self.ale.loadROM(str.encode(full_rom_path))
        self.legal_actions = self.ale.getMinimalActionSet()
        self.screen_width, self.screen_height = self.ale.getScreenDims()
        self.lives = self.ale.lives()

        self.random_start = args.random_start
        self.single_life_episodes = args.single_life_episodes
        self.call_on_new_frame = args.visualize

        # Processed historical frames that will be fed into the network
        # (i.e., four 84x84 images)
        self.observation_pool = ObservationPool(np.zeros((IMG_SIZE_X, IMG_SIZE_Y, NR_IMAGES), dtype=np.uint8))
        self.rgb_screen = np.zeros((self.screen_height, self.screen_width, 3), dtype=np.uint8)
        self.gray_screen = np.zeros((self.screen_height, self.screen_width, 1), dtype=np.uint8)
        self.frame_pool = FramePool(np.empty((2, self.screen_height, self.screen_width), dtype=np.uint8),
                                    self.__process_frame_pool)

    def get_legal_actions(self):
        return self.legal_actions

    def __get_screen_image(self):
        """
        Get the current frame luminance
        :return: the current frame
        """
        self.ale.getScreenGrayscale(self.gray_screen)
        if self.call_on_new_frame:
            self.ale.getScreenRGB(self.rgb_screen)
            self.on_new_frame(self.rgb_screen)
        return np.squeeze(self.gray_screen)

    def on_new_frame(self, frame):
        pass

    def __new_game(self):
        """ Restart game """
        self.ale.reset_game()
        self.lives = self.ale.lives()
        if self.random_start:
            wait = random.randint(0, MAX_START_WAIT)
            for _ in range(wait):
                self.ale.act(self.legal_actions[0])

    def __process_frame_pool(self, frame_pool):
        """ Preprocess frame pool """

        img = np.amax(frame_pool, axis=0)
        img = imresize(img, (84, 84), interp='nearest')
        img = img.astype(np.uint8)
        return img

    def __action_repeat(self, a, times=ACTION_REPEAT):
        """ Repeat action and grab screen into frame pool """
        reward = 0
        for i in range(times - FRAMES_IN_POOL):
            reward += self.ale.act(self.legal_actions[a])
        # Only need to add the last FRAMES_IN_POOL frames to the frame pool
        for i in range(FRAMES_IN_POOL):
            reward += self.ale.act(self.legal_actions[a])
            self.frame_pool.new_frame(self.__get_screen_image())
        return reward

    def get_initial_state(self):
        """ Get the initial state """
        self.__new_game()
        for step in range(NR_IMAGES):
            _ = self.__action_repeat(0)
            self.observation_pool.new_observation(self.frame_pool.get_processed_frame())
        if self.__is_terminal():
            raise Exception('This should never happen.')
        return self.observation_pool.get_pooled_observations()

    def next(self, action):
        """ Get the next state, reward, and game over signal """

        reward = self.__action_repeat(np.argmax(action))
        self.observation_pool.new_observation(self.frame_pool.get_processed_frame())
        terminal = self.__is_terminal()
        self.lives = self.ale.lives()
        observation = self.observation_pool.get_pooled_observations()
        return observation, reward, terminal

    def __is_terminal(self):
        if self.single_life_episodes:
            return self.__is_over() or (self.lives > self.ale.lives())
        else:
            return self.__is_over()

    def __is_over(self):
        return self.ale.game_over()

    def get_noop(self):
        return [1.0, 0.0]
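The class above delegates frame blending and observation stacking to FramePool and ObservationPool, which are defined elsewhere in that project. A hypothetical reconstruction of what they are assumed to do (a sketch, not the project's actual code):

import numpy as np

class FramePool(object):
    """Ring buffer of raw frames, processed (e.g. max-pooled) on demand."""
    def __init__(self, pool, process_fn):
        self.pool = pool              # e.g. shape (2, height, width)
        self.process_fn = process_fn  # here: __process_frame_pool
        self.idx = 0

    def new_frame(self, frame):
        self.pool[self.idx] = frame
        self.idx = (self.idx + 1) % len(self.pool)

    def get_processed_frame(self):
        return self.process_fn(self.pool)

class ObservationPool(object):
    """Rolling stack of the last NR_IMAGES processed frames."""
    def __init__(self, observations):
        self.observations = observations  # e.g. shape (84, 84, 4)

    def new_observation(self, frame):
        self.observations = np.concatenate(
            (self.observations[:, :, 1:], frame[:, :, np.newaxis]), axis=2)

    def get_pooled_observations(self):
        return np.copy(self.observations)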
Example #59
0
def main():
    if len(sys.argv) < 2:
        dir_rom = '/Users/lguan/Documents/Study/Research/Atari-2600-Roms/K-P/ms_pacman.bin'
    else:
        dir_rom = sys.argv[1]

    ale = ALEInterface()

    # Get & Set the desired settings
    ale.setInt(b'random_seed', 123)

    # Set USE_SDL to true to display the screen. ALE must be compiled
    # with SDL enabled for this to work. On OSX, pygame init is used to
    # proxy-call SDL_main.
    USE_SDL = False
    if USE_SDL:
        # mac OS
        if sys.platform == 'darwin':
            pygame.init()
            ale.setBool('sound', False)  # Sound doesn't work on OSX
        elif sys.platform.startswith('linux'):
            ale.setBool('sound', True)

        ale.setBool('display_screen', True)

    # Load the ROM file
    rom_file = str.encode(dir_rom)
    print('- Loading ROM - %s' % dir_rom)
    ale.loadROM(rom_file)
    print('- Finished loading ROM')

    (game_surface_width, game_surface_height) = ale.getScreenDims()
    print("game surface width/height: " + str(game_surface_width) + "/" +
          str(game_surface_height))

    (display_width, display_height) = (800, 640)
    print('display width/height', (display_width, display_height))

    available_action = ale.getLegalActionSet()
    print(available_action)

    # init pygame
    pygame.init()
    display_screen = pygame.display.set_mode((display_width, display_height))
    pygame.display.set_caption(
        "Arcade Learning Environment Player Agent Display")

    # init clock
    clock = pygame.time.Clock()
    is_exit = False

    # Play 10 episodes
    for episode in range(10):
        if is_exit:
            break

        total_reward = 0

        while not ale.game_over() and not is_exit:

            a = getActionFromKeyboard()
            # Apply an action and get the resulting reward
            reward = ale.act(a)
            total_reward += reward
            # clear screen
            display_screen.fill((0, 0, 0))
            # render game surface
            renderGameSurface(ale, display_screen,
                              (game_surface_width, game_surface_height))
            # display related info
            displayRelatedInfo(display_screen, a, total_reward)

            pygame.display.flip()

            # process pygame event queue
            for event in pygame.event.get():
                if event.type == pygame.QUIT:
                    is_exit = True
                    break
                if event.type == pygame.KEYDOWN and event.key == pygame.K_q:
                    is_exit = True
                    break

            # delay to 60fps
            clock.tick(60.)

        print('Episode %d ended with score: %d' % (episode, total_reward))
        ale.reset_game()
Example #60
0
# Set USE_SDL to true to display the screen. ALE must be compiled
# with SDL enabled for this to work. On OSX, pygame init is used to
# proxy-call SDL_main.
USE_SDL = False
if USE_SDL:
    if sys.platform == 'darwin':
        import pygame
        pygame.init()
        ale.setBool('sound', False)  # Sound doesn't work on OSX
    elif sys.platform.startswith('linux'):
        ale.setBool('sound', True)
    ale.setBool('display_screen', True)

# Load the ROM file
rom_file = str.encode(sys.argv[1])
ale.loadROM(rom_file)

# Get the list of legal actions
legal_actions = ale.getLegalActionSet()

# Play 10 episodes
for episode in range(10):
    total_reward = 0
    while not ale.game_over():
        a = legal_actions[randrange(len(legal_actions))]
        # Apply an action and get the resulting reward
        reward = ale.act(a)
        total_reward += reward
    print('Episode %d ended with score: %d' % (episode, total_reward))
    ale.reset_game()