from random import randrange

# The two-player ALE instance (`ale`) and the action sets for both agents
# (legal_actions, legal_actionsB) are assumed to be initialized beforehand,
# e.g. via the two-player wrapper shown in Example #3 below.
result_file = open("result_16_20.csv", "w")
result_file.write(
    "# frame_skip_agent_A, frame_skip_agent_B, episode, score_A, score_B, reward\n"
)

for frame_skip_agent_A in range(
        16, 21):  # Agent A is allowed to change its action every X frames
    for frame_skip_agent_B in range(
            1, 31):  # Agent B is allowed to change its action every Y frames
        frame_counter = 0
        frame_counter_reset = frame_skip_agent_A * frame_skip_agent_B
        # Play 300 episodes
        for episode in range(300):
            total_reward = score_A = score_B = 0
            ale.setMode(1)  # Switch Boxing to two-player mode

            while not ale.game_over():
                # Set action for agent A
                if frame_counter % frame_skip_agent_A == 0:
                    a = legal_actions[randrange(len(legal_actions))]
                # Set action for agent B
                if frame_counter % frame_skip_agent_B == 0:
                    b = legal_actionsB[randrange(len(legal_actionsB))]

                # Reset the frame counter to prevent overflow; the product of
                # both skips is a common multiple, so neither agent's phase is
                # disturbed by the reset
                frame_counter += 1
                if frame_counter == frame_counter_reset:
                    frame_counter = 0

                # Apply both actions and get the resulting reward. The
                # two-player call is assumed from the actAB wrapper shown
                # in Example #3 below; adapt it to your fork's API.
                reward = ale.actAB(a, b)
                total_reward += reward
                # One plausible Boxing convention: positive rewards are
                # agent A's points, negative rewards agent B's.
                if reward > 0:
                    score_A += reward
                elif reward < 0:
                    score_B -= reward

            # Log this episode and start the next one
            result_file.write("%d, %d, %d, %d, %d, %d\n" % (
                frame_skip_agent_A, frame_skip_agent_B, episode,
                score_A, score_B, total_reward))
            ale.reset_game()

result_file.close()
Example #2
class AtariEnvironment:
    num_actions = 18  # Use full action set

    def __init__(self, frame_shape, frame_postprocess=lambda x: x):
        self.ale = ALEInterface()
        self.ale.setBool(b"display_screen", cfg.display_screen)
        self.ale.setInt(b"frame_skip", 1)
        self.ale.setBool(b"color_averaging", False)
        self.ale.setInt(b"random_seed", cfg.random_seed)
        self.ale.setFloat(b"repeat_action_probability", cfg.sticky_prob)

        self.ale.loadROM(str.encode(cfg.rom))

        self.ale.setMode(cfg.mode)
        self.ale.setDifficulty(cfg.difficulty)

        self.action_set = self.ale.getLegalActionSet()
        assert len(self.action_set) == AtariEnvironment.num_actions

        screen_dims = tuple(reversed(self.ale.getScreenDims())) + (1,)
        self._frame_buffer = CircularBuffer(
            cfg.frame_buffer_size, screen_dims, np.uint8
        )
        self._frame_stack = CircularBuffer(
            cfg.frame_history_size, frame_shape, np.uint8
        )
        self._frame_postprocess = frame_postprocess

        self._episode_count = 0
        self.reset(inc_episode_count=False)

    def _is_terminal(self):
        return self.ale.game_over()

    def _get_single_frame(self):
        stacked_frames = np.concatenate(self._frame_buffer, axis=2)
        maxed_frame = np.amax(stacked_frames, axis=2)
        expanded_frame = np.expand_dims(maxed_frame, axis=2)  # (H, W) -> (H, W, 1)
        frame = self._frame_postprocess(expanded_frame)

        return frame

    def reset(self, inc_episode_count=True):
        self._episode_frames = 0
        self._episode_reward = 0
        if inc_episode_count:
            self._episode_count += 1

        self.ale.reset_game()
        for _ in range(cfg.frame_buffer_size):
            self._frame_buffer.append(self.ale.getScreenGrayscale())
        for _ in range(cfg.frame_history_size):
            self._frame_stack.append(self._get_single_frame())

    def act(self, action):
        assert not self._is_terminal()

        cum_reward = 0
        for _ in range(cfg.frame_skip):
            cum_reward += self.ale.act(self.action_set[action])
            self._frame_buffer.append(self.ale.getScreenGrayscale())

        self._frame_stack.append(self._get_single_frame())
        self._episode_frames += cfg.frame_skip
        self._episode_reward += cum_reward
        cum_reward = np.clip(cum_reward, -1, 1)

        return cum_reward, self.state, self._is_terminal()

    @property
    def state(self):
        assert len(self._frame_buffer) == cfg.frame_buffer_size
        assert len(self._frame_stack) == cfg.frame_history_size
        return np.concatenate(self._frame_stack, axis=-1)

    @property
    def episode_reward(self):
        return self._episode_reward

    @property
    def episode_frames(self):
        return self._episode_frames

    @property
    def episode_steps(self):
        return self._episode_frames // cfg.frame_skip

    @property
    def episode_count(self):
        return self._episode_count
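
A minimal usage sketch for the class above, assuming the surrounding project's cfg module and CircularBuffer are importable; (210, 160, 1) is the raw ALE screen size, which matches the identity default of frame_postprocess:

from random import randrange

env = AtariEnvironment(frame_shape=(210, 160, 1))
while not env._is_terminal():
    action = randrange(AtariEnvironment.num_actions)
    reward, state, terminal = env.act(action)
print("reward:", env.episode_reward, "steps:", env.episode_steps)
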
Example #3
class AleInterface(object):
    def __init__(self, game, args):

        self.ale = ALEInterface()

        # if sys.platform == 'darwin':
        #     self.ale.setBool('sound', False)  # Sound doesn't work on OSX
        # elif sys.platform.startswith('linux'):
        #     self.ale.setBool('sound', True)
        # self.ale.setBool('display_screen', True)
        #

        self.ale.setBool('display_screen', args.display_screen)
        self.ale.setInt('frame_skip', args.frame_skip)
        self.ale.setFloat('repeat_action_probability',
                          args.repeat_action_probability)
        self.ale.setBool('color_averaging', args.color_averaging)
        self.ale.setInt('random_seed', args.random_seed)


        rom_file = "./roms/%s.bin" % game
        if not os.path.exists(rom_file):
            print "not found rom file:", rom_file
            sys.exit(-1)
        self.ale.loadROM(rom_file)

        self.actions = self.ale.getMinimalActionSet()
        self.actionsB = self.ale.getMinimalActionSetB()

    def get_actions_num(self):
        return len(self.actions)

    def get_actions_numB(self):
        return len(self.actionsB)

    def act(self, action):
        reward = self.ale.act(self.actions[action])
        return reward

    def actAB(self, actionA, actionB):
        # Agent B's action indices are offset by 18 (the size of agent A's
        # full action set), hence the shift back into the B action set
        reward = self.ale.actAB(self.actions[actionA],
                                self.actionsB[actionB - 18])
        return reward

    def get_screen_gray(self):
        return self.ale.getScreenGrayscale()

    def get_screen_rgb(self):
        return self.ale.getScreenRGB()

    def game_over(self):
        return self.ale.game_over()

    def reset_game(self):
        return self.ale.reset_game()

    def set_mode(self, mode):
        return self.ale.setMode(mode)
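
A hypothetical two-player loop built on the wrapper above; the game name and args are placeholders, and agent B's action indices start at 18 to match the offset handled inside actAB:

from random import randrange

env = AleInterface('boxing', args)  # `args` as parsed by the surrounding script
env.set_mode(1)  # two-player mode
env.reset_game()
while not env.game_over():
    action_a = randrange(env.get_actions_num())
    action_b = 18 + randrange(env.get_actions_numB())
    reward = env.actAB(action_a, action_b)
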
Example #4
class UpdatedAtariEnv(AtariEnv):
    def __init__(self,
                 rom_path,
                 obs_type,
                 frameskip=(2, 5),
                 repeat_action_probability=0.,
                 mode=0,
                 difficulty=0):
        """Frameskip should be either a tuple (indicating a random range to
        choose from, with the top value exclude), or an int."""

        utils.EzPickle.__init__(self, rom_path, obs_type)
        assert obs_type in ('ram', 'image')

        self.rom_path = rom_path
        if not os.path.exists(self.rom_path):
            raise IOError('You asked for ROM %s but the path does not exist' %
                          self.rom_path)
        self._obs_type = obs_type
        self.frameskip = frameskip
        # Load new ALE interface, instead of atari-py
        self.ale = ALEInterface()
        self.viewer = None

        # Tune (or disable) ALE's action repeat:
        # https://github.com/openai/gym/issues/349
        assert isinstance(
            repeat_action_probability,
            (float, int)), "Invalid repeat_action_probability: {!r}".format(
                repeat_action_probability)
        self.ale.setFloat('repeat_action_probability'.encode('utf-8'),
                          repeat_action_probability)

        self.seed()

        # Set mode and difficulty
        self.ale.setMode(mode)
        self.ale.setDifficulty(difficulty)

        self._action_set = self.ale.getMinimalActionSet()
        self.action_space = spaces.Discrete(len(self._action_set))

        (screen_width, screen_height) = self.ale.getScreenDims()
        if self._obs_type == 'ram':
            self.observation_space = spaces.Box(low=0, high=255,
                                                shape=(128,), dtype=np.uint8)
        elif self._obs_type == 'image':
            self.observation_space = spaces.Box(low=0,
                                                high=255,
                                                shape=(screen_height,
                                                       screen_width, 3),
                                                dtype=np.uint8)
        else:
            raise error.Error('Unrecognized observation type: {}'.format(
                self._obs_type))

    def seed(self, seed=None):
        self.np_random, seed1 = seeding.np_random(seed)
        # Derive a random seed. This gets passed as a uint, but gets
        # checked as an int elsewhere, so we need to keep it below
        # 2**31.
        seed2 = seeding.hash_seed(seed1 + 1) % 2**31
        # Empirically, we need to seed before loading the ROM.
        self.ale.setInt(b'random_seed', seed2)
        # Load game from ROM instead of game path
        self.ale.loadROM(self.rom_path)
        return [seed1, seed2]

    def _get_image(self):
        return self.ale.getScreenRGB()
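
The frameskip tuple documented above is consumed by the inherited step method; gym's stock AtariEnv samples it roughly like this sketch (not necessarily this fork's exact code):

# inside step(): pick how many frames to repeat the chosen action for
if isinstance(self.frameskip, int):
    num_steps = self.frameskip
else:  # tuple: random skip, top value excluded
    num_steps = self.np_random.randint(self.frameskip[0], self.frameskip[1])
reward = 0.0
for _ in range(num_steps):
    reward += self.ale.act(self._action_set[action])
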
Example #5
class AtariEnvironment:
    def __init__(self, seed=1, record=False):
        self.ale = ALEInterface()
        self.ale.setBool(b'display_screen', FLAGS.display_screen or record)
        self.ale.setInt(b'frame_skip', 1)
        self.ale.setBool(b'color_averaging', False)
        self.ale.setInt(b'random_seed', seed)
        self.ale.setFloat(b'repeat_action_probability', FLAGS.sticky_prob)
        self.ale.setInt(b'max_num_frames_per_episode', FLAGS.max_num_frames_per_episode)

        if record:
            if not tf.gfile.Exists(FLAGS.record_dir):
                tf.gfile.MakeDirs(FLAGS.record_dir)
            self.ale.setBool(b'sound', True)
            self.ale.setString(b'record_screen_dir', str.encode(FLAGS.record_dir))
            self.ale.setString(b'record_sound_filename', str.encode(FLAGS.record_dir + '/sound.wav'))
            self.ale.setInt(b'fragsize', 64)

        self.ale.loadROM(str.encode(FLAGS.rom))

        self.ale.setMode(FLAGS.mode)
        self.ale.setDifficulty(FLAGS.difficulty)

        self.action_set = self.ale.getLegalActionSet()

        screen_dims = tuple(reversed(self.ale.getScreenDims())) + (1,)
        self._frame_buffer = CircularBuffer(FLAGS.frame_buffer_size, screen_dims, np.uint8)

        self.reset()

    def _is_terminal(self):
        return self.ale.game_over()

    def _get_single_frame(self):
        stacked_frames = np.concatenate(self._frame_buffer, axis=2)
        maxed_frame = np.amax(stacked_frames, axis=2)
        expanded_frame = np.expand_dims(maxed_frame, axis=2)  # (H, W) -> (H, W, 1)

        return expanded_frame

    def reset(self):
        self._episode_frames = 0
        self._episode_reward = 0

        self.ale.reset_game()
        for _ in range(FLAGS.frame_buffer_size):
            self._frame_buffer.append(self.ale.getScreenGrayscale())

    def act(self, action):
        assert not self._is_terminal()

        cum_reward = 0
        for _ in range(FLAGS.frame_skip):
            cum_reward += self.ale.act(self.action_set[action])
            self._frame_buffer.append(self.ale.getScreenGrayscale())

        self._episode_frames += FLAGS.frame_skip
        self._episode_reward += cum_reward
        cum_reward = np.clip(cum_reward, -1, 1)

        return cum_reward, self._get_single_frame(), self._is_terminal()

    def state(self):
        assert len(self._frame_buffer) == FLAGS.frame_buffer_size
        return self._get_single_frame()

    def num_actions(self):
        return len(self.action_set)

    def episode_reward(self):
        return self._episode_reward

    def episode_frames(self):
        return self._episode_frames

    def frame_skip(self):
        return FLAGS.frame_skip
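
A short sketch of the recording path above: with record=True, ALE writes one PNG per frame to record_screen_dir and the audio to record_sound_filename, and the two can later be combined into a video with a tool such as ffmpeg. The seed and the NOOP rollout here are placeholders:

env = AtariEnvironment(seed=42, record=True)  # frames land in FLAGS.record_dir
while not env._is_terminal():
    env.act(0)  # index 0 is NOOP in the legal action set
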
Example #6
import sys
from random import randrange
from ale_python_interface import ALEInterface

ale = ALEInterface()

# Load the ROM file
ale.loadROM(str.encode(sys.argv[1]))

# Get the list of available modes and difficulties
avail_modes = ale.getAvailableModes()
avail_diff = ale.getAvailableDifficulties()

print('Number of available modes:', len(avail_modes))
print('Number of available difficulties:', len(avail_diff))

# Get the list of legal actions
legal_actions = ale.getLegalActionSet()

# Play one episode in each mode and in each difficulty
for mode in avail_modes:
    for diff in avail_diff:

        ale.setDifficulty(diff)
        ale.setMode(mode)
        ale.reset_game()
        print('Mode {0} difficulty {1}:'.format(mode, diff))

        total_reward = 0
        while not ale.game_over():
            a = legal_actions[randrange(len(legal_actions))]
            # Apply an action and get the resulting reward
            reward = ale.act(a)
            total_reward += reward

        print('Episode ended with score:', total_reward)
Example #7
class ALEEnvironment(Environment):
    def __init__(self, rom_file, args):
        from ale_python_interface import ALEInterface
        self.ale = ALEInterface()

        # Set ALE configuration
        self.ale.setInt(b'frame_skip', args.frame_skip)
        self.ale.setFloat(b'repeat_action_probability',
                          args.repeat_action_probability)
        self.ale.setBool(b'color_averaging', args.color_averaging)

        if args.random_seed:
            self.ale.setInt(b'random_seed', args.random_seed)

        if args.record_screen_path:
            if not os.path.exists(args.record_screen_path):
                os.makedirs(args.record_screen_path)
            self.ale.setString(b'record_screen_dir',
                               args.record_screen_path.encode())

        if args.record_sound_filename:
            self.ale.setBool(b'sound', True)
            self.ale.setString(b'record_sound_filename',
                               args.record_sound_filename.encode())

        # Load ROM
        self.ale.loadROM(rom_file.encode())

        # Set game difficulty and mode (after loading)
        self.ale.setDifficulty(args.game_difficulty)
        self.ale.setMode(args.game_mode)

        # Whether to use the minimal or the full action set
        if args.minimal_action_set:
            self.actions = self.ale.getMinimalActionSet()
        else:
            self.actions = self.ale.getLegalActionSet()

        # Track whether a life was lost on the last step
        self.life_lost = False

        # Initialize base class
        super(ALEEnvironment, self).__init__(args)

    def action_dim(self):
        return len(self.actions)

    def reset(self):
        # In test mode, the game is simply initialized. In train mode, if the game
        # is in terminal state due to a life loss but not yet game over, then only
        # life loss flag is reset so that the next game starts from the current
        # state. Otherwise, the game is simply initialized.
        if (self.mode == 'test'
                or not self.life_lost  # `reset` called in the middle of an episode
                or self.ale.game_over()):  # all lives are lost
            self.ale.reset_game()
        self.life_lost = False
        screen = self._get_state(self.ale.getScreenRGB())
        return screen

    def step(self, action, action_b=0, ignore_screen=False):
        lives = self.ale.lives()
        # Act on environment
        # Agent B's actions are offset by 18 (the size of the full
        # single-player action set) in this two-player fork
        reward = self.ale.act(self.actions[action],
                              self.actions[action_b] + 18)
        # Check if life was lost
        self.life_lost = (lives != self.ale.lives())
        # Check terminal state
        if self.mode == 'train':
            terminal = self.ale.game_over() or self.life_lost
        else:
            terminal = self.ale.game_over()
        # Check if should ignore the screen (in case of RobotEnvironment)
        if ignore_screen:
            screen = None
        else:
            # Get screen from ALE
            screen = self._get_state(self.ale.getScreenRGB())
            # Wait for next frame to start
            self.fps_control.wait_next_frame()
        return screen, reward, terminal
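
A hypothetical driver for the environment above; the ROM path and `args` are placeholders, and the mode and fps_control attributes are assumed to be provided by the Environment base class:

env = ALEEnvironment('./roms/boxing.bin', args)
screen = env.reset()
terminal = False
while not terminal:
    screen, reward, terminal = env.step(0, action_b=0)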