Example #1
def launch(args, defaults, description):
    """
    Execute a complete training run.
    """

    rec_screen = ""
    if "--nn-file" in args:
        temp_params = vars(load_params(args[args.index("--nn-file")+1]))
        for p in temp_params:
            try:
                vars(defaults)[p.upper()] = temp_params[p]
            except:
                print "warning: parameter", p, "from param file doesn't exist."
        #rec_screen = args[args.index("--nn-file")+1][:-len("last_model.pkl")]+"/frames"

    parameters = process_args(args, defaults, description)

    if parameters.rom.endswith('.bin'):
        rom = parameters.rom
    else:
        rom = "%s.bin" % parameters.rom
    parameters.rom_path = os.path.join(defaults.BASE_ROM_PATH, rom)

    rng = np.random.RandomState(parameters.seed)

    folder_name = None if parameters.folder_name == "" else parameters.folder_name

    ale = ALEInterface()
    ale.setInt('random_seed', rng.randint(parameters.seed))
    ale.setBool('display_screen', parameters.display_screen)
    ale.setString('record_screen_dir', rec_screen)
    trainer = Q_Learning(model_params=parameters,
                         ale_env=ale, folder_name=folder_name)
    trainer.train()
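
The launcher above only wires a few ALE options (random seed, on-screen display, frame recording) before handing the interface to the trainer. A minimal standalone sketch of the same calls, assuming the ale_python_interface bindings used in the later examples and a placeholder ROM path (newer ALE builds expect bytes keys such as b'random_seed'):

import numpy as np
from ale_python_interface import ALEInterface

rng = np.random.RandomState(123)

ale = ALEInterface()
ale.setInt('random_seed', rng.randint(0, 2**16))  # seed the emulator RNG
ale.setBool('display_screen', False)              # no on-screen rendering
# ale.setString('record_screen_dir', 'frames')    # dump each frame into an existing directory
ale.loadROM('breakout.bin')                       # placeholder ROM path
print(ale.getMinimalActionSet())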
Example #2
    def __init__(self):

        self.ale = ALEInterface()
        self.ale.loadROM(get_game_path('boxing'))
        self.legal_actions = self.ale.getMinimalActionSet()
        self.policyModel = PolicyModel(self.legal_actions)
        # load previously trained weights if a model directory exists
        if os.path.exists('model'):
            self.policyModel.load_weights('./model/vpg_model')
        self.status_size_ = 4
        self.gamma_ = 1  # no discounting: the per-step reward is too small
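
The comment keeps gamma at 1 because Boxing's per-step rewards are small, so every reward in an episode should count fully toward earlier steps. For reference, a hedged sketch of the discounted-return computation a policy-gradient (VPG) update typically uses; the reward list is made up:

def discounted_returns(rewards, gamma=1.0):
    """Return-to-go for each step of one episode."""
    returns = []
    running = 0.0
    for r in reversed(rewards):
        running = r + gamma * running
        returns.append(running)
    return list(reversed(returns))

print(discounted_returns([0, 0, 1, 0, -1], gamma=1.0))  # [0.0, 0.0, 0.0, -1.0, -1.0]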
Example #3
    def __init__(self,
                 rom_path,
                 seed=123,
                 frameskip=4,
                 show_display=False,
                 stack_num_states=4,
                 concatenate_state_every=4):
        """

        Parameters:
            frameskip: Either an int, or a tuple giving a random range to choose
                from (the top value is excluded). Also known as action repeat.

            stack_num_states: Number of dimensions/channels the state should have.

            concatenate_state_every: After how many frames one channel should be
                appended to the state. Counted in absolute frames, independent of frameskip.
        """

        self.stack_num_states = stack_num_states
        self.concatenate_state_every = concatenate_state_every

        self.game_path = rom_path
        if not os.path.exists(self.game_path):
            raise IOError('You asked for ROM %s but path %s does not exist' %
                          (rom_path, self.game_path))
        self.frameskip = frameskip

        try:
            self.ale = ALEInterface()
        except Exception as e:
            print(
                "ALEInterface could not be loaded. ale_python_interface import failed"
            )
            raise e

        # Set some default options
        self.ale.setInt(b'random_seed', seed)
        self.ale.setBool(b'sound', False)
        self.ale.setBool(b'display_screen', show_display)
        self.ale.setFloat(b'repeat_action_probability', 0.)

        # Load the rom
        self.ale.loadROM(self.game_path)

        (self.screen_width, self.screen_height) = self.ale.getScreenDims()
        self.latest_frame_fifo = deque(
            maxlen=2)  # holds the two most recent frames, for max-pooling
        self.state_fifo = deque(maxlen=stack_num_states)
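
The docstring above describes frame stacking plus a two-frame FIFO used to max-pool consecutive frames (the usual fix for Atari sprite flicker). A rough sketch of that idea, assuming grayscale 210x160 frames; the names and shapes are illustrative, not this class's actual methods:

import numpy as np
from collections import deque

latest_frame_fifo = deque(maxlen=2)   # two most recent raw frames
state_fifo = deque(maxlen=4)          # stacked channels fed to the network

def observe(frame):
    """Max-pool the last two raw frames, then push the result as one channel."""
    latest_frame_fifo.append(frame)
    pooled = np.max(np.stack(list(latest_frame_fifo)), axis=0)  # removes flicker
    state_fifo.append(pooled)

for _ in range(8):
    observe(np.random.randint(0, 256, (210, 160), dtype=np.uint8))

state = np.stack(list(state_fifo), axis=-1)   # (210, 160, 4) stacked state
print(state.shape)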
Example #4
    def __init__(self):

        # ALE-related members
        self.ale = ALEInterface()
        self.ale.loadROM(get_game_path('boxing'))
        self.legal_actions = self.ale.getMinimalActionSet()
        self.status = list()
        # qnet_latest holds the most recently updated weights
        self.qnet_latest = QNet(len(self.legal_actions))
        # qnet_target holds the target model weights
        self.qnet_target = QNet(len(self.legal_actions))
        if os.path.exists('model'):
            self.qnet_latest.load_weights('./model/dqn_model')
        # use qnet_target as the rollout model
        self.qnet_target.set_weights(self.qnet_latest.get_weights())
        # loss
        self.loss = Loss(len(self.legal_actions), self.GAMMA)
        # status transition (replay) memory
        self.memory = list()
        # optimizer with an exponentially decaying learning rate
        self.optimizer = tf.keras.optimizers.Adam(
            tf.keras.optimizers.schedules.ExponentialDecay(0.00025, 5 * self.SCALE, 0.96))
        # episode count
        self.ep_count = 0
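
The constructor keeps two networks: qnet_latest receives gradient updates while qnet_target stays frozen for rollouts and bootstrap targets. A hedged sketch of the periodic sync a DQN loop of this shape typically performs; the update interval is an assumption, not taken from the example:

TARGET_UPDATE_EVERY = 10000  # steps between copies (illustrative value)

def maybe_sync_target(step, qnet_latest, qnet_target):
    """Copy the online weights into the frozen target network every N steps."""
    if step % TARGET_UPDATE_EVERY == 0:
        qnet_target.set_weights(qnet_latest.get_weights())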
Example #5
# python_example.py
# Author: Ben Goodrich
#
# This is a direct port to python of the shared library example from
# ALE provided in doc/examples/sharedLibraryInterfaceExample.cpp
from __future__ import print_function

import sys
from random import randrange
from atari_py import ALEInterface

if len(sys.argv) < 2:
    print('Usage:', sys.argv[0], 'rom_file')
    sys.exit()

ale = ALEInterface()

# Get & Set the desired settings
ale.setInt('random_seed', 123)

# Set USE_SDL to true to display the screen. ALE must be compiled
# with SDL enabled for this to work. On OSX, pygame init is used to
# proxy-call SDL_main.
USE_SDL = False
if USE_SDL:
    if sys.platform == 'darwin':
        import pygame
        pygame.init()
        ale.setBool('sound', False)  # Sound doesn't work on OSX
    elif sys.platform.startswith('linux'):
        ale.setBool('sound', True)
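
The original python_example.py continues by loading the ROM passed on the command line and playing a few episodes with random legal actions. A reconstruction in that spirit, using only calls that appear elsewhere in these examples:

ale.loadROM(sys.argv[1])
legal_actions = ale.getLegalActionSet()

for episode in range(10):
    total_reward = 0
    while not ale.game_over():
        a = legal_actions[randrange(len(legal_actions))]
        total_reward += ale.act(a)  # apply the action, collect the reward
    print('Episode', episode, 'ended with score:', total_reward)
    ale.reset_game()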
Example #6
    def __init__(self,
                 rand_seed,
                 options,
                 display=False,
                 no_op_max=30,
                 thread_index=-1):
        if options.use_gym:
            self._display = options.display
        else:
            self.ale = ALEInterface()
            self.ale.setInt(b'random_seed', rand_seed)
            self.ale.setFloat(b'repeat_action_probability',
                              options.repeat_action_probability)
            self.ale.setInt(b'frame_skip', options.frames_skip_in_ale)
            self.ale.setBool(b'color_averaging',
                             options.color_averaging_in_ale)
        self._no_op_max = no_op_max

        self.options = options
        self.color_maximizing = options.color_maximizing_in_gs
        self.color_averaging = options.color_averaging_in_gs
        self.color_no_change = options.color_no_change_in_gs
        # for screen output in _process_frame()
        self.thread_index = thread_index
        self.record_gs_screen_dir = self.options.record_gs_screen_dir
        self.episode_record_dir = None
        self.episode = 1
        self.rooms = np.zeros((24), dtype=int)
        self.prev_room_no = 1
        self.room_no = 1
        self.new_room = -1

        if options.use_gym:
            # see https://github.com/openai/gym/issues/349
            def _seed(self, seed=None):
                self.ale.setFloat(b'repeat_action_probability',
                                  options.repeat_action_probability)
                from gym.utils import seeding
                self.np_random, seed1 = seeding.np_random(seed)
                # Derive a random seed. This gets passed as a uint, but gets
                # checked as an int elsewhere, so we need to keep it below
                # 2**31.
                seed2 = seeding.hash_seed(seed1 + 1) % 2**31
                # Empirically, we need to seed before loading the ROM.
                self.ale.setInt(b'random_seed', seed2)
                self.ale.loadROM(self.game_path)
                return [seed1, seed2]

            AtariEnv._seed = _seed
            self.gym = gym.make(options.gym_env)
            self.ale = self.gym.ale
            print(self.gym.action_space)
        else:
            if display:
                self._setup_display()

            self.ale.loadROM(options.rom.encode('ascii'))

            # collect minimal action set
            self.real_actions = self.ale.getMinimalActionSet()
            print("real_actions=", self.real_actions)
            if (len(self.real_actions) != self.options.action_size):
                print(
                    "***********************************************************"
                )
                print("* action_size != len(real_actions)")
                print(
                    "***********************************************************"
                )
                sys.exit(1)

        # height=210, width=160
        self._screen = np.empty((210 * 160 * 1), dtype=np.uint8)
        if (not options.use_gym) and (self.color_maximizing
                                      or self.color_averaging
                                      or self.color_no_change):
            self._screen_RGB = np.empty((210 * 160 * 3), dtype=np.uint8)
            self._prev_screen_RGB = np.empty((210 * 160 * 3), dtype=np.uint8)
        self._have_prev_screen_RGB = False

        # for pseudo-count
        self.psc_use = options.psc_use
        if options.psc_use:
            self.psc_frsize = options.psc_frsize
            self.psc_k = options.psc_frsize**2
            self.psc_rev_pow = 1.0 / options.psc_pow
            self.psc_alpha = math.pow(0.1, options.psc_pow)
            self.psc_beta = options.psc_beta
            self.psc_maxval = options.psc_maxval
            self.psc_vcount = np.zeros((self.psc_k, self.psc_maxval + 1),
                                       dtype=np.float64)
            self.psc_n = 0

        self.reset()
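
no_op_max drives a common trick: apply a random number of no-op actions after each reset so training episodes do not all start from identical frames. A hedged sketch with raw ALE calls; the helper name is illustrative, and action 0 is NOOP in ALE's action set:

def reset_with_noops(ale, no_op_max, rng):
    """Reset, then advance a random number of frames with the NOOP action."""
    ale.reset_game()
    for _ in range(rng.randint(0, no_op_max + 1)):
        ale.act(0)              # NOOP
        if ale.game_over():     # rare, but restart if the no-ops end the game
            ale.reset_game()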
Example #7
    def __init__(self,
                 rom_file,
                 viz=0,
                 frame_skip=4,
                 nullop_start=30,
                 live_lost_as_eoe=True,
                 max_num_frames=0):
        """
        Args:
            rom_file: path to the rom
            frame_skip: skip every k frames and repeat the action
            viz: visualization to be done.
                Set to 0 to disable.
                Set to a positive number to be the delay between frames to show.
                Set to a string to be a directory to store frames.
            nullop_start: start with a random number of null ops.
            live_lost_as_eoe: consider the loss of a life as end of episode. Useful for training.
            max_num_frames: maximum number of frames per episode.
        """
        super(AtariPlayer, self).__init__()
        assert os.path.isfile(rom_file), \
            "rom {} not found. Please download at {}".format(rom_file, ROM_URL)

        try:
            ALEInterface.setLoggerMode(ALEInterface.Logger.Error)
        except AttributeError:
            print("You're not using latest ALE")

        # avoid simulator bugs: https://github.com/mgbellemare/Arcade-Learning-Environment/issues/86
        with _ALE_LOCK:
            self.ale = ALEInterface()
            self.ale.setInt(b"random_seed", np.random.randint(0, 30000))
            self.ale.setInt(b"max_num_frames_per_episode", max_num_frames)
            self.ale.setBool(b"showinfo", False)

            self.ale.setInt(b"frame_skip", 1)
            self.ale.setBool(b'color_averaging', False)
            # manual.pdf suggests otherwise.
            self.ale.setFloat(b'repeat_action_probability', 0.0)

            # viz setup
            if isinstance(viz, str):
                assert os.path.isdir(viz), viz
                self.ale.setString(b'record_screen_dir', viz)
                viz = 0
            if isinstance(viz, int):
                viz = float(viz)
            self.viz = viz
            if self.viz and isinstance(self.viz, float):
                self.windowname = os.path.basename(rom_file)
                cv2.startWindowThread()
                cv2.namedWindow(self.windowname)

            self.ale.loadROM(rom_file.encode('utf-8'))
        self.width, self.height = self.ale.getScreenDims()
        self.actions = self.ale.getMinimalActionSet()

        self.live_lost_as_eoe = live_lost_as_eoe
        self.frame_skip = frame_skip
        self.nullop_start = nullop_start

        self.action_space = spaces.Discrete(len(self.actions))
        self.observation_space = spaces.Box(low=0,
                                            high=255,
                                            shape=(self.height, self.width),
                                            dtype=np.uint8)
        self._restart_episode()
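
With live_lost_as_eoe enabled, the wrapper treats a lost life as the end of an episode during training. A hedged sketch of how that check is commonly implemented around ale.act, comparing ale.lives() before and after the step; step_once is an illustrative helper, not this class's API:

def step_once(ale, action, live_lost_as_eoe=True):
    lives_before = ale.lives()
    reward = ale.act(action)
    game_over = ale.game_over()
    if live_lost_as_eoe and ale.lives() < lives_before:
        game_over = True        # treat the lost life as terminal for training
    return reward, game_over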