Example #1
    def __init__(self, game='pong', obs_type='ram', frameskip=(2, 5), repeat_action_probability=0.):
        """Frameskip should be either a tuple (indicating a random range to
        choose from, with the top value exclude), or an int."""

        utils.EzPickle.__init__(self, game, obs_type)
        assert obs_type in ('ram', 'image')

        self.game_path = atari_py.get_game_path(game)
        if not os.path.exists(self.game_path):
            raise IOError('You asked for game %s but path %s does not exist'%(game, self.game_path))
        self._obs_type = obs_type
        self.frameskip = frameskip
        self.ale = atari_py.ALEInterface()
        self.viewer = None

        # Tune (or disable) ALE's action repeat:
        # https://github.com/openai/gym/issues/349
        assert isinstance(repeat_action_probability, (float, int)), "Invalid repeat_action_probability: {!r}".format(repeat_action_probability)
        self.ale.setFloat('repeat_action_probability'.encode('utf-8'), repeat_action_probability)

        self._seed()

        self._action_set = self.ale.getMinimalActionSet()
        self.action_space = spaces.Discrete(len(self._action_set))

        (screen_width, screen_height) = self.ale.getScreenDims()
        if self._obs_type == 'ram':
            self.observation_space = spaces.Box(low=np.zeros(128), high=np.zeros(128)+255)
        elif self._obs_type == 'image':
            self.observation_space = spaces.Box(low=0, high=255, shape=(screen_height, screen_width, 3))
        else:
            raise error.Error('Unrecognized observation type: {}'.format(self._obs_type))
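    # Hedged sketch (not in the original source): how the int-vs-tuple frameskip
    # above is typically consumed in the step loop. `self._get_obs` and
    # `self.np_random` (set up by `_seed`) are assumed here for illustration.
    def _step_sketch(self, a):
        reward = 0.0
        action = self._action_set[a]
        if isinstance(self.frameskip, int):
            num_steps = self.frameskip
        else:
            # random range, top value excluded
            num_steps = self.np_random.randint(self.frameskip[0], self.frameskip[1])
        for _ in range(num_steps):
            reward += self.ale.act(action)
        return self._get_obs(), reward, self.ale.game_over(), {}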
Example #2
    def __init__(self, model_path, frame_skip):
        if model_path.startswith("/"):
            fullpath = model_path
        else:
            fullpath = os.path.join(os.path.dirname(__file__), "assets",
                                    model_path)
        if not os.path.exists(fullpath):
            raise IOError("File %s does not exist" % fullpath)
        self.frame_skip = frame_skip
        self.model = mujoco_py.load_model_from_path(fullpath)
        self.sim = mujoco_py.MjSim(self.model)
        self.data = self.sim.data
        self.viewer = None

        self.metadata = {
            'render.modes': ['human', 'rgb_array'],
            'video.frames_per_second': int(np.round(1.0 / self.dt))
        }

        self.init_qpos = self.sim.data.qpos.ravel().copy()
        self.init_qvel = self.sim.data.qvel.ravel().copy()
        observation, _reward, done, _info = self._step(np.zeros(self.model.nu))
        assert not done
        self.obs_dim = observation.size

        bounds = self.model.actuator_ctrlrange.copy()
        low = bounds[:, 0]
        high = bounds[:, 1]
        self.action_space = spaces.Box(low, high)

        high = np.inf * np.ones(self.obs_dim)
        low = -high
        self.observation_space = spaces.Box(low, high)

        self._seed()
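    # Hedged sketch (not in the original source): one way frame_skip and the
    # `self.dt` referenced in `metadata` above can be tied together; helper
    # names are assumed for illustration.
    @property
    def dt(self):
        # one environment step spans frame_skip simulator timesteps
        return self.model.opt.timestep * self.frame_skip

    def do_simulation(self, ctrl, n_frames):
        # apply the control signal, then advance the simulator n_frames times
        self.sim.data.ctrl[:] = ctrl
        for _ in range(n_frames):
            self.sim.step()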
Example #3
    def __init__(self):
        self.max_speed = 8
        self.max_torque = 2.
        self.dt = .05
        self.viewer = None

        high = np.array([1., 1., self.max_speed])
        self.action_space = spaces.Box(low=-self.max_torque,
                                       high=self.max_torque,
                                       shape=(1, ))
        self.observation_space = spaces.Box(low=-high, high=high)

        self._seed()
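    # Hedged sketch (not in the original source): the [1., 1., max_speed] bounds
    # above correspond to an observation of the form below; helper name assumed.
    def _get_obs_sketch(self):
        theta, thetadot = self.state
        return np.array([np.cos(theta), np.sin(theta), thetadot])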
Example #4
    def __init__(self,
                 initialWealth=25.0,
                 edgePriorAlpha=7,
                 edgePriorBeta=3,
                 maxWealthAlpha=5.0,
                 maxWealthM=200.0,
                 maxRoundsMean=300.0,
                 maxRoundsSD=25.0,
                 reseed=True):
        # Store the hyperparameters so they can be passed back into __init__()
        # during resets; the same hyperparameters then govern the next game's
        # parameters, as the user expects.
        # TODO: this is boilerplate; is there a more elegant way to do this?
        self.initialWealth = float(initialWealth)
        self.edgePriorAlpha = edgePriorAlpha
        self.edgePriorBeta = edgePriorBeta
        self.maxWealthAlpha = maxWealthAlpha
        self.maxWealthM = maxWealthM
        self.maxRoundsMean = maxRoundsMean
        self.maxRoundsSD = maxRoundsSD

        # draw this game's set of parameters:
        edge = prng.np_random.beta(edgePriorAlpha, edgePriorBeta)
        maxWealth = round(
            genpareto.rvs(maxWealthAlpha,
                          maxWealthM,
                          random_state=prng.np_random))
        maxRounds = int(
            round(prng.np_random.normal(maxRoundsMean, maxRoundsSD)))

        # add an additional global variable which is the sufficient statistic for the Pareto distribution on wealth cap;
        # alpha doesn't update, but x_m does, and simply is the highest wealth count we've seen to date:
        self.maxEverWealth = float(self.initialWealth)
        # for the coinflip edge, it is total wins/losses:
        self.wins = 0
        self.losses = 0
        # for the number of rounds, we need to remember how many rounds we've played:
        self.roundsElapsed = 0

        # the rest proceeds as before:
        self.action_space = spaces.Discrete(int(maxWealth * 100))
        self.observation_space = spaces.Tuple((
            spaces.Box(0, maxWealth, shape=[1]),  # current wealth
            spaces.Discrete(maxRounds + 1),  # rounds elapsed
            spaces.Discrete(maxRounds + 1),  # wins
            spaces.Discrete(maxRounds + 1),  # losses
            spaces.Box(0, maxWealth, [1])))  # maximum observed wealth
        self.reward_range = (0, maxWealth)
        self.edge = edge
        self.wealth = self.initialWealth
        self.maxRounds = maxRounds
        self.rounds = self.maxRounds
        self.maxWealth = maxWealth
        if reseed or not hasattr(self, 'np_random'): self._seed()
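    # Hedged sketch (not in the original source): how the sufficient statistics
    # tracked above could feed the next game's parameter draws; the helper name
    # and exact update rule are assumed for illustration.
    def _redraw_parameters_sketch(self):
        # Beta posterior on the coin edge: prior pseudo-counts plus observed wins/losses
        edge = prng.np_random.beta(self.edgePriorAlpha + self.wins,
                                   self.edgePriorBeta + self.losses)
        # Pareto posterior on the wealth cap: alpha stays fixed, x_m is the
        # highest wealth seen so far
        maxWealth = round(genpareto.rvs(self.maxWealthAlpha,
                                        self.maxEverWealth,
                                        random_state=prng.np_random))
        return edge, maxWealth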
Example #5
    def __init__(self):
        self._seed()
        self.viewer = None

        self.world = Box2D.b2World()
        self.terrain = None
        self.hull = None

        self.prev_shaping = None
        self._reset()

        high = np.array([np.inf]*24)
        self.action_space = spaces.Box(np.array([-1,-1,-1,-1]), np.array([+1,+1,+1,+1]))
        self.observation_space = spaces.Box(-high, high)
Example #6
    def __init__(self):
        self.gravity = 9.8
        self.masscart = 1.0
        self.masspole = 0.1
        self.total_mass = (self.masspole + self.masscart)
        self.length = 0.5  # actually half the pole's length
        self.polemass_length = (self.masspole * self.length)
        self.force_mag = 10.0
        self.tau = 0.02  # seconds between state updates

        # Angle at which to fail the episode
        self.theta_threshold_radians = 12 * 2 * math.pi / 360
        self.x_threshold = 2.4

        # Angle limit set to 2 * theta_threshold_radians so failing observation is still within bounds
        high = np.array([
            self.x_threshold * 2,
            np.finfo(np.float32).max, self.theta_threshold_radians * 2,
            np.finfo(np.float32).max
        ])

        self.action_space = spaces.Discrete(2)
        self.observation_space = spaces.Box(-high, high)

        self._seed()
        self.viewer = None
        self.state = None

        self.steps_beyond_done = None
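    # Hedged sketch (not in the original source): the failure check these
    # thresholds feed into; the 2x margin on `high` above keeps a just-failed
    # observation inside the Box. Helper name assumed for illustration.
    def _episode_failed_sketch(self, state):
        x, x_dot, theta, theta_dot = state
        return (x < -self.x_threshold
                or x > self.x_threshold
                or theta < -self.theta_threshold_radians
                or theta > self.theta_threshold_radians)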
Example #7
    def __init__(self):
        self.viewer = None
        high = np.array([1.0, 1.0, 1.0, 1.0, self.MAX_VEL_1, self.MAX_VEL_2])
        low = -high
        self.observation_space = spaces.Box(low, high)
        self.action_space = spaces.Discrete(3)
        self.state = None
        self._seed()
Example #8
    def __init__(self):
        self._seed()
        self.contactListener_keepref = FrictionDetector(self)
        self.world = Box2D.b2World(
            (0, 0), contactListener=self.contactListener_keepref)
        self.viewer = None
        self.invisible_state_window = None
        self.invisible_video_window = None
        self.road = None
        self.car = None
        self.reward = 0.0
        self.prev_reward = 0.0

        self.action_space = spaces.Box(np.array([-1, 0, 0]),
                                       np.array([+1, +1,
                                                 +1]))  # steer, gas, brake
        self.observation_space = spaces.Box(low=0,
                                            high=255,
                                            shape=(STATE_H, STATE_W, 3))
Example #9
    def __init__(self):
        self.min_action = -1.0
        self.max_action = 1.0
        self.min_position = -1.2
        self.max_position = 0.6
        self.max_speed = 0.07
        self.goal_position = 0.45 # was 0.5 in gym, 0.45 in Arnaud de Broissia's version
        self.power = 0.0015

        self.low_state = np.array([self.min_position, -self.max_speed])
        self.high_state = np.array([self.max_position, self.max_speed])

        self.viewer = None

        self.action_space = spaces.Box(self.min_action, self.max_action, shape=(1,))
        self.observation_space = spaces.Box(self.low_state, self.high_state)

        self._seed()
        self.reset()
Example #10
    def __init__(self, env, k):
        """Stack k last frames.
        Returns lazy array, which is much more memory efficient.
        See Also
        --------
        baselines.common.atari_wrappers.LazyFrames
        """
        gym.Wrapper.__init__(self, env)
        self.k = k
        self.frames = deque([], maxlen=k)
        shp = env.observation_space.shape
        self.observation_space = spaces.Box(low=0, high=255, shape=(shp[0]*k, shp[1], shp[2]), dtype=np.float32)
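    # Hedged sketch (not in the original source): the reset/step pair that keeps
    # the deque full, assuming a LazyFrames helper as in
    # baselines.common.atari_wrappers (concatenation is deferred until the
    # observation is actually used).
    def _reset(self):
        ob = self.env.reset()
        for _ in range(self.k):
            self.frames.append(ob)
        return self._get_ob()

    def _step(self, action):
        ob, reward, done, info = self.env.step(action)
        self.frames.append(ob)
        return self._get_ob(), reward, done, info

    def _get_ob(self):
        assert len(self.frames) == self.k
        return LazyFrames(list(self.frames))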
Example #11
    def __init__(self):
        self.range = 1000  # Randomly selected number is within +/- this value
        self.bounds = 10000

        self.action_space = spaces.Box(low=np.array([-self.bounds]), high=np.array([self.bounds]))
        self.observation_space = spaces.Discrete(4)

        self.number = 0
        self.guess_count = 0
        self.guess_max = 200
        self.observation = 0

        self._seed()
        self._reset()
Example #12
    def __init__(self):
        self.range = 1000  # Randomly selected number is within +/- this value
        self.bounds = 2000  # Action space bounds

        self.action_space = spaces.Box(low=np.array([-self.bounds]), high=np.array([self.bounds]))
        self.observation_space = spaces.Discrete(4)

        self.number = 0
        self.guess_count = 0
        self.guess_max = 200
        self.observation = 0

        self._seed()
        self._reset()
Example #13
    def __init__(self, player_color, opponent, observation_type,
                 illegal_move_mode, board_size):
        """
        Args:
            player_color: Stone color for the agent. Either 'black' or 'white'
            opponent: An opponent policy
            observation_type: State encoding
            illegal_move_mode: What to do when the agent makes an illegal move. Choices: 'raise' or 'lose'
            board_size: size of the Go board
        """
        assert isinstance(
            board_size,
            int) and board_size >= 1, 'Invalid board size: {}'.format(
                board_size)
        self.board_size = board_size

        self._seed()

        colormap = {
            'black': pachi_py.BLACK,
            'white': pachi_py.WHITE,
        }
        try:
            self.player_color = colormap[player_color]
        except KeyError:
            raise error.Error(
                "player_color must be 'black' or 'white', not {}".format(
                    player_color))

        self.opponent_policy = None
        self.opponent = opponent

        assert observation_type in ['image3c']
        self.observation_type = observation_type

        assert illegal_move_mode in ['lose', 'raise']
        self.illegal_move_mode = illegal_move_mode

        if self.observation_type != 'image3c':
            raise error.Error('Unsupported observation type: {}'.format(
                self.observation_type))

        shape = pachi_py.CreateBoard(self.board_size).encode().shape
        self.observation_space = spaces.Box(np.zeros(shape), np.ones(shape))
        # One action for each board position, pass, and resign
        self.action_space = spaces.Discrete(self.board_size**2 + 2)

        # Filled in by _reset()
        self.state = None
        self.done = True
Example #14
    def __init__(self):
        self._seed()
        self.viewer = None

        self.world = Box2D.b2World()
        self.moon = None
        self.lander = None
        self.particles = []

        self.prev_reward = None

        high = np.array([np.inf]*8)  # useful range is -1 .. +1, but spikes can be higher
        self.observation_space = spaces.Box(-high, high)

        if self.continuous:
            # Action is two floats [main engine, left-right engines].
            # Main engine: -1..0 off, 0..+1 throttle from 50% to 100% power. Engine can't work with less than 50% power.
            # Left-right:  -1.0..-0.5 fire left engine, +0.5..+1.0 fire right engine, -0.5..0.5 off
            self.action_space = spaces.Box(-1, +1, (2,))
        else:
            # Nop, fire left engine, main engine, right engine
            self.action_space = spaces.Discrete(4)

        self._reset()
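    # Hedged sketch (not in the original source): how the comments above map a
    # continuous action to engine power; helper name assumed for illustration.
    def _engine_powers_sketch(self, action):
        # main engine: off for action[0] <= 0, otherwise 50%..100% throttle
        m_power = (np.clip(action[0], 0.0, 1.0) + 1.0) * 0.5 if action[0] > 0.0 else 0.0
        # side engines: |action[1]| > 0.5 fires the left (-) or right (+) engine
        s_power = np.clip(np.abs(action[1]), 0.5, 1.0) if np.abs(action[1]) > 0.5 else 0.0
        return m_power, s_power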
Example #15
    def __init__(self):
        self.min_position = -1.2
        self.max_position = 0.6
        self.max_speed = 0.07
        self.goal_position = 0.5

        self.low = np.array([self.min_position, -self.max_speed])
        self.high = np.array([self.max_position, self.max_speed])

        self.viewer = None

        self.action_space = spaces.Discrete(3)
        self.observation_space = spaces.Box(self.low, self.high)

        self._seed()
        self.reset()
Example #16
    def __init__(self, player_color, opponent, observation_type,
                 illegal_move_mode, board_size):
        """
        Args:
            player_color: Stone color for the agent. Either 'black' or 'white'
            opponent: An opponent policy
            observation_type: State encoding
            illegal_move_mode: What to do when the agent makes an illegal move. Choices: 'raise' or 'lose'
            board_size: size of the Hex board
        """
        assert isinstance(
            board_size,
            int) and board_size >= 1, 'Invalid board size: {}'.format(
                board_size)
        self.board_size = board_size

        colormap = {
            'black': HexEnv.BLACK,
            'white': HexEnv.WHITE,
        }
        try:
            self.player_color = colormap[player_color]
        except KeyError:
            raise error.Error(
                "player_color must be 'black' or 'white', not {}".format(
                    player_color))

        self.opponent = opponent

        assert observation_type in ['numpy3c']
        self.observation_type = observation_type

        assert illegal_move_mode in ['lose', 'raise']
        self.illegal_move_mode = illegal_move_mode

        if self.observation_type != 'numpy3c':
            raise error.Error('Unsupported observation type: {}'.format(
                self.observation_type))

        # One action for each board position and resign
        self.action_space = spaces.Discrete(self.board_size**2 + 1)
        observation = self.reset()
        self.observation_space = spaces.Box(np.zeros(observation.shape),
                                            np.ones(observation.shape))

        self._seed()
Example #17
    def __init__(self, natural=False):
        """
        Initialize environment
        """

        # Length-1 Boxes are used to store scalar parameters (plain scalars caused errors)
        self.action_space = spaces.Tuple((
            spaces.Box(-5.0, 0.0, 1),  # learning rate
            spaces.Box(-7.0, -2.0, 1),  # decay
            spaces.Box(-5.0, 0.0, 1),  # momentum
            spaces.Box(2, 8, 1),  # batch size
            spaces.Box(-6.0, 1.0, 1),  # l1 reg
            spaces.Box(-6.0, 1.0, 1),  # l2 reg
            spaces.Box(0.0, 1.0, (5, 2)),  # convolutional layer parameters
            spaces.Box(0.0, 1.0, (2, 2)),  # fully connected layer parameters
        ))

        # observation features, in order: num of instances, num of labels,
        # validation accuracy after training with given parameters
        self.observation_space = spaces.Box(-1e5, 1e5,
                                            2)  # validation accuracy

        # Start the first game
        self._reset()
Example #18
    def __init__(self,
                 initialWealth=25.0,
                 edge=0.6,
                 maxWealth=250.0,
                 maxRounds=300):

        self.action_space = spaces.Discrete(int(
            maxWealth * 100))  # betting in penny increments
        self.observation_space = spaces.Tuple((
            spaces.Box(0, maxWealth, [1]),  # (w,b)
            spaces.Discrete(maxRounds + 1)))
        self.reward_range = (0, maxWealth)
        self.edge = edge
        self.wealth = initialWealth
        self.initialWealth = initialWealth
        self.maxRounds = maxRounds
        self.maxWealth = maxWealth
        self._seed()
        self._reset()
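    # Hedged sketch (not in the original source): the Discrete action indexes a
    # bet in penny increments, capped by the current bankroll; helper name
    # assumed for illustration.
    def _bet_from_action_sketch(self, action):
        return min(action / 100.0, self.wealth)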
Example #19
    def __init__(self, natural=False):
        """
        Initialize environment
        """

        # Length-1 Boxes are used to store scalar parameters (plain scalars caused errors)
        self.action_space = spaces.Tuple((
            spaces.Box(-5.0, 0.0, 1),  # learning rate
            spaces.Box(-7.0, -2.0, 1),  # decay
            spaces.Box(-5.0, 0.0, 1),  # momentum
            spaces.Box(2, 8, 1),  # batch size
            spaces.Box(-6.0, 1.0, 1),  # l1 reg
            spaces.Box(-6.0, 1.0, 1),  # l2 reg
        ))

        # observation features, in order: num of instances, num of labels,
        # number of filters in part A / B of neural net, num of neurons in
        # output layer, validation accuracy after training with given
        # parameters
        self.observation_space = spaces.Box(-1e5, 1e5,
                                            6)  # validation accuracy

        # Start the first game
        self._reset()
Example #20
    def __init__(self, env=None):
        super(ProcessFrame84, self).__init__(env)
        self.observation_space = spaces.Box(low=0, high=255, shape=(84, 84, 1), dtype=np.uint8)
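    # Hedged sketch (not in the original source): one plausible per-frame
    # processing behind the 84x84x1 space, i.e. grayscale conversion plus a
    # resize; assumes `import cv2` is available and the helper name is illustrative.
    def _observation_sketch(self, obs):
        frame = np.dot(obs[..., :3], [0.299, 0.587, 0.114])  # RGB -> luminance
        frame = cv2.resize(frame, (84, 84), interpolation=cv2.INTER_AREA)
        return frame.astype(np.uint8).reshape(84, 84, 1)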
Example #21
    def __init__(self, env):
        super(CropAtari, self).__init__(env)
        self.observation_space = gym_spaces.Box(0, 255, shape=(ATARI_HEIGHT, ATARI_WIDTH, 3))