Example #1
def should_skip_env_spec_for_tests(spec):
    # We skip tests for envs that require dependencies or are otherwise
    # troublesome to run frequently
    ep = spec._entry_point
    # Skip mujoco tests for pull request CI
    if skip_mujoco and (ep.startswith('gym.envs.mujoco') or ep.startswith('gym.envs.robotics:')):
        return True
    try:
        import atari_py
    except ImportError:
        if ep.startswith('gym.envs.atari'):
            return True
    try:
        import Box2D
    except ImportError:
        if ep.startswith('gym.envs.box2d'):
            return True

    if (    'GoEnv' in ep or
            'HexEnv' in ep or
            (ep.startswith("gym.envs.atari") and not spec.id.startswith("Pong") and not spec.id.startswith("Seaquest"))
    ):
        logger.warn("Skipping tests for env {}".format(ep))
        return True
    return False
Example #2
    def step(self, action):
        assert self.action_space.contains(action), "%r (%s) invalid"%(action, type(action))
        state = self.state
        x, x_dot, theta, theta_dot = state
        force = self.force_mag if action==1 else -self.force_mag
        costheta = math.cos(theta)
        sintheta = math.sin(theta)
        temp = (force + self.polemass_length * theta_dot * theta_dot * sintheta) / self.total_mass
        thetaacc = (self.gravity * sintheta - costheta* temp) / (self.length * (4.0/3.0 - self.masspole * costheta * costheta / self.total_mass))
        xacc  = temp - self.polemass_length * thetaacc * costheta / self.total_mass
        x  = x + self.tau * x_dot
        x_dot = x_dot + self.tau * xacc
        theta = theta + self.tau * theta_dot
        theta_dot = theta_dot + self.tau * thetaacc
        self.state = (x,x_dot,theta,theta_dot)
        done =  x < -self.x_threshold \
                or x > self.x_threshold \
                or theta < -self.theta_threshold_radians \
                or theta > self.theta_threshold_radians
        done = bool(done)

        if not done:
            reward = 1.0
        elif self.steps_beyond_done is None:
            # Pole just fell!
            self.steps_beyond_done = 0
            reward = 1.0
        else:
            if self.steps_beyond_done == 0:
                logger.warn("You are calling 'step()' even though this environment has already returned done = True. You should always call 'reset()' once you receive 'done = True' -- any further steps are undefined behavior.")
            self.steps_beyond_done += 1
            reward = 0.0

        return np.array(self.state), reward, done, {}
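A minimal usage sketch for the step() method above, assuming the classic CartPole-v1 registration and the pre-0.26 gym API where step() returns (observation, reward, done, info); the warning in the snippet only fires if an agent keeps stepping after done=True.

import gym

# Minimal sketch (assumes CartPole-v1 is registered and the four-tuple step API above).
env = gym.make("CartPole-v1")
obs = env.reset()
done = False
total_reward = 0.0
while not done:
    obs, reward, done, info = env.step(env.action_space.sample())
    total_reward += reward
env.close()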
Example #3
 def step(self, action):
     assert self.action_space.contains(action)
     self.last_action = action
     inp_act, out_act, pred = action
     done = False
     reward = 0.0
     self.time += 1
     assert 0 <= self.write_head_position
     if out_act == 1:
         try:
             correct = pred == self.target[self.write_head_position]
         except IndexError:
             logger.warn("It looks like you're calling step() even though this "+
                 "environment has already returned done=True. You should always call "+
                 "reset() once you receive done=True. Any further steps are undefined "+
                 "behaviour.")
             correct = False
         if correct:
             reward = 1.0
         else:
             # Bail as soon as a wrong character is written to the tape
             reward = -0.5
             done = True
         self.write_head_position += 1
         if self.write_head_position >= len(self.target):
             done = True
     self._move(inp_act)
     if self.time > self.time_limit:
         reward = -1.0
         done = True
     obs = self._get_obs()
     self.last_reward = reward
     self.episode_total_reward += reward
     return (obs, reward, done, {})
Example #4
    def _start(self, directory, video_callable=None, force=False, resume=False,
              write_upon_reset=False, uid=None, mode=None):
        """Start monitoring.

        Args:
            directory (str): A per-training run directory where to record stats.
            video_callable (Optional[function, False]): function that takes in the index of the episode and outputs a boolean, indicating whether we should record a video on this episode. The default (for video_callable is None) is to take perfect cubes, capped at 1000. False disables video recording.
            force (bool): Clear out existing training data from this directory (by deleting every file prefixed with "openaigym.").
            resume (bool): Retain the training data already in this directory, which will be merged with our new data
            write_upon_reset (bool): Write the manifest file on each reset. (This is currently a JSON file, so writing it is somewhat expensive.)
            uid (Optional[str]): A unique id used as part of the suffix for the file. By default, uses os.getpid().
            mode (['evaluation', 'training']): Whether this is an evaluation or training episode.
        """
        if self.env.spec is None:
            logger.warn("Trying to monitor an environment which has no 'spec' set. This usually means you did not create it via 'gym.make', and is recommended only for advanced users.")
            env_id = '(unknown)'
        else:
            env_id = self.env.spec.id

        if not os.path.exists(directory):
            logger.info('Creating monitor directory %s', directory)
            if six.PY3:
                os.makedirs(directory, exist_ok=True)
            else:
                os.makedirs(directory)

        if video_callable is None:
            video_callable = capped_cubic_video_schedule
        elif video_callable == False:
            video_callable = disable_videos
        elif not callable(video_callable):
            raise error.Error('You must provide a function, None, or False for video_callable, not {}: {}'.format(type(video_callable), video_callable))
        self.video_callable = video_callable

        # Check on whether we need to clear anything
        if force:
            clear_monitor_files(directory)
        elif not resume:
            training_manifests = detect_training_manifests(directory)
            if len(training_manifests) > 0:
                raise error.Error('''Trying to write to monitor directory {} with existing monitor files: {}.

 You should use a unique directory for each training run, or use 'force=True' to automatically clear previous monitor files.'''.format(directory, ', '.join(training_manifests[:5])))

        self._monitor_id = monitor_closer.register(self)

        self.enabled = True
        self.directory = os.path.abspath(directory)
        # We use the 'openai-gym' prefix to determine if a file is
        # ours
        self.file_prefix = FILE_PREFIX
        self.file_infix = '{}.{}'.format(self._monitor_id, uid if uid else os.getpid())

        self.stats_recorder = stats_recorder.StatsRecorder(directory, '{}.episode_batch.{}'.format(self.file_prefix, self.file_infix), autoreset=self.env_semantics_autoreset, env_id=env_id)

        if not os.path.exists(directory): os.mkdir(directory)
        self.write_upon_reset = write_upon_reset

        if mode is not None:
            self._set_mode(mode)
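A hedged usage sketch for the options documented in the _start() docstring above; gym.wrappers.Monitor (available in the older gym releases this snippet comes from) forwards these keyword arguments, and the directory path here is hypothetical.

import gym
from gym.wrappers import Monitor

# Sketch only: record a video on every episode and clear any previous run in the directory.
env = Monitor(gym.make("CartPole-v1"), directory="/tmp/cartpole-monitor",
              video_callable=lambda episode_id: True, force=True)
obs = env.reset()
done = False
while not done:
    obs, reward, done, info = env.step(env.action_space.sample())
env.close()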
Example #5
    def _encode_image_frame(self, frame):
        if not self.encoder:
            self.encoder = ImageEncoder(self.path, frame.shape, self.frames_per_sec)
            self.metadata['encoder_version'] = self.encoder.version_info

        try:
            self.encoder.capture_frame(frame)
        except error.InvalidFrame as e:
            logger.warn('Tried to pass invalid video frame, marking as broken: %s', e)
            self.broken = True
        else:
            self.empty = False
Example #6
def should_skip_env_spec_for_tests(spec):
    # We skip tests for envs that require dependencies or are otherwise
    # troublesome to run frequently
    ep = spec._entry_point
    # Skip mujoco tests for pull request CI
    skip_mujoco = not (os.environ.get('MUJOCO_KEY_BUNDLE') or os.path.exists(os.path.expanduser('~/.mujoco')))
    if skip_mujoco and ep.startswith('gym.envs.mujoco:'):
        return True
    if (    'GoEnv' in ep or
            'HexEnv' in ep or
            (ep.startswith("gym.envs.atari") and not spec.id.startswith("Pong") and not spec.id.startswith("Seaquest"))
    ):
        logger.warn("Skipping tests for env {}".format(ep))
        return True
    return False
Example #7
File: core.py  Project: olegklimov/gym
    def seed(self, seed=None):
        """Sets the seed for this env's random number generator(s).

        Note:
            Some environments use multiple pseudorandom number generators.
            We want to capture all such seeds used in order to ensure that
            there aren't accidental correlations between multiple generators.

        Returns:
            list<bigint>: Returns the list of seeds used in this env's random
              number generators. The first value in the list should be the
              "main" seed, or the value which a reproducer should pass to
              'seed'. Often, the main seed equals the provided 'seed', but
              this won't be true if seed=None, for example.
        """
        logger.warn("Could not seed environment %s", self)
        return
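The base-class stub above only warns; a sketch of the conventional override (assumed here, not shown in the snippet) backs seed() with gym.utils.seeding.

import gym
from gym.utils import seeding

class MyEnv(gym.Env):  # hypothetical environment
    def seed(self, seed=None):
        # Back the main generator with gym.utils.seeding and report the seed actually used.
        self.np_random, seed = seeding.np_random(seed)
        return [seed]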
Example #8
def patch_deprecated_methods(env):
    """
    Methods renamed from '_method' to 'method', render() no longer has 'close' parameter, close is a separate method.
    For backward compatibility, this makes it possible to work with unmodified environments.
    """
    global warn_once
    if warn_once:
        logger.warn("Environment '%s' has deprecated methods '_step' and '_reset' rather than 'step' and 'reset'. Compatibility code invoked. Set _gym_disable_underscore_compat = True to disable this behavior." % str(type(env)))
        warn_once = False
    env.reset = env._reset
    env.step  = env._step
    env.seed  = env._seed
    def render(mode):
        return env._render(mode, close=False)
    def close():
        env._render("human", close=True)
    env.render = render
    env.close = close
Example #9
File: box.py  Project: joschu/gym
 def __init__(self, low=None, high=None, shape=None, dtype=np.float32):
     """
     Two kinds of valid input:
         Box(low=-1.0, high=1.0, shape=(3,4)) # low and high are scalars, and shape is provided
         Box(low=np.array([-1.0,-2.0]), high=np.array([2.0,4.0])) # low and high are arrays of the same shape
     """
     if shape is None:
         assert low.shape == high.shape
         shape = low.shape
     else:
         assert np.isscalar(low) and np.isscalar(high)
         low = low + np.zeros(shape)
         high = high + np.zeros(shape)
     self.low = low.astype(dtype)
     self.high = high.astype(dtype)
     if (self.high == 255).all() and dtype != np.uint8:
         logger.warn('Box constructor got high=255 but dtype!=uint8')
     Space.__init__(self, shape, dtype)
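A quick sketch of the two construction forms described in the docstring above; passing an explicit dtype also avoids the high=255 warning path.

import numpy as np
from gym.spaces import Box

# Scalar bounds with an explicit shape, and array bounds of matching shape.
scalar_box = Box(low=-1.0, high=1.0, shape=(3, 4), dtype=np.float32)
array_box = Box(low=np.array([-1.0, -2.0]), high=np.array([2.0, 4.0]), dtype=np.float32)
print(scalar_box.sample().shape)                                   # (3, 4)
print(array_box.contains(np.array([0.0, 0.0], dtype=np.float32)))  # True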
Example #10
    def step(self, action):
        if self.curr_episode > self.episodes:
            logger.warn(
                "You are calling 'step()' even though this environment has already returned done = True. You should always call 'initialize()' and 'reset()' once you receive 'done = True'"
            )
            return

        action = np.array(action)
        if action.shape != (self.numAgents, 2):
            logger.error(
                "Incorrect dimenions of action. Action must have destination position for each agent"
            )
            return

        self.curr_episode += 1

        reward = np.zeros(self.numAgents)
        self.agentPosIncrements = np.array([(-1000.0, -1000.0)] *
                                           self.numAgents)
        agentReachedDest = [False] * self.numAgents
        for _ in xrange(self.updateRate):
            self.curr_step += 1

            #Move targets
            for i in xrange(self.numTargets):
                self.moveTarget(i)

            #Move agent
            for i in xrange(self.numAgents):
                if not agentReachedDest[i]:
                    agentReachedDest[i] = self.moveAgent(i, action[i])
                else:  #Already reached. Removes precision errors
                    self.agentLocations[i] = action[i].astype('float32')

            #Calculate reward at this step
            reward += self.calculateAgentRewards()[0]

            if self.viewer is not None:
                self.render()

        return self.reset(), reward, self.curr_episode >= self.episodes, {}
Example #11
    def close_extras(self, timeout=None, terminate=False):
        """
        Parameters
        ----------
        timeout : int or float, optional
            Number of seconds before the call to `close` times out. If `None`,
            the call to `close` never times out. If the call to `close` times
            out, then all processes are terminated.

        terminate : bool (default: `False`)
            If `True`, then the `close` operation is forced and all processes
            are terminated.
        """
        timeout = 0 if terminate else timeout
        try:
            if self._state != AsyncState.DEFAULT:
                logger.warn('Calling `close` while waiting for a pending '
                            'call to `{0}` to complete.'.format(
                                self._state.value))
                function = getattr(self, '{0}_wait'.format(self._state.value))
                function(timeout)
        except mp.TimeoutError:
            terminate = True

        if terminate:
            for process in self.processes:
                if process.is_alive():
                    process.terminate()
        else:
            for pipe in self.parent_pipes:
                if (pipe is not None) and (not pipe.closed):
                    pipe.send(('close', None))
            for pipe in self.parent_pipes:
                if (pipe is not None) and (not pipe.closed):
                    pipe.recv()

        for pipe in self.parent_pipes:
            if pipe is not None:
                pipe.close()
        for process in self.processes:
            process.join()
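A hedged sketch of how close_extras() above is normally reached: in gym's vector API, VectorEnv.close() forwards its keyword arguments to close_extras(), so terminate=True force-terminates the worker processes.

import gym
from gym.vector import AsyncVectorEnv

# Sketch: four worker processes, force-terminated through the close_extras() logic above.
envs = AsyncVectorEnv([lambda: gym.make("CartPole-v1") for _ in range(4)])
observations = envs.reset()
envs.close(terminate=True)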
Example #12
    def step(self, action):
        """
        implementation of the classic “agent-environment loop”.

        Args:
            action (object) : the board

        Returns:
            observation (object):
            reward (float)
            done (boolean)
            info (dict)
        """
        #assert self.action_space.contains(action), f"{action} ({type(action)})"

        reward = 0
        info = {
            'turn': self.game.turns_count,
            'move_type': None,
            'player': self.game.current_player,
            'player_name': ['white', 'black'][self.game.current_player]
        }

        if self.done:
            logger.warn(
                "You are calling 'step()' even though this environment has already returned done = True."
                "You should always call 'reset()' once you receive 'done = True'"
                "-- any further steps are undefined behavior.")
        else:
            pos0, pos1 = action
            move_check = self.game.action_handler(pos0,
                                                  pos1,
                                                  return_modif=True)

            if move_check:  # if the move is a valid move
                move_type, self._modifications = move_check
                reward = Reward.method_1(self.game.board, move_type)
                # for debug
                info['move_type'] = move_type

        return self.observation, reward, self.done, info
Example #13
    def step(self, a):
        """
        @brief      Run a simulation step for a given action.

        @param[in]  a       The action to perform (in the action space rather than
                            the original torque space).

        @return     The next observation, the reward, the status of the simulation
                    (done or not), and a dictionary of extra information
        """
        if self.continuous:
            torque = a
        else:
            torque = self.AVAIL_TORQUE[a] * self.torque_mag

        # Add noise to the force action
        if self.torque_noise_max > 0:
            torque += self.np_random.uniform(-self.torque_noise_max, self.torque_noise_max)

        # Bypass 'self.engine_py.step' method and use direct assignment to max out the performances
        self.engine_py._action[0] = torque
        self.engine_py.step(dt_desired=self.dt)
        self.state = self.engine_py.state

        # Get information
        info, obs = self._get_info()
        done = info['is_success']

        # Make sure the simulation is not already over
        if done:
            if self.steps_beyond_done is None:
                self.steps_beyond_done = 0
            else:
                if self.steps_beyond_done == 0:
                    logger.warn("You are calling 'step()' even though this environment has already returned done = True. You should always call 'reset()' once you receive 'done = True' -- any further steps are undefined behavior.")
                self.steps_beyond_done += 1

        # Compute the reward
        reward = self.compute_reward(obs['achieved_goal'], self.goal, info)

        return obs, reward, done, info
Example #14
    def step(self, action):
        thrust = action[0]  # Thrust command
        w = action[1]  # Angular velocity command

        state = self.state
        ref_pos = self.ref_pos
        ref_vel = self.ref_vel

        pos = np.array([state[0], state[1]]).flatten()
        att = np.array([state[2]]).flatten()
        vel = np.array([state[3], state[4]]).flatten()

        acc = thrust / self.mass * np.array(
            [cos(att + pi / 2), sin(att + pi / 2)]) + self.g
        pos = pos + vel * self.dt + 0.5 * acc * self.dt * self.dt
        vel = vel + acc * self.dt
        att = att + w * self.dt

        self.state = (pos[0], pos[1], att, vel[0], vel[1])

        done =  linalg.norm(pos, 2) < -self.pos_threshold \
         or  linalg.norm(pos, 2) > self.pos_threshold \
         or linalg.norm(vel, 2) < -self.vel_threshold \
         or linalg.norm(vel, 2) > self.vel_threshold
        done = bool(done)

        if not done:
            reward = (-linalg.norm(pos, 2))
        elif self.steps_beyond_done is None:
            # Pole just fell!
            self.steps_beyond_done = 0
            reward = 1.0
        else:
            if self.steps_beyond_done == 0:
                logger.warn(
                    "You are calling 'step()' even though this environment has already returned done = True. You should always call 'reset()' once you receive 'done = True' -- any further steps are undefined behavior."
                )
            self.steps_beyond_done += 1
            reward = 0.0

        return np.array(self.state), reward, done, {}
Example #15
    def step(self, action):
        assert self.action_space.contains(
            action), "%r (%s) invalid" % (action, type(action))
        state = self.state
        x, x_dot, theta, theta_dot = state
        force = self.force_mag if action == 1 else -self.force_mag
        costheta = math.cos(theta)
        sintheta = math.sin(theta)
        temp = (force + self.polemass_length * theta_dot * theta_dot *
                sintheta) / self.total_mass
        thetaacc = (self.gravity * sintheta - costheta * temp) / (
            self.length *
            (4.0 / 3.0 -
             self.masspole * costheta * costheta / self.total_mass))
        xacc = temp - self.polemass_length * thetaacc * costheta / self.total_mass
        x = x + self.tau * x_dot
        x_dot = x_dot + self.tau * xacc
        theta = theta + self.tau * theta_dot
        theta_dot = theta_dot + self.tau * thetaacc
        self.state = (x, x_dot, theta, theta_dot)
        done =  x < -self.x_threshold \
                or x > self.x_threshold \
                or theta < -self.theta_threshold_radians \
                or theta > self.theta_threshold_radians
        done = bool(done)

        if not done:
            reward = 1.0
        elif self.steps_beyond_done is None:
            # Pole just fell!
            self.steps_beyond_done = 0
            reward = 1.0
        else:
            if self.steps_beyond_done == 0:
                logger.warn(
                    "You are calling 'step()' even though this environment has already returned done = True. You should always call 'reset()' once you receive 'done = True' -- any further steps are undefined behavior."
                )
            self.steps_beyond_done += 1
            reward = 0.0

        return np.array(self.state), reward, done, {}
Example #16
    def step(self, action):
        assert self.action_space.contains(action), "%r (%s) invalid"%(action, type(action))
        #state = np.array(self.state).reshape(4, 4)
        goal_pos = list(zip(*np.where(self.state == 1)))[0]
        agent_pos = list(zip(*np.where(self.state == 3)))[0]
        
        #move
        new_agent_pos = np.array(agent_pos)
        if action==0:
            new_agent_pos[1]-=1
        elif action==1:
            new_agent_pos[1]+=1
        elif action==2:
            new_agent_pos[0]-=1
        elif action==3:
            new_agent_pos[0]+=1
        new_agent_pos = np.clip(new_agent_pos, 0, 3)
        
        self.state[agent_pos[0], agent_pos[1]] = 0 #moved from this position so it is empty
        self.state[new_agent_pos[0], new_agent_pos[1]] = 3 #moved to this position
        #self.state = tuple(self.state.flatten())
        
        #check if done
        done=False
        if list(goal_pos)==list(new_agent_pos):
            done=True
        
        #assign reward
        if not done:
            reward = 0
        elif self.steps_beyond_done is None:
            # Just arrived at the goal
            self.steps_beyond_done = 0
            reward = 1
        else:
            if self.steps_beyond_done == 0:
                logger.warn("You are calling 'step()' even though this environment has already returned done = True. You should always call 'reset()' once you receive 'done = True' -- any further steps are undefined behavior.")
            self.steps_beyond_done += 1
            reward = 0.0

        return self.state, reward, done, {}
Example #17
    def step(self, action):
        """
        @brief      Run a simulation step for a given action.

        @param[in]  action   The action to perform in the action space
                             Set to None to NOT update the action.

        @return     The next observation, the reward, the status of the episode
                    (done or not), and a dictionary of extra information
        """

        # Bypass 'self.engine_py.action' setter and use
        # direct assignment to max out the performances
        if action is None:
            action = self.action_prev
        self.engine_py._action[:] = action
        self.engine_py.step(dt_desired=self.dt)
        self.is_running = True
        self.action_prev = action

        # Extract information about the current simulation state
        self._update_observation(self.observation)
        done = self._is_done()
        self.learning_info = {'is_success': done}

        reward = self._compute_reward()

        # Make sure the simulation is not already over
        if done:
            if self._steps_beyond_done is None:
                self._steps_beyond_done = 0
            else:
                if self._steps_beyond_done == 0:
                    logger.warn(
                        "You are calling 'step()' even though this environment has already \
                                 returned done = True. You should always call 'reset()' once you \
                                 receive 'done = True' -- any further steps are undefined behavior."
                    )
                self._steps_beyond_done += 1

        return self.observation, reward, done, self.learning_info
Example #18
    def _step(self, action):
        assert self.action_space.contains(
            action), "%r (%s) invalid" % (action, type(action))
        state = self.state

        reward = 0.0
        if state == 0:
            if action == 0:
                if self.np_random.rand() < self.signal_prob:
                    state = self.n_press
            else:
                reward = -1.0
        else:
            if action == 1:
                # reducing state to 0
                state -= 1
                if state == 0:
                    # this transition is rewarded
                    reward = self.reward_seq_complete
            else:
                # if not pressing, then move directly to 0
                state = 0
        signal = [float(state == self.n_press)]

        self.state = state
        done = False
        done = bool(done)

        if not done:
            pass
        elif self.steps_beyond_done is None:
            # Pole just fell!
            self.steps_beyond_done = 0
        else:
            if self.steps_beyond_done == 0:
                logger.warn(
                    "You are calling 'step()' even though this environment has already returned done = True. You should always call 'reset()' once you receive 'done = True' -- any further steps are undefined behavior."
                )
            self.steps_beyond_done += 1

        return np.array(signal), reward, done, {}
Example #19
    def step(self, action):

        self._take_action(action)

        traci.simulationStep()
        self.current_step += 1

        obs = self._next_observation()
        reward = self._get_reward()

        if self.is_done:
            logger.warn(
                "You are calling 'step()' even though this environment has already returned done = True. "
                "You should always call 'reset()' once you receive 'done = True' "
                "-- any further steps are undefined behavior.")
            reward = 0.0

        if self.current_step + 1 == self.steps_per_episode:
            self.is_done = True

        return obs, reward, self.is_done, {}
Example #20
    def render(self, mode='matplotlib'):
      """Renders this environment in its current state.

      Note that, in order to support rendering, 
      `render=True` must be passed to the environment 
      constructor.
    
      Arguments:
        mode(str) Rendering mode. Currently, only 
                  `"matplotlib"` is supported.
      """
      if mode == 'matplotlib' and self._render:
        self._painter.draw()
      elif not self._render:
        logger.warn(
          'Need to pass `render=True` to support '
          'rendering.')
      else:
        logger.warn(
          'Invalid rendering mode "%s". '
          'Only "matplotlib" is supported.', mode)
Example #21
def test_env_semantics(spec):
	logger.warn("Skipping this test. Existing hashes were generated in a bad way")	
	return
	with open(ROLLOUT_FILE) as data_file:
		rollout_dict = json.load(data_file)

	if spec.id not in rollout_dict:
		if not spec.nondeterministic:
			logger.warn("Rollout does not exist for {}, run generate_json.py to generate rollouts for new envs".format(spec.id))
		return

	logger.info("Testing rollout for {} environment...".format(spec.id))

	observations_now, actions_now, rewards_now, dones_now = generate_rollout_hash(spec)

	errors = []
	if rollout_dict[spec.id]['observations'] != observations_now:
		errors.append('Observations not equal for {} -- expected {} but got {}'.format(spec.id, rollout_dict[spec.id]['observations'], observations_now))
	if rollout_dict[spec.id]['actions'] != actions_now:
		errors.append('Actions not equal for {} -- expected {} but got {}'.format(spec.id, rollout_dict[spec.id]['actions'], actions_now))
	if rollout_dict[spec.id]['rewards'] != rewards_now:
		errors.append('Rewards not equal for {} -- expected {} but got {}'.format(spec.id, rollout_dict[spec.id]['rewards'], rewards_now))
	if rollout_dict[spec.id]['dones'] != dones_now:
		errors.append('Dones not equal for {} -- expected {} but got {}'.format(spec.id, rollout_dict[spec.id]['dones'], dones_now))
	if len(errors):
		for error in errors:
			logger.warn(error)
		raise ValueError(errors)
Example #22
def _check_spec_register(spec: EnvSpec):
    """Checks whether the spec is valid to be registered. Helper function for `register`."""
    global registry, current_namespace
    if current_namespace is not None:
        if spec.namespace is not None:
            logger.warn(
                f"Custom namespace `{spec.namespace}` is being overridden "
                f"by namespace `{current_namespace}`. If you are developing a "
                "plugin you shouldn't specify a namespace in `register` "
                "calls. The namespace is specified through the "
                "entry point package metadata.")

    latest_versioned_spec = max(
        (spec_
         for spec_ in registry.values() if spec_.namespace == spec.namespace
         and spec_.name == spec.name and spec_.version is not None),
        key=lambda spec_: int(spec_.version),  # type: ignore
        default=None,
    )

    unversioned_spec = next(
        (spec_
         for spec_ in registry.values() if spec_.namespace == spec.namespace
         and spec_.name == spec.name and spec_.version is None),
        None,
    )

    if unversioned_spec is not None and spec.version is not None:
        raise error.RegistrationError(
            "Can't register the versioned environment "
            f"`{spec.id}` when the unversioned environment "
            f"`{unversioned_spec.id}` of the same name already exists.")
    elif latest_versioned_spec is not None and spec.version is None:
        raise error.RegistrationError(
            "Can't register the unversioned environment "
            f"`{spec.id}` when the versioned environment "
            f"`{latest_versioned_spec.id}` of the same name "
            f"already exists. Note: the default behavior is "
            f"that `gym.make` with the unversioned environment "
            f"will return the latest versioned environment")
Example #23
File: env_checker.py  Project: chksi/gym
def _check_nan(env: gym.Env, check_inf: bool = True) -> None:
    """Check for NaN and Inf."""
    for _ in range(10):
        action = env.action_space.sample()
        observation, reward, _, _ = env.step(action)

        if np.any(np.isnan(observation)):
            logger.warn("Encountered NaN value in observations.")
        if np.any(np.isnan(reward)):
            logger.warn("Encountered NaN value in rewards.")
        if check_inf and np.any(np.isinf(observation)):
            logger.warn("Encountered inf value in observations.")
        if check_inf and np.any(np.isinf(reward)):
            logger.warn("Encountered inf value in rewards.")
Example #24
File: registration.py  Project: chksi/gym
def load_env_plugins(entry_point: str = "gym.envs") -> None:
    # Load third-party environments
    for plugin in metadata.entry_points(group=entry_point):
        # Python 3.8 doesn't support plugin.module, plugin.attr
        # So we'll have to try and parse this ourselves
        try:
            module, attr = plugin.module, plugin.attr  # type: ignore  ## error: Cannot access member "attr" for type "EntryPoint"
        except AttributeError:
            if ":" in plugin.value:
                module, attr = plugin.value.split(":", maxsplit=1)
            else:
                module, attr = plugin.value, None
        except:
            module, attr = None, None
        finally:
            if attr is None:
                raise error.Error(
                    f"Gym environment plugin `{module}` must specify a function to execute, not a root module"
                )

        context = namespace(plugin.name)
        if plugin.name.startswith("__") and plugin.name.endswith("__"):
            # `__internal__` is an artifact of the plugin system when
            # the root namespace had an allow-list. The allow-list is now
            # removed and plugins can register environments in the root
            # namespace with the `__root__` magic key.
            if plugin.name == "__root__" or plugin.name == "__internal__":
                context = contextlib.nullcontext()
            else:
                logger.warn(
                    f"The environment namespace magic key `{plugin.name}` is unsupported. "
                    "To register an environment at the root namespace you should specify "
                    "the `__root__` namespace.")

        with context:
            fn = plugin.load()
            try:
                fn()
            except Exception as e:
                logger.warn(str(e))
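A sketch of the packaging side that load_env_plugins() iterates over; the package and function names are hypothetical, while the "gym.envs" entry-point group and the "__root__" magic key match the code above.

from setuptools import setup

# Hypothetical third-party plugin exposing a registration function via the
# "gym.envs" entry-point group scanned by load_env_plugins().
setup(
    name="my-gym-plugin",
    py_modules=["my_gym_plugin"],
    entry_points={
        "gym.envs": ["__root__ = my_gym_plugin:register_envs"],
    },
)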
Example #25
    def step(self, action):
        assert self.action_space.contains(action), "%r (%s) invalid"%(action, type(action))
        state = self.state

        # Create a prey with probability 1%
        if not self.prey.tolist():
            px = np.float(random.randint(-.5*self.world_width, .5*self.world_width - 1))
            py = np.float(random.randint(-.5*self.world_height, .5*self.world_height - 1))
            while [px, py] in self.snake.blocks.tolist():
                px = np.float(random.randint(-.5*self.world_width, .5*self.world_width - 1))
                py = np.float(random.randint(-.5*self.world_height, .5*self.world_height - 1))

            self.prey = np.array([px, py])
            logger.info("[INFO] -- New Prey at {}, {} ".format(px,py))
            

        # print(self.snake.blocks[0].tolist()) 
        if self.snake.blocks[0].tolist() in [self.prey.tolist()]:
            self.snake.eat_and_move(action)
            self.state = np.array([self.get_state()])
            self.prey = np.array([])
            logger.info("[INFO] -- Manger")
            reward = 500.
        else:
            self.snake.move(action)
            reward = -.5
            self.state = np.array([self.get_state()])
        
        done = self.snake.is_dead or self.oob(*self.snake.blocks[0])

        if done:
            logger.warn("DONE")
            if self.steps_beyond_done is None:
                self.steps_beyond_done = 0
                reward = -1000
            else:
                if self.steps_beyond_done == 0:
                    logger.warn("You are calling 'step()' but it's already done !")
                self.steps_beyond_done += 1
        return self.state, reward, done, {}
Example #26
    def step(self, action):
        assert self.action_space.contains(
            action), "%r (%s) invalid" % (action, type(action))
        state = self.state

        # TODO: rename...It's not really delta.
        action_0 = action[0]
        next_state = np.copy(state)
        if action_0 != 0:
            next_state[0] = action_0
        else:
            # The last action_input is do nothing.
            pass

        # After intervention, the state evolves into the next following the transition prob.
        next_state[0] = (next_state[0] + 1) % 10
        # next_second_digit = next_first_digit % 2
        next_state[1] = (next_state[1] + 1) % 2
        assert self._observed_state_space.contains(
            next_state), 'internal error. Illegal next state'

        self.state = next_state
        self.step_count += 1
        if self.step_count >= self.max_num_steps:
            if self.steps_beyond_done is None:
                self.steps_beyond_done = 0
            else:
                if self.steps_beyond_done == 0:
                    logger.warn(
                        'You are calling \'step()\' even though this environment has already returned done = True. You should '
                        'always call \'reset()\' once you receive \'done = True\' -- any further steps are undefined behavior.'
                    )
                self.steps_beyond_done += 1
        done = self._get_is_done()

        info = {'done': done, 'steps_beyond_done': self.steps_beyond_done}
        reward = self.compute_reward(self._get_achieved_goal(),
                                     self._get_desired_goal(), done)

        return self._get_observation(), reward, done, info
Example #27
    def step(self, action):
        #TODO: assert action is a scalar
        # assert self.action_space.contains(action), "%r (%s) invalid"%(action, type(action))

        # get state
        x, th, x_dot, th_dot = self.state
        theta = self._unwrap_angle(th)

        # clip torque, update dynamics
        u = np.clip(action, -self.force_mag, self.force_mag)
        acc = self._accels(anp.array([x, th, x_dot, th_dot, u]))

        # integrate
        xacc, thacc = acc[0], acc[1]
        x_dot = x_dot + self.tau * xacc
        x = x + self.tau * x_dot
        th_dot = th_dot + self.tau * thacc
        th = th + self.tau * th_dot + 0.5 * self.tau**2 * thacc

        # update state
        self._unwrap_angle(th)
        self.state = np.array([x, th, x_dot, th_dot])

        done = self.is_done()

        if not done:
            reward = 1.0
        elif self.steps_beyond_done is None:
            # Pole just fell!
            self.steps_beyond_done = 0
            reward = 1.0
        else:
            if self.steps_beyond_done == 0:
                logger.warn(
                    "You are calling 'step()' even though this environment has already returned done = True. You should always call 'reset()' once you receive 'done = True' -- any further steps are undefined behavior."
                )
            self.steps_beyond_done += 1
            reward = 0.0

        return self.state, reward, done, {}
Example #28
def test_env_semantics(spec):
    logger.warn("Skipping this test. Existing hashes were generated in a bad way")
    return
    with open(ROLLOUT_FILE) as data_file:
        rollout_dict = json.load(data_file)

    if spec.id not in rollout_dict:
        if not spec.nondeterministic:
            logger.warn("Rollout does not exist for {}, run generate_json.py to generate rollouts for new envs".format(spec.id))
        return

    logger.info("Testing rollout for {} environment...".format(spec.id))

    observations_now, actions_now, rewards_now, dones_now = generate_rollout_hash(spec)

    errors = []
    if rollout_dict[spec.id]['observations'] != observations_now:
        errors.append('Observations not equal for {} -- expected {} but got {}'.format(spec.id, rollout_dict[spec.id]['observations'], observations_now))
    if rollout_dict[spec.id]['actions'] != actions_now:
        errors.append('Actions not equal for {} -- expected {} but got {}'.format(spec.id, rollout_dict[spec.id]['actions'], actions_now))
    if rollout_dict[spec.id]['rewards'] != rewards_now:
        errors.append('Rewards not equal for {} -- expected {} but got {}'.format(spec.id, rollout_dict[spec.id]['rewards'], rewards_now))
    if rollout_dict[spec.id]['dones'] != dones_now:
        errors.append('Dones not equal for {} -- expected {} but got {}'.format(spec.id, rollout_dict[spec.id]['dones'], dones_now))
    if len(errors):
        for error in errors:
            logger.warn(error)
        raise ValueError(errors)
Example #29
    def _step(self, action):
        assert self.action_space.contains(
            action), "%r (%s) invalid" % (action, type(action))
        state = self.state

        reward = 0.0
        if action == 0:
            # poke
            if state <= 0:
                # reward and reset state
                reward = self.reward_seq_complete
                state = self.n_press
        elif action == 1:
            # press
            state -= 1
            state = max(0, state)
        else:
            raise ValueError

        self.state = state
        done = False

        if not done:
            pass
        elif self.steps_beyond_done is None:
            # Pole just fell!
            self.steps_beyond_done = 0
        else:
            if self.steps_beyond_done == 0:
                logger.warn(
                    "You are calling 'step()' even though this environment has already returned done = True. You should always call 'reset()' once you receive 'done = True' -- any further steps are undefined behavior."
                )
            self.steps_beyond_done += 1

        if self.observe_state:
            obs = np.array([self.state])
        else:
            obs = np.array([1.])

        return obs, reward, done, {}
Example #30
    def capture_frame(self):
        """Render the given `env` and add the resulting frame to the video."""
        if not self.functional: return
        logger.debug('Capturing video frame: path=%s', self.path)

        render_mode = 'ansi' if self.ansi_mode else 'rgb_array'
        frame = self.env.render(mode=render_mode)

        if frame is None:
            if self._async:
                return
            else:
                # Indicates a bug in the environment: don't want to raise
                # an error here.
                logger.warn('Env returned None on render(). Disabling further rendering for video recorder by marking as disabled: path=%s metadata_path=%s', self.path, self.metadata_path)
                self.broken = True
        else:
            self.last_frame = frame
            if self.ansi_mode:
                self._encode_ansi_frame(frame)
            else:
                self._encode_image_frame(frame)
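A hedged usage sketch for capture_frame() above; the output path is hypothetical, the module path matches recent gym layouts, and the environment must support rgb_array rendering.

import gym
from gym.wrappers.monitoring.video_recorder import VideoRecorder

# Sketch: manually drive the recorder for a short rollout, then finalize the file.
env = gym.make("CartPole-v1")
recorder = VideoRecorder(env, path="/tmp/cartpole-episode.mp4")
env.reset()
for _ in range(50):
    env.step(env.action_space.sample())
    recorder.capture_frame()
recorder.close()
env.close()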
Example #31
    def step(self, action):
        state = self.state
        x, x_dot, theta, theta_dot = state
        force = self.force_mag * action
        costheta = math.cos(theta)
        sintheta = math.sin(theta)
        temp = (force + self.polemass_length * theta_dot * theta_dot * sintheta) / self.total_mass
        thetaacc = (self.gravity * sintheta - costheta* temp) / (self.length * (4.0/3.0 - self.masspole * costheta * costheta / self.total_mass))
        xacc  = temp - self.polemass_length * thetaacc * costheta / self.total_mass
        if self.kinematics_integrator == 'euler':
            x  = x + self.tau * x_dot
            x_dot = x_dot + self.tau * xacc
            theta = theta + self.tau * theta_dot
            theta_dot = theta_dot + self.tau * thetaacc
        else: # semi-implicit euler
            x_dot = x_dot + self.tau * xacc
            x  = x + self.tau * x_dot
            theta_dot = theta_dot + self.tau * thetaacc
            theta = theta + self.tau * theta_dot
        self.state = (x,x_dot,theta,theta_dot)
        done =  x < -self.x_threshold \
                or x > self.x_threshold \
                or theta < -self.theta_threshold_radians \
                or theta > self.theta_threshold_radians
        done = bool(done)

        if not done:
            reward = 1.0
        elif self.steps_beyond_done is None:
            # Pole just fell!
            self.steps_beyond_done = 0
            reward = 1.0
        else:
            if self.steps_beyond_done == 0:
                logger.warn("You are calling 'step()' even though this environment has already returned done = True. You should always call 'reset()' once you receive 'done = True' -- any further steps are undefined behavior.")
            self.steps_beyond_done += 1
            reward = 0.0

        return np.array(self.state), reward, done, {}
Example #32
 def __init__(self, low=None, high=None, shape=None, dtype=None):
     """
     Two kinds of valid input:
         Box(low=-1.0, high=1.0, shape=(3,4)) # low and high are scalars, and shape is provided
         Box(low=np.array([-1.0,-2.0]), high=np.array([2.0,4.0])) # low and high are arrays of the same shape
     """
     if shape is None:
         assert low.shape == high.shape
         shape = low.shape
     else:
         assert np.isscalar(low) and np.isscalar(high)
         low = low + np.zeros(shape)
         high = high + np.zeros(shape)
     if dtype is None:  # Autodetect type
         if (high == 255).all():
             dtype = np.uint8
         else:
             dtype = np.float32
         logger.warn("gym.spaces.Box autodetected dtype as %s. Please provide explicit dtype." % dtype)
     self.low = low.astype(dtype)
     self.high = high.astype(dtype)
     gym.Space.__init__(self, shape, dtype)
Example #33
    def step(self, action):
        assert self.action_space.contains(action), "%r (%s) invalid"%(action, type(action))
        state = self.state
        x, x_dot, theta, theta_dot = state
        force = self.force_mag if action==1 else -self.force_mag
        costheta = math.cos(theta)
        sintheta = math.sin(theta)
        temp = (force + self.polemass_length * theta_dot * theta_dot * sintheta) / self.total_mass
        thetaacc = (self.gravity * sintheta - costheta* temp) / (self.length * (4.0/3.0 - self.masspole * costheta * costheta / self.total_mass))
        xacc  = temp - self.polemass_length * thetaacc * costheta / self.total_mass
        if self.kinematics_integrator == 'euler':
            x  = x + self.tau * x_dot
            x_dot = x_dot + self.tau * xacc
            theta = theta + self.tau * theta_dot
            theta_dot = theta_dot + self.tau * thetaacc
        else: # semi-implicit euler
            x_dot = x_dot + self.tau * xacc
            x  = x + self.tau * x_dot
            theta_dot = theta_dot + self.tau * thetaacc
            theta = theta + self.tau * theta_dot
        self.state = (x,x_dot,theta,theta_dot)
        done =  x < -self.x_threshold \
                or x > self.x_threshold
        done = bool(done)

        distance_from_desired_angle = (theta - self.desired_angle) / self.flexibility
        if not done:
            reward = math.exp(-distance_from_desired_angle*distance_from_desired_angle/2)
        elif self.steps_beyond_done is None:
            # Pole just fell!
            self.steps_beyond_done = 0
            reward = math.exp(-distance_from_desired_angle*distance_from_desired_angle/2)
        else:
            if self.steps_beyond_done == 0:
                logger.warn("You are calling 'step()' even though this environment has already returned done = True. You should always call 'reset()' once you receive 'done = True' -- any further steps are undefined behavior.")
            self.steps_beyond_done += 1
            reward = 0.0

        return np.array(self.state), reward, done, {}
Example #34
 def __init__(self, low=None, high=None, shape=None, dtype=None):
     """
     Two kinds of valid input:
         Box(low=-1.0, high=1.0, shape=(3,4)) # low and high are scalars, and shape is provided
         Box(low=np.array([-1.0,-2.0]), high=np.array([2.0,4.0])) # low and high are arrays of the same shape
     """
     if shape is None:
         assert low.shape == high.shape
         shape = low.shape
     else:
         assert np.isscalar(low) and np.isscalar(high)
         low = low + np.zeros(shape)
         high = high + np.zeros(shape)
     if dtype is None:  # Autodetect type
         if (high == 255).all():
             dtype = np.uint8
         else:
             dtype = np.float32
         logger.warn("gym.spaces.Box autodetected dtype as %s. Please provide explicit dtype." % dtype)
     self.low = low.astype(dtype)
     self.high = high.astype(dtype)
     gym.Space.__init__(self, shape, dtype)
Example #35
    def step(self, action):
        assert self.action_space.contains(
            action), "%r (%s) invalid" % (action, type(action))
        self.cityflow.set_tl_phase(self.intersection_id, action)
        self.cityflow.next_step()

        state = self._get_state()
        reward = self._get_reward()

        self.current_step += 1

        if self.is_done:
            logger.warn(
                "You are calling 'step()' even though this environment has already returned done = True. "
                "You should always call 'reset()' once you receive 'done = True' "
                "-- any further steps are undefined behavior.")
            reward = 0.0

        if self.current_step + 1 == self.steps_per_episode:
            self.is_done = True

        return state, reward, self.is_done, {}
Example #36
def _check_render(
    env: gym.Env, warn: bool = True, headless: bool = False
) -> None:  # pragma: no cover
    """
    Check the declared render modes/fps and the `render()`/`close()`
    method of the environment.
    :param env: The environment to check
    :param warn: Whether to output additional warnings
    :param headless: Whether to disable render modes
        that require a graphical interface. False by default.
    """
    render_modes = env.metadata.get("render_modes")
    if render_modes is None:
        if warn:
            logger.warn(
                "No render modes was declared in the environment "
                " (env.metadata['render_modes'] is None or not defined), "
                "you may have trouble when calling `.render()`"
            )

    render_fps = env.metadata.get("render_fps")
    # We only require `render_fps` if rendering is actually implemented
    if render_fps is None and render_modes is not None and len(render_modes) > 0:
        if warn:
            logger.warn(
                "No render fps was declared in the environment "
                " (env.metadata['render_fps'] is None or not defined), "
                "rendering may occur at inconsistent fps"
            )

    else:
        # Don't check render mode that require a
        # graphical interface (useful for CI)
        if headless and "human" in render_modes:
            render_modes.remove("human")
        # Check all declared render modes
        for render_mode in render_modes:
            env.render(mode=render_mode)
        env.close()
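A sketch of the metadata keys the checker above inspects, shown on a hypothetical custom environment; declaring both keys avoids the two warnings.

import gym

class MyRenderableEnv(gym.Env):  # hypothetical
    metadata = {"render_modes": ["human", "rgb_array"], "render_fps": 30}

    def render(self, mode="human"):
        pass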
Example #37
    def capture_frame(self):
        """Render the given `env` and add the resulting frame to the video."""
        if not self.functional: return
        logger.debug('Capturing video frame: path=%s', self.path)

        render_mode = 'ansi' if self.ansi_mode else 'rgb_array'
        frame = self.env.render(mode=render_mode)

        if frame is None:
            if self._async:
                return
            else:
                # Indicates a bug in the environment: don't want to raise
                # an error here.
                logger.warn('Env returned None on render(). Disabling further rendering for video recorder by marking as disabled: path=%s metadata_path=%s', self.path, self.metadata_path)
                self.broken = True
        else:
            self.last_frame = frame
            if self.ansi_mode:
                self._encode_ansi_frame(frame)
            else:
                self._encode_image_frame(frame)
Example #38
    def step(self, action):
        if action <= -10.0 or action >= 10.0: raise Exception

        reward = self.reward(self.state, action)

        if action != 0.0:
            self.steps_without_correct_action += 1.0

        done = self.steps_without_correct_action >= 20

        if done:
            if self.steps_beyond_done is None:
                self.steps_beyond_done = 0
            elif self.steps_beyond_done == 0:
                logger.warn(
                    "You are calling 'step()' even though this "
                    "environment has already returned done = True. You "
                    "should always call 'reset()' once you receive 'done = "
                    "True' -- any further steps are undefined behavior.")
            self.steps_beyond_done += 1

        return np.array(self.state), reward, done, {}
Example #39
    def step(self, action):
        action = np.clip(action, self.action_space.low, self.action_space.high)

        ds = action[0]
        dtheta = action[1]

        x, y, theta = self.state

        # update theta and keep normalised to [0, 2pi] range
        theta = (theta + dtheta) % (2 * math.pi)
        # update position
        x = x + math.cos(theta) * ds
        y = y + math.sin(theta) * ds

        wall_collision = self.is_colliding(x, y, 1)
        if not wall_collision:
            self.state[0] = x
            self.state[1] = y
            self.state[2] = theta

        done = self.is_colliding(self.state[0], self.state[1], 'r')

        reward = -0.1

        if done and self.steps_beyond_done is None:
            # solved the maze!
            reward += 100.0
            self.steps_beyond_done = 0
        elif self.steps_beyond_done is not None:
            if self.steps_beyond_done == 0:
                logger.warn(
                    "You are calling 'step()' even though this "
                    "environment has already returned done = True. You "
                    "should always call 'reset()' once you receive 'done = "
                    "True' -- any further steps are undefined behavior.")
            self.steps_beyond_done += 1

        return self.normalised_state(), reward, done, {}
Example #40
    def step(self, action):

        if not self.done:

            # MDP Transition
            new_grid, new_context = self.coordinator(self.grid, action,
                                                     self.context)

            # New State
            self.grid = new_grid
            self.context = new_context

            # Termination as a function of New State
            self._is_done()

            # API Formatting
            # Necessary condition for MDP, its New State is public
            obs = new_grid, new_context
            # Reward as a function of New State
            reward = self._award()
            info = self._report()

            return obs, reward, self.done, info

        else:

            if self.steps_beyond_done == 0:

                logger.warn(
                    "You are calling 'step()' even though this "
                    "environment has already returned done = True. You "
                    "should always call 'reset()' once you receive 'done = "
                    "True' -- any further steps are undefined behavior.")

            self.steps_beyond_done += 1

            # Graceful after termination
            return (self.grid, self.context), 0.0, True, {}
Example #41
    def __init__(
        self,
        env,
        video_folder: str,
        episode_trigger: Callable[[int], bool] = None,
        step_trigger: Callable[[int], bool] = None,
        video_length: int = 0,
        name_prefix: str = "rl-video",
    ):
        super().__init__(env)

        if episode_trigger is None and step_trigger is None:
            episode_trigger = capped_cubic_video_schedule

        trigger_count = sum(x is not None
                            for x in [episode_trigger, step_trigger])
        assert trigger_count == 1, "Must specify exactly one trigger"

        self.episode_trigger = episode_trigger
        self.step_trigger = step_trigger
        self.video_recorder = None

        self.video_folder = os.path.abspath(video_folder)
        # Create output folder if needed
        if os.path.isdir(self.video_folder):
            logger.warn(
                f"Overwriting existing videos at {self.video_folder} folder (try specifying a different `video_folder` for the `RecordVideo` wrapper if this is not desired)"
            )
        os.makedirs(self.video_folder, exist_ok=True)

        self.name_prefix = name_prefix
        self.step_id = 0
        self.video_length = video_length

        self.recording = False
        self.recorded_frames = 0
        self.is_vector_env = getattr(env, "is_vector_env", False)
        self.episode_id = 0
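A hedged usage sketch for the wrapper constructor above; the video folder is hypothetical and exactly one trigger is supplied, as the assertion requires.

import gym
from gym.wrappers import RecordVideo

# Sketch: record every 100th episode into a dedicated folder.
env = RecordVideo(gym.make("CartPole-v1"), video_folder="/tmp/videos",
                  episode_trigger=lambda episode_id: episode_id % 100 == 0)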
Example #42
def patch_deprecated_methods(env):
    """
    Methods renamed from '_method' to 'method', render() no longer has 'close' parameter, close is a separate method.
    For backward compatibility, this makes it possible to work with unmodified environments.
    """
    global warn_once
    if warn_once:
        logger.warn(
            "Environment '%s' has deprecated methods '_step' and '_reset' rather than 'step' and 'reset'. Compatibility code invoked. Set _gym_disable_underscore_compat = True to disable this behavior."
            % str(type(env)))
        warn_once = False
    env.reset = env._reset
    env.step = env._step
    env.seed = env._seed

    def render(mode):
        return env._render(mode, close=False)

    def close():
        env._render("human", close=True)

    env.render = render
    env.close = close
Example #43
File: core.py  Project: olegklimov/gym
def deprecated_warn_once(text):
    global warn_once
    if not warn_once: return
    warn_once = False
    logger.warn(text)
Example #44
File: play.py  Project: openai/gym
import gym
import pygame
import matplotlib
import argparse
from gym import logger
try:
    matplotlib.use('TkAgg')
    import matplotlib.pyplot as plt
except ImportError as e:
    logger.warn('failed to set matplotlib backend, plotting will not work: %s' % str(e))
    plt = None

from collections import deque
from pygame.locals import VIDEORESIZE

def display_arr(screen, arr, video_size, transpose):
    arr_min, arr_max = arr.min(), arr.max()
    arr = 255.0 * (arr - arr_min) / (arr_max - arr_min)
    pyg_img = pygame.surfarray.make_surface(arr.swapaxes(0, 1) if transpose else arr)
    pyg_img = pygame.transform.scale(pyg_img, video_size)
    screen.blit(pyg_img, (0,0))

def play(env, transpose=True, fps=30, zoom=None, callback=None, keys_to_action=None):
    """Allows one to play the game using keyboard.

    To simply play the game use:

        play(gym.make("Pong-v4"))

    Above code works also if env is wrapped, so it's particularly useful in
    verifying that the frame-level preprocessing does not render the game