Example #1
    def __init__(self,
                 obs_space,
                 action_space,
                 num_outputs,
                 model_config,
                 name,
                 true_obs_shape=(4, ),
                 action_embed_size=6,
                 **kw):
        super(ParametricActionsModel, self).__init__(
            obs_space, action_space, num_outputs, model_config, name, **kw)
        custom_options = model_config['custom_options']
        if custom_options['spy']:
            true_obs_space = make_spy_space(custom_options['parties'],
                                            custom_options['blocks'])
        else:
            true_obs_space = make_blind_space(custom_options['parties'],
                                              custom_options['blocks'])
        if custom_options['extended']:
            action_embed_size = 6
        else:
            action_embed_size = 4
        total_dim = 0
        for space in true_obs_space:
            total_dim += get_preprocessor(space)(space).size
        self.action_embed_model = FullyConnectedNetwork(
            Box(-1, 1, shape=(total_dim,)), action_space, action_embed_size,
            model_config, name + "_action_embed")
        self.register_variables(self.action_embed_model.variables())
Example #2
    def get_preprocessor(env, options=None):
        """Returns a suitable processor for the given environment.

        Args:
            env (gym.Env|VectorEnv|ExternalEnv): The environment to wrap.
            options (dict): Options to pass to the preprocessor.

        Returns:
            preprocessor (Preprocessor): Preprocessor for the env observations.
        """
        options = options or MODEL_DEFAULTS
        for k in options.keys():
            if k not in MODEL_DEFAULTS:
                raise Exception("Unknown config key `{}`, all keys: {}".format(
                    k, list(MODEL_DEFAULTS)))

        if options.get("custom_preprocessor"):
            preprocessor = options["custom_preprocessor"]
            logger.info("Using custom preprocessor {}".format(preprocessor))
            prep = _global_registry.get(RLLIB_PREPROCESSOR,
                                        preprocessor)(env.observation_space,
                                                      options)
        else:
            cls = get_preprocessor(env.observation_space)
            prep = cls(env.observation_space, options)

        logger.debug("Created preprocessor {}: {} -> {}".format(
            prep, env.observation_space, prep.shape))
        return prep
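A minimal call sketch for the wrapper above, assuming it is the ModelCatalog static method from an older RLlib release and that env is an already-instantiated gym environment (both names are assumptions here):

from ray.rllib.models import ModelCatalog

prep = ModelCatalog.get_preprocessor(env)  # options default to MODEL_DEFAULTS
obs_flat = prep.transform(env.reset())     # flattened / resized observation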
Example #3
def simulate_env_interaction(env, restart=True) -> Iterator[SampleBatch]:
    prep = get_preprocessor(env.observation_space)(env.observation_space)
    batch_builder = SampleBatchBuilder()

    # get reverse action functions
    env_ptr = env
    reverse_action_fns = []
    while hasattr(env_ptr, "env"):
        if isinstance(env_ptr, gym.ActionWrapper):
            reverse_action_fns.append(env_ptr.reverse_action)
        env_ptr = env_ptr.env

    def reverse_action(action):
        for f in reversed(reverse_action_fns):
            action = f(action)
        return action

    while restart:
        for eps_id, trajectory_name in enumerate(env.trajectory_names):
            t = 0
            prev_action = None
            prev_reward = 0
            done = False
            try:
                obs = env.reset()
            except TypeError:
                continue
            while not done:
                new_obs, reward, done, info = env.step(
                    env.action_space.sample())
                action = info["action"]
                action = reverse_action(action)
                if prev_action is None:
                    prev_action = np.zeros_like(action)

                batch = {
                    "t": t,
                    SampleBatch.EPS_ID: eps_id,
                    SampleBatch.AGENT_INDEX: eps_id,
                    SampleBatch.OBS: prep.transform(obs),
                    SampleBatch.ACTIONS: action,
                    SampleBatch.ACTION_PROB: 1.0,
                    SampleBatch.ACTION_LOGP: 0,
                    SampleBatch.ACTION_DIST_INPUTS: 0,
                    SampleBatch.REWARDS: reward,
                    SampleBatch.PREV_ACTIONS: prev_action,
                    SampleBatch.PREV_REWARDS: prev_reward,
                    SampleBatch.DONES: done,
                    SampleBatch.INFOS: {
                        "trajectory_name": trajectory_name
                    },
                    SampleBatch.NEXT_OBS: prep.transform(new_obs),
                }

                batch_builder.add_values(**batch)
                obs = new_obs
                prev_action = action
                prev_reward = reward
                t += 1
            yield batch_builder.build_and_reset()
Example #4
    def test_one_hot_preprocessor(self):
        space = Discrete(5)
        pp = get_preprocessor(space)(space)
        self.assertTrue(isinstance(pp, OneHotPreprocessor))
        self.assertTrue(pp.shape == (5, ))
        check(pp.transform(3), [0.0, 0.0, 0.0, 1.0, 0.0])
        check(pp.transform(0), [1.0, 0.0, 0.0, 0.0, 0.0])

        space = MultiDiscrete([2, 3, 4])
        pp = get_preprocessor(space)(space)
        self.assertTrue(isinstance(pp, OneHotPreprocessor))
        self.assertTrue(pp.shape == (9, ))
        check(pp.transform(np.array([1, 2, 0])),
              [0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0])
        check(pp.transform(np.array([0, 1, 3])),
              [1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0])
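The expected shapes above follow from how RLlib one-hot encodes discrete spaces: Discrete(5) flattens to 5 slots, while MultiDiscrete([2, 3, 4]) concatenates one one-hot segment per component, giving 2 + 3 + 4 = 9 slots.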
Example #5
    def __init__(self,
                 obs_space,
                 action_space,
                 num_outputs,
                 model_config,
                 name,
                 fc_size=64,
                 lstm_state_size=256):
        nn.Module.__init__(self)
        super().__init__(obs_space, action_space, num_outputs, model_config,
                         name)

        self.obs_size = get_preprocessor(obs_space)(obs_space).size
        self.fc_size = fc_size
        self.lstm_state_size = lstm_state_size

        # Build the Module from fc + LSTM + 2xfc (action + value outs).
        self.fc1 = nn.Linear(self.obs_size, self.fc_size)
        self.lstm = nn.LSTM(self.fc_size,
                            self.lstm_state_size,
                            batch_first=True)
        self.action_branch = nn.Linear(self.lstm_state_size, num_outputs)
        self.value_branch = nn.Linear(self.lstm_state_size, 1)
        # Holds the current "base" output (before logits layer).
        self._features = None
Example #6
    def get_preprocessor(registry, env, options=dict()):
        """Returns a suitable processor for the given environment.

        Args:
            registry (obj): Registry of named objects (ray.tune.registry).
            env (gym.Env): The gym environment to preprocess.
            options (dict): Options to pass to the preprocessor.

        Returns:
            preprocessor (Preprocessor): Preprocessor for the env observations.
        """

        for k in options.keys():
            if k not in MODEL_CONFIGS:
                raise Exception("Unknown config key `{}`, all keys: {}".format(
                    k, MODEL_CONFIGS))

        if "custom_preprocessor" in options:
            preprocessor = options["custom_preprocessor"]
            print("Using custom preprocessor {}".format(preprocessor))
            return registry.get(RLLIB_PREPROCESSOR,
                                preprocessor)(env.observation_space, options)

        preprocessor = get_preprocessor(env.observation_space)
        return preprocessor(env.observation_space, options)
Example #7
def restore_policy_from_checkpoint(
        policy_class: type,
        env_creator: Callable[[Dict[str, Any]], gym.Env],
        checkpoint_path: str,
        config: Dict[str, Any]) -> Policy:
    """ TODO: Write documentation
    """
    # Load checkpoint policy state
    with open(checkpoint_path, "rb") as checkpoint_dump:
        checkpoint_state = pickle.load(checkpoint_dump)
        worker_dump = checkpoint_state['worker']
        worker_state = pickle.loads(worker_dump)
        policy_state = worker_state['state']['default_policy']

    # Initiate temporary environment to get observation and action spaces
    env = env_creator(config.get("env_config", {}))

    # Get preprocessed observation space
    preprocessor_class = get_preprocessor(env.observation_space)
    preprocessor = preprocessor_class(env.observation_space)
    observation_space = preprocessor.observation_space

    # Instantiate policy and load checkpoint state
    policy = policy_class(observation_space, env.action_space, config)
    policy.set_state(policy_state)

    return policy
Example #8
def _unpack_obs(obs, space):
    if (isinstance(space, gym.spaces.Dict)
            or isinstance(space, gym.spaces.Tuple)):
        prep = get_preprocessor(space)(space)
        if len(obs.shape) != 2 or obs.shape[1] != prep.shape[0]:
            raise ValueError(
                "Expected flattened obs shape of [None, {}], got {}".format(
                    prep.shape[0], obs.shape))
        assert len(prep.preprocessors) == len(space.spaces), \
            (len(prep.preprocessors) == len(space.spaces))
        offset = 0
        if isinstance(space, gym.spaces.Tuple):
            u = []
            for p, v in zip(prep.preprocessors, space.spaces):
                obs_slice = obs[:, offset:offset + p.size]
                offset += p.size
                u.append(
                    _unpack_obs(
                        tf.reshape(obs_slice, [-1] + list(p.shape)), v))
        else:
            u = OrderedDict()
            for p, (k, v) in zip(prep.preprocessors, space.spaces.items()):
                obs_slice = obs[:, offset:offset + p.size]
                offset += p.size
                u[k] = _unpack_obs(
                    tf.reshape(obs_slice, [-1] + list(p.shape)), v)
        return u
    else:
        return obs
Example #9
    def get_preprocessor(env, options=None):
        """Returns a suitable processor for the given environment.

        Args:
            env (gym.Env|VectorEnv|ServingEnv): The environment to wrap.
            options (dict): Options to pass to the preprocessor.

        Returns:
            preprocessor (Preprocessor): Preprocessor for the env observations.
        """
        options = options or MODEL_DEFAULTS
        for k in options.keys():
            if k not in MODEL_DEFAULTS:
                raise Exception("Unknown config key `{}`, all keys: {}".format(
                    k, list(MODEL_DEFAULTS)))

        if options.get("custom_preprocessor"):
            preprocessor = options["custom_preprocessor"]
            print("Using custom preprocessor {}".format(preprocessor))
            return _global_registry.get(RLLIB_PREPROCESSOR,
                                        preprocessor)(env.observation_space,
                                                      options)

        preprocessor = get_preprocessor(env.observation_space)
        return preprocessor(env.observation_space, options)
Example #10
def _make_continuous_space(space):
    if isinstance(space, gym.spaces.Box):
        return space
    elif isinstance(space, gym.spaces.Discrete):
        return gym.spaces.Box(low=np.zeros((space.n,)), high=np.ones((space.n,)))
    else:
        return get_preprocessor(space)(space).observation_space
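A quick sanity sketch for the mapping above (assumes gym is importable and reuses _make_continuous_space from the snippet):

import gym

box = _make_continuous_space(gym.spaces.Discrete(3))
print(box.shape)  # (3,) -- a one-hot-compatible Box over [0, 1]
space = gym.spaces.Box(low=-1.0, high=1.0, shape=(4,))
assert _make_continuous_space(space) is space  # Box passes through unchanged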
Example #11
    def get_preprocessor_for_space(observation_space, options=None):
        """Returns a suitable preprocessor for the given observation space.

        Args:
            observation_space (Space): The input observation space.
            options (dict): Options to pass to the preprocessor.

        Returns:
            preprocessor (Preprocessor): Preprocessor for the observations.
        """

        options = options or MODEL_DEFAULTS
        for k in options.keys():
            if k not in MODEL_DEFAULTS:
                raise Exception("Unknown config key `{}`, all keys: {}".format(
                    k, list(MODEL_DEFAULTS)))

        if options.get("custom_preprocessor"):
            preprocessor = options["custom_preprocessor"]
            logger.info("Using custom preprocessor {}".format(preprocessor))
            prep = _global_registry.get(RLLIB_PREPROCESSOR, preprocessor)(
                observation_space, options)
        else:
            cls = get_preprocessor(observation_space)
            prep = cls(observation_space, options)

        logger.debug("Created preprocessor {}: {} -> {}".format(
            prep, observation_space, prep.shape))
        return prep
Example #12
def render_q_function(env, agent):
    action = np.array([0,0])
    prep = get_preprocessor(env.observation_space)(env.observation_space)

    start = time.time()
    observation, nx, ny = env.default_env.get_observation_array()
    print("Got %i observations in %.3f seconds"%(len(observation),time.time()-start))

    # Reshape action and observation so that the first dimension is the batch
    #nx, ny, ns = observation.shape
    #observation = np.reshape(observation, (-1, ns))
    #action = np.tile(action, (nx*ny,1))
    obs_t = []
    act_t = []
    for i in range(len(observation)):
        act_t.append(np.expand_dims(action, axis=0))
        obs_t.append(np.expand_dims(prep.transform(observation[i]), axis=0))
    print("Prep took %.3f seconds"%(time.time()-start))

    q, qt = agent.get_policy().compute_q(obs_t, act_t)
    q_img = np.reshape(q, (nx,ny,1))
    print("Policy took %.3f seconds"%(time.time()-start))

    q_img = np.tile(q_img, (1, 1, 3))
    q_img = cv2.blur(q_img, (5, 5))
    q_img = np.mean(q_img, axis=-1)
    q_img = 1 - (np.clip(q_img, -0.6, 1) + 0.6) / 1.6
    q_img = 255 * viridis(q_img)
    q_img = q_img.astype(np.uint8)
    q_img = q_img[:, :, :3]    # Drop the alpha channel added by the colormap
    q_img = q_img[:, :, ::-1]  # Reorder channels RGB -> BGR for OpenCV
    return q_img
Example #13
    def get_preprocessor(registry, env, options=dict()):
        """Returns a suitable processor for the given environment.

        Args:
            registry (obj): Registry of named objects (ray.tune.registry).
            env (gym.Env): The gym environment to preprocess.
            options (dict): Options to pass to the preprocessor.

        Returns:
            preprocessor (Preprocessor): Preprocessor for the env observations.
        """
        for k in options.keys():
            if k not in MODEL_CONFIGS:
                raise Exception(
                    "Unknown config key `{}`, all keys: {}".format(
                        k, MODEL_CONFIGS))

        if "custom_preprocessor" in options:
            preprocessor = options["custom_preprocessor"]
            print("Using custom preprocessor {}".format(preprocessor))
            return registry.get(RLLIB_PREPROCESSOR, preprocessor)(
                env.observation_space, options)

        preprocessor = get_preprocessor(env.observation_space)
        return preprocessor(env.observation_space, options)
Example #14
    def get_preprocessor_for_space(observation_space, options=None):
        """Returns a suitable preprocessor for the given observation space.

        Args:
            observation_space (Space): The input observation space.
            options (dict): Options to pass to the preprocessor.

        Returns:
            preprocessor (Preprocessor): Preprocessor for the observations.
        """

        options = options or MODEL_DEFAULTS
        for k in options.keys():
            if k not in MODEL_DEFAULTS:
                raise Exception("Unknown config key `{}`, all keys: {}".format(
                    k, list(MODEL_DEFAULTS)))

        if options.get("custom_preprocessor"):
            preprocessor = options["custom_preprocessor"]
            logger.info("Using custom preprocessor {}".format(preprocessor))
            prep = _global_registry.get(RLLIB_PREPROCESSOR, preprocessor)(
                observation_space, options)
        else:
            cls = get_preprocessor(observation_space)
            prep = cls(observation_space, options)

        logger.debug("Created preprocessor {}: {} -> {}".format(
            prep, observation_space, prep.shape))
        return prep
Example #15
    def get_preprocessor_for_space(observation_space, options=None):
        """Returns a suitable preprocessor for the given observation space.

        Args:
            observation_space (Space): The input observation space.
            options (dict): Options to pass to the preprocessor.

        Returns:
            preprocessor (Preprocessor): Preprocessor for the observations.
        """

        options = options or MODEL_DEFAULTS
        for k in options.keys():
            if k not in MODEL_DEFAULTS:
                raise Exception("Unknown config key `{}`, all keys: {}".format(
                    k, list(MODEL_DEFAULTS)))

        if options.get("custom_preprocessor"):
            preprocessor = options["custom_preprocessor"]
            logger.info("Using custom preprocessor {}".format(preprocessor))
            logger.warning(
                "DeprecationWarning: Custom preprocessors are deprecated, "
                "since they sometimes conflict with the built-in "
                "preprocessors for handling complex observation spaces. "
                "Please use wrapper classes around your environment "
                "instead of preprocessors.")
            prep = _global_registry.get(RLLIB_PREPROCESSOR, preprocessor)(
                observation_space, options)
        else:
            cls = get_preprocessor(observation_space)
            prep = cls(observation_space, options)

        logger.debug("Created preprocessor {}: {} -> {}".format(
            prep, observation_space, prep.shape))
        return prep
Example #16
    def __init__(self,
                 obs_space,
                 action_space,
                 num_outputs,
                 model_config,
                 name,
                 true_obs_shape=(24, ),
                 action_embed_size=None):
        super(ParametricActionsModel,
              self).__init__(obs_space, action_space, num_outputs,
                             model_config, name)

        if action_embed_size is None:
            action_embed_size = action_space.n  # this works for Discrete() action

        # Get the size of the output of the preprocessor automatically chosen
        # by RLlib for the real_obs space.
        real_obs = obs_space.original_space['real_obs']
        true_obs_shape = get_preprocessor(real_obs)(
            real_obs).size  # this will be an integer
        # true_obs_shape = obs_space.original_space['real_obs']
        self.action_embed_model = FullyConnectedNetwork(
            obs_space=Box(-1, 1, shape=(true_obs_shape, )),
            action_space=action_space,
            num_outputs=action_embed_size,
            model_config=model_config,
            name=name + "_action_embed")
        self.base_model = self.action_embed_model.base_model
        self.register_variables(self.action_embed_model.variables())
Example #17
    def __init__(self,
                 obs_space,
                 action_space,
                 num_outputs,
                 model_config,
                 name,
                 fc_size=64,
                 lstm_state_size=256):
        nn.Module.__init__(self)
        super().__init__(obs_space, action_space, num_outputs, model_config,
                         name)

        self.obs_size = get_preprocessor(obs_space)(obs_space).size
        self.fc_size = fc_size
        self.lstm_state_size = lstm_state_size

        # Build the Module from fc + LSTM + 2xfc (action + value outs).
        self.fc1 = nn.Linear(self.obs_size, self.fc_size)
        self.lstm = nn.LSTM(
            self.fc_size, self.lstm_state_size, batch_first=True)
        self.action_branch = nn.Linear(self.lstm_state_size, num_outputs)
        self.value_branch = nn.Linear(self.lstm_state_size, 1)
        # Holds the current "base" output (before logits layer).
        self._features = None

        # Add state-ins to this model's view.
        for i in range(2):
            self.inference_view_requirements["state_in_{}".format(i)] = \
                ViewRequirement(
                    "state_out_{}".format(i),
                    shift=-1,
                    space=Box(-1.0, 1.0, shape=(self.lstm_state_size,)))
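A note on the view-requirement loop above: the two state_in_{i} entries correspond to the LSTM's hidden and cell states, and shift=-1 tells RLlib's trajectory view API to feed each timestep the previous step's state_out_{i} as its incoming state.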
Example #18
def test_preprocessor(env):
    Preprocessor = get_preprocessor(env.observation_space)
    preprocessor = Preprocessor(env.observation_space)
    action = {i: [0.2, -0.5] for i in range(len(env.default_env.base.robots))}
    obs, reward, done, _ = env.step(action)
    out = preprocessor.transform(obs[0])
    print(len(out))
Example #19
    def test_nested_multidiscrete_one_hot_preprocessor(self):
        space = Tuple((MultiDiscrete([2, 3, 4]), ))
        pp = get_preprocessor(space)(space)
        self.assertTrue(pp.shape == (9, ))
        check(pp.transform((np.array([1, 2, 0]), )),
              [0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0])
        check(pp.transform((np.array([0, 1, 3]), )),
              [1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0])
Example #20
    def __init__(
        self,
        obs_space: spaces.Space,
        action_space: spaces.Space,
        num_outputs: int,
        model_config: ModelConfigDict,
        name: str,
    ):
        super(CentralizedActorCriticModel,
              self).__init__(obs_space, action_space, num_outputs,
                             model_config, name)

        model_config = model_config["custom_model_config"]
        self.n_agents = model_config["agent_number"]
        if model_config["critic_mode"] == "mean":
            self.critic_obs = spaces.Dict(
                OrderedDict({
                    "own_obs": self.obs_space,
                    "own_act": self.action_space,
                    "oppo_act": self.action_space,
                }))
        else:
            self.critic_obs = spaces.Dict(
                OrderedDict({
                    **{
                        f"AGENT-{i}": self.obs_space
                        for i in range(self.n_agents)
                    },
                    **{
                        f"AGENT-{i}-action": self.action_space
                        for i in range(self.n_agents)
                    },
                }))
        self.critic_preprocessor = get_preprocessor(self.critic_obs)(
            self.critic_obs)
        self.obs_preprocessor = get_preprocessor(self.obs_space)(
            self.obs_space)
        self.act_preprocessor = get_preprocessor(self.action_space)(
            self.action_space)

        self.action_model = self._build_action_model(
            model_config["action_model"])
        self.value_model = self._build_value_model(model_config["value_model"])
        self.register_variables(self.action_model.variables)
        self.register_variables(self.value_model.variables)
Example #21
    def __init__(self, ioctx: IOContext = None):
        super().__init__()
        print("Input reader initialization success!")
        import minerl

        patch_data_pipeline()

        input_config = ioctx.input_config
        env_name = ioctx.config.get("env")
        env_config = ioctx.config.get("env_config", {})
        self.data = minerl.data.make(
            env_name,
            data_dir=os.getenv("MINERL_DATA_ROOT",
                               input_config.get("data_dir", "data")),
            num_workers=input_config.get("num_workers", 4),
            worker_batch_size=input_config.get("worker_batch_size", 32),
            minimum_size_to_dequeue=input_config.get("minimum_size_to_dequeue",
                                                     32),
            force_download=input_config.get("force_download", False),
        )
        batch_size = input_config.get("batch_size", 1)
        seq_len = input_config.get("seq_len", 32)
        num_epochs = input_config.get("num_epochs", -1)
        preload_buffer_size = input_config.get("preload_buffer_size", 2)
        seed = input_config.get("seed", None)
        self.load_complete_episodes = input_config.get(
            "load_complete_episodes", True)
        self.generator = self.data.batch_iter(
            batch_size,
            seq_len,
            num_epochs=num_epochs,
            preload_buffer_size=preload_buffer_size,
            seed=seed,
        )
        env = MinerRLDataEnv(self.data)
        env = wrap_env(env, env_config, env_name)
        self.episode_generator = simulate_env_interaction(env)
        self.prep = get_preprocessor(env.observation_space)(
            env.observation_space)

        env_ptr = env
        self.obs_fns = []
        self.action_fns = []
        self.reverse_action_fns = []
        self.reward_fns = []
        while hasattr(env_ptr, "env"):
            if isinstance(env_ptr, gym.ObservationWrapper):
                self.obs_fns.append(env_ptr.observation)
            if isinstance(env_ptr, gym.ActionWrapper):
                self.action_fns.append(env_ptr.action)
                self.reverse_action_fns.append(env_ptr.reverse_action)
            if isinstance(env_ptr, gym.RewardWrapper):
                self.reward_fns.append(env_ptr.reward)
            env_ptr = env_ptr.env
Example #22
    def _init_shape(self, obs_space, options):
        logger.debug('obs_space:%s, options:%s' % (obs_space, options))
        assert isinstance(self._obs_space, spaces.Dict)
        size = 0
        self.preprocessors = []
        for space in self._obs_space.spaces.values():
            logger.debug("Creating sub-preprocessor for {}".format(space))
            preprocessor = get_preprocessor(space)(space, self._options)
            self.preprocessors.append(preprocessor)
            size += preprocessor.size
        return size,  # Note the trailing comma: the shape is a 1-tuple.
Example #23
    def test_dict_flattening_preprocessor(self):
        space = Dict({
            "a": Discrete(2),
            "b": Tuple([Discrete(3), Box(-1.0, 1.0, (4, ))]),
        })
        pp = get_preprocessor(space)(space)
        self.assertTrue(isinstance(pp, DictFlatteningPreprocessor))
        self.assertEqual(pp.shape, (9, ))
        check(
            pp.transform({
                "a": 1,
                "b": (1, np.array([0.0, -0.5, 0.1, 0.6]))
            }), [0.0, 1.0, 0.0, 1.0, 0.0, 0.0, -0.5, 0.1, 0.6])
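For reference, the flattened width of 9 above decomposes as 2 (one-hot of Discrete(2)) + 3 (one-hot of Discrete(3)) + 4 (the raw Box values), concatenated in the Dict's sorted key order, which is exactly the layout visible in the expected output.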
Example #24
    def __init__(self, ctx: ConnectorContext):
        super().__init__(ctx)

        if hasattr(ctx.observation_space, "original_space"):
            # ctx.observation_space is the space this Policy deals with.
            # We need to preprocess data from the original observation space here.
            obs_space = ctx.observation_space.original_space
        else:
            obs_space = ctx.observation_space

        self._preprocessor = get_preprocessor(obs_space)(
            obs_space, ctx.config.get("model", {}))
Example #25
    def __init__(
        self, obs_space, action_space, num_outputs, model_config, name, **kwargs
    ):
        super(CCModel, self).__init__(
            obs_space, action_space, num_outputs, model_config, name
        )

        # ordered dict
        agent_number = 4
        critic_obs = gym.spaces.Dict(
            {
                **{f"AGENT-{i}": obs_space for i in range(agent_number)},
                **{f"AGENT-{i}-action": action_space for i in range(agent_number)},
            }
        )

        self.critic_preprocessor = get_preprocessor(critic_obs)(critic_obs)
        self.obs_preprocessor = get_preprocessor(obs_space)(obs_space)
        self.act_preprocessor = get_preprocessor(action_space)(action_space)
        model_config["custom_model_config"] = dict()
        # inner network
        self.action_model = DictCNN(
            obs_space,
            action_space,
            num_outputs,
            model_config,
            name + "_action",
            **kwargs,
        )
        self.value_model = FullyConnectedNetwork(
            gym.spaces.Box(low=-1e10, high=1e10, shape=self.critic_preprocessor.shape),
            action_space,
            1,
            model_config,
            name + "_vf",
        )
        self.register_variables(self.action_model.variables())
        self.register_variables(self.value_model.variables())
Example #26
    def __init__(self, obs_space, action_space, num_outputs, model_config,
                 name):
        TorchModelV2.__init__(self, obs_space, action_space, num_outputs,
                              model_config, name)
        nn.Module.__init__(self)

        self.preprocessor = get_preprocessor(obs_space.original_space)(
            obs_space.original_space)

        self.shared_layers = None
        self.actor_layers = None
        self.critic_layers = None

        self._value_out = None
Example #27
def _unpack_obs(obs, space, tensorlib=tf):
    """Unpack a flattened Dict or Tuple observation array/tensor.

    Arguments:
        obs: The flattened observation tensor
        space: The original space prior to flattening
        tensorlib: The library used to unflatten (reshape) the array/tensor
    """

    if (isinstance(space, gym.spaces.Dict)
            or isinstance(space, gym.spaces.Tuple)):
        if id(space) in _cache:
            prep = _cache[id(space)]
        else:
            prep = get_preprocessor(space)(space)
            # Make an attempt to cache the result, if enough space left.
            if len(_cache) < 999:
                _cache[id(space)] = prep
        if len(obs.shape) != 2 or obs.shape[1] != prep.shape[0]:
            raise ValueError(
                "Expected flattened obs shape of [None, {}], got {}".format(
                    prep.shape[0], obs.shape))
        assert len(prep.preprocessors) == len(space.spaces), \
            (len(prep.preprocessors) == len(space.spaces))
        offset = 0
        if isinstance(space, gym.spaces.Tuple):
            u = []
            for p, v in zip(prep.preprocessors, space.spaces):
                obs_slice = obs[:, offset:offset + p.size]
                offset += p.size
                u.append(
                    _unpack_obs(tensorlib.reshape(obs_slice,
                                                  [-1] + list(p.shape)),
                                v,
                                tensorlib=tensorlib))
        else:
            u = OrderedDict()
            for p, (k, v) in zip(prep.preprocessors, space.spaces.items()):
                obs_slice = obs[:, offset:offset + p.size]
                offset += p.size
                u[k] = _unpack_obs(tensorlib.reshape(obs_slice,
                                                     [-1] + list(p.shape)),
                                   v,
                                   tensorlib=tensorlib)
        return u
    else:
        return obs
Example #28
def _unpack_obs(obs, space, tensorlib=tf):
    """Unpack a flattened Dict or Tuple observation array/tensor.

    Arguments:
        obs: The flattened observation tensor
        space: The original space prior to flattening
        tensorlib: The library used to unflatten (reshape) the array/tensor
    """

    if (isinstance(space, gym.spaces.Dict)
            or isinstance(space, gym.spaces.Tuple)):
        prep = get_preprocessor(space)(space)
        if len(obs.shape) != 2 or obs.shape[1] != prep.shape[0]:
            raise ValueError(
                "Expected flattened obs shape of [None, {}], got {}".format(
                    prep.shape[0], obs.shape))
        assert len(prep.preprocessors) == len(space.spaces), \
            (len(prep.preprocessors) == len(space.spaces))
        offset = 0
        if isinstance(space, gym.spaces.Tuple):
            u = []
            for p, v in zip(prep.preprocessors, space.spaces):
                obs_slice = obs[:, offset:offset + p.size]
                offset += p.size
                u.append(
                    _unpack_obs(
                        tensorlib.reshape(obs_slice, [-1] + list(p.shape)),
                        v,
                        tensorlib=tensorlib))
        else:
            u = OrderedDict()
            for p, (k, v) in zip(prep.preprocessors, space.spaces.items()):
                obs_slice = obs[:, offset:offset + p.size]
                offset += p.size
                u[k] = _unpack_obs(
                    tensorlib.reshape(obs_slice, [-1] + list(p.shape)),
                    v,
                    tensorlib=tensorlib)
        return u
    else:
        return obs
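A round-trip sketch for _unpack_obs above (assumptions: gym, numpy, and RLlib are importable, and tensorlib=np works because only reshape() is required of the tensor library):

import numpy as np
from gym.spaces import Box, Dict
from ray.rllib.models.preprocessors import get_preprocessor

space = Dict({"a": Box(-1, 1, (2,)), "b": Box(-1, 1, (3,))})
prep = get_preprocessor(space)(space)
flat = prep.transform(space.sample())[None, :]   # batched shape [1, 5]
nested = _unpack_obs(flat, space, tensorlib=np)  # {"a": (1, 2), "b": (1, 3)}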
Example #29
    def __init__(self,
                 obs_space,
                 action_space,
                 num_outputs,
                 model_config,
                 name,
                 fc_size=64,
                 lstm_state_size=64):
        super().__init__(obs_space, action_space, num_outputs, model_config,
                         name)

        self.obs_size = get_preprocessor(obs_space)(obs_space).size
        self.fc_size = fc_size
        self.lstm_state_size = lstm_state_size

        # Build the Module from fc + LSTM + 2xfc (action + value outs).
        self.fc1 = nn.Linear(self.obs_size, self.fc_size)
        self.lstm = nn.LSTM(
            self.fc_size, self.lstm_state_size, batch_first=True)
        self.action_branch = nn.Linear(self.lstm_state_size, num_outputs)
        self.value_branch = nn.Linear(self.lstm_state_size, 1)
        # Store the value output to save an extra forward pass.
        self._cur_value = None
Example #30
    def __init__(self, env_config):
        if env_config['extended']:
            self.action_n = 6
        else:
            self.action_n = 4

        self.extended = env_config['extended']
        self.action_space = Discrete(self.action_n)
        self.wrapped = BitcoinEnv(env_config)
        self.config = env_config
        self.alphas = env_config['alphas']
        self.max_hidden_block = env_config['max_hidden_block']
        self.game_trace = deque(''*10, 10)
        self.observation_space = Dict({
            "action_mask": Box(0, 1, shape=(self.action_n,)),
            "avail_actions": Box(-10, 10,
                                 shape=(self.action_n, self.action_n)),
            "bitcoin": self.wrapped.observation_space,
        })
        spy_space = constants.make_spy_space(len(self.alphas),
                                             self.max_hidden_block)
        blind_space = constants.make_blind_space(len(self.alphas),
                                                 self.max_hidden_block)
        self.prep = get_preprocessor(Discrete(3))(Discrete(3))
        self.action_assignments = np.zeros((self.action_n, self.action_n))
        for i in range(self.action_n):
            self.action_assignments[i, i] = 1
Example #31
def _get_size(obs_space):
    return get_preprocessor(obs_space)(obs_space).size
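A hedged usage sketch for the helper above (the Dict space is assumed for illustration):

from gym.spaces import Box, Dict, Discrete

space = Dict({"mode": Discrete(4), "pos": Box(-1.0, 1.0, (3,))})
print(_get_size(space))  # 7: a 4-wide one-hot for the Discrete plus 3 Box values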
Example #32
def _get_size(obs_space):
    return get_preprocessor(obs_space)(obs_space).size
Example #33
# flake8: noqa

# __preprocessing_observations_start__
import gym

env = gym.make("Pong-v0")

# RLlib uses preprocessors to implement transforms such as one-hot encoding
# and flattening of tuple and dict observations.
from ray.rllib.models.preprocessors import get_preprocessor

prep = get_preprocessor(env.observation_space)(env.observation_space)
# <ray.rllib.models.preprocessors.GenericPixelPreprocessor object at 0x7fc4d049de80>

# Observations should be preprocessed prior to feeding into a model
env.reset().shape
# (210, 160, 3)
prep.transform(env.reset()).shape
# (84, 84, 3)
# __preprocessing_observations_end__

# __query_action_dist_start__
# Get a reference to the policy
import numpy as np
from ray.rllib.agents.ppo import PPOTrainer

trainer = PPOTrainer(env="CartPole-v0",
                     config={
                         "framework": "tf2",
                         "num_workers": 0
                     })
Example #34
    def __init__(self, ctx: ConnectorContext):
        super().__init__(ctx)

        self._preprocessor = get_preprocessor(ctx.observation_space)(
            ctx.observation_space, ctx.config.get("model", {}))