# The snippet begins mid-block; the lines below reconstruct the setup implied
# by the repr output and the (210, 160, 3) observation shape: an Atari Pong
# env plus a preprocessor built with RLlib's get_preprocessor helper.
import gym
from ray.rllib.models.preprocessors import get_preprocessor

env = gym.make("PongNoFrameskip-v4")
prep = get_preprocessor(env.observation_space)(env.observation_space)
# <ray.rllib.models.preprocessors.GenericPixelPreprocessor object at 0x7fc4d049de80>

# Observations should be preprocessed prior to feeding them into a model
env.reset().shape
# (210, 160, 3)
prep.transform(env.reset()).shape
# (84, 84, 3)
# __preprocessing_observations_end__

# __query_action_dist_start__
# Get a reference to the policy
import numpy as np
from ray.rllib.algorithms.ppo import PPO

algo = PPO(env="CartPole-v0", config={"framework": "tf2", "num_workers": 0})
policy = algo.get_policy()
# <ray.rllib.policy.eager_tf_policy.PPOTFPolicy_eager object at 0x7fd020165470>

# Run a forward pass to get model output logits. Note that complex observations
# must be preprocessed as in the above code block.
logits, _ = policy.model({"obs": np.array([[0.1, 0.2, 0.3, 0.4]])})
# (<tf.Tensor: id=1274, shape=(1, 2), dtype=float32, numpy=...>, [])

# Compute the action distribution given those logits
policy.dist_class
# <class_object 'ray.rllib.models.tf.tf_action_dist.Categorical'>
dist = policy.dist_class(logits, policy.model)
# <ray.rllib.models.tf.tf_action_dist.Categorical object at 0x7fd02301d710>

# Query the distribution for samples and sample log-probabilities
dist.sample()
# <tf.Tensor: shape=(1,), dtype=int64, numpy=...>
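# The distribution can also be queried for the log-likelihood of a given
# action, and the model exposes the value-function output of its most recent
# forward pass. A minimal sketch continuing from the `dist` and `policy`
# objects above; the repr-style output comments are illustrative.
dist.logp([1])
# <tf.Tensor: shape=(1,), dtype=float32, numpy=...>

# Get the value estimate for the most recent forward pass
policy.model.value_function()
# <tf.Tensor: shape=(1,), dtype=float32, numpy=...>
# __query_action_dist_end__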
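# For most use cases the manual model/distribution plumbing above is not
# required: Algorithm.compute_single_action() chains preprocessing, the model
# forward pass, and sampling from the action distribution in one call. A
# minimal sketch reusing the `algo` object built above; the example
# observation values are arbitrary.
obs = np.array([0.1, 0.2, 0.3, 0.4], dtype=np.float32)
action = algo.compute_single_action(obs, explore=True)
# e.g. 0 or 1 -- CartPole has a Discrete(2) action space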