示例#1
0
  def test_episode_count(self):
    """Verifies `RunStats.episodes` grows by one for each finished episode.

    A wrapped CartPole environment is stepped with action 1 until the time
    step is last; this is repeated three times, checking the counter after
    each episode.
    """
    stats_env = wrappers.RunStats(
        gym_wrapper.GymWrapper(gym.make('CartPole-v1')))

    def take_step():
      # A fresh action array is built for every call.
      return stats_env.step(np.array(1, dtype=np.int32))

    # No episode is counted before, or right after, the initial reset.
    self.assertEqual(0, stats_env.episodes)
    current = stats_env.reset()
    self.assertEqual(0, stats_env.episodes)

    for finished_episodes in (1, 2, 3):
      # Run the current episode up to its final time step.
      while not current.is_last():
        current = take_step()
      self.assertEqual(finished_episodes, stats_env.episodes)
      # Step once more past the final time step so the next pass has a new
      # episode to run (presumably the environment auto-resets -- confirm).
      current = take_step()
示例#2
0
  def test_episode_count_with_time_limit(self):
    """Verifies an episode cut off by `TimeLimit` is counted by `RunStats`.

    CartPole is wrapped with a time limit of 2, so the second step taken
    after the reset is expected to be the last one of the episode.
    """
    limited_env = wrappers.TimeLimit(
        gym_wrapper.GymWrapper(gym.make('CartPole-v1')), 2)
    stats_env = wrappers.RunStats(limited_env)

    stats_env.reset()
    self.assertEqual(0, stats_env.episodes)

    # Take exactly two steps with action 0; only the second result is checked.
    final_step = None
    for _ in range(2):
      final_step = stats_env.step(np.array(0, dtype=np.int32))

    self.assertTrue(final_step.is_last())
    self.assertEqual(1, stats_env.episodes)
示例#3
0
  def test_resets_count(self):
    """Verifies `RunStats.resets` counts the explicit reset and later restarts.

    After the initial `reset()` the counter must be 1. It is then expected to
    stay constant while an episode is running and to be one higher during
    each following episode.
    """
    stats_env = wrappers.RunStats(
        gym_wrapper.GymWrapper(gym.make('CartPole-v1')))

    def take_step():
      # A fresh action array is built for every call.
      return stats_env.step(np.array(1, dtype=np.int32))

    self.assertEqual(0, stats_env.resets)
    current = stats_env.reset()
    self.assertEqual(1, stats_env.resets)

    # Four episodes; the expected reset count is 1, 2, 3, 4 respectively.
    for expected_resets in range(1, 5):
      while not current.is_last():
        self.assertEqual(expected_resets, stats_env.resets)
        current = take_step()
      # Stepping past the final time step is what the next iteration expects
      # to have bumped the reset counter by one.
      current = take_step()
示例#4
0
  def test_step_count(self):
    """Verifies `RunStats.total_steps` tracks steps taken inside episodes.

    Over four episodes, the counter is compared before every in-episode step
    against a locally maintained tally. The extra step taken past each final
    time step is deliberately not added to the tally.
    """
    stats_env = wrappers.RunStats(
        gym_wrapper.GymWrapper(gym.make('CartPole-v1')))

    def take_step():
      # A fresh action array is built for every call.
      return stats_env.step(np.array(1, dtype=np.int32))

    # NOTE(review): these two checks look at `episodes`, not `total_steps` --
    # possibly carried over from the episode-count test; confirm intent.
    self.assertEqual(0, stats_env.episodes)
    current = stats_env.reset()
    self.assertEqual(0, stats_env.episodes)

    expected_total = 0
    for _ in range(4):
      while True:
        if current.is_last():
          break
        self.assertEqual(expected_total, stats_env.total_steps)
        current = take_step()
        expected_total += 1
      # Step past the final time step; this one is not tallied locally.
      current = take_step()
示例#5
0
from dual_goal_maze_env import DualGoalMaze
from tf_agents.environments import utils
from tf_agents.environments import wrappers
import tensorflow as tf
import numpy as np

from tf_agents.environments import py_environment
from tf_agents.environments import tf_environment
from tf_agents.environments import tf_py_environment
from tf_agents.specs import array_spec
from tf_agents.environments import suite_gym
from tf_agents.trajectories import time_step as ts

# Build the maze environment and wrap it in RunStats so that run statistics
# are tracked alongside the environment itself.
environment = DualGoalMaze()
stats_env = wrappers.RunStats(environment)

# Run the TF-Agents environment validation utility on the wrapped env for
# 5 episodes before using it.
utils.validate_py_environment(stats_env, episodes=5)

# Start from a reset environment; `time_step` is advanced by the loop below.
time_step = stats_env.reset()
# Accumulators for the rollout; presumably filled with per-episode totals by
# code following the loop -- not visible here, confirm.
rewards = []
steps = []
# Number of episodes the rollout loop iterates over.
num_episodes = 5

for _ in range(num_episodes):
    episode_reward = 0
    episode_steps = 0
    while not time_step.is_last():
        action = np.random.randint(0, 4)
        time_step = stats_env.step(action)
        episode_steps += 1
        episode_reward += time_step.reward