def check_reset_assert_error(env, new_reset_return):
    """
    Helper to check that the error is caught.
    :param env: (gym.Env)
    :param new_reset_return: (Any)
    """

    def wrong_reset():
        return new_reset_return

    # Patch the reset method with a wrong one
    env.reset = wrong_reset
    with pytest.raises(AssertionError):
        check_env(env)


def test_high_dimension_action_space():
    """
    Test for continuous action space
    with more than one action.
    """
    env = FakeImageEnv()
    # Patch the action space
    env.action_space = spaces.Box(low=-1, high=1, shape=(20,), dtype=np.float32)

    # Patch to avoid error
    def patched_step(_action):
        return env.observation_space.sample(), 0.0, False, {}
    env.step = patched_step
    check_env(env)


def check_step_assert_error(env, new_step_return=()):
    """
    Helper to check that the error is caught.
    :param env: (gym.Env)
    :param new_step_return: (tuple)
    """

    def wrong_step(_action):
        return new_step_return

    # Patch the step method with a wrong one
    env.step = wrong_step
    with pytest.raises(AssertionError):
        check_env(env)
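

# Usage sketch for the two helpers above, assuming a simple env such as the
# FakeImageEnv used elsewhere in this file (the concrete wrong return values
# below are illustrative, not taken from the original tests):
def example_wrong_return_values():
    # reset() must return a valid observation, so a bare integer should fail
    check_reset_assert_error(FakeImageEnv(), 5)
    # step() must return a 4-tuple (obs, reward, done, info), so a 2-tuple should fail
    env = FakeImageEnv()
    check_step_assert_error(env, (env.observation_space.sample(), 0.0))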
Example #4
def PendulumExample():
    builder = DiagramBuilder()
    plant = builder.AddSystem(PendulumPlant())
    scene_graph = builder.AddSystem(SceneGraph())
    PendulumGeometry.AddToBuilder(builder, plant.get_state_output_port(),
                                  scene_graph)
    MeshcatVisualizerCpp.AddToBuilder(
        builder, scene_graph, meshcat,
        MeshcatVisualizerParams(publish_period=np.inf))

    builder.ExportInput(plant.get_input_port(), "torque")
    # Note: The Pendulum-v0 in gym outputs [cos(theta), sin(theta), thetadot]
    builder.ExportOutput(plant.get_state_output_port(), "state")

    # Add a camera (I have sugar for this in the manip repo)
    X_WC = RigidTransform(RotationMatrix.MakeXRotation(-np.pi / 2),
                          [0, -1.5, 0])
    rgbd = AddRgbdSensor(builder, scene_graph, X_WC)
    builder.ExportOutput(rgbd.color_image_output_port(), "camera")

    diagram = builder.Build()
    simulator = Simulator(diagram)

    def reward(system, context):
        plant_context = plant.GetMyContextFromRoot(context)
        state = plant_context.get_continuous_state_vector()
        u = plant.get_input_port().Eval(plant_context)[0]
        theta = state[0] % (2 * np.pi)  # Wrap to [0, 2*pi)
        theta_dot = state[1]
        return (theta - np.pi)**2 + 0.1 * theta_dot**2 + 0.001 * u

    max_torque = 3
    env = DrakeGymEnv(simulator,
                      time_step=0.05,
                      action_space=gym.spaces.Box(low=np.array([-max_torque]),
                                                  high=np.array([max_torque])),
                      observation_space=gym.spaces.Box(
                          low=np.array([-np.inf, -np.inf]),
                          high=np.array([np.inf, np.inf])),
                      reward=reward,
                      render_rgb_port_id="camera")
    check_env(env)

    if show:
        env.reset()
        image = env.render(mode='rgb_array')
        fig, ax = plt.subplots()
        ax.imshow(image)
        plt.show()
Example #5
def test_gym_env(kwargs):
    server = FakeServer()
    server.start()
    env = gym.make("SpaceEngineers-WalkingRobot-IK-v2", detach=True, verbose=2, **kwargs)

    check_env(env, warn=True)

    env.reset()
    for _ in range(10):
        _, _, done, _ = env.step(env.action_space.sample())

        if done:
            env.reset()

    env.close()
Example #6
def test_dqn():
    env = gym.make("fishing-v0")
    check_env(env)

    model = DQN("MlpPolicy", env, verbose=1)
    model.learn(total_timesteps=200)

    # Simulate a run with the trained model, visualize result
    df = env.simulate(model)
    env.plot(df, "dqn-test.png")

    # Evaluate model
    mean_reward, std_reward = evaluate_policy(model, env, n_eval_episodes=5)

    df = env.policyfn(model)
    env.plot_policy(df, "policy-test.png")
Example #7
def test_env(env_id):
    """
    Check that environments integrated in Gym pass the test.

    :param env_id: (str)
    """
    env = gym.make(env_id)
    with pytest.warns(None) as record:
        check_env(env)

    # Pendulum-v0 will produce a warning because the action space is
    # in [-2, 2] and not [-1, 1]
    if env_id == "Pendulum-v0":
        assert len(record) == 1
    else:
        # The other environments must pass without warning
        assert len(record) == 0
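

# For context, the warning for Pendulum-v0 comes from check_env's recommendation
# that Box action spaces be symmetric and normalized to [-1, 1]. A minimal sketch
# of an action space that avoids that warning (illustrative, not from the original file):
import numpy as np
from gym import spaces

normalized_action_space = spaces.Box(low=-1.0, high=1.0, shape=(1,), dtype=np.float32)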
Example #8
def test_all_environments():

    envs_id = [
        env_spec.id for env_spec in gym.envs.registry.all()
        if env_spec.id.startswith('Eplus')
    ]
    envs_id = [env_id for env_id in envs_id if 'IWMullion' not in env_id]
    for env_id in envs_id:
        # Create env with TEST name
        env = gym.make(env_id)

        # stable-baselines3 environment checker: check that the environment
        # follows the Gym API.
        check_env(env)

        # Rename directory with name TEST for later removal
        os.rename(
            env.simulator._env_working_dir_parent, 'Eplus-env-TEST' +
            env.simulator._env_working_dir_parent.split('/')[-1])
Example #9
def main():
    # Initialize the environment
    env = WebotsStickEnv()
    check_env(env)

    # Train
    model = PPO('MlpPolicy', env, n_steps=2048, verbose=1)
    model.learn(total_timesteps=100_000)

    # Replay
    print('Training is finished, press `Y` for replay...')
    env.wait_keyboard()

    obs = env.reset()
    for t in range(100000):
        action, _states = model.predict(obs)
        obs, reward, done, info = env.step(action)
        print(obs)
        if done:
            obs = env.reset()
Example #10
    def __init__(self,
                 rap,
                 log_dir="/tmp/gym",
                 training_config=None,
                 algorithm="A2C",
                 checkpoint_results=None):
        super(ResourceManager,
              self).__init__(rap,
                             log_dir=log_dir,
                             algorithm=algorithm,
                             checkpoint_results=checkpoint_results)

        self.model_name = rap["name"] + "_baseline"

        self.environment = ResourceAllocationEnvironment(self.ra_problem)
        check_env(self.environment, warn=True)

        self.vector_environment = make_vec_env(lambda: self.environment,
                                               n_envs=1,
                                               monitor_dir=self.log_dir)

        self.training_steps = training_config["stage1_training_steps"]
Example #11
def test_tipping():
    np.random.seed(0)
    env = gym.make("fishing-v2", sigma=0, init_state=0.75)
    check_env(env)
    env.reset()
    # increases above tipping point
    obs, reward, done, info = env.step(env.get_action(0))
    assert env.get_fish_population(obs) >= 0.75

    # Decreases below the tipping point
    env.init_state = 0.3
    env.reset()
    obs, reward, done, info = env.step(env.get_action(0))
    assert env.get_fish_population(obs) <= 0.3

    # model = user_action(env)
    model = msy(env)
    df = env.simulate(model)
    env.plot(df, "tip_msy-test.png")
    model = escapement(env)
    df = env.simulate(model)
    env.plot(df, "tip_escapement-test.png")
Example #12
    def test_metro_network_env(self):
        # config = get_config(AGENT_CONFIG)
        # # simulator = Simulator(NETWORK, SERVICE, USER_CONFIG)
        # env = gym.make('metro_network-env-v1', agent_config=config, network_file=NETWORK, service_file=SERVICE, user_trace_file = USER_TRACE)
        # np.random.seed(123)
        # env.seed(123)
        # obs = env.reset()
        # logger.info(f"obs {obs}")
        # action = env.action_space.sample()
        # env.step(action)

        config = get_config(AGENT_CONFIG)
        env = MetroNetworkEnv(
            agent_config=config,
            network_file=NETWORK_TRIANGLE,
            service_file=SERVICE_TRIANGLE,
            user_trace_file=USER_TRACE,
            service_requirement_file=SERVICE_REQUIREMENT_TRIANGLE,
            ingress_distribution_file=ingress_distribution_file_path,
            container_client_file=docker_client_services_path,
            container_server_file=docker_server_services_path,
            container_lb_file=docker_lb_container_path)
        check_env(env)
Example #13
def train(env, type, timesteps):
    env.reset()
    print(check_env(env))
    env = FlattenObservation(env)
    print(env.reward_range)
    print(env.action_space)
    if type == "DQN":
        model = DQN('MlpPolicy',
                    exploration_fraction=0.999,
                    env=env,
                    verbose=1)
    elif type == "A2C":
        model = A2C('MlpPolicy', env=env, verbose=1)
    elif type == "PPO":
        model = PPO('MlpPolicy', env=env, verbose=1)

    model.learn(total_timesteps=timesteps)
    model.save("model_cups")
Example #14
from stable_baselines3.common.env_checker import check_env
from swarmsim_env import SingleExplorer

print(check_env(SingleExplorer()))
Example #15
def test_check_env_dict_action():
    test_env = ActionDictTestEnv()

    with pytest.warns(Warning):
        check_env(env=test_env, warn=True)
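

# A sketch of what an ActionDictTestEnv-style env presumably looks like: the
# Dict action space is what makes check_env emit the warning asserted above.
# The class below is illustrative, not the actual test fixture.
import gym
import numpy as np
from gym import spaces


class DictActionEnvSketch(gym.Env):
    action_space = spaces.Dict({"position": spaces.Discrete(1), "velocity": spaces.Discrete(1)})
    observation_space = spaces.Box(low=-1.0, high=2.0, shape=(3,), dtype=np.float32)

    def step(self, action):
        observation = np.array([1.0, 1.5, 0.5], dtype=np.float32)
        return observation, 1.0, True, {}

    def reset(self):
        return np.array([1.0, 1.5, 0.5], dtype=np.float32)

    def render(self, mode="human"):
        pass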
Example #16
from stable_baselines3.common.env_checker import check_env
import gym
import envs
ENV_ID = "CustomEnv-v0"
my_env = gym.make(ENV_ID)
# It will check your custom environment and output additional warnings if needed
check_env(my_env)
Example #17
        self.task.reset()
        self.reward_calculator.reset()
        self._steps_curr_episode = 0
        obs, _ = self.observation_collector.get_observations()
        return obs  # reward, done, info can't be included

    def close(self):
        pass


if __name__ == '__main__':

    rospy.init_node('flatland_gym_env', anonymous=True)
    print("start")

    flatland_env = FlatlandEnv()
    check_env(flatland_env, warn=True)

    # init env
    obs = flatland_env.reset()

    # run model
    n_steps = 200
    for step in range(n_steps):
        # action, _states = model.predict(obs)
        action = flatland_env.action_space.sample()

        obs, rewards, done, info = flatland_env.step(action)

        time.sleep(0.1)
""" Test env with random actions """

import time
import gym
import widowx_env
from stable_baselines3.common.env_checker import check_env

env = gym.make('widowx_reacher-v14')
print("any warnings?", check_env(env))

# Comment this for goal environments
# print("Action space: ", env.action_space)
# print(env.action_space.high)
# print(env.action_space.low)
# print("Observation space: ", env.observation_space)
# print(env.observation_space.high)
# print(env.observation_space.low)

# env.render()

for episode in range(2):
    obs = env.reset()
    rewards = []

    for t in range(100):
        action = env.action_space.sample()

        obs, reward, done, info = env.step(action)

        print("action: ", action)
        print("obs: ", obs)
Example #19
            raise ValueError(f'Unrecognized action {action}')

        self._state = np.clip(self._state, 0, self._grid_size - 1)
        done = bool(self._state == self._grid_size - 1)
        reward = 1 if done else 0
        return np.array([self._state]).astype(np.float32), reward, done, {}

    def reset(self):
        self._state = 0
        return np.array([self._state]).astype(np.float32)

    def render(self, mode='human'):
        pass


if __name__ == '__main__':
    check_env(GridWorld(10))
    env = make_vec_env(lambda: GridWorld(10), n_envs=1)

    model = PPO('MlpPolicy', env, verbose=1).learn(5000)

    state = env.reset()
    for _ in range(20):
        action, _ = model.predict(state, deterministic=True)
        # action = 0
        next_state, reward, done, info = env.step(action)
        print(f'{state} -> {action} -> {next_state}: {reward}')
        state = next_state
        if done:
            break
Example #20
def test_by_stablebaseline():
    from stable_baselines3.common.env_checker import check_env
    env = gym.make(GYM_VERSION_NAME, **env_kwargs)
    check_env(env)
Example #21
def test_custom_envs(env_class):
    env = env_class()
    with pytest.warns(None) as record:
        check_env(env)
    # No warnings for custom envs
    assert len(record) == 0
Example #22
    def _on_training_end(self) -> None:
        pass


def make_env():
    def _init():
        env = Monitor(CoinrunEnv())
        return env

    return _init


if __name__ == '__main__':
    eval_env = CoinrunEnv()
    check_env(eval_env)

    # vectorize env (1 per cpu)
    n_cpus = 4
    env = SubprocVecEnv([make_env() for _ in range(n_cpus)])
    # env = Monitor(env, filename='logs.txt')

    # init model
    model = PPO(
        policy='CnnPolicy',
        env=env,
        gamma=0.999,
        gae_lambda=0.95,
        learning_rate=5e-4,
        n_steps=2048,  # true train batch size is n_steps * n_cpus (n_cpus being the number of envs running in parallel)
Example #23
def test_custom_envs(env_class):
    env = env_class()
    check_env(env)
Example #24
from stable_baselines3 import A2C

from gym_quarto import QuartoEnvV0, OnePlayerWrapper, RandomPlayer, HumanPlayer, random_action

logging.basicConfig(level=logging.INFO)


def make_env(player=None):
    env = gym.make('quarto-v1')
    if player is None:
        player = RandomPlayer(env)
    env = OnePlayerWrapper(env, player)
    return env


check_env(make_env())


def random_vs_random():
    env = make_env()
    NB_EPISODE = 1
    for episode in range(NB_EPISODE):
        obs = env.reset()
        done = False
        while not done:
            action = random.choice(list(env.legal_actions))
            obs, reward, done, info = env.step(action)
            #print(f"{info['turn']: <4} | ")
            env.render()
        print("done")
Example #25
import gym
import numpy as np

from stable_baselines3.common.env_checker import check_env
from stable_baselines3 import PPO

from environment import EggnoggEnv
from game import Game
import time

game = Game()

env = EggnoggEnv(game, 1, 0)
print(check_env(env))

# set the screenshot to find the window automatically

model = PPO("CnnPolicy", env, verbose=1)
model.learn(total_timesteps=1)
time.sleep(3)


model.save("ppo_eggnogg")
obs = env.reset()

#for i in range(2000):
#    action, _states = model.predict(obs)
#    obs, rewards, done, info = env.step(action)  
#    env.render()
Example #26
        gui=ARGS.gui,
        record=ARGS.record_video,
        obstacles=ARGS.obstacles,
        user_debug_gui=ARGS.user_debug_gui,
        goal_xyz=GOAL_XYZ,
        collision_point=COLLISION_POINT,
        protected_radius=protected_radius,
        goal_radius=ARGS.goal_radius,
        collision_time=ARGS.collision_time,
    )

    #### Obtain the PyBullet Client ID from the environment ####
    PYB_CLIENT = env.getPyBulletClient()
    DRONE_IDS = env.getDroneIds()

    check_env(env, warn=True, skip_render_check=True)

    #### Compute the number of control steps in the simulation ######
    PERIOD = ARGS.duration_sec
    NUM_WP = ARGS.control_freq_hz * PERIOD

    #### Initialize the logger #################################
    logger = Logger(logging_freq_hz=int(ARGS.simulation_freq_hz /
                                        AGGR_PHY_STEPS),
                    num_drones=ARGS.num_drones)

    #### Run the simulation ####################################
    CTRL_EVERY_N_STEPS = int(np.floor(env.SIM_FREQ / ARGS.control_freq_hz))
    START = time.time()

    #Start the model learning
Example #27
        self.task.reset()
        self.reward_calculator.reset()
        self._steps_curr_episode = 0
        obs, _ = self.observation_collector.get_observations()
        return obs  # reward, done, info can't be included

    def close(self):
        pass


if __name__ == '__main__':

    rospy.init_node('wp3_gym_env', anonymous=True)
    print("start")

    wp3_env = wp3Env()
    check_env(wp3_env, warn=True)

    # init env
    obs = wp3_env.reset()

    # run model
    n_steps = 200
    for step in range(n_steps):
        # action, _states = model.predict(obs)
        action = wp3_env.action_space.sample()

        obs, rewards, done, info = wp3_env.step(action)

        time.sleep(0.1)
Example #28
from stable_baselines3.common.env_checker import check_env
from constructionSite import ConstructionSite


env = ConstructionSite()
# If the environment doesn't follow the interface, an error will be thrown

obs = env.reset()

print(env.observation_space)
print(env.action_space)
print(env.action_space.sample())

for action in range(0, 6):
    observation, reward, done, info = env.step(action)
    print(f"observation.shape : {observation.shape}\t\treward : {reward}\t\tdone : {done}\t\taction : {action}")

check_env(env, warn=True)
Example #29
            froms, tos, results = rayTest(self.robot.robot,
                                          ray_length=self.LASER_LENGTH,
                                          ray_num=self.LASER_NUM)
            for index, result in enumerate(results):
                color = self.MISS_COLOR if result[0] == -1 else self.HIT_COLOR
                self.rayDebugLineIds.append(
                    p.addUserDebugLine(froms[index], tos[index], color,
                                       self.RAY_DEBUG_LINE_WIDTH))

        return np.array(state)

    def render(self, mode='human'):
        pass

    def seed(self, seed=None):
        self.np_random, seed = gym.utils.seeding.np_random(seed)
        return [seed]

    def close(self):
        if self._physics_client_id >= 0:
            p.disconnect()
        self._physics_client_id = -1


if __name__ == "__main__":
    env = MaplessNaviEnv()
    # pprint([attr for attr in dir(env) if attr[:2] != "__"])

    from stable_baselines3.common.env_checker import check_env
    check_env(env)
Example #30
import gym
import gym_fishing
from stable_baselines3.common.env_checker import check_env

env0 = gym.make('fishing-v0')
env1 = gym.make('fishing-v1')
env2 = gym.make('fishing-v2')
#env3 = gym.make('fishing-v3')
env4 = gym.make('fishing-v4')

check_env(env0)
check_env(env1)
check_env(env2)
#check_env(env3)
check_env(env4)

env0.close()