Example #1
def make_atari(env_id, max_episode_steps=None):
    env = gym.make(env_id)
    assert 'NoFrameskip' in env.spec.id
    env = NoopResetEnv(env, noop_max=30)
    env = MaxAndSkipEnv(env, skip=4)
    if max_episode_steps is not None:
        env = TimeLimit(env, max_episode_steps=max_episode_steps)
    return env
def pretrain_make_retro(*, game, state=None, max_episode_steps=4500, **kwargs):
    import retro
    if state is None:
        state = retro.State.DEFAULT
    env = retro.make(game, state, **kwargs)
    env = wrap_deepmind_retro(env)
    if max_episode_steps is not None:
        env = TimeLimit(env, max_episode_steps=max_episode_steps)
    return env
def make_retro(*, game, state=None, max_episode_steps=4500, **kwargs):
    import retro
    if state is None:
        state = retro.State.DEFAULT
    env = retro.make(game, state, **kwargs)
    env = wrap_deepmind_retro(env)
    env = StochasticFrameSkip(env, n=4, stickprob=0.25)
    if max_episode_steps is not None:
        env = TimeLimit(env, max_episode_steps=max_episode_steps)
    return env
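These factories apply the standard preprocessing wrappers before capping episode length with TimeLimit. A minimal usage sketch, assuming NoopResetEnv, MaxAndSkipEnv, StochasticFrameSkip and wrap_deepmind_retro come from OpenAI Baselines, TimeLimit from gym.wrappers, and that the ROMs are available; the game ids below are illustrative only:

# Usage sketch (assumed imports: gym, gym-retro, and the Baselines wrappers named above).
atari_env = make_atari('PongNoFrameskip-v4', max_episode_steps=10000)
retro_env = make_retro(game='Airstriker-Genesis')  # ROM bundled with gym-retro

obs = atari_env.reset()
obs, reward, done, info = atari_env.step(atari_env.action_space.sample())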
Example #4
File: envs.py  Project: yuanleirl/seac
    def _thunk():
        # env_id, seed, rank, time_limit, wrappers and monitor_dir are captured
        # from the enclosing factory function (not shown in this excerpt).

        env = gym.make(env_id)
        env.seed(seed + rank)

        if time_limit:
            env = TimeLimit(env, time_limit)
        for wrapper in wrappers:
            env = wrapper(env)

        if monitor_dir:
            env = Monitor(env,
                          monitor_dir,
                          lambda ep: int(ep == 0),
                          force=True,
                          uid=str(rank))

        return env
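_thunk is a zero-argument environment factory: it closes over the arguments of the enclosing function and builds the environment lazily inside the worker process. Factories like this are usually created once per worker and handed to a vectorized-environment constructor. A hedged sketch of that pattern, where make_env and its signature are assumptions standing in for the real enclosing function in yuanleirl/seac, and SubprocVecEnv is just one possible consumer:

# Illustrative only: make_env and its arguments are assumed, not taken from the excerpt.
from stable_baselines3.common.vec_env import SubprocVecEnv

env_id, seed, time_limit = "CartPole-v1", 0, 500   # placeholder values
env_fns = [
    make_env(env_id, seed, rank, time_limit, wrappers=[], monitor_dir=None)
    for rank in range(8)  # one factory per parallel worker
]
vec_env = SubprocVecEnv(env_fns)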
Example #5
def make_env(cfg):
    if 'mw_sawyer' in cfg.env:
        suite, task = cfg.env.split('_', 1)
        env = MetaWorld(task,
                        cfg.action_repeat,
                        cfg.rand_init_goal,
                        cfg.rand_init_hand,
                        cfg.rand_init_obj,
                        width=cfg.image_size)
        env = TimeLimit(env, cfg.time_limit / cfg.action_repeat)
        env = utils.FrameStackMetaWorld(env, k=cfg.frame_stack)
        return env
    """Helper function to create dm_control environment"""
    if cfg.env == 'ball_in_cup_catch':
        domain_name = 'ball_in_cup'
        task_name = 'catch'
    elif cfg.env == 'point_mass_easy':
        domain_name = 'point_mass'
        task_name = 'easy'
    else:
        domain_name = cfg.env.split('_')[0]
        task_name = '_'.join(cfg.env.split('_')[1:])

    # per dreamer: https://github.com/danijar/dreamer/blob/02f0210f5991c7710826ca7881f19c64a012290c/wrappers.py#L26
    camera_id = 2 if domain_name == 'quadruped' else 0

    env = dmc2gym.make(domain_name=domain_name,
                       task_name=task_name,
                       seed=cfg.seed,
                       visualize_reward=False,
                       from_pixels=True,
                       height=cfg.image_size,
                       width=cfg.image_size,
                       frame_skip=cfg.action_repeat,
                       camera_id=camera_id)

    env = utils.FrameStack(env, k=cfg.frame_stack)

    env.seed(cfg.seed)
    assert env.action_space.low.min() >= -1
    assert env.action_space.high.max() <= 1

    return env
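Note that the MetaWorld branch divides the raw time limit by the action repeat before passing it to TimeLimit, so the cap is counted in agent-visible (action-repeated) steps. A worked illustration with assumed config values:

time_limit = 1000       # cfg.time_limit (assumed value)
action_repeat = 4       # cfg.action_repeat (assumed value)
max_episode_steps = time_limit / action_repeat
print(max_episode_steps)  # 250.0 -> episode ends after 250 agent steps, 1000 simulator steps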
Example #6
if __name__ == '__main__':
    #parser = argparse.ArgumentParser()
    #parser.add_argument("--cuda", default=False, action='store_true', help='Enable CUDA')
    #parser.add_argument("-n", "--name", required=True, help="Name of the run")
    #parser.add_argument("-e", "--env", default=ENV_ID, help="Environment id, default=" + ENV_ID)
    #args = parser.parse_args()
    #device = torch.device("cuda" if args.cuda else "cpu")
    device = 'cpu'

    save_path = os.path.join("saves", "ppo_test1")
    os.makedirs(save_path, exist_ok=True)

    env = GymEnv42()  ###*****CHANGE
    #test_env = GymEnv42()
    env = TimeLimit(env, max_episode_steps=200)

    net_act = ModelActor(env.observation_space.shape[0],
                         env.action_space.n).to(device)
    net_crt = ModelCritic(env.observation_space.shape[0]).to(device)
    print(net_act)
    print(net_crt)

    writer = SummaryWriter(comment="-ppo_test1")
    agent = AgentA2C(net_act, device=device)
    exp_source = ptan.experience.ExperienceSource(env, agent, steps_count=1)

    opt_act = optim.Adam(net_act.parameters(), lr=LEARNING_RATE_ACTOR)
    opt_crt = optim.Adam(net_crt.parameters(), lr=LEARNING_RATE_CRITIC)

    trajectory = []
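The excerpt ends right after the empty trajectory buffer is created. In ptan-style PPO scripts the buffer is normally filled by iterating exp_source; the sketch below is an assumed continuation, with TRAJECTORY_SIZE a hypothetical hyperparameter and the actual PPO update elided:

# Assumed continuation of the training loop (sketch only, not from the original listing).
TRAJECTORY_SIZE = 2049  # hypothetical buffer size

for step_idx, exp in enumerate(exp_source):
    trajectory.append(exp)
    if len(trajectory) < TRAJECTORY_SIZE:
        continue
    # ...estimate advantages with net_crt, run PPO epochs on net_act/net_crt
    # using opt_act/opt_crt, then start a fresh trajectory...
    trajectory.clear()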
Example #7
import torch
import robotic_warehouse
import lbforaging
import gym

from a2c import A2C
from wrappers import RecordEpisodeStatistics, TimeLimit

path = "pretrained/rware-small-4ag"
env_name = "rware-small-4ag-v1"
time_limit = 500  # 25 for LBF

RUN_STEPS = 1500

env = gym.make(env_name)
env = TimeLimit(env, time_limit)
env = RecordEpisodeStatistics(env)

agents = [
    A2C(i, osp, asp, 0.1, 0.1, False, 1, 1, "cpu")
    for i, (osp, asp) in enumerate(zip(env.observation_space, env.action_space))
]

for agent in agents:
    agent.restore(path + f"/agent{agent.agent_id}")

obs = env.reset()

for i in range(RUN_STEPS):
    obs = [torch.from_numpy(o) for o in obs]