def make_atari(env_id, max_episode_steps=None):
    env = gym.make(env_id)
    assert 'NoFrameskip' in env.spec.id
    env = NoopResetEnv(env, noop_max=30)
    env = MaxAndSkipEnv(env, skip=4)
    if max_episode_steps is not None:
        env = TimeLimit(env, max_episode_steps=max_episode_steps)
    return env
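
# Usage sketch (illustrative, not part of the original snippet): the env id and
# step budget are placeholders; make_atari assumes the baselines-style
# NoopResetEnv / MaxAndSkipEnv wrappers and a TimeLimit wrapper are in scope.
env = make_atari('BreakoutNoFrameskip-v4', max_episode_steps=4500)
obs = env.reset()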
def pretrain_make_retro(*, game, state=None, max_episode_steps=4500, **kwargs):
    import retro
    if state is None:
        state = retro.State.DEFAULT
    env = retro.make(game, state, **kwargs)
    env = wrap_deepmind_retro(env)
    if max_episode_steps is not None:
        env = TimeLimit(env, max_episode_steps=max_episode_steps)
    return env
def make_retro(*, game, state=None, max_episode_steps=4500, **kwargs):
    import retro
    if state is None:
        state = retro.State.DEFAULT
    env = retro.make(game, state, **kwargs)
    env = wrap_deepmind_retro(env)
    env = StochasticFrameSkip(env, n=4, stickprob=0.25)
    if max_episode_steps is not None:
        env = TimeLimit(env, max_episode_steps=max_episode_steps)
    return env
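
# Usage sketch (illustrative): the game name below is the default title bundled
# with gym-retro, not one used in the snippets above; wrap_deepmind_retro and
# StochasticFrameSkip are assumed to be baselines-style retro helpers in scope.
env = make_retro(game='Airstriker-Genesis', max_episode_steps=4500)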
def _thunk():
    env = gym.make(env_id)
    env.seed(seed + rank)
    if time_limit:
        env = TimeLimit(env, time_limit)
    for wrapper in wrappers:
        env = wrapper(env)
    if monitor_dir:
        env = Monitor(env, monitor_dir, lambda ep: int(ep == 0), force=True, uid=str(rank))
    return env
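
# _thunk closes over env_id, seed, rank, time_limit, wrappers and monitor_dir,
# so it is normally defined inside a per-worker factory and handed to a
# vectorized-env constructor. A minimal sketch under that assumption (the
# make_env_fn name, argument order, and SubprocVecEnv usage are illustrative,
# not taken from the snippet above):
def make_env_fn(env_id, seed, rank, time_limit=None, wrappers=()):
    def _thunk():
        env = gym.make(env_id)
        env.seed(seed + rank)
        if time_limit:
            env = TimeLimit(env, time_limit)
        for wrapper in wrappers:
            env = wrapper(env)
        return env
    return _thunk

# envs = SubprocVecEnv([make_env_fn('CartPole-v1', 0, rank) for rank in range(8)])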
def make_env(cfg):
    """Helper function to create dm_control environment"""
    if 'mw_sawyer' in cfg.env:
        suite, task = cfg.env.split('_', 1)
        env = MetaWorld(task, cfg.action_repeat, cfg.rand_init_goal,
                        cfg.rand_init_hand, cfg.rand_init_obj,
                        width=cfg.image_size)
        env = TimeLimit(env, cfg.time_limit / cfg.action_repeat)
        env = utils.FrameStackMetaWorld(env, k=cfg.frame_stack)
        return env

    if cfg.env == 'ball_in_cup_catch':
        domain_name = 'ball_in_cup'
        task_name = 'catch'
    elif cfg.env == 'point_mass_easy':
        domain_name = 'point_mass'
        task_name = 'easy'
    else:
        domain_name = cfg.env.split('_')[0]
        task_name = '_'.join(cfg.env.split('_')[1:])

    # per dreamer: https://github.com/danijar/dreamer/blob/02f0210f5991c7710826ca7881f19c64a012290c/wrappers.py#L26
    camera_id = 2 if domain_name == 'quadruped' else 0

    env = dmc2gym.make(domain_name=domain_name,
                       task_name=task_name,
                       seed=cfg.seed,
                       visualize_reward=False,
                       from_pixels=True,
                       height=cfg.image_size,
                       width=cfg.image_size,
                       frame_skip=cfg.action_repeat,
                       camera_id=camera_id)

    env = utils.FrameStack(env, k=cfg.frame_stack)
    env.seed(cfg.seed)

    assert env.action_space.low.min() >= -1
    assert env.action_space.high.max() <= 1

    return env
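
# make_env only reads attributes off cfg, so any namespace exposing the right
# fields works. An illustrative dm_control configuration (the concrete values
# are assumptions, not taken from the snippet above):
from types import SimpleNamespace

cfg = SimpleNamespace(env='cheetah_run', seed=1, image_size=84,
                      action_repeat=4, frame_stack=3)
env = make_env(cfg)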
if __name__ == '__main__':
    # parser = argparse.ArgumentParser()
    # parser.add_argument("--cuda", default=False, action='store_true', help='Enable CUDA')
    # parser.add_argument("-n", "--name", required=True, help="Name of the run")
    # parser.add_argument("-e", "--env", default=ENV_ID, help="Environment id, default=" + ENV_ID)
    # args = parser.parse_args()
    # device = torch.device("cuda" if args.cuda else "cpu")
    device = 'cpu'

    save_path = os.path.join("saves", "ppo_test1")
    os.makedirs(save_path, exist_ok=True)

    env = GymEnv42()  # *****CHANGE
    # test_env = GymEnv42()
    env = TimeLimit(env, max_episode_steps=200)

    net_act = ModelActor(env.observation_space.shape[0], env.action_space.n).to(device)
    net_crt = ModelCritic(env.observation_space.shape[0]).to(device)
    print(net_act)
    print(net_crt)

    writer = SummaryWriter(comment="-ppo_test1")
    agent = AgentA2C(net_act, device=device)
    exp_source = ptan.experience.ExperienceSource(env, agent, steps_count=1)

    opt_act = optim.Adam(net_act.parameters(), lr=LEARNING_RATE_ACTOR)
    opt_crt = optim.Adam(net_crt.parameters(), lr=LEARNING_RATE_CRITIC)

    trajectory = []
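
    # The script stops right before the training loop. A rough continuation
    # sketch under ptan's ExperienceSource iteration protocol; TRAJECTORY_SIZE
    # and the body of the PPO update are placeholders, not taken from the
    # original script.
    TRAJECTORY_SIZE = 2049  # placeholder batch size
    for step_idx, exp in enumerate(exp_source):
        trajectory.append(exp)
        if len(trajectory) < TRAJECTORY_SIZE:
            continue
        # ... compute advantages and run PPO epochs with opt_act / opt_crt ...
        trajectory.clear()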
import torch
import robotic_warehouse
import lbforaging
import gym

from a2c import A2C
from wrappers import RecordEpisodeStatistics, TimeLimit

path = "pretrained/rware-small-4ag"
env_name = "rware-small-4ag-v1"
time_limit = 500  # 25 for LBF

RUN_STEPS = 1500

env = gym.make(env_name)
env = TimeLimit(env, time_limit)
env = RecordEpisodeStatistics(env)

agents = [
    A2C(i, osp, asp, 0.1, 0.1, False, 1, 1, "cpu")
    for i, (osp, asp) in enumerate(zip(env.observation_space, env.action_space))
]

for agent in agents:
    agent.restore(path + f"/agent{agent.agent_id}")

obs = env.reset()

for i in range(RUN_STEPS):
    obs = [torch.from_numpy(o) for o in obs]
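
# The rollout loop is cut off after the observation conversion. A hedged sketch
# of how a multi-agent evaluation loop of this shape typically continues; the
# agents' action-selection call (written here as agent.act) is hypothetical and
# not taken from the snippet above:
#     actions = [agent.act(obs[agent.agent_id]) for agent in agents]
#     obs, rewards, done, info = env.step(actions)
#     if all(done):
#         obs = env.reset()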