def make_mujoco_env(env_id, seed):
    """
    Create a wrapped, monitored gym.Env for MuJoCo.
    """
    set_global_seeds(seed)
    env = gym.make(env_id)
    env = Monitor(env, logger.get_dir())
    env.seed(seed)
    return env
def make_mujoco_env(env_id, seed):
    """
    Create a wrapped, monitored gym.Env for MuJoCo.
    """
    rank = MPI.COMM_WORLD.Get_rank()
    set_global_seeds(seed + 10000 * rank)
    env = gym.make(env_id)
    env = Monitor(env, os.path.join(logger.get_dir(), str(rank)))
    env.seed(seed)
    return env
def make_mujoco_env(env_id, seed, reward_scale=1.0):
    """
    Create a wrapped, monitored gym.Env for MuJoCo.
    """
    rank = MPI.COMM_WORLD.Get_rank()
    myseed = seed + 1000 * rank if seed is not None else None
    set_global_seeds(myseed)
    env = gym.make(env_id)
    logger_path = None if logger.get_dir() is None else os.path.join(logger.get_dir(), str(rank))
    env = Monitor(env, logger_path, allow_early_resets=True)
    env.seed(seed)
    if reward_scale != 1.0:
        from baselines.common.retro_wrappers import RewardScaler
        env = RewardScaler(env, reward_scale)
    return env
def make_env(env_id, env_type, topic, truth, corpus, subrank=0, seed=None,
             reward_scale=1.0, gamestate=None, wrapper_kwargs={}):
    mpi_rank = 0
    env = gym.make(env_id)
    env.set_topic(topic, truth_path=truth, corpus_path=corpus, env_rank=subrank)
    env.seed(seed + subrank if seed is not None else None)
    env = Monitor(env,
                  logger.get_dir() and os.path.join(logger.get_dir(), str(mpi_rank) + '.' + str(subrank)),
                  allow_early_resets=True)
    return env
def _thunk():
    env = make_atari(env_id)
    env.seed(seed + rank)
    env = Monitor(env, os.path.join('./logs', str(rank)))
    env = wrap_deepmind(env)
    env = WrapPyTorch(env)
    return env
def make_dart_env(env_id, seed):
    print("#####################################")
    print("seed", seed)
    set_global_seeds(seed)
    env = gym.make(env_id)
    env = Monitor(env, logger.get_dir())
    return env
def make_env_all_params(rank, add_monitor, args, sleep_multiple=2):
    if args["env_kind"] == 'ObstacleTowerEnv':
        env = _make_obs_env(rank, add_monitor, args, sleep_multiple)
    elif args["env_kind"] == 'atari':
        env = gym.make(args['env'])
        assert 'NoFrameskip' in env.spec.id
        env = NoopResetEnv(env, noop_max=args['noop_max'])
        env = MaxAndSkipEnv(env, skip=4)
        env = ProcessFrame84(env, crop=False)
        env = FrameStack(env, 4)
        env = ExtraTimeLimit(env, args['max_episode_steps'])
        if 'Montezuma' in args['env']:
            env = MontezumaInfoWrapper(env)
        env = AddRandomStateToInfo(env)
        if rank == 2:
            env = RenderWrapper(env)
    elif args["env_kind"] == 'mario':
        env = make_mario_env()
    elif args["env_kind"] == "retro_multi":
        env = make_multi_pong()
    elif args["env_kind"] == 'robopong':
        if args["env"] == "pong":
            env = make_robo_pong()
        elif args["env"] == "hockey":
            env = make_robo_hockey()
    if add_monitor:
        logdir = osp.join('summaries', args["exp_name"])
        logger.configure(logdir)
        env = Monitor(env, osp.join(logger.get_dir(), '%.2i' % rank))
    return env
def wrap_env_dqn(env):
    env = ThresholdResizeFrame(env)
    env = ClipRewardEnv(env)
    env = MaxAndSkipEnv(env, skip=4)
    env = Monitor(env, logger.get_dir(), allow_early_resets=True)
    env = FrameStack(env, 4)
    return env
def _thunk():
    env = make_atari(env_id)
    env.seed(seed + rank)
    env = Monitor(env, logger.get_dir() and os.path.join(logger.get_dir(), str(rank)))
    return wrap_deepmind(env, **wrapper_kwargs)
def make_env(stack=True, scale_rew=True, game=None, state=None, seed=0, render=False):
    """
    Create an environment with some standard wrappers.
    """
    # if not is_remote:
    #     if game is None or state is None:
    #         import data_set_reader
    #         train_set = data_set_reader.read_train_set()
    #         game, state = random.choice(train_set)
    #     print("it's local env: ", game, state)
    #     from retro_contest.local import make
    #     env = make(game=game, state=state)
    # else:
    #     print("it's remote env")
    #     import gym_remote.client as grc
    #     env = grc.RemoteEnv('tmp/sock')
    env = make(game=game, state=state)
    env.seed(seed)
    env = AllowBacktracking(env)
    env = Monitor(env,
                  logger.get_dir() and os.path.join(logger.get_dir(), str(seed)),
                  allow_early_resets=True)
    env = SonicDiscretizer(env, render)
    if scale_rew:
        env = RewardScaler(env)
    env = WarpFrame(env)
    if stack:
        env = FrameStack(env, 4)
    return env
def make_env_all_params(rank, add_monitor, args):
    if args["env_kind"] == 'atari':
        env = gym.make(args['env'])
        assert 'NoFrameskip' in env.spec.id
        env = NoopResetEnv(env, noop_max=args['noop_max'])
        env = MaxAndSkipEnv(env, skip=4)
        env = ProcessFrame84(env, crop=False)
        env = FrameStack(env, 4)
        env = ExtraTimeLimit(env, args['max_episode_steps'])
        if 'Montezuma' in args['env']:
            env = MontezumaInfoWrapper(env)
        env = AddRandomStateToInfo(env)
    elif args["env_kind"] == 'field':
        import gym_fieldedmove
        env = gym.make('FieldedMove-v0')
        # env = FrameStack(env, 4)
    elif args["env_kind"] == "ple":
        import gym_ple
        env = gym.make(args['env'])
        env._max_episode_steps = args['max_episode_steps']
        # env = MaxAndSkipEnv(env, skip=4)
        env = ProcessFrame84(env, crop=False)
        env = FrameStack(env, 4)
    if add_monitor:
        env = Monitor(env, osp.join(logger.get_dir(), '%.2i' % rank))
    return env
def make_env(self, env_id, seed, logger_dir=None, reward_scale=1.0, mpi_rank=0,
             subrank=0, info_keywords=()):
    """
    Create a wrapped, monitored gym.Env for safety.
    """
    scenario = scenarios.load('{}.py'.format(env_id)).Scenario()
    world = scenario.make_world()
    env_dict = {
        "world": world,
        'reset_callback': scenario.reset_world,
        'reward_callback': scenario.reward,
        'observation_callback': scenario.observation,
        'info_callback': None,
        'done_callback': scenario.done,
        'shared_viewer': True
    }
    env = gym.make('MultiAgent-v0', **env_dict)
    env.seed(seed + subrank if seed is not None else None)
    env = Monitor(env,
                  logger_dir and os.path.join(logger_dir, str(mpi_rank) + '.' + str(subrank)),
                  allow_early_resets=True,
                  info_keywords=info_keywords)
    env = ClipActionsWrapper(env)
    if reward_scale != 1.0:
        from baselines.common.retro_wrappers import RewardScaler
        env = RewardScaler(env, reward_scale)
    return env
def make_env_all_params(rank, add_monitor, args):
    if args["env_kind"] == "atari":
        env = gym.make(args["env"])
        assert "NoFrameskip" in env.spec.id
        # from self-supervised exploration via disagreement
        if args["stickyAtari"] == "true":
            env = StickyActionEnv(env)
            env._max_episode_steps = args["max_episode_steps"] * 4
        env = MaxAndSkipEnv(env, skip=4)
        env = ProcessFrame84(env, crop=False)
        env = FrameStack(env, 4)
        env = ExtraTimeLimit(env, args["max_episode_steps"])
        if "Montezuma" in args["env"]:
            env = MontezumaInfoWrapper(env)
        env = AddRandomStateToInfo(env)
        if args["noisy_tv"] == "true":
            env = NoisyTVEnvWrapper(env)
        # assert env.action_space == spaces.Discrete(7)
    elif args["env_kind"] == "mario":
        env = make_mario_env()
        if args["noisy_tv"] == "true":
            env = NoisyTVEnvWrapperMario(env)
    elif args["env_kind"] == "retro_multi":
        env = make_multi_pong()
    elif args["env_kind"] == "robopong":
        if args["env"] == "pong":
            env = make_robo_pong()
        elif args["env"] == "hockey":
            env = make_robo_hockey()
    if add_monitor:
        env = Monitor(env, osp.join(logger.get_dir(), "%.2i" % rank))
    return env
def make_env(env_id, env_type, subrank=0, seed=None, reward_scale=1.0,
             gamestate=None, wrapper_kwargs={}):
    mpi_rank = MPI.COMM_WORLD.Get_rank() if MPI else 0
    if env_type == 'atari':
        print("making atari")
        env = make_atari(env_id)
    elif env_type == 'retro':
        import retro
        gamestate = gamestate or retro.State.DEFAULT
        env = retro_wrappers.make_retro(game=env_id, max_episode_steps=10000,
                                        use_restricted_actions=retro.Actions.DISCRETE,
                                        state=gamestate)
    else:
        env = gym.make(env_id)
    env.seed(seed + subrank if seed is not None else None)
    env = Monitor(env,
                  logger.get_dir() and os.path.join(logger.get_dir(), str(mpi_rank) + '.' + str(subrank)),
                  allow_early_resets=True)
    if env_type == 'atari':
        env = wrap_deepmind(env, **wrapper_kwargs)
    elif env_type == 'retro':
        env = retro_wrappers.wrap_deepmind_retro(env, **wrapper_kwargs)
    if reward_scale != 1:
        env = retro_wrappers.RewardScaler(env, reward_scale)
    return env
def _thunk():
    env = retro.make(env_id, use_restricted_actions=retro.ACTIONS_MULTI_DISCRETE)
    env.seed(seed + rank)
    return Monitor(env, logger.get_dir() and os.path.join(logger.get_dir(), str(rank)))
def make_env_staliro(env_id, env_type, mpi_rank=0, subrank=0, seed=None,
                     reward_scale=1.0, gamestate=None, flatten_dict_observations=True,
                     wrapper_kwargs=None, logger_dir=None):
    wrapper_kwargs = wrapper_kwargs or {}
    env_params = dict()
    env_kwargs = dict(render_params=dict(zoom=2.5, viz_dir="/tmp/env_{}".format(subrank)))
    env = gym.make(env_id, env_params=env_params, **env_kwargs)
    # env = gym.make(env_id)  # subrank
    if flatten_dict_observations and isinstance(env.observation_space, gym.spaces.Dict):
        keys = env.observation_space.spaces.keys()
        env = gym.wrappers.FlattenDictWrapper(env, dict_keys=list(keys))
    env.seed(seed + subrank if seed is not None else None)
    env = Monitor(env,
                  logger_dir and os.path.join(logger_dir, str(mpi_rank) + '.' + str(subrank)),
                  allow_early_resets=True)
    return env
def _thunk():
    unity_env = UnityEnvironment(env_directory)
    env = UnityToGymWrapper(unity_env, rank, uint8_visual=False)
    env = Monitor(env, logger.get_dir() and os.path.join(logger.get_dir(), str(rank)))
    return env
def make_env_all_params(rank, add_monitor, args):
    if args["env_kind"] == 'atari':
        env = gym.make(args['env'])
        assert 'NoFrameskip' in env.spec.id
        if args["stickyAtari"]:  # add randomness to the agent's actions (sticky actions)
            env._max_episode_steps = args['max_episode_steps'] * 4
            env = StickyActionEnv(env)
        else:
            env = NoopResetEnv(env, noop_max=args['noop_max'])
        env = MaxAndSkipEnv(env, skip=4)  # repeat each action for 4 consecutive frames
        env = ProcessFrame84(env, crop=False)  # preprocess observations
        env = FrameStack(env, 4)  # stack the last 4 frames as the input
        env = ExtraTimeLimit(env, args['max_episode_steps'])
        if not args["stickyAtari"]:
            env = ExtraTimeLimit(env, args['max_episode_steps'])  # cap the number of steps per episode
        if 'Montezuma' in args['env']:  # record the agent's position, current room, and visited rooms
            env = MontezumaInfoWrapper(env)
        env = AddRandomStateToInfo(env)
    elif args["env_kind"] == 'mario':  # Super Mario
        env = make_mario_env()
    elif args["env_kind"] == "retro_multi":  # multi-agent game, Multi-Pong
        env = make_multi_pong()
    elif args["env_kind"] == 'robopong':
        if args["env"] == "pong":
            env = make_robo_pong()
        elif args["env"] == "hockey":
            env = make_robo_hockey()
    if add_monitor:
        env = Monitor(env, osp.join(logger.get_dir(), '%.2i' % rank))
    return env
def _thunk():
    env = gym.make(env_id)
    env.seed(seed + rank)
    env = Monitor(env, logger.get_dir() and os.path.join(logger.get_dir(), str(rank)))
    return env
def f():
    config = ffa_competition_env()
    env = Wrapped_Env(**config["env_kwargs"])
    env.observation_space = spaces.Box(0, 20, shape=(11, 11, 18), dtype=np.float32)

    # Add 3 random agents
    agents = []
    for agent_id in range(3):
        # if agent_id == env.winner_id:
        #     agents.append(TrainingAgent(config["agent"](agent_id, config["game_type"])))
        # else:
        agents.append(SimpleAgent(config["agent"](agent_id, config["game_type"])))

    agent_id += 1
    agents.append(TrainingAgent(config["agent"](agent_id, config["game_type"])))
    env.set_agents(agents)
    env.set_training_agent(agents[-1].agent_id)
    env.set_init_game_state(None)

    if logger.get_dir():
        env = Monitor(env, logger.get_dir(), allow_early_resets=True)
    return env
def make_env(env_id, env_type, mpi_rank=0, subrank=0, seed=None, reward_scale=1.0,
             gamestate=None, flatten_dict_observations=True, wrapper_kwargs=None,
             env_kwargs=None, logger_dir=None, initializer=None):
    if initializer is not None:
        initializer(mpi_rank=mpi_rank, subrank=subrank)
    wrapper_kwargs = wrapper_kwargs or {}
    env_kwargs = env_kwargs or {}
    env = gym.make(env_id, **env_kwargs)
    if flatten_dict_observations and isinstance(env.observation_space, gym.spaces.Dict):
        keys = env.observation_space.spaces.keys()
        env = gym.wrappers.FlattenDictWrapper(env, dict_keys=list(keys))
    env.seed(seed + subrank if seed is not None else None)
    env = Monitor(env,
                  logger_dir and os.path.join(logger_dir, str(mpi_rank) + '.' + str(subrank)),
                  allow_early_resets=True)
    # env = MaxAndSkipEnv(env)
    # if 'frame_stack' in wrapper_kwargs and wrapper_kwargs['frame_stack']:
    #     env = FrameStack(env, 4)
    if isinstance(env.action_space, gym.spaces.Box):
        env = ClipActionsWrapper(env)
    if reward_scale != 1:
        env = retro_wrappers.RewardScaler(env, reward_scale)
    return env
def make_env_all_params(rank, add_monitor, args):
    if args["env_kind"] == 'atari':
        env = gym.make(args['env'])
        assert 'NoFrameskip' in env.spec.id
        env = NoopResetEnv(env, noop_max=args['noop_max'])
        env = MaxAndSkipEnv(env, skip=4)
        env = ProcessFrame84(env, crop=False)
        env = FrameStack(env, 4)
        env = ExtraTimeLimit(env, args['max_episode_steps'])
        if 'Montezuma' in args['env']:
            env = MontezumaInfoWrapper(env)
        env = AddRandomStateToInfo(env)
    elif args["env_kind"] == 'mario':
        env = make_mario_env()
    elif args["env_kind"] == "retro_multi":
        env = make_multi_pong()
    elif args["env_kind"] == 'robopong':
        if args["env"] == "pong":
            env = make_robo_pong()
        elif args["env"] == "hockey":
            env = make_robo_hockey()
    elif args["env_kind"] == "my_games":
        env = gym.make(args['env'])
        env = MaxAndSkipEnv(env, skip=4)
        env = WarpFrame(env)
        env = FrameStack(env, 4)
    if add_monitor:
        env = Monitor(env, osp.join(logger.get_dir(), '%.2i' % rank))
    return env
def make_env(env_id, env_type, mpi_rank=0, subrank=0, seed=None, reward_scale=1.0,
             gamestate=None, flatten_dict_observations=True, wrapper_kwargs=None,
             logger_dir=None):
    wrapper_kwargs = wrapper_kwargs or {}
    if env_type == 'atari':
        env = make_atari(env_id)
    elif env_type == 'retro':
        import retro
        gamestate = gamestate or retro.State.DEFAULT
        env = retro_wrappers.make_retro(game=env_id, max_episode_steps=10000,
                                        use_restricted_actions=retro.Actions.DISCRETE,
                                        state=gamestate)
    else:
        env = gym.make(env_id)
    if flatten_dict_observations and isinstance(env.observation_space, gym.spaces.Dict):
        keys = env.observation_space.spaces.keys()
        env = gym.wrappers.FlattenDictWrapper(env, dict_keys=list(keys))
    env.seed(seed + subrank if seed is not None else None)
    env = Monitor(env,
                  logger_dir and os.path.join(logger_dir, str(mpi_rank) + '.' + str(subrank)),
                  allow_early_resets=True)
    if env_type == 'atari':
        env = wrap_deepmind(env, **wrapper_kwargs)
    elif env_type == 'retro':
        if 'frame_stack' not in wrapper_kwargs:
            wrapper_kwargs['frame_stack'] = 1
        env = retro_wrappers.wrap_deepmind_retro(env, **wrapper_kwargs)
    if reward_scale != 1:
        env = retro_wrappers.RewardScaler(env, reward_scale)
    return env
def build_env(args, extra_args):
    ncpu = multiprocessing.cpu_count()
    if sys.platform == 'darwin':
        ncpu //= 2
    nenv = args.num_env or ncpu
    alg = args.alg
    seed = args.seed

    env_type, env_id = get_env_type(args)

    if env_type in {'atari', 'retro'}:
        if alg == 'deepq':
            env = make_env(env_id, env_type, seed=seed, wrapper_kwargs={'frame_stack': True})
        elif alg == 'trpo_mpi':
            env = make_env(env_id, env_type, seed=seed)
        else:
            frame_stack_size = 4
            env = make_vec_env(env_id, env_type, nenv, seed,
                               gamestate=args.gamestate, reward_scale=args.reward_scale)
            env = VecFrameStack(env, frame_stack_size)
    elif env_type == "custom":
        try:
            if extra_args["step_size"] == "hour":
                env = custom_envs.HourlySimEnv(action_space_string=extra_args["action_space"],
                                               one_day=extra_args["one_day"],
                                               energy_in_state=extra_args["energy_in_state"])
            elif extra_args["step_size"] == "day":
                env = custom_envs.BehavSimEnv(action_space_string=extra_args["action_space"],
                                              one_day=extra_args["one_day"],
                                              energy_in_state=extra_args["energy_in_state"])
            else:
                print("step_size argument not recognized. Needs to be 'hour' or 'day'. Defaulting to day.")
                env = custom_envs.BehavSimEnv(action_space_string=extra_args["action_space"],
                                              one_day=extra_args["one_day"],
                                              energy_in_state=extra_args["energy_in_state"])
        except KeyError as e:
            raise KeyError("You didn't specify", e.args[0], "as an argument. Please do. or change the code.")
        # wrap it
        # timestamp = datetime.now().strftime('_%m_%d_%Y_%H_%M')
        # log_file = os.path.join(os.getcwd(), "baselines", "behavioral_sim", "logs", timestamp)
        logger_dir = logger.get_dir()
        # hard coded mpi_rank and subrank to 0
        env = Monitor(env, logger_dir and os.path.join(logger_dir, "0.0"), allow_early_resets=True)
        env = DummyVecEnv([lambda: env])
    else:
        config = tf.ConfigProto(allow_soft_placement=True,
                                intra_op_parallelism_threads=1,
                                inter_op_parallelism_threads=1)
        config.gpu_options.allow_growth = True
        get_session(config=config)

        flatten_dict_observations = alg not in {'her'}
        env = make_vec_env(env_id, env_type, args.num_env or 1, seed,
                           reward_scale=args.reward_scale,
                           flatten_dict_observations=flatten_dict_observations)

        if env_type == 'mujoco':
            env = VecNormalize(env, use_tf=True)

    return env
def _thunk():
    env = ObstacleTowerEnv(env_directory, worker_id=rank, realtime_mode=True)
    env = Monitor(env, logger.get_dir() and os.path.join(logger.get_dir(), str(rank)))
    return env
def make_control_env(env_id, seed, hist_len, block_high, version0, give_state):
    """
    Create a wrapped, monitored gym.Env for control tasks.
    """
    set_global_seeds(seed)
    if env_id == 'LunarLanderContinuousPOMDP-v0':
        newenv(hist_len=hist_len, block_high=block_high, version0=version0, give_state=give_state)
    env = gym.make(env_id)
    env = Monitor(env, logger.get_dir(), allow_early_resets=True, version0=version0)
    env.seed(seed)
    return env
def _thunk():
    env = make_atari(env_id)
    env.seed(seed + rank)
    # Monitor is a wrapper around the gym env; it mainly records episode statistics when an episode ends.
    env = Monitor(env, logger.get_dir() and os.path.join(logger.get_dir(), str(rank)))
    return wrap_deepmind(env, **wrapper_kwargs)
def _make_robosuite_env():
    from gym.wrappers import FlattenDictWrapper
    from baselines.bench import Monitor
    env = suite.make(env_id)
    env = FlattenDictWrapper(env, ['robot-state', 'object-state'])
    env = Monitor(env, logger.get_dir(), allow_early_resets=True)
    return env
def _thunk():
    env = gym.make(env_id)
    # env.seed(seed + rank)
    env = Monitor(env,
                  logger.get_dir() and os.path.join(logger.get_dir(), str(rank)),
                  allow_early_resets=True)
    return wrap_gvgai(env)
def make_env(env_id, env_type, mpi_rank=0, subrank=0, seed=None, reward_scale=1.0,
             gamestate=None, flatten_dict_observations=True, wrapper_kwargs=None,
             env_kwargs=None, logger_dir=None, initializer=None):
    if initializer is not None:
        initializer(mpi_rank=mpi_rank, subrank=subrank)

    wrapper_kwargs = wrapper_kwargs or {}
    env_kwargs = env_kwargs or {}

    if ':' in env_id:
        import re
        import importlib
        module_name = re.sub(':.*', '', env_id)
        env_id = re.sub('.*:', '', env_id)
        importlib.import_module(module_name)

    if env_type == 'atari':
        env = make_atari(env_id)
    elif env_type == 'retro':
        import retro
        gamestate = gamestate or retro.State.DEFAULT
        env = retro_wrappers.make_retro(game=env_id, max_episode_steps=10000,
                                        use_restricted_actions=retro.Actions.DISCRETE,
                                        state=gamestate)
    else:
        # here create our own environment which should be able to handle the parallelism:
        if env_type in {'nf-par'}:
            if env_id == 'Pendulumnf-v0':
                from gym.envs.registration import register
                register(
                    id='Pendulumnf-v0',
                    entry_point='nfunk.envs_nf.pendulum_nf:PendulumEnv',
                    max_episode_steps=200,
                )
            env = gym.make(env_id, **env_kwargs)
        else:
            env = gym.make(env_id, **env_kwargs)

    if flatten_dict_observations and isinstance(env.observation_space, gym.spaces.Dict):
        keys = env.observation_space.spaces.keys()
        env = gym.wrappers.FlattenDictWrapper(env, dict_keys=list(keys))

    env.seed(seed + subrank if seed is not None else None)
    env = Monitor(env,
                  logger_dir and os.path.join(logger_dir, str(mpi_rank) + '.' + str(subrank)),
                  allow_early_resets=True)

    if env_type == 'atari':
        env = wrap_deepmind(env, **wrapper_kwargs)
    elif env_type == 'retro':
        if 'frame_stack' not in wrapper_kwargs:
            wrapper_kwargs['frame_stack'] = 1
        env = retro_wrappers.wrap_deepmind_retro(env, **wrapper_kwargs)

    if isinstance(env.action_space, gym.spaces.Box):
        env = ClipActionsWrapper(env)

    if reward_scale != 1:
        env = retro_wrappers.RewardScaler(env, reward_scale)

    return env
def _thunk():
    env_single.seed(seed + 10000 * mpi_rank + rank if seed is not None else None)
    env = Monitor(
        env_single,
        filename=None,  # logger.get_dir() and os.path.join(logger.get_dir(), str(mpi_rank) + '.' + str(rank)),
        allow_early_resets=True)
    return env
def _thunk():
    if very_sparse:
        env = doom_env.DoomMyWayHomeFixed15Env()
    else:
        env = doom_env.DoomMyWayHomeEnv()
    env.seed(seed + rank)
    monitor_fname = logger.get_dir() and os.path.join(logger.get_dir(), str(rank))
    env = Monitor(env, monitor_fname, rank)
    return wrap_doom_deepmind_like(env, **wrapper_kwargs)
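The snippets above all build a single monitored environment, either directly or through a zero-argument `_thunk` factory, but none show how those factories are typically consumed. The following is a minimal sketch of the usual baselines pattern, not taken from any snippet above: a hypothetical `make_thunk` helper on a generic `CartPole-v1` environment, with each thunk handed to `SubprocVecEnv`, which calls it in a worker process so every worker gets its own seed and its own Monitor log file.

# Minimal sketch: consuming _thunk-style factories with baselines' SubprocVecEnv.
# Assumes gym and OpenAI Baselines are installed; make_thunk, the env id, and the
# seeding scheme are illustrative choices, not part of the snippets above.
import os
import gym
from baselines import logger
from baselines.bench import Monitor
from baselines.common.vec_env.subproc_vec_env import SubprocVecEnv

def make_thunk(rank, env_id='CartPole-v1', seed=0):
    def _thunk():
        env = gym.make(env_id)
        env.seed(seed + rank)
        # One monitor file per worker, written under the logger directory if one is configured.
        env = Monitor(env,
                      logger.get_dir() and os.path.join(logger.get_dir(), str(rank)),
                      allow_early_resets=True)
        return env
    return _thunk

if __name__ == '__main__':
    # Four workers, each built lazily from its own thunk inside a subprocess.
    venv = SubprocVecEnv([make_thunk(i) for i in range(4)])
    obs = venv.reset()
    venv.close()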