def create_env(env_id, output_path, seed=0):
    """Build a monitored gym environment whose global seed is offset by MPI rank.

    :param env_id: (str) gym environment id passed to ``gym.make``
    :param output_path: (str) directory holding the per-rank Monitor log
    :param seed: (int) base seed; globals get ``seed + 10000 * rank``
    :return: the Monitor-wrapped environment
    """
    worker_rank = MPI.COMM_WORLD.Get_rank()
    # Offset the global seed per worker so parallel runs diverge.
    set_global_seeds(seed + 10000 * worker_rank)
    monitor_path = os.path.join(output_path, str(worker_rank))
    env = Monitor(gym.make(env_id), monitor_path, allow_early_resets=True)
    env.seed(seed)
    return env
def seed(self, seed=None):
    """Seed the global pseudo-random number generators.

    :param seed: (int or None) seed to apply globally
    :return: (list) the applied seed wrapped in a single-element list
    """
    set_global_seeds(seed)
    return [seed]
def __init__(self, *args, **kwargs):
    """Build the Mobilenet-based policy, seeding first if a global seed is set.

    We need to set the seed here, once we are in the graph.
    """
    global seed
    # Fix: compare against the None singleton with ``is not`` (identity),
    # not ``!=`` — PEP 8, and avoids surprises with custom __eq__.
    if seed is not None:
        set_global_seeds(seed)
    super(MobilenetPolicy, self).__init__(*args, cnn_extractor=hub_module, **kwargs)
def __init__(self, *args, **kwargs):
    """Build the nature-CNN-lite policy, seeding first if a global seed is set.

    We need to set the seed here, once we are in the graph.
    """
    global seed
    # Fix: compare against the None singleton with ``is not`` (identity),
    # not ``!=`` — PEP 8, and avoids surprises with custom __eq__.
    if seed is not None:
        set_global_seeds(seed)
    super(NatureLitePolicy, self).__init__(*args, cnn_extractor=nature_cnn_lite, **kwargs)
def make_env(env_create_fkt, env_config, rank, seed=0):
    """Return a thunk that builds and seeds one environment instance.

    :param env_create_fkt: callable constructing the env from ``env_config``
    :param env_config: configuration forwarded to the factory
    :param rank: (int) subprocess index, used to offset the per-env seed
    :param seed: (int) base RNG seed
    """
    def _init():
        environment = env_create_fkt(env_config)
        environment.seed(seed + rank)
        return environment

    set_global_seeds(seed)
    return _init
def _make_env(rank, seed=0):
    """Return a thunk that builds one CausalWorld pushing-task environment.

    Generalized: the previously hard-coded seeds (env seeded with ``rank``,
    globals with ``0``) become ``seed + rank`` / ``seed`` with a default of 0,
    which preserves the original behaviour while matching the other
    ``make_env`` factories in this file.

    :param rank: (int) subprocess index; each env is seeded with ``seed + rank``
    :param seed: (int) base seed (default 0 keeps the original behaviour)
    """
    def _init():
        task = generate_task(task_generator_id="pushing")
        return CausalWorld(task=task, enable_visualization=False, seed=seed + rank)

    set_global_seeds(seed)
    return _init
def make_env(rank, seed=0):
    """Return a thunk that creates one seeded simulated-trading environment.

    :param rank: (int) subprocess index, offsets the per-env seed
    :param seed: (int) base RNG seed
    """
    def _init():
        trading_env = SimulatedTradingEnvironment(backtest=False)
        trading_env.seed(seed + rank)
        return trading_env

    set_global_seeds(seed)
    return _init
def make_env(env_id, seed=0):
    """Return a thunk that builds one seeded gym environment.

    :param env_id: (str) gym environment id
    :param seed: (int) seed for both the env and the global RNGs
    """
    def _init():
        gym_env = gym.make(env_id)
        gym_env.seed(seed)
        return gym_env

    set_global_seeds(seed)
    return _init
def make_env(rank, seed=0):
    """Return a thunk building one seeded KeepTradingEnv over the shared ``df``.

    :param rank: (int) subprocess index, offsets the per-env seed
    :param seed: (int) base RNG seed
    """
    def _init():
        trading_env = KeepTradingEnv(df)
        trading_env.seed(seed + rank)
        return trading_env

    set_global_seeds(seed)
    return _init
def _init():
    """Create, seed and monitor one subprocess environment."""
    # Seed the global RNGs before the env is constructed.
    set_global_seeds(seed + rank)
    environment = gym.make(env_id)
    environment.seed(seed + rank)
    # One monitor log per rank inside log_dir.
    return Monitor(environment, os.path.join(log_dir, str(rank)),
                   allow_early_resets=True)
def _init():
    """Build, seed and monitor the VAE-based driving environment."""
    set_global_seeds(seed)
    driving_env = Env(client,
                      frame_skip=frame_skip,
                      vae=vae,
                      min_throttle=MIN_THROTTLE,
                      max_throttle=MAX_THROTTLE,
                      n_command_history=N_COMMAND_HISTORY,
                      n_stack=n_stack)
    driving_env.seed(seed)
    return Monitor(driving_env, log_dir, allow_early_resets=True)
def _init():
    """Build and seed the BeamNG env; monitor it unless tele-operated."""
    set_global_seeds(seed)
    beamng_env = BeamNGenv(level=LEVEL,
                           frame_skip=frame_skip,
                           vae=vae,
                           const_throttle=None,
                           min_throttle=MIN_THROTTLE,
                           max_throttle=MAX_THROTTLE,
                           max_cte_error=MAX_CTE_ERROR,
                           n_command_history=N_COMMAND_HISTORY,
                           n_stack=n_stack,
                           simulation=simulation)
    beamng_env.seed(seed)
    # Tele-operation sessions are not logged to a Monitor file.
    if not teleop:
        beamng_env = Monitor(beamng_env, log_dir, allow_early_resets=True)
    return beamng_env
def make_env(env_id, rank, log_dir, seed=0):
    """Return a thunk building one monitored, seeded Atari environment.

    :param env_id: (str) Atari environment id
    :param rank: (int) subprocess index (names the log file, offsets the seed)
    :param log_dir: (str) directory prefix for the Monitor file
    :param seed: (int) base RNG seed
    """
    def _init():
        atari_env = make_atari(env_id)
        atari_env = Monitor(atari_env, log_dir + str(rank), allow_early_resets=True)
        atari_env.seed(seed + rank)
        return atari_env

    set_global_seeds(seed)
    return _init
def set_random_seed(config):
    """Sets the random seed to python, numpy and tensorflow.

    The selected seed is saved back into ``config['meta']['seed']`` so the
    run can be reproduced later.

    :param config: The lab config file (dict-like with a 'meta' section)
    """
    random_seed = config['meta'].get('seed')
    if random_seed is None:
        # Fix: truncate the time fallback to an int — the previous
        # ``time.time()`` float is rejected by np.random.seed (called
        # inside set_global_seeds).
        random_seed = int(time.time())
    config['meta']['seed'] = random_seed
    set_global_seeds(random_seed)
def main(args):
    """Start training (or evaluating) the model.

    :param args: (ArgumentParser) the training arguments
    """
    with tf_util.make_session(num_cpu=1):
        set_global_seeds(args.seed)
        env = gym.make(args.env_id)

        def policy_fn(name, ob_space, ac_space, reuse=False, placeholders=None, sess=None):
            # Two-hidden-layer MLP policy used for both training and evaluation.
            return mlp_policy.MlpPolicy(name=name, ob_space=ob_space, ac_space=ac_space,
                                        reuse=reuse, sess=sess,
                                        hid_size=args.policy_hidden_size,
                                        num_hid_layers=2,
                                        placeholders=placeholders)

        env = bench.Monitor(env, logger.get_dir() and os.path.join(logger.get_dir(), "monitor.json"))
        env.seed(args.seed)
        gym.logger.setLevel(logging.WARN)

        # Build the (long) task name and derive checkpoint/log dirs from it.
        task_name = get_task_name(args)
        args.checkpoint_dir = os.path.join(args.checkpoint_dir, task_name)
        args.log_dir = os.path.join(args.log_dir, task_name)

        if args.task == 'train':
            dataset = MujocoDset(expert_path=args.expert_path,
                                 traj_limitation=args.traj_limitation)
            # Build the discriminator network.
            reward_giver = TransitionClassifier(env, args.adversary_hidden_size,
                                                entcoeff=args.adversary_entcoeff)
            # Train the policy network (note: the policy is declared as policy_fn).
            train(env, args.seed, policy_fn, reward_giver, dataset, args.algo,
                  args.g_step, args.d_step, args.policy_entcoeff,
                  args.num_timesteps, args.save_per_iter, args.checkpoint_dir,
                  args.pretrained, args.bc_max_iter, task_name)
        elif args.task == 'evaluate':
            # Used when evaluating a previously trained model.
            runner(env, policy_fn, args.load_model_path,
                   timesteps_per_batch=1024,
                   number_trajs=10,
                   stochastic_policy=args.stochastic_policy,
                   save=args.save_sample)
        else:
            raise NotImplementedError
        env.close()
def make_env(env_id, rank, seed=0):
    """Return a thunk building one seeded Atari environment.

    :param env_id: (str) Atari environment id
    :param rank: (int) subprocess index, offsets the per-env seed
    :param seed: (int) base RNG seed
    """
    def _init():
        atari_env = make_atari(env_id)
        # env = VecFrameStack(env, n_stack=4)
        atari_env.seed(seed + rank)
        return atari_env

    set_global_seeds(seed)
    return _init
def make_env(data_provider: BaseDataProvider, rank: int = 0, seed: int = 0):
    """Return a thunk building one seeded TradingEnv.

    :param data_provider: the data provider feeding the environment
    :param rank: subprocess index, offsets the per-env seed
    :param seed: base RNG seed
    """
    def _init():
        trading_env = TradingEnv(data_provider)
        trading_env.seed(seed + rank)
        return trading_env

    set_global_seeds(seed)
    return _init
def make_env(env_id, rank, seed=0):
    """Return a thunk building one seeded, image-observation wrapped env.

    :param env_id: (str) gym environment id
    :param rank: (int) subprocess index, offsets the per-env seed
    :param seed: (int) base RNG seed
    """
    def _init():
        grid_env = gym.make(env_id)
        # Stack the two observation wrappers the original applied in order.
        grid_env = ImgObsWrapper(RGBImgPartialObsWrapper(grid_env))
        grid_env.seed(seed + rank)
        return grid_env

    set_global_seeds(seed)
    return _init
def make_env(rank, seed=0):
    """Return a thunk building one seeded Stoch2 environment.

    :param rank: (int) index of the subprocess, offsets the per-env seed
    :param seed: (int) base RNG seed
    """
    def _init():
        stoch_env = stoch2_gym_env.Stoch2Env()
        stoch_env.seed(seed + rank)
        return stoch_env

    set_global_seeds(seed)
    return _init
def make_env(rank, seed=0):  # pylint: disable=C0111
    """Return a thunk creating one monitored, seeded IoTNode environment.

    :param rank: (int) subprocess index; offsets the per-env seed and names
        the Monitor log file
    :param seed: (int) base RNG seed
    """
    def _thunk():
        env = IoTNode(**env_kwargs)
        env.seed(seed + rank)
        env = Monitor(env, os.path.join(log_dir, str(rank)), allow_early_resets=True)
        return env

    # Fix: seed the globals with the requested base seed — the previous
    # hard-coded ``set_global_seeds(0)`` silently ignored the ``seed``
    # argument (default 0 keeps the original behaviour).
    set_global_seeds(seed)
    return _thunk
def set_seed(seed):
    """Seed every RNG in use (python, numpy, tensorflow, torch) for reproducibility.

    :param seed: (int) the seed value
    """
    set_global_seeds(seed)
    # Hash randomisation can only be pinned via the environment variable.
    os.environ['PYTHONHASHSEED'] = str(seed)
    random.seed(seed)
    np.random.seed(seed)
    tf.compat.v1.set_random_seed(seed)
    torch.manual_seed(seed)
    # Trade cuDNN autotuning speed for deterministic kernels.
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False
def _setup_learn(self, seed):
    """Check the environment, set the seed, and set the logger.

    :param seed: (int) the seed value
    :raises ValueError: if no environment has been set on the model
    """
    if self.env is None:
        raise ValueError("Error: cannot train the model without a valid environment, please set an environment with"
                         "set_env(self, env) method.")
    if seed is None:
        return
    set_global_seeds(seed)
def make_env(rank, seed=0):
    """Return a thunk building one seeded Vision60 Bullet environment.

    :param rank: (int) index of the subprocess, offsets the per-env seed
    :param seed: (int) base RNG seed
    """
    def _init():
        bullet_env = vision60_gym_bullet_env.Vision60BulletEnv()
        bullet_env.seed(seed + rank)
        return bullet_env

    set_global_seeds(seed)
    return _init
def make_env(env_id, env_level, rank, log_dir, frame_stack=False, useMonitor=True,
             seed=0, map_file=None, render_as_observation=False,
             exponential_agent_training_curve=False):
    """Return a thunk creating one (optionally wrapped) environment instance.

    :param env_id: (str) "WarehouseEnv" builds the custom warehouse env;
        anything else goes through ``gym.make(env_id, level=env_level)``
    :param env_level: level forwarded to ``gym.make`` for non-warehouse envs
    :param rank: (int) subprocess index, names the Monitor log file
    :param log_dir: (str) directory prefix for Monitor files
    :param frame_stack: (bool) wrap with a 4-frame FrameStack
    :param useMonitor: (bool) wrap with a Monitor
    :param seed: (int) seed applied to the global RNGs
    :param map_file: unused in this body; kept for interface compatibility
    :param render_as_observation: (bool) forwarded to WarehouseEnv
    :param exponential_agent_training_curve: (bool) forwarded to WarehouseEnv
    """
    def _init():
        if env_id == "WarehouseEnv":
            # One agent at the centre of an otherwise empty 11x11 grid,
            # with no obstacles.
            agent_map = np.zeros((11, 11))
            agent_map[5, 5] = 1
            obstacle_map = np.zeros((11, 11))
            env = WarehouseEnv(agent_map=agent_map,
                               obstacle_map=obstacle_map,
                               render_as_observation=render_as_observation,
                               exponential_agent_training_curve=exponential_agent_training_curve)
        else:
            env = gym.make(env_id, level=env_level)
        if frame_stack:
            env = FrameStack(env, 4)
        if useMonitor:
            env = Monitor(env, log_dir + str(rank), allow_early_resets=True)
        return env

    set_global_seeds(seed)
    return _init
def make_tutankham_env(num_env, seed=0, start_index=0):
    """Create a SubprocVecEnv of ``num_env`` monitored Tutankham environments.

    :param num_env: (int) number of parallel environments
    :param seed: (int) base seed; env ``i`` is seeded with ``seed + start_index + i``
    :param start_index: (int) rank of the first environment
    """
    def make_env(rank):
        def _thunk():
            env = gym.make('Tutankham-v4')
            env.seed(seed + rank)
            env = Monitor(env, filename=None, allow_early_resets=True)
            return wrap_env(env, True)
        return _thunk

    set_global_seeds(seed)
    return SubprocVecEnv([make_env(start_index + i) for i in range(num_env)])
def make_env(rank, seed=0):
    """Return a thunk building one seeded humanoid-mimic environment.

    :param rank: (int) subprocess index
    :param seed: (int) base seed; each environment gets ``seed + rank``
    """
    def _init():
        mimic_env = SimpleHumanoidMimicEnv(sub_id=6, enable_draw=False)
        # Important: use a different seed for each environment.
        mimic_env.seed(seed + rank)
        return mimic_env

    set_global_seeds(seed)
    return _init
def train(params):
    """Train a TRPO model on the configured environment and save it.

    :param params: dict-like hyperparameters ('policy', 'environment',
        'seed', 'model_name', the TRPO settings, and 'train_steps')
    """
    rank = MPI.COMM_WORLD.Get_rank()
    if rank == 0:
        logger.configure()
    else:
        # Non-root MPI workers stay quiet.
        logger.configure(format_strs=[])

    # Setup config: pick the policy class and matching env configuration.
    if params.get("policy") == 'mlp':
        policy = MlpPolicy
        env = gym.make(params.get("environment"))
        env.configure(envConfig)
        env.reset()
    else:
        policy = CnnPolicy
        env = gym.make(params.get("environment"))
        env.configure(CnnNet)
        env.reset()

    exp_name = ("{0}_{1}_{2}".format(params.get("model_name"),
                                     params.get("policy"),
                                     params.get("environment")))
    log_dir = './logs/' + exp_name

    if params.get("seed") > 0:
        # Fix: the original line read
        #   workerseed = params.get("seed"), +10000 * MPI.COMM_WORLD.Get_rank()
        # — the stray comma built a 2-tuple instead of the intended int seed,
        # which would then be passed to set_global_seeds/env.seed.
        workerseed = params.get("seed") + 10000 * MPI.COMM_WORLD.Get_rank()
        set_global_seeds(workerseed)
        env.seed(workerseed)

    # Create the model.
    model = TRPO(policy, env, verbose=1, tensorboard_log=log_dir,
                 timesteps_per_batch=params.get("timesteps_per_batch"),
                 max_kl=params.get("max_kl"),
                 cg_iters=params.get("cg_iters"),
                 cg_damping=params.get("cg_damping"),
                 entcoeff=params.get("entcoeff"),
                 gamma=params.get("gamma"),
                 lam=params.get("lam"),
                 vf_iters=params.get("vf_iters"),
                 vf_stepsize=params.get("vf_stepsize")
                 # ,policy_kwargs=policy_kwargs
                 )
    model.learn(total_timesteps=params.get("train_steps"))
    model.save(exp_name)
    env.close()
    del env
def _init():
    """Create, optionally wrap, seed and monitor one subprocess environment."""
    set_global_seeds(seed + rank)
    env = gym.make(env_id, **kwargs)
    # Dict observation space is currently not supported.
    # https://github.com/hill-a/stable-baselines/issues/321
    # We allow a Gym env wrapper (a subclass of gym.Wrapper)
    if wrapper_class:
        env = wrapper_class(env)
    env.seed(seed + rank)
    return Monitor(env, os.path.join(log_dir, str(rank)), allow_early_resets=True)
def _make_env(rank):
    """Return a thunk building one HER-wrapped CausalWorld environment.

    :param rank: (int) subprocess index; offsets the env seed from ``seed_num``
    """
    def _init():
        causal_task = generate_task(task_generator_id=task_name)
        causal_env = CausalWorld(task=causal_task,
                                 skip_frame=skip_frame,
                                 enable_visualization=False,
                                 seed=seed_num + rank,
                                 max_episode_length=maximum_episode_length)
        return HERGoalEnvWrapper(causal_env)

    set_global_seeds(seed_num)
    return _init
def make_env(rank, log_dir, seed=0):
    """Return a thunk creating one monitored ToyEnv training instance.

    :param rank: (int) subprocess index, names the Monitor log file
    :param log_dir: (str) directory for Monitor files and env logging
    :param seed: (int) base RNG seed; each env gets ``seed + rank``
    """
    def _init():
        toy_env = ToyEnv(train=True, log_dir=log_dir)
        toy_env = Monitor(toy_env, log_dir + str(rank), allow_early_resets=True)
        toy_env.seed(seed + rank)
        return toy_env

    set_global_seeds(seed)
    return _init