def create_env(env_id, output_path, seed=0):
    rank = MPI.COMM_WORLD.Get_rank()
    set_global_seeds(seed + 10000 * rank)
    env = gym.make(env_id)
    env = Monitor(env, os.path.join(output_path, str(rank)), allow_early_resets=True)
    env.seed(seed)
    return env
Example No. 2
    def seed(self, seed=None):
        """
        :return: the current seed of the pseudorandom number generator
        """
        set_global_seeds(seed)

        return [seed]
Example No. 3
 def __init__(self, *args, **kwargs):
     global seed
     # we need to set seed here, once we are in the graph
     if seed is not None:
         set_global_seeds(seed)
     super(MobilenetPolicy, self).__init__(*args,
                                           cnn_extractor=hub_module,
                                           **kwargs)
Example No. 4
 def __init__(self, *args, **kwargs):
     global seed
     # we need to set seed here, once we are in the graph
     if seed is not None:
         set_global_seeds(seed)
     super(NatureLitePolicy, self).__init__(*args,
                                            cnn_extractor=nature_cnn_lite,
                                            **kwargs)
Example No. 5
def make_env(env_create_fkt, env_config, rank, seed=0):
    def _init():
        env = env_create_fkt(env_config)
        env.seed(seed + rank)
        return env

    set_global_seeds(seed)
    return _init
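A minimal usage sketch (the environment-creation function, config, and worker count are placeholders taken from the example above): the thunks returned by make_env are normally handed to a vectorized environment such as SubprocVecEnv, as Example No. 26 below does.

from stable_baselines.common.vec_env import SubprocVecEnv

n_envs = 4  # hypothetical number of parallel workers
vec_env = SubprocVecEnv([make_env(env_create_fkt, env_config, rank=i) for i in range(n_envs)])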
Example No. 6
def _make_env(rank):
    def _init():
        task = generate_task(task_generator_id="pushing")
        env = CausalWorld(task=task, enable_visualization=False, seed=rank)
        return env

    set_global_seeds(0)
    return _init
Example No. 7
def make_env(rank, seed=0):
    def _init():
        env = SimulatedTradingEnvironment(backtest=False)
        env.seed(seed + rank)
        return env

    set_global_seeds(seed)
    return _init
Example No. 8
    def make_env(env_id, seed=0):
        def _init():
            env = gym.make(env_id)
            env.seed(seed)
            return env

        set_global_seeds(seed)
        return _init
Example No. 9
    def make_env(rank, seed=0):
        def _init():
            env = KeepTradingEnv(df)
            env.seed(seed + rank)
            return env

        set_global_seeds(seed)
        return _init
Example No. 10
 def _init():
     set_global_seeds(seed + rank)
     env = gym.make(env_id)
     env.seed(seed + rank)
     env = Monitor(env,
                   os.path.join(log_dir, str(rank)),
                   allow_early_resets=True)
     return env
Example No. 11
 def _init():
     set_global_seeds(seed)
     env = Env(client, frame_skip=frame_skip, vae=vae, min_throttle=MIN_THROTTLE,
         max_throttle=MAX_THROTTLE, n_command_history=N_COMMAND_HISTORY,
         n_stack=n_stack)
     env.seed(seed)
     env = Monitor(env, log_dir, allow_early_resets=True)
     return env
Example No. 12
 def _init():
     set_global_seeds(seed)
     env = BeamNGenv(level=LEVEL, frame_skip=frame_skip, vae=vae, const_throttle=None, min_throttle=MIN_THROTTLE,
                        max_throttle=MAX_THROTTLE, max_cte_error=MAX_CTE_ERROR, n_command_history=N_COMMAND_HISTORY,
                        n_stack=n_stack, simulation=simulation)
     env.seed(seed)
     if not teleop:
         env = Monitor(env, log_dir, allow_early_resets=True)
     return env
Example No. 13
def make_env(env_id, rank, log_dir, seed=0):
    def _init():
        env = make_atari(env_id)
        env = Monitor(env, log_dir + str(rank), allow_early_resets=True)
        env.seed(seed + rank)
        return env

    set_global_seeds(seed)
    return _init
Example No. 14
def set_random_seed(config):
    """
    Sets the random seed for Python, NumPy and TensorFlow.
    The selected seed is saved in the config['meta'] section.
    :param config: The lab config file
    """
    # default to the current time, cast to int since np.random.seed requires an integer
    random_seed = config['meta'].get('seed', int(time.time()))
    config['meta']['seed'] = random_seed

    set_global_seeds(random_seed)
Example No. 15
def main(args):
    """
    start training the model

    :param args: (ArgumentParser) the training argument
    """
    with tf_util.make_session(num_cpu=1):
        set_global_seeds(args.seed)
        env = gym.make(args.env_id)

        def policy_fn(name, ob_space, ac_space, reuse=False, placeholders=None, sess=None):
            return mlp_policy.MlpPolicy(name=name, ob_space=ob_space, ac_space=ac_space, reuse=reuse, sess=sess,
                                        hid_size=args.policy_hidden_size, num_hid_layers=2, placeholders=placeholders)


        #========================================================================================================
        env = bench.Monitor(env, logger.get_dir() and
                            os.path.join(logger.get_dir(), "monitor.json"))
        env.seed(args.seed)
        gym.logger.setLevel(logging.WARN)

        # get the (long) task name ===========================================================================
        task_name = get_task_name(args)
        args.checkpoint_dir = os.path.join(args.checkpoint_dir, task_name)
        args.log_dir = os.path.join(args.log_dir, task_name)
        # =======================================================================================================


        if args.task == 'train':
            dataset = MujocoDset(expert_path=args.expert_path, traj_limitation=args.traj_limitation)

            # create the discriminator network
            reward_giver = TransitionClassifier(env, args.adversary_hidden_size, entcoeff=args.adversary_entcoeff)

            # train the policy network
            # note that the policy network is declared via policy_fn
            train(env, args.seed, policy_fn, reward_giver,
                  dataset, args.algo, args.g_step, args.d_step,
                  args.policy_entcoeff, args.num_timesteps, args.save_per_iter, args.checkpoint_dir, args.pretrained, args.bc_max_iter, task_name)



        # ======================================= to be revisited later =============================================
        # used when evaluating the trained model
        elif args.task == 'evaluate':
            runner(env,
                   policy_fn,
                   args.load_model_path,
                   timesteps_per_batch=1024,
                   number_trajs=10,
                   stochastic_policy=args.stochastic_policy,
                   save=args.save_sample
                   )
        else:
            raise NotImplementedError

        env.close()
Example No. 16
def make_env(env_id, rank, seed=0):
    def _init():
        env = make_atari(env_id)
        # env = VecFrameStack(env, n_stack=4)
        env.seed(seed + rank)
        return env

    set_global_seeds(seed)
    return _init
Example No. 17
def make_env(data_provider: BaseDataProvider, rank: int = 0, seed: int = 0):
    def _init():
        env = TradingEnv(data_provider)
        env.seed(seed + rank)
        return env

    set_global_seeds(seed)

    return _init
Example No. 18
def create_env(env_id, output_path, seed=0):
    rank = MPI.COMM_WORLD.Get_rank()
    set_global_seeds(seed + 10000 * rank)
    env = gym.make(env_id)
    env = Monitor(env,
                  os.path.join(output_path, str(rank)),
                  allow_early_resets=True)
    env.seed(seed)
    return env
Example No. 19
def make_env(env_id, rank, seed=0):
    def _init():
        env = gym.make(env_id)
        env = RGBImgPartialObsWrapper(env)
        env = ImgObsWrapper(env)
        env.seed(seed + rank)
        return env

    set_global_seeds(seed)
    return _init
Example No. 20
def make_env(rank, seed=0):
    """
    :param rank: (int) index of the subprocess
    """
    def _init():
        env = stoch2_gym_env.Stoch2Env()
        env.seed(seed + rank)
        return env
    set_global_seeds(seed)
    return _init
Example No. 21
    def make_env(rank, seed=0):  # pylint: disable=C0111
        def _thunk():
            env = IoTNode(**env_kwargs)
            env.seed(seed + rank)
            env = Monitor(env,
                          os.path.join(log_dir, str(rank)),
                          allow_early_resets=True)
            return env

        set_global_seeds(0)
        return _thunk
Example No. 22
def set_seed(seed):
    set_global_seeds(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    random.seed(seed)
    np.random.seed(seed)

    tf.compat.v1.set_random_seed(seed)

    torch.manual_seed(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False
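A quick sanity check for the helper above (the seed value is arbitrary): reseeding with the same value reproduces the same NumPy draws. Note that setting os.environ['PYTHONHASHSEED'] at runtime only affects child processes; the current interpreter keeps the hash seed it started with.

set_seed(42)
a = np.random.rand(3)
set_seed(42)
b = np.random.rand(3)
assert np.allclose(a, b)  # identical seeds produce identical draws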
Example No. 23
    def _setup_learn(self, seed):
        """
        check the environment, set the seed, and set the logger

        :param seed: (int) the seed value
        """
        if self.env is None:
            raise ValueError("Error: cannot train the model without a valid environment, please set an environment "
                             "with set_env(self, env) method.")
        if seed is not None:
            set_global_seeds(seed)
Example No. 24
def make_env(rank, seed=0):
    """
    :param rank: (int) index of the subprocess
    """
    def _init():
        env = vision60_gym_bullet_env.Vision60BulletEnv()
        env.seed(seed + rank)
        return env

    set_global_seeds(seed)
    return _init
Example No. 25
def make_env(env_id,
             env_level,
             rank,
             log_dir,
             frame_stack=False,
             useMonitor=True,
             seed=0,
             map_file=None,
             render_as_observation=False,
             exponential_agent_training_curve=False):
    def _init():
        if env_id == "WarehouseEnv":
            #             if map_file is "None" or map_file is None:
            simple_agent = np.zeros((11, 11))
            simple_agent[5, 5] = 1
            #                      [[ 0, 1,  0,  0,  0,  0,  2, 0, 0],
            #                       [ 0, 0,  0,  0,  0,  0,  0, 0, 0],
            #                       [ 0, 0,  0,  0,  0,  0,  0, 0, 0],
            #                       [ 0, 0,  0,  0,  0,  0,  0, 0, 0],
            #                       [ 0, 0,  0,  0,  0,  0,  0, 0, 0],
            #                       [ 0, 0,  0,  0,  0,  0,  0, 0, 0],
            #                       [ 0, 0,  0,  0,  3,  0,  0, 0, 0]]
            #             simple_agent = \
            #                      [[ 0, 1,  0,  0,  0,  0,  0, 0, 0],
            #                       [ 0, 0,  0,  0,  0,  0,  0, 0, 0],
            #                       [ 0, 0,  0,  0,  0,  0,  0, 0, 0],
            #                       [ 0, 0,  0,  0,  0,  0,  0, 0, 0],
            #                       [ 0, 0,  0,  0,  0,  0,  0, 0, 0],
            #                       [ 0, 0,  0,  0,  0,  0,  0, 0, 0],
            #                       [ 0, 0,  0,  0,  0,  0,  0, 0, 0]]
            simple_world = np.zeros((11, 11))
            #                      [[ 0, 0,  0,  0,  0,  0,  0, 0, 0],
            #                       [ 0, 0,  0,  0,  0,  0,  0, 0, 0],
            #                       [ 0, 0,  0,  0,  1,  0,  0, 0, 0],
            #                       [ 0, 1,  0,  0,  0,  1,  0, 0, 0],
            #                       [ 0, 0,  0,  0,  1,  0,  0, 0, 0],
            #                       [ 0, 0,  0,  0,  0,  0,  0, 0, 0],
            #                       [ 0, 0,  0,  0,  0,  0,  0, 0, 0]]
            env = WarehouseEnv(agent_map=simple_agent,
                               obstacle_map=simple_world,
                               render_as_observation=render_as_observation,
                               exponential_agent_training_curve=
                               exponential_agent_training_curve)
        else:
            env = gym.make(env_id, level=env_level)
        if frame_stack:
            env = FrameStack(env, 4)
        if useMonitor:
            env = Monitor(env, log_dir + str(rank), allow_early_resets=True)
        return env

    set_global_seeds(seed)
    return _init
Example No. 26
def make_tutankham_env(num_env, seed=0, start_index=0):
    def make_env(rank):
        def _thunk():
            env = gym.make('Tutankham-v4')
            env.seed(seed + rank)
            env = Monitor(env, filename=None, allow_early_resets=True)
            return wrap_env(env, True)

        return _thunk

    set_global_seeds(seed)
    return SubprocVecEnv([make_env(i + start_index) for i in range(num_env)])
Example No. 27
def make_env(rank, seed=0):
    def _init():
        sub_id = 6
        enable_draw = False
        env = SimpleHumanoidMimicEnv(sub_id=sub_id, enable_draw=enable_draw)

        # Important: use a different seed for each environment
        env.seed(seed + rank)
        return env

    set_global_seeds(seed)
    return _init
Example No. 28
def train(params):
    rank = MPI.COMM_WORLD.Get_rank()

    if rank == 0:
        logger.configure()
    else:
        logger.configure(format_strs=[])

    # setup config
    if params.get("policy") == 'mlp':
        policy = MlpPolicy
        env = gym.make(params.get("environment"))
        env.configure(envConfig)
        env.reset()
    else:
        policy = CnnPolicy
        env = gym.make(params.get("environment"))
        env.configure(CnnNet)
        env.reset()

    exp_name = ("{0}_{1}_{2}".format(params.get("model_name"),
                                     params.get("policy"),
                                     params.get("environment")))

    log_dir = './logs/' + exp_name

    if params.get("seed") > 0:
        workerseed = params.get("seed") + 10000 * MPI.COMM_WORLD.Get_rank()
        set_global_seeds(workerseed)
        env.seed(workerseed)

    # create model
    model = TRPO(policy,
                 env,
                 verbose=1,
                 tensorboard_log=log_dir,
                 timesteps_per_batch=params.get("timesteps_per_batch"),
                 max_kl=params.get("max_kl"),
                 cg_iters=params.get("cg_iters"),
                 cg_damping=params.get("cg_damping"),
                 entcoeff=params.get("entcoeff"),
                 gamma=params.get("gamma"),
                 lam=params.get("lam"),
                 vf_iters=params.get("vf_iters"),
                 vf_stepsize=params.get("vf_stepsize")
                 # ,policy_kwargs=policy_kwargs
                 )

    model.learn(total_timesteps=params.get("train_steps"))
    model.save(exp_name)
    env.close()
    del env
Example No. 29
    def _init():
        set_global_seeds(seed + rank)
        env = gym.make(env_id, **kwargs)

        # Dict observation space is currently not supported.
        # https://github.com/hill-a/stable-baselines/issues/321
        # We allow a Gym env wrapper (a subclass of gym.Wrapper)
        if wrapper_class:
            env = wrapper_class(env)

        env.seed(seed + rank)
        env = Monitor(env, os.path.join(log_dir, str(rank)), allow_early_resets=True)
        return env
Example No. 30
    def _make_env(rank):
        def _init():
            task = generate_task(task_generator_id=task_name)
            env = CausalWorld(task=task,
                              skip_frame=skip_frame,
                              enable_visualization=False,
                              seed=seed_num + rank,
                              max_episode_length=maximum_episode_length)
            env = HERGoalEnvWrapper(env)
            return env

        set_global_seeds(seed_num)
        return _init
Example No. 31
def make_env(rank, log_dir, seed=0):
    def _init():
        env = ToyEnv(
            train=True,
            log_dir=log_dir,
        )
        env = Monitor(env, log_dir + str(rank), allow_early_resets=True)
        env.seed(seed + rank)
        return env

    set_global_seeds(seed)

    return _init