Пример #1
0
    def __init__(self, env_name, discount, num_iterations, lamb, animate,
                 kl_target):
        """Set up the environment, policy, value function, log file and scaler.

        Args:
            env_name: Environment id handed to ``gym.make``.
            discount: Stored on the instance and forwarded to the policy.
            num_iterations: Stored on the instance (not used here).
            lamb: Stored on the instance and forwarded to the policy.
            animate: Stored on the instance (not used here).
            kl_target: Forwarded to ``ProximalPolicy``.

        Side effects:
            Copies the source files of the policy class, the value-function
            class and this class into ``OUTPATH``, and opens
            ``OUTPATH + 'log.csv'`` in write mode.
        """
        env = gym.make(env_name)
        self.env = env
        gym.spaces.seed(1234)

        # One slot more than the raw observation size; presumably reserved
        # for a time-step feature -- confirm against the rollout code.
        self.obs_dim = env.observation_space.shape[0] + 1
        self.act_dim = env.action_space.shape[0]

        # Run configuration, kept verbatim for later use.
        self.discount = discount
        self.num_iterations = num_iterations
        self.lamb = lamb
        self.animate = animate

        self.killer = GracefulKiller()
        self.policy = ProximalPolicy(
            self.obs_dim,
            self.act_dim,
            env.action_space,
            kl_target,
            discount=discount,
            lamb=lamb,
        )

        # Original author's note: using MC return would be more helpful.
        # Alternative left for reference:
        #   ValueFunc(self.obs_dim, discount=discount, lamb=1)
        self.value_func = l2TargetValueFunc(self.obs_dim)

        # Snapshot the source files that define this run next to its output.
        for component in (self.policy, self.value_func, self):
            source_path = inspect.getfile(component.__class__)
            shutil.copy(source_path, OUTPATH)

        # The handle stays open on the instance; closing it is left to
        # whichever code finishes the run.
        self.log_file = open(OUTPATH + 'log.csv', 'w')
        self.write_header = True

        print('observation dimension:', self.obs_dim)
        print('action dimension:', self.act_dim)

        self.scaler = Scaler(self.obs_dim)
        self.init_scaler()
Пример #2
0
    def __init__(self, discount, num_iterations, lamb, animate, kl_target, **kwargs):
        """Set up the Roboschool flag-run environment and the learning components.

        Args:
            discount: Stored on the instance and forwarded to the critic.
            num_iterations: Stored on the instance (not used here).
            lamb: Stored on the instance (not used here).
            animate: Stored on the instance (not used here).
            kl_target: Forwarded to ``QPropPolicy``.
            **kwargs: Only ``show`` is inspected. When ``show`` is supplied
                and falsy, source files are copied into ``OUTPATH`` and
                ``log.csv`` is opened for writing. When ``show`` is truthy
                or omitted, neither happens and ``log_file`` /
                ``write_header`` are not set.
        """
        self.env_name = 'RoboschoolHumanoidFlagrun-v1'
        env = gym.make(self.env_name)
        self.env = env
        gym.spaces.seed(1234)  # fixed seed for reproducibility

        # The extra slot carries the time step as an additional feature.
        self.obs_dim = env.observation_space.shape[0] + 1
        self.act_dim = env.action_space.shape[0]

        # Run configuration, kept verbatim for later use.
        self.discount = discount
        self.num_iterations = num_iterations
        self.lamb = lamb
        self.animate = animate

        # Capacity of 1000000; per the original author this matches the
        # size used in the paper.
        self.buffer = Buffer(1000000, self.obs_dim, self.act_dim)
        # Original author's note: larger episodes can reduce variance.
        self.episodes = 20
        self.killer = GracefulKiller()

        self.policy = QPropPolicy(
            self.obs_dim,
            self.act_dim,
            env.action_space,
            kl_target,
            epochs=20,
        )
        self.critic = DeterministicCritic(
            self.obs_dim, self.act_dim, self.discount, OUTPATH)
        self.value_func = l2TargetValueFunc(self.obs_dim, epochs=10)

        # A missing 'show' key counts as "showing", so nothing is written.
        recording = not kwargs.get('show', True)
        if recording:
            # Snapshot the source files that define this run.
            for component in (self.policy, self.value_func, self.critic, self):
                source_path = inspect.getfile(component.__class__)
                shutil.copy(source_path, OUTPATH)

            self.log_file = open(OUTPATH + 'log.csv', 'w')
            self.write_header = True

        print('Observation dimension:', self.obs_dim)
        print('Action dimension:', self.act_dim)

        # Original author's note: the use of a scaler is crucial.
        self.scaler = Scaler(self.obs_dim)
        self.init_scaler()
Пример #3
0
    def __init__(self, env_name, discount, num_iterations, lamb, animate,
                 kl_target, show):
        """Set up the environment and the policy, critic, value function and scaler.

        Args:
            env_name: Environment id handed to ``gym.make``. The id
                ``"FetchReach-v0"`` is additionally wrapped in
                ``gym.wrappers.FlattenDictWrapper``.
            discount: Stored on the instance and forwarded to the critic.
            num_iterations: Stored on the instance (not used here).
            lamb: Stored on the instance (not used here).
            animate: Stored on the instance (not used here).
            kl_target: Forwarded to ``QPropPolicy``.
            show: When falsy, source files are copied into ``OUTPATH`` and
                ``log.csv`` is opened for writing. When truthy, neither
                happens and ``log_file`` / ``write_header`` are not set.
        """
        self.env_name = env_name

        env = gym.make(env_name)
        if env_name == "FetchReach-v0":
            # Presumably this env yields dict observations; the wrapper is
            # given the three keys it should combine -- confirm against gym.
            dict_keys = ['observation', 'desired_goal', 'achieved_goal']
            env = gym.wrappers.FlattenDictWrapper(env, dict_keys)
        self.env = env
        gym.spaces.seed(1234)

        # The extra slot carries the time step as an additional feature.
        self.obs_dim = env.observation_space.shape[0] + 1
        self.act_dim = env.action_space.shape[0]

        # Run configuration, kept verbatim for later use.
        self.discount = discount
        self.num_iterations = num_iterations
        self.lamb = lamb
        self.animate = animate

        self.buffer = Buffer(1000000, self.obs_dim, self.act_dim)
        self.episodes = 20
        self.killer = GracefulKiller()

        self.policy = QPropPolicy(
            self.obs_dim,
            self.act_dim,
            env.action_space,
            kl_target,
            epochs=20,
        )
        self.critic = DeterministicCritic(
            self.obs_dim, self.act_dim, self.discount, OUTPATH)
        # Original author's note: using MC return would be more helpful.
        self.value_func = l2TargetValueFunc(self.obs_dim, epochs=10)

        if not show:
            # Snapshot the source files that define this run.
            for component in (self.policy, self.value_func, self.critic, self):
                source_path = inspect.getfile(component.__class__)
                shutil.copy(source_path, OUTPATH)

            self.log_file = open(OUTPATH + 'log.csv', 'w')
            self.write_header = True

        print('observation dimension:', self.obs_dim)
        print('action dimension:', self.act_dim)

        # Original author's note: use of a scaler is crucial.
        self.scaler = Scaler(self.obs_dim)
        self.init_scaler()