Example #1
    def __init__(self, params):

        #############
        ## INIT
        #############

        # Get params, create logger
        self.params = params
        self.logger = Logger(self.params["logdir"])

        # Set random seeds
        seed = self.params["seed"]
        np.random.seed(seed)
        torch.manual_seed(seed)
        ptu.init_gpu(use_gpu=not self.params["no_gpu"],
                     gpu_id=self.params["which_gpu"])

        #############
        ## ENV
        #############

        # Make the gym environment
        register_custom_envs()
        self.env = gym.make(self.params["env_name"])
        self.eval_env = gym.make(self.params["env_name"])
        if not ("pointmass" in self.params["env_name"]):
            import matplotlib

            matplotlib.use("Agg")
            self.env.set_logdir(self.params["logdir"] + "/expl_")
            self.eval_env.set_logdir(self.params["logdir"] + "/eval_")

        if "env_wrappers" in self.params:
            # These operations are currently only for Atari envs
            self.env = wrappers.Monitor(self.env,
                                        os.path.join(self.params["logdir"],
                                                     "gym"),
                                        force=True)
            self.eval_env = wrappers.Monitor(self.eval_env,
                                             os.path.join(
                                                 self.params["logdir"], "gym"),
                                             force=True)
            self.env = params["env_wrappers"](self.env)
            self.eval_env = params["env_wrappers"](self.eval_env)
            self.mean_episode_reward = -float("nan")
            self.best_mean_episode_reward = -float("inf")
        if "non_atari_colab_env" in self.params and self.params[
                "video_log_freq"] > 0:
            self.env = wrappers.Monitor(
                self.env,
                os.path.join(self.params["logdir"], "gym"),
                write_upon_reset=True,
            )  # , force=True)
            self.eval_env = wrappers.Monitor(
                self.eval_env,
                os.path.join(self.params["logdir"], "gym"),
                write_upon_reset=True,
            )
            self.mean_episode_reward = -float("nan")
            self.best_mean_episode_reward = -float("inf")
        self.env.seed(seed)
        self.eval_env.seed(seed)

        # Maximum length for episodes
        self.params["ep_len"] = self.params[
            "ep_len"] or self.env.spec.max_episode_steps
        global MAX_VIDEO_LEN
        MAX_VIDEO_LEN = self.params["ep_len"]

        # Is this env continuous, or discrete?
        discrete = isinstance(self.env.action_space, gym.spaces.Discrete)
        # Are the observations images?
        img = len(self.env.observation_space.shape) > 2

        self.params["agent_params"]["discrete"] = discrete

        # Observation and action sizes

        ob_dim = (self.env.observation_space.shape
                  if img else self.env.observation_space.shape[0])
        ac_dim = self.env.action_space.n if discrete else self.env.action_space.shape[0]
        self.params["agent_params"]["ac_dim"] = ac_dim
        self.params["agent_params"]["ob_dim"] = ob_dim

        # simulation timestep, will be used for video saving
        if "model" in dir(self.env):
            self.fps = 1 / self.env.model.opt.timestep
        elif "env_wrappers" in self.params:
            self.fps = 30  # This is not actually used when using the Monitor wrapper
        elif "video.frames_per_second" in self.env.env.metadata.keys():
            self.fps = self.env.env.metadata["video.frames_per_second"]
        else:
            self.fps = 10

        #############
        ## AGENT
        #############

        agent_class = self.params["agent_class"]
        self.agent = agent_class(self.env, self.params["agent_params"])
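
A quick standalone check of the dimension logic above: the constructor infers discreteness and image observations from the gym spaces and derives ob_dim/ac_dim from them. The sketch below is illustrative only (it uses a stock CartPole env rather than the custom envs registered above), but it runs on its own:

import gym

env = gym.make("CartPole-v1")
discrete = isinstance(env.action_space, gym.spaces.Discrete)
img = len(env.observation_space.shape) > 2  # image observations have (H, W, C) shapes
ob_dim = env.observation_space.shape if img else env.observation_space.shape[0]
ac_dim = env.action_space.n if discrete else env.action_space.shape[0]
print(discrete, ob_dim, ac_dim)  # True 4 2 for CartPole-v1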
Example #2
    def __init__(self, params):

        #############
        ## INIT
        #############

        # Get params, create logger
        self.params = params
        self.logger = Logger(self.params['logdir'])

        # Set random seeds
        seed = self.params['seed']
        np.random.seed(seed)
        tf.random.set_seed(seed)

        #############
        ## ENV
        #############

        # Make the gym environment
        register_custom_envs()
        self.env = gym.make(self.params['env_name'])
        self.eval_env = gym.make(self.params['env_name'])
        if 'pointmass' not in self.params['env_name']:
            import matplotlib
            matplotlib.use('Agg')
            self.env.set_logdir(self.params['logdir'] + '/expl_')
            self.eval_env.set_logdir(self.params['logdir'] + '/eval_')

        if 'env_wrappers' in self.params:
            # These operations are currently only for Atari envs
            self.env = wrappers.Monitor(self.env,
                                        os.path.join(self.params['logdir'],
                                                     "gym"),
                                        force=True)
            self.eval_env = wrappers.Monitor(self.eval_env,
                                             os.path.join(
                                                 self.params['logdir'], "gym"),
                                             force=True)
            self.env = params['env_wrappers'](self.env)
            self.eval_env = params['env_wrappers'](self.eval_env)
            self.mean_episode_reward = -float('nan')
            self.best_mean_episode_reward = -float('inf')
        if 'non_atari_colab_env' in self.params and self.params['video_log_freq'] > 0:
            self.env = wrappers.Monitor(self.env,
                                        os.path.join(self.params['logdir'],
                                                     "gym"),
                                        write_upon_reset=True)  #, force=True)
            self.eval_env = wrappers.Monitor(self.eval_env,
                                             os.path.join(
                                                 self.params['logdir'], "gym"),
                                             write_upon_reset=True)
            self.mean_episode_reward = -float('nan')
            self.best_mean_episode_reward = -float('inf')
        self.env.seed(seed)
        self.eval_env.seed(seed)

        # Maximum length for episodes
        self.params['ep_len'] = self.params['ep_len'] or self.env.spec.max_episode_steps
        global MAX_VIDEO_LEN
        MAX_VIDEO_LEN = self.params['ep_len']

        # Is this env continuous, or discrete?
        discrete = isinstance(self.env.action_space, gym.spaces.Discrete)
        # Are the observations images?
        img = len(self.env.observation_space.shape) > 2

        self.params['agent_params']['discrete'] = discrete

        # Observation and action sizes

        ob_dim = self.env.observation_space.shape if img else self.env.observation_space.shape[0]
        ac_dim = self.env.action_space.n if discrete else self.env.action_space.shape[0]
        self.params['agent_params']['ac_dim'] = ac_dim
        self.params['agent_params']['ob_dim'] = ob_dim

        # simulation timestep, will be used for video saving
        if 'model' in dir(self.env):
            self.fps = 1 / self.env.model.opt.timestep
        elif 'env_wrappers' in self.params:
            self.fps = 30  # This is not actually used when using the Monitor wrapper
        elif 'video.frames_per_second' in self.env.env.metadata.keys():
            self.fps = self.env.env.metadata['video.frames_per_second']
        else:
            self.fps = 10

        #############
        ## AGENT
        #############

        agent_class = self.params['agent_class']
        self.agent = agent_class(self.env, self.params['agent_params'])
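
Example #2 above is the TensorFlow variant of Example #1 (tf.random.set_seed in place of torch.manual_seed and ptu.init_gpu); the env setup is otherwise identical. Both rely on the older gym API in which gym.wrappers.Monitor records episodes and stats to a directory. A minimal, self-contained sketch of that wrapping pattern, assuming such a gym version is installed (the log path and env id here are illustrative):

import os
import gym
from gym import wrappers

logdir = "/tmp/rl_run"  # illustrative log directory
env = gym.make("CartPole-v1")
env = wrappers.Monitor(env, os.path.join(logdir, "gym"), force=True)  # force=True overwrites old recordings
env.seed(0)
obs = env.reset()
for _ in range(100):
    obs, reward, done, info = env.step(env.action_space.sample())
    if done:
        obs = env.reset()
env.close()  # flushes video and stats files to logdir/gym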
Example #3
    def __init__(self, params):

        #############
        ## INIT
        #############

        # Get params, create logger
        self.params = params
        self.logger = Logger(self.params['logdir'])

        # Set random seeds
        seed = self.params['seed']
        np.random.seed(seed)
        torch.manual_seed(seed)
        ptu.init_gpu(
            use_gpu=not self.params['no_gpu'],
            gpu_id=self.params['which_gpu']
        )

        #############
        ## ENV
        #############

        # Make the gym environment
        register_custom_envs()
        self.env = gym.make(self.params['env_name'])
        if 'env_wrappers' in self.params:
            # These operations are currently only for Atari envs
            self.env = wrappers.Monitor(
                self.env,
                os.path.join(self.params['logdir'], "gym"),
                force=True,
                video_callable=(None if self.params['video_log_freq'] > 0 else False),
            )
            self.env = params['env_wrappers'](self.env)
            self.mean_episode_reward = -float('nan')
            self.best_mean_episode_reward = -float('inf')
        if 'non_atari_colab_env' in self.params and self.params['video_log_freq'] > 0:
            self.env = wrappers.Monitor(
                self.env,
                os.path.join(self.params['logdir'], "gym"),
                force=True,
                video_callable=(None if self.params['video_log_freq'] > 0 else False),
            )
            self.mean_episode_reward = -float('nan')
            self.best_mean_episode_reward = -float('inf')

        self.env.seed(seed)

        # use the non-interactive Agg matplotlib backend unless this is the obstacles env
        if self.params['env_name'] != 'obstacles-cs285-v0':
            import matplotlib
            matplotlib.use('Agg')

        # Maximum length for episodes
        self.params['ep_len'] = self.params['ep_len'] or self.env.spec.max_episode_steps
        global MAX_VIDEO_LEN
        MAX_VIDEO_LEN = self.params['ep_len']

        # Is this env continuous, or discrete?
        discrete = isinstance(self.env.action_space, gym.spaces.Discrete)
        # Are the observations images?
        img = len(self.env.observation_space.shape) > 2

        self.params['agent_params']['discrete'] = discrete

        # Observation and action sizes

        ob_dim = self.env.observation_space.shape if img else self.env.observation_space.shape[0]
        ac_dim = self.env.action_space.n if discrete else self.env.action_space.shape[0]
        self.params['agent_params']['ac_dim'] = ac_dim
        self.params['agent_params']['ob_dim'] = ob_dim

        # simulation timestep, will be used for video saving
        if 'model' in dir(self.env):
            self.fps = 1/self.env.model.opt.timestep
        elif 'env_wrappers' in self.params:
            self.fps = 30 # This is not actually used when using the Monitor wrapper
        elif 'video.frames_per_second' in self.env.env.metadata.keys():
            self.fps = self.env.env.metadata['video.frames_per_second']
        else:
            self.fps = 10


        #############
        ## AGENT
        #############

        agent_class = self.params['agent_class']
        if 'hparam' in self.params['exp_name']:
            print('changing optimizer')
            self.params['agent_params']['optimizer_spec'] = lander_optimizer(self.params['lr'])
            print('using lr =', self.params['lr'])
        self.agent = agent_class(self.env, self.params['agent_params'])
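
Example #3 additionally overrides the agent's optimizer_spec via lander_optimizer(self.params['lr']) when the experiment name contains 'hparam'. That helper is not shown in these examples; in codebases of this style an optimizer spec is typically a small container bundling an optimizer constructor, its keyword arguments, and a learning-rate schedule. The sketch below is a guess at that shape, not the actual helper, and every name in it is an assumption:

from collections import namedtuple

import torch.optim as optim

# Hypothetical reconstruction of an optimizer spec; field names and the
# constant schedule are assumptions, not the API used by Example #3.
OptimizerSpec = namedtuple(
    "OptimizerSpec", ["constructor", "optim_kwargs", "learning_rate_schedule"])

def lander_optimizer(lr):
    return OptimizerSpec(
        constructor=optim.Adam,
        optim_kwargs=dict(lr=lr),
        learning_rate_schedule=lambda t: 1.0,  # constant LR multiplier over training
    )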