Example #1
    def obtain_samples(self,
                       itr,
                       reset_args=None,
                       return_dict=False,
                       log_prefix=''):
        init_policy_params_list = cur_policy_params_list = [
            policy.get_param_values() for policy in self.algo.policy_list
        ]
        if hasattr(self.algo.env, "get_param_values"):
            try:
                cur_env_params = self.algo.env.get_param_values()
            except Exception:
                cur_env_params = None
        else:
            cur_env_params = None
        import time
        start = time.time()
        if not isinstance(reset_args, (list, np.ndarray)):
            reset_args = [reset_args] * self.n_envs
        if self.algo.policy_list[0].all_param_vals is not None:
            cur_policy_params_list = [[
                flatten_tensors(x.values()) for x in policy.all_param_vals
            ] for policy in self.algo.policy_list]
        else:
            cur_policy_params_list = [
                [cur_policy_params] * self.n_envs
                for cur_policy_params in cur_policy_params_list
            ]
        # do tasks sequentially and parallelize within rollouts per task.
        paths = {}
        for n in range(len(self.algo.policy_list)):
            for i in range(self.n_envs):
                paths[str(n) + "_" + str(i)] = parallel_sampler.sample_paths(
                    policy_params=cur_policy_params_list[n][i],
                    env_params=cur_env_params,
                    max_samples=self.algo.batch_size / self.n_envs,
                    max_path_length=self.algo.max_path_length,
                    scope=self.algo.scope,
                    reset_arg=reset_args[i],
                    show_prog_bar=False,
                )
        total_time = time.time() - start
        logger.record_tabular(log_prefix + "TotalExecTime", total_time)

        if not return_dict:
            flatten_list = lambda l: [
                item for sublist in l for item in sublist
            ]
            paths = flatten_list(paths.values())

        for n in range(len(self.algo.policy_list)):
            self.algo.policy_list[n].set_param_values(
                init_policy_params_list[n])

        # partial (non-whole) paths are not supported; add truncation here if needed
        assert self.algo.whole_paths

        return paths
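Every example above funnels parameters through flatten_tensors from rllab's tensor_utils. As a point of reference, here is a minimal sketch of what such a helper does, assuming it simply concatenates the flattened arrays in order (the real implementation may differ):

    import numpy as np

    def flatten_tensors(tensors):
        # Concatenate a sequence of arrays into a single flat parameter vector.
        return np.concatenate([np.asarray(t).reshape(-1) for t in tensors])

    # Two "layers" of parameters collapse into one 1-D vector of length 9.
    w, b = np.ones((2, 3)), np.zeros(3)
    theta = flatten_tensors([w, b])
    assert theta.shape == (9,)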
Example #2
    def obtain_samples(self, itr, reset_args=None, return_dict=False, log_prefix=''):
        init_policy_params = cur_policy_params = self.algo.policy.get_param_values()
        if hasattr(self.algo.env,"get_param_values"):
            try:
                cur_env_params = self.algo.env.get_param_values()
            except Exception:
                cur_env_params = None
        else:
            cur_env_params = None
        import time
        start = time.time()
        # first, a naive implementation.
        if not isinstance(reset_args, (list, np.ndarray)):
            reset_args = [reset_args]*self.n_envs
        if self.algo.policy.all_param_vals:
            cur_policy_params = [flatten_tensors(x.values()) for x in self.algo.policy.all_param_vals]
        else:
            cur_policy_params = [cur_policy_params]*self.n_envs
        # assume that n_envs = num parallel
        if self.n_envs == parallel_sampler.singleton_pool.n_parallel:
            raise NotImplementedError('this implementation is buggy.')
            # unreachable below, kept for reference: 1 thread per env
            paths = parallel_sampler.sample_paths(
                policy_params=cur_policy_params,
                env_params=cur_env_params,
                max_samples=self.algo.batch_size,
                max_path_length=self.algo.max_path_length,
                scope=self.algo.scope,
                reset_arg=reset_args,
                show_prog_bar=True,
                multi_task=True,
            )
        else:
            # do tasks sequentially and parallelize within rollouts per task.
            paths = {}
            for i in range(self.n_envs):
                paths[i] = parallel_sampler.sample_paths(
                    policy_params=cur_policy_params[i],
                    env_params=cur_env_params,
                    max_samples=self.algo.batch_size / self.n_envs,
                    max_path_length=self.algo.max_path_length,
                    scope=self.algo.scope,
                    reset_arg=reset_args[i],
                    show_prog_bar=False,
                )
        total_time = time.time() - start
        logger.record_tabular(log_prefix+"TotalExecTime", total_time)

        if not return_dict:
            flatten_list = lambda l: [item for sublist in l for item in sublist]
            paths = flatten_list(paths.values())

        self.algo.policy.set_param_values(init_policy_params)

        # partial (non-whole) paths are not supported; add truncation here if needed
        assert self.algo.whole_paths

        return paths
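When return_dict is False, the per-task dict is flattened into one list of rollouts. A small self-contained illustration of that step, using a hypothetical paths layout (task index mapping to a list of path dicts):

    # Hypothetical shape of `paths` when sampling two tasks.
    paths = {0: [{"rewards": [1.0]}, {"rewards": [0.5]}],
             1: [{"rewards": [2.0]}]}

    flatten_list = lambda l: [item for sublist in l for item in sublist]
    flat = flatten_list(paths.values())
    assert len(flat) == 3  # all rollouts pooled; task identity is discarded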
Example #3
    def obtain_samples(self, itr, reset_args=None, return_dict=False, log_prefix=''):
        init_policy_params = cur_policy_params = self.algo.policy.get_param_values()
        if hasattr(self.algo.env,"get_param_values"):
            try:
                cur_env_params = self.algo.env.get_param_values()
            except Exception:
                cur_env_params = None
        else:
            cur_env_params = None
        import time
        start = time.time()
        if not isinstance(reset_args, (list, np.ndarray)):
            reset_args = [reset_args]*self.n_envs

        # the following block is already handled by get_param_values
        if hasattr(self.algo.policy, 'all_param_vals') and self.algo.policy.all_param_vals:
            cur_policy_params = [flatten_tensors(self.algo.policy.all_param_vals.values())]
        else:
            cur_policy_params = [cur_policy_params]*self.n_envs
        # do tasks sequentially and parallelize within rollouts per task.
        paths = {}
        for i in range(self.n_envs):
            paths[i] = parallel_sampler.sample_paths(
                policy_params=cur_policy_params[i],
                env_params=cur_env_params,
                max_samples=self.algo.batch_size / self.n_envs,
                max_path_length=self.algo.max_path_length,
                scope=self.algo.scope,
                reset_arg=reset_args[i],
                show_prog_bar=False,
            )
        total_time = time.time() - start
        logger.record_tabular(log_prefix+"TotalExecTime", total_time)
        if not return_dict:
            flatten_list = lambda l: [item for sublist in l for item in sublist]
            paths = flatten_list(paths.values())

        self.algo.policy.set_param_values(init_policy_params)

        # partial (non-whole) paths are not supported; add truncation here if needed
        assert self.algo.whole_paths

        return paths
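Example #3 flattens all_param_vals.values() directly, so the layout of the flat vector depends entirely on the dict's iteration order. A short demonstration of why the key order must match the order the policy uses when it unflattens (OrderedDict makes the order explicit here):

    from collections import OrderedDict
    import numpy as np

    params = OrderedDict([("W", np.ones((2, 2))), ("b", np.zeros(2))])
    theta = np.concatenate([np.asarray(v).reshape(-1) for v in params.values()])
    assert theta.shape == (6,) and theta[-1] == 0.0  # "b" lands at the tail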
Example #4
    def obtain_samples(self, itr, reset_args=None, return_dict=False, log_prefix='', extra_input=None, extra_input_dim=None, save_img_obs=False, preupdate=True):
        if extra_input is not None:
            raise NotImplementedError("extra_input is not implemented")
        if not preupdate:
            raise NotImplementedError("preupdate=False is not implemented")
        init_policy_params = cur_policy_params = self.algo.policy.get_param_values()
        if hasattr(self.algo.env,"get_param_values"):
            try:
                cur_env_params = self.algo.env.get_param_values()
            except Exception:
                cur_env_params = None
        else:
            cur_env_params = None
        import time
        start = time.time()
        if not isinstance(reset_args, (list, np.ndarray)):
            reset_args = [reset_args]*self.n_envs
        if hasattr(self.algo.policy, 'all_param_vals'):  # TODO(RK): make this less hacky so it still works with non-MAML policies
            if self.algo.policy.all_param_vals:
                cur_policy_params = [flatten_tensors(x.values()) for x in self.algo.policy.all_param_vals]
            else:
                cur_policy_params = [cur_policy_params]*self.n_envs
        else:
            cur_policy_params = [cur_policy_params]*self.n_envs
        # do tasks sequentially and parallelize within rollouts per task.
        paths = {}
        for i in range(self.n_envs):
            paths[i] = parallel_sampler.sample_paths(
                policy_params=cur_policy_params[i],
                env_params=cur_env_params,
                max_samples=self.algo.batch_size / self.n_envs,
                max_path_length=self.algo.max_path_length,
                scope=self.algo.scope,
                reset_arg=reset_args[i],
                show_prog_bar=False,
            )
        total_time = time.time() - start
        logger.record_tabular(log_prefix+"TotalExecTime", total_time)

        if not return_dict:
            flatten_list = lambda l: [item for sublist in l for item in sublist]
            paths = flatten_list(paths.values())

        self.algo.policy.set_param_values(init_policy_params)

        # partial (non-whole) paths are not supported; add truncation here if needed
        assert self.algo.whole_paths

        return paths
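Examples #2 through #5 snapshot the pre-update parameters, sample with per-task (adapted) parameters, then restore the snapshot. A minimal stand-in for that pattern, using a toy policy rather than rllab's Parameterized class:

    import numpy as np

    class TinyPolicy:
        def __init__(self):
            self._theta = np.zeros(4)
        def get_param_values(self):
            return self._theta.copy()
        def set_param_values(self, theta):
            self._theta = np.asarray(theta).copy()

    policy = TinyPolicy()
    init = policy.get_param_values()          # snapshot before adaptation
    policy.set_param_values(init + 1.0)       # sample with adapted parameters
    policy.set_param_values(init)             # restore, as obtain_samples does
    assert np.array_equal(policy.get_param_values(), np.zeros(4))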
Example #5
    def obtain_samples(self, itr, reset_args=None, return_dict=False, log_prefix=''):
        init_policy_params = cur_policy_params = self.algo.policy.get_param_values()
        if hasattr(self.algo.env,"get_param_values"):
            try:
                cur_env_params = self.algo.env.get_param_values()
            except Exception:
                cur_env_params = None
        else:
            cur_env_params = None
        import time
        start = time.time()
        if not isinstance(reset_args, (list, np.ndarray)):
            reset_args = [reset_args]*self.n_envs
        if self.algo.policy.all_param_vals:
            cur_policy_params = [flatten_tensors(x.values()) for x in self.algo.policy.all_param_vals]
        else:
            cur_policy_params = [cur_policy_params]*self.n_envs
        # do tasks sequentially and parallelize within rollouts per task.
        paths = {}
        for i in range(self.n_envs):
            paths[i] = parallel_sampler.sample_paths(
                policy_params=cur_policy_params[i],
                env_params=cur_env_params,
                max_samples=self.algo.batch_size / self.n_envs,
                max_path_length=self.algo.max_path_length,
                scope=self.algo.scope,
                reset_arg=reset_args[i],
                show_prog_bar=False,
            )
        total_time = time.time() - start
        logger.record_tabular(log_prefix+"TotalExecTime", total_time)

        if not return_dict:
            flatten_list = lambda l: [item for sublist in l for item in sublist]
            paths = flatten_list(paths.values())

        self.algo.policy.set_param_values(init_policy_params)

        # partial (non-whole) paths are not supported; add truncation here if needed
        assert self.algo.whole_paths

        return paths
Example #6
 def get_param_values(self, all_params=False, **tags):
     params = self.get_params(all_params, **tags)
     param_values = tf.get_default_session().run(params)
     return flatten_tensors(param_values)
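set_param_values needs the inverse operation: splitting the flat vector back into per-variable arrays. rllab's tensor_utils ships unflatten_tensors for this; a hedged sketch of the idea, assuming a simple reshape-and-advance loop:

    import numpy as np

    def unflatten_tensors(flat, shapes):
        # Split a flat vector into arrays of the given shapes, in order.
        arrays, i = [], 0
        for shape in shapes:
            n = int(np.prod(shape))
            arrays.append(flat[i:i + n].reshape(shape))
            i += n
        return arrays

    w, b = unflatten_tensors(np.arange(6.0), [(2, 2), (2,)])
    assert w.shape == (2, 2) and b.shape == (2,)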
Example #7
 def get_param_values(self, **tags):
     params = self.get_params(**tags)
     param_values = self._sess.run(params)
     return flatten_tensors(param_values)
Example #8
 def get_param_values(self, **tags):
     params = self.get_params(**tags)
     param_values = tf.get_default_session().run(params)
     return flatten_tensors(param_values)
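The TF-based variants (Examples #7, #8, #11, #14, #15) fetch all parameter tensors in a single session.run call before flattening. A self-contained equivalent, assuming TF 1.x-style graphs (written against tf.compat.v1 so it also runs under TF 2):

    import numpy as np
    import tensorflow.compat.v1 as tf
    tf.disable_v2_behavior()

    w = tf.Variable(np.ones((2, 2), dtype=np.float32))
    b = tf.Variable(np.zeros(2, dtype=np.float32))
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        values = sess.run([w, b])  # one run() fetches every parameter at once
        theta = np.concatenate([v.reshape(-1) for v in values])
    assert theta.shape == (6,)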
Example #9
 def eval_loss_grad(params):
     self.policy.set_param_values(params, trainable=True)
     grad = f_loss_grad(*input)
     flattened_grad = tensor_utils.flatten_tensors(list(map(np.asarray, grad)))
     return flattened_grad.astype(np.float64)
Example #10
 def get_param_values(self, **tags):
     return flatten_tensors([
         param.get_value(borrow=True) for param in self.get_params(**tags)
     ])
Example #11
 def get_param_values(self, **tags):
     params = self.get_params(**tags)
     param_values = tf.get_default_session().run(params)
     return flatten_tensors(param_values)
Example #12
 def get_param_values(self, **tags):
     return flatten_tensors(
         [param.get_value(borrow=True)
          for param in self.get_params(**tags)]
     )
Example #13
 def eval_loss_grad(params):
     self.policy.set_param_values(params, trainable=True)
     grad = f_loss_grad(*input)
     flattened_grad = tensor_utils.flatten_tensors(
         list(map(np.asarray, grad)))
     return flattened_grad.astype(np.float64)
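The eval_loss_grad closures in Examples #9 and #13 return a flat float64 gradient, which is exactly the interface SciPy's optimizers expect when jac=True. A toy quadratic in place of the policy loss shows the contract:

    import numpy as np
    from scipy.optimize import minimize

    def loss_and_grad(theta):
        # Return (scalar loss, flat float64 gradient), as eval_loss_grad does.
        return 0.5 * float(theta @ theta), theta.astype(np.float64)

    result = minimize(loss_and_grad, x0=np.ones(3), jac=True, method="L-BFGS-B")
    assert np.allclose(result.x, 0.0, atol=1e-5)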
Example #14
 def get_param_values(self, **tags):
     if config.TF_NN_SETTRACE:
         ipdb.set_trace()
     params = self.get_params(**tags)
     param_values = tf.get_default_session().run(params)
     return flatten_tensors(param_values)
Example #15
 def get_param_values(self, **tags):
     params = self.get_params(**tags)
     param_values = tf.get_default_session().run(params)
     return flatten_tensors(param_values)
Example #16
    def obtain_samples(self,
                       itr,
                       reset_args=None,
                       return_dict=False,
                       log_prefix='',
                       extra_input=None,
                       extra_input_dim=None,
                       save_img_obs=False,
                       preupdate=True,
                       numTrajs_perTask=None):

        init_policy_params = cur_policy_params = self.algo.policy.get_param_values()
        if hasattr(self.algo.env, "get_param_values"):
            try:
                cur_env_params = self.algo.env.get_param_values()
            except Exception:
                cur_env_params = None
        else:
            cur_env_params = None
        import time
        start = time.time()
        if not isinstance(reset_args, (list, np.ndarray)):
            reset_args = [reset_args] * self.n_envs

        cur_policy_params = [cur_policy_params] * self.n_envs
        # do tasks sequentially and parallelize within rollouts per task.
        paths = {}
        all_param_vals_list = self.algo.policy.all_param_vals

        if extra_input is None:
            extra_infos = None
        else:
            assert extra_input in [
                "onehot_exploration", 'gaussian_exploration', 'onehot_hacked'
            ]
            extra_infos = [extra_input, extra_input_dim, preupdate]

        for i in range(self.n_envs):

            if self.algo.policy.all_param_vals is None:
                policy_params = cur_policy_params[i]

            else:
                policy_params = flatten_tensors(
                    all_param_vals_list[i].values())

            paths_i = parallel_sampler.sample_paths(
                policy_params=policy_params,
                env_params=cur_env_params,
                max_samples=self.algo.batch_size / self.n_envs,
                max_path_length=self.algo.max_path_length,
                scope=self.algo.scope,
                reset_arg=reset_args[i],
                taskIdx=i,
                show_prog_bar=False,
                extra_infos=extra_infos)
            if numTrajs_perTask is not None:
                paths[i] = paths_i[:numTrajs_perTask]
            else:
                paths[i] = paths_i

        total_time = time.time() - start
        logger.record_tabular(log_prefix + "TotalExecTime", total_time)

        if not return_dict:
            flatten_list = lambda l: [
                item for sublist in l for item in sublist
            ]
            paths = flatten_list(paths.values())

        #self.algo.policy.set_param_values(init_policy_params)

        # partial (non-whole) paths are not supported; add truncation here if needed
        assert self.algo.whole_paths

        return paths
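One caveat that applies to every obtain_samples variant above: under Python 3, batch_size / n_envs is a float, while the per-task sample budget is naturally an integer. A defensive caller (hypothetical names below) would floor-divide up front:

    batch_size, n_envs = 4000, 8          # hypothetical configuration values
    max_samples = batch_size // n_envs    # 500 samples per task, as an int
    assert isinstance(max_samples, int) and max_samples == 500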