Example #1
    def integrate_new_skill(self, new_skill_id, new_skill_subpath):
        skill_integration_method = CategoricalMLPSkillIntegrator.Method.SUBPATH_SKILLS_AVG

        ## Hierarchized environment
        hrl_env = HierarchizedEnv(
                # base env that was wrapped in HierarchizedEnv (not fully unwrapped - may be normalized!)
                env=self.env.env.env,
                num_orig_skills=self._hrl_policy.num_skills
        )
        tf_hrl_env = TfEnv(hrl_env)

        ## Top policy
        # 1) Get old policy from saved data
        old_top_policy = self._hrl_policy.get_top_policy()

        # 2) Get weights of old top policy
        otp_weights = unflatten_tensors(
                old_top_policy.get_param_values(),
                old_top_policy.get_param_shapes()
        )

        # 3) Create weights for new top policy
        skill_integrator = CategoricalMLPSkillIntegrator()
        ntp_weight_values = skill_integrator.integrate_skill(
                old_policy_weights=otp_weights,
                method=skill_integration_method,
                # Specific parameters for START_OBSS_SKILLS_AVG
                subpath_start_obss=new_skill_subpath['start_observations'],
                top_policy=old_top_policy,
                # Specific parameters for SUBPATH_SKILLS_AVG, SUBPATH_SKILLS_SMOOTH_AVG and SUBPATH_FIRST_SKILL
                subpath_actions=new_skill_subpath['actions']
        )

        # 4) Create new policy and randomly initialize its weights
        new_top_policy = CategoricalMLPPolicy(
                env_spec=tf_hrl_env.spec,  # This env already includes the new skill (action space = n + 1)
                hidden_sizes=(32, 32),     # Same as in asa_test.py
                name='CategoricalMLPPolicyWithSkill{}'.format(new_skill_id)
        )
        ntp_init_op = tf.variables_initializer(new_top_policy.get_params())
        ntp_init_op.run()

        # 5) Fill new policy with adjusted weights
        new_top_policy.set_param_values(
                flattened_params=flatten_tensors(ntp_weight_values)
        )

        ## Adjust HRL policy and training algorithms
        self._hrl_policy.top_policy = new_top_policy
        hrl_env.set_hrl_policy(self._hrl_policy)
        self.env = tf_hrl_env
        self.policy = self._hrl_policy.get_top_policy()
        self._top_algo = self._top_algo_cls(
                env=tf_hrl_env,
                policy=self._hrl_policy.get_top_policy(),
                baseline=self.baseline,
                **self._top_algo_kwargs
        )
        self.sampler = self._top_algo.sampler
        self.start_worker(self._tf_sess)
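
The integrator in steps 2-5 operates on the unflattened list of the top policy's weight tensors and returns a new list in which the categorical output layer has one extra action for the new skill. Below is a minimal numpy-only sketch of one plausible initialisation scheme (averaging the output weights of the skills that occur in the subpath, roughly what the SUBPATH_SKILLS_AVG name suggests); the helper and shapes are hypothetical and are not the CategoricalMLPSkillIntegrator implementation.

import numpy as np

def extend_output_layer(W, b, subpath_skill_ids):
    """Add one output unit (the new skill) initialised as the average of selected skills.

    W: (hidden_dim, n_skills) output-layer weights, b: (n_skills,) biases.
    Returns weights and biases for n_skills + 1 actions.
    """
    avg_w = W[:, subpath_skill_ids].mean(axis=1, keepdims=True)  # average columns of the used skills
    avg_b = b[subpath_skill_ids].mean(keepdims=True)
    return np.concatenate([W, avg_w], axis=1), np.concatenate([b, avg_b])

# Example: 4 hidden units, 3 old skills; the new skill is initialised from skills 0 and 2.
W_new, b_new = extend_output_layer(np.random.randn(4, 3), np.zeros(3), [0, 2])
assert W_new.shape == (4, 4) and b_new.shape == (4,)
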
Example #2
def run_task(*_):
    # Configure TF session
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    with tf.Session(config=config).as_default() as tf_session:
        ## Load data from itr_N.pkl
        with open(snapshot_file, 'rb') as file:
            saved_data = dill.load(file)

        ## Load data of new skill
        global new_skill_subpath
        if new_skill_policy_file:
            with open(new_skill_policy_file, 'rb') as file:
                new_skill_data = dill.load(file)
            new_skill_policy = new_skill_data['policy']
            new_skill_subpath = new_skill_data['subpath']
            unique_end_obss = np.unique(new_skill_subpath['end_observations'], axis=0)
            new_skill_stop_func = lambda path: (path['observations'][-1] == unique_end_obss).all(axis=1).any()

        ## Lower level environment & policies
        # Base (original) environment.
        base_env = saved_data['env'].env.env  # <NormalizedEnv<MinibotEnv instance>>

        # Skill policies, operating in base environment
        skill_targets = [  # 13 basic room regions
            ( 6,  5), ( 6, 18), ( 6, 33), ( 6, 47), ( 6, 61),
            (21,  5), (21, 18), (21, 33), (21, 47), (21, 61),
            (37,  5), (37, 18), (37, 33),
        ]
        trained_skill_policies = \
            [GridworldTargetPolicy(env_spec=base_env.spec, target=t) for t in skill_targets] + \
            [GridworldStepPolicy(env_spec=base_env.spec, direction=d, n=7) for d in range(4)] + \
            [
             new_skill_policy
             # GridworldTargetPolicy(env_spec=base_env.spec, target=(43, 54))  # DEBUG use GridworldTargetPolicy as new skill
             # GridworldRandomPolicy(env_spec=base_env.spec, n=25)             # DEBUG use GridworldRandomPolicy as new skill
             # GridworldStayPolicy(env_spec=base_env.spec, n=25)               # DEBUG use GridworldStayPolicy as new skill
            ]
        trained_skill_policies_stop_funcs = \
                [pol.skill_stopping_func for pol in trained_skill_policies[:-1]] + \
                [
                 new_skill_stop_func
                 # trained_skill_policies[-1].skill_stopping_func                  # DEBUG use Gridworld*Policy as new skill
                ]
        skill_policy_prototype = saved_data['hrl_policy'].skill_policy_prototype

        ## Upper level environment & policies
        # Hierarchized environment
        hrl_env = HierarchizedEnv(
                env=base_env,
                num_orig_skills=len(trained_skill_policies)
        )
        tf_hrl_env = TfEnv(hrl_env)


        ## Top policy
        # 1) Get old policy from saved data
        old_top_policy = saved_data['policy']

        # 2) Get weights of old top policy
        otp_weights = unflatten_tensors(
                old_top_policy.get_param_values(),
                old_top_policy.get_param_shapes()
        )

        # 3) Create weights for new top policy
        skill_integrator = CategoricalMLPSkillIntegrator()
        ntp_weight_values = skill_integrator.integrate_skill(
                old_policy_weights=otp_weights,
                method=skill_integration_method,
                # Specific parameters for START_OBSS_SKILLS_AVG
                subpath_start_obss=new_skill_subpath['start_observations'],
                top_policy=old_top_policy,
                # Specific parameters for SUBPATH_SKILLS_AVG, SUBPATH_SKILLS_SMOOTH_AVG and SUBPATH_FIRST_SKILL
                subpath_actions=new_skill_subpath['actions']
        )

        # 4) Create new policy and randomly initialize its weights
        new_top_policy = CategoricalMLPPolicy(
                env_spec=tf_hrl_env.spec,  # This env already includes the new skill (action space = n + 1)
                hidden_sizes=(32, 32),     # Same as in asa_basic_run.py
                name="TopCategoricalMLPPolicy2"
        )
        ntp_init_op = tf.variables_initializer(new_top_policy.get_params())
        ntp_init_op.run()

        # 5) Fill new policy with adjusted weights
        new_top_policy.set_param_values(
                flattened_params=flatten_tensors(ntp_weight_values)
        )


        ## Hierarchy of policies
        hrl_policy = HierarchicalPolicy(
                top_policy=new_top_policy,
                skill_policy_prototype=skill_policy_prototype,
                skill_policies=trained_skill_policies,
                skill_stop_functions=trained_skill_policies_stop_funcs,
                skill_max_timesteps=150
        )
        # Link hrl_policy and hrl_env, so that hrl_env can use skills
        hrl_env.set_hrl_policy(hrl_policy)

        ## Other
        # Baseline
        baseline = saved_data['baseline']  # Take trained baseline

        # Main ASA algorithm
        asa_algo = AdaptiveSkillAcquisition(
                env=tf_hrl_env,
                hrl_policy=hrl_policy,
                baseline=baseline,
                top_algo_cls=TRPO,
                low_algo_cls=TRPO,
                # Top algo kwargs
                    batch_size=5000,
                    max_path_length=50,
                    n_itr=300,
                    start_itr=saved_data['itr'] + 1,  # Continue from previous iteration number
                    discount=0.99,
                    force_batch_sampler=True,
                low_algo_kwargs={
                    'batch_size': 20000,
                    'max_path_length': 800,
                    'n_itr': 300,
                    'discount': 0.99,
                }
        )

        ## Launch training
        train_info = asa_algo.train(
                sess=tf_session,
                snapshot_mode='none'
        )

        ## Save last iteration
        out_file = os.path.join(train_info['snapshot_dir'], 'final.pkl')
        empty_samples_data = {'paths': None}
        with open(out_file, 'wb') as file:
            out_data = asa_algo.get_itr_snapshot(
                itr=asa_algo.n_itr - 1,
                samples_data=empty_samples_data
            )
            dill.dump(out_data, file)
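
Note that run_task reads snapshot_file, new_skill_policy_file, skill_integration_method, and new_skill_subpath from module scope (see the global statement), so a driver script has to define them before handing the function to a launcher. A hedged sketch of such a driver follows, assuming an rllab/garage-style run_experiment_lite launcher; the file paths, seed, and parallelism below are placeholder assumptions, not the actual ASA run script.

# Hypothetical driver for run_task; launcher import, paths, and seed are assumptions.
from rllab.misc.instrument import run_experiment_lite   # garage forks expose a similar launcher

# Module-level names read inside run_task (placeholder values):
snapshot_file = 'data/local/asa-basic-run/itr_49.pkl'         # itr_N.pkl snapshot to resume from
new_skill_policy_file = 'data/local/asa-new-skill/final.pkl'  # trained new-skill policy + subpath
new_skill_subpath = None   # populated inside run_task via the `global` statement
# skill_integration_method must also be defined here, e.g.
# CategoricalMLPSkillIntegrator.Method.SUBPATH_SKILLS_AVG as in Example #1.

run_experiment_lite(
        run_task,
        n_parallel=1,           # sampler parallelism
        snapshot_mode='none',   # run_task saves its own final.pkl
        seed=1,
)
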