def load_trained_policy_and_mdp(env_name, policy_state_str):
    """
    Creates the specialized MDP and policy objects needed to sample expert
    trajectories for a given environment.

    Returns:
        mdp: An instance of `RLGymMDP`, similar to a real gym env except with
            customized obs/action spaces and an internal `RLGymSim` object.
        policy: The agent's policy, encoded as either rl.GaussianPolicy for
            continuous actions, or rl.GibbsPolicy for discrete actions.
        train_args: A dictionary of arguments (like argparse dicts) based on
            the trained policy's TRPO run.
    """
    import json

    import gym  # imported for its side effect of registering environments
    import h5py

    import policyopt
    from policyopt import nn, rl, util
    from environments import rlgymenv

    # Load the saved state
    policy_file, policy_key = util.split_h5_name(policy_state_str)
    print('Loading policy parameters from %s in %s' % (policy_key, policy_file))
    with h5py.File(policy_file, 'r') as f:
        train_args = json.loads(f.attrs['args'])

    # Initialize the MDP
    print('Loading environment %s' % env_name)
    mdp = rlgymenv.RLGymMDP(env_name)
    print('MDP observation space, action space sizes: %d, %d\n' % (
        mdp.obs_space.dim, mdp.action_space.storage_size))

    # Initialize the policy. Older checkpoints store a boolean
    # 'enable_obsnorm' flag; newer ones store an 'obsnorm_mode' string.
    nn.reset_global_scope()
    if 'enable_obsnorm' in train_args:
        enable_obsnorm = bool(train_args['enable_obsnorm'])
    else:
        enable_obsnorm = train_args['obsnorm_mode'] != 'none'

    if isinstance(mdp.action_space, policyopt.ContinuousSpace):
        policy_cfg = rl.GaussianPolicyConfig(
            hidden_spec=train_args['policy_hidden_spec'],
            min_stdev=0.,
            init_logstdev=0.,
            enable_obsnorm=enable_obsnorm)
        policy = rl.GaussianPolicy(
            policy_cfg, mdp.obs_space, mdp.action_space, 'GaussianPolicy')
    else:
        policy_cfg = rl.GibbsPolicyConfig(
            hidden_spec=train_args['policy_hidden_spec'],
            enable_obsnorm=enable_obsnorm)
        policy = rl.GibbsPolicy(
            policy_cfg, mdp.obs_space, mdp.action_space, 'GibbsPolicy')

    # Load the policy parameters
    policy.load_h5(policy_file, policy_key)

    return mdp, policy, train_args
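# A minimal usage sketch of the returned objects, rolling out one trajectory
# with the loaded policy. The `sim_single(policy_fn, obsfeat_fn, maxlen)` call,
# the `sample_actions` batching convention, the `traj.r_T` reward array, and
# the checkpoint path are all assumptions about the surrounding policyopt API,
# not confirmed by this file.
def _example_rollout():
    mdp, policy, train_args = load_trained_policy_and_mdp(
        'CartPole-v0',                                        # any registered env name
        'trained_policies/expert.h5/snapshots/iter0000500')   # hypothetical 'file.h5/key'
                                                              # string for split_h5_name

    # Assumed: mdp.sim_single runs one episode, querying policy_fn on batches
    # of observation features; here raw observations are used as features.
    traj = mdp.sim_single(
        policy_fn=lambda obsfeat_B_Df: policy.sample_actions(obsfeat_B_Df),
        obsfeat_fn=lambda obs_B_Do: obs_B_Do,
        maxlen=1000)
    print('Sampled a trajectory of length %d with return %.2f' % (
        len(traj.r_T), traj.r_T.sum()))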