def create_policy(env, seed, c, name='learner_policy'):
    """Instantiate a policy from the config dict *c*.

    Args:
        env: environment providing ``observation_space`` / ``action_space``
            (Box-style spaces with a ``shape`` attribute).
        seed: random seed forwarded to the policy constructor.
        c: config dict with keys ``policy_cls``, ``nor_cls``, ``nor_kwargs``,
            and ``pol_kwargs``.
        name: variable-scope name for the policy.

    Returns:
        The constructed policy object.
    """
    # Resolve the policy class by name from the Pol module.
    policy_cls = getattr(Pol, c['policy_cls'])
    normalizer_builder = Nor.create_build_nor_from_str(c['nor_cls'], c['nor_kwargs'])
    return policy_cls(
        env.observation_space.shape[0],
        env.action_space.shape[0],
        name=name,
        seed=seed,
        build_nor=normalizer_builder,
        **c['pol_kwargs'])
def create_advantage_estimator(policy, adv_configs, name='value_function_approximator'):
    """Build an AdvantageEstimator for *policy* from a config dict.

    The ``vfn_params`` sub-config describes the value-function approximator
    (a SupervisedLearner resolved from ``Sup``); the remaining keys are
    forwarded to ``AdvantageEstimator``. *adv_configs* is deep-copied, so the
    caller's dict is left untouched.

    Args:
        policy: policy object exposing ``ob_dim``.
        adv_configs: dict with a ``vfn_params`` entry (keys ``nor_cls``,
            ``nor_kwargs``, ``fun_class``, ``fun_kwargs``) plus any
            AdvantageEstimator kwargs.
        name: variable-scope name for the value function.

    Returns:
        The constructed AdvantageEstimator.
    """
    cfg = copy.deepcopy(adv_configs)
    # Pull out and consume the value-function sub-config.
    vfn_cfg = cfg.pop('vfn_params')
    normalizer_builder = Nor.create_build_nor_from_str(vfn_cfg['nor_cls'], vfn_cfg['nor_kwargs'])
    vfn_cls = getattr(Sup, vfn_cfg['fun_class'])
    # Value function maps observations to a scalar value estimate (out_dim=1).
    cfg['vfn'] = vfn_cls(
        policy.ob_dim, 1,
        name=name,
        build_nor=normalizer_builder,
        **vfn_cfg['fun_kwargs'])
    return AdvantageEstimator(policy, **cfg)
def create_predict(config, out=None):
    """Create the ``predict`` function of a supervised learner.

    The learner maps concatenated (state, action) inputs to either the next
    state (``out='dyn'``, output dim defaults to the state dim) or a scalar
    reward (``out='rw'``).

    NOTE(review): this reads a free variable ``env`` (module-level, not a
    parameter, unlike ``create_policy``) — confirm it is in scope at call time.

    Args:
        config: dict with keys ``fun_cls``, ``nor_cls``, ``nor_kwargs``,
            ``name``, and ``fun_kwargs``.
        out: ``'dyn'`` or ``'rw'``; anything else raises.

    Returns:
        The bound ``predict`` method of the constructed learner.

    Raises:
        ValueError: if *out* is neither ``'dyn'`` nor ``'rw'``.
    """
    initial_state = env.reset()
    st_dim = len(initial_state)
    ac_dim = env.action_space.shape[0]
    learner_cls = getattr(Sup, config['fun_cls'])
    normalizer_builder = Nor.create_build_nor_from_str(config['nor_cls'], config['nor_kwargs'])
    if out == 'dyn':
        # XXX learn residue for dynamics!
        out_dim = st_dim
    elif out == 'rw':
        out_dim = 1
    else:
        raise ValueError('Invalid out: {}'.format(out))
    learner = learner_cls(
        st_dim + ac_dim, out_dim,
        name=config['name'],
        build_nor=normalizer_builder,
        **config['fun_kwargs'])
    return learner.predict