def main(c):
    # Setup logz and save c
    ps.configure_log(c)

    # Create mdp and fix randomness
    mdp = ps.setup_mdp(c['mdp'], c['seed'])

    # Create learnable objects
    ob_shape = mdp.ob_shape
    ac_shape = mdp.ac_shape
    if mdp.use_time_info:
        ob_shape = (np.prod(ob_shape) + 1,)

    # Define the learner
    policy = RobustKerasMLPGassian(ob_shape, ac_shape, name='policy',
                                   init_lstd=c['init_lstd'],
                                   units=c['policy_units'])
    vfn = SuperRobustKerasMLP(ob_shape, (1,), name='value function',
                              units=c['value_units'])

    # Create algorithm
    alg = PolicyGradient(policy, vfn, gamma=mdp.gamma, horizon=mdp.horizon,
                         **c['algorithm'])

    # Let's do some experiments!
    exp = Exp.Experimenter(alg, mdp, c['experimenter']['ro_kwargs'])
    exp.run(**c['experimenter']['run_kwargs'])
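# Example usage (a minimal sketch, not part of the original script): a
# configuration dict with the keys main() reads above. All concrete values,
# and the contents of the 'mdp', 'algorithm', and 'experimenter' sub-dicts,
# are hypothetical and depend on how ps.setup_mdp, PolicyGradient, and
# Exp.Experimenter are defined in the surrounding project.
pg_config = {
    'seed': 0,
    'init_lstd': -1,                  # initial log-std of the Gaussian policy
    'policy_units': (64, 64),         # policy MLP hidden layer sizes
    'value_units': (128, 128),        # value-function MLP hidden layer sizes
    'mdp': {'envid': 'CartPole-v1'},  # hypothetical; setup_mdp may expect more keys
    'algorithm': {},                  # extra PolicyGradient kwargs
    'experimenter': {
        'ro_kwargs': {},              # rollout options for the Experimenter
        'run_kwargs': {},             # e.g. number of training iterations
    },
}
# main(pg_config)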
def main(c):
    # Setup logz and save c
    ps.configure_log(c)

    # Create mdp and fix randomness
    mdp = ps.setup_mdp(c['mdp'], c['seed'])

    # Create learnable objects
    ob_shape = mdp.ob_shape
    ac_shape = mdp.ac_shape
    if mdp.use_time_info:
        ob_shape = (np.prod(ob_shape) + 1,)

    # Define the learner
    policy = RobustKerasMLPGassian(ob_shape, ac_shape, name='learner_policy',
                                   init_lstd=c['init_lstd'],
                                   units=c['policy_units'])
    vfn = SuperRobustKerasMLP(ob_shape, (1,), name='learner_vfn',
                              units=c['value_units'])
    policy, vfn = create_learner(c['mdp']['envid'], c['seed'], policy, vfn)

    # Define experts
    if c['use_experts']:
        experts = create_experts(c['mdp']['envid'], **c['expert_info'])
        if c['n_experts'] is not None and len(experts) > c['n_experts']:
            experts = experts[:c['n_experts']]
        if len(experts) < 1:
            experts = None
    else:
        experts = None

    # Create algorithm
    ro_by_n_samples = c['experimenter']['ro_kwargs'] is not None
    alg = Mamba(policy, vfn, experts=experts,
                horizon=mdp.horizon, gamma=mdp.gamma,
                mix_unroll_kwargs={'ro_by_n_samples': ro_by_n_samples},
                **c['algorithm'])

    # Let's do some experiments!
    exp = Exp.Experimenter(alg, mdp, c['experimenter']['ro_kwargs'])
    exp.run(seed=c['seed'], **c['experimenter']['run_kwargs'])
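# Example usage (a minimal sketch): the Mamba script reads the same keys as the
# policy-gradient config sketched above, plus the expert-related ones below.
# The values and the structure of 'expert_info' are hypothetical and depend on
# how create_experts and create_learner are implemented in this project.
mamba_config = {
    'seed': 0,
    'init_lstd': -1,
    'policy_units': (64, 64),
    'value_units': (128, 128),
    'mdp': {'envid': 'CartPole-v1'},  # hypothetical; setup_mdp may expect more keys
    'use_experts': True,              # if False, Mamba runs without expert oracles
    'n_experts': 2,                   # keep at most this many experts (None = all)
    'expert_info': {},                # kwargs forwarded to create_experts
    'algorithm': {},                  # extra Mamba kwargs
    'experimenter': {'ro_kwargs': {}, 'run_kwargs': {}},
}
# main(mamba_config)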
def main(c):
    # Setup logz and save c
    ps.configure_log(c)

    # Create mdp and fix randomness
    mdp = ps.setup_mdp(c['mdp'], c['seed'])

    # Create learnable objects
    ob_shape = mdp.ob_shape
    ac_shape = mdp.ac_shape
    if mdp.use_time_info:
        ob_shape = (np.prod(ob_shape) + 1,)

    # Define the expert
    expert = RobustKerasMLPGassian(ob_shape, ac_shape, name='policy',
                                   init_lstd=-1, units=(64,))
    expert.restore('./experts', name='cp1000_mlp_policy_64_seed_9')
    expert.name = 'expert'

    # Define the learner
    policy = RobustKerasMLPGassian(ob_shape, ac_shape, name='policy',
                                   init_lstd=-1, units=(128, 128))
    vfn = SuperRobustKerasMLP(ob_shape, (1,), name='expert value function',
                              units=(256, 256))

    # Create algorithm
    alg = AggreVaTeD(policy, expert, vfn, horizon=mdp.horizon, gamma=mdp.gamma,
                     **c['algorithm'])

    # Let's do some experiments!
    exp = Exp.Experimenter(alg, mdp, c['experimenter']['rollout_kwargs'])
    exp.run(**c['experimenter']['run_kwargs'])
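# Example usage (a minimal sketch): the AggreVaTeD script hard-codes the network
# sizes and the expert checkpoint path, so its config only needs the keys below.
# Note that it reads 'rollout_kwargs' rather than 'ro_kwargs'. All values are
# hypothetical and must match how ps.setup_mdp and AggreVaTeD are configured in
# this project.
aggrevated_config = {
    'seed': 0,
    'mdp': {'envid': 'CartPole-v1'},  # hypothetical; must match the restored expert
    'algorithm': {},                  # extra AggreVaTeD kwargs
    'experimenter': {
        'rollout_kwargs': {},         # rollout options for the Experimenter
        'run_kwargs': {},             # e.g. number of training iterations
    },
}
# main(aggrevated_config)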