Example #1
import numpy as np

# NOTE: the imports below are assumptions about the host repository's
# layout; adjust the module paths to match the actual package.
from rl import experimenter as Exp
from rl.algorithms import PolicyGradient
from rl.function_approximators import (RobustKerasMLPGassian,
                                       SuperRobustKerasMLP)
import project_setup as ps  # assumed helper exposing configure_log / setup_mdp


def main(c):

    # Set up logz and save the config c
    ps.configure_log(c)

    # Create mdp and fix randomness
    mdp = ps.setup_mdp(c['mdp'], c['seed'])

    # Create learnable objects
    ob_shape = mdp.ob_shape
    ac_shape = mdp.ac_shape
    if mdp.use_time_info:
        # Flatten the observation and append one slot for a time index
        ob_shape = (np.prod(ob_shape) + 1,)

    # Define the learner
    policy = RobustKerasMLPGassian(ob_shape,
                                   ac_shape,
                                   name='policy',
                                   init_lstd=c['init_lstd'],
                                   units=c['policy_units'])

    vfn = SuperRobustKerasMLP(ob_shape, (1, ),
                              name='value function',
                              units=c['value_units'])
    # Create algorithm
    alg = PolicyGradient(policy,
                         vfn,
                         gamma=mdp.gamma,
                         horizon=mdp.horizon,
                         **c['algorithm'])

    # Let's do some experiments!
    exp = Exp.Experimenter(alg, mdp, c['experimenter']['ro_kwargs'])
    exp.run(**c['experimenter']['run_kwargs'])
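
For reference, here is a minimal shape of the config dict c that this script reads. The keys mirror the accesses in main above; the values are illustrative assumptions, not the repository's defaults.

# Hypothetical config; keys mirror what main(c) reads above.
c = {
    'mdp': {'envid': 'Hopper-v2'},     # env spec forwarded to ps.setup_mdp (envid assumed)
    'seed': 0,
    'init_lstd': -1.0,                 # initial log-std of the Gaussian policy
    'policy_units': (64, 64),
    'value_units': (128, 128),
    'algorithm': {},                   # extra kwargs forwarded to PolicyGradient
    'experimenter': {
        'ro_kwargs': {},               # rollout options for Exp.Experimenter
        'run_kwargs': {},              # options forwarded to exp.run
    },
}
main(c)
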
Example #2
# Assumed imports, in addition to those shown in Example #1; the module
# paths are again guesses at the host repository's layout.
from rl.algorithms import Mamba
from scripts.helpers import create_learner, create_experts


def main(c):

    # Set up logz and save the config c
    ps.configure_log(c)

    # Create mdp and fix randomness
    mdp = ps.setup_mdp(c['mdp'], c['seed'])

    # Create learnable objects
    ob_shape = mdp.ob_shape
    ac_shape = mdp.ac_shape
    if mdp.use_time_info:
        ob_shape = (np.prod(ob_shape) + 1, )

    # Define the learner
    policy = RobustKerasMLPGassian(ob_shape,
                                   ac_shape,
                                   name='learner_policy',
                                   init_lstd=c['init_lstd'],
                                   units=c['policy_units'])
    vfn = SuperRobustKerasMLP(ob_shape, (1, ),
                              name='learner_vfn',
                              units=c['value_units'])
    policy, vfn = create_learner(c['mdp']['envid'], c['seed'], policy, vfn)

    # Define experts
    if c['use_experts']:
        experts = create_experts(c['mdp']['envid'], **c['expert_info'])
        if c['n_experts'] is not None and len(experts) > c['n_experts']:
            experts = experts[:c['n_experts']]  # keep at most n_experts
        if len(experts) < 1:
            experts = None  # fall back to expert-free mode
    else:
        experts = None

    # Create algorithm
    # Presumably: rollouts are budgeted by sample count when rollout kwargs are given
    ro_by_n_samples = c['experimenter']['ro_kwargs'] is not None
    alg = Mamba(policy,
                vfn,
                experts=experts,
                horizon=mdp.horizon,
                gamma=mdp.gamma,
                mix_unroll_kwargs={'ro_by_n_samples': ro_by_n_samples},
                **c['algorithm'])

    # Let's do some experiments!
    exp = Exp.Experimenter(alg, mdp, c['experimenter']['ro_kwargs'])
    exp.run(
        seed=c['seed'],
        **c['experimenter']['run_kwargs'],
    )
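
Compared with Example #1, this script reads three extra keys from c. A hypothetical fragment (values illustrative):

# Additional keys read by the Mamba example; values are illustrative.
c.update({
    'use_experts': True,
    'n_experts': 2,        # None keeps every expert returned by create_experts
    'expert_info': {},     # forwarded to create_experts(...); contents are repo-specific
})
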
Example #3
# Assumed imports, in addition to those shown in Example #1.
from rl.algorithms import AggreVaTeD


def main(c):

    # Set up logz and save the config c
    ps.configure_log(c)

    # Create mdp and fix randomness
    mdp = ps.setup_mdp(c['mdp'], c['seed'])

    # Create learnable objects
    ob_shape = mdp.ob_shape
    ac_shape = mdp.ac_shape
    if mdp.use_time_info:
        ob_shape = (np.prod(ob_shape) + 1,)

    # Define the expert and restore it from a saved checkpoint
    expert = RobustKerasMLPGassian(ob_shape, ac_shape, name='policy',
                                   init_lstd=-1,
                                   units=(64,))  # must match the checkpoint's architecture
    expert.restore('./experts', name='cp1000_mlp_policy_64_seed_9')
    expert.name = 'expert'  # rename to distinguish it from the learner

    # Define the learner
    policy = RobustKerasMLPGassian(ob_shape, ac_shape, name='policy',
                                   init_lstd=-1,
                                   units=(128, 128))

    vfn = SuperRobustKerasMLP(ob_shape, (1,), name='expert value function',
                              units=(256, 256))

    # Create algorithm
    alg = AggreVaTeD(policy, expert, vfn,
                     horizon=mdp.horizon, gamma=mdp.gamma,
                     **c['algorithm'])

    # Let's do some experiments!
    exp = Exp.Experimenter(alg, mdp, c['experimenter']['rollout_kwargs'])
    exp.run(**c['experimenter']['run_kwargs'])
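
All three examples widen the observation shape to np.prod(ob_shape) + 1 when mdp.use_time_info is set. Below is a minimal sketch of what such a time augmentation typically looks like; the helper is hypothetical and not taken from the repository.

import numpy as np

def augment_ob_with_time(ob, t, horizon):
    # Flatten the raw observation and append the normalized time t / horizon.
    # This is why the learners above are built with shape (np.prod(ob_shape) + 1,).
    return np.concatenate([np.ravel(ob), [t / horizon]])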