Example #1
def create_policy(env, seed, c, name='learner_policy'):
    # Look up the policy class by name from the Pol module.
    pol_cls = getattr(Pol, c['policy_cls'])
    ob_dim = env.observation_space.shape[0]
    ac_dim = env.action_space.shape[0]
    # Build the observation normalizer from its string spec.
    build_nor = Nor.create_build_nor_from_str(c['nor_cls'], c['nor_kwargs'])
    policy = pol_cls(ob_dim,
                     ac_dim,
                     name=name,
                     seed=seed,
                     build_nor=build_nor,
                     **c['pol_kwargs'])
    return policy
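A minimal usage sketch follows. The config keys mirror the lookups inside create_policy, but the concrete class names ('GaussianPolicy', 'NormalizerStd'), the environment, and the empty kwargs are illustrative assumptions, not values confirmed by the snippet.

import gym

# Hypothetical config; 'GaussianPolicy' and 'NormalizerStd' are assumed
# names resolved via getattr(Pol, ...) and Nor.create_build_nor_from_str.
config = {
    'policy_cls': 'GaussianPolicy',
    'nor_cls': 'NormalizerStd',
    'nor_kwargs': {},
    'pol_kwargs': {},
}
env = gym.make('Pendulum-v1')
policy = create_policy(env, seed=0, c=config)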
Example #2
import copy

def create_advantage_estimator(policy,
                               adv_configs,
                               name='value_function_approximator'):
    adv_configs = copy.deepcopy(adv_configs)  # avoid mutating the caller's dict
    # Create the value function (a SupervisedLearner).
    c = adv_configs['vfn_params']
    build_nor = Nor.create_build_nor_from_str(c['nor_cls'], c['nor_kwargs'])
    vfn_cls = getattr(Sup, c['fun_class'])
    vfn = vfn_cls(policy.ob_dim,
                  1,  # the value function has a scalar output
                  name=name,
                  build_nor=build_nor,
                  **c['fun_kwargs'])
    # Create the advantage estimator, replacing the value-function
    # config with the instantiated learner.
    adv_configs.pop('vfn_params')
    adv_configs['vfn'] = vfn
    ae = AdvantageEstimator(policy, **adv_configs)
    return ae
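For orientation, here is a hedged sketch of the adv_configs dictionary this function consumes. The 'vfn_params' sub-dict and its keys follow the lookups in the body; the class names and the extra 'gamma' keyword are assumptions for illustration only.

# Illustrative adv_configs; 'MLPFunction', 'NormalizerStd', and 'gamma'
# are assumed names, not confirmed by the snippet above.
adv_configs = {
    'vfn_params': {
        'fun_class': 'MLPFunction',   # resolved via getattr(Sup, ...)
        'nor_cls': 'NormalizerStd',
        'nor_kwargs': {},
        'fun_kwargs': {},
    },
    'gamma': 0.99,  # remaining keys are forwarded to AdvantageEstimator
}
ae = create_advantage_estimator(policy, adv_configs)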
Example #3
    def create_predict(config, out=None):
        # Create the predict function of a supervised learner.
        # out selects the output dimension: st_dim for a dynamics model,
        # 1 for a reward model.
        st = env.reset()  # env is captured from the enclosing scope
        st_dim, ac_dim = len(st), env.action_space.shape[0]
        cls = getattr(Sup, config['fun_cls'])
        build_nor = Nor.create_build_nor_from_str(config['nor_cls'], config['nor_kwargs'])
        if out == 'dyn':
            out_dim = st_dim
        elif out == 'rw':
            out_dim = 1
        else:
            raise ValueError('Invalid out: {}'.format(out))

        # XXX learn the residual for dynamics!
        svl = cls(st_dim + ac_dim, out_dim, name=config['name'],
                  build_nor=build_nor, **config['fun_kwargs'])
        return svl.predict
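Assuming env is in scope (create_predict closes over it), the two predictors might be built as below; out='dyn' yields a next-state model over concatenated (state, action) inputs and out='rw' a scalar reward model. The config values here are placeholders, not names confirmed by the snippet.

# Assumed configs; the class and normalizer names are illustrative.
dyn_config = {'fun_cls': 'MLPFunction', 'nor_cls': 'NormalizerStd',
              'nor_kwargs': {}, 'fun_kwargs': {}, 'name': 'dynamics_model'}
rw_config = dict(dyn_config, name='reward_model')
predict_dyn = create_predict(dyn_config, out='dyn')  # (st, ac) -> next state
predict_rw = create_predict(rw_config, out='rw')     # (st, ac) -> reward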