Пример #1
0
    def predict_symbolic(self, mx, Sx=None, **kwargs):
        """Build the symbolic prediction graph for this policy.

        Lazily constructs a default dropout-MLP network spec and the
        network itself on first use, then delegates the actual symbolic
        prediction to the parent class.

        Args:
            mx: symbolic mean of the input state.
            Sx: symbolic covariance of the input state, or None for a
                deterministic prediction.
            **kwargs: forwarded to the parent's predict_symbolic.

        Returns:
            The predicted mean M when Sx is None; otherwise the tuple
            (M, S, V) as returned by the parent class (presumably mean,
            covariance and input-output cross-covariance — confirm
            against the parent implementation).
        """
        # Build a default 2x50 dropout MLP spec the first time we're called.
        if self.network_spec is None:
            self.network_spec = dropout_mlp(
                input_dims=self.D,
                output_dims=self.E,
                hidden_dims=[50]*2,
                p=0.1, p_input=0.0,
                nonlinearities=lasagne.nonlinearities.rectify,
                output_nonlinearity=self.sat_func,
                dropout_class=layers.DenseDropoutLayer,
                name=self.name)

        if self.network is None:
            # Reuse previously saved parameters when available.
            params = (self.network_params
                      if self.network_params is not None
                      else {})

            self.network = self.build_network(self.network_spec,
                                              params=params,
                                              name=self.name)

        ret = super(NNPolicy, self).predict_symbolic(mx, Sx, **kwargs)

        if Sx is None:
            # Deterministic path: the parent may return a sequence whose
            # first element is the mean.
            if isinstance(ret, (list, tuple)):
                ret = ret[0]
            return ret
        else:
            M, S, V = ret
            return M, S, V
Пример #2
0
def setup_mc_pilco_experiment(params, pol=None, dyn=None):
    """Assemble the components needed to run an MC-PILCO experiment.

    Args:
        params: experiment configuration dict; must contain
            'state0_dist', 'policy', 'dynamics_model' and 'optimizer',
            and may contain 'pol_spec' / 'dyn_spec' network specs.
        pol: optional pre-built policy; a default NN policy is created
            when None.
        dyn: optional pre-built dynamics model; a default BNN is created
            when None.

    Returns:
        Tuple (p0, pol, dyn, exp, polopt, learner).
    """
    # Initial state distribution and the state dimensionality derived
    # from its mean.
    init_dist = params['state0_dist']
    state_dims = init_dist.mean.size

    # Policy: build a default dropout MLP controller unless supplied.
    if pol is None:
        pol = control.NNPolicy(state_dims, **params['policy'])
        spec = params.get('pol_spec', None)
        if spec is None:
            spec = regression.mlp(
                input_dims=pol.D,
                output_dims=pol.E,
                hidden_dims=[50] * 2,
                p=0.05,
                p_input=0.0,
                nonlinearities=nonlinearities.rectify,
                output_nonlinearity=pol.sat_func,
                dropout_class=regression.DenseDropoutLayer,
                name=pol.name)
        pol.network = pol.build_network(spec)

    # Dynamics model: build a default log-normal-dropout BNN unless
    # supplied.
    if dyn is None:
        dyn = regression.BNN(**params['dynamics_model'])
        spec = params.get('dyn_spec', None)
        if spec is None:
            # Heteroscedastic models predict a variance per output dim,
            # doubling the output size.
            out_dims = dyn.E * 2 if dyn.heteroscedastic else dyn.E
            spec = regression.dropout_mlp(
                input_dims=dyn.D,
                output_dims=out_dims,
                hidden_dims=[200] * 2,
                p=0.1,
                p_input=0.1,
                nonlinearities=nonlinearities.rectify,
                dropout_class=regression.DenseLogNormalDropoutLayer,
                name=dyn.name)
        dyn.network = dyn.build_network(spec)

    # Fresh experience dataset for collected rollouts.
    exp = ExperienceDataset()

    # Policy optimizer.
    polopt = optimizers.SGDOptimizer(**params['optimizer'])

    # Module (or class) providing get_loss and build_rollout.
    learner = algorithms.mc_pilco

    return init_dist, pol, dyn, exp, polopt, learner
Пример #3
0
def get_scenario(experiment_id, *args, **kwargs):
    """Build the configuration for one of the predefined experiments.

    Args:
        experiment_id: integer in [1, 9] selecting the PILCO variant
            (controller type x dynamics-model type combination).
        *args, **kwargs: forwarded to the per-experiment parameter
            builders (experiment1_params / experiment2_params).

    Returns:
        Tuple (scenario_params, pol, dyn, learner_setup). pol and/or dyn
        may be None, in which case learner_setup builds defaults.

    Raises:
        ValueError: if experiment_id does not match a known experiment.
    """
    pol = None
    dyn = None

    if experiment_id == 1:
        # PILCO with rbf controller
        scenario_params = experiment1_params(*args, **kwargs)
        learner_setup = experiment_utils.setup_pilco_experiment
        params = scenario_params[0]
        pol = control.RBFPolicy(**params['policy'])

    elif experiment_id == 2:
        # PILCO with nn controller 1
        scenario_params = experiment1_params(*args, **kwargs)
        learner_setup = experiment_utils.setup_pilco_experiment
        params = scenario_params[0]

        pol_spec = dict(hidden_dims=[200] * 2,
                        nonlinearities=regression.nonlinearities.rectify,
                        W_init=lasagne.init.GlorotNormal(),
                        build_fn=regression.mlp)
        # BUG FIX: `dyn` is None in this branch, so the original
        # `dyn.E` raised AttributeError. Use the state dimensionality
        # from the initial state distribution instead, as the
        # experiment 9 branch does.
        pol = control.NNPolicy(params['state0_dist'].mean.size,
                               network_spec=pol_spec,
                               **params['policy'])

    elif experiment_id == 3:
        # mc PILCO with RBF controller and binary dropout mlp dynamics
        scenario_params = experiment2_params(*args, **kwargs)
        learner_setup = experiment_utils.setup_mc_pilco_experiment
        params = scenario_params[0]
        pol = control.RBFPolicy(**params['policy'])

        # init dyn to use dropout
        dyn_spec = dict(hidden_dims=[200] * 2,
                        p=0.1,
                        p_input=0.0,
                        nonlinearities=regression.nonlinearities.rectify,
                        W_init=lasagne.init.GlorotNormal(),
                        dropout_class=regression.layers.DenseDropoutLayer,
                        build_fn=regression.dropout_mlp)
        dyn = regression.BNN(network_spec=dyn_spec, **params['dynamics_model'])

    elif experiment_id == 4:
        # mc PILCO with NN controller and binary dropout mlp dynamics
        scenario_params = experiment2_params(*args, **kwargs)
        learner_setup = experiment_utils.setup_mc_pilco_experiment
        params = scenario_params[0]

        # init dyn to use dropout
        dyn_spec = dict(hidden_dims=[200] * 2,
                        p=0.1,
                        p_input=0.0,
                        nonlinearities=regression.nonlinearities.rectify,
                        W_init=lasagne.init.GlorotNormal(),
                        dropout_class=regression.layers.DenseDropoutLayer,
                        build_fn=regression.dropout_mlp)
        dyn = regression.BNN(network_spec=dyn_spec, **params['dynamics_model'])

        # init policy (input dims = dynamics output dims = state dims)
        pol_spec = dict(hidden_dims=[200] * 2,
                        nonlinearities=regression.nonlinearities.rectify,
                        W_init=lasagne.init.GlorotNormal(),
                        build_fn=regression.mlp)
        pol = control.NNPolicy(dyn.E,
                               network_spec=pol_spec,
                               **params['policy'])

    elif experiment_id == 5:
        # mc PILCO with RBF controller and log normal dropout dynamics
        scenario_params = experiment2_params(*args, **kwargs)
        learner_setup = experiment_utils.setup_mc_pilco_experiment
        params = scenario_params[0]
        pol = control.RBFPolicy(**params['policy'])

        # init dyn to use dropout
        # for the log normal dropout layers, the dropout probabilities
        # are dummy variables to enable dropout (not actual dropout probs)
        dyn_spec = dict(
            hidden_dims=[200] * 2,
            p=0.1,
            p_input=0.0,
            nonlinearities=regression.nonlinearities.rectify,
            W_init=lasagne.init.GlorotNormal(),
            dropout_class=regression.layers.DenseLogNormalDropoutLayer,
            build_fn=regression.dropout_mlp)
        dyn = regression.BNN(network_spec=dyn_spec, **params['dynamics_model'])

    elif experiment_id == 6:
        # mc PILCO with NN controller and log normal dropout dynamics
        scenario_params = experiment2_params(*args, **kwargs)
        learner_setup = experiment_utils.setup_mc_pilco_experiment
        params = scenario_params[0]

        # init dyn to use dropout
        dyn_spec = dict(
            hidden_dims=[200] * 2,
            p=0.1,
            p_input=0.0,
            nonlinearities=regression.nonlinearities.rectify,
            W_init=lasagne.init.GlorotNormal(),
            dropout_class=regression.layers.DenseLogNormalDropoutLayer,
            build_fn=regression.dropout_mlp)
        dyn = regression.BNN(network_spec=dyn_spec, **params['dynamics_model'])

        # init policy
        pol_spec = dict(hidden_dims=[200] * 2,
                        nonlinearities=regression.nonlinearities.rectify,
                        W_init=lasagne.init.GlorotNormal(),
                        build_fn=regression.mlp)
        pol = control.NNPolicy(dyn.E,
                               network_spec=pol_spec,
                               **params['policy'])

    elif experiment_id == 7:
        # mc PILCO with dropout controller and binary dropout mlp dynamics
        scenario_params = experiment2_params(*args, **kwargs)
        learner_setup = experiment_utils.setup_mc_pilco_experiment
        params = scenario_params[0]

        # init dyn to use dropout
        dyn_spec = dict(hidden_dims=[200] * 2,
                        p=0.1,
                        p_input=0.0,
                        nonlinearities=regression.nonlinearities.rectify,
                        W_init=lasagne.init.GlorotNormal(),
                        dropout_class=regression.layers.DenseDropoutLayer,
                        build_fn=regression.dropout_mlp)
        dyn = regression.BNN(network_spec=dyn_spec, **params['dynamics_model'])

        # init policy (uses binary dropout as well)
        pol_spec = dict(hidden_dims=[200] * 2,
                        p=0.1,
                        p_input=0.0,
                        nonlinearities=regression.nonlinearities.rectify,
                        W_init=lasagne.init.GlorotNormal(),
                        dropout_class=regression.layers.DenseDropoutLayer,
                        build_fn=regression.dropout_mlp)
        pol = control.NNPolicy(dyn.E,
                               network_spec=pol_spec,
                               **params['policy'])

    elif experiment_id == 8:
        # mc PILCO with dropout controller and log-normal dropout dynamics
        scenario_params = experiment2_params(*args, **kwargs)
        learner_setup = experiment_utils.setup_mc_pilco_experiment
        params = scenario_params[0]

        # init dyn to use dropout
        dyn_spec = dict(
            hidden_dims=[200] * 2,
            p=0.1,
            p_input=0.0,
            nonlinearities=regression.nonlinearities.rectify,
            W_init=lasagne.init.GlorotNormal(),
            dropout_class=regression.layers.DenseLogNormalDropoutLayer,
            build_fn=regression.dropout_mlp)
        dyn = regression.BNN(network_spec=dyn_spec, **params['dynamics_model'])

        # init policy
        pol_spec = dict(hidden_dims=[200] * 2,
                        p=0.1,
                        p_input=0.0,
                        nonlinearities=regression.nonlinearities.rectify,
                        W_init=lasagne.init.GlorotNormal(),
                        dropout_class=regression.layers.DenseDropoutLayer,
                        build_fn=regression.dropout_mlp)
        pol = control.NNPolicy(dyn.E,
                               network_spec=pol_spec,
                               **params['policy'])

    elif experiment_id == 9:
        # mc PILCO with dropout controller and concrete dropout dynamics
        scenario_params = experiment2_params(*args, **kwargs)
        learner_setup = experiment_utils.setup_mc_pilco_experiment
        params = scenario_params[0]
        p0 = params['state0_dist']

        # init dyn to use dropout
        dyn = regression.BNN(**params['dynamics_model'])
        # heteroscedastic models predict a variance per output dim
        odims = 2 * dyn.E if dyn.heteroscedastic else dyn.E
        dyn_spec = regression.dropout_mlp(
            input_dims=dyn.D,
            output_dims=odims,
            hidden_dims=[200] * 2,
            p=0.1,
            p_input=0.0,
            nonlinearities=regression.nonlinearities.rectify,
            W_init=lasagne.init.GlorotNormal(),
            dropout_class=regression.layers.DenseConcreteDropoutLayer,
            name=dyn.name)
        # assign the built network, consistent with the other call sites
        dyn.network = dyn.build_network(dyn_spec)

        # init policy
        pol = control.NNPolicy(p0.mean.size, **params['policy'])
        pol_spec = regression.dropout_mlp(
            input_dims=pol.D,
            output_dims=pol.E,
            hidden_dims=[200] * 2,
            p=0.1,
            p_input=0.0,
            nonlinearities=regression.nonlinearities.rectify,
            W_init=lasagne.init.GlorotNormal(),
            output_nonlinearity=pol.sat_func,
            dropout_class=regression.layers.DenseDropoutLayer,
            name=pol.name)
        pol.network = pol.build_network(pol_spec)

    else:
        # Previously an unknown id fell through to a NameError on
        # `scenario_params` at the return statement; fail loudly instead.
        raise ValueError(
            'Unknown experiment_id: %s (expected 1-9)' % (experiment_id,))

    return scenario_params, pol, dyn, learner_setup