Example #1
def setup_mc_pilco_experiment(params, pol=None, dyn=None):
    # initial state distribution
    p0 = params['state0_dist']
    D = p0.mean.size
    pol_spec = params.get('pol_spec', None)
    dyn_spec = params.get('dyn_spec', None)

    # init policy
    if pol is None:
        pol = control.NNPolicy(D, **params['policy'])
        if pol_spec is None:
            pol_spec = regression.mlp(
                input_dims=pol.D,
                output_dims=pol.E,
                hidden_dims=[50] * 2,
                p=0.05,
                p_input=0.0,
                nonlinearities=nonlinearities.rectify,
                output_nonlinearity=pol.sat_func,
                dropout_class=regression.DenseDropoutLayer,
                name=pol.name)
        pol.network = pol.build_network(pol_spec)

    # init dynmodel
    if dyn is None:
        dyn = regression.BNN(**params['dynamics_model'])
        if dyn_spec is None:
            odims = 2 * dyn.E if dyn.heteroscedastic else dyn.E
            dyn_spec = regression.dropout_mlp(
                input_dims=dyn.D,
                output_dims=odims,
                hidden_dims=[200] * 2,
                p=0.1,
                p_input=0.1,
                nonlinearities=nonlinearities.rectify,
                dropout_class=regression.DenseLogNormalDropoutLayer,
                name=dyn.name)
        dyn.network = dyn.build_network(dyn_spec)

    # create experience dataset
    exp = ExperienceDataset()

    # init policy optimizer
    polopt = optimizers.SGDOptimizer(**params['optimizer'])

    # module where get_loss and build_rollout are defined
    # (can also be a class)
    learner = algorithms.mc_pilco

    return p0, pol, dyn, exp, polopt, learner
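The dynamics model above is a dropout MLP wrapped as a Bayesian neural network: predictions come from averaging stochastic forward passes with different dropout masks. Below is a minimal, self-contained numpy sketch of that MC-dropout idea; the weights, shapes and dropout rate are made up for illustration, and this is not the kusanagi BNN implementation.

import numpy as np

rng = np.random.default_rng(0)
W1, W2 = rng.normal(size=(3, 200)), rng.normal(size=(200, 2))

def mc_dropout_forward(x, p=0.1):
    h = np.maximum(x @ W1, 0)          # ReLU hidden layer
    mask = rng.random(h.shape) > p     # fresh dropout mask on every pass
    return (h * mask / (1 - p)) @ W2   # inverted-dropout rescaling

x = np.ones((1, 3))
samples = np.stack([mc_dropout_forward(x) for _ in range(50)])
pred_mean, pred_std = samples.mean(0), samples.std(0)  # predictive mean and uncertainty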
Example #2
def setup_pilco_experiment(params, pol=None, dyn=None):
    # initial state distribution
    p0 = params['state0_dist']

    # init policy
    if pol is None:
        pol = control.RBFPolicy(**params['policy'])

    # init dynmodel
    if dyn is None:
        dynmodel_class = params.get('dynmodel_class', regression.SSGP_UI)
        dyn = dynmodel_class(**params['dynamics_model'])

    # create experience dataset
    exp = ExperienceDataset()

    # init policy optimizer
    polopt = optimizers.ScipyOptimizer(**params['optimizer'])

    # module where get_loss and build_rollout are defined
    # (can also be a class)
    learner = algorithms.pilco

    return p0, pol, dyn, exp, polopt, learner
Example #3
    dropout_layers=[models.modules.CDropout(0.1)] * 2,
    nonlin=torch.nn.ReLU),
                           reward_func=reward_func).float()
forward_fn = partial(forward, dynamics=dyn)

# init policy
pol = models.Policy(
    models.dropout_mlp(D,
                       U,
                       output_nonlin=torch.nn.Tanh,
                       dropout_layers=[models.modules.BDropout(0.1)] * 2),
    maxU).float()
randpol = RandPolicy(maxU)

# init experience dataset
exp = ExperienceDataset()

# init policy optimizer
params = filter(lambda p: p.requires_grad, pol.parameters())
opt = torch.optim.Adam(params, 1e-3, amsgrad=True)


def cb(*args, **kwargs):
    env.render()


for rand_it in range(n_rnd):
    ret = apply_controller(env, randpol, H, callback=cb)
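The optimizer setup above passes only the trainable parameters to Adam. The following self-contained PyTorch sketch shows the same pattern; the small network here is a stand-in for illustration, not the models.Policy used above.

import torch

net = torch.nn.Sequential(torch.nn.Linear(4, 32), torch.nn.ReLU(),
                          torch.nn.Linear(32, 1))
for p in net[0].parameters():
    p.requires_grad = False            # freeze the first layer
trainable = [p for p in net.parameters() if p.requires_grad]
opt = torch.optim.Adam(trainable, lr=1e-3, amsgrad=True)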
Example #4
    files = os.listdir(args.results_path)
    spec_paths = check_files_suffix(files, '_spec.dill')
    if len(spec_paths) == 0:
        utils.print_with_stamp("No *_spec.dill file found. Quitting...")
        sys.exit(-1)
    spec_path = os.path.join(args.results_path, spec_paths[0])
    print(spec_path, spec_paths)
    # TODO: handle the case where there is more than one *_spec.dill file
    with open(spec_path, 'rb') as f:
        config = dill.load(f)

    #  load experience dataset
    exp_paths = check_files_suffix(files, '_dataset.zip')
    if len(exp_paths) == 0:
        utils.print_with_stamp("No *_dataset.zip file found. Quitting...")
        sys.exit(-1)
    exp = ExperienceDataset(filename=os.path.splitext(exp_paths[0])[0])

    # init environment with task params
    plant_params = config['plant']
    if args.cost_width is not None:
        # replace cost function with new one
        cost_params = config['cost']['params']
        config['cost']['graph'].func.keywords['cw'] = args.cost_width
        loss = config['cost']['graph']
        plant_params['loss_func'] = build_loss_func(loss, **cost_params)

    env = ROSPlant(**plant_params)

    # get dynamics model and policy
    angle_dims = config['angle_dims']
Example #5
    # init task queue and list of learning threads
    tasks = Queue()
    task_state = {}
    polopt_threads = []

    # populate task queue
    for task_name in config['tasks']:
        task_state[task_name] = 'init'
        spec = config['tasks'][task_name]
        exp = spec.get('experience', None)
        pol = spec['policy']
        pol(np.zeros(pol.D))  # call once so the policy function is built before use
        random_exp_path = spec.get('random_exp_path', None)
        if exp is None:
            exp = ExperienceDataset(name=task_name)
            if not exp.load() and random_exp_path is not None:
                base_path, filename = os.path.split(random_exp_path)
                fname = exp.filename
                exp.load(base_path, filename)
                # restore previous filename
                exp.filename = fname

            if exp.n_episodes() > 0:
                if len(exp.policy_parameters[-1]) > 0:
                    pol.set_params(exp.policy_parameters[-1])
                # exp.truncate(spec['initial_random_trials'])
                spec['initial_random_trials'] -= exp.n_episodes()
                # in case we are loading policy parameters, increase the n_opt counter
                n_polopt_iters = len(
                    [p for p in exp.policy_parameters if len(p) > 0])
Example #6
    kwargs = dict(args.kwarg)
    n_trials = int(kwargs.get('n_trials', 5))
    last_iteration = int(kwargs.get('last_iteration', 5))
    config_path = os.path.join(odir, 'initial_config.dill')
    exp_path = os.path.join(odir, 'experience_%d' % (last_iteration))
    pol_path = os.path.join(odir, 'policy_%d' % (last_iteration))
    env_class = recursive_getattr(kusanagi.shell, args.env)
    cost_func = recursive_getattr(kusanagi.shell, args.cost)
    policy_class = getattr(control, args.policy_class)

    with open(config_path, 'rb') as f:
        config_dict = dill.load(f)

    params = config_dict['params']
    p0 = params['state0_dist']
    exp = ExperienceDataset(filename=exp_path)
    if args.policy_class == 'NNPolicy':
        pol = policy_class(p0.mean.size, filename=pol_path, **params['policy'])
    else:
        pol = policy_class(filename=pol_path, **params['policy'])

    # init cost model
    cost = partial(cost_func, **params['cost'])
    # init environment
    env = env_class(loss_func=cost, **params['plant'])

    # evaluate policy
    results = experiment_utils.evaluate_policy(
        env, pol, exp, params, n_trials, render=args.render)

    # dump results to file
Example #7
    files = os.listdir(args.results_path)
    spec_paths = check_files_suffix(files, '_spec.dill')
    if len(spec_paths) == 0:
        utils.print_with_stamp("No *_spec.dill file found. Quitting...")
        sys.exit(-1)
    spec_path = os.path.join(args.results_path, spec_paths[0])
    print(spec_path, spec_paths)
    # TODO: handle the case where there is more than one *_spec.dill file
    with open(spec_path, 'rb') as f:
        config = dill.load(f)

    #  load experience dataset
    exp_paths = check_files_suffix(files, '_dataset.zip')
    if len(exp_paths) == 0:
        utils.print_with_stamp("No *_dataset.zip file found. Quitting...")
        sys.exit(-1)
    exp = ExperienceDataset(filename=os.path.splitext(exp_paths[0])[0])

    # init environment with task params
    plant_params = config['plant']
    print('COST WIDTH', config['cost']['graph'].func.keywords['cw'])
    if args.cost_width is not None:
        # replace cost function with new one
        cost_params = config['cost']['params']
        config['cost']['graph'].func.keywords['cw'] = args.cost_width
        loss = config['cost']['graph']
        plant_params['loss_func'] = build_loss_func(loss, **cost_params)

    env = ROSPlant(**plant_params)

    # get dynamics model and policy