def setup_mc_pilco_experiment(params, pol=None, dyn=None):
    # initial state distribution
    p0 = params['state0_dist']
    D = p0.mean.size

    pol_spec = params.get('pol_spec', None)
    dyn_spec = params.get('dyn_spec', None)

    # init policy
    if pol is None:
        pol = control.NNPolicy(D, **params['policy'])
        if pol_spec is None:
            pol_spec = regression.mlp(
                input_dims=pol.D,
                output_dims=pol.E,
                hidden_dims=[50] * 2,
                p=0.05, p_input=0.0,
                nonlinearities=nonlinearities.rectify,
                output_nonlinearity=pol.sat_func,
                dropout_class=regression.DenseDropoutLayer,
                name=pol.name)
        pol.network = pol.build_network(pol_spec)

    # init dynmodel
    if dyn is None:
        dyn = regression.BNN(**params['dynamics_model'])
        if dyn_spec is None:
            odims = 2 * dyn.E if dyn.heteroscedastic else dyn.E
            dyn_spec = regression.dropout_mlp(
                input_dims=dyn.D,
                output_dims=odims,
                hidden_dims=[200] * 2,
                p=0.1, p_input=0.1,
                nonlinearities=nonlinearities.rectify,
                dropout_class=regression.DenseLogNormalDropoutLayer,
                name=dyn.name)
        dyn.network = dyn.build_network(dyn_spec)

    # create experience dataset
    exp = ExperienceDataset()

    # init policy optimizer
    polopt = optimizers.SGDOptimizer(**params['optimizer'])

    # module where get_loss and build_rollout are defined
    # (can also be a class)
    learner = algorithms.mc_pilco

    return p0, pol, dyn, exp, polopt, learner
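# Usage sketch (hypothetical): setup_mc_pilco_experiment only requires a params
# dict with the keys read above ('state0_dist', 'policy', 'dynamics_model',
# 'optimizer', and optionally 'pol_spec'/'dyn_spec'). The concrete entries
# below are assumptions for illustration, not values fixed by the function.
params = {
    'state0_dist': state0_dist,        # initial state distribution, needs a .mean array
    'policy': {'maxU': [10.0]},        # assumed kwargs for control.NNPolicy
    'dynamics_model': {},              # assumed kwargs for regression.BNN
    'optimizer': {'max_evals': 1000},  # assumed kwargs for optimizers.SGDOptimizer
}
p0, pol, dyn, exp, polopt, learner = setup_mc_pilco_experiment(params)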
def setup_pilco_experiment(params, pol=None, dyn=None):
    # initial state distribution
    p0 = params['state0_dist']

    # init policy
    if pol is None:
        pol = control.RBFPolicy(**params['policy'])

    # init dynmodel
    if dyn is None:
        dynmodel_class = params.get('dynmodel_class', regression.SSGP_UI)
        dyn = dynmodel_class(**params['dynamics_model'])

    # create experience dataset
    exp = ExperienceDataset()

    # init policy optimizer
    polopt = optimizers.ScipyOptimizer(**params['optimizer'])

    # module where get_loss and build_rollout are defined
    # (can also be a class)
    learner = algorithms.pilco

    return p0, pol, dyn, exp, polopt, learner
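# Usage sketch (hypothetical): the params dict follows the same layout as for
# the MC-PILCO setup above, except 'policy' holds control.RBFPolicy kwargs and
# the optional 'dynmodel_class' entry (defaulting to regression.SSGP_UI above)
# selects the dynamics model class; the override shown here is an assumption.
pilco_params = dict(params, policy={'maxU': [10.0]},
                    dynmodel_class=regression.GP_UI)  # assumed alternative model class
p0, pol, dyn, exp, polopt, learner = setup_pilco_experiment(pilco_params)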
        dropout_layers=[models.modules.CDropout(0.1)] * 2,
        nonlin=torch.nn.ReLU),
    reward_func=reward_func).float()
forward_fn = partial(forward, dynamics=dyn)

# init policy
pol = models.Policy(
    models.dropout_mlp(
        D, U,
        output_nonlin=torch.nn.Tanh,
        dropout_layers=[models.modules.BDropout(0.1)] * 2),
    maxU).float()
randpol = RandPolicy(maxU)

# init experience dataset
exp = ExperienceDataset()

# init policy optimizer
params = filter(lambda p: p.requires_grad, pol.parameters())
opt = torch.optim.Adam(params, 1e-3, amsgrad=True)


def cb(*args, **kwargs):
    env.render()


for rand_it in range(n_rnd):
    ret = apply_controller(
        env, randpol, H,
        callback=lambda *args, **kwargs: env.render())
files = os.listdir(args.results_path)
spec_paths = check_files_suffix(files, '_spec.dill')
if len(spec_paths) == 0:
    utils.print_with_stamp("No *_spec.dill file found. Quitting...")
    sys.exit(-1)
spec_path = os.path.join(args.results_path, spec_paths[0])
print(spec_path, spec_paths)
# TODO what if we have more than one dill file
with open(spec_path, 'rb') as f:
    config = dill.load(f)

# load experience dataset
exp_paths = check_files_suffix(files, '_dataset.zip')
if len(exp_paths) == 0:
    utils.print_with_stamp("No *_dataset.zip file found. Quitting...")
    sys.exit(-1)
exp = ExperienceDataset(filename=exp_paths[0].split('.')[0])

# init environment with task params
plant_params = config['plant']
if args.cost_width is not None:
    # replace cost function with new one
    cost_params = config['cost']['params']
    config['cost']['graph'].func.keywords['cw'] = args.cost_width
loss = config['cost']['graph']
plant_params['loss_func'] = build_loss_func(loss, **config['cost']['params'])
env = ROSPlant(**plant_params)

# get dynamics model and policy
angle_dims = config['angle_dims']
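# check_files_suffix is not defined in the excerpt above; a minimal sketch of
# the behaviour assumed here (return the filenames ending with a given suffix):
def check_files_suffix(files, suffix):
    # keep only the entries of `files` whose names end with `suffix`
    return [fname for fname in files if fname.endswith(suffix)]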
# init task queue and list of learning threads
tasks = Queue()
task_state = {}
polopt_threads = []

# populate task queue
for task_name in config['tasks']:
    task_state[task_name] = 'init'
    spec = config['tasks'][task_name]
    exp = spec.get('experience', None)
    pol = spec['policy']
    pol(np.zeros(pol.D))
    random_exp_path = spec.get('random_exp_path', None)
    if exp is None:
        exp = ExperienceDataset(name=task_name)
        if not exp.load() and random_exp_path is not None:
            base_path, filename = os.path.split(random_exp_path)
            fname = exp.filename
            exp.load(base_path, filename)
            # restore previous filename
            exp.filename = fname
        if exp.n_episodes() > 0:
            if len(exp.policy_parameters[-1]) > 0:
                pol.set_params(exp.policy_parameters[-1])
            # exp.truncate(spec['initial_random_trials'])
            spec['initial_random_trials'] -= exp.n_episodes()

    # in case we are loading policy parameters, increase the n_opt counter
    n_polopt_iters = len(
        [p for p in exp.policy_parameters if len(p) > 0])
kwargs = dict(args.kwarg)
n_trials = int(kwargs.get('n_trials', 5))
last_iteration = int(kwargs.get('last_iteration', 5))

config_path = os.path.join(odir, 'initial_config.dill')
exp_path = os.path.join(odir, 'experience_%d' % (last_iteration))
pol_path = os.path.join(odir, 'policy_%d' % (last_iteration))

env_class = recursive_getattr(kusanagi.shell, args.env)
cost_func = recursive_getattr(kusanagi.shell, args.cost)
policy_class = getattr(control, args.policy_class)

with open(config_path, 'rb') as f:
    config_dict = dill.load(f)
params = config_dict['params']
p0 = params['state0_dist']

exp = ExperienceDataset(filename=exp_path)

if args.policy_class == 'NNPolicy':
    pol = policy_class(p0.mean.size, filename=pol_path, **params['policy'])
else:
    pol = policy_class(filename=pol_path, **params['policy'])

# init cost model
cost = partial(cost_func, **params['cost'])

# init environment
env = env_class(loss_func=cost, **params['plant'])

# evaluate policy
results = experiment_utils.evaluate_policy(
    env, pol, exp, params, n_trials, render=args.render)

# dump results to file
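# A hypothetical sketch of the elided dump step, assuming dill serialization
# and an assumed output filename; the original script's output format is not
# shown in this excerpt.
results_path = os.path.join(odir, 'eval_results_%d.dill' % last_iteration)
with open(results_path, 'wb') as f:
    dill.dump(results, f)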
files = os.listdir(args.results_path)
spec_paths = check_files_suffix(files, '_spec.dill')
if len(spec_paths) == 0:
    utils.print_with_stamp("No *_spec.dill file found. Quitting...")
    sys.exit(-1)
spec_path = os.path.join(args.results_path, spec_paths[0])
print(spec_path, spec_paths)
# TODO what if we have more than one dill file
with open(spec_path, 'rb') as f:
    config = dill.load(f)

# load experience dataset
exp_paths = check_files_suffix(files, '_dataset.zip')
if len(exp_paths) == 0:
    utils.print_with_stamp("No *_dataset.zip file found. Quitting...")
    sys.exit(-1)
exp = ExperienceDataset(filename=exp_paths[0].split('.')[0])

# init environment with task params
plant_params = config['plant']
print('COST WIDTH', config['cost']['graph'].func.keywords['cw'])
if args.cost_width is not None:
    # replace cost function with new one
    cost_params = config['cost']['params']
    config['cost']['graph'].func.keywords['cw'] = args.cost_width
loss = config['cost']['graph']
plant_params['loss_func'] = build_loss_func(loss, **config['cost']['params'])
env = ROSPlant(**plant_params)

# get dynamics model and policy