def test_save_and_load_yaml():
    ex_dir = setup_experiment('testenv', 'testalgo', 'testinfo', base_dir=TEMP_DIR)

    # Save test data to a YAML file (ndarrays should be converted to lists)
    save_list_of_dicts_to_yaml(
        [dict(a=1), dict(b=2.0), dict(c=np.array([1., 2.]).tolist())],
        ex_dir, 'testfile'
    )
    data = load_dict_from_yaml(osp.join(ex_dir, 'testfile.yaml'))
    assert isinstance(data, dict)

    # Delete the created folder recursively
    shutil.rmtree(osp.join(TEMP_DIR, 'testenv'), ignore_errors=True)  # also deletes read-only files
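# Hedged sketch of the two helpers under test, NOT the actual pyrado
# implementations: assuming each dict's YAML dump is appended to one file,
# the concatenation parses as a single top-level mapping, which is why
# load_dict_from_yaml can return one dict.
import os.path as osp

import yaml


def save_list_of_dicts_to_yaml(list_of_dicts, save_dir, file_name):
    # Append every dict to the same file; top-level keys merge into one mapping
    with open(osp.join(save_dir, f'{file_name}.yaml'), 'a') as f:
        for d in list_of_dicts:
            yaml.safe_dump(d, f)


def load_dict_from_yaml(yaml_file):
    # Parse the whole file back into a single Python dict
    with open(yaml_file, 'r') as f:
        return yaml.safe_load(f)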
algo_hparam = dict(
    max_iter=15,
    num_eval_rollouts=5,
    warmstart=True,
    thold_succ_subrtn=100,
    subrtn_snapshot_mode='latest',
)
algo = SimOpt(ex_dir, env_sim, env_real, subrtn_policy, subrtn_distr, **algo_hparam)

# Save the environments and the hyper-parameters
save_list_of_dicts_to_yaml(
    [dict(env=env_hparams, seed=args.seed),
     dict(behav_policy=behav_policy_hparam),
     # dict(critic=critic_hparam, vfcn=vfcn_hparam),
     dict(ddp_policy=ddp_policy_hparam, subrtn_distr_name=ddp_policy.name),
     dict(subrtn_distr=subrtn_distr_hparam, subrtn_distr_name=subrtn_distr.name),
     dict(subsubrtn_distr=subsubrtn_distr_hparam, subsubrtn_distr_name=subsubrtn_distr.name),
     dict(subrtn_policy=subrtn_policy_hparam, subrtn_policy_name=subrtn_policy.name),
     dict(algo=algo_hparam, algo_name=algo.name)],
    ex_dir
)

# Jeeeha
algo.train(seed=args.seed)
    rollout(env_real, policy, eval=True, max_steps=args.max_steps, render_mode=RenderMode()))

# Print and save results
avg_return = np.mean([ro.undiscounted_return() for ro in ros])
print_cbt(f'Average return: {avg_return}', 'g', bright=True)
save_dir = setup_experiment('evaluation', 'qbb_experiment', ex_tag, base_dir=pyrado.TEMP_DIR)
joblib.dump(ros, osp.join(save_dir, 'experiment_rollouts.pkl'))
save_list_of_dicts_to_yaml(
    [dict(ex_dir=ex_dir, init_state=init_state, avg_return=avg_return, num_runs=len(ros))],
    save_dir, file_name='experiment_summary'
)

# Stabilize at the end
pdctrl.reset(state_des=np.zeros(2))
rollout(env_real, pdctrl, eval=True, max_steps=1000, render_mode=RenderMode(text=True))
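# Hedged sketch of what `undiscounted_return()` presumably computes for a
# recorded rollout: the plain sum of the per-step rewards (i.e. gamma = 1).
# Illustration only; the actual method lives on pyrado's StepSequence.
import numpy as np


def undiscounted_return(rewards: np.ndarray) -> float:
    # Sum the reward signal over the whole rollout without discounting
    return float(np.sum(rewards))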
    lr_scheduler_hparam=dict(gamma=0.99)
)
critic = GAE(vfcn, **critic_hparam)

# Algorithm
algo_hparam = dict(
    max_iter=500,
    min_steps=10000,
    num_workers=4,
    vfcn_coeff=0.7,
    entropy_coeff=4e-5,
    batch_size=100,
    std_init=0.8,
    lr=2e-3,
    lr_scheduler=lr_scheduler.ExponentialLR,
    lr_scheduler_hparam=dict(gamma=0.99)
)
algo = A2C(ex_dir, env, policy, critic, **algo_hparam)

# Save the hyper-parameters
save_list_of_dicts_to_yaml(
    [dict(env=env_hparams, seed=args.seed),
     dict(policy=policy_hparam),
     dict(critic=critic_hparam, vfcn=vfcn_hparam),
     dict(algo=algo_hparam, algo_name=algo.name)],
    ex_dir
)

# Jeeeha
algo.train(seed=args.seed)
# Set the seed
pyrado.set_seed(1001, verbose=True)

# Set the hyper-parameters of SysIdViaEpisodicRL
num_eval_rollouts = 5
algo_hparam = dict(
    metric=None,
    std_obs_filt=5,
    obs_dim_weight=[1, 1, 1, 1, 10, 10.],
    num_rollouts_per_distr=len(dp_map) * 10,  # former 50
    num_workers=subrtn_hparam['num_workers']
)

# Save the environments and the hyper-parameters
save_list_of_dicts_to_yaml(
    [dict(env=env_hparams),
     dict(subrtn=subrtn_hparam, subrtn_name=subrtn.name),
     dict(algo=algo_hparam, algo_name=SysIdViaEpisodicRL.name, dp_map=dp_map)],
    ex_dir
)

algo = SysIdViaEpisodicRL(subrtn, behavior_policy, **algo_hparam)

# Jeeeha
while algo.curr_iter < algo.max_iter and not algo.stopping_criterion_met():
    algo.logger.add_value(algo.iteration_key, algo.curr_iter)

    # Create fake real-world data
    ro_real = []
    for _ in range(num_eval_rollouts):
        ro_real.append(rollout(env_real, behavior_policy, eval=True))

    algo.step(snapshot_mode='latest',
    bound_lo=np.array([0.8*dp_nom['Mp'], 1e-12, 0.8*dp_nom['Mr'], 1e-12]),
    bound_up=np.array([1.2*dp_nom['Mp'], 1e-11, 1.2*dp_nom['Mr'], 1e-11])
)

# Algorithm
bayrn_hparam = dict(
    max_iter=15,
    acq_fc='UCB',
    acq_param=dict(beta=0.25),
    acq_restarts=500,
    acq_samples=1000,
    num_init_cand=4,
    warmstart=False,
    num_eval_rollouts_real=10,  # sim-2-sim
    # thold_succ_subrtn=300,
)

# Save the environments and the hyper-parameters (do it before the init routine of BayRn)
save_list_of_dicts_to_yaml(
    [dict(env_sim=env_sim_hparams, env_real=env_real_hparams, seed=args.seed),
     dict(policy=policy_hparam),
     dict(subrtn=subrtn_hparam, subrtn_name=PoWER.name),
     dict(algo=bayrn_hparam, algo_name=BayRn.name, dp_map=dp_map)],
    ex_dir
)

algo = BayRn(ex_dir, env_sim, env_real, subrtn, ddp_space, **bayrn_hparam)

# Jeeeha
algo.train(snapshot_mode='latest', seed=args.seed)
# Subroutine
algo_hparam = dict(
    max_iter=1000,
    min_steps=30 * env.max_steps,
    num_sampler_envs=20,
    num_epoch=5,
    eps_clip=0.1,
    batch_size=100,
    std_init=0.8,
    lr=2e-4,
    # max_grad_norm=1.,
)
ppo = PPO(ex_dir, env, policy, critic, **algo_hparam)

# Meta-algorithm
epopt_hparam = dict(skip_iter=100, epsilon=0.2, gamma=critic.gamma)
algo = EPOpt(ppo, **epopt_hparam)

# Save the hyper-parameters
save_list_of_dicts_to_yaml(
    [dict(env=env_hparams, seed=ex_dir.seed),
     dict(policy=policy_hparam),
     dict(critic=critic_hparam, value_fcn=value_fcn_hparam),
     dict(algo=algo_hparam, algo_name=algo.name),
     dict(EPOpt=epopt_hparam)],
    ex_dir
)

# Jeeeha
algo.train(seed=ex_dir.seed)
    algo.train(snapshot_mode='latest', seed=seed)

    # Evaluate
    min_rollouts = 1000
    sampler = ParallelSampler(env, policy, num_envs=20, min_rollouts=min_rollouts)
    ros = sampler.sample()
    mean_ret = sum([r.undiscounted_return() for r in ros])/min_rollouts

    return mean_ret


if __name__ == '__main__':
    # Parse command line arguments
    args = get_argparser().parse_args()

    ex_dir = setup_experiment('hyperparams', QBallBalancerSim.name, 'ppo_250Hz_actnorm', seed=args.seed)

    # Run hyper-parameter optimization
    name = f'{ex_dir.algo_name}_{ex_dir.add_info}'  # e.g. qbb_ppo_fnn_actnorm
    study = optuna.create_study(
        study_name=name,
        storage=f"sqlite:////{osp.join(pyrado.TEMP_DIR, ex_dir, f'{name}.db')}",
        direction='maximize',
        pruner=MedianPruner(),
        load_if_exists=True
    )
    study.optimize(functools.partial(train_and_eval, ex_dir=ex_dir, seed=args.seed), n_trials=100, n_jobs=6)

    # Save the best hyper-parameters
    save_list_of_dicts_to_yaml([study.best_params, dict(seed=args.seed)], ex_dir, 'best_hyperparams')
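# Hedged sketch of the objective pattern used above, NOT the script's actual
# `train_and_eval`: Optuna invokes the objective with a single `trial`
# argument, so fixed arguments such as `ex_dir` and `seed` must be pre-bound,
# e.g. via functools.partial. The search-space names (`lr`, `gamma`) and the
# dummy return value are made up for illustration.
import functools

import optuna


def objective(trial: optuna.trial.Trial, ex_dir: str, seed: int) -> float:
    lr = trial.suggest_loguniform('lr', 1e-5, 1e-2)  # hypothetical hyper-parameter
    gamma = trial.suggest_uniform('gamma', 0.99, 1.)  # hypothetical hyper-parameter
    # A real objective would train and evaluate a policy here; this dummy
    # simply rewards parameters close to an arbitrary optimum
    return -abs(lr - 1e-3) - abs(gamma - 0.995)


study = optuna.create_study(direction='maximize')
study.optimize(functools.partial(objective, ex_dir='/tmp', seed=0), n_trials=10)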
ros = []
for r in range(args.num_runs):
    if args.mode == 'wo':
        ro = experiment_wo_distruber(env_real, env_sim)
    elif args.mode == 'w':
        ro = experiment_w_distruber(env_real, env_sim)
    else:
        raise pyrado.ValueErr(given=args.mode, eq_constraint="without (wo), or with (w) disturber")
    ros.append(ro)
env_real.close()

# Print and save results
avg_return = np.mean([ro.undiscounted_return() for ro in ros])
print_cbt(f'Average return: {avg_return}', 'g', bright=True)
save_dir = setup_experiment('evaluation', 'qcp-st_experiment', ex_tag, base_dir=pyrado.TEMP_DIR)
joblib.dump(ros, osp.join(save_dir, 'experiment_rollouts.pkl'))
save_list_of_dicts_to_yaml(
    [dict(ex_dir=ex_dir, avg_return=avg_return, num_runs=len(ros), steps_disturb=steps_disturb)],
    save_dir, file_name='experiment_summary'
)
env = wrap_like_other_env(env, env_sim)

# Sample rollouts
ros = eval_randomized_domain(pool, env, pert, policy, init_state_list)  # internally calls DomainRandWrapperLive

# Compute result metrics
rets = [ro.undiscounted_return() for ro in ros]
lengths = [float(ro.length) for ro in ros]  # int values are not numeric in pandas
df = df.append(pd.DataFrame(dict(policy=ex_labels[i], ret=rets, len=lengths)), ignore_index=True)

metrics = dict(
    avg_len=df.groupby('policy').mean()['len'].to_dict(),
    avg_ret=df.groupby('policy').mean()['ret'].to_dict(),
    median_ret=df.groupby('policy').median()['ret'].to_dict(),
    min_ret=df.groupby('policy').min()['ret'].to_dict(),
    max_ret=df.groupby('policy').max()['ret'].to_dict(),
    std_ret=df.groupby('policy').std()['ret'].to_dict()
)
pprint(metrics, indent=4)

# Create subfolder and save
save_dir = setup_experiment('multiple_policies', args.env_name, 'randomized', base_dir=pyrado.EVAL_DIR)
save_list_of_dicts_to_yaml(
    [{'ex_dirs': ex_dirs},
     {'num_rpp': args.num_ro_per_config, 'seed': args.seed},
     dict_arraylike_to_float(metrics)],
    save_dir, file_name='summary'
)
df.to_pickle(osp.join(save_dir, 'df_dr_mp.pkl'))
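# Hedged sketch of what `dict_arraylike_to_float` presumably does (the real
# helper lives in pyrado): YAML emitters reject numpy scalars, so every leaf
# value is cast to a built-in float before saving. Illustration only; written
# to handle nested dicts like the grouped `metrics` above.
def dict_arraylike_to_float(d: dict) -> dict:
    # Recurse into sub-dicts, cast every leaf to a plain Python float
    return {
        k: dict_arraylike_to_float(v) if isinstance(v, dict) else float(v)
        for k, v in d.items()
    }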
varied_param_values = [ro.rollout_info['domain_param'][varied_param_key] for ro in ros]
varied_param = {varied_param_key: varied_param_values}
df = df.append(pd.DataFrame(dict(policy=exp_labels[i], ret=rets, len=lengths, **varied_param)), ignore_index=True)

metrics = dict(
    avg_len=df.groupby('policy').mean()['len'].to_dict(),
    avg_ret=df.groupby('policy').mean()['ret'].to_dict(),
    median_ret=df.groupby('policy').median()['ret'].to_dict(),
    min_ret=df.groupby('policy').min()['ret'].to_dict(),
    max_ret=df.groupby('policy').max()['ret'].to_dict(),
    std_ret=df.groupby('policy').std()['ret'].to_dict(),
    quantile5_ret=df.groupby('policy').quantile(q=0.05)['ret'].to_dict(),
    quantile95_ret=df.groupby('policy').quantile(q=0.95)['ret'].to_dict()
)
pprint.pprint(metrics)

# Create subfolder and save
save_dir = setup_experiment('multiple_policies', args.env_name, varied_param_key, base_dir=pyrado.EVAL_DIR)
save_list_of_dicts_to_yaml(
    [{'ex_dirs': ex_dirs},
     {'varied_param': varied_param_key,
      'num_rpp': args.num_ro_per_config,
      'seed': args.seed,
      'dt': args.dt,
      'max_steps': args.max_steps},
     dict_arraylike_to_float(metrics)],
    save_dir, file_name='summary'
)
df.to_pickle(osp.join(save_dir, 'df_mp_grid_1d.pkl'))
    num_epoch=5,
    lr=1e-3,
    standardize_adv=False,
    max_grad_norm=5.,
)
particle_hparam = dict(actor=actor_hparam, vfcn=vfcn_hparam, critic=critic_hparam)

# Algorithm
algo_hparam = dict(
    max_iter=200,
    min_steps=30 * env.max_steps,
    num_particles=3,
    temperature=1,
    lr=1e-3,
    std_init=1.0,
    horizon=50,
    num_workers=12,
)
algo = SVPG(ex_dir, env, particle_hparam, **algo_hparam)

# Save the hyper-parameters
save_list_of_dicts_to_yaml(
    [dict(env=env_hparams, seed=args.seed),
     dict(algo=algo_hparam, algo_name=algo.name)],
    ex_dir
)

# Jeeeha
algo.train(seed=args.seed)
        pass
    lod.append(d)

df = pd.DataFrame(lod)
metrics = dict(
    avg_len=df['len'].mean(),
    avg_ret=df['ret'].mean(),
    median_ret=df['ret'].median(),
    min_ret=df['ret'].min(),
    max_ret=df['ret'].max(),
    std_ret=df['ret'].std()
)
pprint.pprint(metrics)

# Create subfolder and save
timestamp = datetime.datetime.now()
add_info = timestamp.strftime(timestamp_format) + '--' + add_info
save_dir = osp.join(ex_dir, 'eval_domain_grid', add_info)
os.makedirs(save_dir, exist_ok=True)

save_list_of_dicts_to_yaml(
    [{'ex_dir': str(ex_dir)},
     {'varied_params': list(param_spec.keys())},
     {'num_rpp': args.num_ro_per_config, 'seed': args.seed},
     dict_arraylike_to_float(metrics)],
    save_dir, file_name='summary'
)
df.to_pickle(osp.join(save_dir, 'df_sp_grid_nd.pkl'))
env = DomainRandWrapperLive(env, randomizer)

# Policy
policy = to.load(osp.join(ref_ex_dir, 'policy.pt'))
policy.init_param()

# Critic
vfcn = to.load(osp.join(ref_ex_dir, 'valuefcn.pt'))
vfcn.init_param()
critic = GAE(vfcn, **hparams['critic'])

# Algorithm
algo_hparam = hparams['subrtn']
algo_hparam.update({'num_workers': 1})  # should be equivalent to the number of cores per job
# algo_hparam.update({'max_iter': 300})
# algo_hparam.update({'max_iter': 600})
# algo_hparam.update({'min_steps': 3*algo_hparam['min_steps']})
algo = PPO(ex_dir, env, policy, critic, **algo_hparam)

# Save the hyper-parameters
save_list_of_dicts_to_yaml(
    [dict(env=env_hparams, seed=args.seed),
     dict(policy=hparams['policy']),
     dict(critic=hparams['critic']),
     dict(algo=algo_hparam, algo_name=algo.name)],
    ex_dir
)

# Jeeeha
algo.train(seed=args.seed, snapshot_mode='latest')
    max_iter=10,
    alpha=0.05,
    beta=0.1,
    nG=20,
    nJ=120,
    ntau=5,
    nc_init=5,
    nr_init=1,
    sequence_cand=sequence_add_init,
    sequence_refs=sequence_const,
    warmstart_cand=True,
    warmstart_refs=True,
    cand_policy_param_init=init_policy_param_values,
    num_bs_reps=1000,
    studentized_ci=False,
)
algo = SPOTA(ex_dir, env, sr_cand, sr_refs, **algo_hparam)

# Save the environments and the hyper-parameters
save_list_of_dicts_to_yaml(
    [dict(env=env_hparams, seed=args.seed),
     dict(policy=policy_hparam),
     dict(subrtn_name=sr_cand.name, subrtn_cand=subrtn_hparam_cand, subrtn_refs=subrtn_hparam_refs),
     dict(algo=algo_hparam, algo_name=algo.name)],
    ex_dir
)

# Jeeeha
algo.train(seed=args.seed)
env = QCartPoleStabSim(**env_hparams)
env = ActNormWrapper(env)

# Policy
policy_hparam = dict(feats=FeatureStack([identity_feat]))
policy = LinearPolicy(spec=env.spec, **policy_hparam)

# Algorithm
algo_hparam = dict(
    max_iter=30,
    pop_size=50,
    num_rollouts=8,
    num_is_samples=10,
    expl_std_init=2.,
    expl_std_min=0.02,
    full_cov=True,
    symm_sampling=False,
    num_sampler_envs=8,
)
algo = CEM(ex_dir, env, policy, **algo_hparam)

# Save the hyper-parameters
save_list_of_dicts_to_yaml(
    [dict(env=env_hparams, seed=ex_dir.seed),
     dict(policy=policy_hparam),
     dict(algo=algo_hparam, algo_name=algo.name)],
    ex_dir
)

# Jeeeha
algo.train(seed=ex_dir.seed)
    init_param_kwargs=None,
    use_cuda=False
)
policy = ADNPolicy(spec=EnvSpec(act_space=InfBoxSpace(shape=1), obs_space=InfBoxSpace(shape=1)), **policy_hparam)

# Algorithm
algo_hparam = dict(
    max_iter=1000,
    windowed=False,
    cascaded=True,
    optim_class=optim.Adam,
    optim_hparam=dict(lr=1e-1, eps=1e-8, weight_decay=1e-4),  # momentum=0.7
    loss_fcn=nn.MSELoss(),
    lr_scheduler=lr_scheduler.ExponentialLR,
    lr_scheduler_hparam=dict(gamma=0.995)
)
algo = TSPred(ex_dir, dataset, policy, **algo_hparam)

# Save the hyper-parameters
save_list_of_dicts_to_yaml(
    [dict(data_set=data_set_hparam, data_set_name=data_set_name, seed=args.seed),
     dict(policy=policy_hparam),
     dict(algo=algo_hparam, algo_name=algo.name)],
    ex_dir
)

# Jeeeha
algo.train()
spota_hparam = dict(
    max_iter=10,
    alpha=0.05,
    beta=0.1,
    nG=20,
    nJ=120,
    ntau=5,
    nc_init=5,
    nr_init=1,
    sequence_cand=sequence_add_init,
    sequence_refs=sequence_const,
    warmstart_cand=True,
    warmstart_refs=True,
    cand_policy_param_init=init_policy_param_values,
    num_bs_reps=1000,
    studentized_ci=False,
)
algo = SPOTA(ex_dir, env, sr_cand, sr_refs, **spota_hparam)

# Save the environments and the hyper-parameters
save_list_of_dicts_to_yaml(
    [dict(env=env_hparams, seed=ex_dir.seed),
     dict(policy=policy_hparam),
     dict(subroutine_cand=subrtn_hparam_cand, subroutine_refs=subrtn_hparam_refs, subroutine_name=HCNormal.name),
     dict(algo=spota_hparam, algo_name=SPOTA.name)],
    ex_dir
)

# Jeeeha
algo.train(seed=ex_dir.seed)
# Algorithm
bayrn_hparam = dict(
    max_iter=15,
    acq_fc='UCB',
    acq_param=dict(beta=0.25),
    acq_restarts=500,
    acq_samples=1000,
    num_init_cand=2,
    warmstart=False,
    num_eval_rollouts_real=100 if isinstance(env_real, QQubeSim) else 5,
)

# Save the environments and the hyper-parameters (do it before the init routine of BayRn)
save_list_of_dicts_to_yaml(
    [dict(env=env_hparams, seed=ex_dir.seed),
     dict(policy=policy_hparam),
     dict(subroutine=subroutine_hparam, subroutine_name=PoWER.name),
     dict(algo=bayrn_hparam, algo_name=BayRn.name, dp_map=dp_map)],
    ex_dir
)

algo = BayRn(ex_dir, env_sim, env_real, subroutine=power, bounds=bounds, **bayrn_hparam)

# Jeeeha
algo.train(snapshot_mode='best', seed=ex_dir.seed)