# NOTE(review): the three comment lines directly below are the tail of a call
# whose opening lies before the visible text (presumably `policy_hparam = dict(`,
# given the `**policy_hparam` use further down -- confirm). Because the opening
# is not visible, the tail is kept in the state it has in this file: commented.
# feats=FeatureStack([RandFourierFeat(env.obs_space.flat_dim, num_feat=20, bandwidth=env.obs_space.bound_up)])
# feats=FeatureStack([const_feat, identity_feat, sign_feat, abs_feat, squared_feat, cubic_feat, ATan2Feat(1, 2), MultFeat([3, 4])])
# )

# Policy: built from the environment's spec plus the policy hyper-parameters
policy = LinearPolicy(spec=env.spec, **policy_hparam)

# Algorithm
algo_hparam = dict(
    max_iter=500,
    eps=1.0,
    pop_size=20 * policy.num_param,  # population size is tied to the number of policy parameters
    num_rollouts=4,
    expl_std_init=0.2,
    expl_std_min=0.02,
    use_map=True,
    optim_mode='scipy',
    num_workers=12,
)
algo = REPS(ex_dir, env, policy, **algo_hparam)

# Save the hyper-parameters (environment + seed, policy, algorithm + its name)
# into the experiment directory
save_list_of_dicts_to_yaml(
    [
        dict(env=env_hparams, seed=args.seed),
        dict(policy=policy_hparam),
        dict(algo=algo_hparam, algo_name=algo.name),
    ],
    ex_dir,
)

# Jeeeha: start the training with the seed given on the command line
algo.train(snapshot_mode='best', seed=args.seed)
# NOTE(review): the two comment lines directly below are the tail of a call
# whose opening lies before the visible text (presumably `policy_hparam = dict(`,
# given the `**policy_hparam` use further down -- confirm). Because the opening
# is not visible, the tail is kept in the state it has in this file: commented.
# feats=FeatureStack(RBFFeat(num_feat_per_dim=20, bounds=env.obs_space.bounds, scale=0.8)),
# feats=FeatureStack(identity_feat, sin_feat))

# Policy: built from the environment's spec plus the policy hyper-parameters
policy = LinearPolicy(spec=env.spec, **policy_hparam)

# Algorithm
algo_hparam = dict(
    max_iter=500,
    eps=0.2,
    pop_size=10 * policy.num_param,  # population size is tied to the number of policy parameters
    num_init_states_per_domain=10,
    expl_std_init=0.2,
    expl_std_min=0.02,
    num_epoch_dual=1000,
    optim_mode="scipy",
    lr_dual=1e-3,
    use_map=True,
    num_workers=8,
)
algo = REPS(ex_dir, env, policy, **algo_hparam)

# Save the hyper-parameters (environment + seed, policy, algorithm + its name)
# into the experiment directory
save_dicts_to_yaml(
    dict(env=env_hparams, seed=args.seed),
    dict(policy=policy_hparam),
    dict(algo=algo_hparam, algo_name=algo.name),
    save_dir=ex_dir,
)

# Jeeeha: start the training with the seed given on the command line
algo.train(seed=args.seed)
cubic_feat, ATan2Feat(1, 2), MultFeat((3, 4)), ])) policy = LinearPolicy(spec=env.spec, **policy_hparam) # Algorithm algo_hparam = dict( max_iter=500, eps=1.0, pop_size=20 * policy.num_param, num_init_states_per_domain=4, expl_std_init=0.2, expl_std_min=0.02, use_map=True, optim_mode="scipy", num_workers=12, ) algo = REPS(ex_dir, env, policy, **algo_hparam) # Save the hyper-parameters save_dicts_to_yaml( dict(env=env_hparams, seed=args.seed), dict(policy=policy_hparam), dict(algo=algo_hparam, algo_name=algo.name), save_dir=ex_dir, ) # Jeeeha algo.train(snapshot_mode="best", seed=args.seed)