示例#1
0
 def _preprocess_schedules(hyperparams: Dict[str, Any]) -> Dict[str, Any]:
     # Create schedules
     for key in ["learning_rate", "clip_range", "clip_range_vf"]:
         if key not in hyperparams:
             continue
         if isinstance(hyperparams[key], str):
             schedule, initial_value = hyperparams[key].split("_")
             initial_value = float(initial_value)
             hyperparams[key] = linear_schedule(initial_value)
         elif isinstance(hyperparams[key], (float, int)):
             # Negative value: ignore (ex: for clipping)
             if hyperparams[key] < 0:
                 continue
             hyperparams[key] = constant_fn(float(hyperparams[key]))
         else:
             raise ValueError(f"Invalid value for {key}: {hyperparams[key]}")
     return hyperparams
示例#2
0
# Each run gets its own directory: <log_folder>/<algo>/<ENV_ID>_<run id>/,
# where the run id is one more than the latest id reported for this env.
log_path = os.path.join(args.log_folder, args.algo)
save_path = os.path.join(
    log_path, "{}_{}".format(ENV_ID,
                             get_latest_run_id(log_path, ENV_ID) + 1))
params_path = os.path.join(save_path, ENV_ID)
os.makedirs(params_path, exist_ok=True)

# Create learning rate schedules for ppo2 and sac: these hyperparameters are
# turned into callables instead of being kept as plain numbers.
if args.algo in ["ppo2", "sac"]:
    for key in ['learning_rate', 'cliprange']:
        if key not in hyperparams:
            continue
        if isinstance(hyperparams[key], str):
            # Expected form: "<schedule>_<initial value>".
            # NOTE(review): the schedule prefix is not inspected, so every
            # string value yields a linear schedule - confirm this is intended.
            schedule, initial_value = hyperparams[key].split('_')
            initial_value = float(initial_value)
            hyperparams[key] = linear_schedule(initial_value)
        elif isinstance(hyperparams[key], (float, int)):
            # Integers (e.g. a value written as `1`) are accepted too and
            # converted, so the constant is always a float.
            hyperparams[key] = constfn(float(hyperparams[key]))
        else:
            raise ValueError('Invalid value for {}: {}'.format(
                key, hyperparams[key]))

# Should we overwrite the number of timesteps?
if args.n_timesteps > 0:
    n_timesteps = args.n_timesteps
else:
    n_timesteps = int(hyperparams['n_timesteps'])
# Remove the entry from hyperparams now that it has been read; pop() tolerates
# a missing key when the value came from the command line instead.
hyperparams.pop('n_timesteps', None)

# Normalization is disabled by default.
normalize = False
normalize_kwargs = {}