Example #1
0
def create_algo(envs, pi_train, pi_old, preprocess_obss, iter_type,
                frames_per_proc, discount, lr, gae_lambda, entropy_coef,
                policy_reg_coef, value_reg_coef, value_loss_coef,
                max_grad_norm, optim_eps, clip_eps, epochs, batch_size):
    """Build and return a ``torch_rl.PPOAlgo`` instance.

    Thin factory function to be captured by sacred; all arguments are
    forwarded positionally to the PPOAlgo constructor (note that
    ``preprocess_obss`` is passed last, after ``batch_size``).
    """
    # Constructor arguments in the exact positional order PPOAlgo expects.
    ppo_args = (envs, pi_train, pi_old, iter_type, frames_per_proc,
                discount, lr, gae_lambda, entropy_coef, policy_reg_coef,
                value_reg_coef, value_loss_coef, max_grad_norm, optim_eps,
                clip_eps, epochs, batch_size, preprocess_obss)
    return torch_rl.PPOAlgo(*ppo_args)
Example #2
0
# Move the actor-critic model to GPU when one is present.
if torch.cuda.is_available():
    acmodel.cuda()
logger.info("CUDA available: {}\n".format(torch.cuda.is_available()))

# Define actor-critic algo

# Dispatch on the requested algorithm name; only "a2c" and "ppo" are
# supported.  Note the two constructors take slightly different argument
# lists (A2C uses optim_alpha; PPO uses clip_eps/epochs/batch_size).
if args.algo == "a2c":
    algo = torch_rl.A2CAlgo(envs, acmodel, args.frames_per_proc, args.discount,
                            args.lr, args.gae_lambda, args.entropy_coef,
                            args.value_loss_coef, args.max_grad_norm,
                            args.recurrence, args.optim_alpha, args.optim_eps,
                            preprocess_obss)
elif args.algo == "ppo":
    algo = torch_rl.PPOAlgo(envs, acmodel, args.frames_per_proc, args.discount,
                            args.lr, args.gae_lambda, args.entropy_coef,
                            args.value_loss_coef, args.max_grad_norm,
                            args.recurrence, args.optim_eps, args.clip_eps,
                            args.epochs, args.batch_size, preprocess_obss)
else:
    raise ValueError("Incorrect algorithm name: {}".format(args.algo))

# Train model

# Resume counters from the previously loaded training status (presumably
# restored from a checkpoint earlier in the file -- not visible here).
num_frames = status["num_frames"]
total_start_time = time.time()
update = status["update"]

# Main training loop: run until the configured frame budget is exhausted.
# (Loop body continues beyond this excerpt.)
while num_frames < args.frames:
    # Update model parameters

    update_start_time = time.time()
Example #3
0
                            # Tail of the A2C constructor call (its opening
                            # lines are outside this excerpt).
                            args.recurrence, args.optim_alpha, args.optim_eps,
                            preprocess_obss)
    # NOTE(review): the a2c branch constructs the algo and then immediately
    # raises -- A2C appears intentionally disabled here; confirm.
    raise NotImplementedError()
elif args.algo == "ppo":
    # Extended PPO variant: beyond the standard PPO arguments it takes
    # regularization / auxiliary-loss coefficients (beta, KLD_coef,
    # reconstruction_likelihood_coef, latent_transition_coef), an SNI mode,
    # and a normalizing-flow flag -- semantics defined by this project's
    # torch_rl.PPOAlgo, not visible from here.
    algo = torch_rl.PPOAlgo(
        envs,
        acmodel,
        args.frames_per_proc,
        args.discount,
        args.lr,
        args.gae_lambda,
        args.entropy_coef,
        args.value_loss_coef,
        args.max_grad_norm,
        args.recurrence,
        args.optim_eps,
        args.clip_eps,
        args.epochs,
        args.batch_size,
        preprocess_obss,
        beta=args.beta,
        use_l2w=args.use_l2w,
        sni_type=args.sni_type,
        policy_loss_coef=args.policy_loss_coef,
        reconstruction_likelihood_coef=args.reconstruction_likelihood_coef,
        KLD_coef=args.KLD_coef,
        latent_transition_coef=args.latent_transition_coef,
        flow=args.flow)
else:
    # Any other algorithm name is a configuration error.
    raise ValueError("Incorrect algorithm name: {}".format(args.algo))

# Train model