def create_algo(envs, pi_train, pi_old, preprocess_obss, iter_type,
                frames_per_proc, discount, lr, gae_lambda, entropy_coef,
                policy_reg_coef, value_reg_coef, value_loss_coef,
                max_grad_norm, optim_eps, clip_eps, epochs, batch_size):
    """Build a ``torch_rl.PPOAlgo`` from the supplied settings.

    Intended to be captured by sacred. Every argument is forwarded
    positionally in signature order, with one exception:
    ``preprocess_obss`` is moved to the end of the argument list.

    Returns:
        The object returned by ``torch_rl.PPOAlgo``.
    """
    # Positional order expected by the PPOAlgo call; preprocess_obss goes last.
    ppo_args = (
        envs,
        pi_train,
        pi_old,
        iter_type,
        frames_per_proc,
        discount,
        lr,
        gae_lambda,
        entropy_coef,
        policy_reg_coef,
        value_reg_coef,
        value_loss_coef,
        max_grad_norm,
        optim_eps,
        clip_eps,
        epochs,
        batch_size,
        preprocess_obss,
    )
    return torch_rl.PPOAlgo(*ppo_args)
# Move the model to CUDA when a device is available, then log availability.
if torch.cuda.is_available():
    acmodel.cuda()
logger.info("CUDA available: {}\n".format(torch.cuda.is_available()))

# Select and build the actor-critic algorithm named by args.algo.
if args.algo not in ("a2c", "ppo"):
    raise ValueError("Incorrect algorithm name: {}".format(args.algo))

# Leading positional arguments shared by both algorithm constructors.
common_args = (
    envs,
    acmodel,
    args.frames_per_proc,
    args.discount,
    args.lr,
    args.gae_lambda,
    args.entropy_coef,
    args.value_loss_coef,
    args.max_grad_norm,
    args.recurrence,
)
if args.algo == "a2c":
    algo = torch_rl.A2CAlgo(
        *common_args,
        args.optim_alpha,
        args.optim_eps,
        preprocess_obss,
    )
else:
    algo = torch_rl.PPOAlgo(
        *common_args,
        args.optim_eps,
        args.clip_eps,
        args.epochs,
        args.batch_size,
        preprocess_obss,
    )

# Training loop: counters start from the values stored in `status`.
num_frames = status["num_frames"]
total_start_time = time.time()
update = status["update"]

while num_frames < args.frames:
    # Timestamp the start of this parameter update.
    update_start_time = time.time()
args.recurrence, args.optim_alpha, args.optim_eps, preprocess_obss) raise NotImplementedError() elif args.algo == "ppo": algo = torch_rl.PPOAlgo( envs, acmodel, args.frames_per_proc, args.discount, args.lr, args.gae_lambda, args.entropy_coef, args.value_loss_coef, args.max_grad_norm, args.recurrence, args.optim_eps, args.clip_eps, args.epochs, args.batch_size, preprocess_obss, beta=args.beta, use_l2w=args.use_l2w, sni_type=args.sni_type, policy_loss_coef=args.policy_loss_coef, reconstruction_likelihood_coef=args.reconstruction_likelihood_coef, KLD_coef=args.KLD_coef, latent_transition_coef=args.latent_transition_coef, flow=args.flow) else: raise ValueError("Incorrect algorithm name: {}".format(args.algo)) # Train model