# NOTE(review): this `raise` is presumably the terminal `else` branch of the
# policy-selection if/elif chain above this chunk — confirm its indentation
# against the enclosing dispatch when merging.
raise ValueError(f"Invalid Policy: {args.policy}!")

# Create the model checkpoint directory on demand.
if args.save_model and not os.path.exists("./models"):
    os.makedirs("./models")

# Optionally restore a previously saved policy. "default" resolves to the
# auto-generated `file_name` for this experiment; anything else is taken as
# an explicit checkpoint name under ./models/.
if args.load_model != "":
    policy_file = file_name if args.load_model == "default" else args.load_model
    # BUG FIX: the original `assert f"...does not exist!"` asserted a non-empty
    # string, which is always truthy, so the check could never fail (and would
    # be stripped entirely under `python -O`). Fail fast with a real exception,
    # and report the actual ./models/ path instead of the mistyped ../models/.
    if not os.path.exists(f"./models/{policy_file}"):
        raise FileNotFoundError(
            f"The loading model path of `./models/{policy_file}` does not exist! ")
    policy.load(f"./models/{policy_file}")

# Setup loggers
logger_kwargs = setup_logger_kwargs(args.exp_name, args.seed, datestamp=False)
logger = EpochLogger(**logger_kwargs)

# Sync params across MPI processes so every worker starts from identical weights.
sync_params(policy)

# Set up the on-policy experience buffer. Each process collects an equal
# share of the epoch's steps.
local_steps_per_epoch = int(args.steps_per_epoch / num_procs())
_replay_buffer = replay_buffer.VPGBuffer(
    state_dim, action_dim, local_steps_per_epoch,
    args.discount, args.lam, is_discrete)

# Per-episode bookkeeping for the collection loop that follows.
state, done = env.reset(), False
episode_reward = 0
episode_timesteps = 0
episode_num = 0
# NOTE(review): this `raise` is presumably the terminal `else` branch of the
# policy-selection if/elif chain above this chunk — confirm its indentation
# against the enclosing dispatch when merging.
raise ValueError(f"Invalid Policy: {args.policy}!")

# Create the model checkpoint directory on demand.
if args.save_model and not os.path.exists("./models"):
    os.makedirs("./models")

# Optionally restore a previously saved policy. "default" resolves to the
# auto-generated `file_name` for this experiment; anything else is taken as
# an explicit checkpoint name under ./models/.
if args.load_model != "":
    policy_file = file_name if args.load_model == "default" else args.load_model
    # BUG FIX: the original `assert f"...does not exist!"` asserted a non-empty
    # string, which is always truthy, so the check could never fail (and would
    # be stripped entirely under `python -O`). Fail fast with a real exception,
    # and report the actual ./models/ path instead of the mistyped ../models/.
    if not os.path.exists(f"./models/{policy_file}"):
        raise FileNotFoundError(
            f"The loading model path of `./models/{policy_file}` does not exist! ")
    policy.load(f"./models/{policy_file}")

# Setup loggers
logger_kwargs = setup_logger_kwargs(args.exp_name, args.seed, datestamp=False)
logger = EpochLogger(**logger_kwargs)

# Off-policy uniform replay buffer.
_replay_buffer = replay_buffer.ReplayBuffer(int(args.buffer_size))

print("Collecting experience...")
epinfobuf = deque(maxlen=100)  # rolling episode-info window for accumulated reward
start_time = time.time()  # wall-clock start, for measuring learning time

# env reset, output array of num of `#num_envs` states (vectorized env).
states = np.array(env.reset())
step = 0

# Total iterations = max_timesteps / num_envs, since each step advances all envs.
for t in range(1, int(args.max_timesteps) // int(args.num_envs) + 1):
    # Epsilon-greedy action selection; `eps_schedule.value` is the current
    # exploration rate — presumably annealed elsewhere, confirm against schedule.
    actions = policy.select_action(states, eps_schedule.value)
    next_states, rewards, dones, infos = env.step(
        actions)  # take actions and get next states