def main(args):
    """CLI entry point: train an RL agent in an OpenAI Gym environment.

    Parses command-line arguments, optionally seeds numpy/torch/random,
    loads JSON parameters into an ``OpenAiGymParameters`` object, runs
    training via ``run_gym``, optionally persists collected samples and
    CSV results, and logs final exploit/explore scores.

    :param args: list of CLI argument strings (as for ``argparse.parse_args``).
    :return: the reward history list produced by ``run_gym``.
    :raises ValueError: if ``--log_level`` is not a recognized level name.
    """
    parser = argparse.ArgumentParser(
        description="Train a RL net to play in an OpenAI Gym environment."
    )
    parser.add_argument("-p", "--parameters", help="Path to JSON parameters file.")
    parser.add_argument(
        "-s",
        "--score-bar",
        help="Bar for averaged tests scores.",
        type=float,
        default=None,
    )
    parser.add_argument(
        "-l",
        "--log_level",
        help="If set, use logging level specified (debug, info, warning, error, "
        "critical). Else defaults to info.",
        default="info",
    )
    parser.add_argument(
        "-f",
        "--file_path",
        help="If set, save all collected samples as an RLDataset to this file.",
        default=None,
    )
    parser.add_argument(
        "-e",
        "--start_saving_from_score",
        type=int,
        help="If file_path is set, start saving episodes after this score is hit.",
        default=None,
    )
    parser.add_argument(
        "-r",
        "--results_file_path",
        help="If set, save evaluation results to file.",
        type=str,
        default=None,
    )
    parser.add_argument(
        "--offline_train",
        action="store_true",
        help="If set, collect data using a random policy then train RL offline.",
    )
    parser.add_argument(
        "--path_to_pickled_transitions",
        help="Path to saved transitions to load into replay buffer.",
        type=str,
        default=None,
    )
    parser.add_argument(
        "--seed",
        help="Seed for the test (numpy, torch, and gym).",
        type=int,
        default=None,
    )
    parser.add_argument(
        "--use_gpu",
        help="Use GPU, if available; set the device with CUDA_VISIBLE_DEVICES",
        action="store_true",
    )
    args = parser.parse_args(args)

    # Guard clause: reject unknown log levels before configuring logging.
    # ValueError (not bare Exception) is the idiomatic type for a bad value;
    # it remains catchable by any caller handling Exception.
    if args.log_level not in ("debug", "info", "warning", "error", "critical"):
        raise ValueError("Logging level {} not valid level.".format(args.log_level))
    logging.getLogger().setLevel(getattr(logging, args.log_level.upper()))

    # Seed every RNG source used by the run for reproducibility.
    if args.seed is not None:
        np.random.seed(args.seed)
        torch.manual_seed(args.seed)
        random.seed(args.seed)

    assert (
        not args.path_to_pickled_transitions or args.offline_train
    ), "path_to_pickled_transitions is provided so you must run offline training"

    with open(args.parameters, "r") as f:
        params = json_to_object(f.read(), OpenAiGymParameters)
    if args.use_gpu:
        assert torch.cuda.is_available(), "CUDA requested but not available"
        # params is a NamedTuple-style object; _replace returns an updated copy.
        params = params._replace(use_gpu=True)

    # Only allocate a dataset when the user asked for samples to be saved.
    dataset = RLDataset(args.file_path) if args.file_path else None
    reward_history, iteration_history, trainer, predictor, env = run_gym(
        params,
        args.offline_train,
        args.score_bar,
        args.seed,
        dataset,
        args.start_saving_from_score,
        args.path_to_pickled_transitions,
    )

    if dataset:
        # Log before saving so the destination is recorded even if save() fails,
        # and the present-tense message is accurate.
        logger.info("Saving dataset to {}".format(args.file_path))
        dataset.save()

    # Evaluate the final policy both greedily (test=True) and with exploration.
    final_score_exploit, _ = env.run_ep_n_times(
        params.run_details.avg_over_num_episodes, predictor, test=True
    )
    final_score_explore, _ = env.run_ep_n_times(
        params.run_details.avg_over_num_episodes, predictor, test=False
    )
    logger.info(
        "Final policy scores {} with epsilon={} and {} with epsilon=0 over {} eps.".format(
            final_score_explore,
            env.epsilon,
            final_score_exploit,
            params.run_details.avg_over_num_episodes,
        )
    )

    if args.results_file_path:
        write_lists_to_csv(args.results_file_path, reward_history, iteration_history)

    return reward_history
def main(args):
    """CLI entry point: train an RL agent in an OpenAI Gym environment.

    NOTE(review): another ``def main`` appears earlier in this chunk — if both
    really live in one module, this definition shadows it; confirm whether the
    duplication is an intentional versioned copy or a merge artifact.

    Parses command-line arguments, loads the JSON parameters file into a plain
    dict, runs training via ``run_gym`` (on the GPU selected by ``--gpu_id``,
    or CPU by default), optionally persists collected samples and CSV results,
    and logs final exploit/explore scores.

    :param args: list of CLI argument strings (as for ``argparse.parse_args``).
    :return: the reward history list produced by ``run_gym``.
    :raises ValueError: if ``--log_level`` is not a recognized level name.
    """
    parser = argparse.ArgumentParser(
        description="Train a RL net to play in an OpenAI Gym environment."
    )
    parser.add_argument("-p", "--parameters", help="Path to JSON parameters file.")
    parser.add_argument(
        "-s",
        "--score-bar",
        help="Bar for averaged tests scores.",
        type=float,
        default=None,
    )
    parser.add_argument(
        "-g",
        "--gpu_id",
        help="If set, will use GPU with specified ID. Otherwise will use CPU.",
        default=USE_CPU,
    )
    parser.add_argument(
        "-l",
        "--log_level",
        help="If set, use logging level specified (debug, info, warning, error, "
        "critical). Else defaults to info.",
        default="info",
    )
    parser.add_argument(
        "-f",
        "--file_path",
        help="If set, save all collected samples as an RLDataset to this file.",
        default=None,
    )
    parser.add_argument(
        "-e",
        "--start_saving_from_score",
        type=int,
        help="If file_path is set, start saving episodes after this score is hit.",
        default=None,
    )
    parser.add_argument(
        "-r",
        "--results_file_path",
        help="If set, save evaluation results to file.",
        type=str,
        default=None,
    )
    parser.add_argument(
        "--offline_train",
        action="store_true",
        help="If set, collect data using a random policy then train RL offline.",
    )
    parser.add_argument(
        "--path_to_pickled_transitions",
        help="Path to saved transitions to load into replay buffer.",
        type=str,
        default=None,
    )
    args = parser.parse_args(args)

    # Guard clause: reject unknown log levels before configuring logging.
    # ValueError (not bare Exception) is the idiomatic type for a bad value;
    # it remains catchable by any caller handling Exception.
    if args.log_level not in ("debug", "info", "warning", "error", "critical"):
        raise ValueError("Logging level {} not valid level.".format(args.log_level))
    logger.setLevel(getattr(logging, args.log_level.upper()))

    assert (
        not args.path_to_pickled_transitions or args.offline_train
    ), "path_to_pickled_transitions is provided so you must run offline training"

    # This version keeps params as a raw dict (cf. the json_to_object variant).
    with open(args.parameters, "r") as f:
        params = json.load(f)

    # Only allocate a dataset when the user asked for samples to be saved.
    dataset = RLDataset(args.file_path) if args.file_path else None
    reward_history, iteration_history, trainer, predictor, env = run_gym(
        params,
        args.offline_train,
        args.score_bar,
        args.gpu_id,
        dataset,
        args.start_saving_from_score,
        args.path_to_pickled_transitions,
    )

    if dataset:
        # Log before saving so the destination is recorded even if save() fails,
        # and the present-tense message is accurate.
        logger.info("Saving dataset to {}".format(args.file_path))
        dataset.save()

    # Evaluate the final policy both greedily (test=True) and with exploration.
    final_score_exploit, _ = env.run_ep_n_times(
        params["run_details"]["avg_over_num_episodes"], predictor, test=True
    )
    final_score_explore, _ = env.run_ep_n_times(
        params["run_details"]["avg_over_num_episodes"], predictor, test=False
    )
    logger.info(
        "Final policy scores {} with epsilon={} and {} with epsilon=0 over {} eps.".format(
            final_score_explore,
            env.epsilon,
            final_score_exploit,
            params["run_details"]["avg_over_num_episodes"],
        )
    )

    if args.results_file_path:
        write_lists_to_csv(args.results_file_path, reward_history, iteration_history)

    return reward_history