def get_argument_parser(
        cls
) -> ArgumentParser:
    """
    Get argument parser.

    :return: Argument parser.
    """

    # don't use super's argument parser, so that we do not pick up the --T argument intended for the actual
    # environment. we're going to use --T-planning instead (see below).
    planning_parser = get_base_argument_parser()

    # both planning options are integer-valued; add them uniformly.
    for option_flag, option_help in [
        ('--T-planning', 'Maximum number of planning time steps to run.'),
        (
            '--num-planning-improvements-per-direct-improvement',
            'Number of planning improvements to make for each direct improvement.'
        )
    ]:
        planning_parser.add_argument(
            option_flag,
            type=int,
            help=option_help
        )

    return planning_parser
def get_argument_parser_for_run() -> ArgumentParser:
    """
    Get argument parser for values used in the run function.

    :return: Argument parser.
    """

    train_parser = get_base_argument_parser(
        prog='rlai train',
        description='Train an agent in an environment.'
    )

    # which training routine to invoke
    train_parser.add_argument(
        '--train-function',
        type=str,
        help='Fully-qualified type name of function to use for training the agent.'
    )

    # checkpoint resumption is a simple on/off flag
    train_parser.add_argument(
        '--resume',
        action='store_true',
        help='Pass this flag to resume training an agent from a previously saved checkpoint path.'
    )

    # where the trained agent gets persisted
    train_parser.add_argument(
        '--save-agent-path',
        type=str,
        help='Path to store resulting agent to.'
    )

    # reproducibility control
    train_parser.add_argument(
        '--random-seed',
        type=int,
        help='Random seed. Omit to generate an arbitrary random seed.'
    )

    return train_parser
def get_argument_parser(
        cls
) -> ArgumentParser:
    """
    Get argument parser.

    :return: Argument parser.
    """

    # no class-specific arguments; the base parser suffices as-is.
    base_parser = get_base_argument_parser()

    return base_parser
def get_argument_parser(
        cls
) -> ArgumentParser:
    """
    Get argument parser.

    :return: Argument parser.
    """

    # consistency fix: the original signature carried a stray trailing comma after `cls`
    # ("def get_argument_parser(cls, )"), unlike the sibling definitions in this file.
    # the call interface is unchanged.
    parser = get_base_argument_parser()

    # bound on episode/run length for this environment
    parser.add_argument(
        '--T',
        type=int,
        help='Maximum number of time steps to run.'
    )

    return parser
def get_argument_parser(
        cls
) -> ArgumentParser:
    """
    Get argument parser.

    :return: Argument parser.
    """

    epsilon_parser = get_base_argument_parser()

    # exploration probability mass; omitted/0.0 means act greedily.
    epsilon_parser.add_argument(
        '--epsilon',
        type=float,
        help='Total probability mass to spread across all actions. Omit or pass 0.0 to produce a purely greedy agent.'
    )

    return epsilon_parser
def get_argument_parser_for_run() -> ArgumentParser:
    """
    Get argument parser for the run function.

    :return: Argument parser.
    """

    run_parser = get_base_argument_parser(
        prog='rlai run',
        description=(
            'Run an agent within an environment. This does not support learning (e.g., monte carlo or temporal '
            'difference). See trainer.py for such methods.'
        )
    )

    # the simple string/int options, added uniformly from a specification list.
    for flag, value_type, flag_help in [
        ('--n-runs', int, 'Number of runs.'),
        ('--pdf-save-path', str, 'Path where a PDF of all plots is to be saved.'),
        ('--figure-name', str, 'Name for figure that is generated.'),
        ('--environment', str, 'Fully-qualified type name of environment.'),
        ('--agent', str, 'Either (1) the fully-qualified type name of agent, or (2) a path to a pickled agent.'),
        ('--random-seed', int, 'Random seed. Omit to generate an arbitrary random seed.')
    ]:
        run_parser.add_argument(
            flag,
            type=value_type,
            help=flag_help
        )

    # plotting is a boolean flag rather than a typed value
    run_parser.add_argument(
        '--plot',
        action='store_true',
        help='Pass this flag to plot rewards.'
    )

    return run_parser
def get_argument_parser_for_train_function(
        function_name: str
) -> ArgumentParser:
    """
    Get argument parser for a train function.

    :param function_name: Function name.
    :return: Argument parser.
    """

    parser = get_base_argument_parser(prog=function_name)

    train_function = import_function(function_name)

    # get argument names actually expected by the specified training function
    # noinspection PyUnresolvedReferences
    function_code = train_function.__code__
    expected_arg_names = function_code.co_varnames[:function_code.co_argcount]

    # the superset of all arguments used across all training functions, as (flag, add_argument kwargs) pairs.
    # only those whose underscored name matches a parameter of the training function will be added.
    candidate_arguments = [
        ('--agent', dict(type=str, help='Fully-qualified type name of agent to train.')),
        ('--environment', dict(type=str, help='Fully-qualified type name of environment to train agent in.')),
        ('--planning-environment', dict(type=str, help='Fully-qualified type name of planning environment to train agent in.')),
        ('--policy', dict(type=str, help='Fully-qualified type name of policy to use (for policy gradient methods).')),
        ('--num-improvements', dict(type=int, help='Number of improvements.')),
        ('--num-episodes-per-improvement', dict(type=int, help='Number of episodes per improvement.')),
        ('--num-episodes', dict(type=int, help='Number of episodes.')),
        ('--num-updates-per-improvement', dict(type=int, help='Number of state-action value updates per policy improvement.')),
        ('--update-upon-every-visit', dict(type=str, choices=['True', 'False'], help='Whether or not to update values upon each visit to a state or state-action pair.')),
        ('--alpha', dict(type=float, help='Step size.')),
        ('--make-final-policy-greedy', dict(type=str, choices=['True', 'False'], help='Whether or not to make the final policy greedy after training is complete.')),
        ('--num-improvements-per-plot', dict(type=int, help='Number of improvements per plot.')),
        ('--num-improvements-per-checkpoint', dict(type=int, help='Number of improvements per checkpoint.')),
        ('--num-episodes-per-checkpoint', dict(type=int, help='Number of episodes per checkpoint.')),
        ('--checkpoint-path', dict(type=str, help='Path to checkpoint file.')),
        ('--mode', dict(type=str, help='Temporal difference evaluation mode (SARSA, Q_LEARNING, EXPECTED_SARSA).')),
        ('--n-steps', dict(type=int, help='N-step update value.')),
        ('--q-S-A', dict(type=str, help='Fully-qualified type name of state-action value estimator to use (for action-value methods).')),
        ('--v-S', dict(type=str, help='Fully-qualified type name of state-value estimator to use (for policy gradient methods).')),
        ('--pdf-save-path', dict(type=str, help='Path where a PDF of all plots is to be saved.')),
        ('--plot-state-value', dict(type=str, choices=['True', 'False'], help='Whether or not to plot the state value.'))
    ]

    for flag, add_argument_kwargs in candidate_arguments:

        # translate '--some-flag' to 'some_flag' before checking against the function's parameters
        underscored_name = flag.lstrip('-').replace('-', '_')
        if underscored_name in expected_arg_names:
            parser.add_argument(flag, **add_argument_kwargs)

    return parser
def get_argument_parser_for_train_function(
        function_name: str
) -> ArgumentParser:
    """
    Get argument parser for a train function.

    :param function_name: Function name.
    :return: Argument parser.
    """

    parser = get_base_argument_parser(prog=function_name)

    train_function = import_function(function_name)

    # get argument names defined by the specified training function
    # noinspection PyUnresolvedReferences
    allowed_arg_names = train_function.__code__.co_varnames[:train_function.__code__.co_argcount]

    def _add_if_allowed(
            name: str,
            **kwargs
    ):
        """
        Add an argument to the parser only if the training function defines a matching parameter.

        :param name: Argument name.
        :param kwargs: Other arguments.
        """

        # '--some-flag' corresponds to parameter 'some_flag'
        if name.lstrip('-').replace('-', '_') not in allowed_arg_names:
            return

        parser.add_argument(name, **kwargs)

    # attempt the superset of all arguments used across all training functions; the guard above
    # retains only those the function actually declares.

    _add_if_allowed(
        '--agent',
        type=str,
        help='Fully-qualified type name of agent to train.'
    )

    _add_if_allowed(
        '--environment',
        type=str,
        help='Fully-qualified type name of environment to train agent in.'
    )

    _add_if_allowed(
        '--planning-environment',
        type=str,
        help='Fully-qualified type name of planning environment to train agent in.'
    )

    _add_if_allowed(
        '--num-improvements',
        type=int,
        help='Number of improvements.'
    )

    _add_if_allowed(
        '--num-episodes-per-improvement',
        type=int,
        help='Number of episodes per improvement.'
    )

    _add_if_allowed(
        '--num-episodes',
        type=int,
        help='Number of episodes.'
    )

    _add_if_allowed(
        '--num-updates-per-improvement',
        type=int,
        help='Number of state-action value updates per policy improvement.'
    )

    _add_if_allowed(
        '--update-upon-every-visit',
        type=str,
        choices=['True', 'False'],
        help='Whether or not to update values upon each visit to a state or state-action pair.'
    )

    _add_if_allowed(
        '--alpha',
        type=float,
        help='Step size.'
    )

    _add_if_allowed(
        '--make-final-policy-greedy',
        type=str,
        choices=['True', 'False'],
        help='Whether or not to make the final policy greedy after training is complete.'
    )

    _add_if_allowed(
        '--num-improvements-per-plot',
        type=int,
        help='Number of improvements per plot.'
    )

    _add_if_allowed(
        '--num-improvements-per-checkpoint',
        type=int,
        help='Number of improvements per checkpoint.'
    )

    _add_if_allowed(
        '--num-episodes-per-checkpoint',
        type=int,
        help='Number of episodes per checkpoint.'
    )

    _add_if_allowed(
        '--checkpoint-path',
        type=str,
        help='Path to checkpoint file.'
    )

    _add_if_allowed(
        '--mode',
        type=str,
        help='Temporal difference evaluation mode (SARSA, Q_LEARNING, EXPECTED_SARSA).'
    )

    _add_if_allowed(
        '--n-steps',
        type=int,
        help='N-step update value.'
    )

    _add_if_allowed(
        '--pdf-save-path',
        type=str,
        help='Path where a PDF of all plots is to be saved.'
    )

    _add_if_allowed(
        '--plot-state-value',
        type=str,
        choices=['True', 'False'],
        help='Whether or not to plot the state value.'
    )

    _add_if_allowed(
        '--training-pool-directory',
        type=str,
        help='Path to directory in which to store pooled training runs.'
    )

    _add_if_allowed(
        '--training-pool-count',
        type=int,
        help='Number of runners in the training pool.'
    )

    _add_if_allowed(
        '--training-pool-iterate-episodes',
        type=int,
        help='Number of episodes per training pool iteration.'
    )

    _add_if_allowed(
        '--training-pool-evaluate-episodes',
        type=int,
        help='Number of episodes to evaluate the agent when iterating the training pool.'
    )

    _add_if_allowed(
        '--training-pool-max-iterations-without-improvement',
        type=int,
        help='Maximum number of training pool iterations to allow before reverting to the best prior agent, or None to '
             'never revert.'
    )

    return parser