Example #1
    def get_argument_parser(
            cls
    ) -> ArgumentParser:
        """
        Get argument parser.

        :return: Argument parser.
        """

        # don't use super's argument parser, so that we do not pick up the --T argument intended for the actual
        # environment. we're going to use --T-planning instead (see below).

        parser = get_base_argument_parser()

        parser.add_argument(
            '--T-planning',
            type=int,
            help='Maximum number of planning time steps to run.'
        )

        parser.add_argument(
            '--num-planning-improvements-per-direct-improvement',
            type=int,
            help='Number of planning improvements to make for each direct improvement.'
        )

        return parser
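
A minimal parse sketch for the two planning options, assuming the base parser behaves like a standard argparse ArgumentParser; the owning class name and values below are purely illustrative:

# SomePlanningEnvironment is a hypothetical class that defines the method above.
# Dashes in option names become underscores in the parsed namespace.
parsed, unparsed = SomePlanningEnvironment.get_argument_parser().parse_known_args([
    '--T-planning', '1000',
    '--num-planning-improvements-per-direct-improvement', '10'
])
assert parsed.T_planning == 1000
assert parsed.num_planning_improvements_per_direct_improvement == 10
assert unparsed == []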
Example #2
def get_argument_parser_for_run() -> ArgumentParser:
    """
    Get argument parser for values used in the run function.

    :return: Argument parser.
    """

    parser = get_base_argument_parser(
        prog='rlai train', description='Train an agent in an environment.')

    parser.add_argument(
        '--train-function',
        type=str,
        help=
        'Fully-qualified type name of function to use for training the agent.')

    parser.add_argument(
        '--resume',
        action='store_true',
        help=
        'Pass this flag to resume training an agent from a previously saved checkpoint path.'
    )

    parser.add_argument('--save-agent-path',
                        type=str,
                        help='Path to store resulting agent to.')

    parser.add_argument(
        '--random-seed',
        type=int,
        help='Random seed. Omit to generate an arbitrary random seed.')

    return parser
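
One plausible wiring of this parser with the per-function parser from Examples #7 and #8 (a sketch under that assumption, not necessarily how the rlai train entry point is actually implemented): parse the run-level options first with parse_known_args, then hand the remainder to the parser built for the selected training function.

# Hypothetical invocation; the function path and values are illustrative only.
cli_args = [
    '--train-function', 'my_package.training.train_q_learning',
    '--save-agent-path', '/tmp/agent.pickle',
    '--num-improvements', '100'
]

run_args, remaining = get_argument_parser_for_run().parse_known_args(cli_args)
# run_args.train_function == 'my_package.training.train_q_learning'
# remaining == ['--num-improvements', '100'], which would then be parsed by
# get_argument_parser_for_train_function(run_args.train_function).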
Example #3
    def get_argument_parser(cls) -> ArgumentParser:
        """
        Get argument parser.

        :return: Argument parser.
        """

        return get_base_argument_parser()
Example #4
    def get_argument_parser(cls) -> ArgumentParser:
        """
        Get argument parser.

        :return: Argument parser.
        """

        parser = get_base_argument_parser()

        parser.add_argument('--T',
                            type=int,
                            help='Maximum number of time steps to run.')

        return parser
Example #5
    def get_argument_parser(cls) -> ArgumentParser:
        """
        Get argument parser.

        :return: Argument parser.
        """

        parser = get_base_argument_parser()

        parser.add_argument(
            '--epsilon',
            type=float,
            help=
            'Total probability mass to spread across all actions. Omit or pass 0.0 to produce a purely greedy agent.'
        )

        return parser
Example #6
def get_argument_parser_for_run() -> ArgumentParser:
    """
    Get argument parser for the run function.

    :return: Argument parser.
    """

    parser = get_base_argument_parser(
        prog='rlai run',
        description=
        'Run an agent within an environment. This does not support learning (e.g., monte carlo or temporal difference). See trainer.py for such methods.'
    )

    parser.add_argument('--n-runs', type=int, help='Number of runs.')

    parser.add_argument('--pdf-save-path',
                        type=str,
                        help='Path where a PDF of all plots is to be saved.')

    parser.add_argument('--figure-name',
                        type=str,
                        help='Name for figure that is generated.')

    parser.add_argument('--environment',
                        type=str,
                        help='Fully-qualified type name of environment.')

    parser.add_argument(
        '--agent',
        type=str,
        help=
        'Either (1) the fully-qualified type name of agent, or (2) a path to a pickled agent.'
    )

    parser.add_argument(
        '--random-seed',
        type=int,
        help='Random seed. Omit to generate an arbitrary random seed.')

    parser.add_argument('--plot',
                        action='store_true',
                        help='Pass this flag to plot rewards.')

    return parser
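
Presumably, the --environment and --agent type names parsed here are then imported, and each type's own get_argument_parser classmethod (as in Examples #1, #4, and #5) consumes whatever arguments this parser leaves unparsed, mirroring the two-stage pattern sketched after Example #2. This keeps the top-level run parser free of environment- and agent-specific options.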
Example #7
def get_argument_parser_for_train_function(
        function_name: str
) -> ArgumentParser:
    """
    Get argument parser for a train function.

    :param function_name: Function name.
    :return: Argument parser.
    """

    argument_parser = get_base_argument_parser(prog=function_name)

    function = import_function(function_name)

    # get argument names actually expected by the specified training function
    # noinspection PyUnresolvedReferences
    actual_arg_names = function.__code__.co_varnames[:function.__code__.co_argcount]

    def filter_add_argument(
            name: str,
            **kwargs
    ):
        """
        Filter arguments to those defined by the function before adding them to the argument parser.

        :param name: Argument name.
        :param kwargs: Other arguments.
        """

        var_name = name.lstrip('-').replace('-', '_')
        if var_name in actual_arg_names:
            argument_parser.add_argument(
                name,
                **kwargs
            )

    # attempt to add the superset of all arguments used across all training functions. the filter will only retain
    # those that are actually allowed.

    filter_add_argument(
        '--agent',
        type=str,
        help='Fully-qualified type name of agent to train.'
    )

    filter_add_argument(
        '--environment',
        type=str,
        help='Fully-qualified type name of environment to train agent in.'
    )

    filter_add_argument(
        '--planning-environment',
        type=str,
        help='Fully-qualified type name of planning environment to train agent in.'
    )

    filter_add_argument(
        '--policy',
        type=str,
        help='Fully-qualified type name of policy to use (for policy gradient methods).'
    )

    filter_add_argument(
        '--num-improvements',
        type=int,
        help='Number of improvements.'
    )

    filter_add_argument(
        '--num-episodes-per-improvement',
        type=int,
        help='Number of episodes per improvement.'
    )

    filter_add_argument(
        '--num-episodes',
        type=int,
        help='Number of episodes.'
    )

    filter_add_argument(
        '--num-updates-per-improvement',
        type=int,
        help='Number of state-action value updates per policy improvement.'
    )

    filter_add_argument(
        '--update-upon-every-visit',
        type=str,
        choices=['True', 'False'],
        help='Whether or not to update values upon each visit to a state or state-action pair.'
    )

    filter_add_argument(
        '--alpha',
        type=float,
        help='Step size.'
    )

    filter_add_argument(
        '--make-final-policy-greedy',
        type=str,
        choices=['True', 'False'],
        help='Whether or not to make the final policy greedy after training is complete.'
    )

    filter_add_argument(
        '--num-improvements-per-plot',
        type=int,
        help='Number of improvements per plot.'
    )

    filter_add_argument(
        '--num-improvements-per-checkpoint',
        type=int,
        help='Number of improvements per checkpoint.'
    )

    filter_add_argument(
        '--num-episodes-per-checkpoint',
        type=int,
        help='Number of episodes per checkpoint.'
    )

    filter_add_argument(
        '--checkpoint-path',
        type=str,
        help='Path to checkpoint file.'
    )

    filter_add_argument(
        '--mode',
        type=str,
        help='Temporal difference evaluation mode (SARSA, Q_LEARNING, EXPECTED_SARSA).'
    )

    filter_add_argument(
        '--n-steps',
        type=int,
        help='N-step update value.'
    )

    filter_add_argument(
        '--q-S-A',
        type=str,
        help='Fully-qualified type name of state-action value estimator to use (for action-value methods).'
    )

    filter_add_argument(
        '--v-S',
        type=str,
        help='Fully-qualified type name of state-value estimator to use (for policy gradient methods).'
    )

    filter_add_argument(
        '--pdf-save-path',
        type=str,
        help='Path where a PDF of all plots is to be saved.'
    )

    filter_add_argument(
        '--plot-state-value',
        type=str,
        choices=['True', 'False'],
        help='Whether or not to plot the state value.'
    )

    return argument_parser
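
The filtering trick relies on CPython code-object introspection: co_varnames[:co_argcount] yields the function's positional parameter names in declaration order. A standalone sketch of the same idea, with a toy function and option names that are illustrative only:

from argparse import ArgumentParser


def toy_train(agent: str, num_episodes: int, alpha: float):
    """Hypothetical training function used only for illustration."""


# positional parameter names, in declaration order: ('agent', 'num_episodes', 'alpha')
arg_names = toy_train.__code__.co_varnames[:toy_train.__code__.co_argcount]

parser = ArgumentParser(prog='toy_train')


def filter_add_argument(name: str, **kwargs):
    # convert '--num-episodes' to 'num_episodes' and add the option only if toy_train accepts it
    if name.lstrip('-').replace('-', '_') in arg_names:
        parser.add_argument(name, **kwargs)


filter_add_argument('--num-episodes', type=int, help='Added: toy_train has a num_episodes parameter.')
filter_add_argument('--num-improvements', type=int, help='Skipped: not a toy_train parameter.')

print(parser.parse_args(['--num-episodes', '5']))  # Namespace(num_episodes=5)

An equivalent check can be made with inspect.signature(function).parameters, which avoids touching __code__ and also covers keyword-only parameters.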
Example #8
def get_argument_parser_for_train_function(
        function_name: str) -> ArgumentParser:
    """
    Get argument parser for a train function.

    :param function_name: Function name.
    :return: Argument parser.
    """

    argument_parser = get_base_argument_parser(prog=function_name)

    function = import_function(function_name)

    # get argument names defined by the specified training function
    # noinspection PyUnresolvedReferences
    function_arg_names = function.__code__.co_varnames[:function.__code__.co_argcount]

    def add_argument(name: str, **kwargs):
        """
        Filter arguments to those defined by the function before adding them to the argument parser.

        :param name: Argument name.
        :param kwargs: Other arguments.
        """

        var_name = name.lstrip('-').replace('-', '_')
        if var_name in function_arg_names:
            argument_parser.add_argument(name, **kwargs)

    # add the superset of all arguments used across all training functions. the filter will only retain those allowed.

    add_argument('--agent',
                 type=str,
                 help='Fully-qualified type name of agent to train.')

    add_argument(
        '--environment',
        type=str,
        help='Fully-qualified type name of environment to train agent in.')

    add_argument(
        '--planning-environment',
        type=str,
        help=
        'Fully-qualified type name of planning environment to train agent in.')

    add_argument('--num-improvements',
                 type=int,
                 help='Number of improvements.')

    add_argument('--num-episodes-per-improvement',
                 type=int,
                 help='Number of episodes per improvement.')

    add_argument('--num-episodes', type=int, help='Number of episodes.')

    add_argument(
        '--num-updates-per-improvement',
        type=int,
        help='Number of state-action value updates per policy improvement.')

    add_argument(
        '--update-upon-every-visit',
        type=str,
        choices=['True', 'False'],
        help=
        'Whether or not to update values upon each visit to a state or state-action pair.'
    )

    add_argument('--alpha', type=float, help='Step size.')

    add_argument(
        '--make-final-policy-greedy',
        type=str,
        choices=['True', 'False'],
        help=
        'Whether or not to make the final policy greedy after training is complete.'
    )

    add_argument('--num-improvements-per-plot',
                 type=int,
                 help='Number of improvements per plot.')

    add_argument('--num-improvements-per-checkpoint',
                 type=int,
                 help='Number of improvements per checkpoint.')

    add_argument('--num-episodes-per-checkpoint',
                 type=int,
                 help='Number of episodes per checkpoint.')

    add_argument('--checkpoint-path',
                 type=str,
                 help='Path to checkpoint file.')

    add_argument(
        '--mode',
        type=str,
        help=
        'Temporal difference evaluation mode (SARSA, Q_LEARNING, EXPECTED_SARSA).'
    )

    add_argument('--n-steps', type=int, help='N-step update value.')

    add_argument('--pdf-save-path',
                 type=str,
                 help='Path where a PDF of all plots is to be saved.')

    add_argument('--plot-state-value',
                 type=str,
                 choices=['True', 'False'],
                 help='Whether or not to plot the state value.')

    add_argument(
        '--training-pool-directory',
        type=str,
        help='Path to directory in which to store pooled training runs.')

    add_argument('--training-pool-count',
                 type=int,
                 help='Number of runners in the training pool.')

    add_argument('--training-pool-iterate-episodes',
                 type=int,
                 help='Number of episodes per training pool iteration.')

    add_argument(
        '--training-pool-evaluate-episodes',
        type=int,
        help=
        'Number of episodes to evaluate the agent when iterating the training pool.'
    )

    add_argument(
        '--training-pool-max-iterations-without-improvement',
        type=int,
        help=
        'Maximum number of training pool iterations to allow before reverting to the best prior agent, or None to '
        'never revert.')

    return argument_parser