Example #1
def show_help(args: List[str]):
    """
    Show help for a class.

    :param args: Arguments.
    """

    if len(args) == 1:
        try:
            parser = get_argument_parser(args[0])
            parse_arguments(parser, ['--help'])
        except Exception as ex:
            print(f'{ex}')
    else:
        print('Usage:  rlai help CLASS')
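As a usage sketch, show_help expects exactly one argument, a fully qualified class name; the path below is hypothetical and only illustrates the calling convention.

# Hypothetical invocation: print the argument help for a single class. The
# module path is illustrative, not a verified rlai path.
show_help(['rlai.agents.mdp.StochasticMdpAgent'])

# Any other argument count prints the usage string instead.
show_help([])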
Example #2
    def init_from_arguments(
            cls, args: List[str], random_state: RandomState,
            environment: Environment) -> Tuple[List[Agent], List[str]]:
        """
        Initialize an MDP agent from arguments.

        :param args: Arguments.
        :param random_state: Random state.
        :param environment: Environment.
        :return: 2-tuple of a list of agents and a list of unparsed arguments.
        """

        parsed_args, unparsed_args = parse_arguments(cls, args)

        # load state-action value estimator
        estimator_class = load_class(parsed_args.q_S_A)
        q_S_A, unparsed_args = estimator_class.init_from_arguments(
            args=unparsed_args,
            random_state=random_state,
            environment=environment)
        del parsed_args.q_S_A

        # noinspection PyUnboundLocalVariable
        agent = cls(name=f'action-value (gamma={parsed_args.gamma})',
                    random_state=random_state,
                    q_S_A=q_S_A,
                    **vars(parsed_args))

        return [agent], unparsed_args
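The pattern above chains parsing across components: each init_from_arguments consumes only the arguments it recognizes, hands the remainder (unparsed_args) to the next component, and deletes any consumed attribute before splatting **vars(parsed_args) into the constructor. Below is a self-contained sketch of that chaining idea using plain argparse; it is a generic illustration under made-up parsers and flags, not rlai's parse_arguments implementation.

import argparse
from typing import List, Tuple


def parse_component_args(parser: argparse.ArgumentParser, args: List[str]) -> Tuple[argparse.Namespace, List[str]]:
    # Consume only the arguments this parser recognizes and return the rest,
    # mirroring the (parsed_args, unparsed_args) convention used above.
    return parser.parse_known_args(args)


agent_parser = argparse.ArgumentParser(add_help=False)
agent_parser.add_argument('--gamma', type=float, default=1.0)

estimator_parser = argparse.ArgumentParser(add_help=False)
estimator_parser.add_argument('--epsilon', type=float, default=0.1)

cli_args = ['--gamma', '0.9', '--epsilon', '0.05']

agent_args, remaining = parse_component_args(agent_parser, cli_args)
estimator_args, remaining = parse_component_args(estimator_parser, remaining)

assert remaining == []  # every argument was claimed by exactly one component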
Example #3
    def init_from_arguments(
            cls,
            args: List[str],
            random_state: RandomState,
            pi: Policy
    ) -> Tuple[List[Agent], List[str]]:
        """
        Initialize an MDP agent from arguments.

        :param args: Arguments.
        :param random_state: Random state.
        :param pi: Policy.
        :return: 2-tuple of a list of agents and a list of unparsed arguments.
        """

        parsed_args, unparsed_args = parse_arguments(cls, args)

        agent = cls(
            name=f'stochastic (gamma={parsed_args.gamma})',
            random_state=random_state,
            pi=pi,
            **vars(parsed_args)
        )

        return [agent], unparsed_args
Example #4
    def init_from_arguments(
            cls,
            args: List[str],
            random_state: RandomState,
            environment: Environment
    ) -> Tuple[List[Agent], List[str]]:
        """
        Initialize a list of agents from arguments.

        :param args: Arguments.
        :param random_state: Random state.
        :param environment: Environment.
        :return: 2-tuple of a list of agents and a list of unparsed arguments.
        """

        parsed_args, unparsed_args = parse_arguments(cls, args)

        # initialize agents
        agents = [
            cls(
                name=f'preference gradient (step size={parsed_args.step_size_alpha})',
                random_state=random_state,
                **vars(parsed_args)
            )
        ]

        return agents, unparsed_args
Example #5
    def init_from_arguments(
            cls,
            args: List[str],
            random_state: RandomState
    ) -> Tuple[FunctionApproximationModel, List[str]]:
        """
        Initialize a model from arguments.

        :param args: Arguments.
        :param random_state: Random state.
        :return: 2-tuple of a model and a list of unparsed arguments.
        """

        parsed_args, unparsed_args = parse_arguments(cls, args)

        # process arguments whose names conflict with arguments used elsewhere
        setattr(parsed_args, 'alpha', parsed_args.sgd_alpha)
        del parsed_args.sgd_alpha
        setattr(parsed_args, 'epsilon', parsed_args.sgd_epsilon)
        del parsed_args.sgd_epsilon

        # instantiate model
        model = cls(
            random_state=random_state,
            **vars(parsed_args)
        )

        return model, unparsed_args
Example #6
    def init_from_arguments(
            cls,
            args: List[str],
            random_state: RandomState,
            pi: Optional[Policy]
    ) -> Tuple[List[Agent], List[str]]:
        """
        Initialize a list of agents from arguments.

        :param args: Arguments.
        :param random_state: Random state.
        :param pi: Policy.
        :return: 2-tuple of a list of agents and a list of unparsed arguments.
        """

        parsed_args, unparsed_args = parse_arguments(cls, args)

        # grab and delete c values from parsed arguments
        c_values = parsed_args.c
        del parsed_args.c

        # initialize agents
        agents = [
            cls(
                name=f'UCB (c={c})',
                random_state=random_state,
                c=c,
                **vars(parsed_args)
            )
            for c in c_values
        ]

        return agents, unparsed_args
Example #7
    def init_from_arguments(
            cls,
            args: List[str],
            random_state: RandomState,
            pi: Optional[Policy]
    ) -> Tuple[List[Agent], List[str]]:
        """
        Initialize a list of agents from arguments.

        :param args: Arguments.
        :param random_state: Random state.
        :param pi: Policy.
        :return: 2-tuple of a list of agents and a list of unparsed arguments.
        """

        parsed_args, unparsed_args = parse_arguments(cls, args)

        # grab and delete epsilons from parsed arguments
        epsilons = parsed_args.epsilon
        del parsed_args.epsilon

        # initialize agents
        agents = [
            cls(
                name=f'epsilon-greedy (e={epsilon:0.2f})',
                random_state=random_state,
                epsilon=epsilon,
                **vars(parsed_args)
            )
            for epsilon in epsilons
        ]

        return agents, unparsed_args
Example #8
    def init_from_arguments(
        cls, args: List[str], random_state: RandomState,
        environment: MdpEnvironment
    ) -> Tuple[StateActionValueEstimator, List[str]]:
        """
        Initialize a state-action value estimator from arguments.

        :param args: Arguments.
        :param random_state: Random state.
        :param environment: Environment.
        :return: 2-tuple of a state-action value estimator and a list of unparsed arguments.
        """

        parsed_args, unparsed_args = parse_arguments(cls, args)

        # load model
        model_class = load_class(parsed_args.function_approximation_model)
        model, unparsed_args = model_class.init_from_arguments(
            args=unparsed_args, random_state=random_state)
        del parsed_args.function_approximation_model

        # load feature extractor
        feature_extractor_class = load_class(parsed_args.feature_extractor)
        fex, unparsed_args = feature_extractor_class.init_from_arguments(
            args=unparsed_args, environment=environment)
        del parsed_args.feature_extractor

        # initialize estimator
        estimator = cls(environment=environment,
                        model=model,
                        feature_extractor=fex,
                        **vars(parsed_args))

        return estimator, unparsed_args
Example #9
    def init_from_arguments(
            cls, args: List[str],
            environment: ContinuousMdpEnvironment) -> Tuple[Policy, List[str]]:
        """
        Initialize a policy from arguments.

        :param args: Arguments.
        :param environment: Environment.
        :return: 2-tuple of a policy and a list of unparsed arguments.
        """

        parsed_args, unparsed_args = parse_arguments(cls, args)

        # load feature extractor
        feature_extractor_class = load_class(
            parsed_args.policy_feature_extractor)
        feature_extractor, unparsed_args = feature_extractor_class.init_from_arguments(
            args=unparsed_args, environment=environment)
        del parsed_args.policy_feature_extractor

        # initialize policy
        policy = cls(environment=environment,
                     feature_extractor=feature_extractor,
                     **vars(parsed_args))

        return policy, unparsed_args
Example #10
    def init_from_arguments(
            cls,
            args: List[str],
            environment: MdpEnvironment
    ) -> Tuple[Policy, List[str]]:
        """
        Initialize a policy from arguments.

        :param args: Arguments.
        :param environment: Environment.
        :return: 2-tuple of a policy and a list of unparsed arguments.
        """

        parsed_args, unparsed_args = parse_arguments(cls, args)

        # load feature extractor
        feature_extractor_class = load_class(parsed_args.policy_feature_extractor)
        feature_extractor, unparsed_args = feature_extractor_class.init_from_arguments(
            args=unparsed_args,
            environment=environment
        )
        del parsed_args.policy_feature_extractor

        # there shouldn't be anything left
        if len(vars(parsed_args)) > 0:  # pragma: no cover
            raise ValueError('Parsed args remain. Need to pass to constructor.')

        # initialize policy
        policy = cls(
            feature_extractor=feature_extractor
        )

        return policy, unparsed_args
Example #11
    def init_from_arguments(
            cls, args: List[str],
            random_state: RandomState) -> Tuple[Environment, List[str]]:
        """
        Initialize an environment from arguments.

        :param args: Arguments.
        :param random_state: Random state.
        :return: 2-tuple of an environment and a list of unparsed arguments.
        """

        parsed_args, unparsed_args = parse_arguments(cls, args)

        bandit = cls(random_state=random_state, **vars(parsed_args))

        return bandit, unparsed_args
Example #12
    def init_from_arguments(
            cls, args: List[str],
            random_state: RandomState) -> Tuple[Environment, List[str]]:
        """
        Initialize an environment from arguments.

        :param args: Arguments.
        :param random_state: Random state.
        :return: 2-tuple of an environment and a list of unparsed arguments.
        """

        parsed_args, unparsed_args = parse_arguments(cls, args)

        gamblers_problem = cls(name=f"gambler's problem (p={parsed_args.p_h})",
                               random_state=random_state,
                               **vars(parsed_args))

        return gamblers_problem, unparsed_args
Example #13
    def init_from_arguments(
        cls, args: List[str], random_state: RandomState,
        environment: MdpEnvironment
    ) -> Tuple[StateActionValueEstimator, List[str]]:
        """
        Initialize a state-action value estimator from arguments.

        :param args: Arguments.
        :param random_state: Random state.
        :param environment: Environment.
        :return: 2-tuple of a state-action value estimator and a list of unparsed arguments.
        """

        parsed_args, unparsed_args = parse_arguments(cls, args)

        estimator = cls(environment=environment, **vars(parsed_args))

        return estimator, unparsed_args
Example #14
    def init_from_arguments(
            cls,
            args: List[str],
            random_state: RandomState,
            environment: MdpEnvironment
    ) -> Tuple[StateValueEstimator, List[str]]:
        """
        Initialize a state-value estimator from arguments.

        :param args: Arguments.
        :param random_state: Random state.
        :param environment: Environment.
        :return: 2-tuple of a state-value estimator and a list of unparsed arguments.
        """

        parsed_args, unparsed_args = parse_arguments(cls, args)

        # load model
        model_class = load_class(parsed_args.function_approximation_model)
        model, unparsed_args = model_class.init_from_arguments(
            args=unparsed_args,
            random_state=random_state
        )
        del parsed_args.function_approximation_model

        # load feature extractor
        feature_extractor_class = load_class(parsed_args.feature_extractor)
        fex, unparsed_args = feature_extractor_class.init_from_arguments(
            args=unparsed_args,
            environment=environment
        )
        del parsed_args.feature_extractor

        # there shouldn't be anything left
        if len(vars(parsed_args)) > 0:  # pragma: no cover
            raise ValueError('Parsed args remain. Need to pass to constructor.')

        # initialize estimator
        estimator = cls(
            model=model,
            feature_extractor=fex
        )

        return estimator, unparsed_args
Example #15
    def init_from_arguments(
            cls, args: List[str],
            random_state: RandomState) -> Tuple[Environment, List[str]]:
        """
        Initialize an environment from arguments.

        :param args: Arguments.
        :param random_state: Random state.
        :return: 2-tuple of an environment and a list of unparsed arguments.
        """

        parsed_args, unparsed_args = parse_arguments(cls, args)

        planning_environment = cls(name='trajectory planning',
                                   random_state=random_state,
                                   model=StochasticEnvironmentModel(),
                                   **vars(parsed_args))

        return planning_environment, unparsed_args
Example #16
    def init_from_arguments(
            cls, args: List[str],
            environment: Gym) -> Tuple[FeatureExtractor, List[str]]:
        """
        Initialize a feature extractor from arguments.

        :param args: Arguments.
        :param environment: Environment.
        :return: 2-tuple of a feature extractor and a list of unparsed arguments.
        """

        parsed_args, unparsed_args = parse_arguments(cls, args)

        # there shouldn't be anything left
        if len(vars(parsed_args)) > 0:  # pragma: no cover
            raise ValueError(
                'Parsed args remain. Need to pass to constructor.')

        fex = cls()

        return fex, unparsed_args
Example #17
    def init_from_arguments(
            cls, args: List[str],
            random_state: RandomState) -> Tuple[Environment, List[str]]:
        """
        Initialize an environment from arguments.

        :param args: Arguments.
        :param random_state: Random state.
        :return: 2-tuple of an environment and a list of unparsed arguments.
        """

        parsed_args, unparsed_args = parse_arguments(cls, args)

        mancala = cls(
            random_state=random_state,
            player_2=StochasticMdpAgent(
                'environmental agent',
                random_state,
                TabularPolicy(None, None),
                1
            ),
            **vars(parsed_args)
        )

        return mancala, unparsed_args
Example #18
    def init_from_arguments(
            cls, args: List[str], random_state: RandomState,
            environment: Environment) -> Tuple[List[Agent], List[str]]:
        """
        Initialize an MDP agent from arguments.

        :param args: Arguments.
        :param random_state: Random state.
        :param environment: Environment.
        :return: 2-tuple of a list of agents and a list of unparsed arguments.
        """

        parsed_args, unparsed_args = parse_arguments(cls, args)

        # load state-value estimator, which is optional.
        v_S = None
        if parsed_args.v_S is not None:
            estimator_class = load_class(parsed_args.v_S)
            v_S, unparsed_args = estimator_class.init_from_arguments(
                args=unparsed_args,
                random_state=random_state,
                environment=environment)
        del parsed_args.v_S

        # load parameterized policy
        policy_class = load_class(parsed_args.policy)
        policy, unparsed_args = policy_class.init_from_arguments(
            args=unparsed_args, environment=environment)
        del parsed_args.policy

        # noinspection PyUnboundLocalVariable
        agent = cls(name=f'parameterized (gamma={parsed_args.gamma})',
                    random_state=random_state,
                    pi=policy,
                    v_S=v_S,
                    **vars(parsed_args))

        return [agent], unparsed_args
Example #19
    def init_from_arguments(
            cls,
            args: List[str],
            random_state: RandomState
    ) -> Tuple[Environment, List[str]]:
        """
        Initialize an environment from arguments.

        :param args: Arguments.
        :param random_state: Random state.
        :return: 2-tuple of an environment and a list of unparsed arguments.
        """

        parsed_args, unparsed_args = parse_arguments(cls, args)

        gridworld_id = parsed_args.id
        del parsed_args.id

        gridworld = getattr(cls, gridworld_id)(
            random_state=random_state,
            **vars(parsed_args)
        )

        return gridworld, unparsed_args
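Example #19 above does not call cls directly; it dispatches to a classmethod factory selected by the parsed id argument via getattr(cls, gridworld_id). A generic sketch of that dispatch follows; the class and factory names here are hypothetical, not rlai's Gridworld API.

from numpy.random import RandomState


class Maze:
    """Hypothetical environment with named factory configurations."""

    def __init__(self, size: int):
        self.size = size

    @classmethod
    def small(cls, random_state: RandomState, **kwargs) -> 'Maze':
        # A real factory would use random_state; it is ignored in this sketch.
        return cls(size=4)


# Select the factory by name and call it, as in getattr(cls, gridworld_id)(...).
maze = getattr(Maze, 'small')(random_state=RandomState(12345))
assert maze.size == 4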
Example #20
def run(
        args: Optional[List[str]] = None,
        thread_manager: Optional[RunThreadManager] = None,
        train_function_args_callback: Optional[Callable[[Dict], None]] = None
) -> Tuple[Optional[str], Optional[str]]:
    """
    Train an agent in an environment.

    :param args: Arguments.
    :param thread_manager: Thread manager for the thread that is executing the current function. If None, then training
    will continue until termination criteria (e.g., number of iterations) are met. If not None, then the passed
    manager will be waited upon before starting each iteration. If the manager blocks, then another thread will need to
    clear the manager before the iteration continues. If the manager aborts, then this function will return as soon as
    possible.
    :param train_function_args_callback: A callback function to be called with the arguments that will be passed to the
    training function. This gives the caller an opportunity to grab references to the internal arguments that will be
    used in training. For example, plotting from the Jupyter Lab interface grabs the state-action value estimator
    (q_S_A) from the passed dictionary to use in updating its plots. This callback is only called for fresh training. It
    is not called when resuming from a checkpoint.
    :returns: 2-tuple of the checkpoint path (if any) and the saved agent path (if any).
    """

    # initialize with flag set if not passed, so that execution will not block. since the caller will not hold a
    # reference to the manager, it cannot be cleared and execution will never block.
    if thread_manager is None:
        thread_manager = RunThreadManager(True)

    parser = get_argument_parser_for_run()
    parsed_args, unparsed_args = parse_arguments(parser, args)

    if parsed_args.train_function is None:
        raise ValueError('No training function specified. Cannot train.')

    if parsed_args.random_seed is None:
        warnings.warn('No random seed provided to the trainer. Results will not be replicable. Consider passing the --random-seed argument.')
        random_state = RandomState()
    else:
        random_state = RandomState(parsed_args.random_seed)

    # warn user, as training could take a long time and it'll be wasted effort if the agent is not saved.
    if parsed_args.save_agent_path is None:
        warnings.warn('No --save-agent-path has been specified, so no agent will be saved after training.')

    initial_policy = None

    # load training function and parse any arguments that it requires
    train_function = import_function(parsed_args.train_function)
    train_function_arg_parser = get_argument_parser_for_train_function(parsed_args.train_function)
    parsed_train_function_args, unparsed_args = parse_arguments(train_function_arg_parser, unparsed_args)

    train_function_args = {
        'thread_manager': thread_manager,
        **vars(parsed_train_function_args)
    }

    # convert boolean strings to booleans
    if train_function_args.get('update_upon_every_visit', None) is not None:
        train_function_args['update_upon_every_visit'] = train_function_args['update_upon_every_visit'] == 'True'

    if train_function_args.get('make_final_policy_greedy', None) is not None:
        train_function_args['make_final_policy_greedy'] = train_function_args['make_final_policy_greedy'] == 'True'

    if train_function_args.get('plot_state_value', None) is not None:
        train_function_args['plot_state_value'] = train_function_args['plot_state_value'] == 'True'

    # load environment
    if train_function_args.get('environment', None) is not None:
        environment_class = load_class(train_function_args['environment'])
        train_function_args['environment'], unparsed_args = environment_class.init_from_arguments(
            args=unparsed_args,
            random_state=random_state
        )

    # load planning environment
    if train_function_args.get('planning_environment', None) is not None:
        planning_environment_class = load_class(train_function_args['planning_environment'])
        train_function_args['planning_environment'], unparsed_args = planning_environment_class.init_from_arguments(
            args=unparsed_args,
            random_state=random_state
        )

    # load state-action value estimator
    if train_function_args.get('q_S_A', None) is not None:
        estimator_class = load_class(train_function_args['q_S_A'])
        state_action_value_estimator, unparsed_args = estimator_class.init_from_arguments(
            args=unparsed_args,
            random_state=random_state,
            environment=train_function_args['environment']
        )
        train_function_args['q_S_A'] = state_action_value_estimator
        initial_policy = state_action_value_estimator.get_initial_policy()

    # load state-value estimator
    if train_function_args.get('v_S', None) is not None:
        estimator_class = load_class(train_function_args['v_S'])
        train_function_args['v_S'], unparsed_args = estimator_class.init_from_arguments(
            args=unparsed_args,
            random_state=random_state,
            environment=train_function_args['environment']
        )

    # load parameterized policy
    if train_function_args.get('policy', None) is not None:
        policy_class = load_class(train_function_args['policy'])
        initial_policy, unparsed_args = policy_class.init_from_arguments(
            args=unparsed_args,
            environment=train_function_args['environment']
        )
        train_function_args['policy'] = initial_policy

    # load agent
    if train_function_args.get('agent', None) is not None:
        agent_class = load_class(train_function_args['agent'])
        agents, unparsed_args = agent_class.init_from_arguments(
            args=unparsed_args,
            random_state=random_state,
            pi=initial_policy
        )
        agent = agents[0]
        train_function_args['agent'] = agent
    else:
        agent = None

    if '--help' in unparsed_args:
        unparsed_args.remove('--help')

    if len(unparsed_args) > 0:
        raise ValueError(f'Unparsed arguments remain:  {unparsed_args}')

    new_checkpoint_path = None

    # resumption will return a trained version of the agent contained in the checkpoint file
    if parsed_args.resume:
        agent = resume_from_checkpoint(
            resume_function=train_function,
            **train_function_args
        )

    # fresh training will train the agent that was initialized above and passed in
    else:

        if train_function_args_callback is not None:
            train_function_args_callback(train_function_args)

        new_checkpoint_path = train_function(
            **train_function_args
        )

        train_function_args['environment'].close()

        if isinstance(initial_policy, ParameterizedPolicy):
            initial_policy.close()

    logging.info('Training complete.')

    # try to save agent
    if agent is None:  # pragma: no cover
        warnings.warn('No agent resulted from training. Nothing to save.')
    elif parsed_args.save_agent_path is None:
        warnings.warn('No --save-agent-path specified. Not saving agent.')
    else:
        with open(os.path.expanduser(parsed_args.save_agent_path), 'wb') as f:
            pickle.dump(agent, f)

        logging.info(f'Saved agent to {parsed_args.save_agent_path}')

    return new_checkpoint_path, parsed_args.save_agent_path
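A minimal sketch of calling run with a callback that captures the training arguments, as described in the docstring above. The flag names and the training-function path are assumptions inferred from the parsed_args attributes used in the code, not verified against the rlai CLI.

captured_train_function_args = {}


def grab_train_function_args(train_function_args):
    # Hold references to internal training objects (e.g., the q_S_A estimator)
    # so the caller can inspect or plot them while training runs.
    captured_train_function_args.update(train_function_args)


# Hypothetical arguments; a real invocation would also specify the agent,
# environment, and any parameters required by the chosen training function.
checkpoint_path, agent_path = run(
    args=[
        '--train-function', 'rlai.gpi.monte_carlo.iteration.iterate_value_q_pi',
        '--random-seed', '12345',
        '--save-agent-path', '~/agent.pickle'
    ],
    train_function_args_callback=grab_train_function_args
)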