Exemplo n.º 1
0
    def __init__(self,
                 temperature=1.,
                 with_critic=False,
                 linear_annealing_kwargs=None):
        """Builds a softmax agent on top of a categorical distribution.

        Args:
            temperature (float): Softmax temperature handed to the
                CategoricalDistribution.
            with_critic (bool): Forwarded to the base class; run the
                Actor-Critic agent with a value network.
            linear_annealing_kwargs (dict): Keyword arguments for
                schedules.LinearAnnealing ('max_value', 'min_value',
                'n_epochs'). When given, the schedule is registered under
                'distribution.temperature' and overrides the constant
                temperature. When None, no schedule is registered and the
                temperature stays constant.
        """
        # Start with no schedules; only add the annealing one when requested.
        parameter_schedules = {}
        if linear_annealing_kwargs is not None:
            parameter_schedules['distribution.temperature'] = (
                schedules.LinearAnnealing(**linear_annealing_kwargs))

        softmax_distribution = distributions.CategoricalDistribution(
            temperature=temperature)
        super().__init__(
            distribution=softmax_distribution,
            with_critic=with_critic,
            parameter_schedules=parameter_schedules,
        )
Exemplo n.º 2
0
    def __init__(self,
                 epsilon=.05,
                 with_critic=False,
                 linear_annealing_kwargs=None):
        """Builds an epsilon-greedy agent on top of an epsilon-greedy distribution.

        Args:
            epsilon (float): Probability of taking a random action, handed to
                the EpsilonGreedyDistribution.
            with_critic (bool): Forwarded to the base class; run the
                Actor-Critic agent with a value network.
            linear_annealing_kwargs (dict): Keyword arguments for
                schedules.LinearAnnealing ('max_value', 'min_value',
                'n_epochs'). When given, the schedule is registered under
                'distribution.epsilon' and overrides the constant epsilon.
                When None, no schedule is registered and epsilon stays
                constant.
        """
        # Start with no schedules; only add the annealing one when requested.
        parameter_schedules = {}
        if linear_annealing_kwargs is not None:
            parameter_schedules['distribution.epsilon'] = (
                schedules.LinearAnnealing(**linear_annealing_kwargs))

        greedy_distribution = distributions.EpsilonGreedyDistribution(
            epsilon=epsilon)
        super().__init__(
            distribution=greedy_distribution,
            with_critic=with_critic,
            parameter_schedules=parameter_schedules,
        )
Exemplo n.º 3
0
def test_linear_annealing_schedule(mock_env):
    """Checks that a LinearAnnealing schedule drives an agent attribute.

    A schedule annealing from 10 down to 1 over 10 epochs is registered on a
    RandomAgent under an arbitrary attribute name. After solving each epoch,
    the attribute must equal the expected value for that epoch.

    Args:
        mock_env: Environment passed to agent.solve (presumably a pytest
            fixture defined elsewhere - confirm).
    """
    # Set up
    attr_name = 'pied_piper'
    expected_values = list(range(10, 0, -1))
    annealing = schedules.LinearAnnealing(
        max(expected_values),
        min(expected_values),
        len(expected_values),
    )
    agent = core.RandomAgent(parameter_schedules={attr_name: annealing})

    # Run & Test
    for epoch, expected in enumerate(expected_values):
        solve_coroutine = agent.solve(mock_env, epoch=epoch)
        testing.run_without_suspensions(solve_coroutine)
        assert getattr(agent, attr_name) == expected