Exemplo n.º 1
0
 def __init__(self,
              player_id,
              num_actions,
              state_representation_size,
              step_size=0.5,
              epsilon_schedule=rl_tools.ConstantSchedule(0.2),
              discount_factor=1.0,
              initialization='random',
              name='linear_qlearner'):
     """Initialize the Q-Learning agent.

     Args:
       player_id: identifier stored as `self._player_id`.
       num_actions: number of actions; together with
         `state_representation_size` it fixes the weight vector length.
       state_representation_size: size of the state feature vector.
       step_size: stored as `self._step_size`.
       epsilon_schedule: object exposing a `value` attribute, read once
         here to seed `self._epsilon`. NOTE: the default instance is built
         once at function-definition time and is therefore shared by every
         agent that relies on the default.
       discount_factor: stored as `self._discount_factor`.
       initialization: 'random' (uniform samples from `np.random.random`),
         'zero', or a list/tuple holding exactly
         `state_representation_size * num_actions` numeric weights.
       name: accepted for interface compatibility; not referenced in the
         part of this initializer shown here.

     Raises:
       ValueError: if `initialization` is not one of the supported forms.
     """
     self._player_id = player_id
     self._num_actions = num_actions
     self._state_representation_size = state_representation_size
     self._step_size = step_size
     self._epsilon_schedule = epsilon_schedule
     self._epsilon = epsilon_schedule.value
     self._discount_factor = discount_factor
     # One weight per (state feature, action) pair: state features combined
     # with one-hot encoded actions for (S, a).
     num_weights = self._state_representation_size * self._num_actions
     if initialization == 'random':
         self._weights = np.random.random(num_weights)
     elif initialization == 'zero':
         self._weights = np.zeros(num_weights)
     elif (isinstance(initialization, (list, tuple))
           and len(initialization) == num_weights):
         # Copy into a float ndarray so custom weights behave exactly like
         # the 'random'/'zero' ones (a plain list would break in-place
         # numeric updates) and the caller's sequence is never mutated.
         self._weights = np.array(initialization, dtype=float)
     else:
         raise ValueError("Not implemented, choose initialization from 'random', 'zero' and a custom list.")
     self._prev_info_state = None
def create_epsilon_schedule(sched_str):
    """Creates an epsilon schedule from a comma-separated description string.

    Supported formats:
      "linear,<a>,<b>,<n>": returns `rl_tools.LinearSchedule(float(a),
        float(b), int(n))`.
      "constant,<v>": returns `rl_tools.ConstantSchedule(float(v))`.

    Args:
      sched_str: the schedule description to parse.

    Returns:
      The schedule object built by `rl_tools`.

    Raises:
      AssertionError: if a recognized schedule has the wrong field count.
      ValueError: if a numeric field cannot be converted.
      SystemExit: if the schedule kind is not recognized.
    """
    # Parse the argument itself rather than the global flag, so the function
    # honours whatever string the caller passes in.
    values = sched_str.split(",")
    kind = values[0]
    if kind == "linear":
        assert len(values) == 4, "linear schedule needs 3 values"
        return rl_tools.LinearSchedule(float(values[1]), float(values[2]),
                                       int(values[3]))
    elif kind == "constant":
        assert len(values) == 2, "constant schedule needs 1 value"
        return rl_tools.ConstantSchedule(float(values[1]))
    else:
        print("Unrecognized schedule string: {}".format(sched_str))
        # Exit with a non-zero status so the failure is visible to callers.
        sys.exit(1)
Exemplo n.º 3
0
 def __init__(self,
              player_id,
              num_actions,
              step_size=0.1,
              epsilon_schedule=rl_tools.ConstantSchedule(0.2),
              discount_factor=1.0):
     """Set up the tabular Q-learning agent's hyper-parameters and Q-table.

     Args:
       player_id: identifier stored as `self._player_id`.
       num_actions: stored as `self._num_actions`.
       step_size: stored as `self._step_size`.
       epsilon_schedule: object exposing a `value` attribute, which is read
         once here to seed `self._epsilon`.
       discount_factor: stored as `self._discount_factor`.
     """
     def _fresh_action_values():
         # Inner table: unseen keys default to 0.0.
         return collections.defaultdict(float)

     # Two-level table; the inner mapping is created on first access.
     self._q_values = collections.defaultdict(_fresh_action_values)
     self._prev_info_state = None

     self._player_id, self._num_actions = player_id, num_actions
     self._step_size, self._discount_factor = step_size, discount_factor

     self._epsilon_schedule = epsilon_schedule
     self._epsilon = self._epsilon_schedule.value
Exemplo n.º 4
0
 def __init__(self,
              player_id,
              num_actions,
              step_size=0.1,
              epsilon_schedule=rl_tools.ConstantSchedule(0.2),
              discount_factor=1.0,
              centralized=False):
   """Set up the Q-learning agent's hyper-parameters and bookkeeping state.

   Args:
     player_id: identifier stored as `self._player_id`.
     num_actions: stored as `self._num_actions`.
     step_size: stored as `self._step_size`.
     epsilon_schedule: object exposing a `value` attribute, which is read
       once here to seed `self._epsilon`.
     discount_factor: stored as `self._discount_factor`.
     centralized: flag stored as `self._centralized`.
   """
   # Bookkeeping that starts out empty.
   self._prev_info_state = None
   self._last_loss_value = None
   # Outer table whose missing entries are produced by `valuedict`.
   self._q_values = collections.defaultdict(valuedict)

   # Identity and learning hyper-parameters.
   self._player_id, self._num_actions = player_id, num_actions
   self._step_size, self._discount_factor = step_size, discount_factor
   self._centralized = centralized

   # Keep the schedule and its current value.
   self._epsilon_schedule = epsilon_schedule
   self._epsilon = self._epsilon_schedule.value