def __init__(self,
             player_id,
             num_actions,
             state_representation_size,
             step_size=0.5,
             epsilon_schedule=rl_tools.ConstantSchedule(0.2),
             discount_factor=1.0,
             initialization='random',
             name='linear_qlearner'):
  """Initialize the linear (function-approximation) Q-Learning agent.

  Args:
    player_id: int, id of the player this agent controls.
    num_actions: int, number of actions available to the agent.
    state_representation_size: int, length of the state feature vector.
    step_size: float, learning rate for the weight updates.
    epsilon_schedule: schedule object exposing `.value`, giving the current
      exploration rate epsilon.
    discount_factor: float, discount applied to future rewards.
    initialization: 'random', 'zero', or a list of initial weights whose
      length equals state_representation_size * num_actions.
    name: str, name of the agent.

  Raises:
    ValueError: if `initialization` is none of the supported options.
  """
  self._player_id = player_id
  self._num_actions = num_actions
  self._state_representation_size = state_representation_size
  self._step_size = step_size
  self._epsilon_schedule = epsilon_schedule
  self._epsilon = epsilon_schedule.value
  self._discount_factor = discount_factor
  # One weight per (state-feature, action) pair: state features crossed
  # with one-hot encoded actions.
  num_weights = self._state_representation_size * self._num_actions
  if initialization == 'random':
    self._weights = np.random.random(num_weights)
  elif initialization == 'zero':
    self._weights = np.zeros(num_weights)
  elif isinstance(initialization, list) and len(initialization) == num_weights:
    # Convert to an ndarray so the weights behave identically to the
    # 'random'/'zero' branches in later arithmetic.
    self._weights = np.asarray(initialization, dtype=float)
  else:
    raise ValueError("Not implemented, choose initialization from 'random', 'zero' and a custom list.")
  self._prev_info_state = None
def create_epsilon_schedule(sched_str):
  """Creates an epsilon schedule from the string as described in the flags.

  The string has the form "linear,init,final,num_steps" or
  "constant,value". On an unrecognized prefix the process exits.

  Args:
    sched_str: str, comma-separated schedule specification.

  Returns:
    An rl_tools schedule object (LinearSchedule or ConstantSchedule).
  """
  # Bug fix: previously split FLAGS.epsilon_schedule, silently ignoring
  # the sched_str argument; now the parameter is actually used.
  values = sched_str.split(",")
  if values[0] == "linear":
    assert len(values) == 4
    return rl_tools.LinearSchedule(
        float(values[1]), float(values[2]), int(values[3]))
  elif values[0] == "constant":
    assert len(values) == 2
    return rl_tools.ConstantSchedule(float(values[1]))
  else:
    print("Unrecognized schedule string: {}".format(sched_str))
    sys.exit()
def __init__(self,
             player_id,
             num_actions,
             step_size=0.1,
             epsilon_schedule=rl_tools.ConstantSchedule(0.2),
             discount_factor=1.0):
  """Initialize the tabular Q-Learning agent.

  Args:
    player_id: int, id of the player this agent controls.
    num_actions: int, number of actions available to the agent.
    step_size: float, learning rate for Q-value updates.
    epsilon_schedule: schedule object exposing `.value`, giving the
      current exploration rate epsilon.
    discount_factor: float, discount applied to future rewards.
  """
  # Identity and learning hyper-parameters.
  self._player_id = player_id
  self._num_actions = num_actions
  self._step_size = step_size
  self._discount_factor = discount_factor
  # Exploration: keep the schedule and cache its current value.
  self._epsilon_schedule = epsilon_schedule
  self._epsilon = epsilon_schedule.value
  # Q-table: info-state key -> {action: value}, every entry defaults to 0.0.
  self._q_values = collections.defaultdict(
      lambda: collections.defaultdict(float))
  # No transition observed yet.
  self._prev_info_state = None
def __init__(self,
             player_id,
             num_actions,
             step_size=0.1,
             epsilon_schedule=rl_tools.ConstantSchedule(0.2),
             discount_factor=1.0,
             centralized=False):
  """Initialize the Q-Learning agent.

  Args:
    player_id: int, id of the player this agent controls.
    num_actions: int, number of actions available to the agent.
    step_size: float, learning rate for Q-value updates.
    epsilon_schedule: schedule object exposing `.value`, giving the
      current exploration rate epsilon.
    discount_factor: float, discount applied to future rewards.
    centralized: bool, whether the agent observes a centralized state.
  """
  # Identity and learning hyper-parameters.
  self._player_id = player_id
  self._num_actions = num_actions
  self._step_size = step_size
  self._discount_factor = discount_factor
  self._centralized = centralized
  # Exploration: keep the schedule and cache its current value.
  self._epsilon_schedule = epsilon_schedule
  self._epsilon = epsilon_schedule.value
  # Q-table keyed by info state; `valuedict` supplies the per-state
  # action-value mapping.
  self._q_values = collections.defaultdict(valuedict)
  # Bookkeeping for the learning update.
  self._prev_info_state = None
  self._last_loss_value = None