Example #1
 def get_action(self, obs):
     """ Samples an action from a softmax distribution over the arm means. """
     act_dist = softmax(self.means / self.softmax_temp)
     act = np.random.choice(range(self.env.nA), p=act_dist)
     return act
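All of the snippets in these examples call softmax and NumPy without showing the imports. Below is a minimal sketch of what is presumably in scope; softmax could equally be scipy.special.softmax, and the temperature and mean values are invented for illustration.

 import numpy as np

 def softmax(x):
     """ Numerically stable softmax over a 1-D array of values. """
     z = np.asarray(x, dtype=float) - np.max(x)
     e = np.exp(z)
     return e / e.sum()

 # Boltzmann (softmax) exploration: dividing by softmax_temp controls how
 # greedy the policy is. Low temperatures concentrate probability on the
 # best arm; high temperatures spread it out.
 means = np.array([0.1, 0.5, 0.4])
 softmax_temp = 0.25                     # illustrative value
 act_dist = softmax(means / softmax_temp)
 act = np.random.choice(range(len(means)), p=act_dist)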
Example #2
 def likelihood(self, state, obs, act):
     """ Computes the likelihood of taking the action given the state. """
     act_dist = softmax(state['means'] / state['temp'])
     return act_dist[act]
Example #3
 def act_probs_from_counts(self, counts, **kwargs):
     """ Returns the softmax action distribution over the arm means; counts is unused here. """
     means = np.array([arm.mean() for arm in self.env.arms])
     act_dist = softmax(means / self.softmax_temp)
     return act_dist
Example #4
 def likelihood(self, state, obs, act):
     """ Computes the likelihood of the action under a softmax over the means plus the UCL bonus. """
     arm_vals = state['means'] + state['ucl_bonus']
     act_dist = softmax(arm_vals / state['softmax_temp'])
     return act_dist[act]
Example #5
 def get_action_from_state(self, state, obs):
     """ Samples an action from a softmax over the means stored in the given state. """
     act_dist = softmax(state['means'] / state['temp'])
     act = np.random.choice(range(self.env.nA), p=act_dist)
     return act
Example #6
 def get_action_from_state(self, state, obs):
     """ Samples an action from a softmax over the state's means plus its UCL bonus. """
     arm_vals = state['means'] + state['ucl_bonus']
     act_dist = softmax(arm_vals / state['softmax_temp'])
     act = self.np_random.choice(range(self.env.nA), p=act_dist)
     return act
Example #7
 def get_action(self, obs):
     """ Samples an action from a softmax over the current means plus the UCL bonus. """
     self.arm_vals = self.means + self.ucl_bonus
     act_dist = softmax(self.arm_vals / self.softmax_temp)
     act = self.np_random.choice(range(self.env.nA), p=act_dist)
     return act
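Examples #4, #6, and #7 add a ucl_bonus term (presumably an upper-confidence bonus) to the mean estimates before the softmax, so poorly explored arms receive extra probability mass. Below is a hedged, self-contained toy version of that pattern; the class name, the counts attribute, the update rule, and the UCB1-style bonus formula are illustrative assumptions, not taken from the original code.

 import numpy as np
 from scipy.special import softmax

 class ToySoftmaxUCLAgent:
     """ Illustrative softmax-over-UCB bandit agent (not the original class). """

     def __init__(self, n_arms, softmax_temp=0.25, seed=0):
         self.softmax_temp = softmax_temp
         self.np_random = np.random.RandomState(seed)
         self.means = np.zeros(n_arms)      # running mean reward per arm
         self.counts = np.zeros(n_arms)     # number of pulls per arm

     def update(self, arm, reward):
         # Incremental update of the per-arm mean estimate.
         self.counts[arm] += 1
         self.means[arm] += (reward - self.means[arm]) / self.counts[arm]

     @property
     def ucl_bonus(self):
         # Assumed UCB1-style bonus; the bonus used by the original agent may differ.
         t = max(self.counts.sum(), 1.0)
         return np.sqrt(2.0 * np.log(t) / np.maximum(self.counts, 1.0))

     def get_action(self, obs=None):
         # Mirrors Example #7: softmax over means plus the confidence bonus.
         arm_vals = self.means + self.ucl_bonus
         act_dist = softmax(arm_vals / self.softmax_temp)
         return self.np_random.choice(len(self.means), p=act_dist)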
Example #8
 def likelihood(self, state, obs, act):
     """ Computes the likelihood of taking the action given the state. """
     return softmax(state['Q_human'][obs] / self.softmax_temp)[act]
Example #9
 def get_action_from_state(self, state, obs):
     """ Samples an action from a softmax over the state's Q-values for the observation. """
     act_dist = softmax(state['Q_human'][obs] / self.softmax_temp)
     act = np.random.choice(range(self.env.nA), p=act_dist)
     return act
Example #10
 def get_action(self, obs):
     """ Samples an action from a softmax over the agent's Q-values for the observation. """
     act_dist = softmax(self.Q_human[obs] / self.softmax_temp)
     act = self.np_random.choice(range(self.env.nA), p=act_dist)
     return act
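Examples #8 through #10 index a table Q_human by the current observation, so the softmax is taken over the Q-values of that state rather than over bandit arm means. A short, hedged usage sketch follows; the table sizes and values are invented for illustration.

 import numpy as np
 from scipy.special import softmax

 nS, nA = 4, 3                      # illustrative numbers of states and actions
 softmax_temp = 0.5                 # illustrative temperature
 Q_human = np.zeros((nS, nA))
 Q_human[2] = [0.1, 0.9, 0.3]       # pretend state 2 strongly prefers action 1

 obs, act = 2, 1
 act_dist = softmax(Q_human[obs] / softmax_temp)         # policy at this state
 likelihood = act_dist[act]                              # as in Example #8
 sampled_act = np.random.choice(range(nA), p=act_dist)   # as in Examples #9 and #10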