def eval_step(self, state):
    """Return the arg-max action and the action distribution for ``state``.

    The observation stored under ``state['obs']`` is wrapped in a
    one-element list, converted to a tensor and passed to
    ``self.bot.predict``. Only the first of the two values that call returns
    is used. That value and ``state['legal_actions']`` are handed to
    ``softmax``, and element 0 of the result is taken as the distribution.

    Returns:
        A ``(best_action, probs)`` tuple, where ``best_action`` is
        ``np.argmax(probs)``.
    """
    # NOTE(review): presumably the one-element list acts as a batch of size
    # one and ``softmax`` restricts the result to legal actions -- confirm
    # against the helper's definition.
    obs_tensor = tf.convert_to_tensor([state['obs']])
    # predict() must yield exactly two values; the second one is unused here.
    policy_logits, _unused = self.bot.predict(obs_tensor)
    action_probs = softmax(policy_logits, state['legal_actions'])[0]
    return np.argmax(action_probs), action_probs
def eval_step(self, state):
    """Return the arg-max action and the action distribution for ``state``.

    ``state['obs']`` is first pushed into ``self.bot.lstm`` via ``add_data``.
    The value returned by ``self.bot.lstm.get_data()`` is then wrapped in a
    one-element list, converted to a tensor and passed to
    ``self.bot.predict_policy``. The result and ``state['legal_actions']``
    are handed to ``softmax``, and element 0 of that result is taken as the
    distribution.

    Returns:
        A ``(best_action, probs)`` tuple, where ``best_action`` is
        ``np.argmax(probs)``.
    """
    # Record the new observation before reading the buffer back, so that
    # get_data() is called after add_data().
    # NOTE(review): presumably get_data() returns a window of recent
    # observations including the one just added -- confirm in the lstm helper.
    self.bot.lstm.add_data(state['obs'])
    buffered_tensor = tf.convert_to_tensor([self.bot.lstm.get_data()])
    policy_logits = self.bot.predict_policy(buffered_tensor)
    action_probs = softmax(policy_logits, state['legal_actions'])[0]
    return np.argmax(action_probs), action_probs
def get_action(self, state, legal_actions):
    """Sample an action index from the policy distribution for ``state``.

    ``state`` is wrapped in a one-element list, converted to a tensor and
    passed to ``self.predict_policy``. The result and ``legal_actions`` are
    handed to ``softmax``, and element 0 of that result is used as the
    probability vector for ``np.random.choice`` over ``self.num_actions``.

    Returns:
        The value drawn by ``np.random.choice``.
    """
    # NOTE(review): presumably ``softmax`` assigns zero probability to
    # actions outside ``legal_actions`` -- confirm against the helper.
    state_tensor = tf.convert_to_tensor([state])
    action_probs = softmax(self.predict_policy(state_tensor), legal_actions)[0]
    # Stochastic selection: draws from the distribution rather than taking
    # the arg-max.
    return np.random.choice(self.num_actions, p=action_probs)