def eval_step(self, state):
        """Choose the highest-probability action for one evaluation state.

        Args:
            state: Mapping read at the keys ``'obs'`` and ``'legal_actions'``.

        Returns:
            A ``(best_action, probs)`` pair, where ``probs`` is the first
            entry of the ``softmax`` result and ``best_action`` is the index
            ``np.argmax`` reports for it.
        """
        # Wrap the lone observation in a list so the tensor carries a
        # leading axis of length one.
        obs_tensor = tf.convert_to_tensor([state['obs']])

        # Only the first value returned by predict() is used here.
        policy_logits, _ = self.bot.predict(obs_tensor)

        # NOTE(review): `softmax` is defined outside this block; presumably
        # it limits the distribution to the legal actions -- confirm.
        action_probs = softmax(policy_logits, state['legal_actions'])[0]
        return np.argmax(action_probs), action_probs
示例#2
0
    def eval_step(self, state):
        """Choose the highest-probability action using the stored history.

        The current observation is first handed to ``self.bot.lstm.add_data``;
        whatever ``self.bot.lstm.get_data()`` then returns is what gets fed
        to the policy.

        Args:
            state: Mapping read at the keys ``'obs'`` and ``'legal_actions'``.

        Returns:
            A ``(best_action, probs)`` pair, where ``probs`` is the first
            entry of the ``softmax`` result and ``best_action`` is the index
            ``np.argmax`` reports for it.
        """
        # Record the new observation before reading the buffer back.
        self.bot.lstm.add_data(state['obs'])

        # Wrap the buffer contents in a list so the tensor carries a
        # leading axis of length one.
        history_tensor = tf.convert_to_tensor([self.bot.lstm.get_data()])

        policy_logits = self.bot.predict_policy(history_tensor)

        # NOTE(review): `softmax` is defined outside this block; presumably
        # it limits the distribution to the legal actions -- confirm.
        action_probs = softmax(policy_logits, state['legal_actions'])[0]
        return np.argmax(action_probs), action_probs
示例#3
0
    def get_action(self, state, legal_actions):
        """Sample an action from the policy's distribution for ``state``.

        Unlike a greedy pick, the action is drawn at random with
        ``np.random.choice`` over ``self.num_actions``, weighted by the
        computed probabilities.

        Args:
            state: Single state; it is wrapped in a one-element list before
                being converted to a tensor.
            legal_actions: Passed through unchanged as the second argument
                of ``softmax``.

        Returns:
            The value drawn by ``np.random.choice``.
        """
        state_tensor = tf.convert_to_tensor([state])

        # NOTE(review): `softmax` is defined outside this block; presumably
        # it limits the distribution to `legal_actions` -- confirm.
        action_probs = softmax(self.predict_policy(state_tensor), legal_actions)[0]

        return np.random.choice(self.num_actions, p=action_probs)