def get_reward(self, _state, _action, _next_state):
    """
    Gets the reward for the state, action, next_state transition.

    Note that this is not available in reinforcement learning.
    """
    abstract()
def get_transition_states_and_probs(self, _state, _action):
    """
    Returns a list of (next_state, prob) pairs representing the states
    reachable from 'state' by taking 'action', along with their transition
    probabilities.

    Note that in Q-learning and reinforcement learning in general, we won't
    know these probabilities, nor do we directly model them.
    """
    abstract()
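# A minimal sketch of the expected return shape, for a hypothetical noisy
# gridworld MDP (the state (1, 1), the action 'north', and the probabilities
# below are illustrative, not part of this interface):
#
#     mdp.get_transition_states_and_probs((1, 1), 'north')
#     # -> [((1, 2), 0.8), ((0, 1), 0.1), ((2, 1), 0.1)]
#
# The probabilities in the returned (next_state, prob) pairs should sum to 1.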
def is_terminal(self, _state):
    """
    Returns True if the given state is a terminal state.

    Note that, by convention, a terminal state has zero future rewards.
    The terminal state(s) may have no possible actions; it is also common to
    think of a terminal state as having a self-loop 'pass' action with zero
    reward. These formulations are essentially equivalent.
    """
    abstract()
def get_start_state(self):
    """
    Returns the start state of the MDP.
    """
    abstract()
def get_possible_actions(self, _state):
    """
    Returns a list of possible actions from the given state.
    """
    abstract()
def get_states(self):
    """
    Returns a list of all states in the MDP.

    Note that for very large MDPs this may not be possible.
    """
    abstract()
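# A minimal sketch of a concrete implementation of the interface above, for a
# hypothetical two-state chain MDP. The class name, state labels, action
# labels, and probabilities are illustrative assumptions, not part of this
# module.
class TwoStateChainMDP:
    def get_start_state(self):
        return 'A'

    def get_states(self):
        return ['A', 'B', 'TERMINAL']

    def get_possible_actions(self, state):
        # No actions are available from the terminal state.
        return [] if state == 'TERMINAL' else ['stay', 'advance']

    def get_transition_states_and_probs(self, state, action):
        if state == 'A':
            # 'advance' usually moves to B, but occasionally slips and stays.
            return [('B', 0.9), ('A', 0.1)] if action == 'advance' else [('A', 1.0)]
        if state == 'B':
            return [('TERMINAL', 1.0)] if action == 'advance' else [('B', 1.0)]
        return []

    def get_reward(self, state, action, next_state):
        # Reward only for reaching the terminal state.
        return 1.0 if next_state == 'TERMINAL' else 0.0

    def is_terminal(self, state):
        return state == 'TERMINAL'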
def do_action(self, _action):
    """
    Performs the given action in the current environment state and updates
    the environment.

    Returns a (reward, next_state) pair.
    """
    abstract()
def reset(self):
    """
    Resets the environment to its start state.
    """
    abstract()
def get_possible_actions(self, _state):
    """
    Returns the possible actions the agent can take in the given state.

    An empty list is returned if the environment is in a terminal state.
    """
    abstract()
def get_current_state(self):
    """
    Returns the current state of the environment.
    """
    abstract()
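# A minimal sketch of an episode loop driven by the environment interface
# above. Here 'env' is assumed to be any object implementing these methods,
# and 'choose_action' is a hypothetical stand-in for an agent's policy
# (e.g. epsilon-greedy action selection in Q-learning).
def run_episode(env, choose_action):
    env.reset()
    total_reward = 0.0
    while True:
        state = env.get_current_state()
        actions = env.get_possible_actions(state)
        if not actions:
            # An empty action list signals a terminal state.
            break
        action = choose_action(state, actions)
        reward, _next_state = env.do_action(action)
        total_reward += reward
    return total_reward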