def test_transitions():
    """
    Ensure that actions result in the expected state transition behavior.
    Note that if the agent attempts to leave the edge (select LEFT from s0
    or RIGHT from the rightmost state, s4 here since chainSize=5), the state
    should not change.
    NOTE: assumes p_action_failure is the only noise term.
    """
    # Initialize domain and place it in state s2
    chainSize = 5
    domain = ChainMDP(chainSize=chainSize)
    dummyS = domain.s0()  # reset the domain; the start state is overridden below
    domain.state = np.array([2])  # state s2
    left = 0
    right = 1

    # Check basic step
    r, ns, terminal, possibleA = domain.step(left)
    assert ns[0] == 1 and terminal == False
    assert np.all(possibleA == np.array([left, right]))  # all actions available
    assert r == domain.STEP_REWARD

    # Ensure all actions available, even on corner case, to meet domain specs
    r, ns, terminal, possibleA = domain.step(left)
    assert ns[0] == 0 and terminal == False
    assert np.all(possibleA == np.array([left, right]))  # all actions available
    assert r == domain.STEP_REWARD

    # Ensure state does not change or wrap around per domain spec
    r, ns, terminal, possibleA = domain.step(left)
    assert ns[0] == 0 and terminal == False
    assert np.all(possibleA == np.array([left, right]))  # all actions available
    assert r == domain.STEP_REWARD

    r, ns, terminal, possibleA = domain.step(right)
    assert ns[0] == 1 and terminal == False
    assert np.all(possibleA == np.array([left, right]))  # all actions available
    assert r == domain.STEP_REWARD

    r, ns, terminal, possibleA = domain.step(right)
    r, ns, terminal, possibleA = domain.step(right)
    r, ns, terminal, possibleA = domain.step(right)

    # Ensure goal state gives proper condition
    assert ns[0] == 4 and terminal == True
    assert np.all(possibleA == np.array([left, right]))  # all actions available
    assert r == domain.GOAL_REWARD
def make_experiment(exp_id=1, path="./Results/Tutorial/gridworld-sarsa0"):
    """
    Each file specifying an experimental setup should contain a
    make_experiment function which returns an instance of the Experiment
    class with everything set up.

    @param exp_id: number used to seed the random number generators
    @param path: output directory where logs and results are stored
    """
    opt = {}
    opt["exp_id"] = exp_id
    opt["path"] = path

    ## Domain:
    chain_size = 5
    domain = ChainMDP(chain_size)
    opt["domain"] = domain

    ## Representation
    # discretization only needed for continuous state spaces, discarded otherwise
    representation = Tabular(domain, discretization=20)

    ## Policy
    policy = eGreedy(representation, epsilon=0.2)

    ## Agent
    opt["agent"] = PosteriorSampling(representation=representation,
                                     policy=policy,
                                     discount_factor=domain.discount_factor,
                                     initial_learn_rate=0.1)

    opt["checks_per_policy"] = 100
    opt["max_steps"] = 4000
    opt["num_policy_checks"] = 10

    experiment = Experiment(**opt)
    return experiment
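# Usage sketch (not part of the original module): how a script built around
# make_experiment is typically driven. This assumes the standard RLPy
# Experiment interface (run, plot, save); the visualization flags are optional.
if __name__ == "__main__":
    experiment = make_experiment(exp_id=1)
    experiment.run(visualize_steps=False,      # render each transition
                   visualize_learning=False,   # render the value function while learning
                   visualize_performance=0)    # number of policy checks to render
    experiment.plot()
    experiment.save()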
def _make_experiment(exp_id=1, path="./Results/Tmp/test_ChainMDP/"):
    """
    Each file specifying an experimental setup should contain a
    make_experiment function which returns an instance of the Experiment
    class with everything set up.

    @param exp_id: number used to seed the random number generators
    @param path: output directory where logs and results are stored
    """
    ## Domain:
    chainSize = 5
    domain = ChainMDP(chainSize=chainSize)

    ## Representation
    # discretization only needed for continuous state spaces, discarded otherwise
    representation = Tabular(domain)

    ## Policy
    policy = eGreedy(representation, epsilon=0.2)

    ## Agent
    agent = SARSA(representation=representation, policy=policy,
                  discount_factor=domain.discount_factor, learn_rate=0.1)

    checks_per_policy = 3
    max_steps = 50
    num_policy_checks = 3

    # Pass every local variable (agent, domain, max_steps, ...) to Experiment
    # as keyword arguments.
    experiment = Experiment(**locals())
    return experiment
def makeComponents(self):
    # Read the chain length from the GUI spin box and build the domain
    chainSize = self.spChainSize.value()
    domain = ChainMDP(chainSize=chainSize)

    # Instantiate the representation, policy, and agent currently selected
    # in the GUI list widgets via the corresponding factories
    representation = RepresentationFactory.get(
        config=self.representationConfig,
        name=str(self.lstRepresentation.currentItem().text()),
        domain=domain)
    policy = PolicyFactory.get(
        config=self.policyConfig,
        name=str(self.lstPolicy.currentItem().text()),
        representation=representation)
    agent = AgentFactory.get(
        config=self.agentConfig,
        name=str(self.lstAgent.currentItem().text()),
        representation=representation,
        policy=policy)

    return domain, agent
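# Hypothetical caller (not part of the original GUI code): a sketch of how the
# (domain, agent) pair returned by makeComponents could be wired into an
# Experiment and run. The method name runFromGui and the experiment settings
# shown here are assumptions for illustration only.
def runFromGui(self):
    domain, agent = self.makeComponents()
    experiment = Experiment(agent=agent, domain=domain,
                            exp_id=1, max_steps=1000,
                            num_policy_checks=10, checks_per_policy=10,
                            path="./Results/Tmp/gui/")
    experiment.run()
    return experiment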