def test_continuous_discr():
    """ Ensure correct discretization in continuous state spaces """
    # NOTE - if possible, test a domain with mixed discr/continuous
    domain = inf_cp.InfTrackCartPole()  # 2 continuous dims
    rep = Tabular(domain, discretization=20)
    assert rep.features_num == 400
    rep = Tabular(domain, discretization=50)
    assert rep.features_num == 2500
def test_number_of_cells():
    """ Ensure the appropriate number of cells is created, regardless of ``discretization`` """
    mapDir = os.path.join(__rlpy_location__, "domains", "GridWorldMaps")
    mapfile = os.path.join(mapDir, "4x5.txt")  # expect 4*5 = 20 states
    domain = GridWorld(mapfile=mapfile)

    rep = Tabular(domain, discretization=100)
    assert rep.features_num == 20
    rep = Tabular(domain, discretization=5)
    assert rep.features_num == 20
def tabular_q(
    domain,
    epsilon=0.1,
    epsilon_decay=0.0,
    epsilon_min=0.0,
    discretization=20,
    lambda_=0.3,
    initial_learn_rate=0.1,
    boyan_N0=100,
    incremental=False,
):
    if incremental:
        tabular = IncrementalTabular(domain, discretization=discretization)
    else:
        tabular = Tabular(domain, discretization=discretization)
    return Q_Learning(
        eGreedy(
            tabular,
            epsilon=epsilon,
            epsilon_decay=epsilon_decay,
            epsilon_min=epsilon_min,
        ),
        tabular,
        discount_factor=domain.discount_factor,
        lambda_=lambda_,
        initial_learn_rate=initial_learn_rate,
        learn_rate_decay_mode="boyan",
        boyan_N0=boyan_N0,
    )
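# A minimal usage sketch for the factory above, assuming a GridWorld domain and
# the Experiment wiring shown in _make_experiment further down; the helper name
# _run_tabular_q_example and all parameter values here are illustrative
# assumptions, not part of the original code.
def _run_tabular_q_example(domain, exp_id=1, max_steps=5000):
    agent = tabular_q(domain, initial_learn_rate=0.1, boyan_N0=100)
    experiment = Experiment(
        agent=agent,
        domain=domain,
        exp_id=exp_id,
        max_steps=max_steps,
        num_policy_checks=5,
        checks_per_policy=2,
    )
    experiment.run()
    return experiment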
def test_phi_cells():
    """ Ensure the correct feature is activated for the corresponding state """
    mapDir = os.path.join(__rlpy_location__, "domains", "GridWorldMaps")
    mapfile = os.path.join(mapDir, "4x5.txt")  # expect 4*5 = 20 states
    domain = GridWorld(mapfile=mapfile)

    # Allow the internal representation to change -- just make sure each state
    # has a unique id that is consistently activated.
    rep = Tabular(domain)
    seenStates = np.zeros(rep.features_num, dtype=bool)
    for r in np.arange(4):
        for c in np.arange(5):
            phiVec = rep.phi(np.array([r, c]), terminal=False)
            assert sum(phiVec) == 1  # only 1 active feature
            activeInd = np.where(phiVec > 0)
            assert not seenStates[activeInd][0]  # haven't seen it before
            seenStates[activeInd] = True
    assert np.all(seenStates)  # we've covered all states
def select_agent(name, domain, seed, **kwargs):
    name = None if name is None else name.lower()
    tabular = Tabular(domain, discretization=20)
    if name is None or name == "vi":
        return ValueIteration(seed, tabular, domain)
    elif name == "pi":
        return PolicyIteration(seed, tabular, domain)
    elif name in ["tpi", "traj-pi"]:
        return TrajectoryBasedPolicyIteration(seed, tabular, domain)
    elif name in ["tvi", "traj-vi"]:
        return TrajectoryBasedValueIteration(seed, tabular, domain)
    else:
        raise ValueError("{} is not supported".format(name))
def tabular_nac(
    domain,
    gamma=0.9,
    discretization=20,
    forgetting_rate=0.3,
    lambda_=0.7,
    learn_rate=0.1,
):
    tabular = Tabular(domain, discretization=discretization)
    return NaturalActorCritic(
        GibbsPolicy(tabular),
        tabular,
        discount_factor=gamma,
        forgetting_rate=forgetting_rate,
        min_steps_between_updates=100,
        max_steps_between_updates=1000,
        lambda_=lambda_,
        learn_rate=learn_rate,
    )
def tabular_ucbvi(
    domain,
    seed,
    show_reward=False,
    epsilon=0.1,
    epsilon_decay=0.0,
    epsilon_min=0.0,
    vi_threshold=1e-6,
):
    tabular = Tabular(domain, discretization=20)
    policy = eGreedy(
        tabular,
        epsilon=epsilon,
        epsilon_decay=epsilon_decay,
        epsilon_min=epsilon_min,
    )
    return UCBVI(
        policy,
        tabular,
        domain.discount_factor,
        seed=seed,
        show_reward=show_reward,
    )
def _make_experiment(domain, exp_id=1, path="./Results/Tmp/test_InfTrackCartPole"):
    ## Representation
    # discretization only needed for continuous state spaces, discarded otherwise
    representation = Tabular(domain)
    ## Policy
    policy = eGreedy(representation, epsilon=0.2)
    ## Agent
    agent = SARSA(
        representation=representation,
        policy=policy,
        discount_factor=domain.discount_factor,
        initial_learn_rate=0.1,
    )
    checks_per_policy = 3
    max_steps = 50
    num_policy_checks = 3
    experiment = Experiment(**locals())
    return experiment
def tabular_opt_psrl(
    domain,
    seed,
    show_reward=False,
    epsilon=0.1,
    epsilon_decay=0.0,
    epsilon_min=0.0,
    n_samples=10,
    vi_threshold=1e-6,
):
    tabular = Tabular(domain, discretization=20)
    policy = eGreedy(
        tabular,
        epsilon=epsilon,
        epsilon_decay=epsilon_decay,
        epsilon_min=epsilon_min,
    )
    return OptimisticPSRL(
        policy,
        tabular,
        domain.discount_factor,
        seed=seed,
        show_reward=show_reward,
        n_samples=n_samples,
    )
def tabular_mbie_eb(
    domain,
    seed,
    show_reward=False,
    beta=0.1,
    epsilon=0.1,
    epsilon_decay=0.0,
    epsilon_min=0.0,
    vi_threshold=1e-6,
):
    tabular = Tabular(domain, discretization=20)
    policy = eGreedy(
        tabular,
        epsilon=epsilon,
        epsilon_decay=epsilon_decay,
        epsilon_min=epsilon_min,
    )
    return MBIE_EB(
        policy,
        tabular,
        domain.discount_factor,
        beta=beta,
        seed=seed,
        show_reward=show_reward,
    )
def test_fdcheck_dlogpi():
    """ Finite-difference check of GibbsPolicy.dlogpi against the log-probability gradient """
    domain = GridWorld()
    representation = Tabular(domain=domain, discretization=20)
    policy = GibbsPolicy(representation=representation)

    def f(wv, s, a):
        policy.representation.weight_vec = wv
        return np.log(policy.prob(s, a))

    def df(wv, s, a):
        policy.representation.weight_vec = wv
        return policy.dlogpi(s, a)

    def df_approx(wv, s, a):
        return approx_fprime(wv, f, 1e-10, s, a)

    wvs = np.random.rand(10, len(representation.weight_vec))
    for i in range(10):
        s = np.array([np.random.randint(4), np.random.randint(5)])
        a = np.random.choice(domain.possible_actions(s))
        for wv in wvs:
            error = check_grad(f, df, wv, s, a)
            assert np.abs(error) < 1e-6, "Error={}".format(error)
def tabular_sarsa(domain, discretization=20, lambda_=0.3):
    tabular = Tabular(domain, discretization=discretization)
    policy = eGreedy(tabular, epsilon=0.1)
    return SARSA(policy, tabular, domain.discount_factor, lambda_=lambda_)
def tabular_lspi(domain, max_steps, discretization=20):
    tabular = Tabular(domain, discretization=discretization)
    policy = eGreedy(tabular, epsilon=0.1)
    return LSPI(policy, tabular, domain.discount_factor, max_steps, 1000)
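# A hedged sketch showing how the SARSA and LSPI factories above might be run
# side by side under the same Experiment wiring used in _make_experiment; the
# helper name _compare_tabular_agents, the loop structure, and the step counts
# are assumptions for illustration, not part of the original code.
def _compare_tabular_agents(domain, max_steps=2000):
    factories = {
        "sarsa": lambda d: tabular_sarsa(d),
        "lspi": lambda d: tabular_lspi(d, max_steps),
    }
    experiments = {}
    for exp_id, (name, make_agent) in enumerate(factories.items(), start=1):
        experiment = Experiment(
            agent=make_agent(domain),
            domain=domain,
            exp_id=exp_id,
            max_steps=max_steps,
            num_policy_checks=5,
            checks_per_policy=2,
        )
        experiment.run()
        experiments[name] = experiment
    return experiments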