Example #1
def test_continuous_discr():
    """ Ensure correct discretization in continuous state spaces """
    # NOTE - if possible, test a domain with mixed discr/continuous
    domain = inf_cp.InfTrackCartPole()  # 2 continuous dims
    rep = Tabular(domain, discretization=20)
    assert rep.features_num == 400
    rep = Tabular(domain, discretization=50)
    assert rep.features_num == 2500
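The feature counts asserted here follow directly from gridding each continuous dimension into ``discretization`` bins, giving discretization ** n_dims cells. A quick arithmetic check of the numbers above (plain Python, no rlpy needed):

n_continuous_dims = 2  # two continuous state dimensions, per the comment above
for discretization in (20, 50):
    print(discretization, discretization ** n_continuous_dims)  # 20 -> 400, 50 -> 2500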
Example #2
def test_number_of_cells():
    """ Ensure create appropriate # of cells (despite ``discretization``) """
    mapDir = os.path.join(__rlpy_location__, "domains", "GridWorldMaps")
    mapfile = os.path.join(mapDir, "4x5.txt")  # expect 4*5 = 20 states
    domain = GridWorld(mapfile=mapfile)

    rep = Tabular(domain, discretization=100)
    assert rep.features_num == 20
    rep = Tabular(domain, discretization=5)
    assert rep.features_num == 20
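More generally, the tabular cell count is a product over state dimensions: discrete dimensions contribute their own cardinality, continuous ones contribute ``discretization`` bins. A hypothetical helper illustrating that rule (not part of rlpy):

def expected_cells(dims, discretization):
    """dims: per-dimension cardinality (int) or the string "continuous"."""
    total = 1
    for d in dims:
        total *= discretization if d == "continuous" else d
    return total

assert expected_cells([4, 5], discretization=100) == 20              # GridWorld above
assert expected_cells(["continuous"] * 2, discretization=20) == 400  # Example #1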
Example #3
def tabular_q(
    domain,
    epsilon=0.1,
    epsilon_decay=0.0,
    epsilon_min=0.0,
    discretization=20,
    lambda_=0.3,
    initial_learn_rate=0.1,
    boyan_N0=100,
    incremental=False,
):
    if incremental:
        tabular = IncrementalTabular(domain, discretization=discretization)
    else:
        tabular = Tabular(domain, discretization=discretization)
    return Q_Learning(
        eGreedy(
            tabular,
            epsilon=epsilon,
            epsilon_decay=epsilon_decay,
            epsilon_min=epsilon_min,
        ),
        tabular,
        discount_factor=domain.discount_factor,
        lambda_=lambda_,
        initial_learn_rate=initial_learn_rate,
        learn_rate_decay_mode="boyan",
        boyan_N0=boyan_N0,
    )
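A possible way to use this factory, assuming the names it references (Tabular, IncrementalTabular, eGreedy, Q_Learning) and a domain such as GridWorld are imported from rlpy; exact module paths differ between rlpy versions, so treat this as a sketch:

domain = GridWorld()  # any rlpy domain exposing discount_factor works here
agent = tabular_q(domain, epsilon=0.2, lambda_=0.0, incremental=True)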
Example #4
def test_phi_cells():
    """ Ensure correct feature is activated for corresponding state """
    mapDir = os.path.join(__rlpy_location__, "domains", "GridWorldMaps")
    mapfile = os.path.join(mapDir, "4x5.txt")  # expect 4*5 = 20 states
    domain = GridWorld(mapfile=mapfile)

    # Allow internal representation to change -- just make sure each state has
    # a unique id that is consistently activated.
    rep = Tabular(domain)
    seenStates = np.zeros(rep.features_num, dtype=bool)
    for r in np.arange(4):
        for c in np.arange(5):
            phiVec = rep.phi(np.array([r, c]), terminal=False)
            assert sum(phiVec) == 1  # only 1 active feature
            activeInd = np.where(phiVec > 0)
            assert not seenStates[activeInd][0]  # haven't seen it before
            seenStates[activeInd] = True
    assert np.all(seenStates)  # we've covered all states
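The test intentionally leaves Tabular's indexing scheme unspecified. For intuition only, a minimal one-hot feature map for a purely discrete 4x5 grid could look like this hypothetical stand-in (not rlpy's actual implementation):

import numpy as np

def one_hot_phi(state, shape=(4, 5)):
    """Map a discrete grid state (row, col) to a one-hot feature vector."""
    phi = np.zeros(int(np.prod(shape)))
    phi[np.ravel_multi_index(tuple(state), shape)] = 1.0
    return phi

assert one_hot_phi((2, 3)).sum() == 1  # exactly one active feature per state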
Example #5
def select_agent(name, domain, seed, **kwargs):
    name = None if name is None else name.lower()
    tabular = Tabular(domain, discretization=20)
    if name is None or name == "vi":
        return ValueIteration(seed, tabular, domain)
    elif name == "pi":
        return PolicyIteration(seed, tabular, domain)
    elif name in ["tpi", "traj-pi"]:
        return TrajectoryBasedPolicyIteration(seed, tabular, domain)
    elif name in ["tvi", "traj-vi"]:
        return TrajectoryBasedValueIteration(seed, tabular, domain)
    else:
        raise ValueError("{} is not supported".format(name))
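Assuming the planner classes above (ValueIteration, PolicyIteration, and the trajectory-based variants) are importable from rlpy, the dispatcher can be exercised as follows (hypothetical usage):

domain = GridWorld()
agent = select_agent("tvi", domain, seed=1)         # TrajectoryBasedValueIteration
default_agent = select_agent(None, domain, seed=1)  # falls back to ValueIteration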
Example #6
def tabular_nac(
    domain,
    gamma=0.9,
    discretization=20,
    forgetting_rate=0.3,
    lambda_=0.7,
    learn_rate=0.1,
):
    tabular = Tabular(domain, discretization=discretization)
    return NaturalActorCritic(
        GibbsPolicy(tabular),
        tabular,
        discount_factor=gamma,
        forgetting_rate=forgetting_rate,
        min_steps_between_updates=100,
        max_steps_between_updates=1000,
        lambda_=lambda_,
        learn_rate=learn_rate,
    )
Example #7
def tabular_ucbvi(
    domain,
    seed,
    show_reward=False,
    epsilon=0.1,
    epsilon_decay=0.0,
    epsilon_min=0.0,
    vi_threshold=1e-6,
):
    tabular = Tabular(domain, discretization=20)
    policy = eGreedy(tabular,
                     epsilon=epsilon,
                     epsilon_decay=epsilon_decay,
                     epsilon_min=epsilon_min)
    return UCBVI(policy,
                 tabular,
                 domain.discount_factor,
                 seed=seed,
                 show_reward=show_reward)
Example #8
def _make_experiment(domain,
                     exp_id=1,
                     path="./Results/Tmp/test_InfTrackCartPole"):
    ## Representation
    # discretization only needed for continuous state spaces, discarded otherwise
    representation = Tabular(domain)

    ## Policy
    policy = eGreedy(representation, epsilon=0.2)

    ## Agent
    agent = SARSA(
        representation=representation,
        policy=policy,
        discount_factor=domain.discount_factor,
        initial_learn_rate=0.1,
    )
    checks_per_policy = 3
    max_steps = 50
    num_policy_checks = 3
    experiment = Experiment(**locals())
    return experiment
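One hedged way to drive this helper, assuming rlpy's Experiment exposes run() and save() as in released versions (argument names may differ across versions):

domain = inf_cp.InfTrackCartPole()
experiment = _make_experiment(domain, exp_id=1)
experiment.run()   # executes up to max_steps of interaction with policy checks
experiment.save()  # writes results under the path given to _make_experiment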
Example #9
def tabular_opt_psrl(
    domain,
    seed,
    show_reward=False,
    epsilon=0.1,
    epsilon_decay=0.0,
    epsilon_min=0.0,
    n_samples=10,
    vi_threshold=1e-6,
):
    tabular = Tabular(domain, discretization=20)
    policy = eGreedy(tabular,
                     epsilon=epsilon,
                     epsilon_decay=epsilon_decay,
                     epsilon_min=epsilon_min)
    return OptimisticPSRL(
        policy,
        tabular,
        domain.discount_factor,
        seed=seed,
        show_reward=show_reward,
        n_samples=n_samples,
    )
Example #10
def tabular_mbie_eb(
    domain,
    seed,
    show_reward=False,
    beta=0.1,
    epsilon=0.1,
    epsilon_decay=0.0,
    epsilon_min=0.0,
    vi_threshold=1e-6,
):
    tabular = Tabular(domain, discretization=20)
    policy = eGreedy(tabular,
                     epsilon=epsilon,
                     epsilon_decay=epsilon_decay,
                     epsilon_min=epsilon_min)
    return MBIE_EB(
        policy,
        tabular,
        domain.discount_factor,
        beta=beta,
        seed=seed,
        show_reward=show_reward,
    )
Example #11
def test_fdcheck_dlogpi():
    domain = GridWorld()
    representation = Tabular(domain=domain, discretization=20)
    policy = GibbsPolicy(representation=representation)

    def f(wv, s, a):
        policy.representation.weight_vec = wv
        return np.log(policy.prob(s, a))

    def df(wv, s, a):
        policy.representation.weight_vec = wv
        return policy.dlogpi(s, a)

    def df_approx(wv, s, a):
        return approx_fprime(wv, f, 1e-10, s, a)

    wvs = np.random.rand(10, len(representation.weight_vec))
    for i in range(10):
        s = np.array([np.random.randint(4), np.random.randint(5)])
        a = np.random.choice(domain.possible_actions(s))
        for wv in wvs:
            error = check_grad(f, df, wv, s, a)
            assert np.abs(error) < 1e-6, "Error={}".format(error)
Example #12
def tabular_sarsa(domain, discretization=20, lambda_=0.3):
    tabular = Tabular(domain, discretization=discretization)
    policy = eGreedy(tabular, epsilon=0.1)
    return SARSA(policy, tabular, domain.discount_factor, lambda_=lambda_)
Example #13
def tabular_lspi(domain, max_steps, discretization=20):
    tabular = Tabular(domain, discretization=discretization)
    policy = eGreedy(tabular, epsilon=0.1)
    return LSPI(policy, tabular, domain.discount_factor, max_steps, 1000)