Exemplo n.º 1
0
def select_agent(name, domain, _max_steps, _seed, lambda_=0.1, **kwargs):
    """Build the agent registered under ``name`` for ``domain``.

    ``None`` falls back to ``"tabular-q"``. The other recognised names are
    ``"ifdd-ggq"``, ``"ifdd-q"``, ``"ifdd-sarsa"`` and ``"tile-ggq"``; each
    forwards a fixed set of hyperparameters to a ``methods`` factory.

    Args:
        name: Agent identifier, or ``None`` for the default.
        domain: Passed as the first positional argument of the factory.
        _max_steps: Accepted but not used by this selector.
        _seed: Accepted but not used by this selector.
        lambda_: Forwarded as ``lambda_`` to every factory except
            ``methods.tabular_q``.
        **kwargs: Accepted and ignored.

    Returns:
        Whatever the selected ``methods`` factory returns.

    Raises:
        NotImplementedError: If ``name`` matches none of the names above.
    """
    if name is None or name == "tabular-q":
        return methods.tabular_q(domain, initial_learn_rate=0.9)

    # Settings common to both agents that are built through methods.ifdd_q.
    ifdd_q_settings = {
        "lambda_": lambda_,
        "boyan_N0": 1220.247254,
        "initial_learn_rate": 0.27986823,
        "ifddplus": 1.0 - 1e-7,
    }

    if name == "ifdd-ggq":
        # NOTE(review): this goes through methods.ifdd_q (not a GGQ-specific
        # factory) and passes no threshold -- confirm that is intended.
        return methods.ifdd_q(domain, **ifdd_q_settings)

    if name == "ifdd-q":
        return methods.ifdd_q(domain, threshold=0.03104970, **ifdd_q_settings)

    if name == "ifdd-sarsa":
        sarsa_settings = {
            "threshold": 0.023476,
            "lambda_": lambda_,
            "boyan_N0": 20.84362,
            "initial_learn_rate": 0.3356222674,
            "ifddplus": 1.0 - 1e-7,
        }
        return methods.ifdd_sarsa(domain, **sarsa_settings)

    if name == "tile-ggq":
        # 20 rows x 6 columns of 0/1 entries, three ones per row.
        # Presumably each row marks the dimensions one tiling covers --
        # confirm against methods.tile_ggq.
        tiling_mask = np.matrix("""1 1 1 0 0 0;
               0 1 1 1 0 0;
               0 0 1 1 1 0;
               0 0 0 1 1 1;
               0 0 1 0 1 1;
               0 0 1 1 0 1;
               1 0 1 1 0 0;
               1 0 1 0 1 0;
               1 0 0 1 1 0;
               1 0 0 0 1 1;
               1 0 1 0 0 1;
               1 0 0 1 0 1;
               1 1 0 1 0 0;
               1 1 0 0 1 0;
               1 1 0 0 0 1;
               0 1 0 1 1 0;
               0 1 0 0 1 1;
               0 1 0 1 0 1;
               0 1 1 0 1 0;
               0 1 1 0 0 1""")
        return methods.tile_ggq(
            domain,
            tiling_mask,
            lambda_=lambda_,
            initial_learn_rate=0.240155681,
            boyan_N0=14.44946,
        )

    raise NotImplementedError("Method {} is not supported".format(name))
Exemplo n.º 2
0
def select_agent(name, domain, max_steps, seed, **kwargs):
    """Return the agent registered under ``name`` for ``domain``.

    ``None`` falls back to ``"lspi"``. Recognised names are ``"lspi"``,
    ``"nac"``, ``"tabular-q"``, ``"ifddk-q"`` and ``"psrl"``.

    Args:
        name: Agent identifier, or ``None`` for the default.
        domain: Passed as the first positional argument of the factory.
        max_steps: Forwarded positionally to ``methods.tabular_lspi`` only.
        seed: Forwarded as ``seed`` to ``methods.tabular_psrl`` only.
        **kwargs: Accepted and ignored.

    Returns:
        Whatever the selected ``methods`` factory returns.

    Raises:
        NotImplementedError: If ``name`` matches none of the names above.
    """
    # Both Q-learning variants are created with the same initial learn rate.
    q_learn_rate = 0.1

    if name is None or name == "lspi":
        agent = methods.tabular_lspi(domain, max_steps)
    elif name == "nac":
        agent = methods.tabular_nac(domain)
    elif name == "tabular-q":
        agent = methods.tabular_q(domain, initial_learn_rate=q_learn_rate)
    elif name == "ifddk-q":
        agent = methods.ifddk_q(domain, initial_learn_rate=q_learn_rate)
    elif name == "psrl":
        agent = methods.tabular_psrl(domain, seed=seed)
    else:
        raise NotImplementedError("Method {} is not supported".format(name))
    return agent
Exemplo n.º 3
0
def select_agent(name, domain, max_steps, seed, **kwargs):
    """Return the agent registered under ``name`` for ``domain``.

    ``None`` falls back to ``"ifdd-q"``. Recognised names are ``"ifdd-q"``,
    ``"kifdd-q"``, ``"tabular-q"`` and ``"rbf-q"``; each forwards a fixed set
    of hyperparameters to the matching ``methods`` factory.

    Args:
        name: Agent identifier, or ``None`` for the default.
        domain: Passed as the first positional argument of the factory.
        max_steps: Accepted but not used by this selector.
        seed: Forwarded positionally to ``methods.rbf_q`` only.
        **kwargs: Accepted and ignored.

    Returns:
        Whatever the selected ``methods`` factory returns.

    Raises:
        NotImplementedError: If ``name`` matches none of the names above.
    """
    if name is None or name == "ifdd-q":
        ifdd_params = {
            "discretization": 47,
            "threshold": 77.0,
            "lambda_": 0.9,
            "initial_learn_rate": 0.05,
            "boyan_N0": 11,
            "ifddplus": True,
        }
        return methods.ifdd_q(domain, **ifdd_params)

    if name == "kifdd-q":
        kifdd_params = {
            "kernel_resolution": 13.14,
            "threshold": 0.21,
            "lambda_": 0.9,
            "initial_learn_rate": 0.07,
            "boyan_N0": 37.0,
            "kernel": "gaussian_kernel",
        }
        return methods.kifdd_q(domain, **kifdd_params)

    if name == "tabular-q":
        tabular_params = {
            "lambda_": 0.9,
            "initial_learn_rate": 0.26,
            "boyan_N0": 119,
            "incremental": True,
        }
        return methods.tabular_q(domain, **tabular_params)

    if name == "rbf-q":
        rbf_params = {
            "num_rbfs": 5000,
            "resolution": 8,
            "initial_learn_rate": 0.26,
            "lambda_": 0.9,
            "boyan_N0": 2120,
        }
        # The seed is the second positional argument, as in the call it replaces.
        return methods.rbf_q(domain, seed, **rbf_params)

    raise NotImplementedError("Method {} is not supported".format(name))
Exemplo n.º 4
0
def select_agent(name, domain, max_steps, seed):
    """Return the agent registered under ``name`` for ``domain``.

    ``None`` falls back to ``"lspi"``. Recognised names are ``"lspi"``,
    ``"tabular-q"``, ``"tabular-sarsa"``, ``"ifdd-q"``, ``"kifdd-q"`` and
    ``"rbfs-q"``.

    Args:
        name: Agent identifier, or ``None`` for the default.
        domain: Passed as the first positional argument of the factory.
        max_steps: Forwarded positionally to ``methods.tabular_lspi`` only.
        seed: Forwarded positionally to ``methods.rbf_q`` only.

    Returns:
        Whatever the selected ``methods`` factory returns.

    Raises:
        NotImplementedError: If ``name`` matches none of the names above.
            The message names the rejected method.
    """
    if name is None or name == "lspi":
        return methods.tabular_lspi(domain, max_steps)
    elif name == "tabular-q":
        return methods.tabular_q(domain)
    elif name == "tabular-sarsa":
        return methods.tabular_sarsa(domain)
    elif name == "ifdd-q":
        return methods.ifdd_q(
            domain,
            discretization=18,
            lambda_=0.42,
            boyan_N0=202,
            initial_learn_rate=0.7422,
        )
    elif name == "kifdd-q":
        return methods.kifdd_q(
            domain,
            # Second positional argument of methods.kifdd_q; presumably the
            # kernel resolution -- confirm against the factory's signature.
            8.567677,
            threshold=0.0807,
            lambda_=0.52738,
            initial_learn_rate=0.4244,
            boyan_N0=389.56,
        )
    elif name == "rbfs-q":
        return methods.rbf_q(
            domain,
            seed,
            num_rbfs=96,
            resolution=21,
            initial_learn_rate=0.6633,
            lambda_=0.1953,
            boyan_N0=13444.0,
        )
    else:
        # Report which name was rejected so a typo on the command line is
        # diagnosable; a bare NotImplementedError() carries no context.
        raise NotImplementedError("Method {} is not supported".format(name))
Exemplo n.º 5
0
def select_agent(name, domain, max_steps, seed, epsilon, epsilon_min, beta,
                 show_reward, vi_threshold, **kwargs):
    """Return the agent registered under ``name`` for ``domain``.

    ``None`` falls back to ``"tabular-q"``. Recognised names are
    ``"tabular-q"``, ``"lspi"``, ``"nac"``, ``"ifddk-q"``,
    ``"count-based-q"``, ``"psrl"``, ``"mbie-eb"``, ``"opt-psrl"``,
    ``"gaussian-psrl"`` and ``"ucbvi"``.

    Args:
        name: Agent identifier, or ``None`` for the default.
        domain: Passed as the first positional argument of the factory.
        max_steps: Divisor in the epsilon-decay computation; also forwarded
            positionally to ``methods.tabular_lspi``.
        seed: Forwarded as ``seed`` to the psrl, mbie-eb, opt-psrl,
            gaussian-psrl and ucbvi factories.
        epsilon: Forwarded as ``epsilon`` to every factory except the lspi
            and nac ones.
        epsilon_min: When not ``None``, forwarded as ``epsilon_min`` and used
            to derive ``epsilon_decay``; when ``None`` both are ``0.0``.
        beta: Forwarded to ``methods.count_based_tabular_q`` only.
        show_reward: Forwarded to the count-based, psrl, mbie-eb, opt-psrl,
            gaussian-psrl and ucbvi factories.
        vi_threshold: Forwarded to ``methods.tabular_mbie_eb`` only.
        **kwargs: Accepted and ignored.

    Returns:
        Whatever the selected ``methods`` factory returns.

    Raises:
        NotImplementedError: If ``name`` matches none of the names above.
        ZeroDivisionError: If ``epsilon_min`` is given and ``max_steps`` is 0.
    """
    # The decay/floor pair is computed up front, before the name is inspected.
    if epsilon_min is None:
        decay, floor = 0.0, 0.0
    else:
        floor = epsilon_min
        # Left-to-right evaluation: divide by max_steps first, then scale by 0.9.
        decay = (epsilon - epsilon_min) / max_steps * 0.9

    # Exploration keywords shared by most factories below.
    exploration = {
        "epsilon": epsilon,
        "epsilon_decay": decay,
        "epsilon_min": floor,
    }

    if name is None or name == "tabular-q":
        return methods.tabular_q(domain, **exploration, initial_learn_rate=0.5)

    if name == "lspi":
        return methods.tabular_lspi(domain, max_steps)

    if name == "nac":
        return methods.tabular_nac(domain)

    if name == "ifddk-q":
        # Only epsilon is forwarded here; the decay/floor pair is not.
        return methods.ifddk_q(domain, epsilon=epsilon, initial_learn_rate=0.5)

    if name == "count-based-q":
        return methods.count_based_tabular_q(
            domain,
            beta=beta,
            **exploration,
            initial_learn_rate=0.5,
            show_reward=show_reward,
        )

    # Keyword set shared by the remaining seed-taking factories.
    seeded = {"seed": seed, "show_reward": show_reward}
    seeded.update(exploration)

    if name == "psrl":
        return methods.tabular_psrl(domain, **seeded)

    if name == "mbie-eb":
        return methods.tabular_mbie_eb(
            domain, **seeded, vi_threshold=vi_threshold
        )

    if name == "opt-psrl":
        return methods.tabular_opt_psrl(domain, n_samples=10, **seeded)

    if name == "gaussian-psrl":
        return methods.tabular_gaussian_psrl(domain, **seeded)

    if name == "ucbvi":
        return methods.tabular_ucbvi(domain, **seeded)

    raise NotImplementedError("Method {} is not supported".format(name))