def select_agent(name, domain, _max_steps, _seed, lambda_=0.1, **kwargs):
    """Build the agent registered under ``name`` for ``domain``.

    ``None`` selects the default, ``"tabular-q"``. The other recognised
    names are ``"ifdd-ggq"``, ``"ifdd-q"``, ``"ifdd-sarsa"`` and
    ``"tile-ggq"``. ``lambda_`` is forwarded to every builder except the
    tabular one. ``_max_steps``, ``_seed`` and any extra keyword arguments
    are accepted but not used here.

    Raises:
        NotImplementedError: if ``name`` is not one of the names above.
    """
    method = "tabular-q" if name is None else name

    if method == "tabular-q":
        return methods.tabular_q(domain, initial_learn_rate=0.9)

    # Settings shared by the two entries that are built via methods.ifdd_q.
    ifdd_q_settings = dict(
        lambda_=lambda_,
        boyan_N0=1220.247254,
        initial_learn_rate=0.27986823,
        ifddplus=1.0 - 1e-7,
    )

    if method == "ifdd-ggq":
        # NOTE(review): this entry is built with methods.ifdd_q and differs
        # from "ifdd-q" only by not passing a threshold -- confirm intended.
        return methods.ifdd_q(domain, **ifdd_q_settings)

    if method == "ifdd-q":
        return methods.ifdd_q(domain, threshold=0.03104970, **ifdd_q_settings)

    if method == "ifdd-sarsa":
        return methods.ifdd_sarsa(
            domain,
            threshold=0.023476,
            lambda_=lambda_,
            boyan_N0=20.84362,
            initial_learn_rate=0.3356222674,
            ifddplus=1.0 - 1e-7,
        )

    if method == "tile-ggq":
        # 20 rows x 6 columns of 0/1 flags, handed to tile_ggq unchanged.
        tile_rows = """1 1 1 0 0 0; 0 1 1 1 0 0; 0 0 1 1 1 0; 0 0 0 1 1 1; 0 0 1 0 1 1; 0 0 1 1 0 1; 1 0 1 1 0 0; 1 0 1 0 1 0; 1 0 0 1 1 0; 1 0 0 0 1 1; 1 0 1 0 0 1; 1 0 0 1 0 1; 1 1 0 1 0 0; 1 1 0 0 1 0; 1 1 0 0 0 1; 0 1 0 1 1 0; 0 1 0 0 1 1; 0 1 0 1 0 1; 0 1 1 0 1 0; 0 1 1 0 0 1"""
        tile_matrix = np.matrix(tile_rows)
        return methods.tile_ggq(
            domain,
            tile_matrix,
            lambda_=lambda_,
            initial_learn_rate=0.240155681,
            boyan_N0=14.44946,
        )

    raise NotImplementedError("Method {} is not supported".format(name))
def select_agent(name, domain, max_steps, seed, **kwargs):
    """Build the agent registered under ``name`` for ``domain``.

    ``None`` selects the default, ``"lspi"``. The other recognised names are
    ``"nac"``, ``"tabular-q"``, ``"ifddk-q"`` and ``"psrl"``. ``max_steps``
    is only passed to the LSPI builder and ``seed`` only to the PSRL
    builder; extra keyword arguments are accepted but not used here.

    Raises:
        NotImplementedError: if ``name`` is not one of the names above.
    """
    method = "lspi" if name is None else name

    if method == "lspi":
        return methods.tabular_lspi(domain, max_steps)

    if method == "nac":
        return methods.tabular_nac(domain)

    if method == "tabular-q":
        return methods.tabular_q(domain, initial_learn_rate=0.1)

    if method == "ifddk-q":
        return methods.ifddk_q(domain, initial_learn_rate=0.1)

    if method == "psrl":
        return methods.tabular_psrl(domain, seed=seed)

    raise NotImplementedError("Method {} is not supported".format(name))
def select_agent(name, domain, max_steps, seed, **kwargs):
    """Build the agent registered under ``name`` for ``domain``.

    ``None`` selects the default, ``"ifdd-q"``. The other recognised names
    are ``"kifdd-q"``, ``"tabular-q"`` and ``"rbf-q"``. ``seed`` is only
    passed to the RBF builder; ``max_steps`` and extra keyword arguments are
    accepted but not used here.

    Raises:
        NotImplementedError: if ``name`` is not one of the names above.
    """
    method = "ifdd-q" if name is None else name

    if method == "ifdd-q":
        return methods.ifdd_q(
            domain,
            discretization=47,
            threshold=77.0,
            lambda_=0.9,
            initial_learn_rate=0.05,
            boyan_N0=11,
            ifddplus=True,
        )

    if method == "kifdd-q":
        return methods.kifdd_q(
            domain,
            kernel_resolution=13.14,
            threshold=0.21,
            lambda_=0.9,
            initial_learn_rate=0.07,
            boyan_N0=37.0,
            kernel="gaussian_kernel",
        )

    if method == "tabular-q":
        return methods.tabular_q(
            domain,
            lambda_=0.9,
            initial_learn_rate=0.26,
            boyan_N0=119,
            incremental=True,
        )

    if method == "rbf-q":
        return methods.rbf_q(
            domain,
            seed,
            num_rbfs=5000,
            resolution=8,
            initial_learn_rate=0.26,
            lambda_=0.9,
            boyan_N0=2120,
        )

    raise NotImplementedError("Method {} is not supported".format(name))
def select_agent(name, domain, max_steps, seed):
    """Build the agent registered under ``name`` for ``domain``.

    ``None`` selects the default, ``"lspi"``. The other recognised names are
    ``"tabular-q"``, ``"tabular-sarsa"``, ``"ifdd-q"``, ``"kifdd-q"`` and
    ``"rbfs-q"``. ``max_steps`` is only passed to the LSPI builder and
    ``seed`` only to the RBF builder.

    Raises:
        NotImplementedError: if ``name`` is not one of the names above; the
            message names the rejected method.
    """
    if name is None or name == "lspi":
        return methods.tabular_lspi(domain, max_steps)
    elif name == "tabular-q":
        return methods.tabular_q(domain)
    elif name == "tabular-sarsa":
        return methods.tabular_sarsa(domain)
    elif name == "ifdd-q":
        return methods.ifdd_q(
            domain,
            discretization=18,
            lambda_=0.42,
            boyan_N0=202,
            initial_learn_rate=0.7422,
        )
    elif name == "kifdd-q":
        return methods.kifdd_q(
            domain,
            8.567677,
            threshold=0.0807,
            lambda_=0.52738,
            initial_learn_rate=0.4244,
            boyan_N0=389.56,
        )
    elif name == "rbfs-q":
        return methods.rbf_q(
            domain,
            seed,
            num_rbfs=96,
            resolution=21,
            initial_learn_rate=0.6633,
            lambda_=0.1953,
            boyan_N0=13444.0,
        )
    else:
        # Say which name was rejected so a mistyped method name can be
        # diagnosed from the traceback alone.
        raise NotImplementedError("Method {} is not supported".format(name))
def select_agent(
    name,
    domain,
    max_steps,
    seed,
    epsilon,
    epsilon_min,
    beta,
    show_reward,
    vi_threshold,
    **kwargs
):
    """Build the agent registered under ``name`` for ``domain``.

    ``None`` selects the default, ``"tabular-q"``. The other recognised
    names are ``"lspi"``, ``"nac"``, ``"ifddk-q"``, ``"count-based-q"``,
    ``"psrl"``, ``"mbie-eb"``, ``"opt-psrl"``, ``"gaussian-psrl"`` and
    ``"ucbvi"``.

    When ``epsilon_min`` is given, the epsilon decay handed to the builders
    is ``(epsilon - epsilon_min) / max_steps * 0.9``; otherwise both the
    decay and the minimum are passed as ``0.0``. The LSPI and NAC builders
    receive no exploration settings, and ``"ifddk-q"`` receives ``epsilon``
    only. Extra keyword arguments are accepted but not used here.

    Raises:
        NotImplementedError: if ``name`` is not one of the names above.
    """
    if epsilon_min is None:
        decay, floor = 0.0, 0.0
    else:
        decay = (epsilon - epsilon_min) / max_steps * 0.9
        floor = epsilon_min

    # Exploration keywords shared by most builders below.
    exploration = dict(epsilon=epsilon, epsilon_decay=decay, epsilon_min=floor)

    method = "tabular-q" if name is None else name

    if method == "tabular-q":
        return methods.tabular_q(domain, **exploration, initial_learn_rate=0.5)

    if method == "lspi":
        return methods.tabular_lspi(domain, max_steps)

    if method == "nac":
        return methods.tabular_nac(domain)

    if method == "ifddk-q":
        return methods.ifddk_q(domain, epsilon=epsilon, initial_learn_rate=0.5)

    if method == "count-based-q":
        return methods.count_based_tabular_q(
            domain,
            beta=beta,
            **exploration,
            initial_learn_rate=0.5,
            show_reward=show_reward,
        )

    if method == "psrl":
        return methods.tabular_psrl(
            domain, seed=seed, show_reward=show_reward, **exploration
        )

    if method == "mbie-eb":
        return methods.tabular_mbie_eb(
            domain,
            seed=seed,
            show_reward=show_reward,
            **exploration,
            vi_threshold=vi_threshold,
        )

    if method == "opt-psrl":
        return methods.tabular_opt_psrl(
            domain,
            n_samples=10,
            seed=seed,
            show_reward=show_reward,
            **exploration,
        )

    if method == "gaussian-psrl":
        return methods.tabular_gaussian_psrl(
            domain, seed=seed, show_reward=show_reward, **exploration
        )

    if method == "ucbvi":
        return methods.tabular_ucbvi(
            domain, seed=seed, show_reward=show_reward, **exploration
        )

    raise NotImplementedError("Method {} is not supported".format(name))