# NOTE: whitespace-collapsed fragment. It begins inside a call whose opening
# (and the enclosing function header) is not visible from here.
# Visible behaviour on this line:
#   * name == "rbf-q" returns methods.rbf_q(domain, seed, ...) with
#     num_rbfs=5000, resolution=8, initial_learn_rate=0.26, lambda_=0.9,
#     boyan_N0=2120.
#   * any other name raises NotImplementedError naming the unsupported method.
#   * when run as a script, run_experiment is called with select_domain and
#     select_agent, defaults of 30000 max steps / 10 policy checks / 50 checks
#     per policy, and three extra CLI options: --task (str, "original"),
#     --good-reward (float, 0.0) and --kernel (str, "gaussian_kernel").
boyan_N0=119, incremental=True, ) elif name == "rbf-q": return methods.rbf_q( domain, seed, num_rbfs=5000, resolution=8, initial_learn_rate=0.26, lambda_=0.9, boyan_N0=2120, ) else: raise NotImplementedError("Method {} is not supported".format(name)) if __name__ == "__main__": run_experiment( select_domain, select_agent, default_max_steps=30000, default_num_policy_checks=10, default_checks_per_policy=50, other_options=[ click.Option(["--task"], type=str, default="original"), click.Option(["--good-reward"], type=float, default=0.0), click.Option(["--kernel"], type=str, default="gaussian_kernel"), ], )
import click
from rlpy.domains import DeepSea
from rlpy.tools.cli import run_experiment

from fr_gridworld import select_agent


def select_domain(size, noise, **kwargs):
    """Build the DeepSea domain used by this experiment script.

    Args:
        size: Passed to ``DeepSea`` as its first positional argument.
        noise: Passed to ``DeepSea`` as the ``noise`` keyword argument.
        **kwargs: Any further keyword arguments; accepted and ignored here.

    Returns:
        The newly constructed ``DeepSea`` instance.
    """
    deep_sea = DeepSea(size, noise=noise)
    return deep_sea


if __name__ == "__main__":
    # Extra command-line options registered on top of run_experiment's own.
    # --size and --noise are the two that select_domain reads; the rest are
    # not used in this file (presumably consumed by fr_gridworld.select_agent
    # -- verify against that module).
    extra_options = [
        click.Option(["--size"], type=int, default=10),
        click.Option(["--noise"], type=float, default=0.0),
        click.Option(["--epsilon"], type=float, default=0.1),
        click.Option(["--epsilon-min"], type=float, default=None),
        click.Option(["--beta"], type=float, default=0.05),
        click.Option(["--show-reward"], is_flag=True),
        click.Option(["--vi-threshold"], type=float, default=0.001),
    ]
    run_experiment(
        select_domain,
        select_agent,
        default_max_steps=10000,
        default_num_policy_checks=10,
        default_checks_per_policy=50,
        other_options=extra_options,
    )
from fr_gridworld import select_agent


def select_domain(map_, noise, episode_cap, **kwargs):
    """Build the BernoulliGridWorld domain for the requested map.

    Args:
        map_: Map name without extension; ``".txt"`` is appended before the
            name is handed to ``BernoulliGridWorld.default_map``.
        noise: Passed to ``BernoulliGridWorld`` as ``noise``.
        episode_cap: Passed to ``BernoulliGridWorld`` as ``episode_cap``.
        **kwargs: Any further keyword arguments; accepted and ignored here.

    Returns:
        The newly constructed ``BernoulliGridWorld`` instance.
    """
    map_file = BernoulliGridWorld.default_map(map_ + ".txt")
    grid_world = BernoulliGridWorld(
        map_file,
        noise=noise,
        episode_cap=episode_cap,
    )
    return grid_world


if __name__ == "__main__":
    # Extra command-line options registered on top of run_experiment's own.
    # The --map option is exposed to Python as ``map_``. --map, --noise and
    # --episode-cap are read by select_domain; the rest are not used in this
    # file (presumably consumed by fr_gridworld.select_agent -- verify).
    extra_options = [
        click.Option(["--map", "map_"], type=str, default="5x5normal"),
        click.Option(["--noise"], type=float, default=0.1),
        click.Option(["--episode-cap"], type=int, default=20),
        click.Option(["--epsilon"], type=float, default=0.1),
        click.Option(["--epsilon-min"], type=float, default=None),
        click.Option(["--beta"], type=float, default=0.05),
        click.Option(["--show-reward"], is_flag=True),
        click.Option(["--vi-threshold"], type=float, default=1e-6),
    ]
    run_experiment(
        select_domain,
        select_agent,
        default_max_steps=10000,
        default_num_policy_checks=10,
        default_checks_per_policy=50,
        other_options=extra_options,
    )
def select_domain(chain_size):
    """Build the ChainMDP domain with the requested chain length.

    Args:
        chain_size: Passed to ``ChainMDP`` as the ``chain_size`` keyword.

    Returns:
        The newly constructed ``ChainMDP`` instance.
    """
    chain = ChainMDP(chain_size=chain_size)
    return chain


def select_agent(name, domain, max_steps, seed, **kwargs):
    """Return the agent registered under ``name`` for ``domain``.

    Args:
        name: Agent identifier. ``None`` and ``"lspi"`` both select tabular
            LSPI; the other recognised names are ``"nac"``, ``"tabular-q"``,
            ``"ifddk-q"`` and ``"psrl"``.
        domain: Domain handed to the chosen ``methods`` factory.
        max_steps: Only forwarded to ``methods.tabular_lspi``.
        seed: Only forwarded to ``methods.tabular_psrl``.
        **kwargs: Any further keyword arguments; accepted and ignored here.

    Returns:
        Whatever the selected ``methods`` factory returns.

    Raises:
        NotImplementedError: If ``name`` is not one of the recognised names.
    """
    if name is None or name == "lspi":
        return methods.tabular_lspi(domain, max_steps)
    if name == "nac":
        return methods.tabular_nac(domain)
    if name == "tabular-q":
        return methods.tabular_q(domain, initial_learn_rate=0.1)
    if name == "ifddk-q":
        return methods.ifddk_q(domain, initial_learn_rate=0.1)
    if name == "psrl":
        return methods.tabular_psrl(domain, seed=seed)
    raise NotImplementedError("Method {} is not supported".format(name))


if __name__ == "__main__":
    # Script entry point: the only extra CLI option is --chain-size, which is
    # the single argument select_domain accepts.
    extra_options = [click.Option(["--chain-size"], type=int, default=4)]
    run_experiment(
        select_domain,
        select_agent,
        default_max_steps=10000,
        default_num_policy_checks=10,
        default_checks_per_policy=50,
        other_options=extra_options,
    )
from rlpy.domains import BicycleRiding
from rlpy.tools.cli import run_experiment
import methods


def select_agent(name, domain, max_steps, _seed):
    """Return the agent registered under ``name`` for ``domain``.

    Args:
        name: Agent identifier; only ``None`` and ``"kifddk-q"`` are accepted.
        domain: Domain handed to ``methods.kifdd_q``.
        max_steps: Unused by this function.
        _seed: Unused by this function.

    Returns:
        Whatever ``methods.kifdd_q`` returns for the fixed hyper-parameters
        below.

    Raises:
        NotImplementedError: If ``name`` is anything other than ``None`` or
            ``"kifddk-q"``.
    """
    # Reject unknown names first so the single supported configuration can
    # follow without nesting.
    if not (name is None or name == "kifddk-q"):
        raise NotImplementedError("Method {} is not supported".format(name))
    return methods.kifdd_q(
        domain,
        11.6543336229,
        threshold=88044,
        lambda_=0.43982644088,
        initial_learn_rate=0.920244401,
        boyan_N0=64502.0,
        kernel="linf_triangle_kernel",
    )


if __name__ == "__main__":
    # Script entry point: a BicycleRiding instance is passed directly rather
    # than a domain-selector function, and no extra CLI options are added.
    run_experiment(
        BicycleRiding(),
        select_agent,
        default_max_steps=150000,
        default_num_policy_checks=30,
        default_checks_per_policy=1,
    )
# NOTE: whitespace-collapsed fragment. It begins inside a branch of a function
# whose header (and the condition guarding the first return) is not visible.
# Visible behaviour on this line:
#   * the first return builds methods.kifdd_q(domain, 8.567677, ...) with
#     threshold=0.0807, lambda_=0.52738, initial_learn_rate=0.4244,
#     boyan_N0=389.56.
#   * name == "rbfs-q" returns methods.rbf_q(domain, seed, ...) with
#     num_rbfs=96, resolution=21, initial_learn_rate=0.6633, lambda_=0.1953,
#     boyan_N0=13444.0.
#   * any other name raises NotImplementedError with no message.
#   * when run as a script, run_experiment receives a PuddleWorld() instance
#     and select_agent, with defaults of 40000 max steps / 20 policy checks /
#     100 checks per policy.
return methods.kifdd_q( domain, 8.567677, threshold=0.0807, lambda_=0.52738, initial_learn_rate=0.4244, boyan_N0=389.56, ) elif name == "rbfs-q": return methods.rbf_q( domain, seed, num_rbfs=96, resolution=21, initial_learn_rate=0.6633, lambda_=0.1953, boyan_N0=13444.0, ) else: raise NotImplementedError() if __name__ == "__main__": run_experiment( PuddleWorld(), select_agent, default_max_steps=40000, default_num_policy_checks=20, default_checks_per_policy=100, )
# NOTE: whitespace-collapsed fragment. It begins inside a branch of a function
# whose header (and the condition guarding the first return) is not visible.
# Visible behaviour on this line:
#   * an agent name is dispatched to a methods.* factory: "fourier-sarsa",
#     "ifdd-q", "ifdd-sarsa", "kifdd-q", "kifdd-sarsa", "rbfs-q", "rbfs-sarsa";
#     the fourier factories are called with order=5 and the rbf ones with
#     seed=seed.
#   * any other name raises NotImplementedError with no message.
#   * when run as a script, run_experiment is called with select_domain and
#     select_agent, defaults of 100000 max steps / 30 policy checks / 1 check
#     per policy, and one extra CLI option --cfg (str,
#     "pinball_simple_single").
# NOTE(review): the "rbfs-sarsa" branch calls methods.rbf_q, the same factory
# as "rbfs-q" -- confirm whether a SARSA variant was intended here.
return methods.fourier_q(domain, order=5) elif name == "fourier-sarsa": return methods.fourier_sarsa(domain, order=5) elif name == "ifdd-q": return methods.ifdd_q(domain) elif name == "ifdd-sarsa": return methods.ifdd_sarsa(domain) elif name == "kifdd-q": return methods.kifdd_q(domain) elif name == "kifdd-sarsa": return methods.kifdd_sarsa(domain) elif name == "rbfs-q": return methods.rbf_q(domain, seed=seed) elif name == "rbfs-sarsa": return methods.rbf_q(domain, seed=seed) else: raise NotImplementedError() if __name__ == "__main__": run_experiment( select_domain, select_agent, default_max_steps=100000, default_num_policy_checks=30, default_checks_per_policy=1, other_options=[ click.Option(["--cfg"], type=str, default="pinball_simple_single") ], )
1 1 0 0 0 1; 0 1 0 1 1 0; 0 1 0 0 1 1; 0 1 0 1 0 1; 0 1 1 0 1 0; 0 1 1 0 0 1""") return methods.tile_ggq( domain, mat, lambda_=lambda_, initial_learn_rate=0.240155681, boyan_N0=14.44946, ) else: raise NotImplementedError("Method {} is not supported".format(name)) if __name__ == "__main__": run_experiment( select_domain, select_agent, default_max_steps=100000, default_num_policy_checks=20, default_checks_per_policy=1, other_options=[ click.Option(["--blocks"], type=int, default=6), click.Option(["--noise"], type=float, default=0.3), click.Option(["--lambda", "lambda_"], type=float, default=0.0), ], )
# NOTE: the line above is a whitespace-collapsed fragment that begins inside a
# triple-quoted string literal (this comment is placed after it so the literal
# is not altered). Visible behaviour on that line:
#   * the branch returns methods.tile_ggq(domain, mat, ...) with the caller's
#     lambda_, initial_learn_rate=0.240155681 and boyan_N0=14.44946; how `mat`
#     is built from the string is not visible from here.
#   * any other name raises NotImplementedError naming the unsupported method.
#   * when run as a script, run_experiment is called with select_domain and
#     select_agent, defaults of 100000 max steps / 20 policy checks / 1 check
#     per policy, and extra CLI options --blocks (int, 6), --noise (float,
#     0.3) and --lambda (float, 0.0; exposed to Python as `lambda_`).
# NOTE: whitespace-collapsed fragment. It begins with the tail of a call
# (`episode_cap=20)`) whose opening is not visible from here.
# Visible behaviour on this line:
#   * select_agent(name, domain, max_steps, seed, **kwargs) maps an agent name
#     to a methods.* factory: None or "lspi" -> tabular_lspi(domain,
#     max_steps); "nac" -> tabular_nac; "tabular-q" and "ifddk-q" ->
#     tabular_q / ifddk_q with initial_learn_rate=0.11; "psrl" ->
#     tabular_psrl(domain, seed=seed). Extra **kwargs are ignored.
#   * any other name raises NotImplementedError naming the unsupported method.
#   * when run as a script, run_experiment is called with select_domain and
#     select_agent, defaults of 10000 max steps / 10 policy checks / 50 checks
#     per policy, and extra CLI options --map (str, "4x5"; exposed to Python
#     as `map_`) and --noise (float, 0.1).
episode_cap=20) def select_agent(name, domain, max_steps, seed, **kwargs): if name is None or name == "lspi": return methods.tabular_lspi(domain, max_steps) elif name == "nac": return methods.tabular_nac(domain) elif name == "tabular-q": return methods.tabular_q(domain, initial_learn_rate=0.11) elif name == "ifddk-q": return methods.ifddk_q(domain, initial_learn_rate=0.11) elif name == "psrl": return methods.tabular_psrl(domain, seed=seed) else: raise NotImplementedError("Method {} is not supported".format(name)) if __name__ == "__main__": run_experiment( select_domain, select_agent, default_max_steps=10000, default_num_policy_checks=10, default_checks_per_policy=50, other_options=[ click.Option(["--map", "map_"], type=str, default="4x5"), click.Option(["--noise"], type=float, default=0.1), ], )
# NOTE: whitespace-collapsed fragment. It begins at an `elif` of a chain whose
# first branch (and the enclosing function header) is not visible from here.
# Visible behaviour on this line:
#   * name == "tabular-q" returns methods.tabular_q(domain, ...) with
#     lambda_=0.9, initial_learn_rate=0.26, boyan_N0=119, incremental=True.
#   * name == "rbf-q" returns methods.rbf_q(domain, seed, ...) with
#     num_rbfs=5000, resolution=8, initial_learn_rate=0.26, lambda_=0.9,
#     boyan_N0=2120.
#   * any other name raises NotImplementedError naming the unsupported method.
#   * when run as a script, run_experiment is called with select_domain and
#     select_agent, defaults of 30000 max steps / 10 policy checks / 50 checks
#     per policy, and no extra CLI options.
elif name == "tabular-q": return methods.tabular_q( domain, lambda_=0.9, initial_learn_rate=0.26, boyan_N0=119, incremental=True, ) elif name == "rbf-q": return methods.rbf_q( domain, seed, num_rbfs=5000, resolution=8, initial_learn_rate=0.26, lambda_=0.9, boyan_N0=2120, ) else: raise NotImplementedError("Method {} is not supported".format(name)) if __name__ == "__main__": run_experiment( select_domain, select_agent, default_max_steps=30000, default_num_policy_checks=10, default_checks_per_policy=50, )