Exemplo n.º 1
0
            boyan_N0=119,
            incremental=True,
        )
    elif name == "rbf-q":
        return methods.rbf_q(
            domain,
            seed,
            num_rbfs=5000,
            resolution=8,
            initial_learn_rate=0.26,
            lambda_=0.9,
            boyan_N0=2120,
        )
    else:
        raise NotImplementedError("Method {} is not supported".format(name))


if __name__ == "__main__":
    # Script entry point. The script-specific command-line options are
    # declared up front and then handed to run_experiment together with the
    # two selectors and the default step / policy-check settings.
    extra_cli_options = [
        click.Option(["--task"], type=str, default="original"),
        click.Option(["--good-reward"], type=float, default=0.0),
        click.Option(["--kernel"], type=str, default="gaussian_kernel"),
    ]
    run_experiment(
        select_domain,
        select_agent,
        other_options=extra_cli_options,
        default_max_steps=30000,
        default_num_policy_checks=10,
        default_checks_per_policy=50,
    )
Exemplo n.º 2
0
import click
from rlpy.domains import DeepSea
from rlpy.tools.cli import run_experiment
from fr_gridworld import select_agent


def select_domain(size, noise, **kwargs):
    """Build the DeepSea domain used by this script.

    ``size`` is passed positionally and ``noise`` by keyword to ``DeepSea``.
    Any further keyword arguments are accepted and ignored.
    """
    domain = DeepSea(size, noise=noise)
    return domain


if __name__ == "__main__":
    # Default step and policy-check settings forwarded to run_experiment.
    experiment_defaults = {
        "default_max_steps": 10000,
        "default_num_policy_checks": 10,
        "default_checks_per_policy": 50,
    }
    # Additional command-line options declared by this script.
    cli_options = [
        click.Option(["--size"], type=int, default=10),
        click.Option(["--noise"], type=float, default=0.0),
        click.Option(["--epsilon"], type=float, default=0.1),
        click.Option(["--epsilon-min"], type=float, default=None),
        click.Option(["--beta"], type=float, default=0.05),
        click.Option(["--show-reward"], is_flag=True),
        click.Option(["--vi-threshold"], type=float, default=0.001),
    ]
    run_experiment(
        select_domain,
        select_agent,
        other_options=cli_options,
        **experiment_defaults
    )
Exemplo n.º 3
0
from fr_gridworld import select_agent


def select_domain(map_, noise, episode_cap, **kwargs):
    """Build a BernoulliGridWorld domain for the given map name.

    ``".txt"`` is appended to ``map_`` and the result is passed to
    ``BernoulliGridWorld.default_map``; whatever that returns becomes the
    first constructor argument. ``noise`` and ``episode_cap`` are forwarded
    by keyword. Any further keyword arguments are accepted and ignored.
    """
    map_file = BernoulliGridWorld.default_map(map_ + ".txt")
    return BernoulliGridWorld(map_file, noise=noise, episode_cap=episode_cap)


if __name__ == "__main__":
    # Script entry point. "--map" is bound to the parameter name ``map_``;
    # the remaining options keep the names click derives from their flags.
    gridworld_options = [
        click.Option(["--map", "map_"], type=str, default="5x5normal"),
        click.Option(["--noise"], type=float, default=0.1),
        click.Option(["--episode-cap"], type=int, default=20),
        click.Option(["--epsilon"], type=float, default=0.1),
        click.Option(["--epsilon-min"], type=float, default=None),
        click.Option(["--beta"], type=float, default=0.05),
        click.Option(["--show-reward"], is_flag=True),
        click.Option(["--vi-threshold"], type=float, default=1e-6),
    ]
    run_experiment(
        select_domain,
        select_agent,
        other_options=gridworld_options,
        default_max_steps=10000,
        default_num_policy_checks=10,
        default_checks_per_policy=50,
    )
Exemplo n.º 4
0

def select_domain(chain_size):
    """Build a ChainMDP domain, forwarding ``chain_size`` by keyword."""
    domain = ChainMDP(chain_size=chain_size)
    return domain


def select_agent(name, domain, max_steps, seed, **kwargs):
    """Return the agent that ``methods`` builds for the method ``name``.

    ``None`` selects the same method as ``"lspi"``. ``max_steps`` is only
    used for ``"lspi"`` and ``seed`` only for ``"psrl"``. Any further keyword
    arguments are accepted and ignored.

    Raises:
        NotImplementedError: if ``name`` is not one of the handled names.
    """
    if name in (None, "lspi"):
        return methods.tabular_lspi(domain, max_steps)
    if name == "nac":
        return methods.tabular_nac(domain)
    if name == "tabular-q":
        return methods.tabular_q(domain, initial_learn_rate=0.1)
    if name == "ifddk-q":
        return methods.ifddk_q(domain, initial_learn_rate=0.1)
    if name == "psrl":
        return methods.tabular_psrl(domain, seed=seed)
    # No branch matched: report the unsupported method name.
    raise NotImplementedError("Method {} is not supported".format(name))


if __name__ == "__main__":
    # The only script-specific option is --chain-size (int, default 4).
    chain_size_option = click.Option(["--chain-size"], type=int, default=4)
    run_experiment(
        select_domain,
        select_agent,
        other_options=[chain_size_option],
        default_max_steps=10000,
        default_num_policy_checks=10,
        default_checks_per_policy=50,
    )
Exemplo n.º 5
0
from rlpy.domains import BicycleRiding
from rlpy.tools.cli import run_experiment

import methods


def select_agent(name, domain, max_steps, _seed):
    """Return the ``methods.kifdd_q`` agent for ``domain``.

    Only ``None`` and ``"kifddk-q"`` are accepted as ``name``. ``max_steps``
    and ``_seed`` are not used by this function.

    Raises:
        NotImplementedError: for any other ``name``.
    """
    if not (name is None or name == "kifddk-q"):
        raise NotImplementedError("Method {} is not supported".format(name))

    # Fixed keyword arguments for kifdd_q; 11.6543336229 is passed positionally.
    hyperparameters = {
        "threshold": 88044,
        "lambda_": 0.43982644088,
        "initial_learn_rate": 0.920244401,
        "boyan_N0": 64502.0,
        "kernel": "linf_triangle_kernel",
    }
    return methods.kifdd_q(domain, 11.6543336229, **hyperparameters)


if __name__ == "__main__":
    # A BicycleRiding instance is passed directly as the first argument; this
    # script declares no extra command-line options.
    experiment_defaults = {
        "default_max_steps": 150000,
        "default_num_policy_checks": 30,
        "default_checks_per_policy": 1,
    }
    run_experiment(BicycleRiding(), select_agent, **experiment_defaults)
Exemplo n.º 6
0
        return methods.kifdd_q(
            domain,
            8.567677,
            threshold=0.0807,
            lambda_=0.52738,
            initial_learn_rate=0.4244,
            boyan_N0=389.56,
        )
    elif name == "rbfs-q":
        return methods.rbf_q(
            domain,
            seed,
            num_rbfs=96,
            resolution=21,
            initial_learn_rate=0.6633,
            lambda_=0.1953,
            boyan_N0=13444.0,
        )
    else:
        raise NotImplementedError()


if __name__ == "__main__":
    # A PuddleWorld instance is passed directly as the first argument; this
    # script declares no extra command-line options.
    experiment_defaults = {
        "default_max_steps": 40000,
        "default_num_policy_checks": 20,
        "default_checks_per_policy": 100,
    }
    run_experiment(PuddleWorld(), select_agent, **experiment_defaults)
Exemplo n.º 7
0
        return methods.fourier_q(domain, order=5)
    elif name == "fourier-sarsa":
        return methods.fourier_sarsa(domain, order=5)
    elif name == "ifdd-q":
        return methods.ifdd_q(domain)
    elif name == "ifdd-sarsa":
        return methods.ifdd_sarsa(domain)
    elif name == "kifdd-q":
        return methods.kifdd_q(domain)
    elif name == "kifdd-sarsa":
        return methods.kifdd_sarsa(domain)
    elif name == "rbfs-q":
        return methods.rbf_q(domain, seed=seed)
    elif name == "rbfs-sarsa":
        return methods.rbf_q(domain, seed=seed)
    else:
        raise NotImplementedError()


if __name__ == "__main__":
    # The only script-specific option is --cfg, a string that defaults to
    # "pinball_simple_single".
    cfg_option = click.Option(["--cfg"], type=str, default="pinball_simple_single")
    run_experiment(
        select_domain,
        select_agent,
        other_options=[cfg_option],
        default_max_steps=100000,
        default_num_policy_checks=30,
        default_checks_per_policy=1,
    )
Exemplo n.º 8
0
               1 1 0 0 0 1;
               0 1 0 1 1 0;
               0 1 0 0 1 1;
               0 1 0 1 0 1;
               0 1 1 0 1 0;
               0 1 1 0 0 1""")
        return methods.tile_ggq(
            domain,
            mat,
            lambda_=lambda_,
            initial_learn_rate=0.240155681,
            boyan_N0=14.44946,
        )
    else:
        raise NotImplementedError("Method {} is not supported".format(name))


if __name__ == "__main__":
    # Script-specific options. "--lambda" is bound to the parameter name
    # ``lambda_`` because ``lambda`` is a reserved word in Python.
    blocks_options = [
        click.Option(["--blocks"], type=int, default=6),
        click.Option(["--noise"], type=float, default=0.3),
        click.Option(["--lambda", "lambda_"], type=float, default=0.0),
    ]
    run_experiment(
        select_domain,
        select_agent,
        other_options=blocks_options,
        default_max_steps=100000,
        default_num_policy_checks=20,
        default_checks_per_policy=1,
    )
Exemplo n.º 9
0
                     episode_cap=20)


def select_agent(name, domain, max_steps, seed, **kwargs):
    """Return the agent that ``methods`` builds for the method ``name``.

    ``None`` is treated as ``"lspi"``. ``max_steps`` is only used for
    ``"lspi"`` and ``seed`` only for ``"psrl"``. Any further keyword
    arguments are accepted and ignored.

    Raises:
        NotImplementedError: if ``name`` is not one of the handled names.
    """
    requested = "lspi" if name is None else name
    # Ordered (label, factory) pairs. The factories are lambdas so that only
    # the matching method is ever looked up and called.
    factories = (
        ("lspi", lambda: methods.tabular_lspi(domain, max_steps)),
        ("nac", lambda: methods.tabular_nac(domain)),
        ("tabular-q", lambda: methods.tabular_q(domain, initial_learn_rate=0.11)),
        ("ifddk-q", lambda: methods.ifddk_q(domain, initial_learn_rate=0.11)),
        ("psrl", lambda: methods.tabular_psrl(domain, seed=seed)),
    )
    for label, make_agent in factories:
        if requested == label:
            return make_agent()
    raise NotImplementedError("Method {} is not supported".format(name))


if __name__ == "__main__":
    # Script-specific options. "--map" is bound to the parameter name
    # ``map_``.
    map_options = [
        click.Option(["--map", "map_"], type=str, default="4x5"),
        click.Option(["--noise"], type=float, default=0.1),
    ]
    run_experiment(
        select_domain,
        select_agent,
        other_options=map_options,
        default_max_steps=10000,
        default_num_policy_checks=10,
        default_checks_per_policy=50,
    )
Exemplo n.º 10
0
    elif name == "tabular-q":
        return methods.tabular_q(
            domain,
            lambda_=0.9,
            initial_learn_rate=0.26,
            boyan_N0=119,
            incremental=True,
        )
    elif name == "rbf-q":
        return methods.rbf_q(
            domain,
            seed,
            num_rbfs=5000,
            resolution=8,
            initial_learn_rate=0.26,
            lambda_=0.9,
            boyan_N0=2120,
        )
    else:
        raise NotImplementedError("Method {} is not supported".format(name))


if __name__ == "__main__":
    # Script entry point. No extra command-line options are declared here;
    # only the default step and policy-check settings are supplied.
    experiment_defaults = {
        "default_max_steps": 30000,
        "default_num_policy_checks": 10,
        "default_checks_per_policy": 50,
    }
    run_experiment(select_domain, select_agent, **experiment_defaults)