Example #1
def main(open_plot=True):

    # Setup MDP.
    mdp = GridWorldMDP(width=8,
                       height=3,
                       init_loc=(1, 1),
                       goal_locs=[(8, 3)],
                       lava_locs=[(4, 2)],
                       gamma=0.95,
                       walls=[(2, 2)],
                       slip_prob=0.05)

    # Make agents.
    ql_agent = QLearningAgent(actions=mdp.get_actions())
    rand_agent = RandomAgent(actions=mdp.get_actions())

    # Run experiment and make plot.
    run_agents_on_mdp([ql_agent, rand_agent],
                      mdp,
                      instances=20,
                      episodes=300,
                      steps=20,
                      open_plot=open_plot,
                      track_success=True,
                      success_reward=1)
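The main() snippets in these examples omit their imports and entry point. A minimal sketch of the scaffolding they assume, modeled on the complete scripts in Examples #12 and #13 (the srl_example_setup import only applies when running inside simple_rl's examples directory):

import sys

import srl_example_setup  # Path setup; only needed inside simple_rl's examples/ folder.
from simple_rl.agents import QLearningAgent, RandomAgent
from simple_rl.tasks import GridWorldMDP
from simple_rl.run_experiments import run_agents_on_mdp

if __name__ == "__main__":
    # simple_rl's example scripts pass "no_plot" as the last CLI argument to suppress plots.
    main(open_plot=not sys.argv[-1] == "no_plot")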
Example #2
def main(open_plot=True):
    # Setup MDP.
    mdp = GridWorldMDP(width=4,
                       height=3,
                       init_loc=(1, 1),
                       goal_locs=[(4, 3)],
                       lava_locs=[(4, 2)],
                       gamma=0.95,
                       walls=[(2, 2)],
                       slip_prob=0.05)

    # Make agents.
    ql_agent = QLearningAgent(actions=mdp.get_actions())
    rand_agent = RandomAgent(actions=mdp.get_actions())
    tabular_agent = CherryQAgent(mdp,
                                 model=lambda *x: ActionValueFunction(*x, init=1.0),
                                 name='Tabular',
                                 lr=0.7)
    linear_agent = CherryQAgent(mdp,
                                model=lambda *x: nn.Linear(*x),
                                name='Linear',
                                lr=0.1)
    mlp_agent = CherryQAgent(mdp,
                             model=lambda *x: MLP(*x),
                             name='MLP',
                             lr=0.07)

    # Run experiment and make plot.
    agents = [rand_agent, ql_agent, tabular_agent, linear_agent, mlp_agent]
    run_agents_on_mdp(agents,
                      mdp,
                      instances=10,
                      episodes=50,
                      steps=50,
                      open_plot=open_plot)
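Example #2 uses nn, MLP, ActionValueFunction, and CherryQAgent without defining them; nn is presumably PyTorch's torch.nn, and the others are helpers defined alongside the example. A hypothetical sketch of what the MLP model might look like, assuming PyTorch (the layer sizes are illustrative, not taken from the original):

import torch.nn as nn

class MLP(nn.Module):
    # Small two-layer network mapping state features to action values.
    def __init__(self, input_size, output_size, hidden_size=128):
        super().__init__()
        self.net = nn.Sequential(
            nn.Linear(input_size, hidden_size),
            nn.ReLU(),
            nn.Linear(hidden_size, output_size),
        )

    def forward(self, x):
        return self.net(x)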
Example #3
def main(open_plot=True):
    # Setup MDP, Agents.
    mdp = GridWorldMDP(width=10, height=10, init_loc=(1, 1), goal_locs=[(10, 10)])
    ql_agent = QLearningAgent(actions=mdp.get_actions())
    rand_agent = RandomAgent(actions=mdp.get_actions())
    abstr_identity_agent = AbstractionWrapper(QLearningAgent,
                                              agent_params={"epsilon": 0.9, "actions": mdp.get_actions()})

    # Run experiment and make plot.
    run_agents_on_mdp([ql_agent, rand_agent, abstr_identity_agent], mdp, instances=5, episodes=100, steps=150, open_plot=open_plot)
Example #4
def main(open_plot=True):
    # Setup MDP, Agents.
    mdp = GridWorldMDP(width=10, height=10, init_loc=(1, 1), goal_locs=[(10, 10)])
    ql_agent = QLearningAgent(actions=mdp.get_actions())
    rand_agent = RandomAgent(actions=mdp.get_actions())
    abstr_identity_agent = AbstractionWrapper(QLearningAgent,
                                              agent_params={"epsilon": 0.9},
                                              actions=mdp.get_actions())

    # Run experiment and make plot.
    run_agents_on_mdp([ql_agent, rand_agent, abstr_identity_agent], mdp, instances=5, episodes=100, steps=150, open_plot=open_plot)
Example #5
def main():

    # Setup MDP.

    actual_args = {
        "width": 10,
        "height": 10,
        "init_loc": (1, 1),
        "goal_locs": [(10, 10)],
        "lava_locs": [(1, 10), (3, 10), (5, 10), (7, 10), (9, 10)],
        "gamma": 0.9,
        "walls": [
            (2, 2), (2, 3), (2, 4), (2, 5), (2, 6), (2, 7), (2, 8), (2, 9),
            (4, 2), (4, 3), (4, 4), (4, 5), (4, 6), (4, 7), (4, 8), (4, 9),
            (6, 2), (6, 3), (6, 4), (6, 5), (6, 6), (6, 7), (6, 8), (6, 9),
            (8, 2), (8, 3), (8, 4), (8, 5), (8, 6), (8, 7), (8, 8), (8, 9)
        ],
        "slip_prob": 0.01,
        "lava_cost": 1.0,
        "step_cost": 0.1
    }

    mdp = GridWorldMDP(**actual_args)

    # Initialize a custom Q-function for the Q-learning agent. This should be
    # equivalent to potential-based shaping and help the agent learn more quickly.
    custom_q = defaultdict(lambda: defaultdict(lambda: 0))
    custom_q[GridWorldState(5, 1)]['right'] = 1.0
    custom_q[GridWorldState(2, 1)]['right'] = 1.0

    # Make a normal q-learning agent and another initialized with the custom_q above.
    # Finally, make a random agent to compare against.
    ql_agent = QLearningAgent(actions=mdp.get_actions(),
                              epsilon=0.2,
                              alpha=0.4)
    ql_agent_pot = QLearningAgent(actions=mdp.get_actions(),
                                  epsilon=0.2,
                                  alpha=0.4,
                                  custom_q_init=custom_q,
                                  name="PotQ")
    rand_agent = RandomAgent(actions=mdp.get_actions())

    # Run experiment and make plot.
    run_agents_on_mdp([ql_agent, ql_agent_pot, rand_agent],
                      mdp,
                      instances=2,
                      episodes=60,
                      steps=200,
                      open_plot=True,
                      verbose=True)
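The two hand-set entries above seed the Q-function along the route to the goal. The same idea can be applied systematically by deriving the whole table from a potential function, in the spirit of potential-based shaping; a minimal sketch, where phi is an illustrative potential over states (not part of the original example):

from collections import defaultdict

def q_from_potential(phi, states, actions):
    # Initialize every Q(s, a) to phi(s), so states the potential
    # deems promising start out with optimistic values.
    custom_q = defaultdict(lambda: defaultdict(lambda: 0))
    for s in states:
        for a in actions:
            custom_q[s][a] = phi(s)
    return custom_q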
Example #6
def main(open_plot=True):

    # Setup MDP.
    mdp = GridWorldMDP(width=4, height=3, init_loc=(1, 1), goal_locs=[(4, 3)], lava_locs=[(4, 2)], gamma=0.95, walls=[(2, 2)], slip_prob=0.05)

    # Make agents.
    ql_agent = QLearningAgent(actions=mdp.get_actions())
    rand_agent = RandomAgent(actions=mdp.get_actions())

    # Run experiment and make plot.
    run_agents_on_mdp([ql_agent, rand_agent], mdp, instances=10, episodes=50, steps=10, open_plot=open_plot)
Example #7
def main():
    
    # Setup MDP, Agents.
    mdp = GridWorldMDP(width=4, height=3, init_loc=(1, 1), goal_locs=[(4, 3)], lava_locs=[(4, 2)], gamma=0.95, walls=[(2, 2)], slip_prob=0.1)
    ql_agent = QLearningAgent(mdp.get_actions(), epsilon=0.2, alpha=0.2)
    viz = parse_args()

    # Choose viz type (hard-coded here, overriding the command-line choice).
    viz = "value"

    if viz == "value":
        # --> Color corresponds to higher value.
        # Run experiment and make plot.
        mdp.visualize_value()
    elif viz == "policy":
        # Viz policy
        value_iter = ValueIteration(mdp)
        value_iter.run_vi()
        policy = value_iter.policy
        mdp.visualize_policy(policy)
    elif viz == "agent":
        # --> Press <spacebar> to advance the agent.
        # First let the agent solve the problem and then visualize the agent's resulting policy.
        print("\n", str(ql_agent), "interacting with", str(mdp))
        run_single_agent_on_mdp(ql_agent, mdp, episodes=500, steps=200)
        mdp.visualize_agent(ql_agent)
    elif viz == "learning":
        # --> Press <r> to reset.
        # Show agent's interaction with the environment.
        mdp.visualize_learning(ql_agent, delay=0.005, num_ep=500, num_steps=200)
    elif viz == "interactive":
        # Press <1>, <2>, <3>, and so on to execute action 1, action 2, etc.
        mdp.visualize_interaction()
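Example #7 calls parse_args() without defining it. In simple_rl's visualization example it is a small argparse wrapper that returns the requested viz type; a sketch along those lines (the flag name and default are assumptions):

import argparse

def parse_args():
    # Returns the requested visualization type as a string.
    parser = argparse.ArgumentParser()
    parser.add_argument("-v", "--viz", type=str, default="value",
                        help="One of: value, policy, agent, learning, interactive.")
    return parser.parse_args().viz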
Example #8
def main(open_plot=True):
    # Setup MDP, Agents.
    mdp = GridWorldMDP(width=10,
                       height=10,
                       init_loc=(1, 1),
                       goal_locs=[(10, 10)],
                       gamma=0.95)

    dq_agent = DoubleQAgent(actions=mdp.get_actions())
    ql_agent = QLearningAgent(actions=mdp.get_actions())
    rand_agent = RandomAgent(actions=mdp.get_actions())

    # Run experiment and make plot.
    run_agents_on_mdp([dq_agent, ql_agent, rand_agent],
                      mdp,
                      instances=50,
                      episodes=150,
                      steps=100,
                      open_plot=open_plot)
Example #9
def main(open_plot=True):
    # Setup MDP, Agents.
    mdp = GridWorldMDP(width=4,
                       height=3,
                       init_loc=(1, 1),
                       goal_locs=[(4, 3)],
                       gamma=0.95,
                       walls=[(2, 2)])

    ql_agent = QLearningAgent(actions=mdp.get_actions())
    rand_agent = RandomAgent(actions=mdp.get_actions())

    # Run experiment and make plot.
    run_agents_on_mdp([ql_agent, rand_agent],
                      mdp,
                      instances=10,
                      episodes=1,
                      steps=20,
                      open_plot=open_plot)
Example #10
def main(open_plot=True):
    # Setup MDP.
    mdp = GridWorldMDP(width=4,
                       height=3,
                       init_loc=(1, 1),
                       goal_locs=[(4, 3)],
                       lava_locs=[(4, 2)],
                       gamma=0.95,
                       walls=[(2, 2)])

    # Make agents.
    ql_agent = QLearningAgent(actions=mdp.get_actions())
    rand_agent = RandomAgent(actions=mdp.get_actions())

    # Run experiment and make plot.
    run_agents_on_mdp([ql_agent, rand_agent],
                      mdp,
                      instances=5,
                      episodes=50,
                      steps=25,
                      open_plot=open_plot,
                      track_disc_reward=False)
Example #11
def main(open_plot=True):
    # Setup MDP.
    mdp = GridWorldMDP(width=4,
                       height=3,
                       init_loc=(1, 1),
                       goal_locs=[(4, 3)],
                       lava_locs=[(4, 2)],
                       gamma=0.95,
                       walls=[(2, 2)])

    # Make agents.
    ql_agent = QLearningAgent(actions=mdp.get_actions())
    rand_agent = RandomAgent(actions=mdp.get_actions())

    # Run experiment and make plot.
    run_agents_on_mdp([ql_agent, rand_agent],
                      mdp,
                      instances=5,
                      episodes=50,
                      steps=25,
                      open_plot=open_plot)

    # Reproduce the experiment.
    reproduce_from_exp_file(exp_name=str(mdp), open_plot=open_plot)
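reproduce_from_exp_file rereads the experiment parameters that run_agents_on_mdp saved to disk (simple_rl writes them under a results directory keyed by the experiment name) and reruns the experiment. A hedged usage sketch; the name string below is illustrative, standing in for whatever str(mdp) produced in the original run:

from simple_rl.run_experiments import reproduce_from_exp_file

# Rerun a previously saved experiment by name and replot its results.
reproduce_from_exp_file(exp_name="gridworld_h-3_w-4", open_plot=True)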
Example #12
from simple_rl.agents import QLearningAgent, RandomAgent, RMaxAgent
from simple_rl.tasks import GridWorldMDP
from simple_rl.run_experiments import run_agents_on_mdp

# Setup MDP.
mdp = GridWorldMDP(width=4,
                   height=3,
                   init_loc=(1, 1),
                   goal_locs=[(4, 3)],
                   lava_locs=[(4, 2)],
                   gamma=0.95,
                   walls=[(2, 2)],
                   slip_prob=0.05)

# Setup Agents.
ql_agent = QLearningAgent(actions=mdp.get_actions())
rmax_agent = RMaxAgent(actions=mdp.get_actions())
rand_agent = RandomAgent(actions=mdp.get_actions())

# Run experiment and make plot.
run_agents_on_mdp([ql_agent, rmax_agent, rand_agent],
                  mdp,
                  instances=5,
                  episodes=50,
                  steps=10)
Example #13
#!/usr/bin/env python

# Other imports.
import srl_example_setup
from simple_rl.agents import QLearningAgent, RandomAgent, RMaxAgent
from simple_rl.tasks import GridWorldMDP
from simple_rl.run_experiments import run_agents_on_mdp

# Setup MDP, Agents.
mdp = GridWorldMDP(width=6, height=6, init_loc=(1, 1), goal_locs=[(6, 6)])

rmax_agent = RMaxAgent(actions=mdp.get_actions())
ql_agent = QLearningAgent(actions=mdp.get_actions())
rand_agent = RandomAgent(actions=mdp.get_actions())

# Run experiment and make plot.
run_agents_on_mdp([ql_agent, rmax_agent, rand_agent],
                  mdp,
                  instances=10,
                  episodes=30,
                  steps=50,
                  reset_at_terminal=True)
Example #14
                           slip_prob=0.4,
                           step_cost=args.step_cost,
                           rand_init=True)
        mdps.append(mdp)

    mdp_list = MDPList(mdps)

    thres_sm = args.thres_sm
    thres_lg = args.thres_lg

    ql_agent = QLearningAgent(actions=mdp_list.get_actions(),
                              gamma=mdp_list.get_gamma())

    rmax_agent = TabularRMaxAgent(states=mdp.states,
                                  state_map=mdp.state_map,
                                  actions=mdp.get_actions(),
                                  s_a_threshold=thres_lg,
                                  greedy=args.greedy,
                                  gamma=mdp_list.get_gamma())
    multi_agent = MultiTaskRMaxAgent(states=mdp.states,
                                     state_map=mdp.state_map,
                                     actions=mdp.get_actions(),
                                     thres_sm=thres_sm,
                                     thres_lg=thres_lg,
                                     t1=args.t1 + 1,
                                     model_gap=args.model_gap,
                                     greedy=args.greedy,
                                     xi=args.xi,
                                     gamma=mdp_list.get_gamma())

    pattern_agent = PatternLearningAgent(states=mdp.states,