def main(open_plot=True):
    """Run Q-learning against a random baseline on a slippery 8x3 grid world.

    Tracks success (reaching reward >= 1 within an episode) and plots results.

    Args:
        open_plot (bool): If True, open the generated plot.
    """
    # Setup MDP: an 8x3 world with one wall, one lava cell, and 5% slip.
    mdp = GridWorldMDP(width=8, height=3, init_loc=(1, 1), goal_locs=[(8, 3)],
                       lava_locs=[(4, 2)], gamma=0.95, walls=[(2, 2)],
                       slip_prob=0.05)

    # Make agents.
    ql_agent = QLearningAgent(actions=mdp.get_actions())
    rand_agent = RandomAgent(actions=mdp.get_actions())

    # Run experiment and make plot, tracking per-episode success.
    run_agents_on_mdp([ql_agent, rand_agent], mdp,
                      instances=20, episodes=300, steps=20,
                      open_plot=open_plot, track_success=True, success_reward=1)
def main(open_plot=True):
    """Compare Cherry Q-agents (tabular / linear / MLP models) with Q-learning and random.

    Args:
        open_plot (bool): If True, open the generated plot.
    """
    # Setup MDP: small 4x3 world with a wall, a lava cell, and 5% slip.
    mdp = GridWorldMDP(width=4, height=3, init_loc=(1, 1), goal_locs=[(4, 3)],
                       lava_locs=[(4, 2)], gamma=0.95, walls=[(2, 2)],
                       slip_prob=0.05)

    # Make agents. Each CherryQAgent wraps a different function approximator.
    ql_agent = QLearningAgent(actions=mdp.get_actions())
    rand_agent = RandomAgent(actions=mdp.get_actions())
    tabular_agent = CherryQAgent(mdp,
                                 model=lambda *shape: ActionValueFunction(*shape, init=1.0),
                                 name='Tabular', lr=0.7)
    linear_agent = CherryQAgent(mdp,
                                model=lambda *shape: nn.Linear(*shape),
                                name='Linear', lr=0.1)
    mlp_agent = CherryQAgent(mdp,
                             model=lambda *shape: MLP(*shape),
                             name='MLP', lr=0.07)

    # Run experiment and make plot.
    agents = [rand_agent, ql_agent, tabular_agent, linear_agent, mlp_agent]
    run_agents_on_mdp(agents, mdp, instances=10, episodes=50, steps=50,
                      open_plot=open_plot)
def main(open_plot=True):
    """Q-learning with and without an AbstractionWrapper vs. random, on a 10x10 grid.

    Args:
        open_plot (bool): If True, open the generated plot.
    """
    # Setup MDP, Agents.
    mdp = GridWorldMDP(width=10, height=10, init_loc=(1, 1), goal_locs=[(10, 10)])
    ql_agent = QLearningAgent(actions=mdp.get_actions())
    rand_agent = RandomAgent(actions=mdp.get_actions())
    # Wrapped Q-learner; epsilon and actions are forwarded to the underlying
    # QLearningAgent via agent_params.
    abstr_identity_agent = AbstractionWrapper(
        QLearningAgent,
        agent_params={"epsilon": 0.9, "actions": mdp.get_actions()})

    # Run experiment and make plot.
    run_agents_on_mdp([ql_agent, rand_agent, abstr_identity_agent], mdp,
                      instances=5, episodes=100, steps=150, open_plot=open_plot)
def main(open_plot=True):
    """Q-learning with and without an AbstractionWrapper vs. random, on a 10x10 grid.

    Args:
        open_plot (bool): If True, open the generated plot.
    """
    # Setup MDP, Agents.
    mdp = GridWorldMDP(width=10, height=10, init_loc=(1, 1), goal_locs=[(10, 10)])
    ql_agent = QLearningAgent(actions=mdp.get_actions())
    rand_agent = RandomAgent(actions=mdp.get_actions())
    # NOTE(review): here `actions` is passed to the wrapper itself, while
    # `epsilon` goes through agent_params — confirm against AbstractionWrapper's
    # signature (a sibling example nests actions inside agent_params instead).
    abstr_identity_agent = AbstractionWrapper(QLearningAgent,
                                              agent_params={"epsilon": 0.9},
                                              actions=mdp.get_actions())

    # Run experiment and make plot.
    run_agents_on_mdp([ql_agent, rand_agent, abstr_identity_agent], mdp,
                      instances=5, episodes=100, steps=150, open_plot=open_plot)
def main(open_plot=True):
    """Q-learning with a custom Q-table initialization (potential-shaping-like).

    Compares a plain Q-learner, a Q-learner whose Q-table is seeded with
    optimistic values at the corridor openings (which should speed learning),
    and a random baseline, on a 10x10 maze with lava along the top row.

    Args:
        open_plot (bool): If True, open the generated plot. New parameter with
            a default preserving the previous hard-coded behavior, added for
            consistency with the other example entry points.
    """
    # Setup MDP.
    actual_args = {
        "width": 10,
        "height": 10,
        "init_loc": (1, 1),
        "goal_locs": [(10, 10)],
        "lava_locs": [(1, 10), (3, 10), (5, 10), (7, 10), (9, 10)],
        "gamma": 0.9,
        # Vertical wall segments at even columns (rows 2-9), leaving openings
        # at the bottom and top rows.
        "walls": [
            (2, 2), (2, 3), (2, 4), (2, 5), (2, 6), (2, 7), (2, 8), (2, 9),
            (4, 2), (4, 3), (4, 4), (4, 5), (4, 6), (4, 7), (4, 8), (4, 9),
            (6, 2), (6, 3), (6, 4), (6, 5), (6, 6), (6, 7), (6, 8), (6, 9),
            (8, 2), (8, 3), (8, 4), (8, 5), (8, 6), (8, 7), (8, 8), (8, 9)
        ],
        "slip_prob": 0.01,
        "lava_cost": 1.0,
        "step_cost": 0.1
    }
    mdp = GridWorldMDP(**actual_args)

    # Initialize the custom Q function for a q-learning agent. This should be
    # equivalent to potential shaping, and should cause the Q agent to learn
    # more quickly.
    custom_q = defaultdict(lambda: defaultdict(lambda: 0))
    custom_q[GridWorldState(5, 1)]['right'] = 1.0
    custom_q[GridWorldState(2, 1)]['right'] = 1.0

    # Make a normal q-learning agent and another initialized with the custom_q
    # above. Finally, make a random agent to compare against.
    ql_agent = QLearningAgent(actions=mdp.get_actions(), epsilon=0.2, alpha=0.4)
    ql_agent_pot = QLearningAgent(actions=mdp.get_actions(), epsilon=0.2, alpha=0.4,
                                  custom_q_init=custom_q, name="PotQ")
    rand_agent = RandomAgent(actions=mdp.get_actions())

    # Run experiment and make plot.
    run_agents_on_mdp([ql_agent, ql_agent_pot, rand_agent], mdp, instances=2,
                      episodes=60, steps=200, open_plot=open_plot, verbose=True)
def main(open_plot=True):
    """Short Q-learning vs. random comparison on a slippery 4x3 grid world.

    Args:
        open_plot (bool): If True, open the generated plot.
    """
    # Setup MDP.
    mdp = GridWorldMDP(width=4, height=3, init_loc=(1, 1), goal_locs=[(4, 3)],
                       lava_locs=[(4, 2)], gamma=0.95, walls=[(2, 2)],
                       slip_prob=0.05)

    # Make agents.
    ql_agent = QLearningAgent(actions=mdp.get_actions())
    rand_agent = RandomAgent(actions=mdp.get_actions())

    # Run experiment and make plot.
    run_agents_on_mdp([ql_agent, rand_agent], mdp,
                      instances=10, episodes=50, steps=10, open_plot=open_plot)
def main():
    """Visualize a small slippery grid world in the mode chosen on the command line.

    Modes: "value" (state-value heatmap), "policy" (VI policy), "agent"
    (trained Q-learner stepping), "learning" (live learning), "interactive"
    (keyboard-driven actions).
    """
    # Setup MDP, Agents.
    mdp = GridWorldMDP(width=4, height=3, init_loc=(1, 1), goal_locs=[(4, 3)],
                       lava_locs=[(4, 2)], gamma=0.95, walls=[(2, 2)],
                       slip_prob=0.1)
    ql_agent = QLearningAgent(mdp.get_actions(), epsilon=0.2, alpha=0.2)

    # Choose viz type from the command line.
    # Fix: the parsed value was previously clobbered by an unconditional
    # `viz = "value"`, which made the CLI argument dead code.
    viz = parse_args()

    if viz == "value":
        # --> Color corresponds to higher value.
        # Run experiment and make plot.
        mdp.visualize_value()
    elif viz == "policy":
        # Viz policy computed by value iteration.
        value_iter = ValueIteration(mdp)
        value_iter.run_vi()
        policy = value_iter.policy
        mdp.visualize_policy(policy)
    elif viz == "agent":
        # --> Press <spacebar> to advance the agent.
        # First let the agent solve the problem and then visualize the agent's
        # resulting policy.
        print("\n", str(ql_agent), "interacting with", str(mdp))
        run_single_agent_on_mdp(ql_agent, mdp, episodes=500, steps=200)
        mdp.visualize_agent(ql_agent)
    elif viz == "learning":
        # --> Press <r> to reset.
        # Show agent's interaction with the environment.
        mdp.visualize_learning(ql_agent, delay=0.005, num_ep=500, num_steps=200)
    elif viz == "interactive":
        # Press <1>, <2>, <3>, and so on to execute action 1, action 2, etc.
        mdp.visualize_interaction()
def main(open_plot=True):
    """Compare Double Q-learning, Q-learning, and a random baseline on a 10x10 grid.

    Args:
        open_plot (bool): If True, open the generated plot.
    """
    # Setup MDP, Agents.
    mdp = GridWorldMDP(width=10, height=10, init_loc=(1, 1),
                       goal_locs=[(10, 10)], gamma=0.95)
    dq_agent = DoubleQAgent(actions=mdp.get_actions())
    # NOTE(review): QLearnerAgent is the older simple_rl class name; sibling
    # examples use QLearningAgent — confirm which name this file's imports provide.
    ql_agent = QLearnerAgent(actions=mdp.get_actions())
    rand_agent = RandomAgent(actions=mdp.get_actions())

    # Run experiment and make plot.
    run_agents_on_mdp([dq_agent, ql_agent, rand_agent], mdp,
                      instances=50, episodes=150, steps=100, open_plot=open_plot)
def main(open_plot=True):
    """Single-episode Q-learning vs. random comparison on a tiny 4x3 grid world.

    Args:
        open_plot (bool): If True, open the generated plot.
    """
    # Setup MDP, Agents.
    mdp = GridWorldMDP(width=4, height=3, init_loc=(1, 1), goal_locs=[(4, 3)],
                       gamma=0.95, walls=[(2, 2)])
    ql_agent = QLearningAgent(actions=mdp.get_actions())
    rand_agent = RandomAgent(actions=mdp.get_actions())

    # Run experiment and make plot.
    run_agents_on_mdp([ql_agent, rand_agent], mdp,
                      instances=10, episodes=1, steps=20, open_plot=open_plot)
def main(open_plot=True):
    """Q-learning vs. random on a 4x3 grid world, plotting undiscounted reward.

    Args:
        open_plot (bool): If True, open the generated plot.
    """
    # Setup MDP.
    mdp = GridWorldMDP(width=4, height=3, init_loc=(1, 1), goal_locs=[(4, 3)],
                       lava_locs=[(4, 2)], gamma=0.95, walls=[(2, 2)])

    # Make agents.
    ql_agent = QLearningAgent(actions=mdp.get_actions())
    rand_agent = RandomAgent(actions=mdp.get_actions())

    # Run experiment and make plot; track raw (not discounted) reward.
    run_agents_on_mdp([ql_agent, rand_agent], mdp,
                      instances=5, episodes=50, steps=25,
                      open_plot=open_plot, track_disc_reward=False)
def main(open_plot=True):
    """Run a small experiment, then reproduce it from its saved experiment file.

    Args:
        open_plot (bool): If True, open the generated plots.
    """
    # Setup MDP.
    mdp = GridWorldMDP(width=4, height=3, init_loc=(1, 1), goal_locs=[(4, 3)],
                       lava_locs=[(4, 2)], gamma=0.95, walls=[(2, 2)])

    # Make agents.
    ql_agent = QLearningAgent(actions=mdp.get_actions())
    rand_agent = RandomAgent(actions=mdp.get_actions())

    # Run experiment and make plot.
    run_agents_on_mdp([ql_agent, rand_agent], mdp,
                      instances=5, episodes=50, steps=25, open_plot=open_plot)

    # Reproduce the experiment from the file written by the run above.
    reproduce_from_exp_file(exp_name=str(mdp), open_plot=open_plot)
# Basic grid-world experiment: Q-learning vs. R-Max vs. a random baseline.
from simple_rl.agents import QLearningAgent, RandomAgent, RMaxAgent
from simple_rl.tasks import GridWorldMDP
from simple_rl.run_experiments import run_agents_on_mdp

# Setup MDP.
mdp = GridWorldMDP(width=4, height=3, init_loc=(1, 1), goal_locs=[(4, 3)],
                   lava_locs=[(4, 2)], gamma=0.95, walls=[(2, 2)],
                   slip_prob=0.05)

# Setup Agents (order matters only for plot legend ordering).
agents = [
    QLearningAgent(actions=mdp.get_actions()),
    RMaxAgent(actions=mdp.get_actions()),
    RandomAgent(actions=mdp.get_actions()),
]

# Run experiment and make plot.
run_agents_on_mdp(agents, mdp, instances=5, episodes=50, steps=10)
#!/usr/bin/env python
"""Grid-world experiment comparing R-Max, Q-learning, and a random baseline."""

# Other imports.
import srl_example_setup
from simple_rl.agents import QLearnerAgent, RandomAgent, RMaxAgent
from simple_rl.tasks import GridWorldMDP
from simple_rl.run_experiments import run_agents_on_mdp

# Setup MDP, Agents.
mdp = GridWorldMDP(width=6, height=6, init_loc=(1, 1), goal_locs=[(6, 6)])
rmax_agent = RMaxAgent(actions=mdp.get_actions())
ql_agent = QLearnerAgent(actions=mdp.get_actions())
rand_agent = RandomAgent(actions=mdp.get_actions())

# Run experiment and make plot.
# Fix: rmax_agent was constructed but omitted from the agent list, so it never
# ran; include it in the comparison. (If the omission was deliberate — e.g.
# R-Max being slow — drop it from construction instead.)
run_agents_on_mdp([rmax_agent, ql_agent, rand_agent], mdp,
                  instances=10, episodes=30, steps=50, reset_at_terminal=True)
slip_prob=0.4, step_cost=args.step_cost, rand_init=True) mdps.append(mdp) mdp_list = MDPList(mdps) thres_sm = args.thres_sm thres_lg = args.thres_lg ql_agent = QLearningAgent(actions=mdp_list.get_actions(), gamma=mdp_list.get_gamma()) rmax_agent = TabularRMaxAgent(states=mdp.states, state_map=mdp.state_map, actions=mdp.get_actions(), s_a_threshold=thres_lg, greedy=args.greedy, gamma=mdp_list.get_gamma()) multi_agent = MultiTaskRMaxAgent(states=mdp.states, state_map=mdp.state_map, actions=mdp.get_actions(), thres_sm=thres_sm, thres_lg=thres_lg, t1=args.t1 + 1, model_gap=args.model_gap, greedy=args.greedy, xi=args.xi, gamma=mdp_list.get_gamma()) pattern_agent = PatternLearningAgent(states=mdp.states,