def main(open_plot=True):
    '''
    Run a short Rock-Paper-Scissors experiment: a Q-learning agent plays
    against an opponent that repeats one randomly drawn action.

    Args:
        open_plot (bool): forwarded unchanged to play_markov_game.
    '''
    # The game both agents act in.
    game = RockPaperScissorsMDP()

    # Learner first, then the opponent's action: the construction order is
    # kept as-is so the random draw happens at the same point as before.
    learner = QLearningAgent(actions=game.get_actions())
    fixed_action = random.choice(game.get_actions())

    # The opponent ignores the state and always plays the drawn action.
    opponent = FixedPolicyAgent(policy=lambda s: fixed_action)

    # Run experiment and make plot.
    play_markov_game(
        [learner, opponent],
        game,
        instances=15,
        episodes=1,
        steps=40,
        open_plot=open_plot,
    )
def choose_mdp(mdp_name, env_name="Asteroids-v0"):
    '''
    Build and return the MDP registered under @mdp_name.

    Only the requested MDP is constructed; the other entries are never
    instantiated.

    Args:
        mdp_name (str): one of {gym, grid, four_room, chain, taxi, random,
            prison, rps, grid_game, multi}
        env_name (str): gym environment name, like 'CartPole-v0'. Only read
            when mdp_name is "gym".

    Returns:
        (MDP): the requested MDP. For "multi" the result is a dict keyed by
            0.5 instead of a single MDP (see the note in _make_multi).

    Raises:
        ValueError: if mdp_name is "gym" and GymMDP cannot be imported.
        KeyError: if mdp_name is not one of the names listed above.
    '''
    # Other imports
    from simple_rl.tasks import ChainMDP, GridWorldMDP, FourRoomMDP, TaxiOOMDP, RandomMDP, PrisonersDilemmaMDP, RockPaperScissorsMDP, GridGameMDP

    if mdp_name == "gym":
        # OpenAI Gym MDP.
        try:
            from simple_rl.tasks.gym.GymMDPClass import GymMDP
        except ImportError:
            # Only a failed import is reported as "not installed"; any other
            # error raised while loading the module propagates unchanged.
            raise ValueError("(simple_rl) Error: OpenAI gym not installed.")
        return GymMDP(env_name, render=True)

    def _make_taxi():
        # Taxi MDP.
        agent = {"x": 1, "y": 1, "has_passenger": 0}
        passengers = [{"x": 4, "y": 3, "dest_x": 2, "dest_y": 2, "in_taxi": 0}]
        walls = []
        return TaxiOOMDP(10, 10, slip_prob=0.0, agent=agent, walls=walls, passengers=passengers)

    def _make_multi():
        # NOTE(review): both entries use the key 0.5, so the second RandomMDP
        # overwrites the first and the returned dict has a single item. This
        # is kept exactly as it was; confirm the shape the consumer expects
        # before changing it.
        return {0.5: RandomMDP(num_states=40, num_rand_trans=20),
                0.5: RandomMDP(num_states=40, num_rand_trans=5)}

    # Name -> zero-argument builder, so a lookup constructs one MDP only.
    builders = {
        "grid": lambda: GridWorldMDP(5, 5, (1, 1), goal_locs=[(5, 3), (4, 1)]),
        "four_room": lambda: FourRoomMDP(),
        "chain": lambda: ChainMDP(5),
        "taxi": _make_taxi,
        "random": lambda: RandomMDP(num_states=40, num_rand_trans=20),
        "prison": lambda: PrisonersDilemmaMDP(),
        "rps": lambda: RockPaperScissorsMDP(),
        "grid_game": lambda: GridGameMDP(),
        "multi": _make_multi,
    }

    # An unknown name raises KeyError(mdp_name), as before.
    build = builders[mdp_name]
    return build()
#!/usr/bin/env python

# Example script: a Q-learner plays Rock-Paper-Scissors against an opponent
# that repeats a single randomly chosen action.

# Python imports.
import random

# Other imports.
# srl_example_setup is imported before simple_rl and is otherwise unused here;
# presumably it makes simple_rl importable -- TODO confirm.
import srl_example_setup
from simple_rl.agents import QLearnerAgent, FixedPolicyAgent
from simple_rl.tasks import RockPaperScissorsMDP
from simple_rl.run_experiments import play_markov_game

# NOTE(review): main() in this file builds its learner with QLearningAgent,
# while this script imports QLearnerAgent -- confirm which name the installed
# simple_rl exports.

# Setup MDP, Agents.
markov_game = RockPaperScissorsMDP()
ql_agent = QLearnerAgent(actions=markov_game.get_actions())

# The opponent's action is drawn once and then played on every step.
fixed_action = random.choice(markov_game.get_actions())
fixed_agent = FixedPolicyAgent(policy=lambda s: fixed_action)

# Run experiment and make plot.
play_markov_game(
    [ql_agent, fixed_agent],
    markov_game,
    instances=15,
    episodes=1,
    steps=40,
)