Exemplo n.º 1
0
def main(open_plot=True):
    '''
    Summary:
        Plays a QLearningAgent against a FixedPolicyAgent on
        RockPaperScissorsMDP via play_markov_game.

    Args:
        open_plot (bool): passed straight through to play_markov_game.
    '''
    rps_game = RockPaperScissorsMDP()

    # The learner gets the game's action set.
    learner = QLearningAgent(actions=rps_game.get_actions())

    # The opponent's single action is drawn once, up front, and then
    # returned for every state it is asked about.
    constant_move = random.choice(rps_game.get_actions())

    def always_constant_move(s):
        return constant_move

    opponent = FixedPolicyAgent(policy=always_constant_move)

    # Run experiment and make plot.
    play_markov_game(
        [learner, opponent],
        rps_game,
        instances=15,
        episodes=1,
        steps=40,
        open_plot=open_plot,
    )
Exemplo n.º 2
0
def choose_mdp(mdp_name, env_name="Asteroids-v0"):
    '''
    Summary:
        Builds and returns the task registered under @mdp_name. Only the
        requested task is constructed.

    Args:
        mdp_name (str): one of {gym, grid, four_room, chain, taxi, random,
            prison, rps, grid_game, multi}
        env_name (str): gym environment name, like 'CartPole-v0'. Only read
            when mdp_name is "gym".

    Returns:
        (MDP): the requested task. For "multi" a dict is returned instead
            (see the note next to its builder below).

    Raises:
        ValueError: mdp_name is "gym" and GymMDP could not be imported.
        KeyError: mdp_name is not one of the names listed above.
    '''
    if mdp_name == "gym":
        # OpenAI Gym MDP. Handled first so this path does not depend on the
        # task classes or taxi configuration used by the other names.
        try:
            from simple_rl.tasks.gym.GymMDPClass import GymMDP
        except ImportError:
            # Only import failures are translated; anything else (including
            # KeyboardInterrupt) propagates unchanged.
            raise ValueError("(simple_rl) Error: OpenAI gym not installed.")
        return GymMDP(env_name, render=True)

    # Other imports.
    from simple_rl.tasks import (ChainMDP, GridWorldMDP, FourRoomMDP,
                                 TaxiOOMDP, RandomMDP, PrisonersDilemmaMDP,
                                 RockPaperScissorsMDP, GridGameMDP)

    # Taxi MDP.
    agent = {"x": 1, "y": 1, "has_passenger": 0}
    passengers = [{"x": 4, "y": 3, "dest_x": 2, "dest_y": 2, "in_taxi": 0}]
    walls = []

    # Zero-argument builders, so that looking up one name does not construct
    # every other task as well.
    builders = {
        "grid": lambda: GridWorldMDP(5, 5, (1, 1), goal_locs=[(5, 3), (4, 1)]),
        "four_room": lambda: FourRoomMDP(),
        "chain": lambda: ChainMDP(5),
        "taxi": lambda: TaxiOOMDP(10, 10, slip_prob=0.0, agent=agent,
                                  walls=walls, passengers=passengers),
        "random": lambda: RandomMDP(num_states=40, num_rand_trans=20),
        "prison": lambda: PrisonersDilemmaMDP(),
        "rps": lambda: RockPaperScissorsMDP(),
        "grid_game": lambda: GridGameMDP(),
        # NOTE(review): both entries use the key 0.5, so the first RandomMDP
        # is built and then overwritten; the returned dict has one entry
        # (the num_rand_trans=5 task). Kept as-is to preserve the returned
        # value -- confirm the intended shape of the "multi" result.
        "multi": lambda: {0.5: RandomMDP(num_states=40, num_rand_trans=20),
                          0.5: RandomMDP(num_states=40, num_rand_trans=5)},
    }

    # Unknown names raise KeyError.
    return builders[mdp_name]()
Exemplo n.º 3
0
#!/usr/bin/env python

# Python imports.
import random

# Other imports.
import srl_example_setup
from simple_rl.agents import QLearnerAgent, FixedPolicyAgent
from simple_rl.tasks import RockPaperScissorsMDP
from simple_rl.run_experiments import play_markov_game

# Build the game and the first player, which receives the game's actions.
markov_game = RockPaperScissorsMDP()
ql_agent = QLearnerAgent(actions=markov_game.get_actions())

# The second player returns one action, drawn at random once, for any state.
fixed_action = random.choice(markov_game.get_actions())
fixed_agent = FixedPolicyAgent(policy=lambda s: fixed_action)

# Run experiment and make plot.
players = [ql_agent, fixed_agent]
play_markov_game(players, markov_game, instances=15, episodes=1, steps=40)