Python GridWorld примеры использования

Язык программирования: Python

Пространство имен/Пакет: env.grid_world

Класс/Тип: GridWorld

Примеров на hotexamples.com: 6

Python GridWorld - 6 примеров найдено. Это лучшие примеры Python кода для env.grid_world.GridWorld, полученные из open source проектов. Вы можете ставить оценку каждому примеру, чтобы помочь нам улучшить качество примеров.

Основные методы

Показать / Скрыть

add_obstructions(7)

add_transition_probability(7)

GridWorld(6)

add_discount(6)

add_rewards(6)

create_gridworld(6)

Пример #1

Показать файл

Файл: example_plot_gridworld.py Проект: linesd/tabular-methods

# Cell lists describing the world; one coordinate pair per cell, grouped
# here by the first coordinate for readability.
obstructions = np.array([
    [0, 7],
    [1, 1], [1, 2], [1, 3], [1, 7],
    [2, 1], [2, 3], [2, 7],
    [3, 1], [3, 3], [3, 5],
    [4, 3], [4, 5], [4, 7],
    [5, 3], [5, 7], [5, 9],
    [6, 3], [6, 9],
    [7, 1], [7, 6], [7, 7], [7, 8], [7, 9],
    [8, 1], [8, 5], [8, 6],
    [9, 1],
])
bad_states = np.array([[1, 9], [4, 2], [4, 4], [7, 5], [9, 9]])
restart_states = np.array([[3, 7], [8, 2]])
start_state = np.array([[0, 4]])
goal_states = np.array([[0, 9], [2, 2], [8, 7]])

# Assemble the model step by step: geometry, special cells, rewards,
# transition noise and discount, then build it.
# NOTE(review): num_rows / num_cols are expected to be defined earlier in the
# script; they are not visible in this excerpt.
gw = GridWorld(
    num_rows=num_rows,
    num_cols=num_cols,
    start_state=start_state,
    goal_states=goal_states,
)
gw.add_obstructions(
    obstructed_states=obstructions,
    bad_states=bad_states,
    restart_states=restart_states,
)
gw.add_rewards(
    step_reward=-1,
    goal_reward=10,
    bad_state_reward=-6,
    restart_state_reward=-10,
)
gw.add_transition_probability(p_good_transition=0.7, bias=0.5)
gw.add_discount(discount=0.9)
model = gw.create_gridworld()

# Render the (still unsolved) world to an image file.
path = "../doc/imgs/unsolved_gridworld.png"
plot_gridworld(model, title="Test world", path=path)

Пример #2

Показать файл

def test_value_iteration():
    """Compare value_iteration output with stored reference data.

    Builds a 9-row by 12-column grid world, runs ``value_iteration`` for at
    most 100 iterations and requires both the value function and the policy
    to match the arrays saved in ``test_value_iteration.npy`` exactly.
    """
    # Reference data: column 0 is compared with the value function,
    # column 1 with the policy; both are reshaped to column vectors.
    reference = np.load("../data/test_data/test_value_iteration.npy")
    expected_value = reference[:, 0].reshape(-1, 1)
    expected_pi = reference[:, 1].reshape(-1, 1)

    # World description.
    n_cols = 12
    n_rows = 9
    wall_cells = np.array([
        [8, 6], [7, 6], [6, 6], [5, 6], [4, 6], [3, 6],
        [3, 7], [3, 8], [3, 9],
    ])
    penalty_cells = np.array([[2, 1]])
    origin = np.array([[0, 1]])
    target = np.array([[7, 8]])

    # Build the model.
    world = GridWorld(
        num_rows=n_rows,
        num_cols=n_cols,
        start_state=origin,
        goal_states=target,
    )
    world.add_obstructions(obstructed_states=wall_cells,
                           bad_states=penalty_cells)
    world.add_rewards(step_reward=-1, goal_reward=10, bad_state_reward=-6)
    world.add_transition_probability(p_good_transition=0.7, bias=0.5)
    world.add_discount(discount=0.9)
    model = world.create_gridworld()

    # Solve and check both outputs against the reference.
    computed_value, computed_pi = value_iteration(model, maxiter=100)

    assert np.all(computed_value == expected_value)
    assert np.all(computed_pi == expected_pi)

Пример #3

Показать файл

Файл: test_gridworld.py Проект: linesd/tabular-methods

def test_gridworld():
    """Check a 9x12 grid world against the reference model in gridworld.mat.

    The reward vector ``R`` and the four slices ``P[:, :, 0..3]`` of the
    transition array must equal the stored reference exactly.
    """
    # Reference model loaded from the .mat file.
    reference = loadmat('../data/test_data/gridworld.mat')['model']

    # World description.
    n_cols = 12
    n_rows = 9
    wall_cells = np.array([
        [8, 6], [7, 6], [6, 6], [5, 6], [4, 6], [3, 6],
        [3, 7], [3, 8], [3, 9],
    ])
    penalty_cells = np.array([[2, 1]])
    origin = np.array([[0, 1]])
    target = np.array([[7, 8]])

    # Build the model.
    world = GridWorld(
        num_rows=n_rows,
        num_cols=n_cols,
        start_state=origin,
        goal_states=target,
    )
    world.add_obstructions(obstructed_states=wall_cells,
                           bad_states=penalty_cells)
    world.add_rewards(step_reward=-1, goal_reward=10, bad_state_reward=-6)
    world.add_transition_probability(p_good_transition=0.7, bias=0.5)
    world.add_discount(discount=0.9)
    model = world.create_gridworld()

    # Rewards first, then each of the four transition slices in order.
    assert np.all(model.R == reference['R'][0][0][:, 0].reshape(-1, 1))
    for action in range(4):
        assert np.all(
            model.P[:, :, action] == reference['P'][0][0][:, :, action])

Пример #4

Показать файл

def test_qlearning():
    """Compare qlearning output with stored reference data on a 4x4 world.

    NumPy's global random generator is seeded with 1 before anything else
    runs. The resulting ``q`` and ``pi`` must equal the arrays saved in
    ``test_qlearning.npy`` exactly.
    """
    np.random.seed(1)

    # Reference data: column 0 is compared with q, column 1 with the policy.
    reference = np.load("../data/test_data/test_qlearning.npy")
    expected_q = reference[:, 0].reshape(-1, 1)
    expected_pi = reference[:, 1].reshape(-1, 1)

    # World description.
    n_rows = 4
    n_cols = 4
    wall_cells = np.array([[1, 1], [2, 1], [1, 2]])
    penalty_cells = np.array([[3, 0]])
    reset_cells = np.array([[2, 2]])
    origin = np.array([[0, 0]])
    target = np.array([[3, 3]])

    # Build the world.
    world = GridWorld(
        num_rows=n_rows,
        num_cols=n_cols,
        start_state=origin,
        goal_states=target,
    )
    world.add_obstructions(
        obstructed_states=wall_cells,
        bad_states=penalty_cells,
        restart_states=reset_cells,
    )
    world.add_rewards(
        step_reward=-1,
        goal_reward=10,
        bad_state_reward=-6,
        restart_state_reward=-10,
    )
    world.add_transition_probability(p_good_transition=0.8, bias=0.5)
    world.add_discount(discount=0.9)
    model = world.create_gridworld()

    # Solve with Q-learning; the third return value is not checked.
    learned_q, learned_pi, _ = qlearning(
        model,
        alpha=0.8,
        epsilon=0.1,
        maxiter=100,
        maxeps=1000,
    )

    # Check the Q-learning outputs against the reference.
    assert np.all(learned_q == expected_q)
    assert np.all(learned_pi == expected_pi)

Пример #5

Показать файл

def test_cliffworld():
    """Check a 5x10 cliff world against the reference in cliffworld.mat.

    Transitions are configured with ``p_good_transition=1`` and ``bias=0``.
    The reward vector ``R`` and the four slices ``P[:, :, 0..3]`` must equal
    the stored reference exactly.
    """
    # Reference model loaded from the .mat file.
    reference = loadmat('../data/test_data/cliffworld.mat')['model']

    # World description.
    n_rows = 5
    n_cols = 10
    reset_cells = np.array([
        [4, 1], [4, 2], [4, 3], [4, 4], [4, 5], [4, 6], [4, 7],
    ])
    wall_cells = np.array([[0, 9], [1, 9], [2, 9], [3, 9], [4, 9]])
    origin = np.array([[4, 0]])
    targets = np.array([[4, 8]])

    # Build the model.
    world = GridWorld(
        num_rows=n_rows,
        num_cols=n_cols,
        start_state=origin,
        goal_states=targets,
    )
    world.add_obstructions(obstructed_states=wall_cells,
                           restart_states=reset_cells)
    world.add_rewards(step_reward=-1, goal_reward=10,
                      restart_state_reward=-100)
    world.add_transition_probability(p_good_transition=1, bias=0)
    world.add_discount(discount=0.9)
    model = world.create_gridworld()

    # Rewards first, then each of the four transition slices in order.
    assert np.all(model.R == reference['R'][0][0][:, 0].reshape(-1, 1))
    for action in range(4):
        assert np.all(
            model.P[:, :, action] == reference['P'][0][0][:, :, action])

Пример #6

Показать файл

def test_smallworld():
    """Check a 4x4 grid world against the reference in smallworld.mat.

    Only obstructions, a step reward and a goal reward are configured. The
    reward vector ``R`` and the four slices ``P[:, :, 0..3]`` must equal the
    stored reference exactly.
    """
    # Reference model loaded from the .mat file.
    reference = loadmat('../data/test_data/smallworld.mat')['model']

    # World description.
    n_cols = 4
    n_rows = 4
    wall_cells = np.array([[1, 1], [2, 1], [1, 2]])
    origin = np.array([[0, 0]])
    target = np.array([[3, 3]])

    # Build the model.
    world = GridWorld(
        num_rows=n_rows,
        num_cols=n_cols,
        start_state=origin,
        goal_states=target,
    )
    world.add_obstructions(obstructed_states=wall_cells)
    world.add_rewards(step_reward=-1, goal_reward=10)
    world.add_transition_probability(p_good_transition=0.8, bias=0.5)
    world.add_discount(discount=0.9)
    model = world.create_gridworld()

    # Rewards first, then each of the four transition slices in order.
    assert np.all(model.R == reference['R'][0][0][:, 0].reshape(-1, 1))
    for action in range(4):
        assert np.all(
            model.P[:, :, action] == reference['P'][0][0][:, :, action])