Exemplo n.º 1
0
def get_max_neighbors_test():
    """get_max_neighbors should return exactly the direction whose Q-value
    was raised above the zero-initialized rest."""
    agent = Agent(3, 2)
    world = World(5, 5, [(1, 1, 3), (2, 2, 4)], [(3, 3, 3), (4, 4, 4)],
                  -1, 13, 13)
    table = QTable(world)

    state = get_current_state(world, agent)
    table[state]['south'] = 13

    neighbors = world.get_neighbors(*agent.get_position())
    assert get_max_neighbors(neighbors, state, table) == ['south']
Exemplo n.º 2
0
def get_current_state_test():
    """State tuples should track the agent's position and the world's
    pickup availability flags."""
    world = World(5, 5, [(1, 1, 3), (2, 2, 4)], [(3, 3, 3), (4, 4, 4)],
                  -1, 13, 13)
    agent = Agent(0, 0)

    expected = (0, 0, False, True, True, True, True)
    assert get_current_state(world, agent) == expected

    # Relocate the agent and exhaust the first pickup cell's three blocks.
    agent._set_position(3, 3)
    for _ in range(3):
        world.pick_up(1, 1)

    expected = (3, 3, False, False, True, True, True)
    assert get_current_state(world, agent) == expected
Exemplo n.º 3
0
def SARSA(world, agent, qtable, action, next_action, learning_rate,
          discount_rate, state_space='big'):
    """Apply one on-policy SARSA update for taking `action` in the agent's
    current state, given that `next_action` will be taken next.

    Update rule: Q(s,a) <- Q(s,a) + lr * (r + gamma * Q(s',a') - Q(s,a)).
    No-op when the current state is a pickup/dropoff (its qtable entry is a
    string marker, not a direction->value dict), since those states' Q-values
    are never learned.
    """
    state = get_current_state(world, agent, state_space=state_space)
    # Pickup/dropoff states are stored as string markers; skip them.
    if isinstance(qtable[state], str):
        return

    assert action in ['north', 'south', 'east', 'west']
    assert next_action in ['north', 'south', 'east', 'west']
    next_state = get_current_state(world, agent.pretend_move(action),
                                   state_space=state_space)
    r = world.get_reward(*agent.get_position(), agent.is_holding_block())
    # BUG FIX: the original omitted discount_rate from the TD target,
    # silently ignoring the parameter (i.e. it behaved as gamma == 1).
    qtable[state][action] = qtable[state][action] + learning_rate * (
        r + discount_rate * qtable[next_state][next_action]
        - qtable[state][action])
Exemplo n.º 4
0
def state_lookup():
    """A fresh QTable should map an ordinary state to zeroed Q-values for
    all four directions."""
    world = World(5, 5, [(1, 1, 3), (2, 2, 4)], [(3, 3, 3), (4, 4, 4)],
                  -1, 13, 13)
    table = QTable(world)
    agent = Agent(0, 0)

    entry = table[get_current_state(world, agent)]
    assert entry == dict.fromkeys(('north', 'south', 'east', 'west'), 0)
Exemplo n.º 5
0
def q_learning(world, agent, qtable, action, next_action, learning_rate,
               discount_rate, state_space='big'):
    """Apply one off-policy Q-learning update for taking `action` in the
    agent's current state.

    Update rule: Q(s,a) <- (1-lr)*Q(s,a) + lr*(r + gamma * max_a' Q(s',a')).
    `next_action` is unused (Q-learning bootstraps from the greedy next
    action) but kept so the signature matches SARSA's. No-op when the
    current state is a pickup/dropoff (its qtable entry is a string marker).
    """
    state = get_current_state(world, agent, state_space=state_space)
    # Pickup/dropoff states are stored as string markers; skip them.
    if isinstance(qtable[state], str):
        return

    assert action in ['north', 'south', 'east', 'west']
    next_state = get_current_state(world, agent.pretend_move(action),
                                   state_space=state_space)
    x, y = next_state[:2]
    neighbors = world.get_neighbors(x, y)
    # BUG FIX: the greedy bootstrap action must be evaluated in next_state
    # (the original passed `state`), and `max()` over the returned direction
    # names compared the strings alphabetically rather than by Q-value.
    # All directions returned by get_max_neighbors are tied on Q-value in
    # next_state, so taking the first is sufficient.
    best_dir = get_max_neighbors(neighbors, next_state, qtable)[0]
    r = world.get_reward(*agent.get_position(), agent.is_holding_block())
    # BUG FIX: the original dropped the learning_rate factor on the target
    # term, so the update was not a convex combination of old and new values.
    qtable[state][action] = (
        (1 - learning_rate) * qtable[state][action]
        + learning_rate * (r + discount_rate * qtable[next_state][best_dir]))
Exemplo n.º 6
0
def get_adjacent_states_test():
    """get_adjacent_states should produce a state for each of the four
    cells adjacent to the agent's position."""
    a = Agent(3, 3)
    w = World(5, 5, [(1, 1, 3), (2, 2, 4)], [(3, 3, 3), (4, 4, 4)], -1, 13, 13)
    adj = get_adjacent_states(w.get_neighbors(3, 3), get_current_state(w, a))

    # BUG FIX: the original asserted the same (3, 4, ...) state four times;
    # check each of the four distinct neighbors of (3, 3) instead.
    assert ((3, 4, False, True, True, True, True) in adj)
    assert ((3, 2, False, True, True, True, True) in adj)
    assert ((2, 3, False, True, True, True, True) in adj)
    assert ((4, 3, False, True, True, True, True) in adj)
Exemplo n.º 7
0
def p_greedy(agent, world, qtable, state_space='big'):
    """Greedy policy: step onto an adjacent pickup (when empty-handed) or
    dropoff (when carrying) immediately; otherwise move toward one of the
    highest-Q neighbors, breaking ties uniformly at random."""
    holding = agent.is_holding_block()
    neighbors = world.get_neighbors(*agent.get_position())

    for direction, cell in neighbors.items():
        wants_pickup = world.is_pickup(*cell) and not holding
        wants_dropoff = world.is_dropoff(*cell) and holding
        if wants_pickup or wants_dropoff:
            return direction

    state = get_current_state(world, agent, state_space=state_space)
    best = get_max_neighbors(neighbors, state, qtable)
    return random.choice(best)
Exemplo n.º 8
0
def p_exploit(agent, world, qtable, state_space='big'):
    """Mostly-greedy policy: mandatory pickup/dropoff moves come first;
    otherwise take the greedy move 80% of the time and a random move the
    remaining 20%."""
    holding = agent.is_holding_block()
    neighbors = world.get_neighbors(*agent.get_position())

    for direction, cell in neighbors.items():
        if holding:
            if world.is_dropoff(*cell):
                return direction
        elif world.is_pickup(*cell):
            return direction

    if random.randint(1, 10) <= 8:
        state = get_current_state(world, agent, state_space=state_space)
        return random.choice(get_max_neighbors(neighbors, state, qtable))
    # Delegate the exploratory move to the random policy.
    return p_random(agent, world, qtable)