Exemplo n.º 1
0
def get_max_neighbors_test():
    """get_max_neighbors should return exactly the direction whose Q-value
    was raised above the zero-initialized rest."""
    agent = Agent(3, 2)
    world = World(5, 5, [(1, 1, 3), (2, 2, 4)], [(3, 3, 3), (4, 4, 4)],
                  -1, 13, 13)
    table = QTable(world)

    state = get_current_state(world, agent)
    table[state]['south'] = 13

    neighbors = world.get_neighbors(*agent.get_position())
    assert get_max_neighbors(neighbors, state, table) == ['south']
Exemplo n.º 2
0
def get_current_state_test():
    """State tuples should track the agent's position and the world's
    pickup availability flags."""
    world = World(5, 5, [(1, 1, 3), (2, 2, 4)], [(3, 3, 3), (4, 4, 4)],
                  -1, 13, 13)
    agent = Agent(0, 0)

    expected = (0, 0, False, True, True, True, True)
    assert get_current_state(world, agent) == expected

    # Relocate the agent and exhaust the first pickup cell's three blocks.
    agent._set_position(3, 3)
    for _ in range(3):
        world.pick_up(1, 1)

    expected = (3, 3, False, False, True, True, True)
    assert get_current_state(world, agent) == expected
Exemplo n.º 3
0
def SARSA(world, agent, qtable, action, next_action, learning_rate,
          discount_rate, state_space='big'):
    """Apply one on-policy SARSA update for taking `action` in the agent's
    current state, given that `next_action` will be taken next.

    Update rule: Q(s,a) <- Q(s,a) + lr * (r + gamma * Q(s',a') - Q(s,a)).
    No-op when the current state is a pickup/dropoff (its qtable entry is a
    string marker, not a direction->value dict), since those states' Q-values
    are never learned.
    """
    state = get_current_state(world, agent, state_space=state_space)
    # Pickup/dropoff states are stored as string markers; skip them.
    if isinstance(qtable[state], str):
        return

    assert action in ['north', 'south', 'east', 'west']
    assert next_action in ['north', 'south', 'east', 'west']
    next_state = get_current_state(world, agent.pretend_move(action),
                                   state_space=state_space)
    r = world.get_reward(*agent.get_position(), agent.is_holding_block())
    # BUG FIX: the original omitted discount_rate from the TD target,
    # silently ignoring the parameter (i.e. it behaved as gamma == 1).
    qtable[state][action] = qtable[state][action] + learning_rate * (
        r + discount_rate * qtable[next_state][next_action]
        - qtable[state][action])
Exemplo n.º 4
0
def state_lookup():
    """A fresh QTable should map an ordinary state to zeroed Q-values for
    all four directions."""
    world = World(5, 5, [(1, 1, 3), (2, 2, 4)], [(3, 3, 3), (4, 4, 4)],
                  -1, 13, 13)
    table = QTable(world)
    agent = Agent(0, 0)

    entry = table[get_current_state(world, agent)]
    assert entry == dict.fromkeys(('north', 'south', 'east', 'west'), 0)
Exemplo n.º 5
0
def q_learning(world, agent, qtable, action, next_action, learning_rate,
               discount_rate, state_space='big'):
    """Apply one off-policy Q-learning update for taking `action` in the
    agent's current state.

    Update rule: Q(s,a) <- (1-lr)*Q(s,a) + lr*(r + gamma * max_a' Q(s',a')).
    `next_action` is unused (Q-learning bootstraps from the greedy next
    action) but kept so the signature matches SARSA's. No-op when the
    current state is a pickup/dropoff (its qtable entry is a string marker).
    """
    state = get_current_state(world, agent, state_space=state_space)
    # Pickup/dropoff states are stored as string markers; skip them.
    if isinstance(qtable[state], str):
        return

    assert action in ['north', 'south', 'east', 'west']
    next_state = get_current_state(world, agent.pretend_move(action),
                                   state_space=state_space)
    x, y = next_state[:2]
    neighbors = world.get_neighbors(x, y)
    # BUG FIX: the greedy bootstrap action must be evaluated in next_state
    # (the original passed `state`), and `max()` over the returned direction
    # names compared the strings alphabetically rather than by Q-value.
    # All directions returned by get_max_neighbors are tied on Q-value in
    # next_state, so taking the first is sufficient.
    best_dir = get_max_neighbors(neighbors, next_state, qtable)[0]
    r = world.get_reward(*agent.get_position(), agent.is_holding_block())
    # BUG FIX: the original dropped the learning_rate factor on the target
    # term, so the update was not a convex combination of old and new values.
    qtable[state][action] = (
        (1 - learning_rate) * qtable[state][action]
        + learning_rate * (r + discount_rate * qtable[next_state][best_dir]))
Exemplo n.º 6
0
def get_adjacent_states_test():
    """get_adjacent_states should produce a state for each of the four
    cells adjacent to the agent's position."""
    a = Agent(3, 3)
    w = World(5, 5, [(1, 1, 3), (2, 2, 4)], [(3, 3, 3), (4, 4, 4)], -1, 13, 13)
    adj = get_adjacent_states(w.get_neighbors(3, 3), get_current_state(w, a))

    # BUG FIX: the original asserted the same (3, 4, ...) state four times;
    # check each of the four distinct neighbors of (3, 3) instead.
    assert ((3, 4, False, True, True, True, True) in adj)
    assert ((3, 2, False, True, True, True, True) in adj)
    assert ((2, 3, False, True, True, True, True) in adj)
    assert ((4, 3, False, True, True, True, True) in adj)
Exemplo n.º 7
0
def p_greedy(agent, world, qtable, state_space='big'):
    """Greedy policy: step onto an adjacent pickup (when empty-handed) or
    dropoff (when carrying) immediately; otherwise move toward one of the
    highest-Q neighbors, breaking ties uniformly at random."""
    holding = agent.is_holding_block()
    neighbors = world.get_neighbors(*agent.get_position())

    for direction, cell in neighbors.items():
        wants_pickup = world.is_pickup(*cell) and not holding
        wants_dropoff = world.is_dropoff(*cell) and holding
        if wants_pickup or wants_dropoff:
            return direction

    state = get_current_state(world, agent, state_space=state_space)
    best = get_max_neighbors(neighbors, state, qtable)
    return random.choice(best)
Exemplo n.º 8
0
def p_exploit(agent, world, qtable, state_space='big'):
    """Mostly-greedy policy: mandatory pickup/dropoff moves come first;
    otherwise take the greedy move 80% of the time and a random move the
    remaining 20%."""
    holding = agent.is_holding_block()
    neighbors = world.get_neighbors(*agent.get_position())

    for direction, cell in neighbors.items():
        if holding:
            if world.is_dropoff(*cell):
                return direction
        elif world.is_pickup(*cell):
            return direction

    if random.randint(1, 10) <= 8:
        state = get_current_state(world, agent, state_space=state_space)
        return random.choice(get_max_neighbors(neighbors, state, qtable))
    # Delegate the exploratory move to the random policy.
    return p_random(agent, world, qtable)