Example #1
import random

import numpy as np


def arg_max(q_list):
    # indices of all actions tied for the maximal Q-value
    max_idx_list = np.argwhere(q_list == np.amax(q_list))
    max_idx_list = max_idx_list.flatten().tolist()
    # break ties uniformly at random
    return random.choice(max_idx_list)


if __name__ == "__main__":
    env = Env()
    agent = QLearningAgent(actions=list(range(env.n_actions)))

    for episode in range(1000):
        # reset the environment and initialize the state
        state = env.reset()

        while True:
            env.render()
            # select an action for the current state
            action = agent.get_action(state)
            # take the action, then receive the next state, the reward,
            # and whether the episode has ended
            next_state, reward, done = env.step(action)
            # update the Q-function with the sample <s,a,r,s'>
            agent.learn(state, action, reward, next_state)

            state = next_state

            # print the Q-function of all states on screen
            env.print_value_all(agent.q_table)

            if done:
                break
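
The QLearningAgent class itself is elided from this example. The sketch below shows a minimal agent consistent with the loop above, reusing the arg_max helper; the hyperparameter names and values (step_size, discount_factor, epsilon) and the epsilon-greedy policy are assumptions, not the author's exact implementation.

import random
from collections import defaultdict

import numpy as np


class QLearningAgent:
    def __init__(self, actions):
        self.actions = actions
        self.step_size = 0.01        # learning rate (assumed value)
        self.discount_factor = 0.9   # assumed value
        self.epsilon = 0.1           # exploration rate (assumed value)
        self.q_table = defaultdict(lambda: [0.0] * len(actions))

    def get_action(self, state):
        # epsilon-greedy: explore with probability epsilon, otherwise exploit
        if np.random.rand() < self.epsilon:
            return np.random.choice(self.actions)
        return arg_max(self.q_table[str(state)])

    def learn(self, state, action, reward, next_state):
        # off-policy TD update: bootstrap from the best next-state action
        state, next_state = str(state), str(next_state)
        current_q = self.q_table[state][action]
        td_target = reward + self.discount_factor * max(self.q_table[next_state])
        self.q_table[state][action] += self.step_size * (td_target - current_q)
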
Example #2
if __name__ == "__main__":
    env = Env()
    agent = SARSAgent(actions=list(range(env.n_actions)))

    for episode in range(1000):
        # reset environment and initialize state
        state = env.reset()
        # get action of state from agent
        action = agent.get_action(str(state))

        while True:
            env.render()

            # take action and proceed one step in the environment
            next_state, reward, done = env.step(action)
            next_action = agent.get_action(str(next_state))

            # with sample <s,a,r,s',a'>, agent learns new q function
            agent.learn(str(state), action, reward, str(next_state), next_action)

            state = next_state
            action = next_action

            # print q function of all states at screen
            env.print_value_all(agent.q_table)

            # if episode ends, then break
            if done:
                break
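
The SARSAgent class is likewise elided. It differs from the Q-learning agent only in the update rule: SARSA is on-policy, so it bootstraps from the next action a' the agent actually selected instead of the greedy maximum. A minimal sketch of the learn method implied by the loop above (attribute names are assumptions):

    def learn(self, state, action, reward, next_state, next_action):
        # on-policy TD update with the sample <s, a, r, s', a'>;
        # states arrive already converted with str() by the caller
        current_q = self.q_table[state][action]
        next_q = self.q_table[next_state][next_action]
        td_target = reward + self.discount_factor * next_q
        self.q_table[state][action] += self.step_size * (td_target - current_q)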

Example #3
from environment import Env
from QLearning import QLearning

if __name__ == "__main__":
    env = Env()
    QL = QLearning(list(range(env.n_actions)))

    for episode in range(1000):
        state = env.reset()
        while True:
            env.render()

            # take action and proceed one step in the environment
            action = QL.get_action(str(state))
            next_state, reward, done = env.step(action)

            # with sample <s,a,r,s'>, agent learns new q function
            QL.learn(str(state), action, reward, str(next_state))

            state = next_state
            env.print_value_all(QL.q_table)

            # if episode ends, then break
            if done:
                break
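
Note the str(state) conversion before every Q-table lookup in this and the previous example. The environment reports states as coordinate lists (see Example #5, where q_table[str([1, 0])] is assigned), and lists are unhashable, so they cannot be used as dictionary keys directly:

from collections import defaultdict

q_table = defaultdict(lambda: [0.0, 0.0, 0.0, 0.0])
state = [1, 0]              # coordinates, as the environment reports them
print(q_table[str(state)])  # OK: '[1, 0]' is a hashable string key
# q_table[state]            # would raise TypeError: unhashable type: 'list'
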
Example #4
    @staticmethod
    def arg_max(q_list):
        # collect every index tied for the maximal Q-value,
        # then break ties uniformly at random
        max_index_list = []
        max_value = q_list[0]
        for index, value in enumerate(q_list):
            if value > max_value:
                max_index_list.clear()
                max_value = value
                max_index_list.append(index)
            elif value == max_value:
                max_index_list.append(index)
        return random.choice(max_index_list)


if __name__ == "__main__":
    env = Env()
    agent = QLearningAgent(actions=list(range(env.n_actions)))

    for episode in range(1000):
        state = env.reset()

        while True:
            env.render()

            # take action and proceed one step in the environment
            action = agent.get_action(str(state))
            agent.save_samples_for_print(state, action)

            next_state, reward, done = env.step(action)

            # with sample <s,a,r,s'>, agent learns new q function
            agent.learn(str(state), action, reward, str(next_state))

            state = next_state
            env.print_value_all(agent.q_table, agent.samples_for_print)

            # if episode ends, then break
            if done:
                agent.samples_for_print.clear()
                break
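
save_samples_for_print and samples_for_print are not shown in this example. A plausible sketch, assuming samples_for_print is a plain list created in the agent's __init__ (the loop above clears it at the end of every episode):

    def save_samples_for_print(self, state, action):
        # remember the visited (state, action) pair so env.print_value_all
        # can overlay the episode's trajectory on the Q-value display
        self.samples_for_print.append((state, action))
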
Example #5
import time
from collections import defaultdict

import numpy as np

from environment import Env

env = Env()
env.reset()

# print the number of available actions (the action space)
print(env.n_actions)

# pass the agent's action to the environment and receive the resulting information
next_state, reward, done = env.step(1)
print(next_state, reward, done)

# defaultdict that holds the Q-function
q_table = defaultdict(lambda: [0.0, 0.0, 0.0, 0.0])
q_table[str([1, 0])] = [100., 100., 100., 100.]
print(q_table['blah blah'])
print(q_table)

# print the Q-function on the environment grid
env.print_value_all(q_table)
# update (re-render) the environment
env.render()
time.sleep(3)

# let the agent take a sequence of random actions
for i in range(100):
    action = np.random.choice(4)  # sample one of the four actions at random
    env.step(action)
    env.render()
    env.render()
env.reset()

env.destroy()
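
One detail worth knowing about the defaultdict used above: merely reading a missing key, as print(q_table['blah blah']) does, inserts a fresh default entry, which is why the subsequent print(q_table) shows the 'blah blah' key alongside str([1, 0]):

from collections import defaultdict

q_table = defaultdict(lambda: [0.0, 0.0, 0.0, 0.0])
print(len(q_table))    # 0: the table starts empty
_ = q_table['unseen']  # reading a missing key creates it with the default
print(len(q_table))    # 1: 'unseen' is now stored in the table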