Example #1
0
                  memory_size=5000,
                  batch_size=320,
                  # output_graph=True
                  )
    # Train the DQN agent: loop over episodes; within each episode, run every
    # user's interaction sequence to completion, storing transitions in the
    # replay buffer and triggering learning periodically.
    step = 0  # global transition counter across all episodes and users
    # total train episodes
    for episode in range(4):
        # for each episode, train on all the users once
        episode_start_time = time.time()
        # NOTE(review): `trainUserIdRange + 1` implies trainUserIdRange is a
        # numpy array (element-wise add) — a plain range() would raise a
        # TypeError here. Confirm against where trainUserIdRange is built.
        for currentUserId in trainUserIdRange + 1:    # current_Env.numUser
            currentSeqIndex = 0
            observation = current_Env.generateInputVector(currentUserId, currentSeqIndex)
            while True:
                # RL chooses an action (as an index) based on the observation
                flag, actionIndex = RL.choose_action(observation)
                action = current_Env.actionTransform(actionIndex)
                # RL takes the action; env returns the terminal flag, the next
                # sequence index, the next observation, and the reward
                ifTerminal, currentSeqIndex, observation_, reward = current_Env.update(currentUserId, currentSeqIndex, action)
                # Experience replay: store the transition for later sampling
                RL.store_transition(observation, actionIndex, reward, observation_)

                # Learn only after a 200-step warm-up, then every 5th step
                if (step > 200) and (step % 5 == 0):
                    RL.learn()

                # swap observation
                observation = observation_
                step += 1
                # break while loop at the end of this user's sequence
                if ifTerminal:
                    # FIX: original line was corrupted by the source site
                    # ('"User: "******" Done"' is invalid Python syntax);
                    # reconstructed as a per-user progress message.
                    print("User: " + str(currentUserId) + " Done")
                    break