import numpy as np
import tensorflow as tf
from keras import backend as K
# Database, Environment, ActorCritic, parse_cmd_args and the argus config
# dict come from this project's own modules.


def main():
    # try:
    parse_cmd_args()
    sess = tf.Session()
    K.set_session(sess)
    db = Database()
    env = Environment(db, argus)
    actor_critic = ActorCritic(env, sess,
                               learning_rate=argus['learning_rate'],
                               train_min_size=argus['train_min_size'],
                               size_mem=argus['maxlen_mem'],
                               size_predict_mem=argus['maxlen_predict_mem'])

    num_trials = argus['num_trial']  # ?
    # trial_len = 500  # ?
    # ntp
    env.preheat()

    # First iteration: take one bootstrap step so the replay memory is non-empty.
    cur_state = env._get_obs()  # np.array (inner_metric + sql)
    cur_state = cur_state.reshape((1, env.state.shape[0]))
    # action = env.action_space.sample()
    action = env.fetch_action()  # np.array
    action_2 = action.reshape((1, env.action_space.shape[0]))  # for memory
    # Apply the action -> wait for a steady state -> return the reward.
    new_state, reward, done, _ = env.step(action, 0, 1)
    new_state = new_state.reshape((1, env.state.shape[0]))
    reward_np = np.array([reward])
    print("0-shape-")
    print(new_state.shape)
    actor_critic.remember(cur_state, action_2, reward_np, new_state, done)
    actor_critic.train()  # len < 32, useless
    cur_state = new_state

    for i in range(num_trials):
        # env.render()
        cur_state = cur_state.reshape((1, env.state.shape[0]))
        action, isPredicted = actor_critic.act(cur_state)
        print(action)
        action_2 = action.reshape((1, env.action_space.shape[0]))  # for memory
        # action.tolist()  # to execute
        new_state, reward, done, _ = env.step(action, isPredicted, i + 1)
        new_state = new_state.reshape((1, env.state.shape[0]))
        reward_np = np.array([reward])
        print("%d-shape-" % i)
        print(new_state.shape)
        actor_critic.remember(cur_state, action_2, reward_np, new_state, done)
        actor_critic.train()
        cur_state = new_state
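    # The remember()/train() calls follow a standard experience-replay loop:
    # each (state, action, reward, next_state, done) transition is appended
    # to a bounded memory (capped by maxlen_mem / maxlen_predict_mem), and
    # train() presumably fits the actor and critic on mini-batches only once
    # the memory holds at least train_min_size samples -- hence the
    # "len < 32, useless" note on the first call above.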
    '''
    sess = tf.Session()
    K.set_session(sess)
    db = Database(argus)  # connector knobs metric
    env = Environment(db, argus)
    actor_critic = ActorCritic(env, sess,
                               learning_rate=argus['learning_rate'],
                               train_min_size=argus['train_min_size'],
                               size_mem=argus['maxlen_mem'],
                               size_predict_mem=argus['maxlen_predict_mem'])

    num_trials = argus['num_trial']  # ?
    # trial_len = 500  # ?
    # ntp
    env.preheat()

    # First iteration
    cur_state = env._get_obs()  # np.array (inner_metric + sql)
    cur_state = cur_state.reshape((1, env.state.shape[0]))
    # action = env.action_space.sample()
    action = env.fetch_action()  # np.array
    action_2 = action.reshape((1, env.action_space.shape[0]))  # for memory
    # Apply the action -> wait for a steady state -> return the reward.
    new_state, reward, done, score, _ = env.step(action, 0, 1)
    new_state = new_state.reshape((1, env.state.shape[0]))
    reward_np = np.array([reward])
    print("0-shape")
    print(new_state.shape)
    actor_critic.remember(cur_state, action_2, reward_np, new_state, done)
    actor_critic.train()  # len < 32, useless
    cur_state = new_state

    predicted_rewardList = []