Example #1
import gym


def train_dqn():
    # DQNArgs, DQNAgent, QNet and SimpleNormalizer come from the
    # surrounding project; args bundles the hyperparameters.
    args = DQNArgs()
    env = gym.make(args.env_name)
    agent = DQNAgent(env, QNet, SimpleNormalizer, args)
    pre_best = -1e9  # best test reward seen so far
    for ep in range(args.max_ep):
        agent.train_one_episode()
        if ep % args.test_interval == 0:
            r = agent.test_model()
            if r > pre_best:  # checkpoint only when the test reward improves
                pre_best = r
                agent.save(args.save_dir)
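A plausible shape for `test_model` above is a greedy evaluation rollout with
exploration disabled; a minimal sketch under that assumption (the
`greedy=True` flag is a hypothetical signature, not taken from the source):

import numpy as np

def test_model(agent, env, episodes=5):
    # Average undiscounted return over a few greedy episodes.
    returns = []
    for _ in range(episodes):
        state = env.reset()
        done, total = False, 0.0
        while not done:
            action = agent.act(state, greedy=True)  # hypothetical flag
            state, reward, done, _ = env.step(action)
            total += reward
        returns.append(total)
    return np.mean(returns)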
Example #2
        # Per-step loop (fragment): train once the replay buffer holds a
        # minibatch, and record the trajectory for the plots below.
        if len(agent.memory) >= batch_size:
            loss_sim_list.append(agent.replay(batch_size))
            # For data visualisation
            i.append(mdp.s[0, -1])  # incidence / angle of attack
            v.append(mdp.s[1, -1])  # velocity
            r.append(mdp.reward)    # instantaneous reward

    # Mean loss over this episode's simulation steps
    loss_over_simulation_time = np.mean(loss_sim_list)
    loss_of_episode.append(loss_over_simulation_time)
    print("Initial Heading : {}".format(hdg0_rand))
    print("----------------------------")
    print("episode: {}/{}, Mean Loss = {}".format(e, EPISODES,
                                                  loss_over_simulation_time))
    print("----------------------------")
agent.save("../Networks/dqn-test")

# plt.semilogy(np.linspace(1, EPISODES, EPISODES), np.array(loss_of_episode))
# plt.xlabel("Episodes")
# plt.ylabel("Cost")

# Plot the second half of the recorded trajectories
# (assumes: import numpy as np, import matplotlib.pyplot as plt,
# from math import floor; TORAD is presumably the degree-to-radian factor).
f, axarr = plt.subplots(4, sharex=True)
half = floor(len(i) / 2)
axarr[0].plot(np.array(i[half:len(i) - 1]) / TORAD)  # back to degrees
axarr[1].plot(v[half:len(i) - 1])
axarr[2].plot(r[half:len(i) - 1])
axarr[3].semilogy(loss_sim_list[half:len(i) - 1])
axarr[0].set_ylabel("angle of attack")
axarr[1].set_ylabel("v")
axarr[2].set_ylabel("r")
axarr[3].set_ylabel("cost")
plt.show()
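Examples #2 through #5 all drive the same `remember`/`replay` interface. A
minimal sketch of what such methods typically look like, following the
classic Keras DQN pattern and assuming the agent holds a compiled Keras
`self.model`, a `deque` memory, and `gamma`/`epsilon` attributes (the exact
agents behind these snippets may differ):

import random
import numpy as np

def remember(self, state, action, reward, next_state, done):
    # self.memory is assumed to be collections.deque(maxlen=...)
    self.memory.append((state, action, reward, next_state, done))

def replay(self, batch_size):
    # Sample a uniform minibatch and fit one-step Q-learning targets.
    minibatch = random.sample(self.memory, batch_size)
    losses = []
    for state, action, reward, next_state, done in minibatch:
        target = reward
        if not done:
            # Bootstrapped target: r + gamma * max_a' Q(s', a')
            target = reward + self.gamma * np.amax(
                self.model.predict(next_state)[0])
        target_f = self.model.predict(state)
        target_f[0][action] = target
        history = self.model.fit(state, target_f, epochs=1, verbose=0)
        losses.append(history.history["loss"][0])
    # Decay exploration after each training call.
    if self.epsilon > self.epsilon_min:
        self.epsilon *= self.epsilon_decay
    return float(np.mean(losses))  # Example #2 logs this return value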
Example #3
import numpy as np

# MyEnvironment and DQNAgent come from the surrounding project.
state_size = 3
action_size = 9

# Nine discrete actions; each entry is a pair of command vectors that
# env.step2 understands (the exact semantics come from MyEnvironment).
actions = [ [[0,0],[-100,-100]],    [[0,0],[-100,0]],   [[0,0],[-100,100]],
            [[0,0],[0,-100]],       [[0,0],[0,0]],      [[0,0],[0,100]],
            [[0,0],[100,-100]],     [[0,0],[100,0]],    [[0,0],[100,100]]]

env = MyEnvironment()
agent = DQNAgent(state_size, action_size)
agent.load("./save/example_dqn.h5")  # resume from previously saved weights
batch_size = 32
for e in range(3000):
    state = np.reshape(env.reset(), [1, state_size])
    last_reward = 0
    for time in range(1000):
        env.render()  # visualize every step
        action = agent.act_2(state)  # pick a discrete action index
        commands = actions[action]   # map it to a command pair
        next_state, reward, done, _ = env.step2(commands)
        next_state = np.reshape(next_state, [1, state_size])
        agent.remember(state, action, reward, next_state, done)
        state = next_state

        # Episodes run a fixed 1000 steps; `done` is stored in memory
        # but never breaks the loop.
        if len(agent.memory) > batch_size:
            agent.replay(batch_size)
    if e % 1 == 0:  # always true: the weights are saved after every episode
        agent.save("./save/example_dqn.h5")

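The `act` used in Examples #4 and #5 (and the `act_2` variant above) is
presumably an epsilon-greedy policy over the Q-network's outputs; a sketch
of the standard form (the source's variants may differ, e.g. `act_2` could
be fully greedy):

import random
import numpy as np

def act(self, state):
    # Explore with probability epsilon, otherwise exploit the Q-network.
    if np.random.rand() <= self.epsilon:
        return random.randrange(self.action_size)
    q_values = self.model.predict(state)
    return int(np.argmax(q_values[0]))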
Example #4
                # Step loop of a stock-trading DQN (fragment): act, store
                # the transition, then train on a minibatch once enough
                # experience has accumulated.
                action = agent.act(state)
                next_state, reward, done, _ = env.step(action)
                # reward = reward if not done else -10
                next_state = np.reshape(next_state, [1, state_size])
                agent.remember(state, action, reward, next_state, done)
                state = next_state
                # print(action, reward)
                if done:
                    print("episode: {}/{}, score: {}, e: {:.5}".format(
                        e, EPISODES, time, agent.epsilon))
                    break
                if len(agent.memory) > batch_size:
                    agent.replay(batch_size)
            if e % 10 == 0:
                # Checkpoint the weights every 10 episodes.
                save_string = './save/' + stock_name + '_weights_with_fees.h5'
                agent.save(save_string)

# # serialize model to JSON
# model_json = model.to_json()
# with open("model.json", "w") as json_file:
#     json_file.write(model_json)
# # serialize weights to HDF5
# model.save_weights("model.h5")
# print("Saved model to disk")

# # later...

# # load json and create model
# # (requires: from keras.models import model_from_json)
# json_file = open('model.json', 'r')
# loaded_model_json = json_file.read()
# json_file.close()
# loaded_model = model_from_json(loaded_model_json)
# # load weights into the new model
# loaded_model.load_weights("model.h5")
# print("Loaded model from disk")
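The commented-out block above serializes a Keras `model`; for reference, a
minimal sketch of the kind of Q-network these agents typically wrap (the
layer sizes are illustrative assumptions, not taken from the source):

from keras.models import Sequential
from keras.layers import Dense
from keras.optimizers import Adam

def build_model(state_size, action_size, learning_rate=0.001):
    # Two small ReLU hidden layers and one linear Q-value per action,
    # trained with MSE loss as in the classic Keras DQN example.
    model = Sequential()
    model.add(Dense(24, input_dim=state_size, activation='relu'))
    model.add(Dense(24, activation='relu'))
    model.add(Dense(action_size, activation='linear'))
    model.compile(loss='mse', optimizer=Adam(lr=learning_rate))
    return model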
Example #5
        # Step-loop fragment: reshape the observation, shape the reward,
        # store the transition, then train.
        next_state = np.reshape(next_state, [1, state_size])

        if done:
            reward = 2000  # large terminal bonus

        # Penalize any step whose reward fails to improve on the last one.
        if reward <= last_reward and not done:
            last_reward = reward
            reward = -1000
        else:
            last_reward = reward

        if TRAINING:
            agent.remember(state, action, reward, next_state, done)
        state = next_state  # advance only after the transition is stored

        if done:
            print("1;{};{};{:.2f};{:.2}".format(e, EPISODES, reward,
                                                agent.epsilon))
            if TRAINING:
                # One full-memory replay at the end of each episode.
                agent.replay(len(agent.memory))
            break
        if len(agent.memory) > batch_size and TRAINING:
            agent.replay(batch_size)
        if time == MOVES - 1:
            print("0;{};{};{:.2f};{:.2}".format(e, EPISODES, last_reward,
                                                agent.epsilon))

    if TRAINING:
        agent.save("./save/execution1.h5")