Python DQN.train примеры использования

Язык программирования: Python

Пространство имен/Пакет: agent

Класс/Тип: DQN

Метод/Функция: train

Примеров на hotexamples.com: 4

Python DQN.train — найдено 4 примера. Это лучшие примеры Python-кода для agent.DQN.train, полученные из проектов с открытым исходным кодом. Вы можете оценить каждый пример, чтобы помочь нам улучшить качество примеров.

Основные методы

Показать Скрыть

DQN(30)

sample_action(4)

train(4)

remember(4)

act(4)

load_model(3)

choose_action(3)

construct_model(3)

save_model(3)

learn(3)

replay(2)

save(2)

get_action(2)

create_model(2)

set_model(1)

state_dict(1)

train_model(1)

show_cost(1)

load_state_dict(1)

parameters(1)

init_var(1)

evaluate(1)

eval(1)

copy_weights(1)

train_with_experience_replay(1)

Пример #1

Показать файл

def train(game):
    """Play ``TRAIN_GAMES`` episodes of ``game`` and train a DQN agent on them.

    Each episode is played to completion while its transitions are buffered
    locally. Only episodes whose accumulated (shaped, scaled) reward is
    non-negative are pushed into the agent's memory and followed by a call
    to ``agent.train()``.

    Args:
        game: Environment object exposing ``new_episode()``, ``get_state()``,
            ``make_action(action)`` and ``is_episode_finished()``.
    """
    agent = DQN(game)

    for _ in tqdm(range(TRAIN_GAMES)):
        game.new_episode()
        last_vars = None
        last_frame = None
        finished = False
        transitions = []
        episode_return = 0

        while not finished:
            snapshot = game.get_state()
            frame = snapshot.screen_buffer
            game_vars = snapshot.game_variables

            # On the very first step there is no history yet, so the current
            # observation doubles as the "previous" one.
            last_vars = game_vars if last_vars is None else last_vars
            last_frame = frame if last_frame is None else last_frame

            action = agent.act(frame)
            raw_reward = game.make_action(action)
            finished = game.is_episode_finished()

            # Environment reward plus the variable-based bonus, scaled by 1/100.
            bonus = calculate_additional_reward(last_vars, game_vars)
            step_reward = (raw_reward + bonus) / 100
            episode_return += step_reward

            transitions.append(
                [last_frame, frame, step_reward, action, finished]
            )
            last_vars, last_frame = game_vars, frame

        # Episodes with a negative total reward are discarded entirely.
        if episode_return < 0:
            continue

        for transition in transitions:
            agent.remember(*transition)
        agent.train()

Пример #2

Показать файл

def main(args):
    """Train a DQN agent on CartPole-v0, evaluate it periodically and plot.

    Args:
        args: Options object. The fields read here are ``seed``, ``lr``,
            ``max_ep``, ``init_epsilon``, ``final_epsilon``, ``log_every``,
            ``test_ep`` and ``render``; the whole object is also handed to
            the ``DQN`` constructor.
    """
    env = gym.make("CartPole-v0")

    # A negative seed means "do not seed".
    if args.seed >= 0:
        random_seed(args.seed)
        env.seed(args.seed)

    agent = DQN(env, args)
    agent.set_model(get_model(out_dim=env.action_space.n, lr=args.lr))

    smoothed_returns = []
    step_counts = []
    total_steps = 0

    # ---- training loop ----
    for episode in range(args.max_ep):
        obs = env.reset()
        episode_return = 0

        for _ in range(env.spec.timestep_limit):
            # Pick an action with the epsilon-greedy policy and apply it.
            action = agent.sample_action(obs, policy="egreedy")
            next_obs, env_reward, terminal, _info = env.step(action)
            total_steps += 1
            episode_return += env_reward

            # The agent learns from a reshaped reward (small bonus per step,
            # penalty on termination) instead of the environment's own one.
            shaped_reward = -1 if terminal else 0.1
            agent.train(obs, action, shaped_reward, next_obs, terminal)

            obs = next_obs
            if terminal:
                break

        step_counts.append(total_steps)

        # Exponential running average of episode returns (0.9 / 0.1 mix);
        # the first episode seeds the average.
        if smoothed_returns:
            smoothed = smoothed_returns[-1] * 0.9 + episode_return * 0.1
        else:
            smoothed = episode_return
        smoothed_returns.append(smoothed)

        # Linear epsilon decay, one step per episode.
        if agent.epsilon > args.final_epsilon:
            agent.epsilon -= (
                (args.init_epsilon - args.final_epsilon) / args.max_ep
            )

        # Greedy evaluation runs on the last episode of every
        # ``log_every``-sized window; skip all other episodes.
        if episode % args.log_every != args.log_every - 1:
            continue

        eval_total = 0
        for _ in range(args.test_ep):
            eval_obs = env.reset()
            for _ in range(env.spec.timestep_limit):
                if args.render:
                    env.render()
                eval_action = agent.sample_action(eval_obs, policy="greedy")
                eval_obs, eval_reward, eval_done, _ = env.step(eval_action)
                eval_total += eval_reward
                if eval_done:
                    break

        mean_eval_return = eval_total / args.test_ep
        print("Episode: %d Average Reward: %.2f" %
              (episode + 1, mean_eval_return))

    # ---- plot the running average against cumulative training steps ----
    plt.plot(step_counts, smoothed_returns)
    plt.xlabel("steps")
    plt.ylabel("running avg rewards")
    plt.show()

Пример #3

Показать файл

    # Ask the user what to do: train on the default data, test a saved
    # model, or train on a user-supplied dataset file.
    x = input('''To train model: train,
        To test a trained model: test,
        To train on different dataset: d: ''')
    if x == 'd':
        dataset = input('Enter name of dataset as "example_dataset.csv": ')
        try:
            raw = preprocess(dataset)
        except Exception:
            # Previously a bare ``except`` printed this message and then fell
            # through to a second ``preprocess(dataset)`` call, which raised
            # the very same error again. Report the problem once and leave
            # ``all_rewards`` at 0 so the plotting guard below skips.
            print('Invalid dataset')
            all_rewards = 0
        else:
            # The dataset is preprocessed exactly once (the original code
            # repeated the call after the try/except).
            actions = 2
            states = 7
            env = MyStocksEnv(raw, window_size=1, frame_bound=(1, 300))
            agent = DQN(actions, states, 100)
            all_rewards = agent.train(env, 1000)
    elif x == 'test':
        # Evaluate the previously saved model on the default dataset.
        raw = preprocess()
        env = MyStocksEnv(raw, window_size=1, frame_bound=(1, 300))
        all_rewards = trained_test('dqn_model.h5', env)
    else:
        # Any other answer (including "train") trains on the default dataset.
        raw = preprocess()
        actions = 2
        states = 7
        env = MyStocksEnv(raw, window_size=1, frame_bound=(1, 300))
        agent = DQN(actions, states, 100)
        all_rewards = agent.train(env, 1000)

    # 0 is treated as "nothing to show".
    if all_rewards != 0:
        print(all_rewards)
        plot(all_rewards)

Пример #4

Показать файл


# Select the compute device: CUDA when torch reports it available, else CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

# Create the Chrome Dino environment and wrap it with make_dino
# (timer and frame stacking enabled).
# Alternative environment id: 'ChromeDinoNoBrowser-v0'.
env = make_dino(gym.make('ChromeDino-v0'), timer=True, frame_stack=True)

# Size of the discrete action space, taken from the wrapped environment.
n_actions = env.action_space.n

# ----------- Nature DQN ---------------
# Active configuration: build the agent and start training.
dqn = DQN(n_actions, device)
dqn.train(env, logger)
# To evaluate a saved model instead:
# dqn.load("./trained/dqn.pkl")
# dqn.test(env)

# ----------- Prioritized DQN ---------------
# Disabled variant; uncomment to use.
# dqn_p = DQNPrioritized(n_actions, device)
# dqn_p.train(env, logger)
# dqn_p.load("./trained/dqn_p.pkl")
# dqn_p.test(env)


# ----------- Double DQN ----------------
# Disabled variant; uncomment to use.
# double_dqn = DoubleDQN(n_actions, device)
# double_dqn.train(env, logger)
# double_dqn.load("./trained/double-dqn.pkl")
# double_dqn.test(env)