Example #1
import datetime
import os

import gym
import numpy as np

from agent import DeepQAgent  # assumed project import; the class itself is not shown


def main():
    env = gym.make("LunarLander-v2")

    timestamp = '{:%Y-%m-%d-%H:%M}'.format(datetime.datetime.now())
    o_dir = "LunarLander-v2/{}/models".format(timestamp)
    if not os.path.exists(o_dir):
        os.makedirs(o_dir)

    nof_episodes = 500
    # 8-dimensional observation vector (positions, velocities, angle, leg contacts)
    state_size = env.observation_space.shape[0]
    # 4 discrete actions: 0..3
    action_size = env.action_space.n
    agent = DeepQAgent(state_size, action_size, model=2)
    batch_size = 32

    for episode in range(nof_episodes):
        state = env.reset()
        state = np.reshape(state, [1, state_size])

        done = False
        t = 0
        episode_reward = 0
        # Iterate over the timesteps
        while not done:
            env.render()

            # Instruct the agent to choose an action based on the current state of the environment
            # This may be a random action depending on the value of the exploration_rate(epsilon)
            action = agent.act(state)
            # Execute said action
            next_state, reward, done, _ = env.step(action)
            episode_reward += reward
            next_state = np.reshape(next_state, [1, state_size])

            agent.memorize(state, action, reward, next_state, done)
            state = next_state
            t += 1
            if done:
                print("episode: {}/{}, time: {}, total_reward: {}".format(
                    episode, nof_episodes - 1, t, episode_reward))
        if len(agent.memory) > batch_size:
            agent.train(batch_size)
        # Periodically save the model (every batch_size episodes here)
        if episode % batch_size == 1:
            agent.save(o_dir + "/model_" + str(episode) + ".hdf5")
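
The DeepQAgent class used above is not shown. Below is a minimal sketch of the interface the example exercises (act, memorize, train, save), assuming an epsilon-greedy policy, a small Keras MLP, and one-step Q-learning over an experience-replay buffer; every hyperparameter value and layer size is an illustrative assumption, not the original implementation.

import random
from collections import deque

import numpy as np
from tensorflow import keras


class DeepQAgent:
    """Hypothetical sketch of the interface Example #1 calls into."""

    def __init__(self, state_size, action_size, model=1):
        # 'model' selects an architecture in the original; ignored here.
        self.state_size = state_size
        self.action_size = action_size
        self.memory = deque(maxlen=100_000)  # experience-replay buffer
        self.gamma = 0.99    # discount factor (assumed)
        self.epsilon = 1.0   # exploration rate (assumed)
        self.model = keras.Sequential([
            keras.layers.Dense(64, activation='relu',
                               input_shape=(state_size,)),
            keras.layers.Dense(64, activation='relu'),
            keras.layers.Dense(action_size, activation='linear'),
        ])
        self.model.compile(optimizer='adam', loss='mse')

    def act(self, state):
        # Epsilon-greedy: explore with probability epsilon, else exploit.
        if np.random.rand() < self.epsilon:
            return random.randrange(self.action_size)
        return int(np.argmax(self.model.predict(state, verbose=0)[0]))

    def memorize(self, state, action, reward, next_state, done):
        self.memory.append((state, action, reward, next_state, done))

    def train(self, batch_size):
        # One-step Q-learning targets on a random minibatch from replay.
        for state, action, reward, next_state, done in random.sample(
                self.memory, batch_size):
            target = reward
            if not done:
                target += self.gamma * np.max(
                    self.model.predict(next_state, verbose=0)[0])
            q_values = self.model.predict(state, verbose=0)
            q_values[0][action] = target
            self.model.fit(state, q_values, epochs=1, verbose=0)

    def save(self, path):
        self.model.save(path)
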
Example #2
import os

import tensorflow as tf  # TF1-style API: tf.Session, tf.ConfigProto, tf.GPUOptions

# calc_gpu_fraction, get_config, AtariEnvironment and DeepQAgent are assumed
# to come from the project's own modules, which are not shown here.


def main(_):
    if not tf.test.is_gpu_available() and FLAGS.use_gpu:
        raise Exception("use_gpu flag is true when no GPUs are available")

    assert FLAGS.checkpoint_dir != '', 'Checkpoint directory must be specified'

    if not FLAGS.to_train and not os.path.isfile(
            os.path.join(FLAGS.checkpoint_dir, 'ckpt.index')):
        raise Exception(
            "Checkpoint directory must contain a trained model to do testing")

    gpu_options = tf.GPUOptions(
        per_process_gpu_memory_fraction=calc_gpu_fraction(FLAGS.gpu_fraction),
        allow_growth=True)

    sess_config = tf.ConfigProto(
        log_device_placement=False,
        allow_soft_placement=FLAGS.allow_soft_placement,
        gpu_options=gpu_options)

    with tf.Session(config=sess_config) as sess:
        config = get_config(FLAGS)

        env = AtariEnvironment(config)

        agent = DeepQAgent(env, sess, config)

        if config.to_train:
            agent.train()
        else:
            agent.play()
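
Example #2 references a FLAGS object that it never defines. A minimal sketch of how those flags might be declared with the TF1 tf.app.flags API; the default values below are assumptions, not the project's settings.

import tensorflow as tf

flags = tf.app.flags
flags.DEFINE_boolean('use_gpu', True, 'run on a GPU if one is available')
flags.DEFINE_string('gpu_fraction', '1/1', 'per-process GPU memory fraction spec')
flags.DEFINE_string('checkpoint_dir', 'checkpoints', 'directory for model checkpoints')
flags.DEFINE_boolean('to_train', True, 'train a new model instead of testing')
flags.DEFINE_boolean('allow_soft_placement', True, 'let TF fall back to CPU ops')
FLAGS = flags.FLAGS

if __name__ == '__main__':
    tf.app.run()  # parses the flags, then calls main(_)
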
Example #3
import numpy as np
import time
import actor
from env.puzzle import PAD
from agent import DeepQAgent

shape = [5, 6]
moves = 100
board = PAD(shape=shape, max_moves=moves, show=False)
print('board set up')

print('setting up agent')
agent = DeepQAgent(board,
    n_moves=moves,
    batch_size=64,
    memory=128,
    sample_mode='e_greedy',
    reward_type='combo')
print('agent set up')

print('Max moves: ', agent.n_moves)
agent.observe()
actor.train_loop(agent)

# Replace the board so we can watch some play
board = PAD(shape=shape,
    max_moves=moves,
    show=True,
    sleep_time=0.05)
agent.swap_board(board)
actor.run_loop(agent)
Example #4
from agent import TabularQAgent, DeepQAgent
import numpy as np
import gym
import matplotlib.pyplot as plt
from utils import plot_learning_curve

env = gym.make('CartPole-v1')
n_actions = env.action_space.n
n_states = env.observation_space.shape

A = DeepQAgent(lr=0.001,
               gamma=0.9,
               eps_max=1.0,
               eps_min=0.01,
               eps_dec=0.9999995,
               n_actions=n_actions,
               n_states=n_states,
               input_dims=n_states)

n_episodes = 10000
win_pct_list = []
scores = []
eps_history = []

for i in range(n_episodes):
    done = False
    score = 0
    s = env.reset()

    while not done:
        # The original snippet is truncated here; the loop body below is a
        # standard DQN step, and the agent method/attribute names
        # (choose_action, learn, eps) are assumptions about this API.
        a = A.choose_action(s)
        s_, r, done, _ = env.step(a)
        A.learn(s, a, r, s_)
        score += r
        s = s_

    scores.append(score)
    eps_history.append(A.eps)
    if (i + 1) % 100 == 0:
        win_pct_list.append(np.mean(scores[-100:]))
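
A side note on the schedule above: if the agent multiplies epsilon by eps_dec once per step (an assumption about this implementation), decaying from eps_max=1.0 to eps_min=0.01 with eps_dec=0.9999995 takes about ln(0.01)/ln(0.9999995), roughly 9.2 million steps. A quick check:

import math

eps_dec = 0.9999995
steps = math.log(0.01) / math.log(eps_dec)
print("{:,.0f} steps to decay from 1.0 to 0.01".format(steps))  # ~9,210,000
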
Example #5
import numpy as np

# Assumed project imports; these modules are not shown in the snippet.
from agent import DeepQAgent
from utils import make_env, plot_learning_curve


def main():
    print("Start Atari games")
    environment_name = "PongNoFrameskip-v4"
    env = make_env(environment_name)
    best_score = -np.inf
    load_checkpoint = False
    n_games = 500
    lr = 0.0001
    epsilon = 1
    gamma = 0.99
    input_dims = env.observation_space.shape
    n_actions = env.action_space.n
    eps_min = 0.01
    eps_dec = 5e-7
    replace = 1000
    mem_size = 50000
    batch_size = 32
    chkpt_dir = "models/"
    algo = "DeepQAgent"
    agent = DeepQAgent(lr, n_actions, input_dims, chkpt_dir, epsilon, gamma,
                       mem_size, batch_size, eps_min, eps_dec, replace, algo,
                       environment_name)
    if load_checkpoint:
        agent.load_models()
    fname = agent.algo + "_" + agent.env_name + '_lr' + str(
        agent.lr) + "_" + str(n_games) + "_games"
    figure_file = "plots/" + fname + ".png"
    n_steps = 0
    scores, eps_history, steps_array = [], [], []
    for i in range(n_games):
        done = False
        score = 0
        observation = env.reset()
        while not done:
            action = agent.get_action(observation)
            new_observation, reward, done, info = env.step(action)
            score += reward
            if not load_checkpoint:
                agent.store_transition(observation, action, reward,
                                       new_observation, int(done))
                agent.learn()
            observation = new_observation
            n_steps += 1
        scores.append(score)
        steps_array.append(n_steps)
        avg_score = np.mean(scores[-100:])
        print("episode {} score {:.1f} average score {:.1f} best score {:.1f} "
              "epsilon {:.2f} steps {}".format(i + 1, score, avg_score,
                                               best_score, agent.epsilon,
                                               n_steps))
        if avg_score > best_score:
            if not load_checkpoint:
                agent.save_models()
            best_score = avg_score
        eps_history.append(agent.epsilon)
    plot_learning_curve(steps_array, scores, eps_history, figure_file)
    print("End Atari games")
Example #6

from agent import DDPG, DeepQAgent
from environment import Environment

done_comparison_data = {
    'coords_done_fail': [45, 60, 118, 180],
    'coords_done_success': [5, 16, 122, 174],
    'img_done_fail': 'data/s8_cut_try_again.png',
    'img_done_success': 'data/game_score_s8.png',
    'restart_btn_coords': [640, 1110],
    'restart_ongame': [(2764, 93), (2624, 552)],
}

scores = {
    'coords_diamonds_gathered': [11, 27, 25, 35],
    'digits_mask_addr': 'data/digits',
    'match_threshold': 10,
    'state_area': [28, 112, 0, 296],
    'time_importance': 0.7,
    'diamonds_importance': 0.3,
    'episode_time_limit': 60,
    'diamonds_total': 7
}

env = Environment(device_ref_elements_data={
    'done_comparison_data': done_comparison_data,
    'scores': scores
})
# agent = DDPG(env)
agent = DeepQAgent(env)
train(agent, env, episode_seconds_constrain=45)
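
The train helper called above is not defined in the snippet. A minimal sketch of a loop that honors episode_seconds_constrain via a wall-clock deadline; the agent and env method names (get_action, store_transition, learn, reset, step) are assumptions modeled on the other examples, not this project's actual API.

import time


def train(agent, env, episode_seconds_constrain=45, n_episodes=100):
    for episode in range(n_episodes):
        state = env.reset()
        deadline = time.monotonic() + episode_seconds_constrain
        done = False
        # End the episode either on a terminal state or when the
        # per-episode time budget is exhausted.
        while not done and time.monotonic() < deadline:
            action = agent.get_action(state)
            next_state, reward, done, _ = env.step(action)
            agent.store_transition(state, action, reward, next_state, done)
            agent.learn()
            state = next_state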