Python PolicyGradientAgent示例

编程语言: Python

命名空间/包名称: PolicyGradientAgent

hotexamples.com的示例: 6

Python PolicyGradientAgent - 已找到 6 个示例。这些是从开源项目中提取的最受好评的 PolicyGradientAgent.PolicyGradientAgent 真实 Python 示例。您可以对示例进行评分，以帮助我们提高示例质量。

常用方法

显示 / 隐藏

PolicyGradientAgent(4)

act(1)

get_action(1)

learn(1)

load_model(1)

load_state(1)

persist_state(1)

remember(1)

save_model(1)

save_state(1)

update(1)

示例#1

显示文件

def run_experiment(Lambda,
                   alpha,
                   twe,
                   trunc_normal,
                   subspaces,
                   num_runs,
                   num_episodes=20000,
                   num_procs=None,
                   name=""):
    """Train ``num_runs`` fresh agents and save their per-episode returns.

    Every run builds a new ``PolicyGradientAgent`` on the module-level
    ``simulator``, trains it through a ``Framework`` and copies
    ``framework.returns`` into one row of the result array.  The array is
    appended to the module-level ``results`` list and written to
    ``data/<name>/<random integer>.npy``.

    Args:
        Lambda: Forwarded to the agent as ``Lambda``.
        alpha: Forwarded to the agent as both ``alpha_u`` and ``alpha_v``.
        twe: Forwarded to the agent as ``tile_weight_exponent``.
        trunc_normal: Forwarded to the agent as ``trunc_normal``.
        subspaces: Forwarded to the agent as ``subspaces``.
        num_runs: Number of independent runs (rows of the result).
        num_episodes: Episodes per run (columns of the result).
        num_procs: Forwarded to ``Framework.train``.
        name: Printed before each run and used as the output sub-directory.

    Returns:
        The ``(num_runs, num_episodes)`` float64 array of returns.

    Raises:
        OSError: If the output directory is missing and cannot be created.
    """
    returns = np.empty((num_runs, num_episodes), dtype=np.float64)
    # Registered before it is filled: ``results`` holds a reference to the
    # same array, so the rows written below are visible through it.
    results.append(returns)

    # ``range`` and ``print(...)`` with one argument behave the same on
    # Python 2 and Python 3, unlike ``xrange`` and the ``print`` statement.
    for run in range(num_runs):
        print(name)
        agent = PolicyGradientAgent(simulator,
                                    Lambda=Lambda,
                                    alpha_u=alpha,
                                    alpha_v=alpha,
                                    tile_weight_exponent=twe,
                                    trunc_normal=trunc_normal,
                                    subspaces=subspaces)
        agent.persist_state()
        framework = Framework(simulator, agent, num_episodes=num_episodes)
        framework.train(num_procs=num_procs)
        returns[run] = framework.returns

    # A random integer keeps output files from separate invocations apart.
    file_id = np.random.randint(sys.maxsize)

    directory = 'data/%s/' % (name)
    filename = directory + ('%d.npy' % (file_id))
    try:
        os.makedirs(directory)
    except OSError:
        # An already existing directory is fine; any other failure
        # (permissions, a file in the way, ...) must not be hidden.
        if not os.path.isdir(directory):
            raise
    np.save(filename, returns)
    return returns

示例#2

显示文件

文件： Experiment.py 项目： roshanshariff/lunarlander

def run_experiment(Lambda, alpha, twe, trunc_normal, subspaces, num_runs,
                   num_episodes=20000, num_procs=None, name=""):
    """Run several independent training runs and persist their returns.

    For each of the ``num_runs`` runs a new ``PolicyGradientAgent`` is
    created on the module-level ``simulator`` and trained by a
    ``Framework``; ``framework.returns`` becomes one row of the output.
    The output array is also appended to the module-level ``results`` list
    and saved as ``data/<name>/<random integer>.npy``.

    Args:
        Lambda: Passed to the agent as ``Lambda``.
        alpha: Passed to the agent as both ``alpha_u`` and ``alpha_v``.
        twe: Passed to the agent as ``tile_weight_exponent``.
        trunc_normal: Passed to the agent as ``trunc_normal``.
        subspaces: Passed to the agent as ``subspaces``.
        num_runs: How many runs to perform (rows of the output).
        num_episodes: Episodes per run (columns of the output).
        num_procs: Passed to ``Framework.train``.
        name: Printed before each run; also names the output sub-directory.

    Returns:
        A float64 array of shape ``(num_runs, num_episodes)``.

    Raises:
        OSError: If the output directory does not exist and cannot be
            created.
    """
    returns = np.empty((num_runs, num_episodes), dtype=np.float64)
    # ``results`` receives the array object itself, so it sees the rows
    # that are filled in afterwards.
    results.append(returns)

    # ``range`` instead of ``xrange`` so the loop also runs on Python 3.
    for run_index in range(num_runs):
        print(name)
        agent = PolicyGradientAgent(simulator,
                                    Lambda=Lambda, alpha_u=alpha, alpha_v=alpha,
                                    tile_weight_exponent=twe,
                                    trunc_normal=trunc_normal,
                                    subspaces=subspaces)
        agent.persist_state()
        framework = Framework(simulator, agent, num_episodes=num_episodes)
        framework.train(num_procs=num_procs)
        returns[run_index] = framework.returns

    # Random file stem; named ``suffix`` to avoid shadowing the ``random``
    # module name.
    suffix = np.random.randint(sys.maxsize)

    directory = 'data/%s/' % (name)
    filename = directory + ('%d.npy' % (suffix))
    try:
        os.makedirs(directory)
    except OSError:
        # Only tolerate the "directory already exists" case; re-raise
        # everything else instead of silently continuing.
        if not os.path.isdir(directory):
            raise
    np.save(filename, returns)
    return returns

示例#3

显示文件

import os

import gym
import numpy as np

from PolicyGradientAgent import PolicyGradientAgent

# Sets CUDA_VISIBLE_DEVICES to "-1" for this process; presumably to keep the
# agent's backend off the GPU — confirm against PolicyGradientAgent.
os.environ["CUDA_VISIBLE_DEVICES"] = "-1"


# Build the environment and derive the sizes handed to the agent.
env = gym.make('LunarLander-v2')
action_size = env.action_space.n
# NOTE(review): hard-coded; presumably the length of a LunarLander-v2
# observation — confirm against env.observation_space.shape.
state_size = 8

agent = PolicyGradientAgent(state_size, action_size)

print("Training...")
train_episodes = 5000
# avg_score and loss are only initialised here; their use lies outside this
# excerpt.
avg_score = 0
loss = 0
for episode in range(train_episodes):

    # Start a new episode and reshape the observation to (1, state_size)
    # before it is passed to the agent.
    state = env.reset()
    state = np.reshape(state, [1, state_size])
    # Presumably accumulates the episode reward; the accumulation itself is
    # not visible in this excerpt.
    cum_reward = 0
    # At most 1000 steps per episode.
    for i in range(1000):

        action = agent.act(state, is_training=True)

        # step() is unpacked as (observation, reward, done, info); the last
        # element is discarded.
        next_state, reward, done, _ = env.step(action)
        next_state = np.reshape(next_state, [1, state_size])

示例#4

显示文件

文件： lunar_lander.py 项目： jeffery1236/policy-gradient

    # Report the observation-space shape and the number of discrete actions.
    print(env.observation_space.shape, env.action_space.n)
    env.reset()

    # Run configuration. With test_mode False the saved model is not loaded
    # below.
    test_mode = False
    num_games = 2500
    # Starts at -inf; how it is updated is outside this excerpt.
    best_score = -np.inf
    # Both lists are created empty here; they are filled outside this excerpt.
    scores = []
    eps_history = []

    # Agent hyper-parameters; the state dimension is the first entry of the
    # observation-space shape.
    state_dims = env.observation_space.shape[0]
    num_actions = env.action_space.n
    lr = 0.001
    gamma = 0.99
    agent = PolicyGradientAgent(lr=lr,
                                gamma=gamma,
                                state_dims=state_dims,
                                num_actions=num_actions,
                                env_name='lunar_lander',
                                checkpoint_dir='temp/')
    if test_mode:
        agent.load_model()

    # Disabled: wrapping the environment in a gym Monitor writing to
    # 'temp/lunar_lander'.
    # env = gym.wrappers.Monitor(env, 'temp/lunar_lander',
    #                             video_callable=lambda episode_id: True, force=True)

    # One iteration per game: reset the environment and the per-game state.
    for count in range(num_games):
        state = env.reset()
        done = False
        score = 0

        # Loop until `done` becomes true; the code that updates it is outside
        # this excerpt.
        while not done:
            env.render()

示例#5

显示文件

文件： LambdaStudy.py 项目： roshanshariff/lunarlander

def make_framework(Lambda):
    """Return a ``Framework`` pairing ``simulator`` with a fresh agent.

    The agent is created with the given ``Lambda``, its state is saved to
    ``data/saved_state_lambda<Lambda>.npy`` and then loaded back from that
    file with ``mmap_mode="r+"``.
    NOTE(review): presumably this makes the agent state file-backed —
    confirm against ``PolicyGradientAgent.load_state``.
    """
    state_path = "data/saved_state_lambda" + str(Lambda) + ".npy"

    learner = PolicyGradientAgent(simulator, Lambda=Lambda)
    # Save first, then reload from the file that was just written.
    learner.save_state(state_path)
    learner.load_state(state_path, mmap_mode="r+")

    return Framework(simulator, learner)

示例#6

显示文件

文件： LambdaStudy.py 项目： keithmgould/lunarlander

def make_framework(Lambda):
    """Create an agent for ``Lambda`` and wrap it in a ``Framework``.

    The new agent's state is written to
    ``data/saved_state_lambda<Lambda>.npy`` and immediately read back with
    ``mmap_mode='r+'`` before the agent is handed to ``Framework`` together
    with the module-level ``simulator``.
    """
    saved_state = 'data/saved_state_lambda' + str(Lambda) + '.npy'

    policy_agent = PolicyGradientAgent(simulator, Lambda=Lambda)
    # Round-trip the state through the file on disk.
    policy_agent.save_state(saved_state)
    policy_agent.load_state(saved_state, mmap_mode='r+')

    wrapped = Framework(simulator, policy_agent)
    return wrapped