def main():
    actor_learning_rate = [1e-4, 3e-4, 6e-4, 1e-3, 3e-3, 6e-3, 1e-2]
    critic_learning_rate = [1e-4, 3e-4, 6e-4, 1e-3, 3e-3, 6e-3, 1e-2]
    tau = [.9, .93, .95, .97, .99]
    batch_size = [32, 64, 128, 256]
    p_rand = [0, .1, .2, .3, .4]
    sigma = [0, .1, .2, .3, .4]
    L2_norm_coeff = [0, .01, .03, .1, .3, .6, 1]

    load_checkpoint = False
    epochs = 40

    env = gym.make("FetchReach-v1")
    agent = Agent(n_actions=env.action_space.shape[0],
                  load_checkpoint=load_checkpoint, env=env, epochs=epochs)

    if not load_checkpoint:
        score_history = agent.train()
    else:
        agent.load_models()
        agent.env.render(mode='human')
        agent.episodes = 40
        score_history = agent.eval_agent()

    if not load_checkpoint:
        x = [i + 1 for i in range(epochs)]
        plot_error_drop(x, score_history)
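# The hyperparameter lists above are declared but never sampled. A minimal
# random-search sketch of their apparent intended use (an assumption, not
# part of the original snippet), plus an entry-point guard since main() is
# defined but never called here:
import random

def sample_config():
    return {
        'actor_lr': random.choice([1e-4, 3e-4, 6e-4, 1e-3, 3e-3, 6e-3, 1e-2]),
        'critic_lr': random.choice([1e-4, 3e-4, 6e-4, 1e-3, 3e-3, 6e-3, 1e-2]),
        'tau': random.choice([.9, .93, .95, .97, .99]),
        'batch_size': random.choice([32, 64, 128, 256]),
        'p_rand': random.choice([0, .1, .2, .3, .4]),
        'sigma': random.choice([0, .1, .2, .3, .4]),
        'L2_norm_coeff': random.choice([0, .01, .03, .1, .3, .6, 1]),
    }

if __name__ == '__main__':
    main()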
from ddpg_torch import Agent
import gym
import numpy as np
#from utils import plotLearning

env = gym.make('LunarLanderContinuous-v2')
agent = Agent(alpha=0.000025, beta=0.00025, input_dims=[8], tau=0.001, env=env)
np.random.seed(0)

agent.load_models()

score_history = []
ep = 0
while True:
    ep += 1
    done = False
    score = 0
    obs = env.reset()
    while not done:
        env.render()
        act = agent.choose_action(obs)
        new_state, reward, done, info = env.step(act)
        agent.remember(obs, act, reward, new_state, int(done))
        #agent.learn()
        score += reward
        obs = new_state
    score_history.append(score)
    print('episode', ep, 'score %.2f' % score,
          '100 game average %.2f' % np.mean(score_history[-100:]))
    if ep % 25 == 0:
        agent.save_models()
    if len(score_history) >= 25 and np.mean(score_history[-25:]) > 200:
        break

filename = 'pendulum.png'
#plotLearning(score_history, filename, window=100)
        self.state = [0, 0, 0]
        self.steps = 0
        self.count = 0
        return np.array(self.state)


if __name__ == '__main__':
    setpoint = [18] * 300
    env = Process()
    n_games = 200
    fc1_dims = 400
    fc2_dims = 300
    agent = Agent(alpha=0.001, beta=0.001,
                  input_dims=env.observation_space.shape, tau=0.01,
                  batch_size=128, n_actions=env.action_space.shape[0],
                  fc1_dims=fc1_dims, fc2_dims=fc2_dims)
    f_name = (f'Process_alpha_{agent.alpha}_beta_{agent.beta}_{n_games}_games'
              f'_fc1_dims_{fc1_dims}_fc2_dims_{fc2_dims}')
    figure_file = 'plots/' + f_name + '.png'
    best_score = env.reward_range[0]
    scores = []
    Actions = []
    Observations = []
    for i in tqdm(range(n_games)):
        obs = env.reset()
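        # Hedged completion of the truncated episode body, following the step
        # pattern used by the sibling scripts in this repo (assumed, not from
        # the original snippet):
        done = False
        score = 0
        while not done:
            action = agent.choose_action(obs)
            obs_, reward, done, info = env.step(action)
            agent.remember(obs, action, reward, obs_, done)
            agent.learn()
            score += reward
            Actions.append(action)
            Observations.append(obs_)
            obs = obs_
        scores.append(score)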
"TIME_STEP": 0.01, "VIDEO_FPS": 30, } # env = LinkageEnv(angles_file, params, verbose=0) env = gym.make("LunarLanderContinuous-v2") agent = Agent( lr_actor=0.000025, lr_critic=0.00025, input_dims=[8], tau=0.001, env=env, batch_size=64, layer1_size=400, layer2_size=300, n_actions=2, ) np.random.seed(0) score_history = [] liveloss = PlotLosses() for i in range(100000): done = False score = 0
    },
    'duration': 3000,
    'offroad_terminal': True,
    'policy_frequency': 10,
    'simulation_frequency': 10,
    'vehicles_count': 20,
}

env = gym.make('highway-v0')
env.configure(config)
observation = env.reset()
observation = observation.reshape(observation.shape[0] * observation.shape[1],)
print(observation.shape)
print(env.action_space.sample())
print(env.config)

agent = Agent(alpha=0.0001, beta=0.001, input_dims=observation.shape,
              tau=0.001, batch_size=64, fc1_dims=400, fc2_dims=300,
              n_actions=env.action_space.shape[0])

n_games = 1000
test_agent = True
load_checkpoint = False
if test_agent:
    n_games = 100
    load_checkpoint = True

filename = 'Highway_alpha_' + str(agent.alpha) + '_beta_' + \
           str(agent.beta) + '_' + str(n_games) + '_games'
figure_file = 'plots/' + filename + '.png'

if load_checkpoint:
    agent.load_models()
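# highway-env's Kinematics observation is a (vehicles, features) matrix;
# reset() is flattened above, and every step() observation needs the same
# reshape before it is fed to the agent. A small helper (an addition, not in
# the original snippet):
def flatten_obs(obs):
    return obs.reshape(obs.shape[0] * obs.shape[1],)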
from ddpg_torch import Agent
import gym
import numpy as np
from utils import plotLearning

env = gym.make('LunarLanderContinuous-v2')
agent = Agent(alpha=0.000025, beta=0.00025, input_dims=[8], tau=0.001,
              env=env, batch_size=64, layer1_size=400, layer2_size=300,
              n_actions=2)

#agent.load_models()
np.random.seed(0)

score_history = []
for i in range(5000):
    obs = env.reset()
    done = False
    score = 0
    while not done:
        act = agent.choose_action(obs)
        new_state, reward, done, info = env.step(act)
        agent.remember(obs, act, reward, new_state, int(done))
        agent.learn()
        score += reward
        obs = new_state
    score_history.append(score)
    print('episode', i, 'score %.2f' % score,
          '100 game average %.2f' % np.mean(score_history[-100:]))
import gym
import numpy as np
import matplotlib.pyplot as plt

from ddpg_torch import Agent  # module name as in the sibling scripts (assumed)


def plotLearning(scores, filename, x=None, window=5):
    # Plot the running average of the last `window` scores.
    N = len(scores)
    running_avg = np.empty(N)
    for t in range(N):
        running_avg[t] = np.mean(scores[max(0, t - window):(t + 1)])
    if x is None:
        x = [i for i in range(N)]
    plt.ylabel('Score')
    plt.xlabel('Game')
    plt.plot(x, running_avg)
    plt.savefig(filename)
    plt.close()


env = gym.make('MountainCarContinuous-v0')
agent = Agent(alpha=0.033, beta=0.33, input_dims=[2], tau=0.001, env=env,
              batch_size=64, layer1_size=75, layer2_size=50, n_actions=1)
#agent.load_models()
np.random.seed(3)

score_history = []
for i in range(150):
    obs = env.reset()
    done = False
    score = 0
    while not done:
        act = agent.choose_action(obs)
        new_state, reward, done, info = env.step(act)
        agent.remember(obs, act, reward, new_state, int(done))
        agent.learn()
        score += reward
        obs = new_state  # advance the state; without this the loop replays the initial observation
    score_history.append(score)

# Plot the learning curve ('mountaincar.png' is a placeholder name, not from
# the original snippet).
plotLearning(score_history, 'mountaincar.png')
def main():
    actor_learning_rate = [1e-4, 3e-4, 6e-4, 1e-3, 3e-3, 6e-3, 1e-2]
    critic_learning_rate = [1e-4, 3e-4, 6e-4, 1e-3, 3e-3, 6e-3, 1e-2]
    tau = [.9, .93, .95, .97, .99]
    batch_size = [32, 64, 128, 256]
    p_rand = [0, .1, .2, .3, .4]
    sigma = [0, .1, .2, .3, .4]
    L2_norm_coeff = [0, .01, .03, .1, .3, .6, 1]

    load_checkpoint = False

    env = gym.make("CartPoleContinuousBulletEnv-v0")
    agent = Agent(input_dims=env.observation_space.shape,
                  n_actions=env.action_space.shape[0])
    episodes = 250
    filename = 'CartPoleContinuous.png'
    figure_file = 'plots/' + filename
    best_score = env.reward_range[0]
    score_history = []

    if load_checkpoint:
        agent.load_models()
        env.render(mode='human')

    for i in range(episodes):
        observation = env.reset()
        done = False
        score = 0
        while not done:
            action = agent.choose_action(observation)
            observation_, reward, done, info = env.step(action)
            score += reward
            agent.remember(observation, action, reward, observation_, done)
            if not load_checkpoint:
                agent.learn()
            else:
                env.render()
            observation = observation_
        score_history.append(score)
        avg_score = np.mean(score_history[-100:])
        if avg_score > best_score:
            best_score = avg_score
            if not load_checkpoint:
                agent.save_models()
        print("episode", i, "score", score, "average score", avg_score)

    if not load_checkpoint:
        x = [i + 1 for i in range(episodes)]
        plot_error_drop(x, score_history)
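# Entry-point guard (an addition; the original snippet defines main() but
# never calls it):
if __name__ == '__main__':
    main()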
import gym
import gym_lqr
import numpy as np
import pybullet_envs  # registers the *PyBulletEnv-v0 environments with gym
from ddpg_torch import Agent  # module name as in the sibling scripts (assumed)

#env = gym.make('gym_lqr:lqr-stochastic-v0')
#env = gym.make('gym_lqr:lqr-2d-v0')
#env = gym.make('gym_lqr:lqr-v0')
#env = gym.make('InvertedPendulumPyBulletEnv-v0')
#env = gym.make('InvertedPendulum-v2')
env = gym.make('Walker2DPyBulletEnv-v0')
#env = gym.make('Ant-v2')

#print(env.action_space.shape[0])
print(env.action_space.high)

agent = Agent(alpha=0.000025, beta=0.00025,
              input_dims=[env.observation_space.shape[0]], tau=0.001, env=env,
              batch_size=64, layer1_size=400, layer2_size=300,
              n_actions=env.action_space.shape[0],
              action_bound=env.action_space.high)

n_games = 3000

# uncomment this line and do a mkdir tmp && mkdir video if you want to
# record video of the agent playing the game.
#env = wrappers.Monitor(env, 'tmp/video', video_callable=lambda episode_id: True, force=True)

filename = 'inverted_pendulum.png'
#print(env.action_space.high)
figure_file = 'plots/' + filename
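# The training loop is truncated here. A minimal sketch following the
# episode pattern of the sibling scripts in this repo (assumed, not from the
# original snippet):
score_history = []
for i in range(n_games):
    obs = env.reset()
    done = False
    score = 0
    while not done:
        act = agent.choose_action(obs)
        new_state, reward, done, info = env.step(act)
        agent.remember(obs, act, reward, new_state, int(done))
        agent.learn()
        score += reward
        obs = new_state
    score_history.append(score)
    print('episode', i, 'score %.2f' % score,
          '100 game average %.2f' % np.mean(score_history[-100:]))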