Example #1
    params = {
        'epsi_high': 0.9,
        'epsi_low': 0.05,
        'decay': 200, 
        'lr': 0.001,
        'capacity': 10000,
        'batch_size': 32,
        'state_space_dim': env.state_dim,
        'action_space_dim': env.action_dim,
        }
    agent = Agent(**params)

    score = []
    mean = []

    for episode in range(1000):
        s0 = env.reset()
        total_reward = 1
        while True:
            env.render()
            a0 = agent.act(s0)                     # epsilon-greedy action from the current policy
            s1, r1, done = env.step(a0)

            if done:
                r1 = -1                            # penalise the terminal transition

            agent.put(s0, a0, r1, s1)              # store the transition in the replay buffer
            
            if done:
                break

            total_reward += r1
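
Example #1 stops before the learning step and the per-episode bookkeeping. A minimal sketch of how the tail of the loop might continue, assuming the Agent exposes a learn() method that samples a minibatch from its replay buffer (only act() and put() appear above, so learn() is an assumption) and reusing the score/mean lists declared earlier:

            s0 = s1                      # advance to the next state
            agent.learn()                # assumed: sample a minibatch and update the Q-network

        # after the while-loop: per-episode bookkeeping
        score.append(total_reward)
        last100 = score[-100:]
        mean.append(sum(last100) / len(last100))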
Example #2
# Global variables
MAX_EPISODES = 500
MAX_EP_STEPS = 500

# Set the environment
env = ArmEnv()
s_dim = env.state_dim
a_dim = env.action_dim
a_bound = env.action_bound

# set the RL method
rl = DDPG(a_dim, s_dim, a_bound)

# Start training

for i in range(MAX_EPISODES):
    s = env.reset()
    for j in range(MAX_EP_STEPS):
        env.render()

        a = rl.choose_action(s)

        s_, r, done = env.step(a)

        rl.store_transition(s, a, r, s_)

        if rl.memory_full:
            # start learning once the replay memory is full
            rl.learn()
        s = s_
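
Example #2 never checks the `done` flag that env.step() returns. If ArmEnv uses it to signal that the goal was reached (an assumption about the env's semantics), the inner loop could end the episode early, e.g. right after `s = s_`:

        if done:          # assumed: ArmEnv sets done once the arm reaches the goal
            break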
Example #3
def eval():
    # run the trained policy: restore saved parameters, then render episodes indefinitely
    rl.restore()
    env.render()
    env.viewer.set_vsync(True)
    while True:
        s = env.reset()
        for _ in range(200):
            env.render()
            a = rl.choose_action(s)
            s, r, done = env.step(a)
            if done:
                break


if ON_TRAIN:
    train()
else:
    eval()
# summary:
"""
env should have at least:
env.reset()
env.render()
env.step()
while RL should have at least:
rl.choose_action()
rl.store_transition()
rl.learn()
rl.memory_full
"""
Example #4
MAX_EPISODES = 500
MAX_EP_STEPS = 200

# set env
env = ArmEnv()
s_dim = env.state_dim
a_dim = env.action_dim
a_bound = env.action_bound

# set RL method
rl = DDPG(a_dim, s_dim, a_bound)

# start training
for i in range(MAX_EPISODES):
    s = env.reset()
    for j in range(MAX_EP_STEPS):
        env.render()

        a = rl.choose_action(s)

        s_, r, done = env.step(a)

        rl.store_transition(s, a, r, s_)

        if rl.memory_full:
            # start learning once the replay memory is full
            rl.learn()

        s = s_
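
Training runs like Examples #2 and #4 typically end by saving the learned parameters so that eval() in Example #3 can restore them. Assuming the DDPG class exposes a save() counterpart to the restore() used above (an assumption; only restore() appears in these examples):

# after the training loops finish
rl.save()  # assumed counterpart of the rl.restore() call in Example #3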