Example #1
from env import ArmEnv
from rl import DDPG

MAX_EPISODES = 900
MAX_EP_STEPS = 200
ON_TRAIN = False

# set env
env = ArmEnv()
s_dim = env.state_dim
a_dim = env.action_dim
a_bound = env.action_bound

# set RL method (continuous)
rl = DDPG(a_dim, s_dim, a_bound)

steps = []
def train():
    # start training
    for i in range(MAX_EPISODES):
        s = env.reset()
        ep_r = 0.
        for j in range(MAX_EP_STEPS):
            env.render()

            a = rl.choose_action(s)

            s_, r, done = env.step(a)

            rl.store_transition(s, a, r, s_)
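The snippet is cut off inside the step loop. Based on the same loop in Examples #6 and #10, a hedged sketch of how it typically continues (learning once the replay memory is full, then advancing the state and logging the episode; the print format is illustrative):

            ep_r += r
            if rl.memory_full:
                # start learning only after the replay memory has been filled
                rl.learn()

            s = s_
            if done or j == MAX_EP_STEPS - 1:
                print('Ep: %i | %s | ep_r: %.1f | steps: %i' % (i, 'done' if done else '----', ep_r, j))
                break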
Example #2
File: main.py Project: MorvanZhou/RLarm
            "lr": 0.0001,
            "memory_capacity": 9000
        }

    # set env
    print(PARAMS)
    env = ArmEnv(n_arms=PARAMS["n_arms"],
                 random_goal=PARAMS["random_target"],
                 on_mouse=False if PARAMS["training"] else True,
                 show_fps=args.show_fps,
                 fps=args.fps)
    s_dim = env.state_dim
    a_dim = env.action_dim
    a_bound = env.action_bound

    # set RL method (continuous)
    rl = DDPG(
        a_dim,
        s_dim,
        a_bound,
        soft_replace=PARAMS["soft_replace"],
        tau=PARAMS["tau"],
        gamma=PARAMS["gamma"],
        lr=PARAMS["lr"],
    )

    MODEL_DIR = "models/{}arms".format(PARAMS["n_arms"])
    if PARAMS["training"]:
        train()
    else:
        eval()
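MODEL_DIR is defined but the snippet ends before it is used. A minimal sketch of the save/restore step it presumably feeds, assuming (hypothetically) that the DDPG class exposes save/restore helpers; neither call is shown in the snippet itself:

    import os
    os.makedirs(MODEL_DIR, exist_ok=True)
    if PARAMS["training"]:
        rl.save(MODEL_DIR)       # assumed helper for persisting the trained weights
    else:
        rl.restore(MODEL_DIR)    # assumed helper for loading weights before evaluation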
Example #3
from env import ArmEnv
from rl import DDPG
import time as t
import numpy as np
MAX_EPISODES = 500
MAX_EP_STEPS = 200
ON_TRAIN = False

# set env
env = ArmEnv()
s_dim = env.state_dim
a_dim = env.action_dim
a_bound = env.action_bound

# set RL method (continuous)
rl = DDPG(s_dim, a_dim, a_bound)

steps = []


def train():
    # start training
    for i in range(MAX_EPISODES):
        s = env.reset()
        ep_r = 0.
        for j in range(MAX_EP_STEPS):
            env.render()
            #a=env.sample_action()
            a = rl.choose_action(s)
            if (np.isnan(a[0])):
                a[0] = 0
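The guard above only catches a NaN in the first action component. A more general sketch (not from the original snippet) sanitizes the whole action vector with NumPy before stepping the environment:

            # replace every NaN in the action vector with 0 before env.step()
            a = np.nan_to_num(a)
            s_, r, done = env.step(a)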
Example #4
from env import ArmEnv
from rl import DDPG
import time
import numpy as np

MAX_EPISODES = 50000
MAX_EP_STEPS = 200
ON_TRAIN = True

# set env
env = ArmEnv()

# set RL method (continuous)
rl = DDPG()

steps = []


def train():
    # start training
    RENDER = False
    done_cnt = 0
    var = 2.00
    for i in range(MAX_EPISODES):
        s = env.reset()
        ep_r = 0.
        for j in range(MAX_EP_STEPS):
            if RENDER:
                env.render()

            if len(rl.memory) <= 9999:
Example #5
from env import TrafficEnv
from rl import DDPG
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np

MAX_EPOCH = 500
MAX_EVENT = 100

env = TrafficEnv()
s_dim = env.state_dim
a_dim = env.action_dim
a_bound = env.action_bound
r_dim = env.reward_dim

rl = DDPG(a_dim, s_dim, r_dim, a_bound)


def save_result(d, id):
    data_dict = {
        'FOT_Control.Speed': d[:, 0],
        'IMU.Accel_X': d[:, 1],
        'SMS.X_Velocity_T0': d[:, 2],
        'SMS.X_RANGE_T0': d[:, 3]
    }
    d_frame = pd.DataFrame(data_dict)
    d_frame.to_csv('./data/result_' + str(id) + '.csv', sep=',')


def plot(data1, data2):
    plt.figure(1)
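The plot helper is truncated right after opening a figure. A minimal sketch of a two-series comparison plot, assuming data1 and data2 are 1-D arrays (which columns the project actually plots is not shown):

def plot(data1, data2):
    # overlay the two traces on one figure for a quick visual comparison
    plt.figure(1)
    plt.plot(data1, label='data1')
    plt.plot(data2, label='data2')
    plt.xlabel('time step')
    plt.legend()
    plt.show()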
Example #6
File: main.py Project: unasm/utils
############################################### 

import os
import sys
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from env import ArmEnv
from rl import DDPG

MAX_EPISODES = 500
MAX_EP_STEPS = 200

env = ArmEnv()
s_dim = env.state_dim
a_dim = env.action_dim
a_bound = env.action_bound


rl = DDPG(a_dim, s_dim, a_bound)
for i in range(MAX_EPISODES):
    s = env.reset()
    for j in range(MAX_EP_STEPS):
        env.render()
        a = rl.choose_action(s)
        s_, r, done = env.step(a)
        rl.store_transition(s, a, r, s_)
        if rl.memory_full():
            rl.learn()
        s = s_
Example #7
from env import ArmEnv
from rl import DDPG
from noise import noise

MAX_EPISODES = 3000
MAX_EP_STEPS = 100
ON_TRAIN = True

# set env
env = ArmEnv([0., 0., 0.], [0., 0., 0.], [0., 0., 0.], [0., 0., 0.])
s_dim = env.state_dim
a_dim = env.action_dim
a_bound = env.action_bound

# set RL method (continuous)
a_scale = [1000, 3000]
#a_scale = [0, 10]
rl = DDPG(a_dim, s_dim, a_scale)

noise_mean = 0
noise_std_dev = 0.2
noise_theta = 0.15
noise_dt = env.dt
noise = noise(a_dim, noise_mean, noise_std_dev, noise_theta, noise_dt)
steps = []


def train():
    # start training
    for i in range(MAX_EPISODES):
        s = env.reset()
        noise.reset()
        ep_r = 0.
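The mean / std_dev / theta / dt parameters above are those of an Ornstein-Uhlenbeck process, the standard exploration noise for DDPG. The imported noise class itself is not shown; below is a self-contained OU sketch with the same constructor arguments (an illustration, not the project's actual implementation):

import numpy as np

class OUNoise:
    """Ornstein-Uhlenbeck process: x += theta * (mu - x) * dt + sigma * sqrt(dt) * N(0, 1)."""
    def __init__(self, a_dim, mean, std_dev, theta, dt):
        self.a_dim, self.mu, self.sigma, self.theta, self.dt = a_dim, mean, std_dev, theta, dt
        self.reset()

    def reset(self):
        # restart the process at its mean at the beginning of each episode
        self.x = np.full(self.a_dim, self.mu, dtype=float)

    def sample(self):
        self.x += self.theta * (self.mu - self.x) * self.dt \
                  + self.sigma * np.sqrt(self.dt) * np.random.randn(self.a_dim)
        return self.x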
Example #8
from env import ArmEnv
from rl import DDPG
import random

MAX_EPISODES = 300
MAX_EP_STEPS = 200
ON_TRAIN = True

# set env
env = ArmEnv()
env.get_train_state = ON_TRAIN
s_dim = env.state_dim
a_dim = env.action_dim
a_bound = env.action_bound

# set RL method (continuous)
rl = DDPG(a_dim, s_dim, a_bound)
rl.get_train_state = ON_TRAIN


def train():
    # start training
    sample_goal = [None] * 36
    for incx in range(6):
        for incy in range(6):
            sample_goal[incy * 6 + incx] = {
                'x': (100. + incx * 40),
                'y': (100. + incy * 40),
                'l': 40
            }
    print(len(sample_goal))
    for i in range(MAX_EPISODES):
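The snippet ends as the episode loop starts. One plausible use of sample_goal, sketched below, is to cycle the target through the 6x6 grid each episode; note that `env.goal` is an assumed attribute (its dict layout matches the {'x', 'y', 'l'} entries built above):

        # (sketch) rotate through the 36 sampled goals, one per episode
        env.goal = sample_goal[i % len(sample_goal)]
        s = env.reset()
        ep_r = 0.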
Example #9
from env import ArmEnv
from rl import DDPG

# Global variables
MAX_EPISODES = 500
MAX_EP_STEPS = 500

# Set the environment
env = ArmEnv()
s_dim = env.state_dim
a_dim = env.action_dim
a_bound = env.action_bound

# set the RL method
rl = DDPG(a_dim, s_dim, a_bound)

# start Training

for i in range(MAX_EPISODES):
    s = env.reset()
    for j in range(MAX_EP_STEPS):
        env.render()

        a = rl.choose_action(s)

        s_, r, done = env.step(a)

        rl.store_transition(s, a, r, s_)

        if rl.memory_full:
Example #10
from env import ArmEnv
from rl import DDPG

MAX_EPISODES = 500
MAX_EP_STEPS = 200
ON_TRAIN = True

# set env
env = ArmEnv()
state_dim = env.state_dim
action_dim = env.action_dim
action_bound = env.action_bound

# set RL method (continuous)
rl = DDPG(state_dim, action_dim, action_bound)


def train():
    # start training
    for i in range(MAX_EPISODES):
        state = env.reset()
        ep_reward = 0.
        for j in range(MAX_EP_STEPS):
            env.render()
            action = rl.choose_action(state)
            state_, reward, done = env.step(action)

            # memory storage
            rl.store_transition(state, action, reward, state_)

            if rl.memory_full:
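ON_TRAIN gates a train/eval split like the one dispatched in Example #2, but the evaluation path is cut off here. A hedged sketch of a typical eval loop (rl.restore() is an assumed helper for loading saved weights, not shown in the snippet):

def eval():
    # (sketch) load trained weights and run the policy greedily, without storing transitions
    rl.restore()    # assumed helper
    state = env.reset()
    while True:
        env.render()
        action = rl.choose_action(state)
        state, reward, done = env.step(action)


if ON_TRAIN:
    train()
else:
    eval()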