    'epsi_high': 0.9,                   # starting epsilon for epsilon-greedy exploration
    'epsi_low': 0.05,                   # final epsilon after decay
    'decay': 200,                       # decay constant of the exploration schedule
    'lr': 0.001,                        # learning rate
    'capacity': 10000,                  # replay memory size
    'batch_size': 32,
    'state_space_dim': env.state_dim,
    'action_space_dim': env.action_dim,
}
agent = Agent(**params)

score = []
mean = []

for episode in range(1000):
    s0 = env.reset()
    total_reward = 1
    while True:
        env.render()
        a0 = agent.act(s0)
        s1, r1, done = env.step(a0)
        if done:
            r1 = -1                     # penalise the terminal transition
        agent.put(s0, a0, r1, s1)       # store the transition in the replay memory
        if done:
            break
        total_reward += r1
        s0 = s1                         # move on to the next state
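The 'epsi_high', 'epsi_low' and 'decay' entries suggest an exponentially decaying epsilon-greedy schedule. A minimal sketch of how act() might use them, assuming the agent exposes a Q-function; the names epsilon, epsilon_greedy and q_values below are illustrative, not the Agent's actual internals:

import math
import random

def epsilon(step, epsi_high=0.9, epsi_low=0.05, decay=200):
    # exploration rate decays exponentially from epsi_high toward epsi_low;
    # 'decay' controls roughly how many steps the transition takes
    return epsi_low + (epsi_high - epsi_low) * math.exp(-step / decay)

def epsilon_greedy(q_values, s0, step, action_space_dim):
    # hypothetical epsilon-greedy choice: q_values(s0) stands in for the
    # agent's Q-network and is assumed to return one value per action
    if random.random() < epsilon(step):
        return random.randrange(action_space_dim)                 # explore
    values = q_values(s0)
    return max(range(action_space_dim), key=lambda a: values[a])  # exploit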
# Global variables
MAX_EPISODES = 500
MAX_EP_STEPS = 500

# Set the environment
env = ArmEnv()
s_dim = env.state_dim
a_dim = env.action_dim
a_bound = env.action_bound

# Set the RL method
rl = DDPG(a_dim, s_dim, a_bound)

# Start training
for i in range(MAX_EPISODES):
    s = env.reset()
    for j in range(MAX_EP_STEPS):
        env.render()
        a = rl.choose_action(s)
        s_, r, done = env.step(a)
        rl.store_transition(s, a, r, s_)
        if rl.memory_full:
            # start learning once the replay memory has been filled
            rl.learn()
        s = s_
def eval():
    rl.restore()                        # load the trained weights
    env.render()
    env.viewer.set_vsync(True)
    while True:
        s = env.reset()
        for _ in range(200):
            env.render()
            a = rl.choose_action(s)
            s, r, done = env.step(a)
            if done:
                break

if ON_TRAIN:
    train()
else:
    eval()

# summary:
"""
env should have at least:
env.reset()
env.render()
env.step()

while RL should have at least:
rl.choose_action()
rl.store_transition()
rl.learn()
rl.memory_full
"""
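The summary above is effectively an interface contract between the environment and the RL method. Read as code, it maps onto skeletons like the following; MinimalEnv and MinimalRL are placeholder names, and the real ArmEnv and DDPG additionally expose state_dim, action_dim, action_bound, save()/restore(), and so on:

class MinimalEnv:
    def reset(self):
        """Return the initial state observation."""

    def render(self):
        """Draw the current frame."""

    def step(self, action):
        """Apply the action and return (next_state, reward, done)."""


class MinimalRL:
    memory_full = False   # flips to True once the replay memory has been filled

    def choose_action(self, s):
        """Map a state to an action."""

    def store_transition(self, s, a, r, s_):
        """Push one transition into the replay memory."""

    def learn(self):
        """Sample from the replay memory and update the networks."""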
MAX_EPISODES = 500
MAX_EP_STEPS = 200

# set env
env = ArmEnv()
s_dim = env.state_dim
a_dim = env.action_dim
a_bound = env.action_bound

# set RL method
rl = DDPG(a_dim, s_dim, a_bound)

# start training
for i in range(MAX_EPISODES):
    s = env.reset()
    for j in range(MAX_EP_STEPS):
        env.render()
        a = rl.choose_action(s)
        s_, r, done = env.step(a)
        rl.store_transition(s, a, r, s_)
        if rl.memory_full:
            # start learning once the replay memory has been filled
            rl.learn()
        s = s_
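The training loop only calls rl.learn() once rl.memory_full is set, so store_transition() has to fill a fixed-size memory and raise that flag when the buffer wraps around. A minimal sketch of that bookkeeping, assuming a NumPy ring buffer; the actual DDPG class bundles this with its networks and may lay the memory out differently:

import numpy as np

class ReplayMemory:
    """Fixed-size ring buffer holding one (s, a, r, s_) transition per row."""

    def __init__(self, capacity, s_dim, a_dim):
        self.data = np.zeros((capacity, s_dim * 2 + a_dim + 1), dtype=np.float32)
        self.capacity = capacity
        self.pointer = 0
        self.memory_full = False

    def store_transition(self, s, a, r, s_):
        transition = np.hstack((s, a, [r], s_))
        index = self.pointer % self.capacity      # overwrite the oldest entry once full
        self.data[index, :] = transition
        self.pointer += 1
        if self.pointer >= self.capacity:
            self.memory_full = True               # the training loop starts learning here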