Example #1
from collections import deque

import numpy as np
from stable_baselines import DDPG
from stable_baselines.ddpg.policies import MlpPolicy

# env, param_noise, action_noise, env_timesteps, seed, environment and the
# external `model` network used below are assumed to be defined earlier in the
# original script. learn_env() and train_network() are project-specific
# methods, not part of the stock stable-baselines API.

print('Model initialized')

policy = DDPG(MlpPolicy, env, verbose=1, param_noise=param_noise, action_noise=action_noise)
total_steps = 1000

print('DDPG initialized')

# replay buffers for the three sample streams returned by learn_env()
sample_buffer1 = deque(maxlen=50000)
sample_buffer2 = deque(maxlen=50000)
sample_buffer3 = deque(maxlen=50000)

for i in range(total_steps):

    print('Iteration:', i + 1)
    print('Training policy on environment')
    samples = policy.learn_env(total_timesteps=env_timesteps, seed=seed,
                               environment=environment, log_interval=10)

    env_timesteps = 1000  # change env_timesteps from the next iteration onwards

    # accumulate the newly collected samples into the buffers
    for z in samples[0]:
        sample_buffer1.append(z)
    for z in samples[1]:
        sample_buffer2.append(z)
    for z in samples[2]:
        sample_buffer3.append(z)

    # train the external network on the buffered samples
    loss = model.train_network(np.array(sample_buffer1), np.array(sample_buffer2),
                               np.array(sample_buffer3), algorithm_id='DDPG',
                               mini_batch_num=1000)

    print('Model train loss =', loss)
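
Example #1 relies on several objects created elsewhere in the original script. A minimal sketch of that assumed setup is shown below; the names and values are hypothetical and mirror the construction in Example #2, while the external model object that provides train_network() is project-specific and is left out.

import gym
import numpy as np
from stable_baselines.common.vec_env import DummyVecEnv
from stable_baselines.ddpg.noise import OrnsteinUhlenbeckActionNoise

environment = 'Swimmer-v2'        # assumed Gym environment id
seed = 0
env_timesteps = 1000              # step budget for the first learn_env() call
env = DummyVecEnv([lambda: gym.make(environment)])

n_actions = env.action_space.shape[-1]
param_noise = None
action_noise = OrnsteinUhlenbeckActionNoise(mean=np.zeros(n_actions),
                                            sigma=0.5 * np.ones(n_actions))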
Example #2
import gym
import numpy as np
from stable_baselines import DDPG
from stable_baselines.common.vec_env import DummyVecEnv
from stable_baselines.ddpg.noise import OrnsteinUhlenbeckActionNoise
from stable_baselines.ddpg.policies import MlpPolicy

# environment, seed, total_timesteps and pathmodel are assumed to be defined
# earlier in the original script; learn_env() is a project-specific method
# (the stock stable-baselines training call is model.learn()).

env = gym.make(environment)
env = DummyVecEnv([lambda: env])

# the noise objects for DDPG
n_actions = env.action_space.shape[-1]
param_noise = None
action_noise = OrnsteinUhlenbeckActionNoise(mean=np.zeros(n_actions),
                                            sigma=float(0.5) * np.ones(n_actions))

model = DDPG(MlpPolicy,
             env,
             verbose=1,
             param_noise=param_noise,
             action_noise=action_noise)
n = model.learn_env(total_timesteps=total_timesteps,
                    seed=seed,
                    environment=environment)
model.save(pathmodel)
# np.save("swimmer_ddpg_reward.npy", n)

del model  # remove to demonstrate saving and loading
print("Done...")
model = DDPG.load(pathmodel)

# roll out the trained policy
obs = env.reset()
while True:
    action, _states = model.predict(obs)
    obs, rewards, dones, info = env.step(action)
    env.render()
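
Both examples call learn_env(), which appears to be a method added in this project's fork; with the stock stable-baselines library the equivalent call is model.learn(). A minimal, hypothetical configuration for running Example #2 could look like the sketch below; every name and value is an assumption, with the environment id guessed from the commented-out .npy filename.

# Hypothetical configuration assumed by Example #2 (not part of the original snippet)
environment = 'Swimmer-v2'        # Gym environment id, guessed from "swimmer_ddpg_reward.npy"
seed = 0                          # random seed passed to learn_env()
total_timesteps = 100000          # illustrative training budget
pathmodel = 'ddpg_swimmer'        # file prefix used by model.save() / DDPG.load()

# With the stock stable-baselines API, training would instead be:
# model.learn(total_timesteps=total_timesteps)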