Python DDPGAgent.forward примеры использования

Язык программирования: Python

Пространство имен/Пакет: rl.agents

Класс/Тип: DDPGAgent

Метод/Функция: forward

Примеров на hotexamples.com: 10

Python DDPGAgent.forward - 10 примеров найдено. Это лучшие примеры Python кода для rl.agents.DDPGAgent.forward, полученные из open source проектов. Вы можете ставить оценку каждому примеру, чтобы помочь нам улучшить качество примеров.

Основные методы

Показать Скрыть

compile(30)

fit(30)

load_weights(30)

save_weights(30)

test(30)

DDPGAgent(8)

forward(7)

select_action(2)

__init__(1)

learning(1)

load_actor_weights(1)

memory(1)

save_actor_weights(1)

train(1)

warm_fit(1)

Пример #1

Показать файл

Файл: model_1_testme.py Проект: sushrutt12/CognitiveComputing_FinalProject

    agent.save_weights(args.model, overwrite=True)

# Submission mode: a token was supplied and training is off, so the saved
# policy is replayed against the remote crowdAI grader.
if args.token and not args.train:
    agent.load_weights(args.model)

    # Grader endpoint and the client bound to it.
    remote_base = 'http://grader.crowdai.org:1729'
    client = Client(remote_base)

    # Opening the remote environment yields the first observation.
    observation = client.env_create(args.token)

    # Keep stepping until the grader has nothing more to run. Per the original
    # author's note, the grader runs 3 simulations of at most 1000 steps each.
    while True:
        v = np.array(observation).reshape(env.observation_space.shape[0])
        action = agent.forward(v)
        observation, reward, done, info = client.env_step(action.tolist())
        if not done:
            continue
        # Episode finished: request the next one; a falsy reply ends the run.
        observation = client.env_reset()
        if not observation:
            break

    client.submit()

# Local evaluation mode: neither training nor a submission token was requested.
if not (args.train or args.token):
    agent.load_weights(args.model)
    # One test episode, capped at 500 steps, with visualize disabled.
    agent.test(env, nb_max_episode_steps=500, nb_episodes=1, visualize=False)
""" """

Пример #2

Показать файл

Файл: train.ddpg.py Проект: wiplug/osim-rl

                  memory=memory, nb_steps_warmup_critic=100, nb_steps_warmup_actor=100,
                  random_process=random_process, gamma=.99, target_model_update=1e-3,
                  delta_range=(-100., 100.))
# agent = ContinuousDQNAgent(nb_actions=env.noutput, V_model=V_model, L_model=L_model, mu_model=mu_model,
#                            memory=memory, nb_steps_warmup=1000, random_process=random_process,
#                            gamma=.99, target_model_update=0.1)
#agent.compile(Adam(lr=.001, clipnorm=1.), metrics=['mae'])
# Two RMSprop optimizers with the same learning rate are passed as a list.
# NOTE(review): presumably one per network (actor / critic) — confirm against
# the agent's compile() contract.
agent.compile(
    [RMSprop(lr=.001), RMSprop(lr=.001)],
    metrics=['mae'])

# Training mode. visualize=True is on for show; the original author notes that
# it slows training down considerably and that Ctrl + C can be used to abort
# training early.
if args.train:
    agent.fit(
        env,
        nb_steps=nallsteps,
        visualize=True,
        verbose=1,
        nb_max_episode_steps=env.timestep_limit,
        log_interval=10000)
    # Persist the final weights once fit() has returned.
    agent.save_weights(args.output, overwrite=True)

# Evaluation mode: restore the saved weights, then either drive the Arm
# environment by hand or run the standard test routine.
if not args.train:
    agent.load_weights(args.output)

    if args.env == "Arm":
        # Manual loop of 10000 steps; a new target is requested every 300
        # steps and its shoulder/elbow values are printed.
        for i in range(10000):
            if i % 300 == 0:
                env.new_target()
                print("Target shoulder = %f, elbow = %f" % (env.shoulder, env.elbow))

            env.step(agent.forward(env.get_observation()))
    else:
        # Any other environment: 5 visualized test episodes, 500 steps max.
        agent.test(env, nb_episodes=5, visualize=True, nb_max_episode_steps=500)

Пример #3

Показать файл

Файл: DDPG_keras.py Проект: WHY-David/repo

                  target_model_update=1e-3)
agent.compile(
    optimizer=Adam(lr=1e-3, beta_1=0.9, beta_2=0.999, epsilon=None,
                   decay=0.0, amsgrad=False))

if __name__ == '__main__':
    # Restore previously saved weights before playing.
    agent.load_weights('OsmoEnv.h5f')

    # Disabled in the original script (kept for reference):
    #   agent.fit(env, nb_steps=50000, verbose=1, nb_max_episode_steps=200)
    #   agent.save_weights('OsmoEnv.h5f', overwrite=True)
    #   agent.load_weights('OsmoEnv.h5f')
    #   agent.test(env, visualize=False, nb_episodes=50, nb_max_episode_steps=200)

    # Play 10 episodes: print every action taken, then the info returned by
    # the step that ended the episode.
    for _ in range(10):
        observation = env.reset()
        done = False
        while not done:
            action = agent.forward(observation)
            observation, reward, done, info = env.step(action)
            print(action)
        # The loop above never breaks, so this always runs once it finishes.
        print(info)

Пример #4

Показать файл

Файл: pub_drive.py Проект: tsuchiya-i/rgbd_ddpg_navigation

    print("not found weights-file")

# Register this process as the 'pub_drive' node and subscribe to "/observe";
# incoming Float32MultiArray messages are handed to callback_observe.
rospy.init_node('pub_drive', anonymous=True)
rospy.Subscriber("/observe", Float32MultiArray, callback_observe)

# Publisher for Twist messages on '/cmd_vel' with a queue of one message.
pub = rospy.Publisher('/cmd_vel', Twist, queue_size=1)
r = rospy.Rate(10) # 10 Hz (the earlier "5hz" note contradicted the argument)

# Single-axes figure; the y-axis is limited to +/-50 degrees, given in radians.
fig, ax = plt.subplots(1, 1)
ax.set_ylim(math.radians(-50), math.radians(50))
# Empty series and a step counter, initialised before the main loop.
x = []
y = []
step_count = 0

while not rospy.is_shutdown():
    predict_action = agent.forward(observe_data)
    pub_vel = Twist()
    if predict_action[0] > 0.8:
        pub_vel.linear.x = 0.8
    elif predict_action[0] < -0.8:
        pub_vel.linear.x = -0.8
    else:
        pub_vel.linear.x = predict_action[0]
    if predict_action[1] > 0.8:
        pub_vel.angular.z = 0.8
    elif predict_action[1] < -0.8:
        pub_vel.angular.z = -0.8
    else:
        pub_vel.angular.z = predict_action[1]

    pub_vel.linear.x = pub_vel.linear.x*0.5

Пример #5

Показать файл

Файл: train.ddpg.py Проект: kumarjitpathakbangalore/backup

                  memory=memory, nb_steps_warmup_critic=100, nb_steps_warmup_actor=100,
                  random_process=random_process, gamma=.99, target_model_update=1e-3,
                  delta_range=(-100., 100.))
# agent = ContinuousDQNAgent(nb_actions=env.noutput, V_model=V_model, L_model=L_model, mu_model=mu_model,
#                            memory=memory, nb_steps_warmup=1000, random_process=random_process,
#                            gamma=.99, target_model_update=0.1)
#agent.compile(Adam(lr=.001, clipnorm=1.), metrics=['mae'])
# Two RMSprop optimizers with the same learning rate are passed as a list.
# NOTE(review): presumably one per network (actor / critic) — confirm against
# the agent's compile() contract.
agent.compile(
    [RMSprop(lr=.001), RMSprop(lr=.001)],
    metrics=['mae'])

# Training mode. visualize=True is on for show; the original author notes that
# it slows training down considerably and that Ctrl + C can be used to abort
# training early.
if args.train:
    agent.fit(
        env,
        nb_steps=nallsteps,
        visualize=True,
        verbose=1,
        nb_max_episode_steps=env.timestep_limit,
        log_interval=10000)
    # Persist the final weights once fit() has returned.
    agent.save_weights(args.output, overwrite=True)

# Evaluation mode: restore the saved weights, then either drive the Arm
# environment by hand or run the standard test routine.
if not args.train:
    agent.load_weights(args.output)

    if args.env == "Arm":
        # Manual loop of 10000 steps; a new target is requested every 300
        # steps and its shoulder/elbow values are printed.
        for i in range(10000):
            if i % 300 == 0:
                env.new_target()
                print("Target shoulder = %f, elbow = %f" % (env.shoulder, env.elbow))

            env.step(agent.forward(env.get_observation()))
    else:
        # Any other environment: 5 visualized test episodes, 500 steps max.
        agent.test(env, nb_episodes=5, visualize=True, nb_max_episode_steps=500)

Пример #6

Показать файл

Файл: example.py Проект: wiplug/osim-rl

    agent.save_weights(args.model, overwrite=True)

# Submission mode: a token was supplied and training is off, so the saved
# policy is replayed against the remote crowdAI grader.
if args.token and not args.train:
    agent.load_weights(args.model)

    # Grader endpoint and the client bound to it.
    remote_base = 'http://grader.crowdai.org:1729'
    client = Client(remote_base)

    # Opening the remote environment yields the first observation.
    observation = client.env_create(args.token)

    # Keep stepping until the grader has nothing more to run. Per the original
    # author's note, the grader runs 3 simulations of at most 1000 steps each.
    while True:
        v = np.array(observation).reshape(env.observation_space.shape[0])
        action = agent.forward(v)
        observation, reward, done, info = client.env_step(action.tolist())
        if not done:
            continue
        # Episode finished: request the next one; a falsy reply ends the run.
        observation = client.env_reset()
        if not observation:
            break

    client.submit()

# Local evaluation mode: neither training nor a submission token was requested.
if not (args.train or args.token):
    agent.load_weights(args.model)
    # One test episode, capped at 500 steps, with visualize disabled.
    agent.test(env, nb_max_episode_steps=500, nb_episodes=1, visualize=False)

Пример #7

Показать файл

Файл: example.py Проект: zhanghonglishanzai/learning2run

                              nb_max_episode_steps=1000)

    print('Saving model ' + args.model)
    agent.save_weights(args.model, overwrite=True)
    print('Saved model ' + args.model)

    with open(args.model + '_history', 'w') as f:
        f.write(str(keras_history.history))
    summarize.plot_diagrams(keras_history.history, args.model)

# Test mode: one episode, step cap taken from the environment.
if args.test:
    agent.load_weights(args.model)
    agent.test(env, nb_max_episode_steps=env.timestep_limit, nb_episodes=1)

# Submit mode: replay the saved policy against the remote grader.
if args.submit:
    agent.load_weights(args.model)
    remote_base = 'http://grader.crowdai.org:1729'
    # NOTE(review): the grader token is hard-coded in source; consider
    # supplying it via a command-line argument or the environment instead.
    token = '688545d8ba985c174b4f967b40924a43'
    client = Client(remote_base)
    observation = client.env_create(token)
    # Per the original author's note, the grader runs 3 simulations of at most
    # 1000 steps each; the loop stops once env_reset() returns a falsy value.
    while True:
        observation, reward, done, info = client.env_step(
            agent.forward(observation).tolist())
        print(observation)
        if not done:
            continue
        observation = client.env_reset()
        if not observation:
            break
    client.submit()

Пример #8

Показать файл

Файл: train.ddpg.py Проект: kumarjitpathakbangalore/backup

# Ctrl + C.
# Training mode: fit the agent, then write the final weights to
# "<output>.h5f".
if args.train:
    agent.fit(
        env, nb_steps=nallsteps, nb_max_episode_steps=env.timestep_limit,
        visualize=True, verbose=1, log_interval=10000, prefix=prefix)
    agent.save_weights("%s.h5f" % args.output, overwrite=True)

import sys  # needed below for the in-place status line

# Evaluation mode: restore "<output>.h5f", then either run the standard test
# routine or, for the Arm environment, drive the environment step by step.
if not args.train:
    agent.load_weights("%s.h5f" % args.output)
    if args.env != "Arm":
        # 5 visualized test episodes, 500 steps max each.
        agent.test(env,
                   nb_episodes=5,
                   visualize=True,
                   nb_max_episode_steps=500)
    else:
        for i in range(10000):
            # Request a new target every 300 steps and report it.
            if i % 300 == 0:
                env.new_target()
                print("\n\nTarget shoulder = %f, elbow = %f" %
                      (env.shoulder, env.elbow))

            obs = env.get_observation()
            # Status line rewritten in place: the text ends with "\r" and no
            # newline is emitted. This replaces a Python 2-only print
            # statement with a trailing comma, which is a SyntaxError on
            # Python 3; sys.stdout.write produces the same output on both.
            sys.stdout.write("Actual shoulder = %f, elbow = %f\r" % (obs[2], obs[3]))
            env.step(agent.forward(obs))

Пример #9

Показать файл

Файл: DDPG.py Проект: TangChangcheng/SquareEnvironment-0

# Set three attributes on the environment before the analysis below.
# NOTE(review): how env interprets is_train / plot_row / plot_col is not
# visible in this excerpt — confirm against the environment class.
env.is_train = False
env.plot_row = 1
env.plot_col = 5

# Accumulator for critic outputs; new columns are appended for every action.
q_values = pd.DataFrame()
# View status as a single column (shape (-1, 1)).
st = status.reshape([-1, 1])
for action in actions:
    # Two-element input: the states, plus an array of ones reshaped to
    # (-1, 1, 1) and scaled by the current action.
    state1_batch_with_action = [
        st, np.ones(st.shape).reshape(-1, 1, 1) * action
    ]
    # Append the target critic's batch prediction column-wise (axis=1).
    q_values = pd.concat([
        q_values,
        pd.DataFrame(
            agent.target_critic.predict_on_batch(state1_batch_with_action))
    ],
                         axis=1)
# Write the collected critic outputs to disk.
q_values.to_csv('critic.csv')

# Record, for every entry of status, the first component of the action the
# agent returns for it, and store both lists as JSON.
with open('actor.json', 'w') as fw:
    observation = status.tolist()
    action = []
    for obs in observation:
        predicted = agent.forward(np.array([obs]))
        action.append(float(predicted[0]))
    json.dump({'observation': observation, 'action': action}, fw)

# 5 visualized test episodes of at most 20 steps each.
agent.test(env, nb_episodes=5, visualize=True, verbose=1,
           nb_max_episode_steps=20)

# Turn interactive plotting off, then show the figure(s).
env.plt.ioff()
env.plt.show()

Пример #10

Показать файл

Файл: train.ddpg.py Проект: wiplug/osim-rl

#                            gamma=.99, target_model_update=0.1)
#agent.compile(Adam(lr=.001, clipnorm=1.), metrics=['mae'])
# Two RMSprop optimizers with the same learning rate are passed as a list.
# NOTE(review): presumably one per network (actor / critic) — confirm against
# the agent's compile() contract.
agent.compile(
    [RMSprop(lr=.001), RMSprop(lr=.001)],
    metrics=['mae'])

# Use the requested output name as prefix; otherwise derive one from the
# environment name and the sigma / theta arguments.
prefix = args.output or "%s_s%f_t%f" % (args.env, float(args.sigma), float(args.theta))

# Training mode. visualize=True is on for show; the original author notes that
# it slows training down considerably and that Ctrl + C can be used to abort
# training early.
if args.train:
    agent.fit(
        env,
        nb_steps=nallsteps,
        visualize=True,
        verbose=1,
        nb_max_episode_steps=env.timestep_limit,
        log_interval=10000,
        prefix=prefix)
    # NOTE(review): the weights file name is built from args.output, not from
    # prefix — confirm that is intended when args.output is empty.
    agent.save_weights("%s.h5f" % args.output, overwrite=True)

import sys  # needed below for the in-place status line

# Evaluation mode: restore "<output>.h5f", then either run the standard test
# routine or, for the Arm environment, drive the environment step by step.
if not args.train:
    agent.load_weights("%s.h5f" % args.output)
    if args.env != "Arm":
        # 5 visualized test episodes, 500 steps max each.
        agent.test(env, nb_episodes=5, visualize=True, nb_max_episode_steps=500)
    else:
        for i in range(10000):
            # Request a new target every 300 steps and report it.
            if i % 300 == 0:
                env.new_target()
                print("\n\nTarget shoulder = %f, elbow = %f" % (env.shoulder, env.elbow))

            obs = env.get_observation()
            # Status line rewritten in place: the text ends with "\r" and no
            # newline is emitted. This replaces a Python 2-only print
            # statement with a trailing comma, which is a SyntaxError on
            # Python 3; sys.stdout.write produces the same output on both.
            sys.stdout.write("Actual shoulder = %f, elbow = %f\r" % (obs[2], obs[3]))
            env.step(agent.forward(obs))