# NOTE(review): this chunk opens with a statement whose enclosing block is not
# visible (presumably the training branch) -- confirm its indentation.
# Persist the current weights to the path stored in args.model.
agent.save_weights(args.model, overwrite=True)

# Everything below runs only in TEST mode, i.e. when args.train is falsy.
if not args.train:
    agent.load_weights(args.model)

    if args.token:
        # TEST with a TOKEN: submit the run to the crowdAI grader.
        remote_base = 'http://grader.crowdai.org:1729'
        client = Client(remote_base)

        # Create the remote environment; this yields the first observation.
        observation = client.env_create(args.token)

        # The grader runs 3 simulations of at most 1000 steps each. We stop
        # after the last one, i.e. once env_reset() returns a falsy value.
        while True:
            v = np.array(observation).reshape((env.observation_space.shape[0]))
            action = agent.forward(v)
            [observation, reward, done, info] = client.env_step(action.tolist())
            if not done:
                continue
            observation = client.env_reset()
            if not observation:
                break

        client.submit()
    else:
        # TEST without a TOKEN: evaluate our algorithm locally for 1 episode.
        agent.test(env, nb_episodes=1, visualize=False, nb_max_episode_steps=500)

# No-op string literal carried over unchanged from the original text.
""" """
memory=memory, nb_steps_warmup_critic=100, nb_steps_warmup_actor=100, random_process=random_process, gamma=.99, target_model_update=1e-3, delta_range=(-100., 100.)) # agent = ContinuousDQNAgent(nb_actions=env.noutput, V_model=V_model, L_model=L_model, mu_model=mu_model, # memory=memory, nb_steps_warmup=1000, random_process=random_process, # gamma=.99, target_model_update=0.1) #agent.compile(Adam(lr=.001, clipnorm=1.), metrics=['mae']) agent.compile([RMSprop(lr=.001), RMSprop(lr=.001)], metrics=['mae']) # Okay, now it's time to learn something! We visualize the training here for show, but this # slows down training quite a lot. You can always safely abort the training prematurely using # Ctrl + C. if args.train: agent.fit(env, nb_steps=nallsteps, visualize=True, verbose=1, nb_max_episode_steps=env.timestep_limit, log_interval=10000) # After training is done, we save the final weights. agent.save_weights(args.output, overwrite=True) if not args.train: agent.load_weights(args.output) # Finally, evaluate our algorithm for 5 episodes. if args.env != "Arm": agent.test(env, nb_episodes=5, visualize=True, nb_max_episode_steps=500) else: for i in range(10000): if i % 300 == 0: env.new_target() print("Target shoulder = %f, elbow = %f" % (env.shoulder,env.elbow)) env.step(agent.forward(env.get_observation()))
target_model_update=1e-3) agent.compile(optimizer=Adam( lr=1e-3, beta_1=0.9, beta_2=0.999, epsilon=None, decay=0.0, amsgrad=False)) if __name__ == '__main__': # Load agent.load_weights('OsmoEnv.h5f') # Train # agent.fit(env, nb_steps=50000, verbose=1, nb_max_episode_steps=200) # # # Weights # agent.save_weights('OsmoEnv.h5f',overwrite=True) # # # Load # agent.load_weights('OsmoEnv.h5f') # Test # agent.test(env, visualize=False, nb_episodes=50, nb_max_episode_steps=200) #Play for _ in range(10): observation = env.reset() done = False while not done: action = agent.forward(observation) observation, reward, done, info = env.step(action) print(action) else: print(info)
# NOTE(review): this print presumably belongs to a branch that starts above
# the visible chunk -- confirm its indentation.
print("not found weights-file")

# ROS wiring: callback_observe receives /observe messages, and velocity
# commands are published on /cmd_vel.
rospy.init_node('pub_drive', anonymous=True)
rospy.Subscriber("/observe", Float32MultiArray, callback_observe)
pub = rospy.Publisher('/cmd_vel', Twist, queue_size=1)
r = rospy.Rate(10)  # 10 Hz (the previous comment said 5hz, but the argument is 10)

# Plot with the y axis limited to +/- 50 degrees, expressed in radians.
fig, ax = plt.subplots(1, 1)
ax.set_ylim(math.radians(-50), math.radians(50))
x, y = [], []
step_count = 0

while not rospy.is_shutdown():
    # observe_data is presumably refreshed by callback_observe -- confirm.
    predict_action = agent.forward(observe_data)
    pub_vel = Twist()

    # Limit the linear command to the range [-0.8, 0.8].
    linear_cmd = predict_action[0]
    pub_vel.linear.x = 0.8 if linear_cmd > 0.8 else (-0.8 if linear_cmd < -0.8 else linear_cmd)

    # Limit the angular command to the range [-0.8, 0.8].
    angular_cmd = predict_action[1]
    pub_vel.angular.z = 0.8 if angular_cmd > 0.8 else (-0.8 if angular_cmd < -0.8 else angular_cmd)

    # Halve the linear velocity after limiting it.
    pub_vel.linear.x *= 0.5
# NOTE(review): the enclosing block of this save call is not visible here
# (presumably the training branch) -- confirm its indentation.
agent.save_weights(args.model, overwrite=True)

# TEST mode with a TOKEN: replay the trained agent against the crowdAI grader.
if args.token and not args.train:
    agent.load_weights(args.model)

    # Settings
    remote_base = 'http://grader.crowdai.org:1729'
    client = Client(remote_base)

    # Create environment
    observation = client.env_create(args.token)

    # The grader runs 3 simulations of at most 1000 steps each. We stop after
    # the last one: env_reset() then returns a falsy observation.
    while True:
        # Convert the observation to an array shaped to the first dimension
        # of the local env's observation space before feeding the agent.
        v = np.array(observation).reshape((env.observation_space.shape[0]))
        action = agent.forward(v)
        [observation, reward, done, info] = client.env_step(action.tolist())
        if done:
            observation = client.env_reset()
            if not observation:
                break

    client.submit()

# TEST mode without a TOKEN: run a local test experiment.
if not (args.train or args.token):
    agent.load_weights(args.model)
    # Finally, evaluate our algorithm for 1 episode.
    agent.test(env, nb_episodes=1, visualize=False, nb_max_episode_steps=500)
nb_max_episode_steps=1000) print('Saving model ' + args.model) agent.save_weights(args.model, overwrite=True) print('Saved model ' + args.model) with open(args.model + '_history', 'w') as f: f.write(str(keras_history.history)) summarize.plot_diagrams(keras_history.history, args.model) if args.test: agent.load_weights(args.model) agent.test(env, nb_episodes=1, nb_max_episode_steps=env.timestep_limit) if args.submit: agent.load_weights(args.model) remote_base = 'http://grader.crowdai.org:1729' token = '688545d8ba985c174b4f967b40924a43' client = Client(remote_base) observation = client.env_create(token) # The grader runs 3 simulations of at most 1000 steps each. We stop after the last one while True: [observation, reward, done, info] = client.env_step(agent.forward(observation).tolist()) print(observation) if done: observation = client.env_reset() if not observation: break client.submit()
# Ctrl + C.
# (The line above is the tail of a comment that starts before this chunk.)
# Train and save the weights when args.train is set; otherwise load the saved
# weights and evaluate the agent.
if args.train:
    agent.fit(env, nb_steps=nallsteps, visualize=True, verbose=1,
              nb_max_episode_steps=env.timestep_limit, log_interval=10000,
              prefix=prefix)
    # After training is done, we save the final weights.
    agent.save_weights("%s.h5f" % args.output, overwrite=True)

if not args.train:
    agent.load_weights("%s.h5f" % args.output)
    # Finally, evaluate our algorithm for 5 episodes.
    # NOTE(review): the evaluation is assumed to be nested under the
    # "not training" branch; the collapsed source does not show indentation.
    if args.env != "Arm":
        agent.test(env, nb_episodes=5, visualize=True, nb_max_episode_steps=500)
    else:
        import sys  # local import: needed only for the status line below

        # Drive the arm step by step, picking a new target every 300 steps.
        for i in range(10000):
            if i % 300 == 0:
                env.new_target()
                print("\n\nTarget shoulder = %f, elbow = %f" % (env.shoulder, env.elbow))
            obs = env.get_observation()
            # Was a Python 2 `print ... ,` statement, which is a syntax error
            # on Python 3. Writing to stdout emits the same text (the line
            # ends in "\r" with no newline) and parses on both versions.
            sys.stdout.write("Actual shoulder = %f, elbow = %f\r" % (obs[2], obs[3]))
            env.step(agent.forward(obs))
# Switch the environment out of training mode and set its plot grid to 1 x 5.
env.is_train = False
env.plot_row = 1
env.plot_col = 5

# Critic dump: for every candidate action, evaluate the target critic on all
# states and append the result as additional column(s) of q_values.
q_values = pd.DataFrame()
st = status.reshape([-1, 1])
for action in actions:
    state1_batch_with_action = [
        st,
        np.ones(st.shape).reshape(-1, 1, 1) * action,
    ]
    critic_out = agent.target_critic.predict_on_batch(state1_batch_with_action)
    q_values = pd.concat([q_values, pd.DataFrame(critic_out)], axis=1)
# NOTE(review): the CSV write is assumed to happen once after the loop; the
# collapsed source does not show its indentation.
q_values.to_csv('critic.csv')

# Actor dump: record the first component of the agent's action for each state.
with open('actor.json', 'w') as fw:
    observation = status.tolist()
    action = [float(agent.forward(np.array([obs]))[0]) for obs in observation]
    json.dump({'observation': observation, 'action': action}, fw)

# Run 5 short, visualized test episodes.
agent.test(env, nb_episodes=5, visualize=True, verbose=1, nb_max_episode_steps=20)

# Call ioff() and show() on the plotting module exposed by the environment.
env.plt.ioff()
env.plt.show()
# gamma=.99, target_model_update=0.1)
# (The line above is the tail of commented-out code that starts before this
# chunk.)
# agent.compile(Adam(lr=.001, clipnorm=1.), metrics=['mae'])
agent.compile([RMSprop(lr=.001), RMSprop(lr=.001)], metrics=['mae'])

# Prefix handed to agent.fit: args.output when given, otherwise a name built
# from the environment name and the sigma / theta arguments.
# NOTE(review): the weight file names below still use args.output directly,
# so the fallback prefix is not applied to them -- confirm whether intended.
prefix = args.output or "%s_s%f_t%f" % (args.env ,float(args.sigma), float(args.theta))

# Okay, now it's time to learn something! We visualize the training here for
# show, but this slows down training quite a lot. You can always safely abort
# the training prematurely using Ctrl + C.
if args.train:
    agent.fit(env, nb_steps=nallsteps, visualize=True, verbose=1,
              nb_max_episode_steps=env.timestep_limit, log_interval=10000,
              prefix=prefix)
    # After training is done, we save the final weights.
    agent.save_weights("%s.h5f" % args.output, overwrite=True)

if not args.train:
    agent.load_weights("%s.h5f" % args.output)
    # Finally, evaluate our algorithm for 5 episodes.
    # NOTE(review): the evaluation is assumed to be nested under the
    # "not training" branch; the collapsed source does not show indentation.
    if args.env != "Arm":
        agent.test(env, nb_episodes=5, visualize=True, nb_max_episode_steps=500)
    else:
        import sys  # local import: needed only for the status line below

        # Drive the arm step by step, picking a new target every 300 steps.
        for i in range(10000):
            if i % 300 == 0:
                env.new_target()
                print("\n\nTarget shoulder = %f, elbow = %f" % (env.shoulder,env.elbow))
            obs = env.get_observation()
            # Was a Python 2 `print ... ,` statement, which is a syntax error
            # on Python 3. Writing to stdout emits the same text (the line
            # ends in "\r" with no newline) and parses on both versions.
            sys.stdout.write("Actual shoulder = %f, elbow = %f\r" % (obs[2],obs[3]))
            env.step(agent.forward(obs))