control_timestep=CTRL_EVERY_N_STEPS * env.TIMESTEP, state=obs[str(j)]["state"], target_pos=np.hstack( [TARGET_POS[wp_counters[j], 0:2], H + j * H_STEP])) #### Go to the next way point and loop ############################################################# for j in range(ARGS.num_drones): wp_counters[j] = wp_counters[j] + 1 if wp_counters[j] < ( NUM_WP - 1) else 0 #### Log the simulation ############################################################################ for j in range(ARGS.num_drones): logger.log(drone=j, timestamp=i / env.SIM_FREQ, state=obs[str(j)]["state"], control=np.hstack([ TARGET_POS[wp_counters[j], 0:2], H + j * H_STEP, np.zeros(9) ])) #### Printout ###################################################################################### if i % env.SIM_FREQ == 0: env.render() #### Print the matrices with the images captured by each drone ##################################### if ARGS.vision: for j in range(ARGS.num_drones): print(obs[str(j)]["rgb"].shape, np.average(obs[str(j)]["rgb"]), obs[str(j)]["dep"].shape, np.average(obs[str(j)]["dep"]), obs[str(j)]["seg"].shape,
test_env.AGGR_PHY_STEPS), num_drones=1) obs = test_env.reset() start = time.time() for i in range( 6 * int(test_env.SIM_FREQ / test_env.AGGR_PHY_STEPS)): # Up to 6'' action, _states = model.predict( obs, deterministic=True # OPTIONAL 'deterministic=False' ) obs, reward, done, info = test_env.step(action) test_env.render() if OBS == ObservationType.KIN: logger.log(drone=0, timestamp=i / test_env.SIM_FREQ, state=np.hstack( [obs[0:3], obs[3:15], np.resize(action, (4))]), control=np.zeros(12)) sync(np.floor(i * test_env.AGGR_PHY_STEPS), start, test_env.TIMESTEP) # if done: obs = test_env.reset() # OPTIONAL EPISODE HALT test_env.close() logger.save_as_csv("sa") # Optional CSV save logger.plot() # with np.load(ARGS.exp+'/evaluations.npz') as data: # print(data.files) # print(data['timesteps']) # print(data['results']) # print(data['ep_lengths'])
state=obs[str(j)]["state"], target_pos=np.hstack( [TARGET_POS[wp_counters[j], :], INIT_XYZS[j, 2]]), ) #### Go to the next way point and loop ##################### for j in range(2): wp_counters[j] = wp_counters[j] + 1 if wp_counters[j] < ( NUM_WP - 1) else 0 #### Log the simulation #################################### for j in range(2): logger.log(drone=j, timestamp=i / env.SIM_FREQ, state=obs[str(j)]["state"], control=np.hstack([ TARGET_POS[wp_counters[j], :], INIT_XYZS[j, 2], np.zeros(9) ])) #### Printout ############################################## if i % env.SIM_FREQ == 0: env.render() #### Sync the simulation ################################### if ARGS.gui: sync(i, START, env.TIMESTEP) #### Close the environment ################################# env.close()
if i % CTRL_EVERY_N_STEPS == 0: #### Compute control for the current way point ############# action["0"], _, _ = ctrl.computeControlFromState( control_timestep=CTRL_EVERY_N_STEPS * env.TIMESTEP, state=obs["0"]["state"], target_pos=TARGET_POS[wp_counter, :], ) #### Go to the next way point and loop ##################### wp_counter = wp_counter + 1 if wp_counter < (NUM_WP - 1) else 0 #### Log the simulation #################################### logger.log(drone=0, timestamp=i / env.SIM_FREQ, state=obs["0"]["state"], control=np.hstack([TARGET_POS[wp_counter, :], np.zeros(9)])) #### Printout ############################################## if i % env.SIM_FREQ == 0: env.render() #### Sync the simulation ################################### if ARGS.gui: sync(i, START, env.TIMESTEP) #### Close the environment ################################# env.close() #### Save the simulation results ###########################
# if i/ENV.SIM_FREQ>3 and i%30==0 and i/ENV.SIM_FREQ<10: p.loadURDF("duck_vhacd.urdf", [random.gauss(0, 0.3), random.gauss(0, 0.3), 3], p.getQuaternionFromEuler([random.randint(0, 360),random.randint(0, 360),random.randint(0, 360)]), physicsClientId=PYB_CLIENT) #### Step the simulation ################################### OBS, _, _, _ = ENV.step(ACTION) #### Compute control for drone 0 ########################### STATE = OBS["0"]["state"] ACTION["0"] = CTRL_0.compute_control( current_position=STATE[0:3], current_velocity=STATE[10:13], current_rpy=STATE[7:10], target_position=TARGET_POSITION[i, :], target_velocity=TARGET_VELOCITY[i, :], target_acceleration=TARGET_ACCELERATION[i, :]) #### Log drone 0 ########################################### LOGGER.log(drone=0, timestamp=i / ENV.SIM_FREQ, state=STATE) #### Compute control for drone 1 ########################### STATE = OBS["1"]["state"] ACTION["1"] = CTRL_1.compute_control( current_position=STATE[0:3], current_velocity=STATE[10:13], current_rpy=STATE[7:10], target_position=TARGET_POSITION[i, :] + np.array([-.3, .0, .0]), target_velocity=TARGET_VELOCITY[i, :], target_acceleration=TARGET_ACCELERATION[i, :]) #### Log drone 1 ########################################### LOGGER.log(drone=1, timestamp=i / ENV.SIM_FREQ, state=STATE) #### Compute control for drone 2 ########################### STATE = OBS["2"]["state"]
for i in range(ARGS.duration_sec * env.SIM_FREQ): if ARGS.duration_sec * env.SIM_FREQ % AGGR_PHY_STEPS == 0: action, _states = model.predict( obs, deterministic=True, ) #else: # action = np.array([1,0,0]) #No Turn #print(f"action {action}") #print(f"obs : {obs}") obs, reward, done, info = env.step(action) for j in range(ARGS.num_drones): logger.log(drone=j, timestamp=i / env.SIM_FREQ, state=env._getDroneStateVector(int(j)), control=env._getTargetVelocityControls(int(j))) #if i%env.SIM_FREQ == 0: #env.render() #print(f"Episode is done: {done}") sync(i, start, env.TIMESTEP) if done: n_trial += 1 obs = env.reset() print(f"Run # {n_trial}") env.close() logger.save() logger.plot()
total_rew = 0. obs = env.reset() logger = Logger(logging_freq_hz=env.SIM_FREQ, num_drones=1) for i in range(episode_max_steps): # init obs.shape=(12, ) assert len(obs.shape) == 1 and obs.shape[0] == 12 # parse obses を使うためにバッチサイズ1の様に扱う batched_obs = obs.reshape((1, 12)) # RSを使って1ステップだけ進める act = random_shooting(batched_obs) # act もバッチサイズ1の様に扱う assert len(act.shape) == 1 and act.shape[0] == 4 batched_act = act.reshape((1, 4)) next_obs, _, done, _ = env.step(act) logger.log(drone=0, timestamp=i/env.SIM_FREQ, state=np.hstack( [obs[0:3], obs[6:9], obs[3:6], obs[9:12], np.resize(act, (4))]), control=np.zeros(12)) rewards = reward_fn(batched_obs, batched_act) total_rew += rewards if done: break obs = next_obs print("total reward: {}".format(total_rew[0])) logger.save() logger.plot()
logger = Logger(logging_freq_hz=int(env.SIM_FREQ/env.AGGR_PHY_STEPS), num_drones=ARGS.num_drones ) obs = env.reset() state = agent.get_policy().get_initial_state() start = time.time() for i in range(ARGS.duration_sec*env.SIM_FREQ): if obs[-1] < neighbourhood_radius : action, _states, _dict = policy.compute_single_action(obs, state) else: action = np.array([1,0,0]) #No Turn #print(f"action {action}") obs, reward, done, info = env.step(action) for j in range(ARGS.num_drones): logger.log(drone=j, timestamp=i/env.SIM_FREQ, state = env._getDroneStateVector(int(j)), control= np.zeros(12) ) if i%env.SIM_FREQ == 0: env.render() print(f"Episode is done: {done}") sync(i, start, env.TIMESTEP) if done: obs = env.reset() env.close() logger.plot()
#### Compute control at the desired frequency ############## if i%CTRL_EVERY_N_STEPS == 0: #### Compute control for the current way point ############# for j in range(4): action[str(j)] = TARGET_VEL[j, wp_counters[j], :] #### Go to the next way point and loop ##################### for j in range(4): wp_counters[j] = wp_counters[j] + 1 if wp_counters[j] < (NUM_WP-1) else 0 #### Log the simulation #################################### for j in range(4): logger.log(drone=j, timestamp=i/env.SIM_FREQ, state= obs[str(j)]["state"], control=np.hstack([TARGET_VEL[j, wp_counters[j], 0:3], np.zeros(9)]) ) #### Printout ############################################## if i%env.SIM_FREQ == 0: env.render() #### Sync the simulation ################################### if ARGS.gui: sync(i, START, env.TIMESTEP) #### Close the environment ################################# env.close() #### Plot the simulation results ###########################
obs, reward, done, info = env.step(action) #### Compute next action using the set points from the trace action["0"], pos_err, yaw_err = ctrl.computeControlFromState(control_timestep=env.TIMESTEP, state=obs["0"]["state"], target_pos=TRACE_CTRL_REFERENCE[i, 0:3], target_vel=TRACE_CTRL_REFERENCE[i, 3:6] ) #### Re-arrange the trace for consistency with the logger trace_obs = np.hstack([TRACE_DATA[i, 0:3], np.zeros(4), TRACE_DATA[i, 6:9], TRACE_DATA[i, 3:6], TRACE_DATA[i, 9:12], TRACE_DATA[i, 12:16]]) #### Log the trace ######################################### logger.log(drone=0, timestamp=TRACE_TIMESTAMPS[i], state=trace_obs, control=np.hstack([TRACE_CTRL_REFERENCE[i, :], np.zeros(6)]) ) #### Log the simulation #################################### logger.log(drone=1, timestamp=i/env.SIM_FREQ, state=obs["0"]["state"], control=np.hstack([TRACE_CTRL_REFERENCE[i, :], np.zeros(6)]) ) #### Printout ############################################## if i%env.SIM_FREQ == 0: env.render() #### Sync the simulation ###################################
#### Initialize the environment ######################################################################### env = RLTetherAviary(gui=False, record=False) #### Initialize the logger ######################################################################### logger = Logger(logging_freq_hz=int(env.SIM_FREQ), num_drones=1) model_name = "<path-to-model>" # model = DDPG.load(model_name) # model = PPO.load(model_name) loaded = keras.models.model_from_json(model_name) obs = env.reset() done = False for i in range(10 * env.SIM_FREQ): action, _states = model.predict(obs, deterministic=True) obs, rewards, done, info = env.step(action) logger.log(drone=0, timestamp=1 / env.SIM_FREQ, state=obs, control=action) # env.render() env.close() # logger.save() # logger.plot() logger.plot3D("DDPG")