def main():
    """
    Main function for testing the trained DQN network and visualising the
    activations of its convolutional layers on the autonomous-landing task.
    """
    global _last_image
    # ATTENTION: If you want to restore files from a previous simulation you
    # must pass valid values for these variables:
    policy_weights_path_1 = '/home/pulver/Desktop/episode_113250/policy/policy_checkpoint.ckp'
    root_images = "/home/pulver/Desktop/network_testing/" + \
        str(datetime.datetime.now().time()) + "/"
    # NOTE: OpenCV doesn't write in a folder that does not exist
    if not os.path.exists(root_images):
        os.makedirs(root_images)
    source = 3  # NOTE: 1 for real drone, 2 for Gazebo, 3 for URL
    screen_width = 84  # original is 160
    screen_height = 84  # original is 210
    images_stack_size = 4
    # Use only the first 5 actions for this simulation
    # action_list = ['left', 'right', 'forward', 'backward', 'stop', 'descend']
    # 0 (left), 1 (right), 2 (forward), 3 (backward), 4 (stop), 5 (descend)
    tot_actions = 6
    batch_size = 32  # size of the experience batch

    rospy.init_node("DRLTrainingNode")
    rospy.loginfo("----- DRL Training Node -----")

    # Create a subscriber for the greyscale image
    if source == 1:
        # 1) Real drone
        rospy.Subscriber("/drl/grey_camera", ROSImage, image_callback)
    elif source == 2:
        # 2) Gazebo
        rospy.Subscriber("/quadrotor/ardrone/bottom/ardrone/bottom/image_raw",
                         ROSImage, image_callback,
                         queue_size=30)  # Store the last 30 messages
    elif source == 3:
        # 3) URL
        # video_stream_url = 'http://10.188.34.59:8080/videofeed'
        video_stream_url = "http://10.5.5.9:8080/live/amba.m3u8"
    else:
        print "Insert a correct source value (1 for real drone, 2 for Gazebo, 3 for URL)"

    tot_steps = 3000000  # finite-horizon simulation
    r = rospy.Rate(1)  # 1 Hz

    # Init session and networks
    sess = tf.Session()
    summary_folder = ""  # if empty the summary is written in ./log/ + current time
    if summary_folder == "":
        tf_summary_writer = tf.summary.FileWriter(
            './log/' + str(datetime.datetime.now().time()), sess.graph)
    else:
        tf_summary_writer = tf.summary.FileWriter(summary_folder, sess.graph)
    policy_network_1 = QNetwork(sess, tot_actions=tot_actions,
                                image_shape=(screen_width, screen_height,
                                             images_stack_size),
                                batch_size=batch_size,
                                network_name="policy_net")
    init = tf.global_variables_initializer()
    sess.run(init)

    # Load the network weights from memory if a valid checkpoint path is passed
    if policy_weights_path_1 != "":
        print("Loading weights 1 from memory...")
        policy_network_1.load_weights(policy_weights_path_1)
    else:
        print("The network path is empty.")
        # sys.exit()

    # Acquire the first frame
    if source == 3:
        state = acquire_frame(video_stream_url)
    else:
        # For sources 1 and 2 the image arrives through the ROS callback
        time.sleep(1)
        state = _last_image
    # Save the first image
    image_path = root_images + "image_0.png"
    cv2.imwrite(image_path, state)
    # 1 - Create a stack of X images
    image_t = np.stack([state] * images_stack_size, axis=2)

    matplotlib.interactive(True)
    f, (ax3, ax2) = plt.subplots(2, 1)
    f.tight_layout()
    f.patch.set_facecolor('gray')

    for step in range(1, 100000000):
        ##########################################
        ##               CAMERA                 ##
        ##########################################
        pad_size = 1
        pad_value = 0
        # Pad every frame of the stack and lay them out side by side
        image = np.lib.pad(image_t[:, :, 0], (pad_size, pad_size),
                           'constant', constant_values=(pad_value, pad_value))
        for d in range(1, images_stack_size):
            image_stack_padded = np.lib.pad(
                image_t[:, :, d], (pad_size, pad_size),
                'constant', constant_values=(pad_value, pad_value))
            image = np.append(image, image_stack_padded, axis=1)

        # 2 - Forward the input to the network
        action_distribution_1, conv1, conv2, conv3 = \
            policy_network_1.return_everything(
                input_data=np.reshape(image_t, (1, 84, 84, images_stack_size)),
                softmax=False)
        action = np.argmax(action_distribution_1)
        action = convert_action_int_to_str(action)
        print "######################"
        print "Action distribution: " + str(action_distribution_1)
        print "Action_1 selected: " + action

        ##########################################
        ##               FILTERS                ##
        ##########################################
        ax3.clear()
        ax3.axis("off")
        padding = np.ones((1, 21 * 16))
        # 1st conv layer ###########################
        conv1 = np.reshape(conv1, (21, 21, 32))
        image_cv1_1 = np.reshape(conv1[:, :, 0:16], (21, 21 * 16), order='F')
        image_cv1_2 = np.reshape(conv1[:, :, 16:32], (21, 21 * 16), order='F')
        image_cv1 = np.concatenate((padding, image_cv1_1, image_cv1_2), axis=0)
        # Save the activations
        filter_path = root_images + "filters/step_" + str(step) + "/"
        if not os.path.exists(filter_path):
            os.makedirs(filter_path)
        filter_path = filter_path + "conv_1.jpg"
        I = image_cv1
        I8 = (((I - I.min()) / (I.max() - I.min())) * 255.9).astype(np.uint8)
        image_cv1_resized = cv2.resize(I8, (84 * 4, 21 * 2),
                                       interpolation=cv2.INTER_AREA)
        img = Image.fromarray(image_cv1_resized)
        img.save(filter_path)
        # 2nd conv layer ###########################
        padding = np.zeros((1, 11 * 32))
        conv2 = np.reshape(conv2, (11, 11, 64))
        image_cv2_1 = np.reshape(conv2[:, :, 0:32], (11, 11 * 32), order='F')
        image_cv2_2 = np.reshape(conv2[:, :, 32:64], (11, 11 * 32), order='F')
        image_cv2 = np.concatenate((padding, image_cv2_1, image_cv2_2), axis=0)
        # Save the activations
        filter_path = root_images + "filters/step_" + str(step) + "/"
        if not os.path.exists(filter_path):
            os.makedirs(filter_path)
        filter_path = filter_path + "conv_2.jpg"
        I = image_cv2
        I8 = (((I - I.min()) / (I.max() - I.min())) * 255.9).astype(np.uint8)
        image_cv2_resized = cv2.resize(I8, (84 * 4, 11 * 2),
                                       interpolation=cv2.INTER_AREA)
        img = Image.fromarray(image_cv2_resized)
        img.save(filter_path)
        # 3rd conv layer ###########################
        padding = np.ones((1, 11 * 32))
        conv3 = np.reshape(conv3, (11, 11, 64))
        image_cv3_1 = np.reshape(conv3[:, :, 0:32], (11, 11 * 32), order='F')
        image_cv3_2 = np.reshape(conv3[:, :, 32:64], (11, 11 * 32), order='F')
        image_cv3 = np.concatenate((padding, image_cv3_1, image_cv3_2), axis=0)
        # Save the activations
        filter_path = root_images + "filters/step_" + str(step) + "/"
        if not os.path.exists(filter_path):
            os.makedirs(filter_path)
        filter_path = filter_path + "conv_3.jpg"
        I = image_cv3
        I8 = (((I - I.min()) / (I.max() - I.min())) * 255.9).astype(np.uint8)
        image_cv3_resized = cv2.resize(I8, (84 * 4, 11 * 2),
                                       interpolation=cv2.INTER_AREA)
        img = Image.fromarray(image_cv3_resized)
        img.save(filter_path)

        # Assemble the final image ####################
        image_input_resized = cv2.resize(image, (84 * 4, 84),
                                         interpolation=cv2.INTER_AREA)
        final_image = np.concatenate((image_input_resized, image_cv1_resized,
                                      image_cv2_resized, image_cv3_resized))
        # Plot the image (greyscale instead of BGR)
        ax3 = plt.subplot(2, 1, 1)
        ax3.imshow(final_image, cmap='gray')

        ##########################################
        ##              HISTOGRAM               ##
        ##########################################
        ind = np.arange(len(action_distribution_1[0]))
        width = 0.4
        ax2.clear()
        ax2 = plt.subplot(2, 1, 2)
        ax2.set_aspect(3)
        rects = ax2.bar(0.4 + ind, action_distribution_1[0], width=width,
                        color='r', alpha=0.4)
        rects[np.argmax(action_distribution_1)].set_color('g')
        ax2.set_xticks(0.4 + ind + width / 2)
        ax2.set_xticklabels(("left", "right", "forward", "backward",
                             "stop", "descend"))
        for i in ax2.xaxis.get_ticklabels():
            i.set_color("white")
        ax2.set_ylim([0, 1])

        # 3 - Send the command and wait for the translation
        raw_input("Press a button to acquire images...")

        # 4 - Acquire the second frame and add it to the stack
        if source == 3:
            _last_image = acquire_frame(video_stream_url)
        time.sleep(1)
        image_t1 = _last_image
        # Save every image
        image_path = root_images + "image_" + str(step) + ".png"
        cv2.imwrite(image_path, image_t1)
        image_t1 = np.expand_dims(image_t1, 2)
        image_t1 = np.append(image_t[:, :, 1:], image_t1, axis=2)
        # 5 - Forward the new stack to the network at the next iteration
        image_t = image_t1
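# --- Illustration (editor's addition, not part of the original script) ------
# A minimal, self-contained sketch of how the activation mosaics above are
# assembled: reshaping an (h, w, c) volume with order='F' lays the c feature
# maps side by side in one row, and two such rows are stacked vertically.
# The helper name `tile_activations` is hypothetical.
import numpy as np

def tile_activations(activations):
    """Tile an (h, w, c) activation volume into a 2 x (c/2) mosaic."""
    h, w, c = activations.shape
    half = c // 2
    # order='F' reads column-major, so column block k of the output
    # is exactly feature map k laid out side by side with its neighbours
    top = np.reshape(activations[:, :, :half], (h, w * half), order='F')
    bottom = np.reshape(activations[:, :, half:], (h, w * half), order='F')
    return np.concatenate((top, bottom), axis=0)

# e.g. tile_activations(np.reshape(conv1, (21, 21, 32))) gives a (42, 336) image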
def main():
    """
    Main function for testing the DQN network on the autonomous-landing task.
    """
    # ATTENTION: If you want to restore files from a previous simulation you
    # must pass valid values for these variables:
    policy_weights_path = '/home/pulver/Desktop/simulation_8/checkpoint/episode_20500/policy/policy_checkpoint.ckp'
    summary_folder = ""  # if empty the summary is written in ./log/ + current time
    start_episode = 0  # change to the last episode number
    frame_counter = 0  # change to the last number of frames
    epsilon = 0.1
    screen_width = 84  # original is 160
    screen_height = 84  # original is 210
    images_stack_size = 4
    # Use only the first 5 actions for this simulation
    # action_list = ['left', 'right', 'forward', 'backward', 'stop', 'descend']
    # 0 (left), 1 (right), 2 (forward), 3 (backward), 4 (stop), 5 (descend)
    tot_actions = 6
    batch_size = 64  # size of the experience batch
    tot_steps = 1000  # finite-horizon simulation
    tot_episodes = 210
    steps_per_episodes = 40  # expressed in number of steps
    noop_time = 2.0  # pause in seconds between actions
    num_ground_plane = 70
    actual_ground_index = 0
    # episode_per_ground specifies the number of episodes with the same
    # ground plane
    ground_counter = 1
    episode_per_ground = 10
    timer_total_start = time.time()

    rospy.init_node("DRLTestNode")
    rospy.loginfo("----- DRL Test Node -----")

    # Create a subscriber for the greyscale image
    # rospy.Subscriber("/drl/grey_camera", ROSImage, image_callback)  # TODO restore default
    rospy.Subscriber("/quadrotor/ardrone/bottom/ardrone/bottom/image_raw",
                     ROSImage, image_callback,
                     queue_size=30)  # Store the last 30 messages before discarding them
    images_stack_size = 4
    r = rospy.Rate(10)  # 10 Hz
    ground_list = ["asphalt11", "asphalt12", "asphalt13",
                   "brick11", "brick12", "brick13",
                   "grass11", "grass12", "grass13",
                   "pavement11", "pavement12", "pavement13",
                   "sand11", "sand12", "sand13",
                   "snow11", "snow12", "snow13",
                   "soil11", "soil12", "soil13"]

    # Init session and networks
    sess = tf.Session()
    if summary_folder == "":
        tf_summary_writer = tf.summary.FileWriter(
            './log/' + str(datetime.datetime.now().time()), sess.graph)
    else:
        tf_summary_writer = tf.summary.FileWriter(summary_folder, sess.graph)
    policy_network = QNetwork(sess, tot_actions=tot_actions,
                              image_shape=(screen_width, screen_height,
                                           images_stack_size),
                              batch_size=batch_size,
                              network_name="policy_net")
    init = tf.global_variables_initializer()
    sess.run(init)
    if policy_weights_path != "":
        print("Loading weights from memory: " + str(policy_weights_path))
        policy_network.load_weights(policy_weights_path)
    else:
        print("The network path is empty. Choose appropriate weights!")
        sys.exit()

    # Start here
    for episode in range(start_episode, tot_episodes):
        # Reset the ground plane in the environment every episode_per_ground episodes
        ground_index = episode / episode_per_ground
        if ground_index != actual_ground_index or episode == 0:
            clean_world(ground_list)
            generate_new_world(ground_index, ground_list)
        actual_ground_index = ground_index
        timer_start = time.time()
        actual_time = rospy.get_rostime()
        rospy_start_time = actual_time.secs + actual_time.nsecs / 1000000000.0
        frame_episode = 0
        cumulated_reward = 0
        epsilon_used = 0
        send_action("takeoff")
        print("")
        print("Episode: " + str(episode))
        # 1 - Accumulate the first state
        reset_pose()
        print "Reset pose!"
        send_action("stop")
        rospy.sleep(1.0)
        state = _last_image
        # Create a stack of X images
        image_t = np.stack([state] * images_stack_size, axis=2)

        for step in range(tot_steps):
            # 2 - Select the action following epsilon-greedy: with probability
            # epsilon take a random action, otherwise query the policy network.
            # Here image_t is always ready and can be given as input to the network.
            epsilon_used_bool = False
            if np.random.random_sample() < epsilon:
                epsilon_used += 1
                action = get_random_action()
                epsilon_used_bool = True
            else:
                action_distribution = policy_network.return_action_distribution(
                    input_data=np.reshape(image_t, (1, 84, 84, images_stack_size)),
                    softmax=False)
                action = np.argmax(action_distribution)
                action = convert_action_int_to_str(action)
            send_action(action)
            rospy.sleep(noop_time)

            # 3 - Get state_t1 repeating the action obtained at step 2.
            # Then set state_t = state_t1.
            image_t1 = _last_image
            send_action("stop")
            # If the action taken is to land and the UAV is inside the landing
            # BB, done will be calculated accordingly
            done_reward = get_done_reward()
            reward = done_reward.reward
            done = done_reward.done
            if epsilon_used_bool:
                print " Step(" + str(step) + "), Action: " + action + ", Altitude: " + str(done_reward.z) + ", Reward: " + str(reward)
            else:
                print "#Step(" + str(step) + "), Action: " + action + ", Altitude: " + str(done_reward.z) + ", Reward: " + str(reward)
                # Printed only for greedy steps: on random steps
                # action_distribution is stale or undefined.
                # ['left', 'right', 'forward', 'backward', 'stop', 'descend']
                print(" left:" + str(action_distribution[0][0][0]) +
                      "; right:" + str(action_distribution[0][0][1]) +
                      "; forward:" + str(action_distribution[0][0][2]) +
                      "; backward:" + str(action_distribution[0][0][3]) +
                      "; stop:" + str(action_distribution[0][0][4]) +
                      "; descend:" + str(action_distribution[0][0][5]))
            image_t1 = np.expand_dims(image_t1, 2)
            # Stack the images
            image_t1 = np.append(image_t[:, :, 1:], image_t1, axis=2)
            frame_counter += 1  # incremented every time a frame is obtained
            frame_episode += 1
            image_t = image_t1
            get_relative_pose(ground_index, episode, step, reward)
            cumulated_reward += reward

            timer_stop = time.time()
            # Stop the episode if the number of frames exceeds the threshold
            if frame_episode >= steps_per_episodes:
                done = True
            # When the episode is done
            if done:
                timer_stop = time.time()
                actual_time = rospy.get_rostime()
                rospy_stop_time = actual_time.secs + actual_time.nsecs / 1000000000.0
                rospy_time_elapsed = rospy_stop_time - rospy_start_time
                print("Tot Frame counter: " + str(frame_counter))
                print("Time episode: " + str(timer_stop - timer_start) + " seconds")
                print("Ros time episode: " + str(rospy_time_elapsed) + " seconds")
                if cumulated_reward >= 0:
                    rospy.logwarn("Positive reward obtained!")
                print("Cumulated reward: " + str(cumulated_reward))
                print("Episode finished after {} timesteps".format(step + 1))
                print("Epsilon used: " + str(epsilon_used) + " out of " +
                      str(step + 1) + " (" +
                      str(float((epsilon_used * 100.0) / (step + 1.0))) + "%)")
                sys.stdout.flush()
                time.sleep(5)
                break
def main():
    """
    Main function for testing the DQN network on the autonomous-landing task
    over a list of checkpoints.
    """
    # ground_list = ["water1", "water2", "water3", "water4", "water5",
    #                "water6", "water7", "water8", "water9", "water10"]
    # ground_list = ["water11", "water12", "water13",
    #                "water14", "water15", "water16",
    #                "water17", "water18", "water19"]
    ground_list = ["pavement11", "pavement12", "pavement5", "pavement7",
                   "pavement14", "pavement15", "pavement16"]
    # NOTE: the checkpoint path must be modified in the code below
    ckp_list = ["120750"]
    # ATTENTION: If you want to restore files from a previous simulation you
    # must pass valid values for these variables:
    # policy_weights_path = '/home/pulver/Desktop/marine_descending/checkpoint/episode_94500/policy/policy_checkpoint.ckp'
    summary_folder = ""  # if empty the summary is written in ./log/ + current time
    start_episode = 0  # change to the last episode number
    frame_counter = 0  # change to the last number of frames
    screen_width = 84  # original is 160
    screen_height = 84  # original is 210
    images_stack_size = 4
    # Use only the first 5 actions for this simulation
    # action_list = ['left', 'right', 'forward', 'backward', 'stop', 'descend']
    # 0 (left), 1 (right), 2 (forward), 3 (backward), 4 (stop), 5 (descend)
    tot_actions = 6
    batch_size = 32  # size of the experience batch
    tot_steps = 1000  # finite-horizon simulation
    steps_per_episodes = 40  # expressed in number of steps
    noop_time = 2.0  # pause in seconds between actions
    num_ground_plane = len(ground_list)
    actual_ground_index = 0
    # episode_per_ground specifies the number of episodes with the same
    # ground plane
    episode_per_ground = 10
    tot_episodes = episode_per_ground * num_ground_plane
    accuracy_ground = 0.0
    timer_total_start = time.time()

    rospy.init_node("DRLTestNode")
    rospy.loginfo("----- DRL Test Node -----")
    # Create a subscriber for the greyscale image
    # rospy.Subscriber("/drl/grey_camera", ROSImage, image_callback)  # TODO restore default
    rospy.Subscriber("/quadrotor/ardrone/bottom/ardrone/bottom/image_raw",
                     ROSImage, image_callback,
                     queue_size=30)  # Store the last 30 messages before discarding them
    # Create a publisher for changing the light position
    # light_pub = rospy.Publisher('/gazebo/default/light/modify', Light)
    images_stack_size = 4
    # r = rospy.Rate(10)  # 10 Hz

    # Init session and networks
    sess = tf.Session()
    policy_network = QNetwork(sess, tot_actions=tot_actions,
                              image_shape=(screen_width, screen_height,
                                           images_stack_size),
                              batch_size=batch_size,
                              network_name="policy_net")
    init = tf.global_variables_initializer()
    sess.run(init)

    for i in range(len(ckp_list)):
        ckp = ckp_list[i]
        policy_weights_path = '/home/pulver/Desktop/sim11/episode_' + \
            str(ckp) + '/policy/policy_checkpoint.ckp'
        if summary_folder == "":
            tf_summary_writer = tf.summary.FileWriter(
                './' + str(ckp) + '/log/' + str(datetime.datetime.now().time()),
                sess.graph)
        else:
            tf_summary_writer = tf.summary.FileWriter(summary_folder, sess.graph)
        if policy_weights_path != "":
            print("Loading weights from memory: " + str(policy_weights_path))
            policy_network.load_weights(policy_weights_path)
        else:
            print("The network path is empty. Choose appropriate weights!")
            sys.exit()

        # Start here
        for episode in range(start_episode, tot_episodes):
            # Reset the ground plane in the environment every
            # episode_per_ground episodes
            ground_index = episode / episode_per_ground
            print "Current ground index: " + str(ground_index)
            if episode % episode_per_ground == 0 and episode != start_episode:
                with open("./" + str(ckp) + "/accuracy_ckp" + str(ckp) + ".csv",
                          "a") as myfile:
                    accuracy_ground = accuracy_ground / (episode_per_ground * 1.0)
                    print "Final accuracy value: " + str(accuracy_ground)
                    string_to_add = ground_list[ground_index - 1] + \
                        "," + str(accuracy_ground) + "\n"
                    myfile.write(string_to_add)
            if ground_index != actual_ground_index or episode == start_episode:
                accuracy_ground = 0.0
                clean_world(ground_list)
                generate_new_world(ground_index, ground_list)
            # Reset the lights every 10 episodes
            # if episode % 10 == 0:
            #     # Change the position of the three point lights
            #     for i in range(0, 3):
            #         model_to_change = "user_point_light_" + str(i)
            #         x = STDrandom.uniform(-5.0, 5.0)
            #         y = STDrandom.uniform(-5.0, 5.0)
            #         z = STDrandom.uniform(1.0, 10.0)
            #         light_msg = Light()
            #         light_msg.pose.x = x
            #         light_msg.pose.y = y
            #         light_msg.pose.z = z
            #         light_msg.name = model_to_change
            #         light_pub.publish(light_msg)
            #     rospy.loginfo("Lights have been reset!")
            actual_ground_index = ground_index
            timer_start = time.time()
            actual_time = rospy.get_rostime()
            rospy_start_time = actual_time.secs + actual_time.nsecs / 1000000000.0
            frame_episode = 0
            cumulated_reward = 0
            epsilon_used = 0
            send_action("takeoff")
            print("")
            print("Episode: " + str(episode))
            # 0 - Reset the USV's position
            os.system(
                "rosservice call /gazebo/set_model_state '{model_state: { model_name: usv_model, pose: { position: { x: 0.0, y: 0.0 ,z: 5.0 }, orientation: {x: 0, y: 0.0, z: 0, w: 0.0 } }, twist: { linear: {x: 0.0 , y: 0 ,z: 0 } , angular: { x: 0.0 , y: 0 , z: 0.0 } } , reference_frame: world } }'"
            )
            # 1 - Accumulate the first state
            reset_pose()
            print "Reset pose!"
            send_action("stop")
            rospy.sleep(1.0)
            state = _last_image
            # Create a stack of X images
            image_t = np.stack([state] * images_stack_size, axis=2)

            for step in range(tot_steps):
                # 2 - Get the action through the policy network.
                # Here image_t is always ready and can be given as input
                # to the network.
                action_distribution = policy_network.return_action_distribution(
                    input_data=np.reshape(image_t, (1, 84, 84, images_stack_size)),
                    softmax=False)
                action = np.argmax(action_distribution)
                action = convert_action_int_to_str(action)
                send_action(action)
                rospy.sleep(noop_time)
                # 3 - Get state_t1 repeating the action obtained at step 2.
                # Then set state_t = state_t1.
                image_t1 = _last_image
                send_action("stop")
                # If the action taken is to land and the UAV is inside the
                # landing BB, done will be calculated accordingly
                done_reward = get_done_reward()
                reward = done_reward.reward
                done = done_reward.done
                image_t1 = np.expand_dims(image_t1, 2)
                # Stack the images
                image_t1 = np.append(image_t[:, :, 1:], image_t1, axis=2)
                frame_counter += 1  # incremented every time a frame is obtained
                frame_episode += 1
                image_t = image_t1
                get_relative_pose(ground_index, episode, step, reward,
                                  ground_list, ckp)
                cumulated_reward += reward
                timer_stop = time.time()
                # Stop the episode if the number of frames exceeds the threshold
                if frame_episode >= steps_per_episodes:
                    done = True
                # When the episode is done
                if done:
                    timer_stop = time.time()
                    actual_time = rospy.get_rostime()
                    rospy_stop_time = actual_time.secs + actual_time.nsecs / 1000000000.0
                    rospy_time_elapsed = rospy_stop_time - rospy_start_time
                    print("Tot Frame counter: " + str(frame_counter))
                    print("Time episode: " + str(timer_stop - timer_start) + " seconds")
                    print("Ros time episode: " + str(rospy_time_elapsed) + " seconds")
                    if cumulated_reward >= 0:
                        rospy.logwarn("Positive reward obtained!")
                    print("Cumulated reward: " + str(cumulated_reward))
                    print("Episode finished after {} timesteps".format(step + 1))
                    sys.stdout.flush()
                    with open("./" + str(ckp) + "/results_ckp" + str(ckp) + "_" +
                              str(datetime.date.today().strftime("%m%d%Y")) +
                              ".csv", "a") as myfile:
                        boolean_reward = 0
                        if cumulated_reward > 0:
                            boolean_reward = 1
                            accuracy_ground += 1.0
                            print "Current accuracy value: " + str(accuracy_ground)
                        string_to_add = ground_list[ground_index] + "," + \
                            str(episode) + "," + str(step) + "," + \
                            str(cumulated_reward) + "," + \
                            str(boolean_reward) + '\n'
                        myfile.write(string_to_add)
                    if episode == tot_episodes - 1:
                        with open("./" + str(ckp) + "/accuracy_ckp" + str(ckp) +
                                  ".csv", "a") as myfile:
                            accuracy_ground = accuracy_ground / (episode_per_ground * 1.0)
                            print "Final accuracy value: " + str(accuracy_ground)
                            string_to_add = ground_list[actual_ground_index] + \
                                "," + str(accuracy_ground) + "\n"
                            myfile.write(string_to_add)
                    break
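# --- Illustration (editor's addition, not part of the original script) ------
# A minimal sketch of the per-ground accuracy written to the CSV above: the
# fraction of episodes on one ground plane that ended with a positive
# cumulated reward (the script divides the success count by
# episode_per_ground). `episode_rewards` is a hypothetical list of per-episode
# cumulated rewards for a single ground plane.
def ground_accuracy(episode_rewards):
    successes = sum(1 for r in episode_rewards if r > 0)
    return successes / (len(episode_rewards) * 1.0)

# e.g. ground_accuracy([12.0, -5.0, 3.0, 1.0, -2.0]) == 0.6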
def main():
    """
    Main function for testing the DQN network on the autonomous-landing task
    with the real drone.
    """
    # ATTENTION: If you want to restore files from a previous simulation you
    # must pass valid values for these variables:
    policy_weights_path = '/home/pulver/Desktop/episode_113250/policy/policy_checkpoint.ckp'
    summary_folder = ""  # if empty the summary is written in ./log/ + current time
    screen_width = 84  # original is 160
    screen_height = 84  # original is 210
    images_stack_size = 4
    # Use only the first 5 actions for this simulation
    # action_list = ['left', 'right', 'forward', 'backward', 'stop', 'descend']
    # 0 (left), 1 (right), 2 (forward), 3 (backward), 4 (stop), 5 (descend)
    tot_actions = 6
    batch_size = 32  # size of the experience batch
    tot_steps = 1000  # finite-horizon simulation
    steps_per_episodes = 40  # expressed in number of steps
    noop_time = 2.0  # pause in seconds between actions
    timer_total_start = time.time()

    rospy.init_node("DRLTestNode")
    rospy.loginfo("----- DRL Test Node -----")
    # Create a subscriber for the greyscale image
    rospy.Subscriber("/drl/grey_camera", ROSImage, image_callback)  # TODO restore default
    # rospy.Subscriber("/quadrotor/ardrone/bottom/ardrone/bottom/image_raw",
    #                  ROSImage, image_callback,
    #                  queue_size=30)  # Store the last 30 messages before
    #                  discarding them
    images_stack_size = 4
    r = rospy.Rate(10)  # 10 Hz

    # Init session and networks
    sess = tf.Session()
    if summary_folder == "":
        tf_summary_writer = tf.summary.FileWriter(
            './log/' + str(datetime.datetime.now().time()), sess.graph)
    else:
        tf_summary_writer = tf.summary.FileWriter(summary_folder, sess.graph)
    policy_network = QNetwork(sess, tot_actions=tot_actions,
                              image_shape=(screen_width, screen_height,
                                           images_stack_size),
                              batch_size=batch_size,
                              network_name="policy_net")
    init = tf.global_variables_initializer()
    sess.run(init)
    if policy_weights_path != "":
        print("Loading weights from memory: " + str(policy_weights_path))
        policy_network.load_weights(policy_weights_path)
    else:
        print("The network path is empty. Choose appropriate weights!")
        sys.exit()

    ##########################################################################
    ##                             START HERE                               ##
    ##########################################################################
    timer_start = time.time()
    actual_time = rospy.get_rostime()
    rospy_start_time = actual_time.secs + actual_time.nsecs / 1000000000.0
    frame_episode = 0
    cumulated_reward = 0
    done = False  # no reward/done signal on the real platform; set by the step limit
    send_action("takeoff")
    # 1 - Accumulate the first state
    send_action("stop")
    rospy.sleep(1.0)
    # Acquire the image from the bottom camera
    state = _last_image
    # Create a stack of X images
    image_t = np.stack([state] * images_stack_size, axis=2)

    for step in range(tot_steps):
        # 2 - Get the action through the policy network.
        # Here image_t is always ready and can be given as input to the network.
        action_distribution = policy_network.return_action_distribution(
            input_data=np.reshape(image_t, (1, 84, 84, images_stack_size)),
            softmax=False)
        action = np.argmax(action_distribution)
        action = convert_action_int_to_str(action)
        send_action(action)
        rospy.sleep(noop_time)
        # 3 - Get state_t1 repeating the action obtained at step 2.
        # Then set state_t = state_t1.
        image_t1 = _last_image
        send_action("stop")
        # NOTE: in the real implementation there's no way to get done and reward
        # done_reward = get_done_reward()
        # reward = done_reward.reward
        # done = done_reward.done
        image_t1 = np.expand_dims(image_t1, 2)
        # Stack the images
        image_t1 = np.append(image_t[:, :, 1:], image_t1, axis=2)
        frame_episode += 1
        image_t = image_t1
        # cumulated_reward += reward
        timer_stop = time.time()
        # Stop the episode if the number of frames exceeds the threshold
        if frame_episode >= steps_per_episodes:
            done = True
        # When the episode is done
        if done:
            timer_stop = time.time()
            actual_time = rospy.get_rostime()
            rospy_stop_time = actual_time.secs + actual_time.nsecs / 1000000000.0
            rospy_time_elapsed = rospy_stop_time - rospy_start_time
            print("Time episode: " + str(timer_stop - timer_start) + " seconds")
            print("Ros time episode: " + str(rospy_time_elapsed) + " seconds")
            # if cumulated_reward >= 0:
            #     rospy.logwarn("Positive reward obtained!")
            # print("Cumulated reward: " + str(cumulated_reward))
            print("Episode finished after {} timesteps".format(step + 1))
            sys.stdout.flush()
            break
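# --- Illustration (editor's addition, not part of the original script) ------
# A minimal sketch of the sliding-window frame stacking used in all these
# scripts: the state is the last four 84x84 greyscale frames stacked on the
# channel axis; each step drops the oldest frame and appends the newest.
# The helper name `push_frame` is hypothetical.
import numpy as np

def push_frame(stack, frame):
    """stack: (84, 84, 4); frame: (84, 84). Returns the updated stack."""
    frame = np.expand_dims(frame, 2)               # (84, 84) -> (84, 84, 1)
    return np.append(stack[:, :, 1:], frame, axis=2)

# First state: np.stack([frame] * 4, axis=2); then stack = push_frame(stack, new_frame)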
def main():
    """
    Main function for testing the DQN network on the autonomous-landing task
    over mixed ground planes.
    """
    # ATTENTION: If you want to restore files from a previous simulation you
    # must pass valid values for these variables:
    policy_weights_path = '/home/pulver/Desktop/DQN-single/episode_36000/policy/policy_checkpoint.ckp'
    ground_list = ["asphalt11_small", "asphalt12_small", "asphalt13_small",
                   "brick11_small", "brick12_small", "brick13_small",
                   "grass11_small", "grass12_small", "grass13_small",
                   "pavement11_small", "pavement12_small", "pavement13_small",
                   "sand11_small", "sand12_small", "sand13_small",
                   "snow11_small", "snow12_small", "snow13_small",
                   "soil11_small", "soil12_small", "soil13_small"]
    summary_folder = ""  # if empty the summary is written in ./log/ + current time
    start_episode = 0  # change to the last episode number
    frame_counter = 0  # change to the last number of frames
    screen_width = 84  # original is 160
    screen_height = 84  # original is 210
    images_stack_size = 4
    # Use only the first 5 actions for this simulation
    # 0 (left), 1 (right), 2 (forward), 3 (backward), 4 (stop), 5 (land),
    # 6 (ascend), 7 (descend), 8 (rotate_left), 9 (rotate_right)
    tot_actions = 6
    batch_size = 32  # size of the experience batch
    tot_steps = 1000  # finite-horizon simulation
    tot_episodes = 50
    steps_per_episodes = 20  # expressed in number of steps
    noop_time = 2.0  # pause in seconds between actions
    num_ground_plane = 21
    actual_ground_index = 0
    # episode_per_ground specifies the number of episodes with the same
    # ground plane
    ground_counter = 1
    episode_per_ground = 10
    timer_total_start = time.time()

    rospy.init_node("DeepReinforcedLanding")
    rospy.loginfo("----- Deep Reinforced Landing Node -----")
    rospy.Subscriber("/quadrotor/ardrone/bottom/ardrone/bottom/image_raw",
                     ROSImage, image_callback,
                     queue_size=30)  # Store the last 30 messages before discarding them
    r = rospy.Rate(10)  # 10 Hz

    # Init session and networks
    sess = tf.Session()
    if summary_folder == "":
        tf_summary_writer = tf.summary.FileWriter(
            './log/' + str(datetime.datetime.now().time()), sess.graph)
    else:
        tf_summary_writer = tf.summary.FileWriter(summary_folder, sess.graph)
    policy_network = QNetwork(sess, tot_actions=tot_actions,
                              image_shape=(screen_width, screen_height,
                                           images_stack_size),
                              batch_size=batch_size,
                              network_name="policy_net")
    init = tf.global_variables_initializer()
    sess.run(init)
    # Load the network weights from memory if a valid checkpoint path is passed
    # if(os.path.isfile(policy_weights_path) == True and
    #    os.path.isfile(target_weights_path) == True):
    try:
        print("Loading weights from memory...")
        policy_network.load_weights(policy_weights_path)
    except Exception:
        print("Error while loading the weights!")
        return

    # Start here
    for episode in range(start_episode, tot_episodes):
        # Reset the ground plane in the environment every
        # episode_per_ground episodes
        ground_index = episode / episode_per_ground
        if ground_index != actual_ground_index or episode == start_episode:
            clean_world(ground_list)
            generate_new_world(ground_list)
        actual_ground_index = ground_index
        timer_start = time.time()
        actual_time = rospy.get_rostime()
        rospy_start_time = actual_time.secs + actual_time.nsecs / 1000000000.0
        cumulated_reward = 0
        print("")
        print("Episode: " + str(episode))
        # 1 - Accumulate the first state
        reset_pose()
        print "Reset pose!"
        send_action("stop")
        rospy.sleep(1.0)
        state = _last_image
        # Create a stack of X images
        image_t = np.stack([state] * images_stack_size, axis=2)
        frame_episode = 0

        for step in range(tot_steps):
            # 2 - Get the action from the policy network.
            # Here image_t is always ready and can be given as input to the network.
            action_distribution = policy_network.return_action_distribution(
                input_data=np.reshape(image_t, (1, 84, 84, images_stack_size)),
                softmax=False)
            action = np.argmax(action_distribution)
            action = convert_action_int_to_str(action)
            # action = get_random_action()
            send_action(action)
            rospy.sleep(noop_time)
            # 3 - Get state_t1 repeating the action obtained at step 2.
            # Then set state_t = state_t1.
            image_t1 = _last_image
            # If the action taken is to land and the UAV is inside the landing
            # BB, done will be calculated accordingly
            done_reward = get_done_reward()
            reward = done_reward.reward
            done = done_reward.done
            image_t1 = np.expand_dims(image_t1, 2)
            # Stack the images
            image_t1 = np.append(image_t[:, :, 1:], image_t1, axis=2)
            image_t = image_t1
            cumulated_reward += reward
            send_action("stop")
            frame_episode = frame_episode + 1
            # get_relative_pose(ground_index, episode, step, reward)
            timer_stop = time.time()
            # Stop the episode if the number of frames exceeds the threshold
            if frame_episode >= steps_per_episodes:
                done = True
            # if timer_stop - timer_start >= 30.0:  # maximum time allowed
            #     # cumulated_reward += -1
            #     done = True
            # When the episode is done
            if done:
                timer_stop = time.time()
                actual_time = rospy.get_rostime()
                rospy_stop_time = actual_time.secs + actual_time.nsecs / 1000000000.0
                rospy_time_elapsed = rospy_stop_time - rospy_start_time
                print("Tot Frame counter: " + str(frame_counter))
                print("Time episode: " + str(timer_stop - timer_start) + " seconds")
                print("Ros time episode: " + str(rospy_time_elapsed) + " seconds")
                if cumulated_reward >= 0:
                    rospy.logwarn("Positive reward obtained!")
                print("Cumulated reward: " + str(cumulated_reward))
                print("Episode finished after {} timesteps".format(step + 1))
                sys.stdout.flush()
                # time.sleep(5)
                with open("./results_" +
                          str(datetime.date.today().strftime("%m%d%Y")) +
                          ".csv", "a") as myfile:
                    boolean_reward = 0
                    if cumulated_reward > 0:
                        boolean_reward = 1
                    string_to_add = "mixed" + "," + str(episode) + "," + \
                        str(step) + "," + str(cumulated_reward) + "," + \
                        str(boolean_reward) + '\n'
                    myfile.write(string_to_add)
                break
        break  # NOTE: as written, this exits after the first episode
def main():
    """
    Main function for testing the DQN network on the autonomous-landing task,
    printing its action distribution at every step.
    """
    # ATTENTION: If you want to restore files from a previous simulation you
    # must pass valid values for these variables:
    policy_weights_path = '/home/pulver/Desktop/episode_32400/policy/policy_checkpoint.ckp'
    summary_folder = ""  # if empty the summary is written in ./log/ + current time
    start_episode = 0  # change to the last episode number
    frame_counter = 0  # change to the last number of frames
    screen_width = 84  # original is 160
    screen_height = 84  # original is 210
    images_stack_size = 4
    # Use only the first 5 actions for this simulation
    # 0 (left), 1 (right), 2 (forward), 3 (backward), 4 (stop), 5 (land),
    # 6 (ascend), 7 (descend), 8 (rotate_left), 9 (rotate_right)
    tot_actions = 6
    batch_size = 32  # size of the experience batch
    tot_steps = 1000  # finite-horizon simulation
    tot_episodes = 1050
    steps_per_episodes = 20  # expressed in number of steps
    noop_time = 2.0  # pause in seconds between actions
    num_ground_plane = 21
    episodes_per_ground_plane = tot_episodes / num_ground_plane
    actual_ground_index = 0
    timer_total_start = time.time()

    rospy.init_node("DeepReinforcedLanding")
    rospy.loginfo("----- Deep Reinforced Landing Node -----")
    rospy.Subscriber("/quadrotor/ardrone/bottom/ardrone/bottom/image_raw",
                     ROSImage, image_callback,
                     queue_size=30)  # Store the last 30 messages before discarding them
    r = rospy.Rate(10)  # 10 Hz

    # Init session and networks
    sess = tf.Session()
    if summary_folder == "":
        tf_summary_writer = tf.summary.FileWriter(
            './log/' + str(datetime.datetime.now().time()), sess.graph)
    else:
        tf_summary_writer = tf.summary.FileWriter(summary_folder, sess.graph)
    policy_network = QNetwork(sess, tot_actions=tot_actions,
                              image_shape=(screen_width, screen_height,
                                           images_stack_size),
                              batch_size=batch_size,
                              network_name="policy_net")
    init = tf.global_variables_initializer()
    sess.run(init)
    # Load the network weights from memory if a valid checkpoint path is passed
    # if(os.path.isfile(policy_weights_path) == True and
    #    os.path.isfile(target_weights_path) == True):
    try:
        print("Loading weights from memory...")
        policy_network.load_weights(policy_weights_path)
    except Exception:
        print("Error while loading the weights!")
        return

    print "Episode, step, action, reward, action_distribution"
    # Start here
    for episode in range(start_episode, tot_episodes):
        # Reset the ground in the environment every 25k frames
        # ground_index = episode / 50
        # if ground_index != actual_ground_index or episode == 0:
        #     generate_new_world(ground_index)
        # actual_ground_index = ground_index
        timer_start = time.time()
        actual_time = rospy.get_rostime()
        rospy_start_time = actual_time.secs + actual_time.nsecs / 1000000000.0
        cumulated_reward = 0
        # 1 - Accumulate the first state
        # reset_pose()
        send_action("stop")
        rospy.sleep(1.0)
        state = _last_image
        # Create a stack of X images
        image_t = np.stack([state] * images_stack_size, axis=2)
        frame_episode = 0
        pad_size = 3
        pad_value = 255

        for step in range(tot_steps):
            # Save the padded frame stack as a single horizontal strip
            image_stack_padded = [
                np.lib.pad(image_t[:, :, 0], (pad_size, pad_size), 'constant',
                           constant_values=(pad_value, pad_value)),
                np.lib.pad(image_t[:, :, 1], (pad_size, pad_size), 'constant',
                           constant_values=(pad_value, pad_value)),
                np.lib.pad(image_t[:, :, 2], (pad_size, pad_size), 'constant',
                           constant_values=(pad_value, pad_value)),
                np.lib.pad(image_t[:, :, 3], (pad_size, pad_size), 'constant',
                           constant_values=(pad_value, pad_value))]
            image_hstack = np.hstack(image_stack_padded)
            cv2.imwrite("/home/pulver/Desktop/print_network_action_distribution/img_ep" +
                        str(episode) + "_step" + str(step) + ".png", image_hstack)
            # 2 - Get the action from the policy network.
            # Here image_t is always ready and can be given as input to the network.
            action_distribution = policy_network.return_action_distribution(
                input_data=np.reshape(image_t, (1, 84, 84, images_stack_size)),
                softmax=False)
            action = np.argmax(action_distribution)
            action = convert_action_int_to_str(action)
            # action = get_random_action()
            send_action(action)
            rospy.sleep(noop_time)
            # 3 - Get state_t1 repeating the action obtained at step 2.
            # Then set state_t = state_t1.
            image_t1 = _last_image
            # If the action taken is to land and the UAV is inside the landing
            # BB, done will be calculated accordingly
            done_reward = get_done_reward()
            reward = done_reward.reward
            done = done_reward.done
            image_t1 = np.expand_dims(image_t1, 2)
            # Stack the images
            image_t1 = np.append(image_t[:, :, 1:], image_t1, axis=2)
            image_t = image_t1
            cumulated_reward += reward
            send_action("stop")
            frame_episode = frame_episode + 1
            # get_relative_pose(ground_index, episode, step, reward)
            timer_stop = time.time()
            print str(episode) + "," + str(step) + "," + action + "," + \
                str(reward) + "," + str(action_distribution)
            # Stop the episode if the number of frames exceeds the threshold
            if frame_episode >= steps_per_episodes:
                done = True
            # if timer_stop - timer_start >= 30.0:  # maximum time allowed
            #     # cumulated_reward += -1
            #     done = True
            # When the episode is done
            if done:
                break
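# --- Illustration (editor's addition, not part of the original script) ------
# A minimal sketch of the debug image saved at every step above: each of the
# four stacked frames is padded with a white border and the padded frames are
# laid out side by side in one horizontal strip. The helper name
# `stack_to_strip` is hypothetical.
import numpy as np

def stack_to_strip(image_t, pad_size=3, pad_value=255):
    padded = [np.lib.pad(image_t[:, :, i], (pad_size, pad_size), 'constant',
                         constant_values=(pad_value, pad_value))
              for i in range(image_t.shape[2])]
    return np.hstack(padded)  # (84+2*pad, (84+2*pad)*4) for an 84x84x4 stack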