def main():
    """Shuffle the experiences of a replay buffer on disk and save it back.

    Loads the pickle, reports load time and size, shuffles the experiences
    in place, then overwrites the same file.
    """
    replay_memory_size = 400000
    buffer_path = "/home/pulver/replay_2.pickle"
    # replay_buffer_path = "./replay_buffer.pickle"
    buffer_one = ExperienceReplayBuffer(capacity=replay_memory_size)
    load_start = time.time()
    # Load the replay buffer from file
    buffer_one.load(buffer_path)
    load_stop = time.time()
    elapsed = load_stop - load_start
    print("Time episode: " + str(elapsed) + " seconds")
    print("Time episode: " + str(elapsed / 60) + " minutes")
    print("Size_1 : " + str(buffer_one.return_size()))
    print("")
    print("Starting shuffling...")
    buffer_one.shuffle()
    print("Shuffle completed!")
    print("Starting saving...")
    buffer_one.save("/home/pulver/replay_2.pickle")
    print("Size_1 : " + str(buffer_one.return_size()))
def main():
    """Drop a fixed number of experiences from a buffer and save the cleaned copy.

    Loads the source buffer, reports load time and size, pops 27300
    experiences and writes the result to a new "clean" pickle.
    """
    replay_memory_size = 400000
    source_path = "/home/pulver/Desktop/marine_descending/replay_buffer.pickle"
    output_path = "/home/pulver/Desktop/marine_descending/replay_buffer_neutral_clean.pickle"
    source_buffer = ExperienceReplayBuffer(capacity=replay_memory_size)
    load_start = time.time()
    # Load the replay buffer from file
    source_buffer.load(source_path)
    load_stop = time.time()
    elapsed = load_stop - load_start
    print("Time episode: " + str(elapsed) + " seconds")
    print("Time episode: " + str(elapsed / 60) + " minutes")
    print("Size_1 : " + str(source_buffer.return_size()))
    print("")
    # Remove 27300 experiences, then persist the cleaned buffer
    source_buffer.pop_experience(27300)
    source_buffer.save(output_path)
    print("Size_1 : " + str(source_buffer.return_size()))
def main():
    """Remove every 'descend' experience from a buffer and save the result.

    Prints the size before and after the cleaning together with the number
    and percentage of experiences that were removed.
    """
    replay_memory_size = 400000
    source_path = "/home/thebeast/Desktop/simulation_7/replay_buffer_positive.pickle"
    buffer_under_edit = ExperienceReplayBuffer(capacity=replay_memory_size)
    # Load the replay buffer from file
    buffer_under_edit.load(source_path)
    size_before = buffer_under_edit.return_size()
    print("Original size: " + str(size_before))
    removed = buffer_under_edit.clean_action("descend")
    print("New size: " + str(buffer_under_edit.return_size()))
    print("Number experiences removed:" + str(removed))
    print("Percentage removed experiences: " + str(100 * removed / float(size_before)) + "%")
    buffer_under_edit.save("/home/thebeast/Desktop/simulation_7/replay_buffer_positive_clean.pickle")
def main():
    """Shuffle a large negative-experience buffer and split it into ten sub-buffers.

    Loads the big pickle, shuffles its experiences, then multisplits them
    into files replay_buffer_negative_1.pickle .. replay_buffer_negative_10.pickle,
    each holding `sub_buffer_size` experiences.
    """
    # TODO: Variables to change based on paths and buffer size
    # Pop-right target used when the original buffer is corrupted
    # (the pruning step below is currently disabled).
    to_keep_original_experiences = 370664
    sub_buffer_size = 37066  # size of each generated sub-buffer
    replay_memory_size = 500000
    root_path = "/media/pulver/6a87c355-c51f-4e43-9527-f0c5bd730ec4/pulver/Desktop/simulation_11/negative/"
    replay_buffer_path = "/media/pulver/6a87c355-c51f-4e43-9527-f0c5bd730ec4/pulver/Desktop/simulation_11/negative/replay_buffer_negative_big_shuffled_370664.pickle"
    # Load the main buffer
    replay_buffer = ExperienceReplayBuffer(capacity=replay_memory_size)
    if os.path.isfile(replay_buffer_path):
        print("Replay buffer loading from file: " + str(replay_buffer_path))
        replay_buffer.load(replay_buffer_path)
        print("Size: " + str(replay_buffer.return_size()))
    else:
        print("Buffer replay does not exist!")
    # (Pruning of the original buffer down to `to_keep_original_experiences`
    # via pop_experience(..., side="right") is intentionally disabled.)
    print("Starting shuffling experiences...")
    replay_buffer.shuffle()
    print("Done!")
    print("Starting multisplit...")
    print("Sit back and relax, it may take a while...")
    # Ten equally-sized destination buffers, numbered 1..10.
    multibuffer_list = [
        (root_path + "replay_buffer_negative_" + str(i) + ".pickle", sub_buffer_size)
        for i in range(1, 11)
    ]
    replay_buffer.multisplit(multibuffer_list)
    print("Done!")
def main():
    """Visually inspect the first experiences of a replay buffer.

    Loads the buffer, fetches five consecutive experiences (two frames
    each) starting at index 0 and shows them in one vertically stacked
    window. Press 'q' to close.
    """
    timer_total_start = time.time()
    rospy.init_node("DeepReinforcedLanding")
    rospy.loginfo("----- Deep Reinforced Landing Node -----")
    replay_memory_size = 400000
    replay_buffer_path = "./replay_buffer_test.pickle"
    # replay_buffer_path = "./replay_buffer.pickle"
    replay_buffer = ExperienceReplayBuffer(capacity=replay_memory_size)
    # Load the replay buffer from file
    replay_buffer.load(replay_buffer_path)
    # ----------------- PRINT EXPERIENCE FOR TESTING -----------------
    index = 0
    increment = 1
    # Collect the two frames of five consecutive experiences (10 images total).
    frames = []
    for offset in range(5):
        first_frame, second_frame = replay_buffer.debug_experience(
            index=index + increment * offset)
        frames.append(first_frame)
        frames.append(second_frame)
    image_vstack = np.vstack(tuple(frames))
    cv2.imshow("experience 0-9 vertical stack", image_vstack)
    # Block until the user presses 'q'.
    while True:
        if cv2.waitKey(33) == ord('q'):
            cv2.destroyAllWindows()
            break
def main():
    """Count inconsistent 'descend' experiences (reward -1.0) in a buffer.

    Loads the negative buffer, prints its size and the number of experiences
    whose action/reward pair matches ("descend", -1.0). Nothing is removed
    or saved.

    Fix: the final message said "Tot wrond experiences"; corrected to
    "Tot wrong experiences". Dead commented-out cleaning code removed.
    """
    replay_memory_size = 400000
    replay_buffer_path = "/home/thebeast/Desktop/buffer_to_merge/negative/buffer_negative.pickle"
    replay_buffer = ExperienceReplayBuffer(capacity=replay_memory_size)
    # Load the replay buffer from file
    replay_buffer.load(replay_buffer_path)
    original_size = replay_buffer.return_size()
    print("Original size: " + str(original_size))
    # Count (without removing) the experiences with the given action/reward pair.
    counter = replay_buffer.count_experiences("descend", -1.0)
    print("Tot wrong experiences: " + str(counter))
def main():
    """ Initialize and run the rospy node.

    Fills a replay buffer with experiences gathered by executing random
    actions in the simulated world, saving it periodically and once more
    when the buffer reaches capacity.
    """
    timer_total_start = time.time()
    rospy.init_node("ReplayBufferFiller")
    rospy.loginfo("----- Replay Buffer Filler -----")
    replay_memory_size = 400000
    replay_buffer_path = "./replay_buffer.pickle"
    replay_buffer = ExperienceReplayBuffer(capacity=replay_memory_size)
    # Resume from an existing pickle when present.
    if (os.path.isfile(replay_buffer_path) == True):
        print("Replay buffer loading from file: " + str(replay_buffer_path))
        replay_buffer.load(replay_buffer_path)
    else:
        print "Buffer replay does not exist!"
    # Optional second buffer: when present it is merged into the first.
    replay_buffer_path_2 = "./replay_buffer_2.pickle"
    replay_buffer_2 = ExperienceReplayBuffer(capacity=replay_memory_size)
    if (os.path.isfile(replay_buffer_path_2) == True):
        print("Replay buffer loading from file: " + str(replay_buffer_path_2))
        replay_buffer_2.load(replay_buffer_path_2)
        # Merge the two replay buffers in the first
        replay_buffer.append(replay_buffer_2)
    else:
        print "Second replay buffer does not exist!"
    # Create a subscriber for the greyscale image.
    # NOTE(review): image_callback presumably updates the module-level
    # _last_image read below — confirm in the rest of the file.
    rospy.Subscriber("/quadrotor/ardrone/bottom/ardrone/bottom/image_raw",
                     ROSImage, image_callback)
    images_stack_size = 4
    tot_steps = 3000000  # finite-horizont simulation
    frame_preliminary = 0
    saving_every_tot_experiences = 2500
    noop_time = 2.0  # pause in seconds between actions
    steps_per_episodes = 20
    #saving_every_tot_experiences = 450 #TODO SET TO 250 JUST FOR TEST
    #r = rospy.Rate(10) # 10hz
    num_ground_plane = 70
    frame_per_ground_plane = int(replay_memory_size / num_ground_plane)
    actual_ground_index = 0
    episode = 1
    # Collect experiences until the buffer reaches its capacity.
    while True:
        if replay_buffer.return_size() >= replay_memory_size:
            break
        # Reset the ground plane in the environment as the buffer grows;
        # a new world is generated whenever the derived index changes.
        ground_index = replay_buffer.return_size() / frame_per_ground_plane
        if ground_index != actual_ground_index:
            generate_new_world(ground_index)
            actual_ground_index = ground_index
        cumulated_reward = 0
        print ""
        print "Preliminary Episode: " + str(episode)
        # Reset UAV at random pose
        reset_pose()
        send_action('stop')
        rospy.sleep(1.0)
        #get_image()
        image_t = _last_image
        # When the replay buffer is empty, fill it with the same picture 4
        # times
        image_t = np.stack([image_t] * images_stack_size, axis=2)  # create a stack of X images
        timer_start = time.time()
        actual_time = rospy.get_rostime()
        rospy_start_time = actual_time.secs + actual_time.nsecs / 1000000000.0
        frame_episode = 0
        for step in range(tot_steps):
            # Execute a random action in the world and observe the reward and
            # state_t1.
            action = get_random_action()
            #print "Action taken: " + action
            send_action(action)
            rospy.sleep(noop_time)
            # Acquire a new frame and convert it in a numpy array
            #get_image()
            image_t1 = _last_image
            # Get the reward and done status
            done_reward = get_done_reward()
            reward = done_reward.reward
            done = done_reward.done
            # Calculate the new cumulated_reward
            cumulated_reward += reward
            # state_t1, reward, done, info = env.step(action)
            image_t1 = np.expand_dims(image_t1, 2)
            # Stack the new frame behind the three most recent ones.
            image_t1 = np.append(image_t[:, :, 1:], image_t1, axis=2)
            # Store the experience in the replay buffer
            replay_buffer.add_experience(image_t, action, reward, image_t1,
                                         done)
            frame_preliminary += 1  # To call every time a frame is obtained
            image_t = image_t1
            timer_episode_stop = time.time()
            frame_episode += 1
            send_action("stop")
            # Force episode termination after a fixed number of frames.
            if frame_episode >= steps_per_episodes:
                done = True
            # Periodically checkpoint the buffer to disk.
            if replay_buffer.return_size() % saving_every_tot_experiences == 0:
                timer_saving_start = time.time()
                print "Saving the replay buffer in: " + replay_buffer_path
                print "Sit back and relax, it may take a while..."
                replay_buffer.save(replay_buffer_path)
                timer_saving_stop = time.time()
                print "Done!"
                print "Time to save the buffer: " + str(
                    timer_saving_stop - timer_saving_start) + " seconds"
                print "Time to save the buffer: " + str(
                    (timer_saving_stop - timer_saving_start) / 60) + " minutes"
            # End-of-episode report, then start the next episode.
            if done:
                episode += 1
                timer_stop = time.time()
                actual_time = rospy.get_rostime()
                rospy_stop_time = actual_time.secs + actual_time.nsecs / 1000000000.0
                rospy_time_elapsed = rospy_stop_time - rospy_start_time
                print "Replay Buffer Size: " + str(replay_buffer.return_size(
                )) + " out of " + str(replay_memory_size)
                print "Replay buffer memory uses (KB): " + str(
                    replay_buffer.return_size_byte(value="kilobyte"))
                print "Replay buffer memory uses (MB): " + str(
                    replay_buffer.return_size_byte(value="megabyte"))
                print "Replay buffer memory uses (GB): " + str(
                    replay_buffer.return_size_byte(value="gigabyte"))
                print "Frame counter: " + str(frame_preliminary)
                print "Time episode: " + str(timer_stop - timer_start) + " seconds"
                print("Ros time episode: " + str(rospy_time_elapsed) + " seconds")
                print "Replay Buffer experiences: " + str(
                    replay_buffer.return_size())
                if cumulated_reward >= 0:
                    rospy.logwarn("Positive reward obtained!")
                print "Cumulated reward: " + str(cumulated_reward)
                print "Episode finished after {} timesteps".format(step + 1)
                break
    timer_total_stop = time.time()
    print "Total time simulation: " + str(
        (timer_total_stop - timer_total_start) / 60.0) + " minutes"
    print "Total time simulation: " + str(
        (timer_total_stop - timer_total_start) / 3600.0) + " hours"
    # Once the buffer is filled, save it to disk
    timer_saving_start = time.time()
    print "Saving the replay buffer in: " + replay_buffer_path
    print "Sit back and relax, it may take a while..."
    replay_buffer.save(replay_buffer_path)
    print "Done!"
    timer_saving_stop = time.time()
    print "Time to save the buffer: " + str(timer_saving_stop -
                                            timer_saving_start) + " seconds"
    print "Time to save the buffer: " + str(
        (timer_saving_stop - timer_saving_start) / 60) + " minutes"
    # Shutdown the node
    rospy.signal_shutdown("Rospy Shutdown!")
def main():
    """Move a fixed number of experiences from one replay buffer into a new one.

    Loads the source buffer, splits `split_experiences` experiences into a
    second buffer/file, reloads the source and prints both resulting sizes.
    """
    replay_memory_size = 400000
    source_path = "/home/pulver/Desktop/replay_buffer_positive_soil.pickle"
    destination_path = "/home/pulver/Desktop/new_replay_buffer.pickle"
    split_experiences = 3000
    # replay_buffer_path = "./replay_buffer.pickle"
    source_buffer = ExperienceReplayBuffer(capacity=replay_memory_size)
    destination_buffer = ExperienceReplayBuffer(capacity=replay_memory_size)
    load_start = time.time()
    # Load the replay buffer from file
    source_buffer.load(source_path)
    load_stop = time.time()
    elapsed = load_stop - load_start
    print("Time episode: " + str(elapsed) + " seconds")
    print("Time episode: " + str(elapsed / 60) + " minutes")
    print("Size_1 : " + str(source_buffer.return_size()))
    print("")
    print("Starting splitting...")
    source_buffer.split(source_path, destination_buffer, destination_path,
                        split_experiences)
    print("Splitting completed!")
    print("Starting saving...")
    # Reload the source buffer so its size reflects the split just performed.
    source_buffer.load(source_path)
    print("Size_1 : " + str(source_buffer.return_size()))
    print("Size_2 : " + str(destination_buffer.return_size()))
def main():
    """ Initialize and run the rospy node.

    Collects experiences by executing random actions, routing each one into
    a shared buffer plus one of three reward-specific buffers (neutral,
    positive, negative), and periodically saving all four to disk.
    """
    timer_total_start = time.time()
    rospy.init_node("ReplayBufferFiller")
    rospy.loginfo("----- Replay Buffer Filler -----")
    replay_memory_size = 20000
    replay_buffer_path = "./replay_buffer_shared.pickle"
    replay_buffer_path_neutral = "./replay_buffer_neutral.pickle"
    replay_buffer_path_positive = "./replay_buffer_positive.pickle"
    replay_buffer_path_negative = "./replay_buffer_negative.pickle"
    replay_buffer = ExperienceReplayBuffer(capacity=replay_memory_size)
    replay_buffer_neutral = ExperienceReplayBuffer(capacity=replay_memory_size)
    replay_buffer_positive = ExperienceReplayBuffer(
        capacity=replay_memory_size)
    replay_buffer_negative = ExperienceReplayBuffer(
        capacity=replay_memory_size)
    # Load the Replay buffer from file or accumulate experiences
    if (os.path.isfile(replay_buffer_path) == True):
        print("Replay buffer loading from file: " + str(replay_buffer_path))
        replay_buffer.load(replay_buffer_path)
    else:
        print('No buffer_1 found')
    if (os.path.isfile(replay_buffer_path_neutral) == True):
        print("Replay buffer loading from file: " +
              str(replay_buffer_path_neutral))
        replay_buffer_neutral.load(replay_buffer_path_neutral)
    else:
        print('No buffer_2 found')
    if (os.path.isfile(replay_buffer_path_positive) == True):
        print("Replay buffer loading from file: " +
              str(replay_buffer_path_positive))
        replay_buffer_positive.load(replay_buffer_path_positive)
    else:
        print('No buffer_2 found')
    if (os.path.isfile(replay_buffer_path_negative) == True):
        print("Replay buffer loading from file: " +
              str(replay_buffer_path_negative))
        replay_buffer_negative.load(replay_buffer_path_negative)
    else:
        print('No buffer_2 found')
    # Create a subscriber for the greyscale image.
    # NOTE(review): image_callback presumably updates the module-level
    # _last_image read below — confirm in the rest of the file.
    rospy.Subscriber("/quadrotor/ardrone/bottom/ardrone/bottom/image_raw",
                     ROSImage, image_callback)
    images_stack_size = 4
    tot_steps = 3000000  # finite-horizont simulation
    frame_preliminary = 0
    saving_every_tot_experiences = 250
    is_buffer_saved = True
    noop_time = 2.0  # pause in seconds between actions
    steps_per_episodes = 30
    total_experience_counter = 0.0
    episode = 0
    start_episode = 1
    tot_episodes = 20000
    wrong_altitude = False
    quadrotor_pose = ModelState()
    quadrotor_pose.model_name = "quadrotor"
    quadrotor_pose.reference_frame = "world"
    # NOTE(review): the loop variable is also incremented manually inside the
    # "if done" branch below; the for-loop overwrites it each iteration.
    for episode in range(start_episode, tot_episodes):
        cumulated_reward = 0
        print ""
        print "Preliminary Episode: " + str(episode)
        # print "Ground counter : " + str(ground_counter)
        # Reset UAV at random pose
        reset_pose()
        send_action('stop')
        rospy.sleep(3.0)
        # get_image()
        image_t = _last_image
        # When the replay buffer is empty, fill it with the same picture 4
        # times
        image_t = np.stack([image_t] * images_stack_size, axis=2)  # create a stack of X images
        timer_start = time.time()
        actual_time = rospy.get_rostime()
        rospy_start_time = actual_time.secs + actual_time.nsecs / 1000000000.0
        frame_episode = 0
        done_reward = get_done_reward()
        update_quadrotor_pose(quadrotor_pose, done_reward)
        for step in range(tot_steps):
            # Execute a random action in the world and observe the reward and
            # state_t1.
            action = get_random_action()
            send_action(action)
            # "descend" needs extra settling time before the next frame.
            if action == "descend":
                rospy.sleep(5.0)
                send_action("stop")
                rospy.sleep(1.0)
            else:
                rospy.sleep(noop_time)
            # Acquire a new frame and convert it in a numpy array
            image_t1 = _last_image
            done_reward = get_done_reward()
            # NOTE: moved here to fix problem with baricenter (partially
            # reduced)
            send_action("stop")
            # Get the reward and done status
            reward = done_reward.reward
            done = done_reward.done
            print "Step(" + str(
                step) + "), Action: " + action + ", Altitude: " + str(
                    done_reward.z) + ", Reward: " + str(reward)
            wrong_altitude = done_reward.wrong_altitude
            if wrong_altitude == True:
                rospy.logerr("[ERROR] Wrong altitude!")
            # Calculate the new cumulated_reward
            cumulated_reward += reward
            # state_t1, reward, done, info = env.step(action)
            image_t1 = np.expand_dims(image_t1, 2)
            # Stack the new frame behind the three most recent ones.
            image_t1 = np.append(image_t[:, :, 1:], image_t1, axis=2)
            # Route the experience into the reward-specific buffer; positive
            # and negative rewards are only expected for "descend".
            if reward > 0:
                if action == "descend":
                    replay_buffer_positive.add_experience(
                        image_t, action, reward, image_t1, done)
                    is_buffer_saved = False
                else:
                    rospy.logerr(
                        "[POSITIVE]Wrong action for positive reward: %s",
                        action)
            elif reward == -1.0:
                if action == "descend":
                    replay_buffer_negative.add_experience(
                        image_t, action, reward, image_t1, done)
                else:
                    rospy.logerr(
                        "[NEGATIVE]Wrong action for negative reward: %s",
                        action)
            else:
                replay_buffer_neutral.add_experience(image_t, action, reward,
                                                     image_t1, done)
            # Add all the experiences without distinction into the shared
            # buffer
            replay_buffer.add_experience(image_t, action, reward, image_t1,
                                         done)
            frame_preliminary += 1  # To call every time a frame is obtained
            total_experience_counter += 1
            image_t = image_t1
            timer_episode_stop = time.time()
            frame_episode += 1
            update_quadrotor_pose(quadrotor_pose, done_reward)
            # rospy.sleep(2.0) #NOTE: fix the descend bug affecting the
            # altitude
            # Force episode termination after a fixed number of frames.
            if frame_episode >= steps_per_episodes:
                done = True
            # Periodically save all four buffers.
            # if replay_buffer_positive.return_size() %
            # saving_every_tot_experiences == 0 and is_buffer_saved == False:
            if total_experience_counter % saving_every_tot_experiences == 0:
                timer_start = time.time()
                print("")
                print("Saving the replay buffer in: " + replay_buffer_path)
                print("Sit back and relax, it may take a while...")
                replay_buffer.save(replay_buffer_path)
                timer_stop = time.time()
                print "Time episode: " + str(timer_stop - timer_start) + " seconds"
                print "Time episode: " + str(
                    (timer_stop - timer_start) / 60) + " minutes"
                print("Done!")
                timer_start = time.time()
                print("")
                print("Saving the replay buffer in: " +
                      replay_buffer_path_neutral)
                print("Sit back and relax, it may take a while...")
                replay_buffer_neutral.save(replay_buffer_path_neutral)
                timer_stop = time.time()
                print "Time episode: " + str(timer_stop - timer_start) + " seconds"
                print "Time episode: " + str(
                    (timer_stop - timer_start) / 60) + " minutes"
                print("Done!")
                print("")
                # NOTE(review): timer_start is not reset before the positive
                # and negative saves, so their reported times also include the
                # neutral save — likely unintended.
                print("Saving the replay buffer in: " +
                      replay_buffer_path_positive)
                print("Sit back and relax, it may take a while...")
                replay_buffer_positive.save(replay_buffer_path_positive)
                timer_stop = time.time()
                print "Time episode: " + str(timer_stop - timer_start) + " seconds"
                print "Time episode: " + str(
                    (timer_stop - timer_start) / 60) + " minutes"
                print("Done!")
                print("")
                print("Saving the replay buffer in: " +
                      replay_buffer_path_negative)
                print("Sit back and relax, it may take a while...")
                replay_buffer_negative.save(replay_buffer_path_negative)
                timer_stop = time.time()
                print "Time episode: " + str(timer_stop - timer_start) + " seconds"
                print "Time episode: " + str(
                    (timer_stop - timer_start) / 60) + " minutes"
                print("Done!")
                print("")
                is_buffer_saved = True
            # End-of-episode report, then start the next episode.
            if done:
                episode += 1
                timer_stop = time.time()
                actual_time = rospy.get_rostime()
                rospy_stop_time = actual_time.secs + actual_time.nsecs / 1000000000.0
                rospy_time_elapsed = rospy_stop_time - rospy_start_time
                print "Replay Buffer Size: " + str(replay_buffer.return_size(
                )) + " out of " + str(replay_memory_size)
                print "Replay Buffer Neutral Size: " + str(
                    replay_buffer_neutral.return_size()) + " out of " + str(
                        replay_memory_size)
                print "Replay Buffer Positive Size: " + str(
                    replay_buffer_positive.return_size()) + " out of " + str(
                        replay_memory_size)
                print "Replay Buffer Negative Size: " + str(
                    replay_buffer_negative.return_size()) + " out of " + str(
                        replay_memory_size)
                print "Frame counter: " + str(frame_preliminary)
                print "Time episode: " + str(timer_stop - timer_start) + " seconds"
                print("Ros time episode: " + str(rospy_time_elapsed) + " seconds")
                if cumulated_reward >= 0:
                    rospy.logwarn("Positive reward obtained!")
                print "Cumulated reward: " + str(cumulated_reward)
                print "Episode finished after {} timesteps".format(step + 1)
                break
def main():
    """Compare the shared, positive, negative and neutral replay buffers.

    Loads all four pickles, prints load time and each buffer's size, then
    prints the per-action experience counters of every buffer.
    """
    replay_memory_size = 400000
    base_dir = "/home/pulver/Desktop/buffers_comparison/"
    # replay_buffer_path = "./replay_buffer.pickle"
    ordered_names = ["shared", "positive", "negative", "neutral"]
    buffers = {}
    load_start = time.time()
    # Load every replay buffer from file
    for name in ordered_names:
        buf = ExperienceReplayBuffer(capacity=replay_memory_size)
        buf.load(base_dir + "replay_buffer_" + name + ".pickle")
        buffers[name] = buf
    load_stop = time.time()
    print("Time episode: " + str(load_stop - load_start) + " seconds")
    print("Time episode: " + str((load_stop - load_start) / 60) + " minutes")
    print("Size shared buffer: " + str(buffers["shared"].return_size()))
    print("Size positive buffer: " + str(buffers["positive"].return_size()))
    print("Size negative buffer: " + str(buffers["negative"].return_size()))
    print("Size neutral buffer: " + str(buffers["neutral"].return_size()))
    print("")
    # ----------------- PRINT EXPERIENCE COUNTER -----------------
    for title, name in [("Shared buffer", "shared"),
                        ("Positive buffer", "positive"),
                        ("Negative buffer", "negative"),
                        ("Neutral buffer", "neutral")]:
        print(title)
        buffers[name].count_experience()
        print("")
def main():
    """Merge a second replay buffer into the first and persist the result.

    Loads the laptop buffer, then — if the second pickle exists — appends its
    experiences via `append_from_file` and saves the merged buffer back to
    the first path.

    Fixes vs. the original:
    - The merge/save block was duplicated after the if/else, so a missing
      second file still attempted a merge; the duplicate referenced the
      undefined name `eplay_buffer` (NameError) on save. The merge is now
      performed exactly once, only when the second file exists.
    - The second buffer was loaded into its own unused ExperienceReplayBuffer
      even though `append_from_file` re-reads the pickle; that redundant load
      is removed.
    """
    replay_memory_size = 100000
    replay_buffer_path = "/home/pulver/Desktop/simulation_11/replay_buffer_negative_sand_laptop.pickle"
    replay_buffer = ExperienceReplayBuffer(capacity=replay_memory_size)
    if os.path.isfile(replay_buffer_path):
        print("Replay buffer loading from file: " + str(replay_buffer_path))
        replay_buffer.load(replay_buffer_path)
        print("Size: " + str(replay_buffer.return_size()))
    else:
        print("Buffer replay does not exist!")
    # Second replay buffer to be merged into the first.
    replay_buffer_positive_path = "/home/pulver/Desktop/simulation_11/replay_buffer_negative_sand.pickle"
    if os.path.isfile(replay_buffer_positive_path):
        print("Replay buffer loading from file: " +
              str(replay_buffer_positive_path))
        print("Start merging...")
        # Merge the two replay buffers in the first
        replay_buffer.append_from_file(replay_buffer_positive_path)
        print("Merge done! New size: " + str(replay_buffer.return_size()))
        print("Saving the replay buffer in: " + replay_buffer_path)
        print("Sit back and relax, it may take a while...")
        replay_buffer.save(replay_buffer_path)
        print("Done!")
    else:
        print("Second replay buffer does not exist!")
def main():
    """ Initialize and run the rospy node.

    Collects neutral experiences (reward neither positive nor -1.0) into a
    single replay buffer while cycling through random water ground planes,
    saving the buffer to disk periodically.
    """
    timer_total_start = time.time()
    rospy.init_node("ReplayBufferFiller")
    rospy.loginfo("----- Replay Buffer Filler -----")
    # Candidate ground planes; a random one is picked every
    # `episode_per_ground` episodes.
    ground_list = [
        "water1", "water2", "water3", "water4", "water5", "water6", "water7",
        "water8", "water9", "water10"
    ]
    replay_memory_size = 400000
    replay_buffer_path = "./replay_buffer.pickle"
    # replay_buffer_path_positive = "./replay_buffer_positive.pickle"
    # replay_buffer_path_negative = "./replay_buffer_negative.pickle"
    replay_buffer = ExperienceReplayBuffer(capacity=replay_memory_size)
    # replay_buffer_positive = ExperienceReplayBuffer(capacity=replay_memory_size)
    # replay_buffer_negative = ExperienceReplayBuffer(capacity=replay_memory_size)
    # Load the Replay buffer from file or accumulate experiences
    if (os.path.isfile(replay_buffer_path) == True):
        print("Replay buffer loading from file: " + str(replay_buffer_path))
        replay_buffer.load(replay_buffer_path)
    else:
        print('No buffer_1 found')
    # (Loading of the disabled positive/negative buffers removed — see the
    # commented paths above.)
    # Create a subscriber for the greyscale image.
    # NOTE(review): image_callback presumably updates the module-level
    # _last_image read below — confirm in the rest of the file.
    rospy.Subscriber("/quadrotor/ardrone/bottom/ardrone/bottom/image_raw",
                     ROSImage, image_callback)
    images_stack_size = 4
    tot_steps = 3000000  # finite-horizont simulation
    frame_preliminary = 0
    saving_every_tot_experiences = 2500
    is_buffer_saved = True
    noop_time = 2.0  # pause in seconds between actions
    steps_per_episodes = 30
    #saving_every_tot_experiences = 450 #TODO SET TO 250 JUST FOR TEST
    #r = rospy.Rate(10) # 10hz
    num_ground_plane = 15
    frame_per_ground_plane = int(replay_memory_size / num_ground_plane)
    # NOTE(review): the value computed above is immediately overwritten here.
    frame_per_ground_plane = 3125  #!M positive / 4 classes / 10 grounds / 8 transformations
    actual_ground_index = 0
    episode_per_ground = 50
    #ground_counter = replay_buffer_positive.return_size() / frame_per_ground_plane
    ground_counter = 1
    positive_experience_counter = 0
    positive_experience_print_episode = 50
    old_positive_experience_counter = 0
    total_experience_counter = 0.0
    old_total_experience_counter = 0.0001
    episode = 1
    wrong_altitude = False
    quadrotor_pose = ModelState()
    quadrotor_pose.model_name = "quadrotor"
    quadrotor_pose.reference_frame = "world"
    # Endless collection loop: each iteration is one episode.
    # NOTE(review): this loop has no break, so the shutdown code after it is
    # unreachable; the node presumably stops via external interruption.
    while True:
        # if replay_buffer_positive.return_size() >= replay_memory_size:
        #     break
        # Keep the current ground for `episode_per_ground` episodes, then
        # generate a new random one.
        if (ground_counter < episode_per_ground) and episode != 1:
            ground_counter = ground_counter + 1
        else:
            ground = choose_random_ground(ground_list)
            generate_new_world(ground, ground_list)
            ground_counter = 1
        cumulated_reward = 0
        print ""
        print "Preliminary Episode: " + str(episode)
        print "Ground counter value: " + str(ground_counter)
        # Reset UAV at random pose
        reset_pose()
        send_action('stop')
        rospy.sleep(3.0)
        #get_image()
        image_t = _last_image
        # When the replay buffer is empty, fill it with the same picture 4
        # times
        image_t = np.stack([image_t] * images_stack_size, axis=2)  # create a stack of X images
        timer_start = time.time()
        actual_time = rospy.get_rostime()
        rospy_start_time = actual_time.secs + actual_time.nsecs / 1000000000.0
        frame_episode = 0
        done_reward = get_done_reward()
        update_quadrotor_pose(quadrotor_pose, done_reward)
        for step in range(tot_steps):
            # Execute a random action in the world and observe the reward and
            # state_t1.
            action = get_random_action()
            send_action(action)
            # "descend" needs extra settling time before the next frame.
            if action == "descend":
                rospy.sleep(5.0)
                send_action("stop")
                rospy.sleep(1.0)
                #quadrotor_pose.pose.position.z = adjust_altitude(quadrotor_pose.pose.position.z)
                #set_pose(quadrotor_pose)
            else:
                #print "Action taken: " + action
                rospy.sleep(noop_time)
            # Acquire a new frame and convert it in a numpy array
            image_t1 = _last_image
            done_reward = get_done_reward()
            send_action(
                "stop"
            )  #NOTE: moved here to fix problem with baricenter (partially reduced)
            # Get the reward and done status
            reward = done_reward.reward
            done = done_reward.done
            print "Step(" + str(
                step) + "), Action: " + action + ", Altitude: " + str(
                    done_reward.z) + ", Reward: " + str(reward)
            wrong_altitude = done_reward.wrong_altitude
            if wrong_altitude == True:
                rospy.logerr("[ERROR] Wrong altitude!")
            # Calculate the new cumulated_reward
            cumulated_reward += reward
            # state_t1, reward, done, info = env.step(action)
            image_t1 = np.expand_dims(image_t1, 2)
            # Stack the new frame behind the three most recent ones.
            image_t1 = np.append(image_t[:, :, 1:], image_t1, axis=2)
            # Only neutral experiences are stored; positive/negative ones are
            # discarded here (their buffers are disabled) but inconsistent
            # action/reward pairs are logged.
            if reward > 0:
                if action == "descend":
                    # replay_buffer_positive.add_experience(image_t, action, reward, image_t1, done)
                    # is_buffer_saved = False
                    pass
                else:
                    rospy.logerr(
                        "[POSITIVE]Wrong action for positive reward: %s",
                        action)
            elif reward == -1.0:
                if action == "descend":
                    # replay_buffer_negative.add_experience(image_t, action, reward, image_t1, done)
                    pass
                else:
                    rospy.logerr(
                        "[NEGATIVE]Wrong action for negative reward: %s",
                        action)
            else:
                replay_buffer.add_experience(image_t, action, reward,
                                             image_t1, done)
            frame_preliminary += 1  # To call every time a frame is obtained
            total_experience_counter += 1
            image_t = image_t1
            timer_episode_stop = time.time()
            frame_episode += 1
            update_quadrotor_pose(quadrotor_pose, done_reward)
            #rospy.sleep(2.0) #NOTE: fix the descend bug affecting the altitude
            # Force episode termination after a fixed number of frames.
            if frame_episode >= steps_per_episodes:
                done = True
            # Periodically checkpoint the buffer.
            # if replay_buffer_positive.return_size() % saving_every_tot_experiences == 0 and is_buffer_saved == False:
            if replay_buffer.return_size() % saving_every_tot_experiences == 0:
                timer_start = time.time()
                print("")
                print("Saving the replay buffer in: " + replay_buffer_path)
                print("Sit back and relax, it may take a while...")
                replay_buffer.save(replay_buffer_path)
                timer_stop = time.time()
                print "Time episode: " + str(timer_stop - timer_start) + " seconds"
                print "Time episode: " + str(
                    (timer_stop - timer_start) / 60) + " minutes"
                print("Done!")
                # (Saving of the disabled positive/negative buffers removed.)
            # End-of-episode report, then start the next episode.
            if done:
                episode += 1
                timer_stop = time.time()
                actual_time = rospy.get_rostime()
                rospy_stop_time = actual_time.secs + actual_time.nsecs / 1000000000.0
                rospy_time_elapsed = rospy_stop_time - rospy_start_time
                print "Replay Buffer Size: " + str(replay_buffer.return_size(
                )) + " out of " + str(replay_memory_size)
                # print "Replay Buffer Positive Size: " + str(replay_buffer_positive.return_size()) + " out of " + str(replay_memory_size)
                # print "Replay Buffer Negative Size: " + str(replay_buffer_negative.return_size()) + " out of " + str(replay_memory_size)
                print "Frame counter: " + str(frame_preliminary)
                print "Time episode: " + str(timer_stop - timer_start) + " seconds"
                print("Ros time episode: " + str(rospy_time_elapsed) + " seconds")
                if cumulated_reward >= 0:
                    rospy.logwarn("Positive reward obtained!")
                print "Cumulated reward: " + str(cumulated_reward)
                print "Episode finished after {} timesteps".format(step + 1)
                break
    # (Final total-time report and positive/negative buffer saves are
    # disabled in this variant.)
    # Shutdown the node
    rospy.signal_shutdown("Rospy Shutdown!")
def main(): replay_memory_size = 400000 replay_buffer_path = "/media/pulver/Pulver HD/replay_buffer.pickle" #replay_buffer_path = "./replay_buffer.pickle" replay_buffer = ExperienceReplayBuffer(capacity=replay_memory_size) timer_start = time.time() # Load the Replay buffer from file or accumulate experiences replay_buffer.load(replay_buffer_path) timer_stop = time.time() print "Time episode: " + str(timer_stop - timer_start) + " seconds" print "Time episode: " + str((timer_stop - timer_start) / 60) + " minutes" print "Size : " + str(replay_buffer.return_size()) print("") #----------------- PRINT EXPERIENCE FOR TESTING----------------- index = 97000 increment = 1 image, image_t1 = replay_buffer.debug_experience(index=index) image_t2, image_t3 = replay_buffer.debug_experience(index=index + increment) image_t4, image_t5 = replay_buffer.debug_experience(index=index + increment * 2) image_t6, image_t7 = replay_buffer.debug_experience(index=index + increment * 3) image_t8, image_t9 = replay_buffer.debug_experience(index=index + increment * 4) image_vstack = np.vstack( (image, image_t1, image_t2, image_t3, image_t4, image_t5, image_t6, image_t7, image_t8, image_t9)) cv2.imshow("experience 0-9 vertical stack", image_vstack) index = 98000 increment = 1 image, image_t1 = replay_buffer.debug_experience(index=index) image_t2, image_t3 = replay_buffer.debug_experience(index=index + increment) image_t4, image_t5 = replay_buffer.debug_experience(index=index + increment * 2) image_t6, image_t7 = replay_buffer.debug_experience(index=index + increment * 3) image_t8, image_t9 = replay_buffer.debug_experience(index=index + increment * 4) image_vstack = np.vstack( (image, image_t1, image_t2, image_t3, image_t4, image_t5, image_t6, image_t7, image_t8, image_t9)) cv2.imshow("experience 0-9 vertical stack - buffer 2", image_vstack) while True: if cv2.waitKey(33) == ord('q'): cv2.destroyAllWindows() break
def main(): replay_memory_size = 100000 replay_buffer_path = "/home/pulver/Desktop/simulation_11/sand/replay_buffer_positive_sand.pickle" replay_buffer = ExperienceReplayBuffer(capacity=replay_memory_size) if (os.path.isfile(replay_buffer_path) == True): print("Replay buffer loading from file: " + str(replay_buffer_path)) replay_buffer.load(replay_buffer_path) print "Size: " + str(replay_buffer.return_size()) else: print "Buffer replay does not exist!" return # experience = replay_buffer.buffer[0] # image_t = experience[0] # action_t = experience[1] # reward_t = experience[2] # image_t1 = experience[3] # done_t1 = experience[4] # Modify the whole buffer or just add a single experience replay_buffer.rotate_and_push(rotations_list=[ '90', '180', '270', 'vflip', 'vflip90', 'vflip180', 'vflip270' ], tot_elements=None) replay_buffer.shuffle() #replay_buffer.add_experience_and_rotate(image_t, action_t, reward_t, image_t1, done_t1, rotation_list=['90', '180', '270', 'vflip', 'vflip90', 'vflip180', 'vflip270']) print("Rotation done! New size: " + str(replay_buffer.return_size())) print("Saving the buffer...") replay_buffer.save( file_name= "/home/pulver/Desktop/simulation_11/sand/replay_buffer_positive_sand_rotated_" + str(replay_buffer.return_size()) + ".pickle") print("Done!")
def main(): #Load the main buffer replay_memory_size = 500000 replay_buffer_path = "/home/pulver/Desktop/marine_descending/replay_buffer_192k.pickle" replay_buffer = ExperienceReplayBuffer(capacity=replay_memory_size) if (os.path.isfile(replay_buffer_path) == True): print("Replay buffer loading from file: " + str(replay_buffer_path)) replay_buffer.load(replay_buffer_path) print "Size: " + str(replay_buffer.return_size()) else: print "Buffer replay does not exist!" #Here we load all the mini-buffer needed later print("Loading the buffer list...") max_capacity = 200000 path = "/home/pulver/Desktop/marine_descending/replay_buffer_194k.pickle" buffer_1 = ExperienceReplayBuffer(capacity=max_capacity) buffer_1.load(path) print "Size: " + str(buffer_1.return_size()) # max_capacity = 100000 # path = "/home/pulver/Desktop/marine_descending/replay_buffer_negative_8910.pickle" # buffer_2 = ExperienceReplayBuffer(capacity=max_capacity) # buffer_2.load(path) # print "Size: " + str(buffer_2.return_size()) # max_capacity = 100000 # path = "/home/pulver/Desktop/simulation_11/soil/replay_buffer_negative_soil.pickle" # buffer_3 = ExperienceReplayBuffer(capacity=max_capacity) # buffer_3.load(path) # print "Size: " + str(buffer_3.return_size()) # max_capacity = 100000 # path = "/home/pulver/Desktop/simulation_11/snow/replay_buffer_negative_snow.pickle" # buffer_4 = ExperienceReplayBuffer(capacity=max_capacity) # buffer_4.load(path) # print "Size: " + str(buffer_4.return_size()) #max_capacity = 100000 #path = "/home/pulver/Desktop/simulation_11/snow/replay_buffer_positive_snow_rotated_78200.pickle" #buffer_5 = ExperienceReplayBuffer(capacity=max_capacity) #buffer_5.load(path) #Here we append the file to the main buffer print("Starting multiappend...") print "Sit back and relax, it may take a while..." 
multibuffer_list = [buffer_1] #, buffer_2]#, buffer_3, buffer_4] replay_buffer.multiappend(multibuffer_list) print "New size: " + str(replay_buffer.return_size()) print("Done!") #In the end we shuffle and save print("Starting shuffling experiences...") replay_buffer.shuffle() print("Done!") print("Starting save buffer...") replay_buffer.save( "/home/pulver/Desktop/marine_descending/replay_buffer_neutral_final.pickle" ) print("Done!")
def main(): replay_memory_size = 400000 replay_buffer_path = "/home/pulver/Desktop/test/replay_buffer_positive.pickle" replay_buffer_path_2 = "/home/pulver/Desktop/replay_buffer_positive_rick.pickle" #replay_buffer_path = "./replay_buffer.pickle" replay_buffer = ExperienceReplayBuffer(capacity=replay_memory_size) replay_buffer_2 = ExperienceReplayBuffer(capacity=replay_memory_size) # Save the empty buffer if(os.path.isfile(replay_buffer_path_2) == True): print "The second buffer exist" else: print "Buffer not existing. I am saving an empty one." replay_buffer_2.save(replay_buffer_path_2) timer_start = time.time() # Load the Replay buffer from file or accumulate experiences replay_buffer.load(replay_buffer_path) timer_stop = time.time() replay_buffer.copy_experience_to(replay_buffer_2, replay_buffer_path_2, 2000) replay_buffer_2.load(replay_buffer_path_2) print "Time episode: " + str(timer_stop - timer_start) + " seconds" print "Time episode: " + str((timer_stop - timer_start) / 60) + " minutes" print "Size : " + str(replay_buffer.return_size()) print "Size 2: " + str(replay_buffer_2.return_size()) print("")