def take_pics():
    P.initialize_experiment()
    train_i, dev_i, test_i, _ = get_all_instructions()
    all_instructions = {**train_i, **dev_i, **test_i}
    save_dir = paths.get_env_image_path(0)
    os.makedirs(os.path.dirname(save_dir), exist_ok=True)

    keylist = list(all_instructions.keys())

    envs = [PomdpInterface(instance_id=i) for i in range(0, NUM_WORKERS)]
    env_id_splits = [[] for _ in range(NUM_WORKERS)]
    # NOTE: this overrides the full key list and restricts capture to a single environment (debugging leftover?)
    keylist = [6825]
    for i, key in enumerate(keylist):
        env_id_splits[i % NUM_WORKERS].append(key)

    time.sleep(1.0)
    for i in range(len(keylist)):
        d = False
        # For each worker, start the correct env
        for w in range(NUM_WORKERS):
            if i >= len(env_id_splits[w]):
                continue
            env_id = env_id_splits[w][i]
            # FIXME: This assumes that there is only 1 instruction set per env!
            fname = paths.get_env_image_path(env_id)
            if os.path.isfile(fname):
                print("Img exists: " + fname)
                continue
            d = True
            instruction_set = all_instructions[env_id][0]
            envs[w].set_environment(env_id, instruction_set["instructions"], fast=True)
            print("setting env on worker " + str(w) + " iter " + str(i) + " env_id: " + str(env_id))

        # Then for each worker, take a picture and save it
        if d:
            time.sleep(0.1)
        for w in range(NUM_WORKERS):
            if i >= len(env_id_splits[w]):
                continue
            env_id = env_id_splits[w][i]
            fname = paths.get_env_image_path(env_id)
            if os.path.isfile(fname):
                print("Img exists: " + fname)
                continue
            # Snap twice: the first call is presumably a warm-up so the scene is fully rendered
            envs[w].snap_birdseye(fast=True, small_env=SMALL_ENV)
            image = envs[w].snap_birdseye(fast=True, small_env=SMALL_ENV)
            image = np.flip(image, 0)
            imsave(fname, image)
            print("saving pic on worker " + str(w) + " iter " + str(i) + " env_id: " + str(env_id))
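# For reference, the round-robin split above deals env ids out to workers in turn:
# worker w ends up with env_id_splits[w], and on outer iteration i it processes env_id_splits[w][i].
# A minimal, self-contained sketch of that assignment (the worker count and env ids below are
# illustrative values, not taken from the experiment config):
def _demo_round_robin_split():
    demo_num_workers = 3
    demo_keylist = [6825, 6826, 6827, 6828, 6829]
    demo_splits = [[] for _ in range(demo_num_workers)]
    for i, key in enumerate(demo_keylist):
        demo_splits[i % demo_num_workers].append(key)
    print(demo_splits)  # [[6825, 6828], [6826, 6829], [6827]]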
        self.thread.daemon = True
        self.thread.start()

    def run(self):
        self.mon.run()

    def get_command(self):
        return self.mon.current_vel


initialize_experiment("nl_datacollect_cage")

teleoper = KeyTeleop()
rate = Rate(0.1)

env = PomdpInterface()

train_instructions, dev_instructions, test_instructions, _ = get_all_instructions()

count = 0
stuck_count = 0


def show_depth(image):
    grayscale = np.mean(image[:, :, 0:3], axis=2)
    depth = image[:, :, 3]
    comb = np.stack([grayscale, grayscale, depth], axis=2)
    comb -= comb.min()
    comb /= (comb.max() + 1e-9)
    Presenter().show_image(comb, "depth_alignment", torch=False, waitkey=1, scale=4)
import numpy as np
from pykeyboard import PyKeyboardEvent

from drones.airsim_interface.rate import Rate
from data_io.instructions import get_all_instructions
from pomdp.pomdp_interface import PomdpInterface
from visualization import Presenter
from parameters.parameter_server import initialize_experiment, get_current_parameters
from utils.keyboard import KeyTeleop

initialize_experiment()

teleoper = KeyTeleop()
rate = Rate(0.1)

env = PomdpInterface(is_real=get_current_parameters()["Setup"]["real_drone"])

train_instructions, dev_instructions, test_instructions, _ = get_all_instructions()

count = 0
stuck_count = 0


def show_depth(image):
    grayscale = np.mean(image[:, :, 0:3], axis=2)
    depth = image[:, :, 3]
    comb = np.stack([grayscale, grayscale, depth], axis=2)
    comb -= comb.min()
    comb /= (comb.max() + 1e-9)
    Presenter().show_image(comb, "depth_alignment", torch=False, waitkey=1, scale=4)
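# A quick way to exercise show_depth without the simulator is to feed it a synthetic RGBD array.
# The resolution and value ranges below are illustrative assumptions, not the simulator's camera format.
def _demo_show_depth_on_fake_image():
    h, w = 96, 128
    rgb = np.random.randint(0, 255, size=(h, w, 3)).astype(np.float32)
    depth = np.tile(np.linspace(0.0, 10.0, w), (h, 1)).astype(np.float32)
    fake_rgbd = np.concatenate([rgb, depth[:, :, np.newaxis]], axis=2)
    # show_depth displays grayscale/grayscale/depth stacked as a single normalized RGB image.
    show_depth(fake_rgbd)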
def interactive_demo():

    P.initialize_experiment()
    InteractAPI.launch_ui()

    rate = Rate(0.1)

    env = PomdpInterface(is_real=get_current_parameters()["Setup"]["real_drone"])
    train_instructions, dev_instructions, test_instructions, corpus = get_all_instructions()
    all_instr = {**train_instructions, **dev_instructions, **test_instructions}
    token2term, word2token = get_word_to_token_map(corpus)

    # Run on dev set
    interact_instructions = dev_instructions

    env_range_start = get_current_parameters()["Setup"].get("env_range_start", 0)
    env_range_end = get_current_parameters()["Setup"].get("env_range_end", 10e10)
    interact_instructions = {
        k: v for k, v in interact_instructions.items()
        if env_range_start < k < env_range_end
    }

    count = 0
    stuck_count = 0

    model, _ = load_model(get_current_parameters()["Setup"]["model"])

    InteractAPI.write_empty_instruction()
    InteractAPI.write_real_instruction("None")
    instruction_str = InteractAPI.read_instruction_file()
    print("Initial instruction: ", instruction_str)

    for instruction_sets in interact_instructions.values():
        for set_idx, instruction_set in enumerate(instruction_sets):
            env_id = instruction_set['env']
            env.set_environment(env_id, instruction_set["instructions"])

            presenter = Presenter()
            cumulative_reward = 0
            for seg_idx in range(len(instruction_set["instructions"])):
                print(f"RUNNING ENV {env_id} SEG {seg_idx}")

                real_instruction_str = instruction_set["instructions"][seg_idx]["instruction"]
                InteractAPI.write_real_instruction(real_instruction_str)
                valid_segment = env.set_current_segment(seg_idx)
                if not valid_segment:
                    continue
                state = env.reset(seg_idx)

                keep_going = True
                while keep_going:
                    InteractAPI.write_real_instruction(real_instruction_str)

                    while True:
                        cv2.waitKey(200)
                        instruction = InteractAPI.read_instruction_file()
                        if instruction == "CMD: Next":
                            print("Advancing")
                            keep_going = False
                            InteractAPI.write_empty_instruction()
                            break
                        elif instruction == "CMD: Reset":
                            print("Resetting")
                            env.reset(seg_idx)
                            InteractAPI.write_empty_instruction()
                        elif len(instruction.split(" ")) > 1:
                            instruction_str = instruction
                            break

                    if not keep_going:
                        continue

                    env.override_instruction(instruction_str)
                    tok_instruction = tokenize_instruction(instruction_str, word2token)

                    state = env.reset(seg_idx)
                    print(f"Executing: {instruction_str}")
                    while True:
                        rate.sleep()
                        action, internals = model.get_action(state, tok_instruction)

                        state, reward, done, expired, oob = env.step(action)
                        cumulative_reward += reward
                        presenter.show_sample(state, action, reward, cumulative_reward, instruction_str)
                        #show_depth(state.image)
                        if done:
                            break
                    InteractAPI.write_empty_instruction()
                    print("Segment finished!")
            print("Env finished!")
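# Simplified illustration of the tokenization step performed before model.get_action above.
# The real mapping comes from get_word_to_token_map(corpus) and tokenize_instruction in data_io.instructions;
# the dictionary and helper below are toy stand-ins for illustration only.
def _demo_tokenize_instruction():
    demo_word2token = {"go": 4, "to": 5, "the": 6, "blue": 7, "box": 8}

    def tokenize(instruction_str, word2token, unk_token=0):
        # Split on whitespace and map each known word to its token id,
        # falling back to an "unknown" id for out-of-vocabulary words.
        words = [w for w in instruction_str.lower().split(" ") if w]
        return [word2token.get(w, unk_token) for w in words]

    print(tokenize("go to the blue box", demo_word2token))  # [4, 5, 6, 7, 8]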
def train_top_down_pred():

    P.initialize_experiment()
    setup = P.get_current_parameters()["Setup"]

    launch_ui()

    env = PomdpInterface()

    print("model_name:", setup["top_down_model"])
    print("model_file:", setup["top_down_model_file"])

    model, model_loaded = load_model(
        model_name_override=setup["top_down_model"],
        model_file_override=setup["top_down_model_file"])

    exec_model, wrapper_model_loaded = load_model(
        model_name_override=setup["wrapper_model"],
        model_file_override=setup["wrapper_model_file"])

    affine2d = Affine2D()
    if model.is_cuda:
        affine2d.cuda()

    eval_envs = get_correct_eval_env_id_list()
    print("eval_envs:", eval_envs)
    train_instructions, dev_instructions, test_instructions, corpus = get_all_instructions(
        max_size=setup["max_envs"])
    all_instr = {**train_instructions, **dev_instructions, **test_instructions}
    token2term, word2token = get_word_to_token_map(corpus)

    dataset = model.get_dataset(envs=eval_envs, dataset_name="supervised", eval=True, seg_level=False)
    dataloader = DataLoader(dataset,
                            collate_fn=dataset.collate_fn,
                            batch_size=1,
                            shuffle=False,
                            num_workers=1,
                            pin_memory=True)

    for b, batch in list(enumerate(dataloader)):
        print("batch:", batch)
        images = batch["images"]
        instructions = batch["instr"]
        label_masks = batch["traj_labels"]
        affines = batch["affines_g_to_s"]
        env_ids = batch["env_id"]
        set_idxs = batch["set_idx"]
        seg_idxs = batch["seg_idx"]

        env_id = env_ids[0][0]
        set_idx = set_idxs[0][0]
        print("env_id of this batch:", env_id)
        env.set_environment(env_id, instruction_set=all_instr[env_id][set_idx]["instructions"])
        env.reset(0)

        num_segments = len(instructions[0])
        print("num_segments in this batch:", num_segments)
        write_instruction("")
        write_real_instruction("None")
        instruction_str = read_instruction_file()
        print("Initial instruction: ", instruction_str)

        # TODO: Reset model state here if we keep any temporal memory etc
        for s in range(num_segments):
            start_state = env.reset(s)
            keep_going = True
            real_instruction = cuda_var(instructions[0][s], setup["cuda"], 0)
            tmp = list(real_instruction.data.cpu()[0].numpy())
            real_instruction_str = debug_untokenize_instruction(tmp)
            write_real_instruction(real_instruction_str)
            #write_instruction(real_instruction_str)
            #instruction_str = real_instruction_str

            image = cuda_var(images[0][s], setup["cuda"], 0)
            label_mask = cuda_var(label_masks[0][s], setup["cuda"], 0)
            affine_g_to_s = affines[0][s]
            print("Your current environment:")
            with open("/storage/dxsun/unreal_config_nl/configs/configs/random_config_" + str(env_id) + ".json") as fp:
                config = json.load(fp)
            print(config)
            while keep_going:
                write_real_instruction(real_instruction_str)

                while True:
                    cv2.waitKey(200)
                    instruction = read_instruction_file()
                    if instruction == "CMD: Next":
                        print("Advancing")
                        keep_going = False
                        write_empty_instruction()
                        break
                    elif instruction == "CMD: Reset":
                        print("Resetting")
                        env.reset(s)
                        write_empty_instruction()
                    elif len(instruction.split(" ")) > 1:
                        instruction_str = instruction
                        print("Executing: ", instruction_str)
                        break

                if not keep_going:
                    continue

                #instruction_str = read_instruction_file()
                # TODO: Load instruction from file
                tok_instruction = tokenize_instruction(instruction_str, word2token)
                instruction_t = torch.LongTensor(tok_instruction).unsqueeze(0)
                instruction_v = cuda_var(instruction_t, setup["cuda"], 0)
                instruction_mask = torch.ones_like(instruction_v)
                tmp = list(instruction_t[0].numpy())
                instruction_dbg_str = debug_untokenize_instruction(tmp, token2term)

                # import matplotlib.pyplot as plt
                #plt.plot(image.squeeze(0).permute(1,2,0).cpu().numpy())
                #plt.show()

                res = model(image, instruction_v, instruction_mask)
                mask_pred = res[0]
                shp = mask_pred.shape
                mask_pred = F.softmax(mask_pred.view([2, -1]), 1).view(shp)
                #mask_pred = softmax2d(mask_pred)

                # TODO: Rotate the mask_pred to the global frame
                affine_s_to_g = np.linalg.inv(affine_g_to_s)
                S = 8.0
                affine_scale_up = np.asarray([[S, 0, 0],
                                              [0, S, 0],
                                              [0, 0, 1]])
                affine_scale_down = np.linalg.inv(affine_scale_up)

                affine_pred_to_g = np.dot(affine_scale_down, np.dot(affine_s_to_g, affine_scale_up))
                #affine_pred_to_g_t = torch.from_numpy(affine_pred_to_g).float()

                mask_pred_np = mask_pred.data.cpu().numpy()[0].transpose(1, 2, 0)
                mask_pred_g_np = apply_affine(mask_pred_np, affine_pred_to_g, 32, 32)
                print("Sum of global mask: ", mask_pred_g_np.sum())
                mask_pred_g = torch.from_numpy(mask_pred_g_np.transpose(2, 0, 1)).float()[np.newaxis, :, :, :]
                exec_model.set_ground_truth_visitation_d(mask_pred_g)

                # Create a batch axis for pytorch
                #mask_pred_g = affine2d(mask_pred, affine_pred_to_g_t[np.newaxis, :, :])

                mask_pred_np[:, :, 0] -= mask_pred_np[:, :, 0].min()
                mask_pred_np[:, :, 0] /= (mask_pred_np[:, :, 0].max() + 1e-9)
                mask_pred_np[:, :, 0] *= 2.0
                mask_pred_np[:, :, 1] -= mask_pred_np[:, :, 1].min()
                mask_pred_np[:, :, 1] /= (mask_pred_np[:, :, 1].max() + 1e-9)

                presenter = Presenter()
                presenter.show_image(mask_pred_g_np, "mask_pred_g", torch=False, waitkey=1, scale=4)
                #import matplotlib.pyplot as plt
                #print("image.data shape:", image.data.cpu().numpy().shape)
                #plt.imshow(image.data.squeeze().permute(1,2,0).cpu().numpy())
                #plt.show()
                # presenter.show_image(image.data, "mask_pred_g", torch=False, waitkey=1, scale=4)
                #import pdb; pdb.set_trace()

                pred_viz_np = presenter.overlaid_image(image.data, mask_pred_np, channel=0)
                # TODO: Don't show labels
                # TODO: OpenCV colours
                #label_mask_np = label_mask.data.cpu().numpy()[0].transpose(1,2,0)
                labl_viz_np = presenter.overlaid_image(image.data, label_mask.data, channel=0)
                viz_img_np = np.concatenate((pred_viz_np, labl_viz_np), axis=1)
                viz_img_np = pred_viz_np
                viz_img = presenter.overlay_text(viz_img_np, instruction_dbg_str)
                cv2.imshow("interactive viz", viz_img)
                cv2.waitKey(100)

                rollout_model(exec_model, env, env_ids[0][s], set_idxs[0][s], seg_idxs[0][s], tok_instruction)
                write_instruction("")
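# The prediction-to-global transform above conjugates the sample-to-global affine with a scale change,
# because the mask is predicted on a grid 8x smaller than the frame the affine is expressed in.
# A small self-contained numpy check of that composition; the rotation/translation standing in for
# affine_s_to_g is made up for illustration.
def _demo_affine_composition():
    import numpy as np
    theta = np.pi / 2  # hypothetical sample-to-global rotation
    affine_s_to_g = np.asarray([[np.cos(theta), -np.sin(theta), 10.0],
                                [np.sin(theta),  np.cos(theta),  4.0],
                                [0.0,            0.0,            1.0]])
    S = 8.0
    affine_scale_up = np.asarray([[S, 0, 0], [0, S, 0], [0, 0, 1]])
    affine_scale_down = np.linalg.inv(affine_scale_up)
    # Same composition as in the code: go to the large frame, apply s->g, come back down.
    affine_pred_to_g = affine_scale_down @ affine_s_to_g @ affine_scale_up
    # A point at (2, 3) in the predicted mask: rotated 90 degrees to (-3, 2),
    # then shifted by the translation scaled down by 1/8, giving (-1.75, 2.5).
    print(affine_pred_to_g @ np.asarray([2.0, 3.0, 1.0]))  # [-1.75  2.5   1.  ]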
import os
import scipy.misc

from env_config.definitions.landmarks import LANDMARK_RADII
from data_io.paths import get_landmark_images_dir
from drones.airsim_interface.rate import Rate
from pomdp.pomdp_interface import PomdpInterface
from visualization import Presenter

"""
This script is used to take pictures of various landmarks
"""

from parameters import parameter_server as P
P.initialize_experiment("nl_datacollect")

rate = Rate(0.1)

IMAGES_PER_LANDMARK_TRAIN = 1000
IMAGES_PER_LANDMARK_TEST = 200

env = PomdpInterface()

count = 0

presenter = Presenter()


def save_landmark_img(state, landmark_name, i, eval):
    data_dir = get_landmark_images_dir(landmark_name, eval)
    os.makedirs(data_dir, exist_ok=True)
    full_path = os.path.join(data_dir, landmark_name + "_" + str(i) + ".jpg")
    scipy.misc.imsave(full_path, state.image)


for landmark_name, landmark_radius in LANDMARK_RADII.items():
class PolicyRoller:
    """
    Really only a wrapper around the roll_out_policy function, which does the policy rollout in the pomdp
    It collects actions both from the user-provided policy and from the oracle (as labels) and accumulates a dataset
    """
    def __init__(self, instance_id=0):
        self.presenter = Presenter()
        self.instance_id = instance_id
        self.env = None
        self.word2token = None
        self.all_instructions = None

    def reset(self):
        self.__init__()

    def load_all_envs(self):
        train_i, dev_i, test_i, corpus = get_all_instructions()
        all_instructions = merge_instruction_sets(train_i, dev_i, test_i)
        token2term, word2token = get_word_to_token_map(corpus)
        env_ids = list(all_instructions.keys())
        return env_ids, all_instructions, corpus, token2term, word2token

    def tokenize_string(self, s):
        word_list = filter(None, s.split(" "))
        token_instruction = list(map(lambda w: self.word2token[w], word_list))
        return token_instruction

    def roll_out_on_segment(self):
        pass

    def choose_action(self, params, step, switch_thres, reference_action, policy_action):
        """
        Choose whether to perform the policy action or the reference (oracle) action based on the type of mixture
        policy that is being executed
        :param params: RolloutParams instance
        :param step: current control step number
        :param switch_thres: roll-in/roll-out control step number
        :param reference_action: action executed by oracle
        :param policy_action: action executed by policy
        :return:
        """
        if params.rollout_strategy == RolloutStrategy.POLICY:
            return policy_action
        elif params.rollout_strategy == RolloutStrategy.REFERENCE:
            return reference_action
        elif params.rollout_strategy == RolloutStrategy.POLICY_IN_REF_OUT:
            if step > switch_thres:
                return reference_action
            else:
                return policy_action
        elif params.rollout_strategy == RolloutStrategy.MIXTURE:
            if random.uniform(0, 1) < params.mixture_ref_prob:
                return reference_action
            else:
                return policy_action

    def roll_out_on_env(self, params, instructions_set, set_idx, only_seg_idx=None, custom_instr=None):

        env_dataset = []
        failed = False

        env_id = instructions_set["env"]
        self.env.set_environment(env_id, instruction_set=instructions_set['instructions'])
        path = load_and_convert_path(env_id)
        params.initPolicyContext(env_id, path)

        import rollout.run_metadata as md

        segments = list(instructions_set['instructions'])

        # all segments with at least length 2
        valid_segments = [
            (segments[i], segments[i]["seg_idx"]) for i in range(len(segments))
            if segments[i]["end_idx"] - segments[i]["start_idx"] >= 2
        ]

        if len(valid_segments) == 0:
            print("Ding dong!")

        first_seg = True

        # For recurrent policy, we need to explicitly start a segment and reset the LSTM state
        # TODO: Make sure this still works for the older non-NL model
        params.policy.start_sequence()

        for segment, seg_idx in valid_segments:
            if only_seg_idx is not None and seg_idx != only_seg_idx:
                print("Skipping seg: " + str(seg_idx) + " as not requested")
                continue

            valid_segment = self.env.set_current_segment(seg_idx)
            if not valid_segment:
                print(f"Skipping segment {seg_idx} as it is empty / invalid for env {env_id}")
                continue

            if params.segment_level:
                params.policy.start_sequence()

            segment_dataset = []

            # Decide when to switch policies
            switch_threshold = params.horizon + 1  # Never switch policies by default
            do_switch = random.uniform(0, 1) < params.switch_prob
            if do_switch and params.threshold_strategy == SwitchThresholdStrategy.UNIFORM:
                switch_threshold = random.uniform(0, params.horizon)

            string_instruction, end_idx, start_idx = segment["instruction"], segment["end_idx"], segment["start_idx"]

            # Manual instruction override to allow rolling out arbitrary instructions for debugging
            if custom_instr is not None:
                print("REPLACED: ", string_instruction)
                string_instruction = custom_instr
            print("INSTRUCTION:", string_instruction)

            # Set some global parameters that can be accessed by other parts of the system
            md.IS_ROLLOUT = True
            md.REAL_DRONE = params.real_drone
            md.RUN_NAME = params.run_name
            md.ENV_ID = env_id
            md.SET_IDX = set_idx
            md.SEG_IDX = seg_idx
            md.START_IDX = start_idx
            md.END_IDX = end_idx
            md.INSTRUCTION = string_instruction

            if hasattr(params.policy, "start_segment_rollout"):
                params.policy.start_segment_rollout(env_id, set_idx, seg_idx)

            token_instruction = self.tokenize_string(string_instruction)

            # At the end of segment N, should we reset drone position to the start of segment N+1 or continue
            # rolling out seamlessly?
            if first_seg or params.shouldResetAlways() or (failed and params.shouldResetIfFailed()):
                state = self.env.reset(seg_idx)
                #instr_str = debug_untokenize_instruction(instruction)
                #Presenter().show_instruction(string_instruction.replace("  ", " "))
                failed = False
                first_seg = False
                sleep(sleepytime)

            # Tell the oracle which part of the path is currently being executed
            params.setCurrentSegment(start_idx, end_idx)

            step_num = 0
            total_reward = 0

            # If the path has been finished according to the oracle, allow rolling out STEPS_TO_KILL more steps
            # If we finish the segment, but don't stop, log the position at which we finish the segment
            oracle_finished_countdown = params.steps_to_kill

            # Finally the actual policy roll out on the path segment!
            while True:

                # Get oracle action (labels)
                ref_action, _ = params.ref_policy.get_action(state, token_instruction)

                if ref_action is None or step_num == params.horizon:
                    failed = True  # Either veered off too far, or ran out of time. Either way, we consider it a fail
                    print("Failed segment")
                    break

                # Get the policy action (actions to be rolled out)
                action, _ = params.policy.get_action(state, token_instruction)  #, env_id=env_id)

                if action is None:
                    print("POLICY PRODUCED None ACTION")
                    break

                # Choose which action to execute (reference or policy) based on the selected procedure
                exec_action = self.choose_action(params, step_num, switch_threshold, ref_action, action)

                # action = [vel_x, vel_y, vel_yaw] vel_y is unused currently. Execute the action in the pomdp
                state, reward, done, exceeded, oob = self.env.step(exec_action)

                total_reward += reward

                # Collect the data into a dataset
                sample = {
                    "instruction": string_instruction,
                    "state": state,
                    "ref_action": ref_action,
                    "reward": reward,
                    "done": done,
                    #"metadata": {
                        "seg_path": path[start_idx:end_idx + 1],
                        "path": path,
                        "env_id": env_id,
                        "set_idx": set_idx,
                        "seg_idx": seg_idx,
                        "start_idx": start_idx,
                        "end_idx": end_idx,
                        "action": exec_action,
                        "pol_action": action,
                        #"ref_action": ref_action,
                        #"instruction": string_instruction,
                        "flag": params.getFlag()
                    #}
                }

                segment_dataset.append(sample)
                if not params.isSegmentLevel():
                    env_dataset.append(sample)

                # Do visual feedback and logging
                if params.first_person:
                    self.presenter.show_sample(state, exec_action, reward, string_instruction)
                if params.plot:
                    self.presenter.plot_paths(segment_dataset, interactive=True)
                if params.save_samples:
                    file_path = params.getSaveSamplesPath(env_id, set_idx, seg_idx, step_num)
                    self.presenter.save_sample(file_path, state, exec_action, reward, string_instruction)
                if params.show_action:
                    self.presenter.show_action(ref_action, "ref_action")
                    self.presenter.show_action(exec_action, "exec_action")

                # If the policy is finished, we stop. Otherwise the oracle should just keep outputting
                # examples that say that the policy should output finished at this point
                if exec_action[3] > 0.5 and not params.shouldIgnorePolicyStop():
                    print("Policy stop!")
                    break

                # If oracle says we're finished, allow a number of steps before terminating.
                if ref_action[3] > 0.5:
                    if oracle_finished_countdown == params.steps_to_kill:
                        drone_pos_force_stop = state.get_pos_2d()
                    oracle_finished_countdown -= 1
                    if oracle_finished_countdown == 0:
                        print("Oracle forced stop!")
                        break

                step_num += 1

            # Call the rollout end callback, so that the model can save any debugging information, such as feature maps
            if callable(getattr(params.policy, "on_rollout_end", None)):
                params.policy.on_rollout_end(env_id, set_idx, seg_idx)

            if params.isSegmentLevel():
                env_dataset.append(segment_dataset)

            # Plot the trajectories for error tracking
            # TODO: Plot entire envs not segment by segment
            if params.save_plots:
                if not params.isSegmentLevel():
                    self.presenter.plot_paths(env_dataset,
                                              segment_path=path[start_idx:end_idx + 1],
                                              interactive=False,
                                              bg=True,
                                              world_size=4.7)
                self.presenter.save_plot(params.getSavePlotPath(env_id, set_idx, seg_idx))

            # Calculate end of segment error
            if end_idx > len(path) - 1:
                end_idx = len(path) - 1

            # The reward is proportional to path length. Weigh it down, so that max reward is 1:
            seg_len = end_idx - start_idx
            #self.error_tracker.add_sample(not failed, drone_pos_force_stop, state.get_pos(), path[end_idx],
            #                              path[end_idx - 1], total_reward, seg_len)

            if params.first_segment_only:
                print("Only running the first segment")
                break

            #sleep(sleepytime)

        return env_dataset

    def roll_out_policy(self, params):
        """
        Given the provided rollout parameters, spawn a simulator instance and execute the specified policy on all
        environments specified in params.setEnvIds.
        Awful function that really needs to be simplified.
        A lot of the code is simply checking various error conditions, because the data has issues, and logging
        the outcome. The actual rollout is a very small part of the code.
        :param params: RollOutParams instance defining the parameters of the rollout
        :return: Aggregated dataset with images, states and oracle actions.
        If params.isSegmentLevel(), the returned dataset will be a list (over environments) of samples
        otherwise it will be a list (over environments) of lists (over segments) of samples
        """
        if params.isDebug():
            run_metadata.WRITE_DEBUG_DATA = True

        dataset = []
        try:
            # Load the neural network policy from file
            # We can't just pass a neural network into this function, because it can't be pickled
            params.loadPolicy()
            assert params.hasPolicy()

            self.env = PomdpInterface(instance_id=self.instance_id, is_real=params.real_drone)

            all_env_ids, all_instructions, corpus, token2term, self.word2token = self.load_all_envs()
            env_ids = params.envs  # if params.envs is not None else all_env_ids
            seg_indices = params.seg_list
            custom_instructions = params.custom_instructions

            # Filter out the envs that are not in all_instructions (we don't have instructions available for them)
            valid_env_ids = [i for i in env_ids if i in all_instructions]

            count = 0

            # Loop through environments
            for i, env_id in enumerate(valid_env_ids):
                #print ("Rolling out on env: " + str(env_id))

                # Loop through all non-empty sets of instructions for each pomdp
                instruction_sets = [s for s in all_instructions[env_id] if len(s) > 0]

                if len(instruction_sets) == 0:
                    print("No instruction sets for env: " + str(env_id))

                for j, instructions_set in enumerate(instruction_sets):
                    count += 1
                    try:
                        seg_id = seg_indices[i] if seg_indices is not None else None
                        custom_instr = custom_instructions[i] if custom_instructions is not None else None
                        import rollout.run_metadata as md
                        md.CUSTOM_INSTR_NO = i
                        # TODO: Check if this works!
                        dataset.append(self.roll_out_on_env(params, instructions_set, j, seg_id, custom_instr))
                        #log("Path finished!")
                        DebugWriter().commit()

                        if params.isRealDrone():
                            break

                    except Exception as e:
                        import traceback
                        from utils.colors import print_error
                        print_error("Error encountered during policy rollout!")
                        print_error(e)
                        print_error(traceback.format_exc())
                        continue

        except Exception as e:
            import traceback
            from utils.colors import print_error
            print_error("Error encountered during policy rollout!")
            print_error(e)
            print_error(traceback.format_exc())

        self.env.land()

        return dataset
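# choose_action above selects between oracle and policy actions according to params.rollout_strategy.
# The snippet below is a stripped-down illustration of just the MIXTURE case: with probability
# mixture_ref_prob the oracle (reference) action is executed, otherwise the policy action.
# The helper name and the 0.25 probability are illustrative, not part of the codebase.
def _demo_mixture_strategy():
    import random

    def choose_mixture_action(reference_action, policy_action, mixture_ref_prob):
        return reference_action if random.uniform(0, 1) < mixture_ref_prob else policy_action

    picks = [choose_mixture_action("REF", "POL", mixture_ref_prob=0.25) for _ in range(10000)]
    print(picks.count("REF") / len(picks))  # ~0.25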
def automatic_demo():

    P.initialize_experiment()
    instruction_display = InstructionDisplay()

    rate = Rate(0.1)

    env = PomdpInterface(is_real=get_current_parameters()["Setup"]["real_drone"])
    train_instructions, dev_instructions, test_instructions, corpus = get_all_instructions()
    all_instr = {**train_instructions, **dev_instructions, **test_instructions}
    token2term, word2token = get_word_to_token_map(corpus)

    # Run on dev set
    interact_instructions = dev_instructions

    env_range_start = get_current_parameters()["Setup"].get("env_range_start", 0)
    env_range_end = get_current_parameters()["Setup"].get("env_range_end", 10e10)
    interact_instructions = {
        k: v for k, v in interact_instructions.items()
        if env_range_start < k < env_range_end
    }

    model, _ = load_model(get_current_parameters()["Setup"]["model"])

    # Loop over the select few examples
    while True:
        for instruction_sets in interact_instructions.values():
            for set_idx, instruction_set in enumerate(instruction_sets):
                env_id = instruction_set['env']

                found_example = None
                for example in examples:
                    if example[0] == env_id:
                        found_example = example
                if found_example is None:
                    continue

                env.set_environment(env_id, instruction_set["instructions"])

                presenter = Presenter()
                cumulative_reward = 0
                for seg_idx in range(len(instruction_set["instructions"])):
                    if seg_idx != found_example[2]:
                        continue

                    print(f"RUNNING ENV {env_id} SEG {seg_idx}")

                    real_instruction_str = instruction_set["instructions"][seg_idx]["instruction"]
                    instruction_display.show_instruction(real_instruction_str)
                    valid_segment = env.set_current_segment(seg_idx)
                    if not valid_segment:
                        continue
                    state = env.reset(seg_idx)

                    for i in range(START_PAUSE):
                        instruction_display.tick()
                        time.sleep(1)

                    tok_instruction = tokenize_instruction(real_instruction_str, word2token)

                    state = env.reset(seg_idx)
                    print(f"Executing: {real_instruction_str}")
                    while True:
                        instruction_display.tick()
                        rate.sleep()
                        action, internals = model.get_action(state, tok_instruction)

                        state, reward, done, expired, oob = env.step(action)
                        cumulative_reward += reward
                        #presenter.show_sample(state, action, reward, cumulative_reward, real_instruction_str)
                        #show_depth(state.image)
                        if done:
                            break

                    for i in range(END_PAUSE):
                        instruction_display.tick()
                        time.sleep(1)

                    print("Segment finished!")
                    instruction_display.show_instruction("...")
                print("Env finished!")
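# `examples` is consumed above as a module-level list where example[0] is an env id and example[2]
# is the segment index to run (the middle element is not used in this excerpt). A toy version of
# that lookup, with made-up env ids:
def _demo_find_example():
    demo_examples = [(6825, None, 3), (6831, None, 0)]
    env_id = 6831
    found_example = None
    for example in demo_examples:
        if example[0] == env_id:
            found_example = example
    print(found_example)  # (6831, None, 0) -> run segment 0 for env 6831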
class SimplePolicyRoller:
    """
    Really only a wrapper around the roll_out_policy function, which does the policy rollout in the pomdp
    It collects actions both from the user-provided policy and from the oracle (as labels) and accumulates a dataset
    """
    def __init__(self, instance_id=0, real_drone=False, policy=None, oracle=None, no_reward=False):

        self.presenter = Presenter()
        self.instance_id = instance_id

        self.word2token = None
        self.all_instructions = None
        self.all_env_ids, self.all_instructions, self.corpus, self.token2term, self.word2token = self.load_all_envs()
        self.env = PomdpInterface(instance_id=self.instance_id, is_real=real_drone)
        self.policy = policy
        self.oracle = oracle
        self.no_reward = no_reward

    def load_all_envs(self):
        train_i, dev_i, test_i, corpus = get_all_instructions()
        all_instructions = merge_instruction_sets(train_i, dev_i, test_i)
        token2term, word2token = get_word_to_token_map(corpus)
        env_ids = list(all_instructions.keys())
        return env_ids, all_instructions, corpus, token2term, word2token

    def tokenize_string(self, s):
        word_list = filter(None, s.split(" "))
        token_instruction = list(map(lambda w: self.word2token[w], word_list))
        return token_instruction

    def set_policy(self, policy):
        self.policy = policy

    def save_rollouts(self, rollouts, dataset_name):
        env_rollouts = {}
        for rollout in rollouts:
            env_id = rollout[0]["env_id"]
            if env_id not in env_rollouts:
                env_rollouts[env_id] = []
            env_rollouts[env_id] += rollout

        for env_id, rollouts in env_rollouts.items():
            # This saves just a single segment per environment, as opposed to all segments that the oracle saves. Problem?
            if len(rollouts) > 0:
                #pruned_rollouts = [prune_sample(s) for s in rollouts]
                save_dataset(dataset_name, rollouts, env_id=env_id, lock=True)
                #save_metadata(dataset_name, env_id, {"seg_ids": segments})

    def choose_action(self, pol_action, ref_action, dagger_beta):
        use_expert = random.uniform(0, 1) < dagger_beta
        if use_expert:
            return ref_action
        else:
            return pol_action

    def sample_to_cpu(self, sample):
        for k, v in sample.items():
            if hasattr(v, "to") and isinstance(v.to, types.MethodType):
                sample[k] = v.to("cpu")

    def single_segment_rollout(self, env_id, set_idx, seg_idx, do_sample, dagger_beta=0, rl_rollout=True):
        instruction_sets = self.all_instructions[env_id][set_idx]['instructions']

        for instruction_set in instruction_sets:
            if instruction_set["seg_idx"] == seg_idx:
                break

        # TODO: Get rid of this idiocy:
        md.IS_ROLLOUT = True

        instruction_set = get_instruction_segment(env_id, set_idx, seg_idx, all_instr=self.all_instructions)

        self.env.set_environment(env_id, instruction_set=instruction_sets, fast=True)
        self.env.set_current_segment(seg_idx)

        self.policy.start_sequence()
        if hasattr(self.policy, "start_segment_rollout"):
            self.policy.start_segment_rollout(env_id, set_idx, seg_idx)
        if self.oracle:
            self.oracle.start_segment_rollout(env_id, set_idx, seg_idx)

        string_instruction, end_idx, start_idx = instruction_set["instruction"], instruction_set["end_idx"], instruction_set["start_idx"]
        token_instruction = self.tokenize_string(string_instruction)

        # TODO: Support oracle (including setCurrentSegment, and setting the path)
        rollout_sample = []

        # Reset the drone to the segment starting position:
        state = self.env.reset(seg_idx)

        first = True
        while True:
            action, rl_stuff = self.policy.get_action(state, token_instruction, sample=do_sample, rl_rollout=rl_rollout)

            if self.oracle:
                ref_action, _ = self.oracle.get_action(state, token_instruction)
                exec_action = self.choose_action(action, ref_action, dagger_beta)
            else:
                ref_action = action
                exec_action = action

            next_state, extrinsic_reward, done, expired, oob = self.env.step(exec_action)

            # Calculate intrinsic reward (I don't like that this delays the loop)
            if hasattr(self.policy, "calc_intrinsic_rewards") and not self.no_reward:
                intrinsic_rewards = self.policy.calc_intrinsic_rewards(next_state, action, done, first)
            else:
                intrinsic_rewards = {"x": 0}
            intrinsic_reward = sum(intrinsic_rewards.values())

            sample = {
                "instruction": string_instruction,
                "ref_action": ref_action,
                "pol_action": action,
                "action": exec_action,
                "state": state,
                "extrinsic_reward": extrinsic_reward,
                "intrinsic_reward": intrinsic_reward - (1.0 if oob else 0.0),
                "full_reward": extrinsic_reward + intrinsic_reward,
                "done": done,
                "expired": expired,
                "env_id": env_id,
                "set_idx": set_idx,
                "seg_idx": seg_idx,
            }
            sample = dict_merge(sample, rl_stuff)
            if not self.no_reward:
                sample = dict_merge(sample, intrinsic_rewards)
            rollout_sample.append(sample)

            # Multiprocessing has stopped playing nice with PyTorch cuda. Move sample to cpu first.
            if rl_rollout:
                self.sample_to_cpu(sample)

            state = next_state
            first = False
            if done:
                #print(f"Done! Last action: {exec_action}")
                break

        md.IS_ROLLOUT = False

        # Add discounted returns
        return rollout_sample

    def rollout_segments(self, env_ids, seg_ids, policy_state, sample, dagger_beta=0, save_dataset_name=None,
                         land_afterwards=False, rl_rollout=True):
        if policy_state is not None:
            self.policy.set_policy_state(policy_state)

        data = []
        for env_id, seg_idx in zip(env_ids, seg_ids):
            done = False
            while not done:
                try:
                    seg_data = self.single_segment_rollout(env_id, 0, seg_idx, sample, dagger_beta, rl_rollout)
                    done = True
                except PomdpInterface.EnvException as e:
                    continue
            data.append(seg_data)

        if save_dataset_name:
            self.save_rollouts(data, save_dataset_name)

        # Land the real drone if we have one.
        if land_afterwards:
            self.env.land()

        return data
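# single_segment_rollout ends with an "Add discounted returns" note but no implementation in this excerpt.
# A sketch of what annotating the collected samples with discounted returns could look like; the
# "return" key, the gamma value and the helper name are assumptions, not part of the codebase.
def _demo_add_discounted_returns():
    def add_discounted_returns(rollout_sample, gamma=0.99, reward_key="full_reward"):
        running_return = 0.0
        # Walk the rollout backwards, accumulating reward discounted by gamma per step.
        for sample in reversed(rollout_sample):
            running_return = sample[reward_key] + gamma * running_return
            sample["return"] = running_return
        return rollout_sample

    rollout = [{"full_reward": 0.0}, {"full_reward": 0.0}, {"full_reward": 1.0}]
    add_discounted_returns(rollout)
    print([round(s["return"], 4) for s in rollout])  # [0.9801, 0.99, 1.0]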
        self.thread = threading.Thread(target=self.run, args=())
        self.thread.daemon = True
        self.thread.start()

    def run(self):
        self.mon.run()

    def get_command(self):
        return self.mon.current_vel


initialize_experiment("nl_datacollect_cage")

teleoper = KeyTeleop()
rate = Rate(0.1)

env = PomdpInterface()
env_ids = get_available_env_ids()

count = 0
stuck_count = 0


def show_depth(image):
    grayscale = np.mean(image[:, :, 0:3], axis=2)
    depth = image[:, :, 3]
    comb = np.stack([grayscale, grayscale, depth], axis=2)
    comb -= comb.min()
    comb /= (comb.max() + 1e-9)
    Presenter().show_image(comb, "depth_alignment", torch=False, waitkey=1, scale=4)
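# Rate comes from drones.airsim_interface.rate and is used here to pace the control loop.
# Below is an illustrative fixed-period rate limiter in the same spirit, assuming the constructor
# argument is the loop period in seconds; it is not the project's actual implementation.
def _demo_simple_rate():
    import time

    class SimpleRate:
        def __init__(self, period_s):
            self.period_s = period_s
            self.last = time.time()

        def sleep(self):
            # Sleep just long enough so that successive calls are at least period_s apart.
            elapsed = time.time() - self.last
            if elapsed < self.period_s:
                time.sleep(self.period_s - elapsed)
            self.last = time.time()

    loop_rate = SimpleRate(0.1)
    for _ in range(3):
        loop_rate.sleep()  # each iteration takes at least 0.1 s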