def evaluate_saved_rollouts():
    """Score previously-collected rollouts for the current run and log results.

    Loads the temporary evaluation dataset for the configured model/run,
    warns about expected evaluation environments that have no rollout data,
    then evaluates the rollouts with three DataEvalNL configurations
    (aug_len=1, aug_len=2, and the combined "1-2" variant) and logs all
    three result sets to the run's results directory.
    """
    config = P.get_current_parameters()
    setup = config["Setup"]
    model_name = setup["model"]
    run_name = setup["run_name"]

    eval_dname = get_eval_tmp_dataset_name(model_name, run_name)
    rollouts = load_multiple_env_data(eval_dname)

    # Which of the expected evaluation environments actually have data?
    expected_envs = set(list(sorted(get_correct_eval_env_id_list())))
    have_envs = set([r[0]["env_id"] for r in rollouts if len(r) > 0])
    missing_envs = expected_envs - have_envs

    logdir = get_results_dir(run_name)
    if len(missing_envs) > 0:
        print(f"Warning! {len(missing_envs)} envs missing: {missing_envs}")
        #sys.exit(1)

    separator = "--------------------------------------------------------------------------------------------"
    log("", logdir)
    log(separator, logdir)
    log(f"Evaluating rollouts for run {run_name}", logdir)
    log(f" using dataset {eval_dname}", logdir)
    log(f" missing envs {missing_envs}", logdir)
    log(separator, logdir)

    # Single-segment evaluator (aug_len=1).
    single_eval = DataEvalNL(setup["run_name"] + "1-1",
                             save_images=False,
                             entire_trajectory=False,
                             aug_len=1)
    single_eval.evaluate_dataset(rollouts)
    results1 = single_eval.get_results()

    # Two-segment evaluator (aug_len=2).
    double_eval = DataEvalNL(setup["run_name"] + "2-2",
                             save_images=False,
                             entire_trajectory=False,
                             aug_len=2)
    double_eval.evaluate_dataset(rollouts)
    results2 = double_eval.get_results()

    # Combined evaluator; the only one that saves images.
    combined_eval = DataEvalNL(setup["run_name"] + "1-2",
                               save_images=True,
                               entire_trajectory=False)
    combined_eval.evaluate_dataset(rollouts)
    resultsf = combined_eval.get_results()

    log(f"Results 1-1:{results1}", logdir)
    log(f"Results 2-2:{results2}", logdir)
    log(f"Results 1-2:{resultsf}", logdir)
    log(f" -- END EVALUATION FOR {run_name}-- ", logdir)
    log(separator, logdir)
def evaluate_top_down_pred():
    """Evaluate the top-down prediction model on the evaluation environments.

    Computes the average supervised loss over the evaluation dataset,
    saves every visualization image the model produced during evaluation,
    and writes the average loss to the run's results JSON file.
    """
    P.initialize_experiment()
    setup = P.get_current_parameters()["Setup"]
    model, model_loaded = load_model()

    eval_envs = get_correct_eval_env_id_list()
    dataset_name = P.get_current_parameters().get("Data").get("dataset_name")
    # FIX: the original call passed `dataset_prefix` twice (a SyntaxError);
    # the first occurrence was evidently meant to be `dataset_name` (compare
    # the analogous get_dataset call in train_top_down_pred). Also pass
    # eval=True instead of the builtin `eval` function, which was only
    # accidentally truthy.
    dataset = model.get_dataset(envs=eval_envs,
                                dataset_name=dataset_name,
                                dataset_prefix="supervised",
                                eval=True)
    dataloader = DataLoader(dataset,
                            collate_fn=dataset.collate_fn,
                            batch_size=1,
                            shuffle=False,
                            num_workers=1,
                            pin_memory=False)

    total_loss = 0
    count = 0
    num_batches = len(dataloader)
    for b, batch in enumerate(dataloader):
        loss_var = model.sup_loss_on_batch(batch, eval=True, viz=True)
        total_loss += loss_var.data[0]
        count += 1
        print("batch: " + str(b) + " / " + str(num_batches) +
              " loss: " + str(loss_var.data[0]))
    # Guard against an empty dataloader so we don't divide by zero.
    avg_loss = total_loss / count if count > 0 else 0.0

    results_dir = get_results_dir(setup["run_name"])
    results_json_path = get_results_path(setup["run_name"])
    os.makedirs(results_dir, exist_ok=True)

    # Dump every visualization image the model accumulated during evaluation.
    viz = model.get_viz()
    for key, lst in viz.items():
        for i, img in enumerate(lst):
            img_path = os.path.join(
                results_dir, key + str(i) + "_" + setup["model"] + ".jpg")
            # NOTE(review): scipy.misc.imsave was removed in SciPy 1.2;
            # if SciPy is ever upgraded, switch to imageio.imwrite.
            sp.misc.imsave(img_path, img)
            print("Saved image: " + img_path)

    with open(results_json_path, "w") as fp:
        json.dump({"loss": avg_loss}, fp)
def evaluate():
    """Run landmark-side evaluation over a synthetic (faux) random-point dataset.

    Note: at this point test and dev have been swapped. Whatever we've been
    developing on, called "test", is hereafter called dev. Test is the data
    that hasn't been touched at all.
    """
    P.initialize_experiment()
    setup = P.get_current_parameters()["Setup"]

    env_ids = get_correct_eval_env_id_list()
    faux_data = faux_dataset_random_pt(env_ids)
    #faux_data = faux_dataset_random_landmark(env_ids)

    results = {}
    if setup["eval_landmark_side"]:
        evaluator = DataEvalLandmarkSide(setup["run_name"], save_images=False)
        evaluator.evaluate_dataset(faux_data)
        results = evaluator.get_results()
    results["all_dist"] = []
    print("Results:", results)
def evaluate():
    """Roll out the configured policy on the evaluation environments and score it.

    Loads one model per worker (optionally restoring pretrained weights),
    rolls out the policy — either on the standard evaluation environment
    list or on a custom example set from the "Eval" parameters — and then
    evaluates the collected dataset with the landmark-side and/or
    natural-language evaluators, as enabled in the setup.
    """
    P.initialize_experiment()
    params = P.get_current_parameters()
    setup = params["Setup"]
    # import pdb;pdb.set_trace()

    # One model instance per rollout worker.
    models = []
    for _ in range(setup["num_workers"]):
        model, model_loaded = load_model()
        if setup["restore_weights_name"]:
            restore_pretrained_weights(model, setup["restore_weights_name"],
                                       setup["fix_restored_weights"])
        models.append(model)

    eval_envs = get_correct_eval_env_id_list()

    # Rollout configuration (each setter returns self; calls are equivalent
    # to the fluent-chain form).
    ro_params = RollOutParams()
    ro_params.setModelName(setup["model"])
    ro_params.setModelFile(setup["model_file"])
    ro_params.setRunName(setup["run_name"])
    ro_params.setSetupName(P.get_setup_name())
    ro_params.setEnvList(eval_envs)
    ro_params.setMaxDeviation(400)
    ro_params.setHorizon(100)
    ro_params.setStepsToForceStop(10)
    ro_params.setPlot(False)
    ro_params.setShowAction(False)
    ro_params.setIgnorePolicyStop(False)
    ro_params.setPlotDir("evaluate/" + setup["run_name"])
    ro_params.setSavePlots(True)
    ro_params.setRealtimeFirstPerson(False)
    ro_params.setSaveSamples(False)
    ro_params.setBuildTrainData(False)
    ro_params.setSegmentReset("always")
    ro_params.setSegmentLevel(True)
    ro_params.setFirstSegmentOnly(False)
    ro_params.setDebug(setup["debug"])
    ro_params.setCuda(setup["cuda"])

    instructions = None
    custom_eval = "Eval" in params and params["Eval"]["custom_eval"]
    if custom_eval:
        # Each example is a (env, set, seg, instruction) tuple;
        # transpose the example list into per-field columns.
        examples = params["Eval"]["examples"]
        eval_envs, eval_sets, eval_segs, instructions = tuple(
            map(lambda m: list(m), list(zip(*examples))))
        print("!! \nRunning custom evaluation with the following setup:")
        print(examples)
        ro_params.setEnvList(eval_envs)
        ro_params.setSegList(eval_segs)
        ro_params.setCustomInstructions(instructions)

    if setup["num_workers"] > 1:
        roller = ParallelPolicyRoller(num_workers=setup["num_workers"])
    else:
        roller = PolicyRoller()

    dataset = roller.roll_out_policy(ro_params)

    results = {}
    if setup["eval_landmark_side"]:
        evaluator = DataEvalLandmarkSide(setup["run_name"])
        evaluator.evaluate_dataset(dataset)
        results = evaluator.get_results()
    if setup["eval_nl"]:
        evaluator = DataEvalNL(setup["run_name"], save_images=True,
                               entire_trajectory=False,
                               custom_instr=instructions)
        evaluator.evaluate_dataset(dataset)
        results = evaluator.get_results()
    print("Results:", results)
def multiple_eval_rollout():
    """Collect evaluation rollouts for several systems over the eval environments.

    Loads one policy per configured parameter namespace, then for every
    evaluation environment rolls each policy over the available segments,
    saving the data into that system's temporary evaluation dataset.
    Environments already collected for a given system are skipped.  When
    "one_at_a_time" is set, only a single environment is processed per
    invocation.
    """
    params, system_namespaces = setup_parameter_namespaces()
    setup_overlay = params["MultipleEval"]["SetupOverlay"]
    domain = "real" if setup_overlay["real_drone"] else "sim"
    one_at_a_time = params["MultipleEval"]["one_at_a_time"]
    check_and_prompt_if_data_exists(system_namespaces)

    # Load the systems
    # TODO: Check how many can fit in GPU memory. If not too many, perhaps we can move them off-GPU between rounds
    policies = []
    for system_namespace in system_namespaces:
        P.switch_to_namespace(system_namespace)
        setup = P.get_current_parameters()["Setup"]
        policy, _ = load_model(setup["model"], setup["model_file"], domain)
        policies.append(policy)

    # ----------------------------------------------------------------------------------------
    # Initialize Roller
    # ----------------------------------------------------------------------------------------
    policy_roller = SimplePolicyRoller(instance_id=7,
                                       real_drone=setup_overlay["real_drone"],
                                       policy=None,
                                       oracle=None,
                                       no_reward=True)

    # ----------------------------------------------------------------------------------------
    # Collect rollouts
    # ----------------------------------------------------------------------------------------
    eval_envs = list(sorted(get_correct_eval_env_id_list()))
    count = 0

    # Loop over environments
    for env_id in eval_envs:
        seg_ids = get_segs_available_for_env(env_id, 0)
        env_ids = [env_id] * len(seg_ids)
        # FIX: the original print was missing the f-prefix, so the literal
        # text "{env_id}" was printed instead of the environment id.
        print(f"Beginning rollouts for env: {env_id}")
        if len(seg_ids) == 0:
            print(" NO SEGMENTS! \nNext...")
            continue

        # Loop over systems and save data
        for i, (policy, system_namespace) in enumerate(zip(policies, system_namespaces)):
            print(f"Rolling policy in namespace {system_namespace} for env: {env_id}")
            P.switch_to_namespace(system_namespace)
            setup = P.get_current_parameters()["Setup"]
            if env_data_already_collected(env_id, setup["model"], setup["run_name"]):
                print(f"Skipping env_id: {env_id}, policy: {setup['model']}")
                continue
            eval_dataset_name = get_eval_tmp_dataset_name(setup["model"], setup["run_name"])
            policy_roller.set_policy(policy)
            # when the last policy is done, we should land the drone
            policy_roller.rollout_segments(env_ids, seg_ids, None, False, 0,
                                           save_dataset_name=eval_dataset_name,
                                           rl_rollout=False,
                                           land_afterwards=(i == len(policies) - 1))

        count += 1
        if one_at_a_time and count > 0:
            print("Stopping. Run again to roll-out on the next environment!")
            break
    print("Done")
def train_top_down_pred():
    """Interactive top-down prediction loop driven by instructions typed into a file.

    Launches the UI and a POMDP environment, loads the top-down prediction
    model and an execution (wrapper) model, then iterates the supervised
    evaluation dataset.  For each segment the operator can type an
    instruction (or "CMD: Next" / "CMD: Reset") into the instruction file;
    the model predicts a visitation mask, which is rotated into the global
    frame, visualized with OpenCV, handed to the execution model as ground
    truth, and rolled out in the environment.
    """
    P.initialize_experiment()
    setup = P.get_current_parameters()["Setup"]

    launch_ui()
    env = PomdpInterface()

    print("model_name:", setup["top_down_model"])
    print("model_file:", setup["top_down_model_file"])

    model, model_loaded = load_model(
        model_name_override=setup["top_down_model"],
        model_file_override=setup["top_down_model_file"])
    exec_model, wrapper_model_loaded = load_model(
        model_name_override=setup["wrapper_model"],
        model_file_override=setup["wrapper_model_file"])

    affine2d = Affine2D()
    if model.is_cuda:
        affine2d.cuda()

    eval_envs = get_correct_eval_env_id_list()
    print("eval_envs:", eval_envs)
    train_instructions, dev_instructions, test_instructions, corpus = get_all_instructions(
        max_size=setup["max_envs"])
    # FIX: the original merged train_instructions twice and never included
    # test_instructions, so environments that only exist in the test split
    # would raise a KeyError on all_instr[env_id] below.
    all_instr = {
        **train_instructions,
        **dev_instructions,
        **test_instructions
    }
    token2term, word2token = get_word_to_token_map(corpus)

    dataset = model.get_dataset(envs=eval_envs,
                                dataset_name="supervised",
                                eval=True,
                                seg_level=False)
    dataloader = DataLoader(dataset,
                            collate_fn=dataset.collate_fn,
                            batch_size=1,
                            shuffle=False,
                            num_workers=1,
                            pin_memory=True)

    for b, batch in list(enumerate(dataloader)):
        print("batch:", batch)
        images = batch["images"]
        instructions = batch["instr"]
        label_masks = batch["traj_labels"]
        affines = batch["affines_g_to_s"]
        env_ids = batch["env_id"]
        set_idxs = batch["set_idx"]
        seg_idxs = batch["seg_idx"]

        env_id = env_ids[0][0]
        set_idx = set_idxs[0][0]
        print("env_id of this batch:", env_id)
        env.set_environment(
            env_id, instruction_set=all_instr[env_id][set_idx]["instructions"])
        env.reset(0)

        num_segments = len(instructions[0])
        print("num_segments in this batch:", num_segments)
        write_instruction("")
        write_real_instruction("None")
        instruction_str = read_instruction_file()
        print("Initial instruction: ", instruction_str)

        # TODO: Reset model state here if we keep any temporal memory etc
        for s in range(num_segments):
            start_state = env.reset(s)
            keep_going = True
            real_instruction = cuda_var(instructions[0][s], setup["cuda"], 0)
            tmp = list(real_instruction.data.cpu()[0].numpy())
            real_instruction_str = debug_untokenize_instruction(tmp)
            write_real_instruction(real_instruction_str)
            #write_instruction(real_instruction_str)
            #instruction_str = real_instruction_str

            image = cuda_var(images[0][s], setup["cuda"], 0)
            label_mask = cuda_var(label_masks[0][s], setup["cuda"], 0)
            affine_g_to_s = affines[0][s]
            print("Your current environment:")
            with open("/storage/dxsun/unreal_config_nl/configs/configs/random_config_"
                      + str(env_id) + ".json") as fp:
                config = json.load(fp)
            print(config)

            while keep_going:
                write_real_instruction(real_instruction_str)

                # Poll the instruction file until the operator either types a
                # multi-word instruction or a control command.
                while True:
                    cv2.waitKey(200)
                    instruction = read_instruction_file()
                    if instruction == "CMD: Next":
                        print("Advancing")
                        keep_going = False
                        write_empty_instruction()
                        break
                    elif instruction == "CMD: Reset":
                        print("Resetting")
                        env.reset(s)
                        write_empty_instruction()
                    elif len(instruction.split(" ")) > 1:
                        instruction_str = instruction
                        print("Executing: ", instruction_str)
                        break

                if not keep_going:
                    continue

                #instruction_str = read_instruction_file()
                # TODO: Load instruction from file
                tok_instruction = tokenize_instruction(instruction_str, word2token)
                instruction_t = torch.LongTensor(tok_instruction).unsqueeze(0)
                instruction_v = cuda_var(instruction_t, setup["cuda"], 0)
                instruction_mask = torch.ones_like(instruction_v)
                tmp = list(instruction_t[0].numpy())
                instruction_dbg_str = debug_untokenize_instruction(tmp, token2term)

                # import matplotlib.pyplot as plt
                #plt.plot(image.squeeze(0).permute(1,2,0).cpu().numpy())
                #plt.show()

                res = model(image, instruction_v, instruction_mask)
                mask_pred = res[0]

                shp = mask_pred.shape
                mask_pred = F.softmax(mask_pred.view([2, -1]), 1).view(shp)
                #mask_pred = softmax2d(mask_pred)

                # TODO: Rotate the mask_pred to the global frame
                affine_s_to_g = np.linalg.inv(affine_g_to_s)
                S = 8.0
                affine_scale_up = np.asarray([[S, 0, 0], [0, S, 0], [0, 0, 1]])
                affine_scale_down = np.linalg.inv(affine_scale_up)
                affine_pred_to_g = np.dot(affine_scale_down,
                                          np.dot(affine_s_to_g, affine_scale_up))
                #affine_pred_to_g_t = torch.from_numpy(affine_pred_to_g).float()

                mask_pred_np = mask_pred.data.cpu().numpy()[0].transpose(1, 2, 0)
                mask_pred_g_np = apply_affine(mask_pred_np, affine_pred_to_g, 32, 32)
                print("Sum of global mask: ", mask_pred_g_np.sum())
                mask_pred_g = torch.from_numpy(
                    mask_pred_g_np.transpose(2, 0, 1)).float()[np.newaxis, :, :, :]

                exec_model.set_ground_truth_visitation_d(mask_pred_g)

                # Create a batch axis for pytorch
                #mask_pred_g = affine2d(mask_pred, affine_pred_to_g_t[np.newaxis, :, :])

                # Normalize both mask channels into a displayable range.
                mask_pred_np[:, :, 0] -= mask_pred_np[:, :, 0].min()
                mask_pred_np[:, :, 0] /= (mask_pred_np[:, :, 0].max() + 1e-9)
                mask_pred_np[:, :, 0] *= 2.0
                mask_pred_np[:, :, 1] -= mask_pred_np[:, :, 1].min()
                mask_pred_np[:, :, 1] /= (mask_pred_np[:, :, 1].max() + 1e-9)

                presenter = Presenter()
                presenter.show_image(mask_pred_g_np,
                                     "mask_pred_g",
                                     torch=False,
                                     waitkey=1,
                                     scale=4)
                #import matplotlib.pyplot as plt
                #print("image.data shape:", image.data.cpu().numpy().shape)
                #plt.imshow(image.data.squeeze().permute(1,2,0).cpu().numpy())
                #plt.show()
                # presenter.show_image(image.data, "mask_pred_g", torch=False, waitkey=1, scale=4)
                #import pdb; pdb.set_trace()
                pred_viz_np = presenter.overlaid_image(image.data, mask_pred_np, channel=0)
                # TODO: Don't show labels
                # TODO: OpenCV colours
                #label_mask_np = p.data.cpu().numpy()[0].transpose(1,2,0)
                labl_viz_np = presenter.overlaid_image(image.data, label_mask.data, channel=0)
                viz_img_np = np.concatenate((pred_viz_np, labl_viz_np), axis=1)
                viz_img_np = pred_viz_np
                viz_img = presenter.overlay_text(viz_img_np, instruction_dbg_str)
                cv2.imshow("interactive viz", viz_img)
                cv2.waitKey(100)

                rollout_model(exec_model, env, env_ids[0][s], set_idxs[0][s],
                              seg_idxs[0][s], tok_instruction)
                write_instruction("")
def evaluate():
    """Roll out and evaluate the policy, optionally in resumable rounds.

    When "collect_n_at_a_time" is configured, environments are rolled out in
    rounds of that size; after each round the rollouts are saved per-env
    under the temporary evaluation dataset directory, so an interrupted
    evaluation can be resumed (the user is prompted to load, discard, or
    cancel when previous data exists).  Finally the collected dataset is
    scored with the landmark-side and/or NL evaluators per the setup flags.
    """
    P.initialize_experiment()
    params = P.get_current_parameters()
    setup = params["Setup"]

    models = []
    for i in range(setup["num_workers"]):
        model, model_loaded = load_model()
        models.append(model)

    eval_envs = list(sorted(get_correct_eval_env_id_list()))
    round_size = P.get_current_parameters()["Data"].get("collect_n_at_a_time")

    # TODO: Scrap RollOutParams and use parameter server JSON params instead
    roll_out_params = RollOutParams() \
        .setModelName(setup["model"]) \
        .setModelFile(setup["model_file"]) \
        .setRunName(setup["run_name"]) \
        .setSetupName(P.get_setup_name()) \
        .setEnvList(eval_envs) \
        .setMaxDeviation(800) \
        .setHorizon(setup["trajectory_length"]) \
        .setStepsToForceStop(20) \
        .setPlot(False) \
        .setShowAction(False) \
        .setIgnorePolicyStop(False) \
        .setPlotDir("evaluate/" + setup["run_name"]) \
        .setSavePlots(False) \
        .setRealtimeFirstPerson(False) \
        .setSaveSamples(False) \
        .setBuildTrainData(False) \
        .setSegmentReset("always") \
        .setSegmentLevel(False) \
        .setFirstSegmentOnly(False) \
        .setDebug(setup["debug"]) \
        .setCuda(setup["cuda"]) \
        .setRealDrone(setup["real_drone"])

    custom_eval = "Eval" in params and params["Eval"]["custom_eval"]
    instructions = None

    if custom_eval:
        # Each example is a (env, set, seg, instruction) tuple; transpose
        # the example list into per-field columns.
        examples = params["Eval"]["examples"]
        eval_envs, eval_sets, eval_segs, instructions = tuple(
            map(lambda m: list(m), list(zip(*examples))))
        print("!! \nRunning custom evaluation with the following setup:")
        print(examples)
        roll_out_params.setEnvList(eval_envs)
        roll_out_params.setSegList(eval_segs)
        roll_out_params.setCustomInstructions(instructions)

    if setup["num_workers"] > 1:
        roller = ParallelPolicyRoller(num_workers=setup["num_workers"])
    else:
        roller = PolicyRoller()

    if round_size:
        eval_dataset_name = data_io.paths.get_eval_tmp_dataset_name(
            setup["model"], setup["run_name"])
        eval_dataset_path = data_io.paths.get_dataset_dir(eval_dataset_name)

        cumulative_dataset = []
        if os.path.exists(eval_dataset_path):
            result = query_user_load_discard(eval_dataset_path)
            if result == "load":
                print("Loading dataset and continuing evaluation")
                cumulative_dataset = load_multiple_env_data_from_dir(eval_dataset_path)
            elif result == "discard":
                print("Discarding existing evaluation data")
                shutil.rmtree(eval_dataset_path)
            elif result == "cancel":
                print("Cancelling evaluation")
                return

        os.makedirs(eval_dataset_path, exist_ok=True)

        # Skip environments whose rollouts were already collected.
        collected_envs = set([
            rollout[0]["env_id"] for rollout in cumulative_dataset
            if len(rollout) > 0
        ])
        eval_envs = [e for e in eval_envs if e not in collected_envs]
        if setup.get("compute_results_no_rollout", False):
            eval_envs = []

        for i in range(0, len(eval_envs), round_size):
            j = min(len(eval_envs), i + round_size)
            round_envs = eval_envs[i:j]
            roll_out_params.setEnvList(round_envs)
            dataset = roller.roll_out_policy(roll_out_params)

            # Save this data
            for rollout in dataset:
                if len(rollout) == 0:
                    print("WARNING! DROPPING EMPTY ROLLOUTS! \nSHOULDN'T DO THIS")
                    continue
                # FIX: the original computed env_id with a conditional whose
                # two branches were identical, then saved through an
                # `if True:`/`else:` pair with byte-identical bodies.
                # Collapsed both; behavior is unchanged. rollout is a list
                # of samples and is non-empty here (checked above).
                env_id = rollout[0]["env_id"]
                save_dataset_to_path(
                    os.path.join(eval_dataset_path, str(env_id)), rollout)
            cumulative_dataset += dataset
            print(f"Saved cumulative dataset to: {eval_dataset_path}")

        dataset = cumulative_dataset
    else:
        dataset = roller.roll_out_policy(roll_out_params)

    results = {}
    if setup["eval_landmark_side"]:
        evaler = DataEvalLandmarkSide(setup["run_name"],
                                      save_images=True,
                                      world_size=setup["world_size_m"])
        evaler.evaluate_dataset(dataset)
        results = evaler.get_results()
    if setup["eval_nl"]:
        evaler = DataEvalNL(setup["run_name"],
                            save_images=True,
                            entire_trajectory=False,
                            custom_instr=instructions)
        evaler.evaluate_dataset(dataset)
        results = evaler.get_results()
    print("Results:", results)
def evaluate():
    """Evaluate goal-location prediction accuracy over the evaluation dataset.

    For every segment, compares the argmax location of the predicted goal
    mask (channel 1) against the argmax of the ground-truth label mask;
    a prediction within OK_DIST pixels counts as a success.  Prints the
    success count, total count, total/average distance and success rate.
    """
    P.initialize_experiment()
    model, model_loaded = load_model()
    eval_envs = get_correct_eval_env_id_list()

    model.eval()
    dataset_name = P.get_current_parameters().get("Data").get("dataset_name")
    # FIX: the original call passed `dataset_prefix` twice (a SyntaxError);
    # the first occurrence was evidently meant to be `dataset_name`. Also
    # pass eval=True instead of the builtin `eval` function, which was only
    # accidentally truthy.
    dataset = model.get_dataset(data=None,
                                envs=eval_envs,
                                dataset_name=dataset_name,
                                dataset_prefix="supervised",
                                eval=True,
                                seg_level=False)
    dataloader = DataLoader(dataset,
                            collate_fn=dataset.collate_fn,
                            batch_size=1,
                            shuffle=False,
                            num_workers=4,
                            pin_memory=True,
                            timeout=0)

    count = 0
    success = 0
    total_dist = 0

    for batch in dataloader:
        if batch is None:
            print("None batch!")
            continue

        images = batch["images"]
        instructions = batch["instr"]
        label_masks = batch["traj_labels"]

        # Each of the above is a list of lists of tensors, where the outer list is over the batch and the inner list
        # is over the segments. Loop through and accumulate loss for each batch sequentially, and for each segment.
        # Reset model state (embedding etc) between batches, but not between segments.
        # We don't process each batch in batch-mode, because it's complicated, with the varying number of segments and all.
        # TODO: This code is outdated and wrongly discretizes the goal location. Grab the fixed version from the old branch.
        batch_size = len(images)
        print("batch: ", count)
        print("successes: ", success)
        for i in range(batch_size):
            num_segments = len(instructions[i])
            for s in range(num_segments):
                instruction = cuda_var(instructions[i][s], model.is_cuda, model.cuda_device)
                instruction_mask = torch.ones_like(instruction)
                image = cuda_var(images[i][s], model.is_cuda, model.cuda_device)
                label_mask = cuda_var(label_masks[i][s], model.is_cuda, model.cuda_device)
                label_mask = model.label_pool(label_mask)

                # Ground-truth goal location: argmax of label channel 1.
                goal_mask_l = label_mask[0, 1, :, :]
                goal_mask_l_np = goal_mask_l.data.cpu().numpy()
                goal_mask_l_flat = np.reshape(goal_mask_l_np, [-1])
                max_index_l = np.argmax(goal_mask_l_flat)
                argmax_loc_l = np.asarray([
                    int(max_index_l / goal_mask_l_np.shape[1]),
                    int(max_index_l % goal_mask_l_np.shape[1])
                ])

                # Skip segments with an (effectively) empty goal label.
                if np.sum(goal_mask_l_np) < 0.01:
                    continue

                mask_pred, features, emb_loss = model(image, instruction, instruction_mask)

                # Predicted goal location: argmax of predicted channel 1.
                goal_mask = mask_pred[0, 1, :, :]
                goal_mask_np = goal_mask.data.cpu().numpy()
                goal_mask_flat = np.reshape(goal_mask_np, [-1])
                max_index = np.argmax(goal_mask_flat)
                argmax_loc = np.asarray([
                    int(max_index / goal_mask_np.shape[1]),
                    int(max_index % goal_mask_np.shape[1])
                ])

                dist = np.linalg.norm(argmax_loc - argmax_loc_l)
                if dist < OK_DIST:
                    success += 1
                count += 1
                total_dist += dist

    print("Correct goal predictions: ", success)
    print("Total evaluations: ", count)
    print("total dist: ", total_dist)
    # Guard against division by zero when no segment had a usable goal label.
    if count > 0:
        print("avg dist: ", total_dist / float(count))
        print("success rate: ", success / float(count))
    else:
        print("No segments were evaluated!")