def evaluate_saved_rollouts():
    """Score previously-collected rollouts for the current run and log results.

    Loads the temporary evaluation dataset for the configured model/run,
    warns about expected evaluation environments that have no rollout data,
    then evaluates the rollouts with three DataEvalNL configurations
    (aug_len=1, aug_len=2, and the combined "1-2" variant) and logs all
    three result sets to the run's results directory.
    """
    config = P.get_current_parameters()
    setup = config["Setup"]
    model_name = setup["model"]
    run_name = setup["run_name"]

    eval_dname = get_eval_tmp_dataset_name(model_name, run_name)
    rollouts = load_multiple_env_data(eval_dname)

    # Which of the expected evaluation environments actually have data?
    expected_envs = set(list(sorted(get_correct_eval_env_id_list())))
    have_envs = set([r[0]["env_id"] for r in rollouts if len(r) > 0])
    missing_envs = expected_envs - have_envs

    logdir = get_results_dir(run_name)
    if len(missing_envs) > 0:
        print(f"Warning! {len(missing_envs)} envs missing: {missing_envs}")
        #sys.exit(1)

    separator = "--------------------------------------------------------------------------------------------"
    log("", logdir)
    log(separator, logdir)
    log(f"Evaluating rollouts for run {run_name}", logdir)
    log(f" using dataset {eval_dname}", logdir)
    log(f" missing envs {missing_envs}", logdir)
    log(separator, logdir)

    # Single-segment evaluator (aug_len=1).
    single_eval = DataEvalNL(setup["run_name"] + "1-1",
                             save_images=False,
                             entire_trajectory=False,
                             aug_len=1)
    single_eval.evaluate_dataset(rollouts)
    results1 = single_eval.get_results()

    # Two-segment evaluator (aug_len=2).
    double_eval = DataEvalNL(setup["run_name"] + "2-2",
                             save_images=False,
                             entire_trajectory=False,
                             aug_len=2)
    double_eval.evaluate_dataset(rollouts)
    results2 = double_eval.get_results()

    # Combined evaluator; the only one that saves images.
    combined_eval = DataEvalNL(setup["run_name"] + "1-2",
                               save_images=True,
                               entire_trajectory=False)
    combined_eval.evaluate_dataset(rollouts)
    resultsf = combined_eval.get_results()

    log(f"Results 1-1:{results1}", logdir)
    log(f"Results 2-2:{results2}", logdir)
    log(f"Results 1-2:{resultsf}", logdir)
    log(f" -- END EVALUATION FOR {run_name}-- ", logdir)
    log(separator, logdir)
def evaluate_top_down_pred():
    """Evaluate the top-down prediction model on the evaluation environments.

    Computes the average supervised loss over the evaluation dataset,
    saves every visualization image the model produced during evaluation,
    and writes the average loss to the run's results JSON file.
    """
    P.initialize_experiment()
    setup = P.get_current_parameters()["Setup"]
    model, model_loaded = load_model()

    eval_envs = get_correct_eval_env_id_list()
    dataset_name = P.get_current_parameters().get("Data").get("dataset_name")
    # FIX: the original call passed `dataset_prefix` twice (a SyntaxError);
    # the first occurrence was evidently meant to be `dataset_name` (compare
    # the analogous get_dataset call in train_top_down_pred). Also pass
    # eval=True instead of the builtin `eval` function, which was only
    # accidentally truthy.
    dataset = model.get_dataset(envs=eval_envs,
                                dataset_name=dataset_name,
                                dataset_prefix="supervised",
                                eval=True)
    dataloader = DataLoader(dataset,
                            collate_fn=dataset.collate_fn,
                            batch_size=1,
                            shuffle=False,
                            num_workers=1,
                            pin_memory=False)

    total_loss = 0
    count = 0
    num_batches = len(dataloader)
    for b, batch in enumerate(dataloader):
        loss_var = model.sup_loss_on_batch(batch, eval=True, viz=True)
        total_loss += loss_var.data[0]
        count += 1
        print("batch: " + str(b) + " / " + str(num_batches) +
              " loss: " + str(loss_var.data[0]))
    # Guard against an empty dataloader so we don't divide by zero.
    avg_loss = total_loss / count if count > 0 else 0.0

    results_dir = get_results_dir(setup["run_name"])
    results_json_path = get_results_path(setup["run_name"])
    os.makedirs(results_dir, exist_ok=True)

    # Dump every visualization image the model accumulated during evaluation.
    viz = model.get_viz()
    for key, lst in viz.items():
        for i, img in enumerate(lst):
            img_path = os.path.join(
                results_dir, key + str(i) + "_" + setup["model"] + ".jpg")
            # NOTE(review): scipy.misc.imsave was removed in SciPy 1.2;
            # if SciPy is ever upgraded, switch to imageio.imwrite.
            sp.misc.imsave(img_path, img)
            print("Saved image: " + img_path)

    with open(results_json_path, "w") as fp:
        json.dump({"loss": avg_loss}, fp)
def evaluate():
    """Run landmark-side evaluation over a synthetic (faux) random-point dataset.

    Note: at this point test and dev have been swapped. Whatever we've been
    developing on, called "test", is hereafter called dev. Test is the data
    that hasn't been touched at all.
    """
    P.initialize_experiment()
    setup = P.get_current_parameters()["Setup"]

    env_ids = get_correct_eval_env_id_list()
    faux_data = faux_dataset_random_pt(env_ids)
    #faux_data = faux_dataset_random_landmark(env_ids)

    results = {}
    if setup["eval_landmark_side"]:
        evaluator = DataEvalLandmarkSide(setup["run_name"], save_images=False)
        evaluator.evaluate_dataset(faux_data)
        results = evaluator.get_results()
    results["all_dist"] = []
    print("Results:", results)
def evaluate():
    """Roll out the configured policy on the evaluation environments and score it.

    Loads one model per worker (optionally restoring pretrained weights),
    rolls out the policy — either on the standard evaluation environment
    list or on a custom example set from the "Eval" parameters — and then
    evaluates the collected dataset with the landmark-side and/or
    natural-language evaluators, as enabled in the setup.
    """
    P.initialize_experiment()
    params = P.get_current_parameters()
    setup = params["Setup"]
    # import pdb;pdb.set_trace()

    # One model instance per rollout worker.
    models = []
    for _ in range(setup["num_workers"]):
        model, model_loaded = load_model()
        if setup["restore_weights_name"]:
            restore_pretrained_weights(model, setup["restore_weights_name"],
                                       setup["fix_restored_weights"])
        models.append(model)

    eval_envs = get_correct_eval_env_id_list()

    # Rollout configuration (each setter returns self; calls are equivalent
    # to the fluent-chain form).
    ro_params = RollOutParams()
    ro_params.setModelName(setup["model"])
    ro_params.setModelFile(setup["model_file"])
    ro_params.setRunName(setup["run_name"])
    ro_params.setSetupName(P.get_setup_name())
    ro_params.setEnvList(eval_envs)
    ro_params.setMaxDeviation(400)
    ro_params.setHorizon(100)
    ro_params.setStepsToForceStop(10)
    ro_params.setPlot(False)
    ro_params.setShowAction(False)
    ro_params.setIgnorePolicyStop(False)
    ro_params.setPlotDir("evaluate/" + setup["run_name"])
    ro_params.setSavePlots(True)
    ro_params.setRealtimeFirstPerson(False)
    ro_params.setSaveSamples(False)
    ro_params.setBuildTrainData(False)
    ro_params.setSegmentReset("always")
    ro_params.setSegmentLevel(True)
    ro_params.setFirstSegmentOnly(False)
    ro_params.setDebug(setup["debug"])
    ro_params.setCuda(setup["cuda"])

    instructions = None
    custom_eval = "Eval" in params and params["Eval"]["custom_eval"]
    if custom_eval:
        # Each example is a (env, set, seg, instruction) tuple;
        # transpose the example list into per-field columns.
        examples = params["Eval"]["examples"]
        eval_envs, eval_sets, eval_segs, instructions = tuple(
            map(lambda m: list(m), list(zip(*examples))))
        print("!! \nRunning custom evaluation with the following setup:")
        print(examples)
        ro_params.setEnvList(eval_envs)
        ro_params.setSegList(eval_segs)
        ro_params.setCustomInstructions(instructions)

    if setup["num_workers"] > 1:
        roller = ParallelPolicyRoller(num_workers=setup["num_workers"])
    else:
        roller = PolicyRoller()

    dataset = roller.roll_out_policy(ro_params)

    results = {}
    if setup["eval_landmark_side"]:
        evaluator = DataEvalLandmarkSide(setup["run_name"])
        evaluator.evaluate_dataset(dataset)
        results = evaluator.get_results()
    if setup["eval_nl"]:
        evaluator = DataEvalNL(setup["run_name"], save_images=True,
                               entire_trajectory=False,
                               custom_instr=instructions)
        evaluator.evaluate_dataset(dataset)
        results = evaluator.get_results()
    print("Results:", results)
def multiple_eval_rollout():
    """Collect evaluation rollouts for several systems over the eval environments.

    Loads one policy per configured parameter namespace, then for every
    evaluation environment rolls each policy over the available segments,
    saving the data into that system's temporary evaluation dataset.
    Environments already collected for a given system are skipped.  When
    "one_at_a_time" is set, only a single environment is processed per
    invocation.
    """
    params, system_namespaces = setup_parameter_namespaces()
    setup_overlay = params["MultipleEval"]["SetupOverlay"]
    domain = "real" if setup_overlay["real_drone"] else "sim"
    one_at_a_time = params["MultipleEval"]["one_at_a_time"]
    check_and_prompt_if_data_exists(system_namespaces)

    # Load the systems
    # TODO: Check how many can fit in GPU memory. If not too many, perhaps we can move them off-GPU between rounds
    policies = []
    for system_namespace in system_namespaces:
        P.switch_to_namespace(system_namespace)
        setup = P.get_current_parameters()["Setup"]
        policy, _ = load_model(setup["model"], setup["model_file"], domain)
        policies.append(policy)

    # ----------------------------------------------------------------------------------------
    # Initialize Roller
    # ----------------------------------------------------------------------------------------
    policy_roller = SimplePolicyRoller(instance_id=7,
                                       real_drone=setup_overlay["real_drone"],
                                       policy=None,
                                       oracle=None,
                                       no_reward=True)

    # ----------------------------------------------------------------------------------------
    # Collect rollouts
    # ----------------------------------------------------------------------------------------
    eval_envs = list(sorted(get_correct_eval_env_id_list()))
    count = 0

    # Loop over environments
    for env_id in eval_envs:
        seg_ids = get_segs_available_for_env(env_id, 0)
        env_ids = [env_id] * len(seg_ids)
        # FIX: the original print was missing the f-prefix, so the literal
        # text "{env_id}" was printed instead of the environment id.
        print(f"Beginning rollouts for env: {env_id}")
        if len(seg_ids) == 0:
            print(" NO SEGMENTS! \nNext...")
            continue

        # Loop over systems and save data
        for i, (policy, system_namespace) in enumerate(zip(policies, system_namespaces)):
            print(f"Rolling policy in namespace {system_namespace} for env: {env_id}")
            P.switch_to_namespace(system_namespace)
            setup = P.get_current_parameters()["Setup"]
            if env_data_already_collected(env_id, setup["model"], setup["run_name"]):
                print(f"Skipping env_id: {env_id}, policy: {setup['model']}")
                continue
            eval_dataset_name = get_eval_tmp_dataset_name(setup["model"], setup["run_name"])
            policy_roller.set_policy(policy)
            # when the last policy is done, we should land the drone
            policy_roller.rollout_segments(env_ids, seg_ids, None, False, 0,
                                           save_dataset_name=eval_dataset_name,
                                           rl_rollout=False,
                                           land_afterwards=(i == len(policies) - 1))

        count += 1
        if one_at_a_time and count > 0:
            print("Stopping. Run again to roll-out on the next environment!")
            break
    print("Done")
def train_top_down_pred():
    """Interactive top-down prediction loop driven by instructions typed into a file.

    Launches the UI and a POMDP environment, loads the top-down prediction
    model and an execution (wrapper) model, then iterates the supervised
    evaluation dataset.  For each segment the operator can type an
    instruction (or "CMD: Next" / "CMD: Reset") into the instruction file;
    the model predicts a visitation mask, which is rotated into the global
    frame, visualized with OpenCV, handed to the execution model as ground
    truth, and rolled out in the environment.
    """
    P.initialize_experiment()
    setup = P.get_current_parameters()["Setup"]

    launch_ui()
    env = PomdpInterface()

    print("model_name:", setup["top_down_model"])
    print("model_file:", setup["top_down_model_file"])

    model, model_loaded = load_model(
        model_name_override=setup["top_down_model"],
        model_file_override=setup["top_down_model_file"])
    exec_model, wrapper_model_loaded = load_model(
        model_name_override=setup["wrapper_model"],
        model_file_override=setup["wrapper_model_file"])

    affine2d = Affine2D()
    if model.is_cuda:
        affine2d.cuda()

    eval_envs = get_correct_eval_env_id_list()
    print("eval_envs:", eval_envs)
    train_instructions, dev_instructions, test_instructions, corpus = get_all_instructions(
        max_size=setup["max_envs"])
    # FIX: the original merged train_instructions twice and never included
    # test_instructions, so environments that only exist in the test split
    # would raise a KeyError on all_instr[env_id] below.
    all_instr = {
        **train_instructions,
        **dev_instructions,
        **test_instructions
    }
    token2term, word2token = get_word_to_token_map(corpus)

    dataset = model.get_dataset(envs=eval_envs,
                                dataset_name="supervised",
                                eval=True,
                                seg_level=False)
    dataloader = DataLoader(dataset,
                            collate_fn=dataset.collate_fn,
                            batch_size=1,
                            shuffle=False,
                            num_workers=1,
                            pin_memory=True)

    for b, batch in list(enumerate(dataloader)):
        print("batch:", batch)
        images = batch["images"]
        instructions = batch["instr"]
        label_masks = batch["traj_labels"]
        affines = batch["affines_g_to_s"]
        env_ids = batch["env_id"]
        set_idxs = batch["set_idx"]
        seg_idxs = batch["seg_idx"]

        env_id = env_ids[0][0]
        set_idx = set_idxs[0][0]
        print("env_id of this batch:", env_id)
        env.set_environment(
            env_id, instruction_set=all_instr[env_id][set_idx]["instructions"])
        env.reset(0)

        num_segments = len(instructions[0])
        print("num_segments in this batch:", num_segments)
        write_instruction("")
        write_real_instruction("None")
        instruction_str = read_instruction_file()
        print("Initial instruction: ", instruction_str)

        # TODO: Reset model state here if we keep any temporal memory etc
        for s in range(num_segments):
            start_state = env.reset(s)
            keep_going = True
            real_instruction = cuda_var(instructions[0][s], setup["cuda"], 0)
            tmp = list(real_instruction.data.cpu()[0].numpy())
            real_instruction_str = debug_untokenize_instruction(tmp)
            write_real_instruction(real_instruction_str)
            #write_instruction(real_instruction_str)
            #instruction_str = real_instruction_str

            image = cuda_var(images[0][s], setup["cuda"], 0)
            label_mask = cuda_var(label_masks[0][s], setup["cuda"], 0)
            affine_g_to_s = affines[0][s]
            print("Your current environment:")
            with open("/storage/dxsun/unreal_config_nl/configs/configs/random_config_"
                      + str(env_id) + ".json") as fp:
                config = json.load(fp)
            print(config)

            while keep_going:
                write_real_instruction(real_instruction_str)

                # Poll the instruction file until the operator either types a
                # multi-word instruction or a control command.
                while True:
                    cv2.waitKey(200)
                    instruction = read_instruction_file()
                    if instruction == "CMD: Next":
                        print("Advancing")
                        keep_going = False
                        write_empty_instruction()
                        break
                    elif instruction == "CMD: Reset":
                        print("Resetting")
                        env.reset(s)
                        write_empty_instruction()
                    elif len(instruction.split(" ")) > 1:
                        instruction_str = instruction
                        print("Executing: ", instruction_str)
                        break

                if not keep_going:
                    continue

                #instruction_str = read_instruction_file()
                # TODO: Load instruction from file
                tok_instruction = tokenize_instruction(instruction_str, word2token)
                instruction_t = torch.LongTensor(tok_instruction).unsqueeze(0)
                instruction_v = cuda_var(instruction_t, setup["cuda"], 0)
                instruction_mask = torch.ones_like(instruction_v)
                tmp = list(instruction_t[0].numpy())
                instruction_dbg_str = debug_untokenize_instruction(tmp, token2term)

                # import matplotlib.pyplot as plt
                #plt.plot(image.squeeze(0).permute(1,2,0).cpu().numpy())
                #plt.show()

                res = model(image, instruction_v, instruction_mask)
                mask_pred = res[0]

                shp = mask_pred.shape
                mask_pred = F.softmax(mask_pred.view([2, -1]), 1).view(shp)
                #mask_pred = softmax2d(mask_pred)

                # TODO: Rotate the mask_pred to the global frame
                affine_s_to_g = np.linalg.inv(affine_g_to_s)
                S = 8.0
                affine_scale_up = np.asarray([[S, 0, 0], [0, S, 0], [0, 0, 1]])
                affine_scale_down = np.linalg.inv(affine_scale_up)
                affine_pred_to_g = np.dot(affine_scale_down,
                                          np.dot(affine_s_to_g, affine_scale_up))
                #affine_pred_to_g_t = torch.from_numpy(affine_pred_to_g).float()

                mask_pred_np = mask_pred.data.cpu().numpy()[0].transpose(1, 2, 0)
                mask_pred_g_np = apply_affine(mask_pred_np, affine_pred_to_g, 32, 32)
                print("Sum of global mask: ", mask_pred_g_np.sum())
                mask_pred_g = torch.from_numpy(
                    mask_pred_g_np.transpose(2, 0, 1)).float()[np.newaxis, :, :, :]

                exec_model.set_ground_truth_visitation_d(mask_pred_g)

                # Create a batch axis for pytorch
                #mask_pred_g = affine2d(mask_pred, affine_pred_to_g_t[np.newaxis, :, :])

                # Normalize both mask channels into a displayable range.
                mask_pred_np[:, :, 0] -= mask_pred_np[:, :, 0].min()
                mask_pred_np[:, :, 0] /= (mask_pred_np[:, :, 0].max() + 1e-9)
                mask_pred_np[:, :, 0] *= 2.0
                mask_pred_np[:, :, 1] -= mask_pred_np[:, :, 1].min()
                mask_pred_np[:, :, 1] /= (mask_pred_np[:, :, 1].max() + 1e-9)

                presenter = Presenter()
                presenter.show_image(mask_pred_g_np,
                                     "mask_pred_g",
                                     torch=False,
                                     waitkey=1,
                                     scale=4)
                #import matplotlib.pyplot as plt
                #print("image.data shape:", image.data.cpu().numpy().shape)
                #plt.imshow(image.data.squeeze().permute(1,2,0).cpu().numpy())
                #plt.show()
                # presenter.show_image(image.data, "mask_pred_g", torch=False, waitkey=1, scale=4)
                #import pdb; pdb.set_trace()
                pred_viz_np = presenter.overlaid_image(image.data, mask_pred_np, channel=0)
                # TODO: Don't show labels
                # TODO: OpenCV colours
                #label_mask_np = p.data.cpu().numpy()[0].transpose(1,2,0)
                labl_viz_np = presenter.overlaid_image(image.data, label_mask.data, channel=0)
                viz_img_np = np.concatenate((pred_viz_np, labl_viz_np), axis=1)
                viz_img_np = pred_viz_np
                viz_img = presenter.overlay_text(viz_img_np, instruction_dbg_str)
                cv2.imshow("interactive viz", viz_img)
                cv2.waitKey(100)

                rollout_model(exec_model, env, env_ids[0][s], set_idxs[0][s],
                              seg_idxs[0][s], tok_instruction)
                write_instruction("")
def evaluate():
    """Roll out and evaluate the policy, optionally in resumable rounds.

    When "collect_n_at_a_time" is configured, environments are rolled out in
    rounds of that size; after each round the rollouts are saved per-env
    under the temporary evaluation dataset directory, so an interrupted
    evaluation can be resumed (the user is prompted to load, discard, or
    cancel when previous data exists).  Finally the collected dataset is
    scored with the landmark-side and/or NL evaluators per the setup flags.
    """
    P.initialize_experiment()
    params = P.get_current_parameters()
    setup = params["Setup"]

    models = []
    for i in range(setup["num_workers"]):
        model, model_loaded = load_model()
        models.append(model)

    eval_envs = list(sorted(get_correct_eval_env_id_list()))
    round_size = P.get_current_parameters()["Data"].get("collect_n_at_a_time")

    # TODO: Scrap RollOutParams and use parameter server JSON params instead
    roll_out_params = RollOutParams() \
        .setModelName(setup["model"]) \
        .setModelFile(setup["model_file"]) \
        .setRunName(setup["run_name"]) \
        .setSetupName(P.get_setup_name()) \
        .setEnvList(eval_envs) \
        .setMaxDeviation(800) \
        .setHorizon(setup["trajectory_length"]) \
        .setStepsToForceStop(20) \
        .setPlot(False) \
        .setShowAction(False) \
        .setIgnorePolicyStop(False) \
        .setPlotDir("evaluate/" + setup["run_name"]) \
        .setSavePlots(False) \
        .setRealtimeFirstPerson(False) \
        .setSaveSamples(False) \
        .setBuildTrainData(False) \
        .setSegmentReset("always") \
        .setSegmentLevel(False) \
        .setFirstSegmentOnly(False) \
        .setDebug(setup["debug"]) \
        .setCuda(setup["cuda"]) \
        .setRealDrone(setup["real_drone"])

    custom_eval = "Eval" in params and params["Eval"]["custom_eval"]
    instructions = None

    if custom_eval:
        # Each example is a (env, set, seg, instruction) tuple; transpose
        # the example list into per-field columns.
        examples = params["Eval"]["examples"]
        eval_envs, eval_sets, eval_segs, instructions = tuple(
            map(lambda m: list(m), list(zip(*examples))))
        print("!! \nRunning custom evaluation with the following setup:")
        print(examples)
        roll_out_params.setEnvList(eval_envs)
        roll_out_params.setSegList(eval_segs)
        roll_out_params.setCustomInstructions(instructions)

    if setup["num_workers"] > 1:
        roller = ParallelPolicyRoller(num_workers=setup["num_workers"])
    else:
        roller = PolicyRoller()

    if round_size:
        eval_dataset_name = data_io.paths.get_eval_tmp_dataset_name(
            setup["model"], setup["run_name"])
        eval_dataset_path = data_io.paths.get_dataset_dir(eval_dataset_name)

        cumulative_dataset = []
        if os.path.exists(eval_dataset_path):
            result = query_user_load_discard(eval_dataset_path)
            if result == "load":
                print("Loading dataset and continuing evaluation")
                cumulative_dataset = load_multiple_env_data_from_dir(eval_dataset_path)
            elif result == "discard":
                print("Discarding existing evaluation data")
                shutil.rmtree(eval_dataset_path)
            elif result == "cancel":
                print("Cancelling evaluation")
                return

        os.makedirs(eval_dataset_path, exist_ok=True)

        # Skip environments whose rollouts were already collected.
        collected_envs = set([
            rollout[0]["env_id"] for rollout in cumulative_dataset
            if len(rollout) > 0
        ])
        eval_envs = [e for e in eval_envs if e not in collected_envs]
        if setup.get("compute_results_no_rollout", False):
            eval_envs = []

        for i in range(0, len(eval_envs), round_size):
            j = min(len(eval_envs), i + round_size)
            round_envs = eval_envs[i:j]
            roll_out_params.setEnvList(round_envs)
            dataset = roller.roll_out_policy(roll_out_params)

            # Save this data
            for rollout in dataset:
                if len(rollout) == 0:
                    print("WARNING! DROPPING EMPTY ROLLOUTS! \nSHOULDN'T DO THIS")
                    continue
                # FIX: the original computed env_id with a conditional whose
                # two branches were identical, then saved through an
                # `if True:`/`else:` pair with byte-identical bodies.
                # Collapsed both; behavior is unchanged. rollout is a list
                # of samples and is non-empty here (checked above).
                env_id = rollout[0]["env_id"]
                save_dataset_to_path(
                    os.path.join(eval_dataset_path, str(env_id)), rollout)
            cumulative_dataset += dataset
            print(f"Saved cumulative dataset to: {eval_dataset_path}")

        dataset = cumulative_dataset
    else:
        dataset = roller.roll_out_policy(roll_out_params)

    results = {}
    if setup["eval_landmark_side"]:
        evaler = DataEvalLandmarkSide(setup["run_name"],
                                      save_images=True,
                                      world_size=setup["world_size_m"])
        evaler.evaluate_dataset(dataset)
        results = evaler.get_results()
    if setup["eval_nl"]:
        evaler = DataEvalNL(setup["run_name"],
                            save_images=True,
                            entire_trajectory=False,
                            custom_instr=instructions)
        evaler.evaluate_dataset(dataset)
        results = evaler.get_results()
    print("Results:", results)
def evaluate():
    """Evaluate goal-location prediction accuracy over the evaluation dataset.

    For every segment, compares the argmax location of the predicted goal
    mask (channel 1) against the argmax of the ground-truth label mask;
    a prediction within OK_DIST pixels counts as a success.  Prints the
    success count, total count, total/average distance and success rate.
    """
    P.initialize_experiment()
    model, model_loaded = load_model()
    eval_envs = get_correct_eval_env_id_list()

    model.eval()
    dataset_name = P.get_current_parameters().get("Data").get("dataset_name")
    # FIX: the original call passed `dataset_prefix` twice (a SyntaxError);
    # the first occurrence was evidently meant to be `dataset_name`. Also
    # pass eval=True instead of the builtin `eval` function, which was only
    # accidentally truthy.
    dataset = model.get_dataset(data=None,
                                envs=eval_envs,
                                dataset_name=dataset_name,
                                dataset_prefix="supervised",
                                eval=True,
                                seg_level=False)
    dataloader = DataLoader(dataset,
                            collate_fn=dataset.collate_fn,
                            batch_size=1,
                            shuffle=False,
                            num_workers=4,
                            pin_memory=True,
                            timeout=0)

    count = 0
    success = 0
    total_dist = 0

    for batch in dataloader:
        if batch is None:
            print("None batch!")
            continue

        images = batch["images"]
        instructions = batch["instr"]
        label_masks = batch["traj_labels"]

        # Each of the above is a list of lists of tensors, where the outer list is over the batch and the inner list
        # is over the segments. Loop through and accumulate loss for each batch sequentially, and for each segment.
        # Reset model state (embedding etc) between batches, but not between segments.
        # We don't process each batch in batch-mode, because it's complicated, with the varying number of segments and all.
        # TODO: This code is outdated and wrongly discretizes the goal location. Grab the fixed version from the old branch.
        batch_size = len(images)
        print("batch: ", count)
        print("successes: ", success)
        for i in range(batch_size):
            num_segments = len(instructions[i])
            for s in range(num_segments):
                instruction = cuda_var(instructions[i][s], model.is_cuda, model.cuda_device)
                instruction_mask = torch.ones_like(instruction)
                image = cuda_var(images[i][s], model.is_cuda, model.cuda_device)
                label_mask = cuda_var(label_masks[i][s], model.is_cuda, model.cuda_device)
                label_mask = model.label_pool(label_mask)

                # Ground-truth goal location: argmax of label channel 1.
                goal_mask_l = label_mask[0, 1, :, :]
                goal_mask_l_np = goal_mask_l.data.cpu().numpy()
                goal_mask_l_flat = np.reshape(goal_mask_l_np, [-1])
                max_index_l = np.argmax(goal_mask_l_flat)
                argmax_loc_l = np.asarray([
                    int(max_index_l / goal_mask_l_np.shape[1]),
                    int(max_index_l % goal_mask_l_np.shape[1])
                ])

                # Skip segments with an (effectively) empty goal label.
                if np.sum(goal_mask_l_np) < 0.01:
                    continue

                mask_pred, features, emb_loss = model(image, instruction, instruction_mask)

                # Predicted goal location: argmax of predicted channel 1.
                goal_mask = mask_pred[0, 1, :, :]
                goal_mask_np = goal_mask.data.cpu().numpy()
                goal_mask_flat = np.reshape(goal_mask_np, [-1])
                max_index = np.argmax(goal_mask_flat)
                argmax_loc = np.asarray([
                    int(max_index / goal_mask_np.shape[1]),
                    int(max_index % goal_mask_np.shape[1])
                ])

                dist = np.linalg.norm(argmax_loc - argmax_loc_l)
                if dist < OK_DIST:
                    success += 1
                count += 1
                total_dist += dist

    print("Correct goal predictions: ", success)
    print("Total evaluations: ", count)
    print("total dist: ", total_dist)
    # Guard against division by zero when no segment had a usable goal label.
    if count > 0:
        print("avg dist: ", total_dist / float(count))
        print("success rate: ", success / float(count))
    else:
        print("No segments were evaluated!")