def __init__(self):
        """Load OpenBookQA raw retrieval results and hybrid-model outputs."""
        self.dataset_result = LoadRawData("openbook")

        # Per-question MRR scores of the two base retrievers on the test set.
        self.bm25_test_mrrs = self.dataset_result.result_test_bm25["mrr"]
        self.useqa_test_mrrs = self.dataset_result.result_test_useqa["mrr"]

        # Results of the hybrid (threshold / logistic-regression) rerankers.
        hybrid_result_path = (
            generated_data_path +
            "/hybrid_classifier_result/openbook_hybrid_result.pickle")
        with open(hybrid_result_path, "rb") as fh:
            self.hybrid_models_result = pickle.load(fh)

        self.hybrid_threshold_test_mrrs = (
            self.hybrid_models_result["hybrid_threshold"]["mrr"])
        self.hybrid_lr_test_mrrs = (
            self.hybrid_models_result["hybrid_lr"]["mrr"])

        # Load the OpenBookQA test questions and knowledge base.
        retrieval_data_path = (
            generated_data_path +
            "openbook_useqa/openbook_useqa_retrieval_data.pickle")
        with open(retrieval_data_path, "rb") as fh:
            openbook_retrieval_raw = pickle.load(fh)

        self.test_list = openbook_retrieval_raw["test_list"]
        self.kb = openbook_retrieval_raw["kb"]

        # Router output: which questions were sent to the neural method.
        self.router_pred = (
            self.hybrid_models_result["hybrid_lr"]["router_output"])
    def __init__(self):
        self.dataset_result = LoadRawData("nq")

        self.bm25_test_mrrs_raw = self.dataset_result.result_test_bm25
        self.useqa_test_mrrs_raw = self.dataset_result.result_test_useqa

        with open(
                generated_data_path +
                "/hybrid_classifier_result/nq_hybrid_result.pickle",
                "rb") as handle:
            self.hybrid_models_result = pickle.load(handle)

        all_test_indices = [
            single_seed["test_index_in_all_list"]
            for single_seed in self.hybrid_models_result
        ]

        self.bm25_test_mrrs = np.concatenate([
            self.bm25_test_mrrs_raw["mrr"][test_split]
            for test_split in all_test_indices
        ])
        self.useqa_test_mrrs = np.concatenate([
            self.useqa_test_mrrs_raw["mrr"][test_split]
            for test_split in all_test_indices
        ])

        self.hybrid_threshold_test_mrrs = np.concatenate([
            single_seed["hybrid_threshold"]["mrr"]
            for single_seed in self.hybrid_models_result
        ])
        self.hybrid_lr_test_mrrs = np.concatenate([
            single_seed["hybrid_lr"]["mrr"]
            for single_seed in self.hybrid_models_result
        ])

        # load nq test data.
        with open(
                generated_data_path +
                "nq_retrieval_raw/nq_retrieval_data.pickle", "rb") as handle:
            nq_retrieval_raw = pickle.load(handle)

        print(nq_retrieval_raw.keys())

        self.test_list = nq_retrieval_raw["train_list"]
        self.sent_list = nq_retrieval_raw["sent_list"]
        self.doc_list = nq_retrieval_raw["doc_list"]
        self.resp_list = nq_retrieval_raw["resp_list"]

        # load the array indicating which uses ueural methods.
        self.router_pred = np.concatenate([
            single_seed["hybrid_lr"]["router_output"]
            for single_seed in self.hybrid_models_result
        ])
def debug_experiment(dataset="openbook"):
    """Run all three hybrid-retrieval strategies end to end for debugging.

    Each strategy is built from the dev-split results of ``dataset`` and then
    reranks the test split.

    Args:
        dataset: dataset key understood by ``LoadRawData``
            (e.g. ``"openbook"``).
    """
    dataset_results = LoadRawData(dataset)

    # Strategy 1: rerank by the summed BM25 + USE-QA scores.
    unsupervised_sum = UnsupervisedSum(dataset_results.result_dev_bm25,
                                       dataset_results.result_dev_useqa,
                                       dataset)
    unsupervised_sum.reranking_with_sum_score(
        dataset_results.result_test_bm25, dataset_results.result_test_useqa)

    # Strategy 2: route queries between retrievers with a score threshold.
    threshold_classifier = UnsupervisedThreshold(
        dataset_results.result_dev_bm25, dataset_results.result_dev_useqa,
        dataset)
    threshold_classifier.reranking_by_threshold(
        dataset_results.result_test_bm25, dataset_results.result_test_useqa)

    # Strategy 3: route queries with a logistic-regression classifier.
    # The trailing 7 is passed as-is to the router — presumably a feature
    # count or seed; TODO confirm against LogisticRegressionRouter.
    lr_classifier = LogisticRegressionRouter(dataset_results.result_dev_bm25,
                                             dataset_results.result_dev_useqa,
                                             dataset, 7)
    lr_classifier.reranking_by_lr_router(dataset_results.result_test_bm25,
                                         dataset_results.result_test_useqa)
    def __init__(self):
        """Load SQuAD raw retrieval results and hybrid-model outputs."""
        self.dataset_result = LoadRawData("squad")

        # The per-seed hybrid results below all share one test split, so the
        # base-retriever scores are repeated 5x to line up with them
        # (presumably 5 = number of seeds; confirm against the pickle).
        n_repeats = 5
        self.bm25_test_mrrs = np.concatenate(
            [self.dataset_result.result_test_bm25["mrr"]] * n_repeats)
        self.useqa_test_mrrs = np.concatenate(
            [self.dataset_result.result_test_useqa["mrr"]] * n_repeats)

        hybrid_result_path = (
            generated_data_path +
            "/hybrid_classifier_result/squad_hybrid_result.pickle")
        with open(hybrid_result_path, "rb") as fh:
            self.hybrid_models_result = pickle.load(fh)

        self.hybrid_threshold_test_mrrs = np.concatenate(
            [seed_result["hybrid_threshold"]["mrr"]
             for seed_result in self.hybrid_models_result])
        self.hybrid_lr_test_mrrs = np.concatenate(
            [seed_result["hybrid_lr"]["mrr"]
             for seed_result in self.hybrid_models_result])

        # Load the SQuAD retrieval data.
        retrieval_data_path = (
            generated_data_path +
            "squad_useqa/squad_retrieval_data.pickle")
        with open(retrieval_data_path, "rb") as fh:
            squad_retrieval_raw = pickle.load(fh)

        # The dev list is used as the test set here.
        self.test_list = squad_retrieval_raw["dev_list"]
        self.sent_list = squad_retrieval_raw["sent_list"]
        self.doc_list = squad_retrieval_raw["doc_list"]
        self.resp_list = squad_retrieval_raw["resp_list"]

        # Router output: which questions were sent to the neural method.
        self.router_pred = np.concatenate(
            [seed_result["hybrid_lr"]["router_output"]
             for seed_result in self.hybrid_models_result])
 def __init__(self):
     """Record the dataset name and load the raw NQ retrieval results."""
     dataset_name = "nq"
     self.dataset = dataset_name
     self.dataset_results = LoadRawData(dataset_name)
 def __init__(self):
     """Record the dataset name and load the raw OpenBookQA retrieval results."""
     dataset_name = "openbook"
     self.dataset_results = LoadRawData(dataset_name)
     self.dataset = dataset_name