Пример #1
0
    def get_available_indices():
        """
        List the "index" entries under the cache directory that contain a "done" entry.

        The cache directory is read from the CAPREOLUS_CACHE environment
        variable, falling back to get_default_cache_dir() when it is unset.

        Returns:
            list: every result of searching the cache directory for "index"
            whose own search for "done" yields at least one match
            (presumably "done" marks a finished index - confirm).
        """
        # The fallback is always computed, just as it is when passed inline as
        # the default argument of os.environ.get().
        fallback_dir = get_default_cache_dir()
        cache_root = os.environ.get("CAPREOLUS_CACHE", fallback_dir)
        candidates = search_files_or_folders_in_directory(cache_root, "index")

        finished = []
        for candidate in candidates:
            done_markers = search_files_or_folders_in_directory(candidate, "done")
            if len(done_markers) > 0:
                finished.append(candidate)

        return finished
Пример #2
0
def test_search_files_in_directory(tmpdir):
    """
    Check that searching tmpdir for "config.json" finds both nested config files.

    create_dummy_configs(tmpdir) is expected to populate the directory
    (its exact layout is defined elsewhere; the two paths asserted below are
    the ones this test relies on).
    """
    create_dummy_configs(tmpdir)
    root = tmpdir.strpath

    found = search_files_or_folders_in_directory(root, "config.json")
    expected = {
        root + "/dummy_dir/nested_dummy_dir_1/config.json",
        root + "/dummy_dir/nested_dummy_dir_2/config.json",
    }

    # Compare as sets: the order of the search results is not part of the check.
    assert set(found) == expected
Пример #3
0
 def get_weight_file(pipeline):
     """
     Return the path to a weight file belonging to the given pipeline.

     The weights directory is <pipeline.model_path>/<pipeline.cfg["fold"]>/<WEIGHTS_DIR_NAME>
     and is searched for WEIGHTS_FILE_NAME. When the search yields several
     matches, the first one is returned.
     (A weight file is presumably one written with torch.save() - confirm.)

     Raises:
         IndexError: if the search result is an empty sequence.
     """
     weights_location = os.path.join(
         pipeline.model_path, pipeline.cfg["fold"], WEIGHTS_DIR_NAME
     )
     matches = search_files_or_folders_in_directory(weights_location, WEIGHTS_FILE_NAME)
     return matches[0]
Пример #4
0
    def get_config_from_results():
        """
        Load every "config.json" found under the results directory.

        The results directory is read from the CAPREOLUS_RESULTS environment
        variable, falling back to get_default_results_dir() when it is unset.

        Returns:
            list: the parsed JSON content of each config file, in the order
            the search returned them.
        """
        # TODO: Falling back to the default results dir when the env variable
        # is missing is duplicated logic. Move it to a single place?
        fallback_dir = get_default_results_dir()
        results_root = os.environ.get("CAPREOLUS_RESULTS", fallback_dir)

        def _read_json(path):
            # Parse one config file, closing it as soon as it has been read.
            with open(path) as handle:
                return json.load(handle)

        config_paths = search_files_or_folders_in_directory(results_root, "config.json")
        return [_read_json(path) for path in config_paths]
Пример #5
0
def test_convknrm(monkeypatch, tmpdir):
    """
    Run a one-iteration ConvKNRM pipeline on the "dummy" benchmark and check its outputs.

    The test asserts that exactly one "config.json" is found under
    pipeline.base_path with the expected reranker and niters values, and that
    exactly one "dev" match is found in the fold's "weights" directory.
    """
    # Redirect results and cache into the per-test temporary directory.
    monkeypatch.setenv("CAPREOLUS_RESULTS",
                       str(os.path.join(tmpdir, "results")))
    monkeypatch.setenv("CAPREOLUS_CACHE", str(os.path.join(tmpdir, "cache")))

    def fake_magnitude_embedding(*args, **kwargs):
        # Stand-in for EmbedText.get_magnitude_embeddings
        # (presumably avoids loading real embeddings - confirm).
        return Magnitude(None)

    pipeline = Pipeline({
        "reranker": "ConvKNRM",
        "niters": 1,
        "benchmark": "dummy",
        "itersize": 1,
        "batch": 1
    })
    pipeline.ex.main(train.train)
    # Make the train module use this pipeline instance for the run below.
    monkeypatch.setattr(train, "pipeline", pipeline)
    monkeypatch.setattr(EmbedText, "get_magnitude_embeddings",
                        fake_magnitude_embedding)
    pipeline.ex.run(
        config_updates={
            "reranker": "ConvKNRM",
            "niters": 1,
            "benchmark": "dummy",
            "itersize": 1,
            "batch": 1
        })
    logger.info("Base path is {0}".format(pipeline.base_path))

    config_files = search_files_or_folders_in_directory(
        pipeline.base_path, "config.json")
    assert len(config_files) == 1
    # Use a context manager so the config file handle is closed
    # deterministically instead of being leaked until garbage collection.
    with open(config_files[0], "rt") as config_handle:
        config_file = json.load(config_handle)
    assert config_file["reranker"] == "ConvKNRM"
    assert config_file["niters"] == 1

    run_path = os.path.join(pipeline.reranker_path, pipeline.cfg["fold"])
    weight_dir = os.path.join(run_path, "weights")
    weight_file = search_files_or_folders_in_directory(weight_dir, "dev")
    assert len(weight_file) == 1
Пример #6
0
def test_deeptilebar(monkeypatch, tmpdir):
    """
    Run a one-iteration DeepTileBar pipeline on the "dummy" benchmark and check its outputs.

    The test asserts that exactly one "config.json" is found under
    pipeline.base_path with the expected reranker and niters values, and that
    exactly one "dev" match is found in the fold's "weights" directory.
    """
    # Redirect results and cache into the per-test temporary directory.
    monkeypatch.setenv("CAPREOLUS_RESULTS",
                       str(os.path.join(tmpdir, "results")))
    monkeypatch.setenv("CAPREOLUS_CACHE", str(os.path.join(tmpdir, "cache")))

    def fake_magnitude_embedding(*args, **kwargs):
        # Stand-in for DeepTileExtractor.get_magnitude_embeddings
        # (presumably avoids loading real embeddings - confirm).
        return Magnitude(None)

    # Patched before the pipeline is constructed, unlike the run-time patch of
    # train.pipeline further down.
    monkeypatch.setattr(DeepTileExtractor, "get_magnitude_embeddings",
                        fake_magnitude_embedding)
    pipeline = Pipeline({
        "reranker": "DeepTileBar",
        "niters": 1,
        "benchmark": "dummy",
        "itersize": 1,
        "batch": 1,
        "passagelen": "3"
    })
    pipeline.ex.main(train.train)
    # Make the train module use this pipeline instance for the run below.
    monkeypatch.setattr(train, "pipeline", pipeline)
    pipeline.ex.run(
        config_updates={
            "reranker": "DeepTileBar",
            "niters": 1,
            "benchmark": "dummy",
            "itersize": 1,
            "batch": 1
        })

    config_files = search_files_or_folders_in_directory(
        pipeline.base_path, "config.json")
    assert len(config_files) == 1
    # Use a context manager so the config file handle is closed
    # deterministically instead of being leaked until garbage collection.
    with open(config_files[0], "rt") as config_handle:
        config_file = json.load(config_handle)
    assert config_file["reranker"] == "DeepTileBar"
    assert config_file["niters"] == 1

    run_path = os.path.join(pipeline.reranker_path, pipeline.cfg["fold"])
    weight_dir = os.path.join(run_path, "weights")
    weight_file = search_files_or_folders_in_directory(weight_dir, "dev")
    assert len(weight_file) == 1