def get_available_indices():
    """Return the index directories in the cache that contain a "done" entry.

    The cache root is read from the ``CAPREOLUS_CACHE`` environment variable,
    falling back to ``get_default_cache_dir()``. Each path found by searching
    the cache root for "index" is kept only when searching inside it for
    "done" yields at least one match.
    """
    cache_root = os.environ.get("CAPREOLUS_CACHE", get_default_cache_dir())

    completed = []
    for candidate in search_files_or_folders_in_directory(cache_root, "index"):
        done_markers = search_files_or_folders_in_directory(candidate, "done")
        if len(done_markers) > 0:
            completed.append(candidate)
    return completed
def test_search_files_in_directory(tmpdir):
    """Searching the temp dir for "config.json" finds both nested dummy configs."""
    create_dummy_configs(tmpdir)

    root = tmpdir.strpath
    found = search_files_or_folders_in_directory(root, "config.json")

    # Compared as sets, so the order in which matches are returned is irrelevant.
    expected = {
        root + "/dummy_dir/nested_dummy_dir_1/config.json",
        root + "/dummy_dir/nested_dummy_dir_2/config.json",
    }
    assert set(found) == expected
def get_weight_file(pipeline):
    """Return the path of the first weight file found for a pipeline's fold.

    The search is performed for ``WEIGHTS_FILE_NAME`` inside
    ``<pipeline.model_path>/<pipeline.cfg["fold"]>/<WEIGHTS_DIR_NAME>``.
    When several entries match, only the first one returned by the search
    is used.

    NOTE(review): the original note describes a weight file as one written
    by torch's model-saving routine; that is not verifiable from this
    function alone.

    The result is indexed with ``[0]`` without a guard, so an empty search
    result makes the indexing fail.
    """
    fold_dir = os.path.join(pipeline.model_path, pipeline.cfg["fold"])
    matches = search_files_or_folders_in_directory(
        os.path.join(fold_dir, WEIGHTS_DIR_NAME),
        WEIGHTS_FILE_NAME,
    )
    return matches[0]
def get_config_from_results():
    """Load every "config.json" found under the results folder.

    The results folder is read from the ``CAPREOLUS_RESULTS`` environment
    variable, falling back to ``get_default_results_dir()``. Returns a list
    with the parsed JSON content of each matching file, in the order the
    search returned them.
    """
    # TODO: Using 'results' if there's no env variable - this logic is
    # duplicated. Move to a single place?
    results_root = os.environ.get("CAPREOLUS_RESULTS", get_default_results_dir())

    def _load(path):
        # Parse a single config file, closing it as soon as it has been read.
        with open(path) as handle:
            return json.load(handle)

    config_paths = search_files_or_folders_in_directory(results_root, "config.json")
    return [_load(path) for path in config_paths]
def test_convknrm(monkeypatch, tmpdir):
    """Run a one-iteration ConvKNRM training on the dummy benchmark.

    Results and cache directories are redirected into ``tmpdir`` through the
    ``CAPREOLUS_RESULTS`` / ``CAPREOLUS_CACHE`` environment variables. After
    the run the test checks that exactly one ``config.json`` exists under the
    pipeline's base path with the expected settings, and that exactly one
    "dev" entry exists in the fold's ``weights`` directory.
    """
    monkeypatch.setenv("CAPREOLUS_RESULTS", str(os.path.join(tmpdir, "results")))
    monkeypatch.setenv("CAPREOLUS_CACHE", str(os.path.join(tmpdir, "cache")))

    def fake_magnitude_embedding(*args, **kwargs):
        # Stand-in for EmbedText.get_magnitude_embeddings; presumably avoids
        # loading real embeddings during the test (confirm against Magnitude).
        return Magnitude(None)

    # The same settings are used to build the pipeline and to run it; each
    # consumer receives its own copy so neither can mutate the other's dict.
    run_config = {
        "reranker": "ConvKNRM",
        "niters": 1,
        "benchmark": "dummy",
        "itersize": 1,
        "batch": 1,
    }

    pipeline = Pipeline(dict(run_config))
    pipeline.ex.main(train.train)
    monkeypatch.setattr(train, "pipeline", pipeline)
    monkeypatch.setattr(EmbedText, "get_magnitude_embeddings", fake_magnitude_embedding)
    pipeline.ex.run(config_updates=dict(run_config))

    logger.info("Base path is {0}".format(pipeline.base_path))

    config_files = search_files_or_folders_in_directory(pipeline.base_path, "config.json")
    assert len(config_files) == 1

    # Use a context manager so the config file handle is closed deterministically.
    with open(config_files[0], "rt") as config_handle:
        config_file = json.load(config_handle)
    assert config_file["reranker"] == "ConvKNRM"
    assert config_file["niters"] == 1

    run_path = os.path.join(pipeline.reranker_path, pipeline.cfg["fold"])
    weight_dir = os.path.join(run_path, "weights")
    weight_file = search_files_or_folders_in_directory(weight_dir, "dev")
    assert len(weight_file) == 1
def test_deeptilebar(monkeypatch, tmpdir):
    """Run a one-iteration DeepTileBar training on the dummy benchmark.

    Results and cache directories are redirected into ``tmpdir`` through the
    ``CAPREOLUS_RESULTS`` / ``CAPREOLUS_CACHE`` environment variables. After
    the run the test checks that exactly one ``config.json`` exists under the
    pipeline's base path with the expected settings, and that exactly one
    "dev" entry exists in the fold's ``weights`` directory.
    """
    monkeypatch.setenv("CAPREOLUS_RESULTS", str(os.path.join(tmpdir, "results")))
    monkeypatch.setenv("CAPREOLUS_CACHE", str(os.path.join(tmpdir, "cache")))

    def fake_magnitude_embedding(*args, **kwargs):
        # Stand-in for DeepTileExtractor.get_magnitude_embeddings; presumably
        # avoids loading real embeddings during the test (confirm against
        # Magnitude).
        return Magnitude(None)

    monkeypatch.setattr(DeepTileExtractor, "get_magnitude_embeddings", fake_magnitude_embedding)

    # Settings shared by pipeline construction and the run itself.
    run_config = {
        "reranker": "DeepTileBar",
        "niters": 1,
        "benchmark": "dummy",
        "itersize": 1,
        "batch": 1,
    }

    # "passagelen" is supplied only when constructing the pipeline, not in the
    # run's config_updates.
    pipeline = Pipeline(dict(run_config, passagelen="3"))
    pipeline.ex.main(train.train)
    monkeypatch.setattr(train, "pipeline", pipeline)
    pipeline.ex.run(config_updates=dict(run_config))

    config_files = search_files_or_folders_in_directory(pipeline.base_path, "config.json")
    assert len(config_files) == 1

    # Use a context manager so the config file handle is closed deterministically.
    with open(config_files[0], "rt") as config_handle:
        config_file = json.load(config_handle)
    assert config_file["reranker"] == "DeepTileBar"
    assert config_file["niters"] == 1

    run_path = os.path.join(pipeline.reranker_path, pipeline.cfg["fold"])
    weight_dir = os.path.join(run_path, "weights")
    weight_file = search_files_or_folders_in_directory(weight_dir, "dev")
    assert len(weight_file) == 1