Example #1
    def get_documents_from_disk(self, doc_ids):
        """
        Does not make use of the index. We use pyserini's disk traversal methods to retrieve documents. This allows
        us to get away with much smaller indexes on disk, since the index no longer has to store the document contents.
        """
        start = time.time()
        logger.info("Starting to get documents from disk")
        document_type = self.collection.config["documents"]["type"]
        if document_type == "trec":
            ctype = "TrecCollection"
        elif document_type == "trecweb":
            ctype = "TrecwebCollection"
        else:
            # For clueweb12, document_type in yaml is the same as anserini - ClueWeb12Collection
            ctype = document_type

        rootdir = self.collection.config["documents"]["path"]
        p = subprocess.run(
            ["python", get_crawl_collection_script(), rootdir, ctype],
            stdout=subprocess.PIPE,
            input=",".join(doc_ids),
            check=True,
            encoding="utf-8",
        )
        with open(
                "{0}/disk_crawl_temp_dump.json".format(
                    os.getenv("CAPREOLUS_CACHE", get_default_cache_dir())),
                "rt") as fp:
            fetched_docs = json.load(fp)

        return [fetched_docs.get(doc_id, []) for doc_id in doc_ids]
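
The method and the crawl script communicate only through the child's stdin (comma-separated doc ids) and a JSON dump in the cache directory. Below is a minimal, self-contained sketch of that handshake; the stub script, doc ids, and dump path are hypothetical stand-ins for the real crawl collection script and cache location.

import json
import os
import subprocess
import sys
import tempfile

# The parent passes comma-separated doc ids on the child's stdin; the child
# writes a {doc_id: [contents]} JSON dump that the parent reads back.
dump_path = os.path.join(tempfile.gettempdir(), "disk_crawl_temp_dump.json")
stub_path = os.path.join(tempfile.gettempdir(), "crawl_stub.py")
with open(stub_path, "w") as f:
    f.write(
        'import json, sys\n'
        'doc_ids = sys.stdin.read().split(",")\n'
        'json.dump({d: ["text of " + d] for d in doc_ids}, open(sys.argv[1], "w"))\n'
    )

# sys.executable avoids relying on a bare "python" being on PATH
subprocess.run([sys.executable, stub_path, dump_path], input="doc1,doc2", check=True, encoding="utf-8")

with open(dump_path, "rt") as fp:
    fetched_docs = json.load(fp)

print([fetched_docs.get(doc_id, []) for doc_id in ["doc1", "doc2"]])
# -> [['text of doc1'], ['text of doc2']]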
Example #2
def crawl():
    """
    Iterates through every document in a collection, looking for the doc ids passed on stdin as a comma-separated list.
    Spawns multiple processes to do this; a ClueWeb12 crawl completes in approximately 42 hours with 8 processes.
    See `get_documents_from_disk()` in anserini.py for how this script is invoked.
    """
    rootdir = sys.argv[1]
    ctype = sys.argv[2]
    doc_ids = set(input().split(","))
    manager = Manager()
    shared_dict = manager.dict()
    multiprocess_start = time.time()
    logger.debug("Start multiprocess")
    args_list = []
    for subdir in os.listdir(rootdir):
        if os.path.isdir(rootdir + "/" + subdir):
            args_list.append({
                "doc_ids": doc_ids,
                "rootdir": rootdir + "/" + subdir,
                "ctype": ctype,
                "shared_dict": shared_dict
            })

    pool = Pool(processes=8)
    pool.map(spawn_child_process_to_read_docs, args_list)

    logger.debug(
        "Getting all documents from disk took: {0}".format(time.time() -
                                                           multiprocess_start))
    # TODO: This will fail if multiple crawls are running at the same time
    with open(
            "{0}/disk_crawl_temp_dump.json".format(
                os.getenv("CAPREOLUS_CACHE", get_default_cache_dir())),
            "w") as fp:
        json.dump(shared_dict.copy(), fp)
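
crawl() only defines the driver; the worker it maps over receives one of the args dicts built above. The following is a hedged, self-contained sketch of that worker contract: the real spawn_child_process_to_read_docs iterates the collection with pyserini's disk traversal (see get_documents_from_disk() above), while plain file reads stand in here so the example runs on its own.

import os
from multiprocessing import Manager, Pool

def spawn_child_process_to_read_docs(args):
    # Each worker fills the Manager-backed shared_dict with the documents it
    # finds under its own subdirectory of the collection.
    doc_ids, rootdir, shared_dict = args["doc_ids"], args["rootdir"], args["shared_dict"]
    for fname in os.listdir(rootdir):
        path = os.path.join(rootdir, fname)
        if fname in doc_ids and os.path.isfile(path):  # pretend the filename is the doc id
            with open(path) as f:
                shared_dict[fname] = [f.read()]

if __name__ == "__main__":
    manager = Manager()
    shared_dict = manager.dict()
    args_list = [{"doc_ids": {"doc1"}, "rootdir": ".", "ctype": "TrecCollection", "shared_dict": shared_dict}]
    with Pool(processes=2) as pool:
        pool.map(spawn_child_process_to_read_docs, args_list)
    print(shared_dict.copy())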
Example #3
    @staticmethod
    def get_available_indices():
        cache_path = os.environ.get("CAPREOLUS_CACHE", get_default_cache_dir())
        index_dirs = search_files_or_folders_in_directory(cache_path, "index")
        # an index directory is considered usable only if a "done" marker file was written inside it
        index_dirs_with_done = [
            index_dir for index_dir in index_dirs if search_files_or_folders_in_directory(index_dir, "done")
        ]

        return index_dirs_with_done
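
A minimal self-contained sketch of the same "done" marker convention, using os.walk instead of the search_files_or_folders_in_directory helper; the cache path fallback here is hypothetical.

import os

def find_complete_index_dirs(cache_path):
    """Return index directories that contain a 'done' marker file."""
    complete = []
    for dirpath, dirnames, filenames in os.walk(cache_path):
        if os.path.basename(dirpath) == "index" and "done" in filenames:
            complete.append(dirpath)
    return complete

print(find_complete_index_dirs(os.environ.get("CAPREOLUS_CACHE", "/tmp/capreolus_cache")))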
Example #4
    def get_paths(self, config):
        """
        Returns a dictionary of various paths
        :param config: A sacred config
        :return: A dict, e.g.:
        {
            "collection_path": "path",
            "base_path": "path",
            "cache_path": "path",
            "index_path": "path",
            "index_key": "path",
            "run_path": "path",
            "model_path": "path",
            "trained_weight_path": "path"
        }
        """
        expid = config["expid"]
        collection_path = self.module2cls["collection"].basepath
        base_path = os.environ.get("CAPREOLUS_RESULTS",
                                   get_default_results_dir())
        cache_path = os.environ.get("CAPREOLUS_CACHE", get_default_cache_dir())
        index_key = os.path.join(cache_path, config["collection"],
                                 self.module_key("index"))
        index_path = os.path.join(index_key, "index")
        run_path = os.path.join(index_key, "searcher",
                                self.module_key("searcher"))
        model_path = os.path.join(
            base_path,
            expid,
            config["collection"],
            self.module_key("index"),
            self.module_key("searcher"),
            self.module_key("benchmark"),
            self.module_key("pipeline"),
            self.module_key("reranker") + "_" + self.module_key("extractor"),
        )
        trained_weight_path = os.path.join(model_path, config["fold"],
                                           "weights", "dev")

        return {
            "collection_path": collection_path,
            "base_path": base_path,
            "cache_path": cache_path,
            "index_path": index_path,
            "index_key": index_key,
            "run_path": run_path,
            "model_path": model_path,
            "trained_weight_path": trained_weight_path,
        }
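
For illustration, a hedged sketch of how these keys nest on disk; the concrete names below ("robust04" and the stemmer/searcher strings) are hypothetical stand-ins for config["collection"] and whatever module_key() returns for a given config.

import os

cache_path = "/tmp/capreolus_cache"
index_key = os.path.join(cache_path, "robust04", "index_anserini_stemmer-porter")
index_path = os.path.join(index_key, "index")
run_path = os.path.join(index_key, "searcher", "searcher_bm25_k1-0.9_b-0.4")

print(index_path)  # /tmp/capreolus_cache/robust04/index_anserini_stemmer-porter/index
print(run_path)    # .../index_anserini_stemmer-porter/searcher/searcher_bm25_k1-0.9_b-0.4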
Example #5
    def __init__(self, embedding_name):
        """
        If the _is_initialized class property is not set, build the benchmark and model (expensive).
        Else, do nothing.
        """
        self.embedding_name = embedding_name
        self.embedding = Magnitude(
            MagnitudeUtils.download_model(
                self.SUPPORTED_EMBEDDINGS[embedding_name],
                download_dir=os.environ.get("CAPREOLUS_CACHE", get_default_cache_dir()),
            ),
            lazy_loading=-1,
            blocking=True,
        )
        self.stoi = {self.PAD: 0}  # string to integer; associates an integer value with every token
        self.itos = {0: self.PAD}  # integer to string; the reverse mapping
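
A hedged sketch of how the stoi/itos maps above typically grow and how the Magnitude object is queried. "glove.magnitude" is a hypothetical local file (the snippet above downloads the real one via MagnitudeUtils.download_model), and the PAD value is assumed.

from pymagnitude import Magnitude

embedding = Magnitude("glove.magnitude", lazy_loading=-1, blocking=True)

PAD = "<pad>"  # hypothetical value of self.PAD
stoi, itos = {PAD: 0}, {0: PAD}
for tok in ["neural", "retrieval"]:
    if tok not in stoi:
        idx = len(stoi)
        stoi[tok], itos[idx] = idx, tok

vector = embedding.query("retrieval")  # pymagnitude returns a vector even for OOV tokens
print(vector.shape, stoi, itos)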
Example #6
    def create(self):
        self.tokenizer = BertTokenizer.from_pretrained(
            self.tokmodel,
            cache_dir=os.environ.get("CAPREOLUS_CACHE", get_default_cache_dir()),
        )
        self.vocab = self.tokenizer.vocab
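
A hedged usage sketch of what create() sets up, assuming the Hugging Face transformers BertTokenizer and "bert-base-uncased" as a stand-in for self.tokmodel.

from transformers import BertTokenizer

tokenizer = BertTokenizer.from_pretrained("bert-base-uncased")  # stand-in for self.tokmodel
tokens = tokenizer.tokenize("neural ranking models")
token_ids = tokenizer.convert_tokens_to_ids(tokens)
print(tokens, token_ids)
print(len(tokenizer.vocab))  # the same mapping exposed as self.vocab above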