示例#1
0
def get_index(corpus: Corpus, monitor: ProgressMonitor, multiple_processors=False, **kargs) -> Index:
    """
    Return the index cached on ``corpus``, building a new one when needed.

    The cached index (stored as ``corpus._orange3sma_index``) is reused only
    if it is truthy and its ``tokens`` attribute is the very same object as
    ``corpus._tokens``; otherwise a new ``Index`` is built and stored back
    on the corpus.

    Args:
        corpus: The corpus to index. A per-corpus lock and the resulting
            index are stored on it as private attributes.
        monitor: Receives progress updates (0 and 50) while rebuilding.
        multiple_processors: When true, the index is built with one process
            fewer than the CPU count (but at least one); otherwise with one.
        **kargs: Forwarded unchanged to the ``Index`` constructor.

    Returns:
        The cached or newly built index.
    """
    # Creating the per-corpus lock is itself guarded by the module-wide lock,
    # so two callers cannot each attach a different lock to the same corpus.
    with _GLOBAL_LOCK:
        if not hasattr(corpus, "_orange3sma_index_lock"):
            corpus._orange3sma_index_lock = Lock()

    with corpus._orange3sma_index_lock:
        cached = getattr(corpus, "_orange3sma_index", None)
        # Identity (not equality) check: the cache is valid only while the
        # corpus still holds the exact token object the index was built from.
        if cached and cached.tokens is corpus._tokens:
            return cached

        monitor.update(0, "Getting tokens")
        # Attribute access for its side effect only; presumably this makes
        # the corpus compute its tokens -- confirm against Corpus.tokens.
        corpus.tokens

        if multiple_processors:
            # Leave one CPU free, but never drop below a single process.
            worker_count = max(1, multiprocessing.cpu_count() - 1)
        else:
            worker_count = 1

        monitor.update(50, "Creating index")
        fresh = Index(corpus, procs=worker_count, **kargs)
        corpus._orange3sma_index = fresh
        return fresh