Example #1
def _compute_combination(gen):
    # materialize the pairs so the iterable can be traversed twice below;
    # a raw generator would be exhausted by the first comprehension
    pairs = list(gen)
    # wrap in a progress bar if tqdm is installed
    if is_tqdm_installed():
        from tqdm import tqdm
        _generator = tqdm(pairs, position=0)
    else:
        _generator = pairs

    F = [_ratio_and_distance(a, b, True) for a, b in _generator]
    res = pd.DataFrame(pairs, columns=["x", "y"])
    res["L"] = F
    return res
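
These snippets are excerpts from a larger module, so they assume module-level imports such as `import itertools as it`, `import pandas as pd` and `import os`, plus a helper `is_tqdm_installed()` that is not shown on this page. A minimal sketch of that helper, assuming it only needs to probe whether the package is importable:

import importlib.util

def is_tqdm_installed() -> bool:
    # probe for the tqdm package without actually importing it
    return importlib.util.find_spec("tqdm") is not None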
Example #2
def _map_comp(f, *args):
    # assumes at least one iterable argument is passed
    N = len(args)
    narg = len(args[0])
    # wrap the iterable in a progress bar if tqdm is installed
    if is_tqdm_installed():
        from tqdm import tqdm
        _gen = (tqdm(args[0], position=0, total=narg) if N == 1 else tqdm(
            it.zip_longest(*args), position=0, total=narg))
    else:
        _gen = args[0] if N == 1 else it.zip_longest(*args)
    # a single iterable maps element-wise; multiple iterables are zipped
    # together and unpacked into f
    if N == 1:
        return [f(arg) for arg in _gen]
    else:
        return [f(*arg) for arg in _gen]
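
For reference, a call such as `_map_comp(f, xs, ys)` behaves like a plain zipped list comprehension, just with an optional progress bar. A quick equivalence check (`it` is `itertools`, as assumed above):

import itertools as it

xs, ys = [1, 2, 3], [4, 5, 6]
# _map_comp(lambda a, b: a + b, xs, ys) is equivalent to:
print([a + b for a, b in it.zip_longest(xs, ys)])
# -> [5, 7, 9]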
Example #3
def _corr_combination(data, comb, niter, covar, cart_z, method, output, verbose):
    # when taking the cartesian product over covariates, the total number of
    # iterations grows by a factor of len(covar); adjust niter before it is
    # handed to tqdm as the progress bar length
    if covar is not None and cart_z:
        niter *= len(covar)

    # handle whether to use a progress bar, depending on if tqdm is installed
    if is_tqdm_installed():
        from tqdm import tqdm
        # wrap the generator in tqdm; comb is an iterable, so niter sets the length
        if covar is not None and cart_z:
            _generator = tqdm(it.product(comb, covar), position=0, total=niter)
        else:
            _generator = tqdm(comb, position=0, total=niter)
    else:
        # there is no tqdm; use the bare iterables
        if covar is not None and cart_z:
            _generator = it.product(comb, covar)
        else:
            _generator = comb

    # with no covariates, simple correlation.
    if covar is None:
        # select appropriate function rho.
        rho = _bicorr_inner_score if output == "score" else _bicorr_inner_full
        # iterate and calculate rho
        result_k = [rho(data[x], data[y], method)
                    for x, y in _generator]
    else:
        # cartesian case: condition each (x, y) pair on the single covariate z
        # drawn from the product above
        if cart_z:
            result_k = [
                _partial_bicorr_inner(data, x, y, z, method=method, output=output)
                for (x, y), z in _generator
            ]
        else:
            # otherwise do all pairwise correlations with the fixed covariate matrix
            result_k = [
                _partial_bicorr_inner(data, x, y, covar, method=method, output=output)
                for x, y in _generator
            ]

    # result_k is a list of dicts - assemble the records into a DataFrame
    return pd.DataFrame.from_records(result_k)
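
The `for (x, y), z in _generator` unpacking works because `it.product` pairs each (x, y) combination with one covariate at a time. An illustration with hypothetical column names:

import itertools as it

comb = [("a", "b"), ("a", "c")]  # hypothetical column pairs
covar = ["z1", "z2"]             # hypothetical covariate columns
print(list(it.product(comb, covar)))
# [(('a', 'b'), 'z1'), (('a', 'b'), 'z2'), (('a', 'c'), 'z1'), (('a', 'c'), 'z2')]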
Example #4
def _parallel_list_comprehension(f, *args):
    from joblib import cpu_count, Parallel, delayed

    N = len(args)

    if N == 0:
        return f()
    else:
        if is_tqdm_installed():
            # use tqdm to wrap around our iterable
            _Threaded = TqdmParallel(use_tqdm=True, total=len(args[0]))
        else:
            _Threaded = Parallel

        if N == 1:
            n = len(args[0])
            ncpu = n if n < cpu_count() else (cpu_count() - 1)
            um = _Threaded(ncpu)(delayed(f)(arg) for arg in args[0])
        else:
            # the task count is the length of the longest argument list,
            # not the number of argument lists
            n = max(len(a) for a in args)
            ncpu = n if n < cpu_count() else (cpu_count() - 1)
            um = _Threaded(ncpu)(delayed(f)(*arg)
                                 for arg in it.zip_longest(*args))
        return um
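
`TqdmParallel` is not defined on this page. A minimal factory sketch that matches the calling convention used above, `_Threaded(ncpu)(tasks)` - an assumption, not the library's actual class - with the caveat that the bar advances as tasks are dispatched rather than completed:

from joblib import Parallel
from tqdm import tqdm

class TqdmParallel:
    # hypothetical stand-in: calling an instance with n_jobs returns a runner
    # that executes the delayed tasks under a tqdm progress bar
    def __init__(self, use_tqdm: bool = True, total: int = None):
        self.use_tqdm = use_tqdm
        self.total = total

    def __call__(self, n_jobs):
        def _run(tasks):
            if self.use_tqdm:
                tasks = tqdm(tasks, total=self.total, position=0)
            return Parallel(n_jobs=n_jobs)(tasks)
        return _run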
Example #5
def umappcc(fn: str, f: Callable, *args):
    """Performs Map comprehension with Parallelism and Caching by Chunks.

    That is to say that the first time this runs, function f(*args) is called
        and the result is stored in a cache file. On subsequent runs the
        cached file is read back and no execution takes place.

    Further to this, 'by-chunks' means that each step is stored separately as a file
        and concatenated together at the end. If a program stops halfway through
        execution, re-running it restarts from the last cached element, which is
        incredibly useful during debugging and prototype development.

    This assumes each iteration is independent from each other in the list comprehension.

    Parameters
    ----------
    fn : str
        The path and filename.
    f : function
        The function to call
    *args : list-like
        Arguments to pass as f(*args)

    Returns
    -------
    res : Any
        The results from f(*args) or from file

    Examples
    --------
    See `turb.utils.umap` for examples.
    """
    from joblib import Parallel, cpu_count, delayed
    if os.path.isfile(fn):
        return _load_file(fn)
    else:
        # pre-compute iterable
        its = list(it.zip_longest(*args))
        n = len(its)
        ncpu = n if n < cpu_count() else (cpu_count() - 1)
        # check the directory actually exists before continuing
        check_file_path(fn, False, True, 0)

        # use tqdm for display.
        if is_tqdm_installed():
            # use our custom tqdm parallel class
            _Threaded = TqdmParallel(use_tqdm=True, total=n)
        else:
            _Threaded = Parallel

        # create a cache directory inside the directory where the final file will go
        relfile, abscachedir = _create_cache_directory(fn)
        # do list comprehension using parallelism
        um = _Threaded(ncpu)(
            delayed(_mini_cache)(add_suf(relfile, str(i)), f, *arg)
            for i, arg in enumerate(its))
        # save final version
        _write_file(um, fn)
        # delete temp versions
        _delete_temps(abscachedir)
        # return
        return um
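
The helpers `_mini_cache`, `_load_file`, `_write_file` and `add_suf` come from the same module and are not shown here; `add_suf(relfile, str(i))` presumably appends the chunk index to the filename. A plausible sketch of `_mini_cache`, assuming one pickled cache file per chunk (the real helper would use the module's own `_load_file`/`_write_file`):

import os
import pickle

def _mini_cache(fn, f, *args):
    # hypothetical helper: return the cached chunk if it exists,
    # otherwise compute f(*args), store it, and return the result
    if os.path.isfile(fn):
        with open(fn, "rb") as fp:
            return pickle.load(fp)
    result = f(*args)
    with open(fn, "wb") as fp:
        pickle.dump(result, fp)
    return result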
Example #6
def umapcc(fn: str, f: Callable, *args):
    """Performs Map comprehension with Caching by Chunks.

    That is to say that the first time this runs, function f(*args) is called
        and the result is stored in a cache file. On subsequent runs the
        cached file is read back and no execution takes place.

    Further to this, 'by-chunks' means that each step is stored separately as a file
    and concatenated together at the end. The intermediate caches are removed
    automatically once the final result is written. If the program crashes
    part-way through, re-running will resume from the last stored chunk.

    Parameters
    ----------
    fn : str
        The path and filename.
    f : function
        The function to call
    *args : list-like
        Arguments to pass as f(*args)

    Returns
    -------
    res : Any
        The results from f(*args) or from file

    Examples
    --------
    See `turb.utils.umap` for examples.
    """
    if os.path.isfile(fn):
        return _load_file(fn)
    else:
        # pre-compute iterable
        its = list(it.zip_longest(*args))
        n = len(its)
        # check the directory actually exists before continuing
        check_file_path(fn, False, True, 0)

        # use tqdm for display.
        if is_tqdm_installed():
            from tqdm import tqdm
            _generator = enumerate(tqdm(its, position=0, total=n))
        else:
            _generator = enumerate(its)

        # create a cache directory inside the directory where the final file will go
        relfile, abscachedir = _create_cache_directory(fn)
        # run the list comprehension with per-item chunk caching

        um = [
            _mini_cache(add_suf(relfile, str(i)), f, *arg)
            for i, arg in _generator
        ]
        # save final version
        _write_file(um, fn)
        # delete the temp files
        _delete_temps(abscachedir)
        # return
        return um
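
A hypothetical usage, with "results.pkl" as an assumed target filename:

# first run: computes f per element, caching each chunk, then writes results.pkl
out = umapcc("results.pkl", lambda x: x ** 2, range(100))
# second run: results.pkl exists, so it is loaded and nothing is recomputed
out = umapcc("results.pkl", lambda x: x ** 2, range(100))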