Example #1
def _wrapper(*args):
    with threadpool_limits(limits=1, user_api='blas'):
        # writes results of statistical_inefficiency to destination memmap (array_fn)
        seqs, truncate_acf, mact, I, J, array_fn, start, stop = args[0]
        array = np.memmap(array_fn, mode='r+', dtype=np.float64)
        partial = np.empty(len(I))
        for n, (i, j) in enumerate(zip(I, J)):
            s = _indicator_multitraj(seqs, i, j)
            partial[n] = statistical_inefficiency(s, truncate_acf=truncate_acf, mact=mact)
        array[start:stop] = partial
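
The worker above is meant to run in a subprocess: it unpacks one argument tuple, limits BLAS to a single thread via threadpoolctl's threadpool_limits, and writes its block of results into a shared numpy memmap so that no partial arrays have to be pickled back to the parent. The sketch below shows one possible way such argument tuples could be packed and dispatched; the helper _make_tasks is hypothetical (the original module uses an _arguments_generator that is not shown here), but the tuple layout follows the unpacking in _wrapper and the memmap setup mirrors Example #2 below.

import numpy as np


def _make_tasks(seqs, I, J, array_fn, n_blocks, truncate_acf=True, mact=2.0):
    # hypothetical helper: split the (i, j) index pairs into contiguous blocks;
    # each task carries the [start, stop) slice of the destination memmap it
    # must fill, in the same order that _wrapper unpacks its argument tuple
    bounds = np.linspace(0, len(I), n_blocks + 1, dtype=int)
    for start, stop in zip(bounds[:-1], bounds[1:]):
        yield (seqs, truncate_acf, mact, I[start:stop], J[start:stop],
               array_fn, int(start), int(stop))


# usage sketch (seqs, I, J and _wrapper come from the surrounding module):
#     ntf = tempfile.NamedTemporaryFile(delete=False)
#     arr = np.memmap(ntf.name, dtype=np.float64, mode='w+', shape=len(I))
#     with multiprocessing.pool.Pool(4) as pool:
#         pool.map(_wrapper, _make_tasks(seqs, I, J, ntf.name, n_blocks=4))
#     data = np.array(arr[:])
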
Example #2
def statistical_inefficiencies(dtrajs,
                               lag,
                               C=None,
                               truncate_acf=True,
                               mact=2.0,
                               n_jobs=1,
                               callback=None):
    r""" Computes statistical inefficiencies of sliding-window transition counts at given lag

    Consider a discrete trajectory :math:`\{ x_t \}` with :math:`x_t \in \{1, ..., n\}`. For each starting state :math:`i`,
    we collect the target sequence

    .. math::

        Y^{(i)} = \{ x_{t+\tau} \mid x_t = i \}

    which contains the time-ordered target states at times :math:`t+\tau` whenever we started in state :math:`i`
    at time :math:`t`. Then we define the indicator sequence:

    .. math::

        a^{(i,j)}_t(\tau) = \mathbb{1}(Y^{(i)}_t = j)

    The statistical inefficiency for transition counts :math:`c_{ij}(\tau)` is computed as the statistical inefficiency
    of the sequence :math:`a^{(i,j)}_t (\tau)`.

    Parameters
    ----------
    dtrajs : list of int-iterables
        discrete trajectories
    lag : int
        lag time
    C : scipy sparse matrix (n, n) or None
        sliding window count matrix, if already available
    truncate_acf : bool, optional, default=True
        When the normalized autocorrelation function passes through 0, it is truncated in order to avoid integrating
        random noise.
    n_jobs : int, optional, default=1
        If greater than one, the statistical inefficiencies are computed with multiple processes.
    callback : callable, optional, default=None
        Called once for every statistical inefficiency computed, i.e. once per nonzero element of the count matrix.
        If n_jobs is greater than one, the callback is instead invoked once per finished batch.

    Returns
    -------
    I : scipy sparse matrix (n, n)
        Statistical inefficiency matrix with a sparsity pattern identical to the sliding-window count matrix at the
        same lag time. Will contain a statistical inefficiency :math:`I_{ij} \in (0,1]` whenever there is a count
        :math:`c_{ij} > 0`. When there is no transition count (:math:`c_{ij} = 0`), the statistical inefficiency is 0.

    See also
    --------
    deeptime.markov.tools.util.statistics.statistical_inefficiency
        used to compute the statistical inefficiency for conditional trajectories

    """
    # count matrix
    if C is None:
        C = count_matrix_coo2_mult(dtrajs, lag, sliding=True, sparse=True)
    if callback is not None:
        if not callable(callback):
            raise ValueError('Provided callback is not callable')
    # split sequences
    splitseq = _split_sequences_multitraj(dtrajs, lag)
    # compute inefficiencies
    I, J = C.nonzero()
    if n_jobs > 1:
        from multiprocessing.pool import Pool
        from contextlib import closing
        import tempfile

        # to avoid pickling partial results, we store these in a numpy.memmap
        ntf = tempfile.NamedTemporaryFile(delete=False)
        arr = np.memmap(ntf.name, dtype=np.float64, mode='w+', shape=C.nnz)
        #arr[:] = np.nan
        gen = _arguments_generator(I,
                                   J,
                                   splitseq,
                                   truncate_acf=truncate_acf,
                                   mact=mact,
                                   array=ntf.name,
                                   njobs=n_jobs)
        if callback:
            x = gen.n_blocks()
            _callback = lambda _: callback(x)
        else:
            _callback = callback
        with closing(Pool(n_jobs)) as pool:
            result_async = [
                pool.apply_async(_wrapper, (args, ), callback=_callback)
                for args in gen
            ]

            [t.get() for t in result_async]
            data = np.array(arr[:])
            #assert np.all(np.isfinite(data))
        import os
        os.unlink(ntf.name)
    else:
        data = np.empty(C.nnz)
        for index, (i, j) in enumerate(zip(I, J)):
            s = _indicator_multitraj(splitseq, i, j)
            data[index] = statistical_inefficiency(s, truncate_acf=truncate_acf, mact=mact)
            if callback is not None:
                callback(1)
    res = csr_matrix((data, (I, J)), shape=C.shape)
    return res
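
As a quick illustration of the definition in the docstring: for a single discrete trajectory (0, 1, 1, 0, 1, 1) at lag 1, starting state i = 1 yields the target sequence Y^(1) = (1, 0, 1), so the indicator sequence a^(1,1) = (1, 0, 1) and its statistical inefficiency becomes the matrix entry for the transition 1 -> 1. Below is a minimal usage sketch of statistical_inefficiencies, assuming the surrounding module is imported; the trajectories are toy data and no particular output values are claimed.

import numpy as np

# two short discrete trajectories over the states {0, 1}
dtrajs = [np.array([0, 1, 1, 0, 1, 1, 0, 0, 1, 0]),
          np.array([1, 1, 0, 1, 0, 0, 1, 1, 1, 0])]

# statistical inefficiencies of the sliding-window counts at lag 2, computed
# serially; the callback fires once per nonzero count-matrix element
ineff = statistical_inefficiencies(dtrajs, lag=2, n_jobs=1,
                                   callback=lambda n: print(n, 'element(s) done'))

# sparse (n, n) matrix with I_ij in (0, 1] wherever c_ij(lag) > 0
print(ineff.toarray())
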