def _wrapper(*args):
    """Worker for the parallel path: compute statistical inefficiencies for a
    batch of (i, j) index pairs and write them into the shared result memmap.

    ``args[0]`` is a single packed tuple
    ``(seqs, truncate_acf, mact, I, J, array_fn, start, stop)`` so the task can
    be submitted via ``Pool.apply_async`` with one positional argument.
    Results for pairs ``zip(I, J)`` are written to ``array[start:stop]`` of the
    float64 memmap backed by file ``array_fn``.
    """
    # Limit BLAS to one thread per worker process so the processes do not
    # oversubscribe the CPU.
    with threadpool_limits(limits=1, user_api='blas'):
        seqs, truncate_acf, mact, I, J, array_fn, start, stop = args[0]
        out = np.memmap(array_fn, mode='r+', dtype=np.float64)
        batch = np.empty(len(I))
        for k, (i, j) in enumerate(zip(I, J)):
            indicator = _indicator_multitraj(seqs, i, j)
            batch[k] = statistical_inefficiency(indicator,
                                                truncate_acf=truncate_acf,
                                                mact=mact)
        # Single contiguous write of this batch's slice into the shared array.
        out[start:stop] = batch
def statistical_inefficiencies(dtrajs, lag, C=None, truncate_acf=True, mact=2.0, n_jobs=1, callback=None):
    r""" Computes statistical inefficiencies of sliding-window transition counts at given lag

    Consider a discrete trajectory :math:`\{ x_t \}` with :math:`x_t \in \{1, ..., n\}`. For each starting state
    :math:`i`, we collect the target sequence

    .. math::
        Y^{(i)} = \{x_{t+\tau} \mid x_{t}=i\}

    which contains the time-ordered target states at times :math:`t+\tau` whenever we started in state :math:`i`
    at time :math:`t`. Then we define the indicator sequence:

    .. math::
        a^{(i,j)}_t (\tau) = 1(Y^{(i)}_t = j)

    The statistical inefficiency for transition counts :math:`c_{ij}(\tau)` is computed as the statistical
    inefficiency of the sequence :math:`a^{(i,j)}_t (\tau)`.

    Parameters
    ----------
    dtrajs : list of int-iterables
        discrete trajectories
    lag : int
        lag time
    C : scipy sparse matrix (n, n) or None
        sliding window count matrix, if already available
    truncate_acf : bool, optional, default=True
        When the normalized autocorrelation function passes through 0, it is truncated in order
        to avoid integrating random noise
    mact : float, optional, default=2.0
        multiplier of the autocorrelation time, passed through to
        :func:`statistical_inefficiency`
    n_jobs : int, default=1
        If greater one, the function will be evaluated with multiple processes.
    callback : callable, default=None
        will be called for every statistical inefficiency computed (number of nonzero elements
        in count matrix). If n_jobs is greater one, the callback will be invoked per finished batch.

    Returns
    -------
    I : scipy sparse matrix (n, n)
        Statistical inefficiency matrix with a sparsity pattern identical to the sliding-window
        count matrix at the same lag time. Will contain a statistical inefficiency
        :math:`I_{ij} \in (0,1]` whenever there is a count :math:`c_{ij} > 0`. When there is no
        transition count (:math:`c_{ij} = 0`), the statistical inefficiency is 0.

    See also
    --------
    deeptime.markov.tools.util.statistics.statistical_inefficiency
        used to compute the statistical inefficiency for conditional trajectories
    """
    # count matrix (computed here unless the caller already has it)
    if C is None:
        C = count_matrix_coo2_mult(dtrajs, lag, sliding=True, sparse=True)
    if callback is not None and not callable(callback):
        raise ValueError('Provided callback is not callable')
    # split sequences into per-starting-state target sequences
    splitseq = _split_sequences_multitraj(dtrajs, lag)
    # compute inefficiencies, one per nonzero count-matrix entry
    I, J = C.nonzero()
    if n_jobs > 1:
        from multiprocessing.pool import Pool
        from contextlib import closing
        import os
        import tempfile

        # To avoid pickling partial results, workers write into a shared
        # numpy.memmap backed by a temporary file.
        ntf = tempfile.NamedTemporaryFile(delete=False)
        try:
            arr = np.memmap(ntf.name, dtype=np.float64, mode='w+', shape=C.nnz)
            # BUG FIX: mact was previously passed as mact=truncate_acf, so the
            # parallel path used a wrong mact value and disagreed with the
            # serial path below.
            gen = _arguments_generator(I, J, splitseq, truncate_acf=truncate_acf,
                                       mact=mact, array=ntf.name, njobs=n_jobs)
            if callback is not None:
                # One callback invocation per finished batch, reporting the
                # batch size (number of inefficiencies computed in that batch).
                x = gen.n_blocks()
                _callback = lambda _: callback(x)
            else:
                _callback = callback
            with closing(Pool(n_jobs)) as pool:
                result_async = [pool.apply_async(_wrapper, (args,), callback=_callback)
                                for args in gen]
                # Block until all workers are done; get() re-raises worker errors.
                for res_ in result_async:
                    res_.get()
            data = np.array(arr[:])
        finally:
            # Release the memmap handle before unlinking; required on Windows,
            # where an open mapping keeps the file locked.
            del arr
            ntf.close()
            os.unlink(ntf.name)
    else:
        data = np.empty(C.nnz)
        for index, (i, j) in enumerate(zip(I, J)):
            data[index] = statistical_inefficiency(_indicator_multitraj(splitseq, i, j),
                                                   truncate_acf=truncate_acf, mact=mact)
            if callback is not None:
                callback(1)
    res = csr_matrix((data, (I, J)), shape=C.shape)
    return res