def transfer_info(leader, follower, timescale):
    """
    Wrapper for computing transfer entropy between two trajectories using a binary
    representation.
    2016-11-09

    Params:
    -------
    leader (vector)
    follower (vector)
    timescale (vector)
        Given in units of the number of entries to skip in sample.
    """
    from entropy.transfer import TransferEntropy
    te = TransferEntropy()

    lchange = discrete_vel(leader, timescale)
    fchange = discrete_vel(follower, timescale)

    if lchange.ndim > 1:
        lchange = unique_rows(lchange, return_inverse=True)
    else:
        lchange = unique_rows(lchange[:, None], return_inverse=True)
    if fchange.ndim > 1:
        fchange = unique_rows(fchange, return_inverse=True)
    else:
        fchange = unique_rows(fchange[:, None], return_inverse=True)

    ltofinfo = te.n_step_transfer_entropy(lchange, fchange, discretize=False)
    ftolinfo = te.n_step_transfer_entropy(fchange, lchange, discretize=False)
    return ltofinfo, ftolinfo
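# A minimal usage sketch for transfer_info, not part of the original module. It assumes
# `discrete_vel` and `unique_rows` are importable at module level as used above, and that
# a single coarse-graining timescale can be passed through to discrete_vel; the
# trajectories below are synthetic stand-ins for real leader/follower data.
def _example_transfer_info():
    import numpy as np

    np.random.seed(0)
    leader = np.cumsum(np.random.randn(500))
    # Follower copies the leader with a 2-sample delay plus noise.
    follower = np.roll(leader, 2) + .1 * np.random.randn(500)

    ltof, ftol = transfer_info(leader, follower, 5)
    print("leader->follower (bits):", ltof)
    print("follower->leader (bits):", ftol)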
def zipf_law(X):
    """Return frequency rank of states.

    Parameters
    ----------
    X : ndarray
        (n_samples,n_dim)

    Returns
    -------
    uniqX : ndarray
        Unique states, sorted from most to least probable.
    uniqIx : ndarray
        Indices of the unique states in X such that X[uniqIx] gives uniqX.
    p : ndarray
        Probability of each unique state.
    """
    from misc.utils import unique_rows

    # Collect unique states.
    uniqIx = unique_rows(X)
    uniqX = X[uniqIx]
    p = np.bincount(unique_rows(X, return_inverse=True))
    p = p / p.sum()

    # Sort everything by the probability.
    sortIx = np.argsort(p)[::-1]
    p = p[sortIx]
    uniqIx = uniqIx[sortIx]
    uniqX = uniqX[sortIx]

    return uniqX, uniqIx, p
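# Small illustration of zipf_law on synthetic categorical samples; the helper name below
# is illustrative and not part of the original module.
def _example_zipf_law():
    import numpy as np

    np.random.seed(0)
    # Two binary spins drawn with unequal frequencies so states have distinct probabilities.
    X = np.random.choice([0, 1], size=(1000, 2), p=[.7, .3])

    uniqX, uniqIx, p = zipf_law(X)
    # States are returned sorted from most to least probable.
    for state, prob in zip(uniqX, p):
        print(state, round(prob, 3))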
def preprocess_average_repeat_values(X, Y):
    """For any repeat data points X, take the average of the measured values Y.

    Parameters
    ----------
    X : ndarray
    Y : ndarray

    Returns
    -------
    XnoRepeats : ndarray
    YnoRepeats : ndarray
    """
    Xsquished = X[unique_rows(X)]
    Ysquished = np.zeros(len(Xsquished))
    for i, row in enumerate(Xsquished):
        Ysquished[i] = Y[(row[None, :] == X).all(1)].mean()
    return Xsquished, Ysquished
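# Sketch of how preprocess_average_repeat_values collapses repeated design points, using
# made-up measurements with a duplicate row in X; not part of the original module.
def _example_average_repeat_values():
    import numpy as np

    X = np.array([[0, 0],
                  [1, 0],
                  [0, 0],   # repeat of the first row
                  [1, 1]])
    Y = np.array([1., 2., 3., 4.])

    Xu, Yu = preprocess_average_repeat_values(X, Y)
    # The two [0,0] rows are merged and their Y values averaged to 2.0.
    print(Xu)
    print(Yu)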
def n_step_transfer_entropy(self, x, y,
                            kPast=1, kPastOther=1, kFuture=1,
                            bins=[10, 10, 10],
                            discretize=True,
                            returnProbabilities=False):
    """
    Transfer entropy from x->y.

    Uses histogram binning for unidimensional data and k-means clustering for
    k-dimensional data where input data points are a set of points from a trajectory.
    Computes the n-step transfer entropy by summing the joint entropy terms that appear
    when the transfer entropy is rewritten.

    Note: Random seeds with k-means clustering might affect the computed results. It is a
    good idea to try several iterations or many different k-means seeds.
    2016-11-09

    Params:
    -------
    x,y (n_samples,n_dim)
    kPast (int)
        k steps into the past
    kPastOther (int)
        k steps into the past for the other trajectory that we're conditioning on
    kFuture (int)
        k steps into the future
    bins (list of ints)
        [binsPast,binsOtherPast,binsFuture], number of bins (or clusters) for trajectories
    discretize (bool=True)
        Whether or not to discretize the data.
    returnProbabilities (bool=False)
    """
    # Variables
    # discreteFuture,discretePast,discreteOtherPast : 1d vectors labeling sets of trajectories
    kPastMx = max([kPast, kPastOther])
    transferEntropy = 0.

    # Construct matrix of data points (i_{n+1},i_n,j_n) where i and j are vectors.
    future = np.zeros((x.shape[0] - kPastMx - kFuture + 1, kFuture))
    past = np.zeros((x.shape[0] - kPastMx - kFuture + 1, kPast))
    otherPast = np.zeros((x.shape[0] - kPastMx - kFuture + 1, kPastOther))
    for i in range(future.shape[0]):
        future[i, :] = y[(i + kPastMx):(i + kPastMx + kFuture)]
        past[i, :] = y[(i + kPastMx - kPast):(i + kPastMx)]
        otherPast[i, :] = x[(i + kPastMx - kPastOther):(i + kPastMx)]

    if discretize:
        discreteFuture = self.digitize_vector_or_scalar(future, bins[2])
        discretePast = self.digitize_vector_or_scalar(past, bins[0])
        discreteOtherPast = self.digitize_vector_or_scalar(otherPast, bins[1])
    else:
        discreteFuture = unique_rows(future, return_inverse=True)
        discretePast = unique_rows(past, return_inverse=True)
        discreteOtherPast = unique_rows(otherPast, return_inverse=True)

    # Marginal distributions.
    # Compute p(i_{n+1},i_n,j_n).
    xy = np.c_[(discreteFuture, discretePast, discreteOtherPast)]  # data as row vectors arranged into matrix
    uniqxy = xy[unique_rows(xy)]  # unique entries in data that will be assigned probabilities using kernel
    pXXkY = np.zeros((uniqxy.shape[0]))
    for i, row in enumerate(uniqxy):
        pXXkY[i] = np.sum(np.all(row[None, :] == xy, 1))
    pXXkY /= np.sum(pXXkY)

    # Compute p(i_n).
    Xk = np.bincount(discretePast)
    pXk = Xk / np.sum(Xk)

    # Compute p(i_n,j_n).
    YXk = np.c_[(discretePast, discreteOtherPast)]
    uniqYXk = YXk[unique_rows(YXk)]
    pYXk = np.zeros((uniqYXk.shape[0]))
    for i, r in enumerate(uniqYXk):
        pYXk[i] = np.sum(np.all(r[None, :] == YXk, 1))
    pYXk = pYXk / np.sum(pYXk)

    # Compute p(i_{n+1},i_n).
    XXk = np.c_[(discreteFuture, discretePast)]
    uniqXXk = XXk[unique_rows(XXk)]
    pXXk = np.zeros((uniqXXk.shape[0]))
    for i, r in enumerate(uniqXXk):
        pXXk[i] = np.sum(np.all(r[None, :] == XXk, 1))
    pXXk = pXXk / np.sum(pXXk)

    # T(x->y) = H(i_n,j_n) + H(i_{n+1},i_n) - H(i_{n+1},i_n,j_n) - H(i_n),
    # written below using sum(p*log2(p)) = -H for each term.
    transferEntropy = (np.nansum(pXXkY * np.log2(pXXkY)) +
                       np.nansum(pXk * np.log2(pXk)) -
                       np.nansum(pYXk * np.log2(pYXk)) -
                       np.nansum(pXXk * np.log2(pXXk)))
    return transferEntropy
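# Hedged usage sketch for n_step_transfer_entropy, not part of the original module. It
# assumes the TransferEntropy class lives in entropy.transfer (as imported by
# transfer_info above) and that its default histogram discretization is appropriate for
# 1d trajectories. y is a delayed, noisy copy of x, so T(x->y) should exceed T(y->x).
def _example_n_step_transfer_entropy():
    import numpy as np
    from entropy.transfer import TransferEntropy

    np.random.seed(0)
    x = np.random.randn(2000)
    y = np.roll(x, 1) + .5 * np.random.randn(2000)

    te = TransferEntropy()
    print("T(x->y):", te.n_step_transfer_entropy(x, y, bins=[8, 8, 8]))
    print("T(y->x):", te.n_step_transfer_entropy(y, x, bins=[8, 8, 8]))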
def _n_step_transfer_entropy(self, x, y,
                             kPast=1, kPastOther=1, kFuture=1,
                             bins=[10, 10, 10],
                             returnProbabilities=False):
    """
    Transfer entropy from x->y.

    Uses histogram binning for unidimensional data and k-means clustering for
    k-dimensional data where input data points are a set of points from a trajectory.
    We compute the empirical distribution p(i_{n+1},i_n,j_n) and marginalize over it to
    get the conditional probabilities required for the transfer entropy calculation.

    NOTE: Random seeds with k-means clustering might affect the computed results. It is a
    good idea to try several iterations or many different k-means seeds.
    2015-12-23

    Params:
    -------
    x (n_samples,n_dim)
    y
    kPast (int)
        k steps into the past
    kPastOther (int)
        k steps into the past for the other trajectory that we're conditioning on
    kFuture (int)
        k steps into the future
    bins (list of ints)
        [binsPast,binsOtherPast,binsFuture], number of bins (or clusters) for trajectories
    returnProbabilities (bool=False)
    """
    kPastMx = max([kPast, kPastOther])
    transferEntropy = 0.

    # Construct matrix of data points (i_{n+1},i_n,j_n) where i and j are vectors.
    future = np.zeros((x.shape[0] - kPastMx - kFuture + 1, kFuture))
    past = np.zeros((x.shape[0] - kPastMx - kFuture + 1, kPast))
    otherPast = np.zeros((x.shape[0] - kPastMx - kFuture + 1, kPastOther))
    for i in range(future.shape[0]):
        future[i, :] = y[(i + kPastMx):(i + kPastMx + kFuture)]
        past[i, :] = y[(i + kPastMx - kPast):(i + kPastMx)]
        otherPast[i, :] = x[(i + kPastMx - kPastOther):(i + kPastMx)]

    discreteFuture = self.digitize_vector_or_scalar(future, bins[2])
    discretePast = self.digitize_vector_or_scalar(past, bins[0])
    discreteOtherPast = self.digitize_vector_or_scalar(otherPast, bins[1])

    xy = np.c_[(discreteFuture, discretePast, discreteOtherPast)]  # data as row vectors arranged into matrix
    uniqxy = xy[unique_rows(xy)]  # unique entries in data that will be assigned probabilities using kernel
    pijk = np.zeros((uniqxy.shape[0]))

    # Compute p(i_{n+1},i_n,j_n).
    for i, row in enumerate(uniqxy):
        pijk[i] = np.sum(np.prod(row[None, :] == xy, 1))
    pijk /= np.sum(pijk)

    # Define functions for multiprocessing. ------------------------------------------------
    def calc_piCondij(i, row, store):
        """p( i_future | i_past, j_past )"""
        ix = np.where(np.prod(row[None, 1:] == uniqxy[:, 1:], 1))[0]
        p = pijk[ix]
        p /= np.sum(p)
        piCondij = np.sum(p[uniqxy[ix][:, 0] == row[0]])
        # Store result in shared memory access.
        store[i] = piCondij

    def calc_piCondi(i, row, store):
        """p( i_future | i_past )"""
        ix = np.where(row[None, 1] == uniqxy[:, 1])[0]
        p = pijk[ix]
        p /= np.sum(p)
        piCondi = np.sum(p[uniqxy[ix][:, 0] == row[0]])
        # Store result in shared memory access.
        store[i] = piCondi

    # Parallelization steps:
    # 1. Create shared memory access for storing results across independent processes.
    # 2. Generate the list of tasks and store them in a Queue. The Queue must have
    #    sentinels (or the workers wait indefinitely).
    # 3. Generate workers and start them.
    # 4. End workers.

    # Define workers that will take jobs from the queue to complete.
    def worker_piCondij(work_queue, storage):
        while True:
            nextTask = work_queue.get()
            if nextTask is not None:
                nextTask.append(storage)
                calc_piCondij(*nextTask)
            else:
                break

    def worker_piCondi(work_queue, storage):
        while True:
            nextTask = work_queue.get()
            if nextTask is not None:
                nextTask.append(storage)
                calc_piCondi(*nextTask)
            else:
                break

    def generate_work_queue():
        # Iterate over all unique data points.
        workQueue = Queue()  # List of jobs to complete.
        for i, row in enumerate(uniqxy):
            workQueue.put([i, row])
        # Place one sentinel for each worker so it stops waiting for the queue to fill.
        for i in range(self.N_WORKERS):
            workQueue.put(None)
        return workQueue

    # Memory map for shared memory access to processes.
    # These will store results of computation.
    piCondijStore = Array('d', np.zeros((uniqxy.shape[0])))
    piCondiStore = Array('d', np.zeros((uniqxy.shape[0])))

    self.run_parallel_job(worker_piCondij, generate_work_queue(), piCondijStore)
    self.run_parallel_job(worker_piCondi, generate_work_queue(), piCondiStore)

    piCondij = np.array(piCondijStore[:])
    piCondi = np.array(piCondiStore[:])

    transferEntropy = np.nansum(pijk * (np.log2(piCondij) - np.log2(piCondi)))

    if returnProbabilities:
        return transferEntropy, pijk, piCondij, piCondi
    return transferEntropy
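# Standalone sketch of the sentinel-terminated work queue pattern used by
# _n_step_transfer_entropy above, with results collected in a shared multiprocessing
# Array. The worker is defined at module level so it stays picklable under any start
# method; all names here are illustrative and not part of the original module.
def _example_square_worker(work_queue, store):
    while True:
        task = work_queue.get()
        if task is None:            # sentinel: no more work for this worker
            break
        i, v = task
        store[i] = v ** 2

def _example_sentinel_queue(n_workers=2):
    from multiprocessing import Process, Queue, Array

    values = list(range(10))
    results = Array('d', len(values))

    # Fill the queue with tasks, then one sentinel per worker so each can exit cleanly.
    queue = Queue()
    for i, v in enumerate(values):
        queue.put((i, v))
    for _ in range(n_workers):
        queue.put(None)

    workers = [Process(target=_example_square_worker, args=(queue, results))
               for _ in range(n_workers)]
    for w in workers:
        w.start()
    for w in workers:
        w.join()
    return list(results)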