def filter_index(path, savepath, pindices): """Filter out particle by a set of indices. For HDF5 files. Parameters ---------- path : string path to the HDF5 file which contains DataFrames(['particle']) savepath: string path to be saved the result file as a HDF5 file pindices : list list of particle index, to be removed from the DataFrame Returns ------- a subset of tracks. Dataframe([x, y, frame, particle]), to be saved in savepath """ with PandasHDFStoreSingleNode(path) as traj: with PandasHDFStoreSingleNode(savepath) as result: for f, frame in enumerate(traj): print('Frame:', f) frame.set_index('particle', drop=False, inplace=True) #to be removed: intersection between particle in frame and pindices remove = np.intersect1d(frame.particle.values, pindices) frame.drop(remove, inplace=True) frame.drop(frame.columns[[2, 3, 4]], axis=1, inplace=True) # drop [mass, size, ecc] result.put(frame) print('Before:', len(traj.list_traj())) print('After:', len(result.list_traj()))
def filter_index(path, savepath, pindices): """Filter out particle by a set of indices. For HDF5 files. Parameters ---------- path : string path to the HDF5 file which contains DataFrames(['particle']) savepath: string path to be saved the result file as a HDF5 file pindices : list list of particle index, to be removed from the DataFrame Returns ------- a subset of tracks. Dataframe([x, y, frame, particle]), to be saved in savepath """ with PandasHDFStoreSingleNode(path) as traj: with PandasHDFStoreSingleNode(savepath) as result: for f in range(traj.max_frame): # loop frame frame = traj.store.select( traj.key, "frame == {}".format(f), columns=['x', 'y', 'frame', 'particle']) frame.set_index('particle', drop=False, inplace=True) # list of removing index = intersection between particle in frame and pindices remove = list(set(frame.particle.values) & set(pindices)) frame.drop(remove, inplace=True) # result.put(frame) print('Before:', len(list_traj(path))) print('After:', len(list_traj(savepath)))
def subtract_drift(path, savepath, drift=None): """Return a copy of particle trajectories with the overall drift subtracted out. Parameters ---------- path : string path to the HDF5 file which contains DataFrames(['x','y']) savepath : string path to be saved the result file as a HDF5 file drift : optional DataFrame([x, y], index=frame) If no drift is passed, drift is computed from traj. Returns ------- Dataframe, to be saved in savepath """ if drift is None: drift = compute_drift(path) with PandasHDFStoreSingleNode(path) as traj_old: with PandasHDFStoreSingleNode(savepath) as traj_new: for f, frame in enumerate(traj_old): print('Frame:', f) frame['x'] = frame['x'].sub(drift['x'][f]) frame['y'] = frame['y'].sub(drift['y'][f]) # put in the new file traj_new.put(frame)
def filter_stubs(path, savepath, threshold=30, pardump=False, chunksize=2**15): """Filter out trajectories which are shorter than the threshol value. Parameters ---------- path : string path to the HDF5 file which contains DataFrames(['particle']) savepath: string path to be saved the result file as a HDF5 file threshold : integer, default 30 minimum number of points (video frames) to survive pardump : boolean, defaults False get the trajectory sizes of all particles at once. 'True' to improve the speed. chunksize : integer, default is 2**15 Returns ------- a subset of DataFrame in path, to be saved in savepath """ with PandasHDFStoreSingleNode(path) as traj: # get a list of particle index parindex = traj.list_traj() print('Find trajectories length') # initialize a Dataframe [particle index, no. of apperance (frame)] trajsizes = pd.DataFrame(np.zeros(len(parindex)), index=parindex) # find the length of each trajectory if pardump is True: # able to get all particle index at once allpar = traj.store.select_column(traj.key, "particle") p = allpar.value_counts() trajsizes.loc[p.index, trajsizes.columns] = p p = [] allpar = [] else: for chunk in traj.store.select_column(traj.key, "particle", chunksize=chunksize): trajsizes.loc[chunk] += 1 # bin it # creat a new file to store the result after stubs with PandasHDFStoreSingleNode(savepath) as temp: for f, frame in enumerate(traj): # loop frame print('Frame:', f) # keep long enough trajectories frame = frame[(trajsizes.loc[frame.particle.astype(int)] >= threshold).values] #store in temp.h5 file temp.put(frame) print('Before:', len(parindex)) print('After:', len(temp.list_traj()))
def par_char(path, sample=20): """Get particle mass, size and ecc as a time average value. *Note that this is not the average of every frame, it is the average among few frames which are selected linearly from the minimum to maximum frames. Parameters ---------- path : string path to the HDF5 file which contains DataFrames(['mass','size', 'ecc', 'particle']) sample : integer, Default 20 a number of frame to be averaged Returns ------- DataFrame([index = particle, mass, size, ecc]) """ with PandasHDFStoreSingleNode(path) as traj: frame_sam = np.linspace(0, traj.max_frame, sample).astype(int) frames = traj.store.select(traj.key, "frame in frame_sam", columns=['mass', 'size', 'ecc', 'particle']) result = frames.groupby('particle').mean() return result
def filter_stubs(path, savepath, threshold=30, chunksize=2**12): """Filter out trajectories which are shorter than the threshol value. Parameters ---------- path : string path to the HDF5 file which contains DataFrames(['particle']) savepath: string path to be saved the result file as a HDF5 file threshold : integer, default 30 minimum number of points (video frames) to survive chunksize : integer, default is 2**12 Returns ------- a subset of DataFrame in path, to be saved in savepath """ from numpy import zeros from pandas import DataFrame with PandasHDFStoreSingleNode(path) as traj: parindex = list_traj(path) # get a list of particle index print('1/2 Find trajectories length') trajsizes = DataFrame( zeros(len(parindex)), index=parindex ) # initialize a Dataframe [particle index, no. of apperance (frame)] for chunk in traj.store.select_column( traj.key, "particle", chunksize=chunksize): # find the length of each trajectory trajsizes.loc[chunk] += 1 # bin it print('2/2 Save to a new file') with PandasHDFStoreSingleNode( savepath ) as temp: # creat a new file to store the result after stubs for f in range(traj.max_frame + 1): # loop frame frame = traj.get(f) # get frame by frame data frame = frame[( trajsizes.loc[frame.particle.astype(int)] >= threshold).values] # keep long enough trajectories # frame.reset_index(inplace=True) temp.put(frame) # store in temp.h5 file print('Before:', len(parindex)) print('After:', len(list_traj(savepath)))
def compute_drift_SingleNode(path, smoothing=0, pos_columns=None): """Return the ensemble drift, xy(t). Parameters ---------- path : string p path to the HDF5 file which contains DataFrames(['x','y','particle']) smoothing : integer Smooth the drift using a forward-looking rolling mean over this many frames. Returns ------- drift : DataFrame([x, y], index=frame) """ if pos_columns is None: pos_columns = ['x', 'y'] # Drift calculation print('Drift calc') with PandasHDFStoreSingleNode(path) as traj: Nframe = traj.max_frame # initialize drift DataFrame dx = pd.DataFrame(data=np.zeros((Nframe + 1, 2)), columns=['x', 'y']) for f, frameB in enumerate(traj): # loop frame print('Frame:', f) if f > 0: delta = frameB.set_index('particle')[ pos_columns] - frameA.set_index('particle')[pos_columns] dx.iloc[f].x = np.nanmean(delta.x.values) dx.iloc[f].y = np.nanmean(delta.y.values) # compute drift #remember the current frame frameA = frameB if smoothing > 0: dx = pd.rolling_mean(dx, smoothing, min_periods=0) x = np.cumsum(dx) return x
def compute_drift(path, smoothing=0, pos_columns=None): """Return the ensemble drift, xy(t). Parameters ---------- path : string p path to the HDF5 file which contains DataFrames(['x','y','particle']) smoothing : integer Smooth the drift using a forward-looking rolling mean over this many frames. Returns ------- drift : DataFrame([x, y], index=frame) """ import numpy as np import pandas as pd if pos_columns is None: pos_columns = ['x', 'y'] # Drift calculation print('Drift calc') with PandasHDFStoreSingleNode(path) as traj: # open traj.h5 Nframe = traj.max_frame dx = pd.DataFrame(data=np.zeros((Nframe + 1, 2)), columns=['x', 'y']) # initialize drift DataFrame for f in range(Nframe): # loop frame frameA = traj.get(f) # frame t frameB = traj.get(f + 1) # frame t+1 delta = frameB.set_index('particle')[ pos_columns] - frameA.set_index('particle')[pos_columns] dx.iloc[f + 1].x = np.nanmean(delta.x.values) dx.iloc[f + 1].y = np.nanmean(delta.y.values) # compute drift if smoothing > 0: dx = pd.rolling_mean(dx, smoothing, min_periods=0) x = np.cumsum(dx) return x
def list_traj(path, chunksize=2**12): """A list of unique index of all particles in a movie Parameters ---------- path: string path to the HDF5 file which contains DataFrames(['particle']) chunksize: integer, default is 2**12 Returns ------- a list that contains unique particle indices """ with PandasHDFStoreSingleNode(path) as traj_cell: chunk_uniq = [] for chunk in traj_cell.store.select_column(traj_cell.key, "particle", chunksize=chunksize): chunk_uniq.append(int(chunk)) chunk_uniq = list(set(chunk_uniq)) # get unique values return chunk_uniq
def par_char(path): """Get particle mass, size and ecc as a time average value *** will be improved to make it faster *** Parameters ---------- path : string path to the HDF5 file which contains DataFrames(['mass','size', 'ecc', 'particle']) Returns ------- DataFrame([mass, size, ecc, particle]) """ from pandas import DataFrame from numpy import zeros parindex = list_traj("test_traj2.h5") # get indices of all particles char_av = DataFrame(zeros((len(parindex), 4)), index=parindex, columns=['mass', 'size', 'ecc', 'particle']) # initialize result Dataframe char_av.particle = parindex with PandasHDFStoreSingleNode(path) as traj: for p in parindex: # loop by particle **can be improved?** char_t = traj.store.select( traj.key, "particle == p", columns=['mass', 'size', 'ecc', 'particle']) # char of one particle in every frame char_av.loc[p] = char_t.mean() # time average return char_av
def emsd(path, mpp, fps, nlagtime, max_lagtime, framejump=10, pos_columns=None): """Compute the mean displacement and mean squared displacement of one trajectory over a range of time intervals for the streaming function. Parameters ---------- path : string path to the HDF5 file which contains DataFrames(['particle']) mpp : microns per pixel fps : frames per second nlagtime : number of lagtime to which MSD is computed max_lagtime : maximum intervals of frames out to which MSD is computed framejump : integer indicates the jump in t0 loop (to increase the speed) Default : 10 Returns ------- DataFrame([<x^2>, <y^2>, msd, std, lagt]) Notes ----- Input units are pixels and frames. Output units are microns and seconds. """ if pos_columns is None: pos_columns = ['x', 'y'] result_columns = ['<{}^2>'.format(p) for p in pos_columns] + \ ['msd','std','lagt'] # define the lagtime to which MSD is computed. From 1 to fps, lagtime increases linearly with the step 1. # Above fps, lagtime increases in a log scale until max_lagtime. lagtime = np.unique( np.append( np.arange(1, fps), (np.logspace(0, np.log10(max_lagtime / fps), nlagtime - fps) * fps).astype(int))) with PandasHDFStoreSingleNode(path) as traj: # get number of frames Nframe = traj.max_frame # initialize the result Dataframe result = pd.DataFrame(index=lagtime, columns=result_columns) # loop delta t for lg in lagtime: print('lagtime', lg) # initialize t0 lframe = range(0, Nframe + 1 - lg, framejump) # initialize DataFrame for each t0 msds = pd.DataFrame(index=range(len(lframe)), columns=result_columns) for k, f in enumerate(lframe): # loop t0 frameA = traj.get(f) frameB = traj.get(f + lg) # compute different position between 2 frames for each particle diff = frameB.set_index('particle')[ pos_columns] - frameA.set_index('particle')[pos_columns] # <x^2> msds[result_columns[0]][k] = np.nanmean( (diff.x.values * mpp)**2) # <y^2> msds[result_columns[1]][k] = np.nanmean( (diff.y.values * mpp)**2) # <r^2> = <x^2> + <y^2> msds.msd = msds[result_columns[0]] + msds[result_columns[1]] # average over t0 result[result.index == lg] = [msds.mean()] # get the std over each t0 result.loc[result.index == lg, result.columns[3]] = msds.msd.std() result['lagt'] = lagtime / fps return result
def plot_traj_stream(path, mpp=None, label=False, superimpose=None, cmap=None, ax=None, t_column=None, pos_columns=None, chunksize = 2**10, interval = None, pindices = None, plot_style={}, **kwargs): """Plot traces of trajectories for each particle for HDF5 files Parameters ---------- path : DataFrame The DataFrame should include time and spatial coordinate columns. colorby : {'particle', 'frame'}, optional mpp : float, optional Microns per pixel. If omitted, the labels will have units of pixels. label : boolean, optional Set to True to write particle ID numbers next to trajectories. superimpose : ndarray, optional Background image, default None cmap : colormap, optional This is only used in colorby='frame' mode. Default = mpl.cm.winter ax : matplotlib axes object, optional Defaults to current axes t_column : string, optional DataFrame column name for time coordinate. Default is 'frame'. pos_columns : list of strings, optional Dataframe column names for spatial coordinates. Default is ['x', 'y']. chunksize : interger Default is 2**10 interval : list = [frame_min, frame_max], optional Interval of interestied frames pindices : list, optional Particle of interest index plot_style : dictionary Keyword arguments passed through to the `Axes.plot(...)` command Returns ------- Axes object See Also -------- plot_traj3d : the 3D equivalent of `plot_traj` """ import matplotlib as mpl import matplotlib.pyplot as plt from matplotlib.collections import LineCollection from trackpy import PandasHDFStoreSingleNode if cmap is None: cmap = plt.cm.winter if t_column is None: t_column = 'frame' if pos_columns is None: pos_columns = ['x', 'y'] _plot_style = dict(linewidth=1) _plot_style.update(**_normalize_kwargs(plot_style, 'line2d')) # Axes labels if mpp is None: _set_labels(ax, '{} [px]', pos_columns) mpp = 1. # for computations of image extent below else: if mpl.rcParams['text.usetex']: _set_labels(ax, r'{} [\textmu m]', pos_columns) else: _set_labels(ax, r'{} [\xb5m]', pos_columns) # Background image if superimpose is not None: ax.imshow(superimpose, cmap=plt.cm.gray, origin='lower', interpolation='nearest', vmin=kwargs.get('vmin'), vmax=kwargs.get('vmax')) with PandasHDFStoreSingleNode(path) as traj_cell: # assign the interval of interest if interval is None: fmin = 0 fmax = traj_cell.max_frame else: fmin = interval[0] fmax = interval[1] # get particle index if pindices is None: pindices = [] for chunk in traj_cell.store.select_column(traj_cell.key,"particle", chunksize = chunksize): pindices.append(chunk) pindices = list(set(pindices)) # plot trajectories for i in pindices: traj = traj_cell.store.select(traj_cell.key,"particle == {0}".format(int(i))) # get one particle trajectory traj = traj[((traj.frame >= fmin) & (traj.frame <= fmax))] # filter the frame interval _plot(ax, mpp*traj, pos_columns, **_plot_style) return invert_yaxis(ax)