def findVelocities(trajectories):
    """
    Add instantaneous velocities to a trajectories dataframe.

    For every track with more than two rows, 'vx' and 'vy' are computed
    with ``np.gradient`` over the 'x' and 'y' columns, i.e. the derivative
    is taken per row (per frame, assuming consecutive rows are consecutive
    frames -- TODO confirm against the tracking output).

    Parameters
    ----------
    trajectories : pandas Dataframe
        Must contain at least the columns ['frame', 'track', 'x', 'y']
        (the original documentation says it usually comes from trackpy).

    Returns
    -------
    data : pandas Dataframe
        Columns ['frame', 'track', 'x', 'y', 'vx', 'vy'] with a fresh
        0..N-1 index. Tracks with two rows or fewer are left out. Any other
        column of the input is discarded. The frame is empty when no track
        is long enough.
    """
    col_names = ['frame', 'track', 'x', 'y', 'vx', 'vy']
    tracks = set(trajectories.track)
    n_tracks = len(tracks)

    # Per-track results are collected in a list and concatenated once at the
    # end; concatenating inside the loop copies the accumulated frame on
    # every iteration.
    pieces = []
    for item in tracks:
        sub = trajectories[trajectories.track == item]
        # Skip tracks that only have one or two rows of data
        if sub.shape[0] <= 2:
            continue
        printp('Deriving velocities for track: ' + str(item) + '/' + str(n_tracks))
        piece = sub[['frame', 'track', 'x', 'y']].reset_index(drop=True)
        piece = piece.assign(vx=np.gradient(sub.x.values),
                             vy=np.gradient(sub.y.values))
        pieces.append(piece)

    if not pieces:
        # pd.concat raises on an empty list; return an empty frame instead
        return pd.DataFrame(np.zeros(shape=(0, 6), dtype=np.int64),
                            columns=col_names)
    return pd.concat(pieces, axis=0, ignore_index=True)
def deleteShortTrajectories(velocities, minimumFrames=150):
    """
    Remove every trajectory that has 'minimumFrames' rows or fewer.

    Parameters
    ----------
    velocities : pandas Dataframe
        Must contain the columns ['frame', 'track', 'x', 'y', 'vx', 'vy'].
    minimumFrames : int, optional (default: 150)
        Tracks need strictly more rows than this to be kept.

    Returns
    -------
    data : pandas Dataframe
        Columns ['frame', 'track', 'x', 'y', 'vx', 'vy'] with a fresh
        0..N-1 index. Rows are grouped track by track, in the iteration
        order of ``set(velocities.track)``. Any other column of the input
        is discarded. The frame is empty when no track is long enough.
    """
    col_names = ['frame', 'track', 'x', 'y', 'vx', 'vy']

    # Surviving tracks are collected in a list and concatenated once at the
    # end instead of growing a dataframe inside the loop.
    kept = []
    for item in set(velocities.track):
        sub = velocities[velocities.track == item]
        n_rows = sub.shape[0]
        if n_rows <= minimumFrames:
            printp('Deleting velocities for track: ' + str(item) + ' --> Size: ' + str(n_rows))
            continue
        kept.append(sub[col_names])

    if not kept:
        # pd.concat raises on an empty list; return an empty frame instead
        return pd.DataFrame(np.zeros(shape=(0, 6), dtype=np.int64),
                            columns=col_names)
    return pd.concat(kept, axis=0, ignore_index=True)
def velocity_autocorrelation_ensemble(vel_data, max_lag='all'):
    """
    Average the velocity autocorrelation (VAC) over every track.

    ``velocity_autocorrelation`` is called once per track that has more
    than two rows; the per-track results are placed side by side and
    averaged row-wise.

    Parameters
    ----------
    vel_data : pandas Dataframe
        Must contain at least the columns ['frame', 'track', 'vx', 'vy']
        (as stated by the original documentation; only 'track' is read
        directly here, the rest is consumed by ``velocity_autocorrelation``).
    max_lag : int, optional (default: 'all')
        Forwarded unchanged to ``velocity_autocorrelation``.

    Returns
    -------
    vac : pandas Series
        Row-wise mean of the per-track autocorrelations. Empty when no
        track has more than two rows.
    """
    tracks = set(vel_data.track)
    n_tracks = len(tracks)  # hoisted: it was recomputed for every message

    # Collect first, concatenate once: concatenating inside the loop copies
    # the accumulated frame on every iteration.
    per_track = []
    for traj in tracks:
        if vel_data[vel_data.track == traj].shape[0] > 2:
            per_track.append(
                velocity_autocorrelation(vel_data, traj, max_lag=max_lag))
            printp('Integrating velocity autocorrelations: ' + str(traj) + '/' + str(n_tracks))

    if not per_track:
        # Same result the row-wise mean of an empty ensemble gives
        return pd.DataFrame().mean(axis=1)
    return pd.concat(per_track, axis=1).mean(axis=1)
def msd_ensemble(data, max_steps='all', column='x'):
    """
    Average the mean squared displacement (MSD) over every track.

    ``meanSquaredDisplacement`` is called once per track that has more than
    two rows; column 1 of each result (the MSD values; column 0 is assumed
    to hold the steps -- per the original comment) is kept, the per-track
    columns are placed side by side and averaged row-wise.

    Parameters
    ----------
    data : pandas Dataframe
        Must contain at least the columns ['frame', 'track', 'x', 'y']
        (as stated by the original documentation; only 'track' is read
        directly here).
    max_steps : int, optional (default: 'all')
        Forwarded unchanged to ``meanSquaredDisplacement``.
    column : string (default: 'x')
        Name of the column for the component to be analysed; forwarded
        unchanged to ``meanSquaredDisplacement``.

    Returns
    -------
    results : pandas Series
        Row-wise mean of the per-track MSD columns. Empty when no track has
        more than two rows.
    """
    tracks = set(data.track)
    n_tracks = len(tracks)  # hoisted: it was recomputed for every message

    # Collect first, concatenate once: concatenating inside the loop copies
    # the accumulated frame on every iteration.
    per_track = []
    for traj in tracks:
        if data[data.track == traj].shape[0] > 2:
            msd_traj = meanSquaredDisplacement(data, max_steps=max_steps,
                                               column=column, trajectory=traj)
            # Only the data column is needed, not the steps column
            per_track.append(pd.DataFrame(msd_traj[:, 1]))
            printp('Calculating MSD for trajectory: ' + str(traj) + '/' + str(n_tracks))

    if not per_track:
        # Same result the row-wise mean of an empty ensemble gives
        return pd.DataFrame().mean(axis=1)
    return pd.concat(per_track, axis=1).mean(axis=1)
def smoothPositions(pos_data, window_length=25, poly_order=3, kind='savgol', butter_size=0.8):
    """
    Smooth the 'x' and 'y' positions of every track.

    Parameters
    ----------
    pos_data : pandas Dataframe
        Must contain the columns ['frame', 'track', 'x', 'y', 'vx', 'vy'].
    window_length : int, optional (default: 25)
        Passed to the filter. Tracks with ``window_length + 1`` rows or
        fewer are left out of the result.
    poly_order : int, optional (default: 3)
        Polynomial order passed to ``savgol_filter`` (kind='savgol' only).
    kind : {'savgol', 'butter'}, optional (default: 'savgol')
        'savgol' applies ``savgol_filter(values, window_length, poly_order)``;
        'butter' applies ``butter_lowpass(butter_size, window_length, values)``.
    butter_size : float, optional (default: 0.8)
        First argument of ``butter_lowpass`` (kind='butter' only).

    Returns
    -------
    data : pandas Dataframe
        Smoothed 'x'/'y' together with the untouched 'vx', 'vy', 'frame'
        and 'track' columns, with a fresh 0..N-1 index. Any other column of
        the input is discarded. The frame is empty when no track is long
        enough.

    Raises
    ------
    ValueError
        If 'kind' is not one of the supported filters.

    Notes
    -----
    Column order is kept exactly as it has always been returned, for
    backward compatibility: alphabetical for 'savgol' (that branch used to
    concatenate with ``sort=True``) and
    ['frame', 'track', 'x', 'y', 'vx', 'vy'] for 'butter' and for an empty
    result.
    """
    # https://stackoverflow.com/questions/20618804/how-to-smooth-a-curve-in-the-right-way
    col_names = ['frame', 'track', 'x', 'y', 'vx', 'vy']

    if kind == 'savgol':
        def smooth(values):
            return savgol_filter(values, window_length, poly_order)
        out_columns = sorted(col_names)
    elif kind == 'butter':
        def smooth(values):
            return butter_lowpass(butter_size, window_length, values)
        out_columns = col_names
    else:
        raise ValueError("kind must be 'savgol' or 'butter', got " + repr(kind))

    # Per-track results are collected in a list and concatenated once.
    pieces = []
    for item in set(pos_data.track):
        sub = pos_data[pos_data.track == item]
        # Skip tracks that are not longer than the smoothing window
        if sub.shape[0] <= window_length + 1:
            continue
        printp('Smoothing positions for track: ' + str(item))
        x = pd.DataFrame(smooth(sub.x), columns=['x'])
        y = pd.DataFrame(smooth(sub.y), columns=['y'])
        untouched = sub[['vx', 'vy', 'frame', 'track']].reset_index(drop=True)
        pieces.append(pd.concat((x, y, untouched), axis=1))

    if not pieces:
        # pd.concat raises on an empty list; return an empty frame instead
        return pd.DataFrame(np.zeros(shape=(0, 6), dtype=np.int64),
                            columns=col_names)
    return pd.concat(pieces, axis=0, ignore_index=True)[out_columns]
def calculate_jumps(data, interval=1, moving_window=True):
    """
    Return a new dataframe with the displacement ('dx', 'dy') of every
    particle over jumps of length 'interval' (in rows / frames).

    Parameters
    ----------
    data : pandas Dataframe
        Must contain at least the columns ['frame', 'track', 'x', 'y'].
    interval : int, optional (default: 1)
        Length of the jump. Tracks with ``interval + 1`` rows or fewer are
        left out.
    moving_window : bool, optional (default: True)
        If true, the jumps come from ``difference(values, n=interval)``
        applied to every row of the track (presumably an n-lag difference
        -- TODO confirm against the helper). If false, the track is
        subsampled every 'interval' rows and the jumps are the differences
        between consecutive subsampled positions.

    Returns
    -------
    data : pandas Dataframe
        Columns ['frame', 'track', 'x', 'y', 'dx', 'dy'] with a fresh
        0..N-1 index. Rows without a complete jump (NaN in any column) are
        dropped. The frame is empty when no track is long enough.
    """
    col_names = ['frame', 'track', 'x', 'y', 'dx', 'dy']
    tracks = set(data.track)
    n_tracks = len(tracks)

    pieces = []
    for item in tracks:
        sub = data[data.track == item]
        # Skip tracks that do not have enough data for a single jump
        if sub.shape[0] <= interval + 1:
            continue
        printp('Calculating jumps for track: ' + str(item) + '/' + str(n_tracks))
        if moving_window:
            # Default behaviour
            dx = difference(sub.x.values, n=interval)
            dy = difference(sub.y.values, n=interval)
        else:
            # Non-overlapping jumps: keep one row every 'interval' rows and
            # difference consecutive kept positions.
            sub = sub.iloc[0::interval]
            dx = np.diff(sub.x.values)
            dy = np.diff(sub.y.values)
        pieces.append(pd.concat(
            (sub[['frame', 'track', 'x', 'y']].reset_index(drop=True),
             pd.DataFrame(dx, columns=['dx']),
             pd.DataFrame(dy, columns=['dy'])),
            axis=1))

    if not pieces:
        # pd.concat raises on an empty list; return an empty frame instead
        return pd.DataFrame(columns=col_names)

    out = pd.concat(pieces, axis=0).reset_index(drop=True)
    # NOTE: the former `drop(0)` was a leftover of the removed dummy
    # 'np.zeros' first row; with the list-based accumulation it deleted the
    # first *real* row of the result, so it is gone.
    # The jump columns are shorter than the track, which leaves NaNs at the
    # end of every track; those rows are removed here.
    out = out.dropna()
    return out.reset_index(drop=True)
def alternative_delete_short_trajectories(data, minimumFrames=10):
    """
    Drop every trajectory that has 'minimumFrames' rows or fewer.

    Parameters
    ----------
    data : pandas Dataframe
        Must contain a 'track' column; all other columns are carried over.
    minimumFrames : int, optional (default: 10)
        Tracks need strictly more rows than this to be kept.

    Returns
    -------
    data : pandas Dataframe
        The input without the rows of the short tracks, in the original row
        order and with a fresh 0..N-1 index.
    """
    # Pass 1: collect the ids of the tracks that are too short
    short_tracks = []
    for track_id in set(data.track):
        n_rows = len(data[data.track == track_id])
        if n_rows > minimumFrames:
            continue
        printp('Deleting velocities for track: ' + str(track_id) + ' --> Size: ' + str(n_rows))
        short_tracks.append(track_id)

    # Pass 2: locate the index labels of those tracks and drop them
    # https://stackoverflow.com/questions/12096252/use-a-list-of-values-to-select-rows-from-a-pandas-dataframe
    # https://thispointer.com/python-pandas-how-to-drop-rows-in-dataframe-by-conditions-on-column-values/
    is_short = data.track.isin(short_tracks)
    labels_to_drop = data[is_short].index
    cleaned = data.drop(labels_to_drop)
    return cleaned.reset_index(drop=True)
def alternative_calculate_velocities(trajectories, n=1, use_gradient=False):
    """
    Add 'vx' and 'vy' columns with the per-row velocity of every track.

    Parameters
    ----------
    trajectories : pandas Dataframe
        Must contain at least the columns ['track', 'x', 'y']; all other
        columns are carried over.
    n : int, optional (default: 1)
        Lag, in rows, of the finite difference. With ``n == 1`` the
        velocity is the plain forward difference; otherwise it is the
        displacement over 'n' rows divided by 'n' (moving window). The last
        'n' rows of each track have no velocity and are dropped. Ignored
        when 'use_gradient' is true.
    use_gradient : bool, optional (default: False)
        Use ``np.gradient`` instead of a finite difference. Every row is
        kept, but tracks with fewer than two rows are skipped because
        ``np.gradient`` cannot handle them.

    Returns
    -------
    out : pandas Dataframe
        The per-track results concatenated, re-indexed 0..N-1 and passed
        through ``reset_track_indexes``.

    Raises
    ------
    ValueError
        If 'n' is smaller than 1 (finite-difference mode), or if no track
        yields any result.
    """
    if not use_gradient and n < 1:
        raise ValueError('n must be a positive number of rows, got ' + repr(n))

    track_ids = set(trajectories.track)
    n_tracks = len(track_ids)

    out = []
    for t in track_ids:
        sub = trajectories[trajectories.track == t]
        xy = sub[['x', 'y']].values
        if use_gradient:
            # An explicit length check replaces the former bare
            # `except: pass`, which hid every other error too.
            if len(xy) < 2:
                continue
            vxvy = np.gradient(xy, axis=0)
        elif n == 1:
            vxvy = np.diff(xy, axis=0)
            sub = sub[:-1]
        else:
            # Displacements over a moving window of 'n' rows
            vxvy = (xy[n:] - xy[:-n]) / n
            sub = sub[:-n]
        # assign() returns a new frame, so nothing is written into a slice
        # of 'trajectories' (no chained-assignment warning).
        out.append(sub.assign(vx=vxvy[:, 0], vy=vxvy[:, 1]))

        try:
            label = str(t + 1)
        except TypeError:
            # Non-numeric track ids cannot be shifted; show them unchanged
            label = str(t)
        printp('Deriving velocities for track: ' + label + '/' + str(n_tracks))

    if not out:
        raise ValueError('No velocities could be derived: no usable tracks')

    out = pd.concat(out)
    out = out.reset_index(drop=True)
    out = reset_track_indexes(out)
    return out
def radial_distribution_function_square_region(data, roi_corner=[250, 50], roi_width=750, roi_height=600, min_step=2, particle_diameter=78):
    """
    Calculates the radial distribution function in a rectangular ROI,
    averaging g(r) for all frames. All parameters must be in the same units
    (either pixels, cm or natural units)

    Parameters
    ----------
    data : pandas Dataframe
        It must have the usual structure with at least the following
        columns: ['frame', 'track', 'x', 'y']
    roi_corner : int list or tuple
        (x, y) coordinates of the ROI corner with the smallest x and y
        (called the bottom left corner in the original documentation).
        The default list is never modified.
    roi_width : int
        Extent of the ROI along x
    roi_height : int
        Extent of the ROI along y
    min_step : float
        This is the size of the distance step we use to calculate g(r).
        Reducing this will increase the resolution of data (at the cost of
        higher computing times)
    particle_diameter : float
        Used to express the results in terms of the particle's diameter
        (useful for plotting). Use 1 if no conversion desired

    Returns
    -------
    gr : pandas Dataframe
        Two columns, ['r', 'gr']: the distance of every step (in particle
        diameters) and the frame-averaged, normalised g(r) at that step.

    Raises
    ------
    ValueError
        If 'data' contains no frames.
    """
    # Kept as a local import, as in the original code
    from tqdm import tqdm

    frames = set(data.frame)
    n_frames = len(frames)
    if n_frames == 0:
        raise ValueError('data contains no frames; cannot compute g(r)')

    # The maximum radial distance we can get away from each particle is the
    # distance from the particle to the edge of the ROI. We add new columns
    # with that info.
    left, bottom = roi_corner[0], roi_corner[1]
    data = data.assign(
        distances_to_left_roi_wall=(data.x - left),
        distances_to_right_roi_wall=(left + roi_width - data.x),
        distances_to_top_roi_wall=(bottom + roi_height - data.y),
        distances_to_bottom_roi_wall=(data.y - bottom))
    wall_columns = ['distances_to_left_roi_wall',
                    'distances_to_right_roi_wall',
                    'distances_to_top_roi_wall',
                    'distances_to_bottom_roi_wall']
    # NOTE(review): the absolute value is taken after the minimum, so a
    # particle outside the ROI (negative distance) ends up with a positive
    # value -- confirm this is intended.
    distance_to_closest_wall = np.abs(data.loc[:, wall_columns].min(axis=1))
    data = data.assign(distance_to_closest_wall=distance_to_closest_wall)

    list_grs = []
    n_particles = 0
    # Looping for each frame and storing the instant g(r)
    for f in tqdm(frames):
        printp(f'Calculating g(r), frame: {f} / {n_frames}')
        sub_data = data[data['frame'] == f]
        list_grs.append(instant_radial_distribution_function_square_region(
            sub_data, min_step=min_step))
        # Accumulated to get the mean number of particles per frame
        n_particles += len(sub_data)

    # Mean for all frames
    gr = pd.concat(list_grs, axis=1).mean(axis=1)
    # First row's g(r) must be zero (0)
    gr.iloc[0] = 0

    # New column with distances (in terms of particle's diameter). Built
    # from an integer range so its length always equals len(gr); np.arange
    # with a non-integer step can be one element off.
    distances = pd.DataFrame(np.arange(len(gr)) * min_step / particle_diameter)
    gr = pd.concat((distances, gr), axis=1)
    gr.columns = ['r', 'gr']

    # Last normalization
    area = roi_width * roi_height
    mean_N = n_particles / n_frames
    gr['gr'] = gr['gr'] * area / (2 * mean_N)
    return gr
def radial_distribution_function(data, roi_center=[650, 400], roi_radius=390, min_step=2, particle_diameter=78):
    """
    Calculates the radial distribution function in a circular ROI,
    averaging g(r) for all frames. All parameters must be in the same units
    (either pixels, cm or natural units)

    Parameters
    ----------
    data : pandas Dataframe
        It must have the usual structure with at least the following
        columns: ['frame', 'track', 'x', 'y']
    roi_center : int list or tuple
        (x, y) coordinates of the central point of the region of interest.
        The default list is never modified.
    roi_radius : int
        Radius of the region of interest
    min_step : float
        This is the size of the distance step we use to calculate g(r).
        Reducing this will increase the resolution of data (at the cost of
        higher computing times)
    particle_diameter : float
        Used to express the results in terms of the particle's diameter
        (useful for plotting). Use 1 if no conversion desired

    Returns
    -------
    gr : pandas Dataframe
        Two columns, ['r', 'gr']: the distance of every step (in particle
        diameters) and the frame-averaged, normalised g(r) at that step.

    Raises
    ------
    ValueError
        If 'data' contains no frames.
    """
    frames = set(data.frame)
    n_frames = len(frames)  # hoisted: it was recomputed for every frame
    if n_frames == 0:
        raise ValueError('data contains no frames; cannot compute g(r)')

    # The maximum radial distance we can get away from each particle is the
    # distance from the particle to the edge of the ROI. We add a new column
    # with that info.
    distances_to_center = distanceToCenter(data.x, data.y,
                                           roi_center[0], roi_center[1])
    data = data.assign(dist_edge=roi_radius - distances_to_center)

    list_grs = []
    n_particles = 0
    # Looping for each frame and storing the instant g(r)
    for f in frames:
        printp('Calculating g(r), frame: ' + str(f) + ' / ' + str(n_frames))
        list_grs.append(instant_radial_distribution_function(
            data, f, roi_center=roi_center, roi_radius=roi_radius,
            min_step=min_step))
        # Accumulated to get the mean number of particles per frame
        n_particles += len(data[data.frame == f])

    # Mean for all frames
    gr = pd.concat(list_grs, axis=1).mean(axis=1)
    # First row's g(r) must be zero (0)
    gr.iloc[0] = 0

    # New column with distances (in terms of particle's diameter). Built
    # from an integer range so its length always equals len(gr); np.arange
    # with a non-integer step can be one element off.
    distances = pd.DataFrame(np.arange(len(gr)) * min_step / particle_diameter)
    gr = pd.concat((distances, gr), axis=1)
    gr.columns = ['r', 'gr']

    # Last normalization
    area = np.pi * (roi_radius**2)
    mean_N = n_particles / n_frames
    gr['gr'] = gr['gr'] * area / (2 * mean_N)
    return gr