def mean_lags(tracks):
    """Calculate mean lag in velocity and turning angle of track(s)

    For every track (grouped by ``track_identifiers(tracks)``) the mean of the
    squared differences between consecutive 'Velocity' and 'Turning Angle'
    values is computed. Tracks for which either of the two means is NaN are
    left out.

    Returns the average of the per-track means, velocity first and turning
    angle second.
    """
    lag_columns = ['Velocity', 'Turning Angle']
    means = []
    for _, track in tracks.groupby(track_identifiers(tracks)):
        # Reduce column-wise explicitly: np.mean() on a DataFrame collapses
        # both axes into a single scalar on recent pandas versions.
        lags = (track[lag_columns].diff()**2).mean(axis=0)
        if not lags.isnull().any():
            means.append(lags.values)
    return np.mean(means, axis=0)
def _split_at_skip(tracks, jump_threshold, verbose): """Split track if timestep is missing or too long""" if 'Time' not in tracks.columns: return if not tracks.index.is_unique: tracks.reset_index(drop=True, inplace=True) if 'Track_ID' in tracks.columns: max_track_id = tracks['Track_ID'].max() else: max_track_id = 0 for criterium, track in tracks.groupby(track_identifiers(tracks)): timesteps = track['Time'].diff() skips = ((timesteps - timesteps.min())/timesteps.min()).round() if skips.max() > 0: index = track.index if 'Track_ID' in track.columns: tracks.loc[index, 'Orig. Track_ID'] = track['Track_ID'] skip_sum = skips.fillna(0).cumsum() tracks.loc[index, 'Track_ID'] = max_track_id + 1 + skip_sum max_track_id += max(skip_sum) + 1 if verbose: print(' Warning: Split track {} with non-uniform timesteps.' .format(criterium)) if jump_threshold is None: return for criterium, track in tracks.groupby(track_identifiers(tracks)): positions = track[['X', 'Y', 'Z']] dr = positions.diff() dr_norms = np.linalg.norm(dr, axis=1) skips = dr_norms > jump_threshold if skips.max() > 0: index = track.index if 'Track_ID' in track.columns: tracks.loc[index, 'Orig. Track_ID'] = track['Track_ID'] skip_sum = skips.cumsum() tracks.loc[index, 'Track_ID'] = max_track_id + 1 + skip_sum max_track_id += max(skip_sum) + 1 if verbose: print(' Warning: Split track {} with jump > {}um.' .format(criterium, jump_threshold))
def plot_arrest(tracks, condition='Condition', arrest_velocity=3, save=False,
                context='notebook'):
    """Plot velocity aligned to minimum and distribution of arrested steps

    Tracks without a 'Displacement' column are passed through ``analyze``
    first. If the ``condition`` column is missing, all tracks are put into the
    condition 'Default'.

    Left panel: per condition the median velocity over the time relative to
    each track's velocity minimum, with shaded 25%-75% and min-max bands.
    Right panel: per condition the histogram of the numbers of consecutive
    steps with a velocity below ``arrest_velocity``.

    The figure is written to 'Arrest_<conditions>.png' if ``save`` is true,
    otherwise it is shown.
    """
    if 'Displacement' not in tracks.columns:
        tracks = analyze(tracks)

    if condition not in tracks.columns:
        # Copy, so the caller's frame does not silently grow a column
        tracks = tracks.copy()
        tracks[condition] = 'Default'

    sns.set(style='ticks', context=context)
    fig, axes = plt.subplots(1, 2, figsize=(8, 5.5))
    axes[0].set_xlabel('Time to minimum')
    axes[0].set_ylabel('Velocity')
    axes[1].set_xlabel(
        r'Consecutive steps below {} $\mu$m/min'.format(arrest_velocity))
    axes[1].set_ylabel('Proportion')

    for i, (cond, cond_tracks) in enumerate(tracks.groupby(condition)):
        aligned_velocities = []
        arrested_segment_lengths = []
        for _, track in cond_tracks.groupby(track_identifiers(cond_tracks)):
            if track['Velocity'].notnull().any():
                # idxmin() yields the index label that .loc expects, whereas
                # argmin() yields a position on current pandas.
                min_index = track['Velocity'].idxmin()
                track_velocities = pd.Series(
                    track['Velocity'].values,
                    (track['Time'] - track.loc[min_index, 'Time']).values)
                aligned_velocities.append(track_velocities.dropna())

            arrested = (track['Velocity'] < arrest_velocity).values
            # Cut wherever the arrested state flips, so each segment is
            # entirely arrested or entirely moving.
            arrested_segments = np.split(
                arrested, np.where(np.diff(arrested))[0] + 1)
            arrested_segment_lengths.extend(
                int(segment.sum()) for segment in arrested_segments
                if segment.sum() > 0)

        color = sns.color_palette(n_colors=i+1)[-1]

        if aligned_velocities:
            velocities = pd.concat(aligned_velocities)
            velocities.index = np.round(velocities.index, 5)  # Handle non-integer 'Times'
            by_time = velocities.groupby(velocities.index)
            median = by_time.median()
            axes[0].plot(median.index, median, color=color)
            axes[0].fill_between(median.index, by_time.quantile(0.25),
                                 by_time.quantile(0.75), color=color, alpha=0.2)
            axes[0].fill_between(median.index, by_time.min(), by_time.max(),
                                 color=color, alpha=0.2)

        if arrested_segment_lengths:
            # Edges at 0.5, 1.5, ..., longest + 0.5 give one bin per integer
            # length, including the longest segment.
            longest = max(arrested_segment_lengths)
            sns.distplot(arrested_segment_lengths,
                         bins=np.arange(1, longest + 2) - 0.5,
                         norm_hist=True, kde=False, color=color, ax=axes[1])

    axes[0].set_xlim([-3, 3])
    axes[1].get_xaxis().set_major_locator(plt.MaxNLocator(integer=True))
    sns.despine()
    plt.tight_layout()

    if save:
        conditions = [cond.replace('= ', '')
                      for cond in tracks[condition].unique()]
        plt.savefig('Arrest_' + '-'.join(conditions) + '.png', dpi=300)
    else:
        plt.show()
def plot_dr(raw_tracks, save=False, condition='Condition', context='notebook'):
    """Plot the differences in X, Y (and Z) to show biases

    Works on a copy of ``raw_tracks`` that is made unique and split at
    non-uniform timesteps first. Three panels are drawn: the distribution of
    the step components per dimension, the joint distribution of the X and Y
    components, and a lag plot of each component against its predecessor
    within the same 'Track_ID'.

    The figure is written to 'dr_<conditions>.png' if ``save`` is true,
    otherwise it is shown.
    """
    tracks = raw_tracks.copy()
    # Both helpers require these arguments; run them quietly, without
    # splitting at jumps.
    _uniquize_tracks(tracks, verbose=False)
    _split_at_skip(tracks, jump_threshold=None, verbose=False)

    if condition not in tracks.columns:
        tracks[condition] = 'Default'

    dimensions = [dim for dim in ['X', 'Y', 'Z'] if dim in tracks.columns]

    # Collect the per-track steps and join them once
    steps_per_track = []
    for _, track in tracks.groupby(track_identifiers(tracks)):
        steps = track[dimensions].diff().dropna()
        steps_per_track.append(
            steps.assign(Track_ID=track['Track_ID'].iloc[0]))
    differences = pd.concat(steps_per_track)

    sns.set(style="ticks", palette='deep', context=context)
    fig, axes = plt.subplots(ncols=3, figsize=(15.5,5.5))
    plt.setp(axes, yticks=[])
    plt.setp(axes, xticks=[])

    axes[0].set_title(r'$\Delta \vec r$')
    axes[0].set_xticks([0])
    axes[0].set_xticklabels([r'$0$'])
    for dimension in dimensions:
        sns.kdeplot(differences[dimension], shade=True, ax=axes[0])

    axes[1].set_title('Joint Distribution')
    axes[1].set_xlabel(r'$\Delta x$')
    axes[1].set_ylabel(r'$\Delta y$')
    axes[1].axis('equal')
    axes[1].set_xlim([differences['X'].quantile(0.1),
                      differences['X'].quantile(0.9)])
    axes[1].set_ylim([differences['Y'].quantile(0.1),
                      differences['Y'].quantile(0.9)])
    sns.kdeplot(differences[['X', 'Y']], shade=False, cmap='Greys', ax=axes[1])

    axes[2].set_title(r'$\Delta \vec r$ Lag Plot')
    axes[2].axis('equal')
    axes[2].set_xlabel(r'$\Delta r_i(t)$')
    axes[2].set_ylabel(r'$\Delta r_i(t+1)$')
    for i, dim in enumerate(dimensions):
        color = sns.color_palette()[i]
        for _, track in differences.groupby('Track_ID'):
            axes[2].scatter(track[dim], track[dim].shift(), facecolors=color)

    sns.despine()
    plt.tight_layout()

    if save:
        conditions = [cond.replace('= ', '')
                      for cond in tracks[condition].unique()]
        plt.savefig('dr_' + '-'.join(conditions) + '.png', dpi=300)
    else:
        plt.show()
def _uniquize_tracks(tracks, verbose): """Cluster tracks, if not unique""" if 'Time' not in tracks.columns: return tracks['Orig. Index'] = tracks.index if not tracks.index.is_unique: tracks.reset_index(drop=True, inplace=True) if 'Track_ID' in tracks.columns: max_track_id = tracks['Track_ID'].max() else: max_track_id = 0 for identifiers, track in tracks.groupby(track_identifiers(tracks)): if sum(track['Time'].duplicated()) != 0: n_clusters = track['Time'].value_counts().max() track = track.copy() index = track.index if 'Track_ID' in track.columns: tracks.loc[index, 'Orig. Track_ID'] = track['Track_ID'] clusters = AgglomerativeClustering(n_clusters).fit( track[['X', 'Y', 'Z']]) track.loc[:, 'Cluster'] = clusters.labels_ if sum(track[['Cluster', 'Time']].duplicated()) != 0: clusters = AgglomerativeClustering(n_clusters).fit( track[['Orig. Index']]) track.loc[:, 'Cluster'] = clusters.labels_ if sum(track[['Cluster', 'Time']].duplicated()) == 0: tracks.loc[index, 'Track_ID'] = max_track_id+1+clusters.labels_ max_track_id += n_clusters pd.set_option('display.max_rows', 1000) if verbose: print(' Warning: Split non-unique track {} by clustering.' .format(identifiers)) else: tracks.drop(index, inplace=True) if verbose: print(' Warning: Delete non-unique track {}.' .format(identifiers))
def plot_situation(tracks, n_tracks=6*3, n_dcs=50, tcz_volume=0.524e9/400,
                   min_distance=0, min_distance_std=200/10, zoom=1,
                   t_detail=None, save=False, context='notebook'):
    """Plot some T cell tracks, DC positions and T cell zone volume

    The 3D panel shows randomly chosen tracks of every 'Condition', ``n_dcs``
    randomly placed points inside a sphere of volume ``tcz_volume`` (not
    closer than ``min_distance`` to its center) and three circles outlining
    that sphere. If ``min_distance_std`` is not zero, every track is shifted
    by a random, normally distributed offset with that standard deviation
    before plotting. If ``t_detail`` is given, only the part of each track
    with 'Time' <= ``t_detail*60`` is drawn.

    The two small panels show, per condition, histograms of the time spent
    within the sphere and of the maximal distance from its center.

    ``save`` may be True (file 'situation.png'), a file name, or false to
    show the figure instead.
    """
    sns.set(style='ticks', context=context)

    _ = plt.figure(figsize=(8, 5.5))
    gs = gridspec.GridSpec(2,3)
    space_ax = plt.subplot(gs[:,:-1], projection='3d')
    time_ax = plt.subplot(gs[0,-1])
    reach_ax = plt.subplot(gs[1,-1])
    plt.locator_params(nbins=6)

    space_ax.set_title('{} T Cell Tracks & {} DCs'.format(n_tracks, n_dcs))

    n_conditions = len(tracks['Condition'].unique())
    palette = itertools.cycle(sns.color_palette())

    if min_distance_std != 0:
        moved_tracks = tracks.copy()
        for track_id in tracks['Track_ID'].unique():
            moved_tracks.loc[moved_tracks['Track_ID'] == track_id,
                             ['X', 'Y', 'Z']] += \
                np.random.randn(3)*min_distance_std
    else:
        moved_tracks = tracks

    # The sample size has to be an integer for np.random.choice
    tracks_per_condition = int(n_tracks // n_conditions)

    for i, (cond, cond_tracks) in enumerate(moved_tracks.groupby('Condition')):
        track_ids = cond_tracks['Track_ID'].unique()
        # Draw without replacement so no track is picked twice, and never
        # ask for more tracks than there are.
        choice = np.random.choice(
            track_ids, min(tracks_per_condition, len(track_ids)),
            replace=False)
        chosen_tracks = cond_tracks[cond_tracks['Track_ID'].isin(choice)]
        for _, track in chosen_tracks.groupby(track_identifiers(chosen_tracks)):
            if t_detail:
                track = track[track['Time'] <= t_detail*60]
            if n_conditions > 1:
                color = sns.color_palette(n_colors=i+1)[-1]
            else:
                color = next(palette)
            space_ax.plot(track['X'].values, track['Y'].values,
                          track['Z'].values, color=color)

    # Points distributed uniformly in the shell between min_distance and the
    # radius of a sphere with volume tcz_volume
    tcz_radius = (3*tcz_volume/(4*np.pi))**(1/3)
    ratio = (min_distance/tcz_radius)**3
    r = tcz_radius*(ratio + (1 - ratio)*np.random.rand(n_dcs))**(1/3)
    theta = np.random.rand(n_dcs)*2*np.pi
    phi = np.arccos(2*np.random.rand(n_dcs) - 1)
    dcs = pd.DataFrame({
        'X': r*np.sin(theta)*np.sin(phi),
        'Y': r*np.cos(theta)*np.sin(phi),
        'Z': r*np.cos(phi)})
    space_ax.scatter(dcs['X'], dcs['Y'], dcs['Z'], c='y')

    for direction in ['x', 'y', 'z']:
        circle = Circle((0, 0), tcz_radius, fill=False, linewidth=2)
        space_ax.add_patch(circle)
        art3d.pathpatch_2d_to_3d(circle, z=0, zdir=direction)

    time_ax.set_xlabel('Time within Lymph Node [h]')
    time_ax.set_ylabel('Probab. Density')

    reach_ax.set_xlabel(r'Maximal Reach [$\mu$m]')
    reach_ax.set_ylabel('Probab. Density')

    def residence_time(track):
        # Mean timestep (divided by 60) times the number of positions
        # inside the sphere
        inside = np.linalg.norm(track[['X', 'Y', 'Z']], axis=1) < tcz_radius
        return track['Time'].diff().mean()/60*len(track[inside])

    for i, (cond, cond_tracks) in enumerate(moved_tracks.groupby('Condition')):
        color = sns.color_palette(n_colors=i+1)[-1]
        residence_times = [residence_time(track)
                           for _, track in cond_tracks.groupby('Track_ID')]
        # A histogram of identical values is skipped
        if not all(time == residence_times[0] for time in residence_times):
            sns.distplot(residence_times, kde=False, norm_hist=True,
                         ax=time_ax, label=cond, color=color)
        max_reaches = [max(np.linalg.norm(track[['X', 'Y', 'Z']], axis=1))
                       for _, track in cond_tracks.groupby('Track_ID')]
        sns.distplot(max_reaches, kde=False, norm_hist=True, ax=reach_ax,
                     label=cond, color=color)

    time_ax.set_yticks([])
    # NOTE(review): this marks the median of the last condition only -
    # confirm whether a line per condition was intended.
    time_ax.axvline(np.median(residence_times), c='0', ls=':')
    sns.despine(ax=time_ax)
    reach_ax.set_yticks([])
    reach_ax.legend()
    reach_ax.axvline(tcz_radius, c='0', ls=':')
    sns.despine(ax=reach_ax)
    equalize_axis3d(space_ax, zoom)
    plt.tight_layout()

    if save is True:
        save = 'situation.png'

    if save:
        plt.savefig(save, dpi=300)
    else:
        plt.show()
def summarize(tracks, arrest_velocity=3, skip_steps=4):
    """Summarize track statistics, e.g. mean velocity per track

    Tracks without a 'Displacement' column are passed through ``analyze``
    first. The returned DataFrame has one row per track (grouped by
    ``track_identifiers(tracks)``) with its identifying columns, if present,
    and statistics such as mean velocity and turning angle, track duration,
    arrest coefficient (share of velocities below ``arrest_velocity``),
    motility coefficient, confinement ratios, mean squared lags, the maximal
    turn over ``skip_steps + 1`` steps with its time, and convex hull area and
    volume per step. Means of surface area, volume and sphericity are added
    if the respective columns exist.

    Prints the number of tracks and timesteps per condition; tracks without a
    'Condition' column are reported as condition 'Default'.
    """
    if 'Displacement' not in tracks.columns:
        tracks = analyze(tracks)

    print('\nSummarizing track statistics')

    summary = pd.DataFrame()
    # Counted while looping, so the report also works for tracks that have
    # no 'Condition' column.
    timesteps_per_condition = {}

    for i, (_, track) in enumerate(tracks.groupby(track_identifiers(tracks))):
        if 'Track_ID' in track.columns:
            summary.loc[i, 'Track_ID'] = track.iloc[0]['Track_ID']
        if 'Condition' in track.columns:
            cond = track.iloc[0]['Condition']
        else:
            cond = 'Default'
        summary.loc[i, 'Condition'] = cond
        timesteps_per_condition[cond] = \
            timesteps_per_condition.get(cond, 0) + len(track)
        if 'Sample' in track.columns:
            summary.loc[i, 'Sample'] = track.iloc[0]['Sample']

        summary.loc[i, 'Mean Velocity'] = track['Velocity'].mean()
        summary.loc[i, 'Mean Turning Angle'] = track['Turning Angle'].mean()
        if 'Plane Angle' in track.columns:
            summary.loc[i, 'Mean Plane Angle'] = track['Plane Angle'].mean()

        summary.loc[i, 'Track Duration'] = \
            track['Time'].iloc[-1] - track['Time'].iloc[0]

        n_velocities = len(track['Velocity'].dropna())
        n_arrested = len(track[track['Velocity'] < arrest_velocity])
        summary.loc[i, 'Arrest Coefficient'] = \
            n_arrested/n_velocities if n_velocities else np.nan

        if 'Z' in track.columns:
            positions = track[['X', 'Y', 'Z']]
            ndim = 3
        else:
            positions = track[['X', 'Y']]
            ndim = 2

        final_displacement = track['Displacement'].iloc[-1]
        max_track_time = track['Track Time'].max()

        summary.loc[i, 'Motility Coefficient'] = np.pi* \
            final_displacement/(2*ndim)/max_track_time

        dr = positions.diff()
        dr_norms = np.linalg.norm(dr, axis=1)
        path_length = dr_norms[1:].sum()

        summary.loc[i, 'Confinement Ratio'] = final_displacement/path_length
        summary.loc[i, 'Corr. Confinement Ratio'] = \
            final_displacement/path_length*np.sqrt(max_track_time)

        summary.loc[i, 'Mean Sq. Velocity Lag'] = \
            (track['Velocity'].diff()**2).mean()
        summary.loc[i, 'Mean Sq. Turn. Angle Lag'] = \
            (track['Turning Angle'].diff()**2).mean()

        if len(track) > skip_steps + 1:
            # Angle between each step and the step skip_steps later
            dot_products = (dr.shift(-skip_steps)*dr).sum(axis=1)
            norm_products = dr_norms[skip_steps:]*dr_norms[:-skip_steps]
            turns = np.arccos(
                dot_products.iloc[1:-skip_steps]/norm_products[1:])

            if turns.notnull().any():
                # Series.max() ignores NaN like idxmax() does; the builtin
                # max() depends on where a NaN sits.
                turn_index = turns.idxmax()
                summary.loc[i, 'Max. Turn Over {} Steps'.format(skip_steps + 1)] = \
                    turns.max()
                summary.loc[i, 'Turn Time'] = track.loc[turn_index, 'Time']

                cross_product = np.cross(
                    dr.shift(-skip_steps).loc[turn_index], dr.loc[turn_index])
                normal_vec = cross_product/np.linalg.norm(cross_product)
                summary.loc[i, 'Skew Lines Distance'] = abs(np.sum(
                    (positions.shift(-skip_steps).loc[turn_index] -
                     positions.loc[turn_index])*normal_vec))

        # Hull over the available dimensions, so 2D tracks work as well
        hull = ConvexHull(positions.values)
        summary.loc[i, 'Scan. Area/Step'] = hull.area/len(track)
        summary.loc[i, 'Scan. Vol./Step'] = hull.volume/len(track)

        if 'Surface Area (µm2)' in track.columns:
            summary.loc[i, 'Mean Surface Area (µm2)'] = \
                track['Surface Area (µm2)'].mean()
        if 'Volume (µm3)' in track.columns:
            summary.loc[i, 'Mean Volume (µm3)'] = track['Volume (µm3)'].mean()
        if 'Surface Area (µm2)' in track.columns and \
                'Volume (µm3)' in track.columns:
            summary.loc[i, 'Mean Sphericity'] = (
                np.pi**(1/3)*(6*track['Volume (µm3)'])**(2/3)
                /track['Surface Area (µm2)']).mean()

    if summary.empty:
        # No tracks, nothing to report
        return summary

    for cond, cond_summary in summary.groupby('Condition'):
        print(' {} tracks in {} with {} timesteps in total.'.format(
            len(cond_summary), cond, timesteps_per_condition[cond]))

    return summary
def plot_tracks(raw_tracks, summary=None, draw_turns=True, n_tracks=25,
                condition='Condition', context='notebook', save=False):
    """Plot tracks

    Works on a copy of ``raw_tracks`` that is made unique first. About
    ``n_tracks`` tracks are drawn in total, split evenly between the values of
    the ``condition`` column ('Default' if the column is missing), in 3D if
    there is a 'Z' column. With a ``summary`` and ``draw_turns`` the tracks
    with the largest maximal turns are chosen, drawn transparently, and the
    steps around the turn are drawn opaque on top; otherwise the tracks are
    chosen at random.

    Picking a track prints its summary row, or its ID without a summary.
    The figure is written to 'Tracks<conditions>.png' if ``save`` is true,
    otherwise it is shown.
    """
    tracks = raw_tracks.copy()
    # The helper requires the verbosity flag; run it quietly
    _uniquize_tracks(tracks, verbose=False)

    show_turns = summary is not None and draw_turns

    if isinstance(summary, pd.DataFrame):
        # The step count is read from the 'Max. Turn Over N Steps' column
        skip_steps = int(next(word
                              for column in summary.columns
                              for word in column.split() if word.isdigit()))

    alpha = 0.33 if show_turns else 1

    if condition not in tracks.columns:
        tracks[condition] = 'Default'

    n_conditions = len(tracks[condition].unique())
    # The sample size has to be an integer for slicing and np.random.choice
    tracks_per_condition = int(n_tracks/n_conditions)

    sns.set(style='ticks', context=context)
    fig = plt.figure(figsize=(12,12))
    if 'Z' in tracks.columns:
        ax = fig.add_subplot(111, projection='3d')
    else:
        ax = fig.add_subplot(111, aspect='equal')

    labels = []
    for i, (cond, cond_tracks) in enumerate(tracks.groupby(condition)):
        too_many = \
            len(cond_tracks['Track_ID'].unique()) > n_tracks/n_conditions
        if show_turns:
            cond_summary = summary[summary[condition] == cond]
            max_turn_column = next(column for column in summary.columns
                                   if column.startswith('Max. Turn'))
            if too_many:
                choice = cond_summary.sort_values(
                    max_turn_column, ascending=False
                )['Track_ID'][:tracks_per_condition]
                cond_tracks = cond_tracks[cond_tracks['Track_ID'].isin(choice)]
        elif too_many:
            choice = np.random.choice(cond_tracks['Track_ID'].unique(),
                                      tracks_per_condition, replace=False)
            cond_tracks = cond_tracks[cond_tracks['Track_ID'].isin(choice)]

        color = sns.color_palette(n_colors=i+1)[-1]
        for _, track in cond_tracks.groupby(track_identifiers(cond_tracks)):
            labels.append(cond)
            track_id = track['Track_ID'].iloc[0]
            if 'Z' in tracks.columns:
                ax.plot(track['X'].values, track['Y'].values,
                        track['Z'].values, color=color, alpha=alpha,
                        label=track_id, picker=5)
            else:
                ax.plot(track['X'].values, track['Y'].values,
                        color=color, alpha=alpha, label=track_id, picker=5)

            if not show_turns or 'Turn Time' not in cond_summary.columns:
                continue
            turn_time = cond_summary[
                cond_summary['Track_ID'] == track_id]['Turn Time']
            if turn_time.empty or pd.isnull(turn_time.values[0]):
                # No turn was determined for this track, nothing to highlight
                continue
            turn_loc = track.index.get_loc(
                track[np.isclose(track['Time'], turn_time.values[0])]
                .index.values[0])
            turn_times = track['Time'].iloc[turn_loc - 1:turn_loc + skip_steps]
            turn = track[track['Time'].isin(turn_times)]
            if 'Z' in tracks.columns:
                ax.plot(turn['X'].values, turn['Y'].values, turn['Z'].values,
                        color=color)
            else:
                ax.plot(turn['X'].values, turn['Y'].values, color=color)

    def on_pick(event):
        track_id = event.artist.get_label()
        if summary is not None:
            print(summary[summary['Track_ID'] == float(track_id)]
                  [['Track_ID', 'Condition', 'Mean Velocity', 'Track Duration']])
        else:
            print('Track_ID: ' + track_id)

    fig.canvas.mpl_connect('pick_event', on_pick)

    if 'Z' in tracks.columns:
        equalize_axis3d(ax)
    else:
        sns.despine()

    # One legend entry per condition: later handles of the same condition
    # replace earlier ones in the ordered mapping
    handles, _ = ax.get_legend_handles_labels()
    unique_entries = OrderedDict(zip(labels, handles))
    ax.legend(unique_entries.values(), unique_entries.keys())
    plt.tight_layout()

    if save:
        conditions = [cond.replace('= ', '')
                      for cond in tracks[condition].unique()]
        plt.savefig('Tracks' + '-'.join(conditions) + '.png', dpi=300)
    else:
        plt.show()
def analyze(raw_tracks, uniform_timesteps=True, min_length=6,
            jump_threshold=None, verbose=True):
    """Return dataframe with velocity, turning angle & plane angle

    Works on a copy of ``raw_tracks``. Without a 'Time' column the index is
    used as time. Otherwise non-unique tracks are clustered or removed and,
    if ``uniform_timesteps`` is set, tracks are split at non-uniform
    timesteps and at jumps larger than ``jump_threshold``.

    Tracks (grouped by ``track_identifiers(tracks)``) with fewer than
    ``min_length`` timesteps are deleted. For the remaining ones the columns
    'Track Time', 'Displacement', 'Velocity', 'Turning Angle' and
    'Plane Angle' are added; tracks without 'Z' are treated as lying in the
    plane Z = 0.

    With ``verbose`` every split and deletion is reported, otherwise only a
    single warning per kind is printed.
    """
    print('\nAnalyzing tracks')

    tracks = raw_tracks.copy()

    if 'Time' not in tracks.columns:
        print(' Warning: no time given, using index!')
        tracks['Time'] = tracks.index
        if not tracks.index.is_unique:  # For inplace analysis!
            tracks.reset_index(drop=True, inplace=True)
    else:
        _uniquize_tracks(tracks, verbose)
        if uniform_timesteps:
            _split_at_skip(tracks, jump_threshold, verbose)

    if not verbose and 'Orig. Track_ID' in tracks.columns:
        print(' Warning: Some tracks were split, use verbose=True for more info.')

    # Counted directly, so the warning does not depend on a 'Track_ID' column
    n_deleted = 0

    for criterium, track in tracks.groupby(track_identifiers(tracks)):
        if len(track) < min_length:
            tracks.drop(track.index, inplace=True)
            n_deleted += 1
            if verbose:
                print(' Warning: Delete track {} with {} timesteps.'
                      .format(criterium, len(track)))
            continue

        tracks.loc[track.index, 'Track Time'] = \
            (track['Time'] - track['Time'].iloc[0]).round(4)

        if 'Z' in track.columns:
            positions = track[['X', 'Y', 'Z']]
        else:
            positions = track[['X', 'Y']].copy()
            positions['Z'] = 0

        tracks.loc[track.index, 'Displacement'] = \
            np.linalg.norm(positions - positions.iloc[0], axis=1)

        dr = positions.diff()
        dr_norms = np.linalg.norm(dr, axis=1)

        tracks.loc[track.index, 'Velocity'] = dr_norms/track['Time'].diff()

        # Angle between each step and the following one
        dot_products = (dr.shift(-1)*dr).sum(axis=1)
        norm_products = dr_norms[1:]*dr_norms[:-1]

        tracks.loc[track.index, 'Turning Angle'] = \
            np.arccos(dot_products.iloc[:-1]/norm_products)

        tracks.loc[track.index, 'Plane Angle'] = np.nan

        # Angle between the normals of the planes spanned by consecutive
        # pairs of steps
        n_vectors = np.cross(dr, dr.shift())
        n_norms = np.linalg.norm(n_vectors, axis=1)
        dot_products = np.sum(n_vectors[1:]*n_vectors[:-1], axis=1)
        norm_products = n_norms[1:]*n_norms[:-1]
        angles = np.arccos(dot_products/norm_products)
        cross_products = np.cross(n_vectors[1:], n_vectors[:-1])
        # .values replaces DataFrame.as_matrix(), which recent pandas lacks
        cross_dot_dr = np.sum(cross_products[2:]*dr.values[2:-1], axis=1)
        cross_norms = np.linalg.norm(cross_products[2:], axis=1)
        signs = cross_dot_dr/cross_norms/dr_norms[2:-1]

        if 'Z' in track.columns:
            tracks.loc[track.index[2:-1], 'Plane Angle'] = signs*angles[2:]
        else:
            tracks.loc[track.index[2:-1], 'Plane Angle'] = angles[2:]

    if not verbose and n_deleted:
        print(' Warning: Some tracks were deleted, use verbose=True for more info.')

    return tracks