def reconstruct_matched_datasets_mean_cell(self):
    """Reconstruct and save the mean shape of the matched cells per dataset.

    For each dataset in ``self.datasets`` the rows of ``self.result`` that
    are flagged ``True`` under ``('base', ds)`` and under ``(ds, "Match")``
    are pooled. Their shape-mode columns are multiplied by the matching
    entries of ``self.norm_stds``, averaged into a single row and passed to
    ``self.space.invert``. One VTK mesh per alias and one GIF per projection
    are written to ``self.output / "avgshape"``.
    """
    lrec = 2 * self.control.get_lmax()
    outdir = self.output / "avgshape"
    outdir.mkdir(parents=True, exist_ok=True)

    for ds in list(self.datasets):
        # Explicit elementwise comparison with True is kept on purpose so
        # that the selection is the same as in the original filter.
        base_flags = self.result.loc['base', ds]
        ct_indices = base_flags.loc[base_flags == True].index.tolist()  # noqa: E712
        match_flags = self.result.loc[ds, "Match"]
        pt_indices = match_flags.loc[match_flags == True].index.tolist()  # noqa: E712

        # Prefix every selected index with the alias it was selected under.
        indices = [("base", *idx) for idx in ct_indices]
        indices += [(ds, *idx) for idx in pt_indices]

        shape_modes = self.control.get_shape_modes()
        matrix = self.result.loc[indices, shape_modes].copy()
        for sm in shape_modes:
            matrix[sm] *= self.norm_stds[sm]
        mean_point = matrix.values.mean(axis=0, keepdims=True)

        inverted = self.space.invert(mean_point)
        row = inverted.loc[inverted.index[0]]

        meshes = {}
        for alias in self.control.get_aliases_for_pca():
            mesh = viz.MeshToolKit.get_mesh_from_series(row, alias, lrec)
            shtools.save_polydata(
                mesh, str(outdir / f"{ds}_{alias}_base_matched.vtk")
            )
            meshes[alias] = [mesh]

        # NOTE(review): ``alias`` below is whatever the mesh loop above left
        # bound (its last alias). Kept as-is so output file names do not
        # change.
        projs = viz.MeshToolKit.get_2d_contours(meshes)
        for proj, contours in projs.items():
            gif_path = outdir / f"{ds}_{alias}_base_matched_{proj}.gif"
            viz.MeshToolKit.animate_contours(
                self.control, contours, save=gif_path
            )
def reconstruct_datasets_mean_cell(self):
    """Reconstruct and save the mean shape of every dataset and of "base".

    The rows of ``self.result`` selected by each dataset key (plus the
    ``"base"`` key) are averaged into a single row and passed to
    ``self.space.invert``. One VTK mesh per alias and one GIF per projection
    are written to ``self.output / "avgshape"``.
    """
    lrec = 2 * self.control.get_lmax()
    outdir = self.output / "avgshape"
    outdir.mkdir(parents=True, exist_ok=True)

    dataset_keys = list(self.datasets)
    dataset_keys.append("base")

    for ds in dataset_keys:
        mean_point = self.result.loc[ds].values.mean(axis=0, keepdims=True)
        inverted = self.space.invert(mean_point)
        row = inverted.loc[inverted.index[0]]

        meshes = {}
        for alias in self.control.get_aliases_for_pca():
            mesh = viz.MeshToolKit.get_mesh_from_series(row, alias, lrec)
            shtools.save_polydata(mesh, str(outdir / f"{ds}_{alias}.vtk"))
            meshes[alias] = [mesh]

        # NOTE(review): ``alias`` below is whatever the mesh loop above left
        # bound (its last alias). Kept as-is so output file names do not
        # change.
        projs = viz.MeshToolKit.get_2d_contours(meshes)
        for proj, contours in projs.items():
            gif_path = outdir / f"{ds}_{alias}_{proj}.gif"
            viz.MeshToolKit.animate_contours(
                self.control, contours, save=gif_path
            )
def recontruct_meshes(self, save_meshes=True):
    """Rebuild one mesh per row of ``self.df_coeffs`` and store them.

    Meshes are grouped as ``self.meshes[shape_mode][alias] -> list``. When
    ``self.df_coeffs`` has an ``{alias}_dx`` column, each mesh is translated
    by the row's ``{alias}_dx/_dy/_dz`` values.

    Parameters
    ----------
    save_meshes: bool
        If True, every mesh is also written as a VTK file under
        ``<staging>/shapemode/avgshape``.
    """
    self.meshes = {}
    # Reconstruct mesh with twice more detail than original parameterization
    lrec = 2 * self.control.get_lmax()
    abs_path_avgshape = self.control.get_staging() / "shapemode/avgshape"

    for sm, df_sm in self.df_coeffs.groupby("shape_mode"):
        meshes_by_alias = {}
        self.meshes[sm] = meshes_by_alias
        for alias in self.control.get_aliases_for_pca():
            alias_meshes = []
            meshes_by_alias[alias] = alias_meshes
            displacement_cols = [f'{alias}_d{axis}' for axis in ('x', 'y', 'z')]
            for _, row in df_sm.iterrows():
                mesh = self.get_mesh_from_series(row, alias, lrec)
                if f'{alias}_dx' in self.df_coeffs.columns:
                    mesh = self.translate_mesh_points(
                        mesh, row[displacement_cols].values
                    )
                if save_meshes:
                    vtk_path = abs_path_avgshape / f"{alias}_{sm}_{row.mpId}.vtk"
                    shtools.save_polydata(mesh, str(vtk_path))
                alias_meshes.append(mesh)
    return None
def _process_cell(
    cell_id: str,
    cell_details: pd.Series,
    struct: str,
    lmax: int,
    save_dir: Path,
) -> CellProcessResult:
    """Compute and save the spherical harmonic description of one cell.

    Reads the segmentation at ``cell_details.SegFilePath``, runs
    ``shparam.get_shcoeffs`` on it, and writes the initial mesh, the
    reconstructed mesh (both PLY) and the coefficients (CSV) into
    ``save_dir``.

    Parameters
    ----------
    cell_id: str
        Identifier used as the dataframe index and in the output file names.
    cell_details: pd.Series
        Row providing the ``SegFilePath`` attribute.
    struct: str
        Structure name, used in file names and stored in the result.
    lmax: int
        Forwarded to ``shparam.get_shcoeffs``.
    save_dir: Path
        Directory that receives the output files.

    Returns
    -------
    CellProcessResult
        Built from ``cell_id`` and a dict holding the saved file paths, the
        reconstruction error, the structure and the cell id.
    """
    # Alert of which cell we are processing
    log.info(f"Beginning processing of cell: {cell_id}")

    # Read segmentation image
    seg = AICSImage(cell_details.SegFilePath).get_image_data(
        "ZYX", S=0, T=0, C=0
    )

    # Get spherical harmonic decomposition of segmentation
    decomposition, extras = shparam.get_shcoeffs(image=seg, lmax=lmax, sigma=1)
    coeffs, grid_rec = decomposition
    _, mesh_init, grid_init, _transform = extras

    # Compute reconstruction error
    mean_sq_error = shtools.get_reconstruction_error(
        grid_input=grid_init, grid_rec=grid_rec
    )

    # Store spherical harmonic coefficients in dataframe by cell id
    df_coeffs = pd.DataFrame(coeffs, index=[cell_id])
    df_coeffs.index.name = "CellId"

    # Mesh reconstructed with the sh coefficients
    mesh_shparam = shtools.get_reconstruction_from_grid(grid=grid_rec)

    # Output locations, shared by the writers and the manifest entry
    initial_mesh_path = save_dir / f"{cell_id}.initial_{struct}.ply"
    shparam_mesh_path = save_dir / f"{cell_id}.shparam_{struct}.ply"
    coeffs_path = save_dir / f"{cell_id}.shparam_{struct}.csv"

    # Save meshes as PLY files compatible with both Blender and Paraview
    shtools.save_polydata(mesh=mesh_init, filename=str(initial_mesh_path))
    shtools.save_polydata(mesh=mesh_shparam, filename=str(shparam_mesh_path))

    # Save coeffs into a csv file in local staging
    df_coeffs.to_csv(str(coeffs_path))

    # Build dataframe of saved files to store in manifest
    data = {
        "InitialMeshFilePath": initial_mesh_path,
        "ShparamMeshFilePath": shparam_mesh_path,
        "CoeffsFilePath": coeffs_path,
        "MeanSqError": mean_sq_error,
        "Structure": struct,
        "CellId": cell_id,
    }

    # Alert completed
    log.info(f"Completed processing for cell: {cell_id}")
    return CellProcessResult(cell_id, data)
def animate_shape_modes_and_save_meshes(
    df_agg: pd.DataFrame,
    mode: str,
    save: Path,
    plot_limits: Optional[List] = None,
    fix_nuclear_position: Optional[tuple] = None,
    distributed_executor_address: Optional[str] = None,
):
    """
    Generate animated GIFs to illustrate cell and nuclear shape variation
    as a single shape space dimension is traversed. The function also saves
    the cell and nuclear shape in VTK polydata format.

    Parameters
    --------------------
    df_agg: pd.DataFrame
        Dataframe that contains the cell and nuclear SHE coefficients that
        will be reconstructed, one row per bin. Three animated GIFs are
        generated in total: one for each projection (xy, xz, and yz), each
        animated across the rows. NOTE: the columns ``dna_dxc``, ``dna_dyc``
        and ``dna_dzc`` are written into this dataframe in place.
    mode: str
        Either DNA, MEM or DNA_MEM to specify whether the shape space has
        been created based on nucleus, cell or jointly combined cell and
        nuclear shape.
    save: Path
        Path to save results.
    plot_limits: Optional[List] = None
        List of floats ``[hmin, hmax, vmin, vmax]`` to be used as x-axis
        limits and y-axis limits in the animated GIFs. Default values used
        for the single-cell images dataset are [-150, 150, -80, 80]. When
        None, the limits are derived from the reconstructed cell meshes.
    fix_nuclear_position: Optional[tuple] = None
        Use None here to not change the nuclear location relative to the
        cell. Otherwise, this should be a tuple like (df, bin_indexes),
        where df is a single cell dataframe that contains the columns
        necessary to correct the nuclear location relative to the cell.
        bin_indexes is a list of tuples (a, b), where a is an integer that
        specifies the bin number and b is a list of all data point ids
        from the single cell dataframe that fall into that bin.
    distributed_executor_address: Optional[str] = None
        Dask executor address.

    Return
    ------
    df_paths: pd.DataFrame
        Dataframe with path for VTK meshes and GIF files generated.
    """
    projections = [[0, 1], [0, 2], [1, 2]]
    axis_labels = "xyz"

    df_paths = []

    if fix_nuclear_position is not None:

        df, bin_indexes = fix_nuclear_position

        def process_this_index(index_row):
            '''
            Change the coordinate system of nuclear centroid
            from nuclear to the aligned cell.
            '''
            index, row = index_row

            dxc, dyc, dzc = transform_coords_to_mem_space(
                xo=row["dna_position_x_centroid_lcc"],
                yo=row["dna_position_y_centroid_lcc"],
                zo=row["dna_position_z_centroid_lcc"],
                # Cell alignment angle
                angle=row["mem_shcoeffs_transform_angle_lcc"],
                # Cell centroid
                cm=[
                    row[f"mem_position_{k}_centroid_lcc"]
                    for k in ["x", "y", "z"]
                ],
            )

            return (dxc, dyc, dzc)

        # Change the reference system of the vector that
        # defines the nuclear location relative to the cell
        # of all cells that fall into the same bin.
        for (b, indexes) in bin_indexes:
            # Subset with cells from the same bin.
            df_tmp = df.loc[df.index.isin(indexes)]

            # Change reference system for all cells in parallel.
            with DistributedHandler(distributed_executor_address) as handler:
                nuclei_cm_fix = handler.batched_map(
                    process_this_index,
                    list(df_tmp.iterrows()),
                )

            # Average changed nuclear centroid over all cells
            mean_nuclei_cm_fix = np.array(nuclei_cm_fix).mean(axis=0)

            # Store
            df_agg.loc[b, "dna_dxc"] = mean_nuclei_cm_fix[0]
            df_agg.loc[b, "dna_dyc"] = mean_nuclei_cm_fix[1]
            df_agg.loc[b, "dna_dzc"] = mean_nuclei_cm_fix[2]

    else:
        # Save nuclear displacement as zeros if no adjustment
        # is requested.
        df_agg["dna_dxc"] = 0
        df_agg["dna_dyc"] = 0
        df_agg["dna_dzc"] = 0

    hlimits = []
    vlimits = []
    all_mem_contours = []
    all_dna_contours = []

    # Loop over 3 different projections: xy=[0,1], xz=[0,2] and
    # yz=[1,2]
    for proj_id, projection in enumerate(projections):

        # Get cell meshes and their 2D projections
        # for 3 different projections, xy, xz and yz.
        mem_contours, mem_meshes, mem_limits = get_contours_of_consecutive_reconstructions(
            df=df_agg, prefix="mem_shcoeffs_L", proj=projection, lmax=32)
        # Get nuclear meshes and their 2D projections
        # for 3 different projections, xy, xz and yz.
        dna_contours, dna_meshes, dna_limits = get_contours_of_consecutive_reconstructions(
            df=df_agg, prefix="dna_shcoeffs_L", proj=projection, lmax=32)

        if proj_id == 0:
            # Change the nuclear position relative to the cell
            # in the reconstructed meshes when running the
            # first projection
            for b, mem_mesh, dna_mesh in zip(df_agg.index, mem_meshes, dna_meshes):
                # Get nuclear mesh coordinates
                dna_coords = vtk_to_numpy(dna_mesh.GetPoints().GetData())
                # Shift coordinates according averaged
                # nuclear centroid relative to the cell
                dna_coords[:, 0] += df_agg.loc[b, "dna_dxc"]
                dna_coords[:, 1] += df_agg.loc[b, "dna_dyc"]
                dna_coords[:, 2] += df_agg.loc[b, "dna_dzc"]
                dna_mesh.GetPoints().SetData(numpy_to_vtk(dna_coords))
                # Save meshes as vtk polydatas
                mem_mesh_path = f"{save}/MEM_{mode}_{b:02d}.vtk"
                dna_mesh_path = f"{save}/DNA_{mode}_{b:02d}.vtk"
                shtools.save_polydata(mem_mesh, mem_mesh_path)
                shtools.save_polydata(dna_mesh, dna_mesh_path)
                # Save paths
                df_paths.append({
                    'bin': b,
                    'shapemode': mode,
                    'memMeshPath': mem_mesh_path,
                    'dnaMeshPath': dna_mesh_path
                })

        all_mem_contours.append(mem_contours)
        all_dna_contours.append(dna_contours)

        # Store bounds
        xmin = np.min([lim[0] for lim in mem_limits])
        xmax = np.max([lim[1] for lim in mem_limits])
        ymin = np.min([lim[2] for lim in mem_limits])
        ymax = np.max([lim[3] for lim in mem_limits])
        zmin = np.min([lim[4] for lim in mem_limits])
        zmax = np.max([lim[5] for lim in mem_limits])

        # Vertical and horizontal limits for plots
        hlimits += [xmin, xmax, ymin, ymax]
        vlimits += [ymin, ymax, zmin, zmax]

    # Dataframe with paths to be returned
    df_paths = pd.DataFrame(df_paths)

    # Set limits for plots
    if plot_limits is not None:
        hmin, hmax, vmin, vmax = plot_limits
    else:
        hmin = np.min(hlimits)
        hmax = np.max(hlimits)
        vmin = np.min(vlimits)
        vmax = np.max(vlimits)
    offset = 0.05 * (hmax - hmin)

    # Plot 2D contours and animate across bins
    for projection, mem_contours, dna_contours in zip(
        projections, all_mem_contours, all_dna_contours
    ):

        hcomp = projection[0]
        vcomp = projection[1]
        hlabel = axis_labels[hcomp]
        vlabel = axis_labels[vcomp]

        fig, ax = plt.subplots(1, 1, figsize=(3, 3))
        plt.close()
        ax.set_xlim(hmin - offset, hmax + offset)
        ax.set_ylim(vmin - offset, vmax + offset)
        ax.set_aspect("equal")

        # initial plot for cell
        (mline, ) = ax.plot([], [], lw=2,
                            color="#F200FF" if "MEM" in mode else "#3AADA7")
        # initial plot for nucleus
        (dline, ) = ax.plot([], [], lw=2,
                            color="#3AADA7" if "DNA" in mode else "#F200FF")

        # ``animate`` reads the loop variables of the current iteration;
        # this is safe because the animation is saved before the loop
        # moves on to the next projection.
        def animate(i):
            '''
            Animates cell and nuclear contour across bins
            '''
            mct = mem_contours[i]
            mx = mct[:, hcomp]
            my = mct[:, vcomp]

            dct = dna_contours[i]
            # Row of df_agg for this frame. The mesh shift above pairs
            # reconstructions with df_agg.index positionally, so the same
            # pairing is used here (equal to ``i + 1`` for a 1..n index).
            b = df_agg.index[i]

            # Shift 2D nuclear coordinates according averaged
            # nuclear centroid relative to the cell. The sum is computed
            # out of place: matplotlib may draw the same frame more than
            # once (e.g. the initial draw before saving), and an in-place
            # ``+=`` on the stored contour would apply the shift repeatedly.
            dx = dct[:, hcomp] + df_agg.loc[b, f"dna_d{hlabel}c"]
            dy = dct[:, vcomp] + df_agg.loc[b, f"dna_d{vlabel}c"]

            mline.set_data(mx, my)
            dline.set_data(dx, dy)

            return (mline, dline)

        # Generate animated GIF with matplotlib (imagemagick writer)
        anim = animation.FuncAnimation(fig,
                                       animate,
                                       frames=len(mem_contours),
                                       interval=100,
                                       blit=True)

        try:
            anim.save(
                f"{save}/{mode}_{''.join(str(x) for x in projection)}.gif",
                writer="imagemagick",
                fps=len(mem_contours))
        except Exception:
            # Best effort: a failed export must not abort mesh generation,
            # but KeyboardInterrupt/SystemExit are no longer swallowed.
            warnings.warn(
                "Export to animated GIF has failed. Please check your imagemagick installation."
            )
        else:
            # Path of GIF files
            df_paths['gifXYPath'] = f"{save}/{mode}_01.gif"
            df_paths['gifXZPath'] = f"{save}/{mode}_02.gif"
            df_paths['gifYZPath'] = f"{save}/{mode}_12.gif"

        plt.close("all")

    return df_paths
def run(self, sh_df=None, struct="Nuc", **kwargs):
    """
    This step uses the amplitudes of the spherical harmonic components
    of the nuclear shapes in the dataset to construct an average nuclear mesh.

    Besides the average mesh (PLY, OBJ, STL), the step writes a displacement
    map, a voxelization, a remeshed version of the voxelization and the
    averaged coefficients into ``avgshape_{struct}/avgshape_data`` inside the
    step's local staging directory. The ``init``/``run`` parameter JSON files
    are moved into ``avgshape_{struct}``.

    Parameters
    ----------
    sh_df: dataframe
        dataframe containing results from running Shparam step
        See the construction of the manifest in shparam.py for details.
        When None, the manifest of the previous step is loaded from
        ``../shparam/shparam_{struct}/manifest.csv``.

    struct: str
        String giving name of structure to run analysis on.
        Currently, this must be "Nuc" (nucleus) or "Cell" (cell membrane).

    Returns
    -------
    pd.DataFrame
        Single-row manifest with the paths of the files that were written.
    """

    # If no dataframe is passed in, load manifest from previous step
    if sh_df is None:
        sh_df = pd.read_csv(
            self.step_local_staging_dir.parent / "shparam" / f"shparam_{struct}" / "manifest.csv",
            index_col="CellId"
        )

    # Load sh coefficients of all samples in manifest. The per-cell frames
    # are collected first and concatenated once: DataFrame.append was removed
    # in pandas 2.0 and re-copied the accumulated frame on every iteration.
    coeffs_frames = [
        pd.read_csv(coeffs_df_path, index_col=["CellId"])
        for coeffs_df_path in sh_df["CoeffsFilePath"]
    ]
    # pd.concat rejects an empty list; keep the previous empty-frame result.
    coeffs_df = pd.concat(coeffs_frames) if coeffs_frames else pd.DataFrame([])

    # Create directory to hold results from this step
    struct_dir = self.step_local_staging_dir / f"avgshape_{struct}"
    struct_dir.mkdir(parents=True, exist_ok=True)

    avg_data_dir = struct_dir / "avgshape_data"
    avg_data_dir.mkdir(parents=True, exist_ok=True)

    # Move init and run parameters to structure dir to avoid overwriting
    for filetype in ["init", "run"]:
        filename = f"{filetype}_parameters.json"
        os.rename(
            self.step_local_staging_dir / filename,
            struct_dir / filename
        )

    # Perform some per-cell analysis
    run_shcoeffs_analysis(df=coeffs_df, savedir=avg_data_dir, struct=struct)

    # Avg the sh coefficients over all samples and create avg mesh
    coeffs_df_avg = coeffs_df.agg(['mean'])
    coeffs_avg = coeffs_df_avg.values

    # Number of columns = 2*lmax*lmax
    lmax = int(np.sqrt(0.5 * coeffs_avg.size))

    # The leading axis has length 2 by construction (see column count
    # above). It is spelled out instead of the former ``-2`` so that a
    # coefficient table of unexpected width fails loudly rather than being
    # reshaped into some other leading dimension.
    coeffs_avg = coeffs_avg.reshape(2, lmax, lmax)

    # Alternative, more evenly distributed meshing (currently not used):
    #   mesh_avg, _ = shtools.get_even_reconstruction_from_coeffs(
    #       coeffs=coeffs_avg, npoints=1024)
    mesh_avg, grid_avg = shtools.get_reconstruction_from_coeffs(
        coeffs=coeffs_avg,
    )

    # Output locations, shared by the writers and the manifest
    ply_path = avg_data_dir / f"avgshape_{struct}.ply"
    obj_path = avg_data_dir / f"avgshape_{struct}.obj"
    dmap_path = avg_data_dir / f"avgshape_dmap_{struct}.tif"
    tif_path = avg_data_dir / f"avgshape_{struct}.tif"
    stl_path = avg_data_dir / f"avgshape_{struct}.stl"
    remesh_path = avg_data_dir / f"avgshape_remesh_{struct}.ply"

    shtools.save_polydata(mesh=mesh_avg, filename=str(ply_path))

    # Save mesh as obj
    save_mesh_as_obj(mesh_avg, obj_path)

    # Save displacement map
    save_displacement_map(grid_avg, dmap_path)

    # Save mesh as image
    domain = save_voxelization(mesh_avg, tif_path)

    # Save mesh as stl file for blender import
    save_mesh_as_stl(mesh_avg, stl_path)

    # Remesh voxelization
    remesh_avg = get_smooth_and_coarse_mesh_from_voxelization(domain, sigma=3, npoints=2000)

    # Save remesh as PLY
    shtools.save_polydata(mesh=remesh_avg, filename=str(remesh_path))

    # Save avg coeffs to csv file
    coeffs_df_avg.to_csv(
        str(avg_data_dir / f"avgshape_{struct}.csv")
    )

    # Save path to avg shape in the manifest
    self.manifest = pd.DataFrame({
        "Label": "Average_mesh",
        "AvgShapeFilePath": ply_path,
        "AvgShapeRemeshFilePath": remesh_path,
        "AvgShapeFilePathStl": stl_path,
        "AvgShapeFilePathObj": obj_path,
        "AvgShapeFilePathTif": tif_path,
        "AvgShapeDMapFilePathTif": dmap_path,
        "Structure": struct,
    }, index=[0])

    # Save manifest as csv
    self.manifest.to_csv(
        struct_dir / "manifest.csv", index=False
    )

    return self.manifest