def _default_and_validate_layouts(self):
    """
    Resolve ``self.config['layout']`` to a validated, capped list of layout names.

    Steps:
      a) if no layouts were specified, default to every ``X_<name>`` key reported by
         ``self.data.obsm_keys()``
      b) drop (with a warning) every layout that is missing from obsm or that fails
         ``self._is_valid_layout``
      c) cap the resulting list at the global constant MAX_LAYOUTS

    The result is written back to ``self.config['layout']``; nothing is returned.

    :raises PrepareError: if no layouts are found when defaulting, or if none of the
        candidate layouts survives validation
    """
    # Fetched once: used both for defaulting and for validation.
    obsm_keys = self.data.obsm_keys()
    layouts = self.config['layout']

    if layouts is None or len(layouts) == 0:
        # handle default: load layout names from the data, stripping the "X_" prefix.
        layouts = [key[2:] for key in obsm_keys if isinstance(key, str) and key.startswith("X_")]
        if len(layouts) == 0:
            raise PrepareError("Unable to find any precomputed layouts within the dataset.")
    elif isinstance(layouts, str):
        # A bare string would otherwise be iterated character by character below.
        layouts = [layouts]

    # remove invalid layouts, preserving the requested order
    valid_layouts = []
    for layout in layouts:
        layout_name = f"X_{layout}"
        if layout_name not in obsm_keys:
            warnings.warn(f"Ignoring unknown layout name: {layout}.")
        elif not self._is_valid_layout(self.data.obsm[layout_name]):
            warnings.warn(f"Ignoring layout due to malformed shape or data type: {layout}")
        else:
            valid_layouts.append(layout)

    if not valid_layouts:
        raise PrepareError("No valid layout data.")

    # cap layouts to MAX_LAYOUTS
    self.config['layout'] = valid_layouts[0:MAX_LAYOUTS]
def layout(self, filter, interactive_limit=None):
    """
    Return the precomputed layout for the filtered cells, rescaled with the layout's min/max.

    The layout is not recomputed here: the coordinates stored under
    ``X_<self.layout_method>`` in the filtered data's ``obsm`` are read and normalized as
    ``(v - min) / (max - min)``, using ``.min()`` / ``.max()`` of the whole layout.

    :param filter: dictionary with filter params, passed to ``self.filter_dataframe``
    :param interactive_limit: if truthy, refuse to proceed when the filtered data has more
        observations (``len(df.obs.index)``) than this
    :return: dict with ``"ndims"`` (number of layout columns) and ``"coordinates"``
        (list of records ``[cellid, x, y, ...]``)
    :raises FilterError: if ``filter_dataframe`` raises KeyError or IndexError
    :raises InteractiveError: if the filtered data exceeds ``interactive_limit``
    :raises PrepareError: if the requested layout is not present in ``obsm``
    """
    try:
        df = self.filter_dataframe(filter)
    except (KeyError, IndexError) as e:
        raise FilterError(f"Error parsing filter: {e}") from e

    if interactive_limit and len(df.obs.index) > interactive_limit:
        raise InteractiveError("Size data is too large for interactive computation")

    # TODO Filtering cells is fine, but filtering genes does nothing because the neighbors are
    # calculated using the original vars (geneset) and this doesn't get updated when you use less.
    # Need to recalculate neighbors (long) if user requests new layout filtered by var
    # TODO for MVP we are pushing computation of layout to preprocessing and not allowing re-layout
    # this will probably change after user feedback
    # getattr(sc.tl, self.layout_method)(df, random_state=123)
    try:
        df_layout = df.obsm[f"X_{self.layout_method}"]
    except (KeyError, ValueError) as e:
        # A mapping-style obsm raises KeyError for a missing layout; ValueError is kept
        # because the original code expected it from this lookup.
        raise PrepareError(
            f"Layout has not been calculated using {self.layout_method}, "
            f"please prepare your datafile and relaunch cellxgene") from e

    lowest = df_layout.min()
    highest = df_layout.max()
    normalized_layout = DataFrame((df_layout - lowest) / (highest - lowest), index=df.obs.index)
    return {
        "ndims": normalized_layout.shape[1],
        "coordinates": normalized_layout.to_records(index=True).tolist()
    }
def _validate_data_calculations(self):
    """
    Verify that the requested layout has been precomputed in the loaded data.

    Checks that ``X_<self.layout_method>`` is among ``self.data.obsm_keys()``.

    :raises PrepareError: if the layout key is absent
    """
    layout_key = f"X_{self.layout_method}"
    # Explicit check rather than `assert`: assertions are stripped when Python runs with -O,
    # which would silently disable this validation.
    if layout_key not in self.data.obsm_keys():
        raise PrepareError(
            f"Cannot find a field with coordinates for the {self.layout_method} layout requested. A different"
            f" layout may have been computed. The requested layout must be pre-calculated and saved "
            f"back in the h5ad file. You can run "
            f"`cellxgene prepare --layout {self.layout_method} <datafile>` "
            f"to solve this problem. ")
def layout_to_fbs_matrix(self):
    """
    Return the layout stored under ``X_<self.layout_method>`` as a FBS Matrix.

    The layout is rescaled as ``(v - min) / (max - min)`` using ``.min()`` / ``.max()`` of the
    whole layout, cast to float32 and handed to ``encode_matrix_fbs``.

    Caveats:
        * does not support filtering
        * only returns Matrix in columnar layout

    :return: the value returned by ``encode_matrix_fbs``
    :raises PrepareError: if the requested layout is not present in ``self.data.obsm``
    """
    try:
        df_layout = self.data.obsm[f"X_{self.layout_method}"]
    except (KeyError, ValueError) as e:
        # A mapping-style obsm raises KeyError for a missing layout; ValueError is kept
        # because the original code expected it from this lookup.
        raise PrepareError(
            f"Layout has not been calculated using {self.layout_method}, "
            f"please prepare your datafile and relaunch cellxgene") from e

    lowest = df_layout.min()
    highest = df_layout.max()
    normalized_layout = (df_layout - lowest) / (highest - lowest)
    return encode_matrix_fbs(normalized_layout.astype(dtype=np.float32), col_idx=None, row_idx=None)
def layout_to_fbs_matrix(self):
    """
    Return every configured layout as a single FBS Matrix.

    For each name in ``self.config["layout"]`` the first two columns of
    ``self.data.obsm["X_<name>"]`` are taken, normalized, and emitted as the columns
    ``<name>_0`` and ``<name>_1``.

    Caveats:
        * does not support filtering
        * only returns Matrix in columnar layout

    All embeddings must be individually centered & scaled (isotropically) to a [0, 1] range.
    Both axes are divided by the larger of the two extents, so the aspect ratio is kept, and
    the shorter axis is then shifted so the embedding is centered on 0.5.

    :return: the value returned by ``encode_matrix_fbs``
    :raises PrepareError: if a KeyError or ValueError occurs while reading or normalizing a layout
    """
    try:
        layout_data = []
        for layout in self.config["layout"]:
            full_embedding = self.data.obsm[f"X_{layout}"]
            embedding = full_embedding[:, :2]

            # scale isotropically
            lower = embedding.min(axis=0)
            upper = embedding.max(axis=0)
            extent = upper - lower
            scale = np.amax(extent)
            if scale == 0:
                # All points coincide: avoid dividing by zero; the points end up at the center.
                scale = 1
            normalized_layout = (embedding - lower) / scale

            # translate to center on both axis
            translate = 0.5 - (extent / scale / 2)
            normalized_layout = normalized_layout + translate

            normalized_layout = normalized_layout.astype(dtype=np.float32)
            layout_data.append(
                pandas.DataFrame(normalized_layout, columns=[f"{layout}_0", f"{layout}_1"]))
    except (KeyError, ValueError) as e:
        # A mapping-style obsm raises KeyError for a missing layout; ValueError is kept
        # because the original code expected it from this block.
        raise PrepareError(
            f"Layout has not been calculated using {self.config['layout']}, "
            f"please prepare your datafile and relaunch cellxgene") from e

    df = pandas.concat(layout_data, axis=1, copy=False)
    return encode_matrix_fbs(df, col_idx=df.columns, row_idx=None)