def diffexp_topN(self, obsFilterA, obsFilterB, top_n=None, interactive_limit=None):
    if Axis.VAR in obsFilterA or Axis.VAR in obsFilterB:
        raise FilterError(
            "Observation filters may not contain variable conditions")
    try:
        obs_mask_A = self._axis_filter_to_mask(
            obsFilterA["obs"], self.data.obs, self.data.n_obs)
        obs_mask_B = self._axis_filter_to_mask(
            obsFilterB["obs"], self.data.obs, self.data.n_obs)
    except (KeyError, IndexError) as e:
        raise FilterError(f"Error parsing filter: {e}") from e
    if top_n is None:
        top_n = DEFAULT_TOP_N
    result = diffexp_ttest(
        self.data, obs_mask_A, obs_mask_B, top_n, self.config['diffexp_lfc_cutoff'])
    try:
        return jsonify_scanpy(result)
    except ValueError:
        raise JSONEncodingValueError(
            "Error encoding differential expression to JSON")
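# Usage sketch for the method above (hypothetical: `adaptor` is an instance of
# this adaptor class, and the {"obs": {"index": [...]}} filter shape is an
# assumption about what _axis_filter_to_mask accepts):
#
#     resp = adaptor.diffexp_topN(
#         {"obs": {"index": [0, 1, 2]}},      # cell set A
#         {"obs": {"index": [10, 11, 12]}},   # cell set B
#         top_n=10,
#     )
#     # resp is the JSON encoding of the top-10 differentially expressed
#     # genes, as ranked by diffexp_ttest.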
def annotation_put_fbs(self, axis, fbs):
    fname = self.config["label_file"]
    if not fname or self.labels is None:
        raise DisabledFeatureError("Writable annotations are not enabled")
    if axis != Axis.OBS:
        raise ValueError("Only OBS dimension access is supported")
    new_label_df = decode_matrix_fbs(fbs)
    new_label_df.index = self.original_obs_index
    self._validate_label_data(new_label_df)  # paranoia
    # if any of the new column labels overlap with our existing labels, raise error
    duplicate_columns = list(
        set(new_label_df.columns) & set(self.data.obs.columns))
    if not new_label_df.columns.is_unique or len(duplicate_columns) > 0:
        raise KeyError(
            f"Labels file may not contain column names which overlap "
            f"with h5ad obs columns {duplicate_columns}")
    # update our internal state and save it. Multi-threading often enabled,
    # so treat this as a critical section.
    with self.label_lock:
        self.labels = new_label_df
        write_labels(fname, self.labels)
    return jsonify_scanpy({"status": "OK"})
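# Round-trip sketch for the method above (hypothetical: assumes a matching
# flatbuffer encoder, encode_matrix_fbs, exists alongside decode_matrix_fbs;
# `adaptor` is an instance of this class):
#
#     import pandas as pd
#     labels = pd.DataFrame({"user_cluster": ["A", "B", "A"]})
#     fbs = encode_matrix_fbs(labels, col_idx=labels.columns)
#     adaptor.annotation_put_fbs(Axis.OBS, fbs)   # persisted via write_labels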
def annotation_put_fbs(self, axis, fbs, uid=None, collection=None):
    if not self.config["annotations"]:
        raise DisabledFeatureError("Writable annotations are not enabled")
    fname = self.get_anno_fname(uid, collection)
    if not fname:
        raise ScanpyFileError(
            "Writable annotations - unable to determine file name for annotations")
    if axis != Axis.OBS:
        raise ValueError("Only OBS dimension access is supported")
    new_label_df = decode_matrix_fbs(fbs)
    if not new_label_df.empty:
        new_label_df.index = self.original_obs_index
    self._validate_label_data(new_label_df)  # paranoia
    # if any of the new column labels overlap with our existing labels, raise error
    duplicate_columns = list(set(new_label_df.columns) & set(self.data.obs.columns))
    if not new_label_df.columns.is_unique or len(duplicate_columns) > 0:
        raise KeyError(
            f"Labels file may not contain column names which overlap "
            f"with h5ad obs columns {duplicate_columns}")
    # update our internal state and save it. Multi-threading often enabled,
    # so treat this as a critical section.
    with self.label_lock:
        lastmod = self.data_locator.lastmodtime()
        lastmodstr = "'unknown'" if lastmod is None else lastmod.isoformat(timespec="seconds")
        header = (
            f"# Annotations generated on {datetime.now().isoformat(timespec='seconds')} "
            f"using cellxgene version {cellxgene_version}\n"
            f"# Input data file was {self.data_locator.uri_or_path}, "
            f"which was last modified on {lastmodstr}\n"
        )
        write_labels(fname, new_label_df, header,
                     backup_dir=self.get_anno_backup_dir(uid, collection))
    return jsonify_scanpy({"status": "OK"})
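# For reference, the header written by the newer variant above looks like the
# following (dates, path, and version are illustrative placeholders built from
# the f-string in the method body):
#
#     # Annotations generated on 2021-06-01T12:00:00 using cellxgene version 0.16.0
#     # Input data file was /data/pbmc3k.h5ad, which was last modified on 2021-05-30T09:15:00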
def annotation(self, filter, axis, fields=None):
    """
    Gets annotation value for each observation
    :param filter: dictionary with filter params
    :param axis: string obs or var
    :param fields: list of keys for annotation to return; returns all
        annotation values if not set.
    :return: dict: names - list of fields in order, data - list of lists
        [observation id, val1, val2, ...]
    """
    try:
        obs_selector, var_selector = self._filter_to_mask(filter)
    except (KeyError, IndexError) as e:
        raise FilterError(f"Error parsing filter: {e}") from e
    if axis == Axis.OBS:
        obs = self.data.obs[obs_selector]
        if not fields:
            fields = obs.columns.tolist()
        result = {
            "names": fields,
            "data": DataFrame(obs[fields]).to_records(index=True).tolist(),
        }
    else:
        var = self.data.var[var_selector]
        if not fields:
            fields = var.columns.tolist()
        result = {
            "names": fields,
            "data": DataFrame(var[fields]).to_records(index=True).tolist(),
        }
    try:
        return jsonify_scanpy(result)
    except ValueError:
        raise JSONEncodingValueError("Error encoding annotations to JSON")
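# Result-shape sketch for the method above (hypothetical values and filter
# shape; the return structure follows the docstring):
#
#     adaptor.annotation({"obs": {"index": [0, 1]}}, Axis.OBS,
#                        fields=["n_genes", "louvain"])
#     # -> {"names": ["n_genes", "louvain"],
#     #     "data": [["AAACATACAACCAC-1", 781, "CD4 T cells"], ...]}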
def layout(self, filter, interactive_limit=None):
    """
    Computes an n-d layout for cells through dimensionality reduction.
    :param filter: dictionary with filter params
    :param interactive_limit: don't compute if the number of cells selected
        is larger than this
    :return: [cellid, x, y, ...]
    """
    try:
        df = self.filter_dataframe(filter)
    except (KeyError, IndexError) as e:
        raise FilterError(f"Error parsing filter: {e}") from e
    if interactive_limit and len(df.obs.index) > interactive_limit:
        raise InteractiveError(
            "Size of data is too large for interactive computation")
    # TODO Filtering cells is fine, but filtering genes does nothing because
    # the neighbors are calculated using the original vars (gene set), and
    # this doesn't get updated when fewer genes are used. Need to recalculate
    # neighbors (slow) if the user requests a new layout filtered by var.
    # TODO for MVP we are pushing computation of layout to preprocessing and
    # not allowing re-layout; this will probably change after user feedback.
    # getattr(sc.tl, self.layout_method)(df, random_state=123)
    try:
        df_layout = df.obsm[f"X_{self.layout_method}"]
    except (KeyError, ValueError) as e:
        raise PrepareError(
            f"Layout has not been calculated using {self.layout_method}, "
            f"please prepare your datafile and relaunch cellxgene") from e
    normalized_layout = DataFrame(
        (df_layout - df_layout.min()) / (df_layout.max() - df_layout.min()),
        index=df.obs.index,
    )
    try:
        return jsonify_scanpy({
            "layout": {
                "ndims": normalized_layout.shape[1],
                "coordinates": normalized_layout.to_records(index=True).tolist(),
            }
        })
    except ValueError:
        raise JSONEncodingValueError("Error encoding layout to JSON")
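# Note on the normalization above: .min()/.max() on the obsm ndarray are
# global (no axis argument), so all dimensions are scaled by the same range.
# Standalone sketch of the same arithmetic:
#
#     import numpy as np
#     coords = np.array([[0.0, 4.0], [2.0, 8.0]])
#     scaled = (coords - coords.min()) / (coords.max() - coords.min())
#     # -> [[0.0, 0.5], [0.25, 1.0]]  (global min 0, global max 8)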
def data_frame(self, filter, axis):
    """
    Retrieves data for each variable for observations in data frame
    :param filter: dictionary with filter params
    :param axis: string obs or var
    :return: {
        "var": list of variable ids,
        "obs": [cellid, var1 expression, var2 expression, ...],
    }
    """
    try:
        obs_selector, var_selector = self._filter_to_mask(filter)
    except (KeyError, IndexError) as e:
        raise FilterError(f"Error parsing filter: {e}") from e
    _X = self.data._X[obs_selector, var_selector]
    if sparse.issparse(_X):
        _X = _X.toarray()
    var_index_sliced = self.data.var.index[var_selector]
    obs_index_sliced = self.data.obs.index[obs_selector]
    if axis == Axis.OBS:
        result = {
            "var": var_index_sliced.tolist(),
            "obs": DataFrame(
                _X, index=obs_index_sliced).to_records(index=True).tolist(),
        }
    else:
        result = {
            "obs": obs_index_sliced.tolist(),
            "var": DataFrame(
                _X.T, index=var_index_sliced).to_records(index=True).tolist(),
        }
    try:
        return jsonify_scanpy(result)
    except ValueError:
        raise JSONEncodingValueError("Error encoding dataframe to JSON")
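# Result-shape sketch for the method above (hypothetical gene names, values,
# and filter shape; the return structure follows the docstring):
#
#     adaptor.data_frame({"var": {"index": [0, 1]}}, Axis.OBS)
#     # -> {"var": ["TNF", "IL6"],
#     #     "obs": [["AAACATACAACCAC-1", 0.0, 1.7], ...]}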