示例#1
0
 def diffexp_topN(self,
                  obsFilterA,
                  obsFilterB,
                  top_n=None,
                  interactive_limit=None):
     """
     Runs a differential expression t-test between two sets of observations.
     :param obsFilterA: dictionary with filter params selecting the first observation set
     :param obsFilterB: dictionary with filter params selecting the second observation set
     :param top_n: value forwarded to diffexp_ttest; DEFAULT_TOP_N is used when None
     :param interactive_limit: currently not used by this method
     :return: the diffexp_ttest result encoded by jsonify_scanpy
     :raises FilterError: if a filter contains a variable condition or cannot be parsed
     :raises JSONEncodingValueError: if the result cannot be encoded to JSON
     """
     if Axis.VAR in obsFilterA or Axis.VAR in obsFilterB:
         raise FilterError(
             "Observation filters may not contain variable conditions")
     try:
         obs_mask_A = self._axis_filter_to_mask(obsFilterA["obs"],
                                                self.data.obs,
                                                self.data.n_obs)
         obs_mask_B = self._axis_filter_to_mask(obsFilterB["obs"],
                                                self.data.obs,
                                                self.data.n_obs)
     except (KeyError, IndexError) as e:
         # A filter without an "obs" entry surfaces here as a KeyError.
         raise FilterError(f"Error parsing filter: {e}") from e
     if top_n is None:
         top_n = DEFAULT_TOP_N
     result = diffexp_ttest(self.data, obs_mask_A, obs_mask_B, top_n,
                            self.config['diffexp_lfc_cutoff'])
     try:
         return jsonify_scanpy(result)
     except ValueError as e:
         # Chain explicitly so the encoding failure stays attached as the cause.
         raise JSONEncodingValueError(
             "Error encoding differential expression to JSON") from e
示例#2
0
    def annotation_put_fbs(self, axis, fbs):
        """
        Replaces the in-memory labels with a decoded matrix and writes them
        to the configured label file.
        :param axis: axis being written; anything other than Axis.OBS is rejected
        :param fbs: encoded matrix, decoded with decode_matrix_fbs
        :return: jsonify_scanpy encoding of {"status": "OK"}
        :raises DisabledFeatureError: if no label file is configured or labels is None
        :raises ValueError: if axis is not Axis.OBS
        :raises KeyError: if the new columns are not unique or overlap the obs columns
        """
        label_path = self.config["label_file"]
        if not (label_path and self.labels is not None):
            raise DisabledFeatureError("Writable annotations are not enabled")

        if axis != Axis.OBS:
            raise ValueError("Only OBS dimension access is supported")

        incoming = decode_matrix_fbs(fbs)
        incoming.index = self.original_obs_index
        self._validate_label_data()  # paranoia

        # Reject any incoming column name that is already an obs column.
        incoming_names = set(incoming.columns)
        obs_names = set(self.data.obs.columns)
        clashes = list(incoming_names & obs_names)
        if not incoming.columns.is_unique or clashes:
            raise KeyError(
                "Labels file may not contain column names which overlap "
                f"with h5ad obs columns {clashes}")

        # Multi-threading is often enabled, so the state update and the
        # file write are done together as a critical section.
        with self.label_lock:
            self.labels = incoming
            write_labels(label_path, self.labels)

        return jsonify_scanpy({"status": "OK"})
示例#3
0
    def annotation_put_fbs(self, axis, fbs, uid=None, collection=None):
        """
        Decodes a matrix of user annotations and writes it, with a
        descriptive header, to the annotation file for uid/collection.
        :param axis: axis being written; anything other than Axis.OBS is rejected
        :param fbs: encoded matrix, decoded with decode_matrix_fbs
        :param uid: forwarded to get_anno_fname and get_anno_backup_dir
        :param collection: forwarded to get_anno_fname and get_anno_backup_dir
        :return: jsonify_scanpy encoding of {"status": "OK"}
        :raises DisabledFeatureError: if annotations are not enabled in the config
        :raises ScanpyFileError: if no annotation file name can be determined
        :raises ValueError: if axis is not Axis.OBS
        :raises KeyError: if the new columns are not unique or overlap the obs columns
        """
        if not self.config["annotations"]:
            raise DisabledFeatureError("Writable annotations are not enabled")

        anno_path = self.get_anno_fname(uid, collection)
        if not anno_path:
            raise ScanpyFileError("Writable annotations - unable to determine file name for annotations")

        if axis != Axis.OBS:
            raise ValueError("Only OBS dimension access is supported")

        incoming = decode_matrix_fbs(fbs)
        if not incoming.empty:
            # Only a non-empty frame is re-indexed with the original obs index.
            incoming.index = self.original_obs_index
        self._validate_label_data(incoming)  # paranoia

        # Reject any incoming column name that is already an obs column.
        incoming_names = set(incoming.columns)
        obs_names = set(self.data.obs.columns)
        clashes = list(incoming_names & obs_names)
        if not incoming.columns.is_unique or clashes:
            raise KeyError("Labels file may not contain column names which overlap "
                           f"with h5ad obs columns {clashes}")

        # Multi-threading is often enabled, so building the header and
        # writing the file are done together as a critical section.
        with self.label_lock:
            source_mtime = self.data_locator.lastmodtime()
            if source_mtime is None:
                mtime_text = "'unknown'"
            else:
                mtime_text = source_mtime.isoformat(timespec="seconds")
            generated_at = datetime.now().isoformat(timespec="seconds")
            header = (
                f"# Annotations generated on {generated_at} "
                f"using cellxgene version {cellxgene_version}\n"
                f"# Input data file was {self.data_locator.uri_or_path}, "
                f"which was last modified on {mtime_text}\n"
            )
            backup_dir = self.get_anno_backup_dir(uid, collection)
            write_labels(anno_path, incoming, header, backup_dir=backup_dir)

        return jsonify_scanpy({"status": "OK"})
示例#4
0
 def annotation(self, filter, axis, fields=None):
     """
     Returns annotation values for the rows selected by a filter.
     :param filter: dictionary with filter params
     :param axis: Axis.OBS selects observation annotations; any other value
      selects variable annotations
     :param fields: list of annotation keys to return; all columns are
      returned when empty or not set
     :return: jsonify_scanpy encoding of a dict: names - list of fields in
      order, data - list of records [index value, val1, val2...]
     :raises FilterError: if the filter cannot be parsed
     :raises JSONEncodingValueError: if the result cannot be encoded to JSON
     """
     try:
         obs_selector, var_selector = self._filter_to_mask(filter)
     except (KeyError, IndexError) as e:
         raise FilterError(f"Error parsing filter: {e}") from e

     # Both axes are handled the same way once the frame is chosen.
     if axis == Axis.OBS:
         frame = self.data.obs[obs_selector]
     else:
         frame = self.data.var[var_selector]

     names = fields if fields else frame.columns.tolist()
     records = DataFrame(frame[names]).to_records(index=True).tolist()
     result = {"names": names, "data": records}

     try:
         return jsonify_scanpy(result)
     except ValueError:
         raise JSONEncodingValueError("Error encoding annotations to JSON")
示例#5
0
 def layout(self, filter, interactive_limit=None):
     """
     Returns the precomputed n-d layout for the filtered cells, rescaled
     with the layout's min() and max().
     :param filter: dictionary with filter params
     :param interactive_limit: -- refuse the request if the number of
      observations in the filtered dataframe is larger than this
     :return: jsonify_scanpy encoding of {"layout": {"ndims": number of
      layout columns, "coordinates": [cellid, x, y, ...]}}
     :raises FilterError: if the filter cannot be parsed
     :raises InteractiveError: if the filtered data exceeds interactive_limit
     :raises PrepareError: if no layout is stored for self.layout_method
     :raises JSONEncodingValueError: if the result cannot be encoded to JSON
     """
     try:
         df = self.filter_dataframe(filter)
     except (KeyError, IndexError) as e:
         raise FilterError(f"Error parsing filter: {e}") from e
     if interactive_limit and len(df.obs.index) > interactive_limit:
         raise InteractiveError(
             "Size data is too large for interactive computation")
     # TODO Filtering cells is fine, but filtering genes does nothing because the neighbors are
     # calculated using the original vars (geneset) and this doesn't get updated when you use less.
     # Need to recalculate neighbors (long) if user requests new layout filtered by var
     # TODO for MVP we are pushing computation of layout to preprocessing and not allowing re-layout
     # this will probably change after user feedback
     # getattr(sc.tl, self.layout_method)(df, random_state=123)
     try:
         df_layout = df.obsm[f"X_{self.layout_method}"]
     except (KeyError, ValueError) as e:
         # A missing layout may surface as ValueError (structured-array
         # obsm) or KeyError (mapping-style obsm); report both the same way.
         raise PrepareError(
             f"Layout has not been calculated using {self.layout_method}, "
             f"please prepare your datafile and relaunch cellxgene") from e
     normalized_layout = DataFrame(
         (df_layout - df_layout.min()) /
         (df_layout.max() - df_layout.min()),
         index=df.obs.index,
     )
     try:
         return jsonify_scanpy({
             "layout": {
                 "ndims":
                 normalized_layout.shape[1],
                 "coordinates":
                 normalized_layout.to_records(index=True).tolist(),
             }
         })
     except ValueError as e:
         # Chain explicitly so the encoding failure stays attached as the cause.
         raise JSONEncodingValueError("Error encoding layout to JSON") from e
示例#6
0
 def data_frame(self, filter, axis):
     """
     Returns the expression values selected by a filter, laid out along
     the requested axis.
     :param filter: dictionary with filter params
     :param axis: Axis.OBS returns one record per observation; any other
      value returns one record per variable (transposed values)
     :return: jsonify_scanpy encoding of, for Axis.OBS, {
         "var": list of variable ids,
         "obs": [cellid, var1 expression, var2 expression, ...],
     } and otherwise {
         "obs": list of observation ids,
         "var": [varid, obs1 expression, obs2 expression, ...],
     }
     :raises FilterError: if the filter cannot be parsed
     :raises JSONEncodingValueError: if the result cannot be encoded to JSON
     """
     try:
         obs_selector, var_selector = self._filter_to_mask(filter)
     except (KeyError, IndexError) as e:
         raise FilterError(f"Error parsing filter: {e}") from e

     values = self.data._X[obs_selector, var_selector]
     if sparse.issparse(values):
         # Densify so the values can be wrapped in a DataFrame below.
         values = values.toarray()
     var_ids = self.data.var.index[var_selector]
     obs_ids = self.data.obs.index[obs_selector]

     def as_records(matrix, row_ids):
         # One record per row: the row id followed by the row's values.
         return DataFrame(matrix, index=row_ids).to_records(index=True).tolist()

     if axis == Axis.OBS:
         result = {
             "var": var_ids.tolist(),
             "obs": as_records(values, obs_ids),
         }
     else:
         result = {
             "obs": obs_ids.tolist(),
             "var": as_records(values.T, var_ids),
         }

     try:
         return jsonify_scanpy(result)
     except ValueError:
         raise JSONEncodingValueError("Error encoding dataframe to JSON")