Пример #1
0
    def annotation_to_fbs_matrix(self, axis, fields=None, labels=None):
        """
        Return the annotations of ``axis`` encoded by ``encode_matrix_fbs``.

        :param axis: axis to read; ``str(axis)`` names the array that is opened.
        :param fields: optional field selection, resolved by
            ``_annotations_field_split`` (which may raise on an unknown key).
        :param labels: optional DataFrame-like of extra annotations; it is only
            joined in when ``axis`` is ``Axis.OBS`` and it is not empty.
        :return: the encoded matrix, with the dataframe columns as ``col_idx``.
        """
        with ServerTiming.time(f"annotations.{axis}.query"):
            array = self.open_array(str(axis))

            # The split may raise when `fields` names an unknown key.
            split = self._annotations_field_split(axis, fields, array, labels)
            cxg_fields, anno_fields, return_fields, index_field = split

            # None reads the whole array; an empty selection reads nothing.
            if cxg_fields is None:
                stored = array[:]
            else:
                stored = array.query(attrs=cxg_fields)[:] if cxg_fields else {}
            frame = pd.DataFrame.from_dict(stored)

            use_labels = axis == Axis.OBS and labels is not None and not labels.empty
            # Same convention for labels: None joins all of them, empty joins none.
            if use_labels and (anno_fields is None or anno_fields):
                assert index_field
                extra = labels if anno_fields is None else labels[anno_fields]
                frame = frame.join(extra, index_field)

            if return_fields:
                frame = frame[return_fields]

        with ServerTiming.time(f"annotations.{axis}.encode"):
            return encode_matrix_fbs(frame, col_idx=frame.columns)
Пример #2
0
    def layout_to_fbs_matrix(self, fields):
        """
        Encode the requested embeddings as one cellxgene matrix flatbuffer.

        * ``fields`` lists the embedding names; ``None`` or an empty value
          selects every name reported by ``get_embedding_names``.
        * only the first two dimensions of each embedding are returned, in
          columns named {ename}_0 and {ename}_1, where {ename} is the
          embedding name.
        * the client assumes each embedding is individually centered and
          scaled (isotropically) to a [0, 1] range.
        * filtering is not supported.
        """
        if fields is None or len(fields) == 0:
            names = self.get_embedding_names()
        else:
            names = fields

        with ServerTiming.time("layout.query"):
            # One two-column frame per embedding, in request order.
            frames = [
                pd.DataFrame(
                    DataAdaptor.normalize_embedding(self.get_embedding_array(ename, 2)),
                    columns=[f"{ename}_0", f"{ename}_1"],
                )
                for ename in names
            ]

        with ServerTiming.time("layout.encode"):
            # Nothing to concatenate when no embedding was requested or found.
            merged = pd.concat(frames, axis=1, copy=False) if frames else pd.DataFrame()
            return encode_matrix_fbs(merged, col_idx=merged.columns, row_idx=None)
Пример #3
0
 def get_embedding_names(self):
     """
     List the names of the embeddings found under the "emb" path.

     Only entries that ``lsuri`` reports with type "array" are kept; each
     name is the basename of the entry's path.

     :raises DatasetAccessError: when no such entry exists.
     """
     with ServerTiming.time("layout.lsuri"):
         emb_root = self.get_path("emb")
         names = []
         for uri, kind in self.lsuri(emb_root):
             if kind == "array":
                 names.append(os.path.basename(uri))
     if not names:
         raise DatasetAccessError("cxg matrix missing embeddings")
     return names
Пример #4
0
    def compute_embedding(self, method, obsFilter):
        """
        Compute a new embedding for the observations selected by ``obsFilter``.

        :param method: re-embedding method; only ``"umap"`` is accepted.
        :param obsFilter: filter mapping whose ``"obs"`` entry is converted to
            an observation mask; it must not contain ``Axis.VAR``.
        :return: ``(schema, fbs)`` where ``schema`` is a dict with the new
            embedding's ``name``, ``type`` and ``dims``, and ``fbs`` is the
            normalized two-column layout encoded by ``encode_matrix_fbs``.
        :raises FilterError: if the filter has variable conditions or cannot
            be parsed (the underlying lookup error is attached as the cause).
        :raises NotImplementedError: if ``method`` is not ``"umap"``.
        """
        if Axis.VAR in obsFilter:
            raise FilterError(
                "Observation filters may not contain variable conditions")
        if method != "umap":
            raise NotImplementedError(
                f"re-embedding method {method} is not available.")
        try:
            shape = self.get_shape()
            obs_mask = self._axis_filter_to_mask(Axis.OBS, obsFilter["obs"],
                                                 shape[0])
        except (KeyError, IndexError) as err:
            # Chain explicitly so the original lookup failure stays visible.
            raise FilterError("Error parsing filter") from err
        with ServerTiming.time("layout.compute"):
            X_umap = scanpy_umap(self.data, obs_mask)
            normalized_layout = DataAdaptor.normalize_embedding(X_umap)

        # Server picks the re-embedding name, which must not collide with any
        # other embedding name generated by this backend.
        name = f"reembed:{method}_{datetime.now().isoformat(timespec='milliseconds')}"
        dims = [f"{name}_0", f"{name}_1"]
        df = pd.DataFrame(normalized_layout, columns=dims)
        fbs = encode_matrix_fbs(df, col_idx=df.columns, row_idx=None)
        schema = {"name": name, "type": "float32", "dims": dims}
        return (schema, fbs)
Пример #5
0
    def compute_embedding(self, method, obsFilter):
        """
        Compute a new embedding and register it on this adaptor.

        The result of ``scanpy_umap`` is stored in ``self.data.obsm`` under
        ``X_{name}`` and its schema is appended to
        ``self.schema["layout"]["obs"]``.

        :param method: re-embedding method; only ``"umap"`` is accepted.
        :param obsFilter: filter mapping whose ``"obs"`` entry is converted to
            an observation mask; it must not contain ``Axis.VAR``.
        :return: the schema dict (``name``, ``type``, ``dims``) of the new
            embedding.
        :raises FilterError: if the filter has variable conditions or cannot
            be parsed (the underlying lookup error is attached as the cause).
        :raises NotImplementedError: if ``method`` is not ``"umap"``.
        """
        if Axis.VAR in obsFilter:
            raise FilterError("Observation filters may not contain variable conditions")
        if method != "umap":
            raise NotImplementedError(f"re-embedding method {method} is not available.")
        try:
            shape = self.get_shape()
            obs_mask = self._axis_filter_to_mask(Axis.OBS, obsFilter["obs"], shape[0])
        except (KeyError, IndexError) as err:
            # Chain explicitly so the original lookup failure stays visible.
            raise FilterError("Error parsing filter") from err
        with ServerTiming.time("layout.compute"):
            X_umap = scanpy_umap(self.data, obs_mask)

        # Server picks the re-embedding name, which must not collide with any
        # other embedding name generated by this backend.
        name = f"reembed:{method}_{datetime.now().isoformat(timespec='milliseconds')}"
        dims = [f"{name}_0", f"{name}_1"]
        layout_schema = {"name": name, "type": "float32", "dims": dims}

        # Store the coordinates before advertising them in the schema, so a
        # failed obsm assignment cannot leave a schema entry with no data.
        self.data.obsm[f"X_{name}"] = X_umap
        self.schema["layout"]["obs"].append(layout_schema)
        return layout_schema