示例#1
0
    def test_corpora_get_props_from_anndata_v110(self):
        """Exercise corpora_get_props_from_anndata with schema 1.1.0 style metadata."""
        adata = self._get_h5ad()

        # With no "version" entry in uns, the helper is expected to return None.
        if "version" in adata.uns:
            del adata.uns["version"]
        self.assertIsNone(corpora_get_props_from_anndata(adata))

        # Legit version, but the other required values are still missing.
        version_only = {
            "corpora_schema_version": "1.1.0",
            "corpora_encoding_version": "0.1.0",
        }
        adata.uns["version"] = version_only
        with self.assertRaises(KeyError):
            corpora_get_props_from_anndata(adata)

        # Metadata following schema 1.1.0, which removes some fields relative
        # to 1.0.0.
        # NOTE(review): the version written below is "1.0.0" even though this
        # test targets 1.1.0 -- confirm whether that is intentional.
        expected_props = {
            "version": {
                "corpora_schema_version": "1.0.0",
                "corpora_encoding_version": "0.1.0",
            },
        }
        # Every remaining field simply uses its own name as a placeholder value.
        for field_name in (
            "title",
            "layer_descriptions",
            "organism",
            "organism_ontology_term_id",
        ):
            expected_props[field_name] = field_name

        for key, value in expected_props.items():
            adata.uns[key] = value

        actual_props = corpora_get_props_from_anndata(adata)
        self.assertEqual(actual_props, expected_props)
示例#2
0
    def extract_metadata_about_dataset(self):
        """
        Populate the dataset-level metadata that is stored as group metadata on the generated CXG: the Corpora
        schema properties, the dataset title and an "about" link pointing to more information on the dataset.

        Sets self.corpora_properties, self.dataset_title and self.dataset_about.

        Raises:
            ValueError: if the Corpora schema is requested but no properties could be read from the AnnData.
        """

        if self.use_corpora_schema:
            self.corpora_properties = corpora_get_props_from_anndata(self.anndata)
            if self.corpora_properties is None:
                # None means the Corpora schema version of the object could not be determined, so no
                # properties can be extracted from it.
                raise ValueError(
                    "Unknown source file schema version is unsupported.")
        else:
            self.corpora_properties = None

        # Precedence for title/about: an explicitly provided value wins; next comes the first project link of
        # type SUMMARY found in the Corpora properties; finally the title (only) falls back to the input file's
        # base name without extension, while about stays unset.
        summary_link = {}
        if self.corpora_properties:
            for candidate in self.corpora_properties.get("project_links", []):
                if candidate.get("link_type", None) == "SUMMARY":
                    summary_link = candidate
                    break

        base_name = path.basename(self.input_filename)
        fallback_title = path.splitext(base_name)[0]

        if not self.dataset_title:
            self.dataset_title = summary_link.get("link_name", fallback_title)
        if not self.dataset_about:
            self.dataset_about = summary_link.get("link_url")
示例#3
0
    def test_corpora_get_props_from_anndata(self):
        """Exercise corpora_get_props_from_anndata with missing, bogus, unsupported and valid versions."""
        adata = self._get_h5ad()

        # No "version" entry in uns: the helper is expected to return None.
        if "version" in adata.uns:
            del adata.uns["version"]
        self.assertIsNone(corpora_get_props_from_anndata(adata))

        # Something bogus: a version entry that is not a mapping also yields None.
        adata.uns["version"] = 99
        self.assertIsNone(corpora_get_props_from_anndata(adata))

        # Unsupported version (and missing required values): expect ValueError.
        unsupported_version = {
            "corpora_schema_version": "99.0.0",
            "corpora_encoding_version": "32.1.0",
        }
        adata.uns["version"] = unsupported_version
        with self.assertRaises(ValueError):
            corpora_get_props_from_anndata(adata)

        # Legit version, but missing required values: expect KeyError.
        supported_version_only = {
            "corpora_schema_version": "1.0.0",
            "corpora_encoding_version": "0.1.0",
        }
        adata.uns["version"] = supported_version_only
        with self.assertRaises(KeyError):
            corpora_get_props_from_anndata(adata)

        # These two fields are written to uns as JSON strings; the helper is
        # expected to hand them back decoded.
        contributors = [{"contributors": "contributors"}]
        project_links = [{
            "link_name": "link_name",
            "link_url": "link_url",
            "link_type": "SUMMARY",
        }]

        stored_fields = {
            "version": {
                "corpora_schema_version": "1.0.0",
                "corpora_encoding_version": "0.1.0",
            },
            "title": "title",
            "layer_descriptions": "layer_descriptions",
            "organism": "organism",
            "organism_ontology_term_id": "organism_ontology_term_id",
            "project_name": "project_name",
            "project_description": "project_description",
            "contributors": json.dumps(contributors),
            "project_links": json.dumps(project_links),
        }
        for key, value in stored_fields.items():
            adata.uns[key] = value

        # Expected result: same as what was stored, with the JSON fields decoded.
        expected_props = dict(
            stored_fields,
            contributors=contributors,
            project_links=project_links,
        )
        self.assertEqual(corpora_get_props_from_anndata(adata), expected_props)
示例#4
0
 def get_corpora_props(self):
     """Return the result of corpora_get_props_from_anndata for self.data."""
     corpora_props = corpora_get_props_from_anndata(self.data)
     return corpora_props