def test_corpora_get_props_from_anndata_v110(self):
    """Exercise corpora_get_props_from_anndata with reduced, 1.1.0-style metadata.

    Covers three situations in turn: no version marker in ``uns``, a version
    marker without the other required properties, and a populated set of
    properties that must be returned unchanged.
    """
    adata = self._get_h5ad()

    # Without any version marker the extractor reports "no properties".
    if "version" in adata.uns:
        del adata.uns["version"]
    self.assertIsNone(corpora_get_props_from_anndata(adata))

    # A legitimate version, but the required values are missing.
    adata.uns["version"] = {
        "corpora_schema_version": "1.1.0",
        "corpora_encoding_version": "0.1.0",
    }
    self.assertRaises(KeyError, corpora_get_props_from_anndata, adata)

    # Reduced metadata set used for the 1.1.0 case; it carries fewer fields
    # than the 1.0.0 fixture (the earlier comment said "relative to 1.1.0",
    # presumably meaning 1.0.0).
    # NOTE(review): the schema version below is "1.0.0" although this test is
    # about 1.1.0 - confirm whether that is intentional before changing it.
    expected_props = {
        "version": {
            "corpora_schema_version": "1.0.0",
            "corpora_encoding_version": "0.1.0",
        },
        "title": "title",
        "layer_descriptions": "layer_descriptions",
        "organism": "organism",
        "organism_ontology_term_id": "organism_ontology_term_id",
    }
    for name, value in expected_props.items():
        adata.uns[name] = value

    self.assertEqual(corpora_get_props_from_anndata(adata), expected_props)
def extract_metadata_about_dataset(self):
    """
    Collect the dataset-level metadata that is stored as group metadata on the
    generated CXG: the Corpora schema properties, the dataset title, and a
    link with more information about the dataset.

    Sets ``self.corpora_properties``, ``self.dataset_title`` and
    ``self.dataset_about``.

    Raises:
        ValueError: if ``self.use_corpora_schema`` is set but no Corpora
            properties could be read from ``self.anndata``.
    """
    if self.use_corpora_schema:
        self.corpora_properties = corpora_get_props_from_anndata(self.anndata)
        if self.corpora_properties is None:
            # None means the Corpora schema version of the object could not
            # be determined, so no properties can be extracted from it.
            raise ValueError(
                "Unknown source file schema version is unsupported.")
    else:
        self.corpora_properties = None

    # Title and about are resolved in this order: an explicitly supplied
    # value wins; otherwise the first "SUMMARY" project link from the Corpora
    # properties is used; otherwise the title falls back to the input file
    # name without its extension and about stays empty.
    summary_link = {}
    if self.corpora_properties:
        for candidate in self.corpora_properties.get("project_links", []):
            if candidate.get("link_type", None) == "SUMMARY":
                summary_link = candidate
                break

    file_stem = path.splitext(path.basename(self.input_filename))[0]

    self.dataset_title = self.dataset_title or summary_link.get(
        "link_name", file_stem)
    self.dataset_about = self.dataset_about or summary_link.get("link_url")
def test_corpora_get_props_from_anndata(self):
    """Exercise corpora_get_props_from_anndata across version scenarios.

    Walks through a missing version marker, a malformed one, an unsupported
    version, a supported version lacking required values, and finally a
    populated property set whose JSON-encoded fields must come back decoded.
    """
    adata = self._get_h5ad()

    # No version marker at all.
    if "version" in adata.uns:
        del adata.uns["version"]
    self.assertIsNone(corpora_get_props_from_anndata(adata))

    # Something bogus in place of the version mapping.
    adata.uns["version"] = 99
    self.assertIsNone(corpora_get_props_from_anndata(adata))

    # Unsupported version (and the required values are missing too).
    adata.uns["version"] = {
        "corpora_schema_version": "99.0.0",
        "corpora_encoding_version": "32.1.0",
    }
    self.assertRaises(ValueError, corpora_get_props_from_anndata, adata)

    # Legitimate version, but the required values are missing.
    adata.uns["version"] = {
        "corpora_schema_version": "1.0.0",
        "corpora_encoding_version": "0.1.0",
    }
    self.assertRaises(KeyError, corpora_get_props_from_anndata, adata)

    # These two properties are stored in ``uns`` as JSON text and are
    # expected back as decoded Python objects.
    contributors = [{"contributors": "contributors"}]
    project_links = [{
        "link_name": "link_name",
        "link_url": "link_url",
        "link_type": "SUMMARY",
    }]

    stored_props = {
        "version": {
            "corpora_schema_version": "1.0.0",
            "corpora_encoding_version": "0.1.0",
        },
        "title": "title",
        "layer_descriptions": "layer_descriptions",
        "organism": "organism",
        "organism_ontology_term_id": "organism_ontology_term_id",
        "project_name": "project_name",
        "project_description": "project_description",
        "contributors": json.dumps(contributors),
        "project_links": json.dumps(project_links),
    }
    for name, value in stored_props.items():
        adata.uns[name] = value

    expected_props = {
        **stored_props,
        "contributors": contributors,
        "project_links": project_links,
    }
    self.assertEqual(corpora_get_props_from_anndata(adata), expected_props)
def get_corpora_props(self):
    """Return whatever corpora_get_props_from_anndata yields for ``self.data``."""
    corpora_props = corpora_get_props_from_anndata(self.data)
    return corpora_props