def test_check_object_field_error(self):
    """Check that ``_check_object_field`` raises after a manual ``_add_object``.

    The container is looked up once, then added again explicitly with the
    same (empty) relative path and field; the second lookup is expected to
    raise ``ValueError`` -- presumably because the object is now present
    more than once in the objects table.
    """
    resources = ExternalResources('terms')
    # NOTE(review): 'H**o' looks like a masked 'Homo' -- confirm upstream.
    species = Data(name="species", data=['H**o sapiens', 'Mus musculus'])
    no_field = ''

    resources._check_object_field(species, no_field)
    resources._add_object(species, '', '')

    with self.assertRaises(ValueError):
        resources._check_object_field(species, no_field)
def test_object_key_unqiueness(self):
    """Check that re-using a key for the same container adds no object-key row.

    A reference is added with a string key, the resulting key object is
    fetched with ``get_key`` and passed to a second ``add_ref`` on the same
    container. The object-keys table must still hold the single row
    ``(0, 0)``.
    """
    resources = ExternalResources('terms')

    # NOTE(review): 'H**o sapien' looks like a masked 'Homo sapien' --
    # confirm upstream.
    rows = [('Mus musculus', 9, 81.0), ('H**o sapien', 3, 27.0)]
    row_dtype = [('species', 'U14'), ('age', 'i4'), ('weight', 'f4')]
    container = Data(name='data_name', data=np.array(rows, dtype=row_dtype))

    first_ref = dict(
        container=container,
        key='Mus musculus',
        resource_name='NCBI_Taxonomy',
        resource_uri='https://www.ncbi.nlm.nih.gov/taxonomy',
        entity_id='NCBI:txid10090',
        entity_uri='https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?id=10090',
    )
    resources.add_ref(**first_ref)

    # Second reference goes through the already existing key object.
    mouse_key = resources.get_key('Mus musculus')
    resources.add_ref(
        container=container,
        key=mouse_key,
        resource_name='resource2',
        resource_uri='resource_uri2',
        entity_id='entity2',
        entity_uri='entity_uri2',
    )

    self.assertEqual(resources.object_keys.data, [(0, 0)])
def test_get_object_resources(self):
    """Check the DataFrame returned by ``get_object_resources`` for one reference.

    After a single ``add_ref`` on a compound-dtype container, the result
    must equal a one-row frame with the columns ``keys_idx``,
    ``resource_idx``, ``entity_id`` and ``entity_uri``.
    """
    resources = ExternalResources('terms')

    # NOTE(review): 'H**o sapien' looks like a masked 'Homo sapien' --
    # confirm upstream.
    table = np.array(
        [('Mus musculus', 9, 81.0), ('H**o sapien', 3, 27.0)],
        dtype=[('species', 'U14'), ('age', 'i4'), ('weight', 'f4')],
    )
    container = Data(name='data_name', data=table)

    mouse_entity_uri = 'https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?id=10090'
    resources.add_ref(
        container=container,
        key='Mus musculus',
        resource_name='NCBI_Taxonomy',
        resource_uri='https://www.ncbi.nlm.nih.gov/taxonomy',
        entity_id='NCBI:txid10090',
        entity_uri=mouse_entity_uri,
    )

    column_names = ['keys_idx', 'resource_idx', 'entity_id', 'entity_uri']
    expected = pd.DataFrame(
        data=[[0, 0, 'NCBI:txid10090', mouse_entity_uri]],
        columns=column_names,
    )

    received = resources.get_object_resources(container)
    pd.testing.assert_frame_equal(received, expected)
def test_check_object_field_add(self):
    """Check that ``_check_object_field`` adds objects it has not seen yet.

    The lookup is done once with a plain string id and once with a ``Data``
    container; afterwards the objects table must contain one row for each,
    both with empty relative path and field.
    """
    resources = ExternalResources('terms')
    # NOTE(review): 'H**o' looks like a masked 'Homo' -- confirm upstream.
    species = Data(name="species", data=['H**o sapiens', 'Mus musculus'])

    for candidate in ('uuid1', species):
        resources._check_object_field(candidate, '')

    expected_rows = [('uuid1', '', ''), (species.object_id, '', '')]
    self.assertEqual(resources.objects.data, expected_rows)
def test_add_ref(self):
    """Check that one ``add_ref`` call fills the four tables checked below.

    Verifies the resulting rows of the keys, resources, entities and objects
    tables for a container referenced without a field.
    """
    resources = ExternalResources('terms')
    # NOTE(review): 'H**o' looks like a masked 'Homo' -- confirm upstream.
    species = Data(name="species", data=['H**o sapiens', 'Mus musculus'])

    resources.add_ref(
        container=species,
        key='key1',
        resource_name='resource1',
        resource_uri='uri1',
        entity_id='entity_id1',
        entity_uri='entity1',
    )

    expected_keys = [('key1', )]
    expected_resources = [('resource1', 'uri1')]
    expected_entities = [(0, 0, 'entity_id1', 'entity1')]
    expected_objects = [(species.object_id, '', '')]

    self.assertEqual(resources.keys.data, expected_keys)
    self.assertEqual(resources.resources.data, expected_resources)
    self.assertEqual(resources.entities.data, expected_entities)
    self.assertEqual(resources.objects.data, expected_objects)
def test_add_ref_compound_data(self):
    """Check ``add_ref`` on a compound-dtype container with a named field.

    Same table checks as the plain ``add_ref`` case, except that the objects
    row must record ``'species'`` as the field.
    """
    resources = ExternalResources(name='example')

    # NOTE(review): 'H**o' looks like a masked 'Homo' -- confirm upstream.
    table = np.array(
        [('Mus musculus', 9, 81.0), ('H**o sapiens', 3, 27.0)],
        dtype=[('species', 'U14'), ('age', 'i4'), ('weight', 'f4')],
    )
    container = Data(name='data_name', data=table)

    resources.add_ref(
        container=container,
        field='species',
        key='Mus musculus',
        resource_name='NCBI_Taxonomy',
        resource_uri='resource0_uri',
        entity_id='NCBI:txid10090',
        entity_uri='entity_0_uri',
    )

    expected_keys = [('Mus musculus', )]
    expected_resources = [('NCBI_Taxonomy', 'resource0_uri')]
    expected_entities = [(0, 0, 'NCBI:txid10090', 'entity_0_uri')]
    expected_objects = [(container.object_id, '', 'species')]

    self.assertEqual(resources.keys.data, expected_keys)
    self.assertEqual(resources.resources.data, expected_resources)
    self.assertEqual(resources.entities.data, expected_entities)
    self.assertEqual(resources.objects.data, expected_objects)
def test_to_dataframe(self):
    """Check ``to_dataframe`` on resources with shared keys and multi-entity keys.

    Three containers are registered:

    * ``data1`` -- compound data, two keys on the ``species`` field;
    * ``data2`` -- re-uses both keys of ``data1`` (linked by hand through
      ``_add_object`` / ``_add_object_key``) and adds a third key;
    * ``data3`` -- one key that is mapped to two entities from two
      different resources.

    The flattened frame is compared against the expected seven rows, first
    with plain columns and then with ``use_categories=True``, where the same
    values are expected under ``(category, column)`` tuple column labels.
    """
    # Literals shared between the add_ref calls and the expected frame.
    ncbi_name = 'NCBI_Taxonomy'
    ncbi_uri = 'https://www.ncbi.nlm.nih.gov/taxonomy'
    mouse_uri = 'https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?id=10090'
    human_uri = 'https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?id=9606'
    orangutan_uri = 'https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?id=9601'
    mgi_uri = 'http://www.informatics.jax.org/'
    # NOTE(review): the marker id in this URI (1343464) differs from the
    # entity_id used below (MGI:1346434); kept exactly as in the fixture.
    rorb_mgi_uri = 'http://www.informatics.jax.org/marker/MGI:1343464'
    ensembl_uri = 'https://uswest.ensembl.org/index.html'
    rorb_ensembl_uri = 'https://uswest.ensembl.org/Homo_sapiens/Gene/Summary?db=core;g=ENSG00000198963'

    # Setup complex external resources with keys reused across objects and
    # multiple resources per key
    er = ExternalResources(name='example')

    # Add a species dataset with 2 keys
    # NOTE(review): 'H**o' looks like a masked 'Homo' -- confirm upstream.
    data1 = Data(
        name='data_name',
        data=np.array(
            [('Mus musculus', 9, 81.0), ('H**o sapiens', 3, 27.0)],
            dtype=[('species', 'U14'), ('age', 'i4'), ('weight', 'f4')],
        ),
    )
    # Only the key objects are needed later; resource and entity are dropped.
    k1, _, _ = er.add_ref(
        container=data1,
        field='species',
        key='Mus musculus',
        resource_name=ncbi_name,
        resource_uri=ncbi_uri,
        entity_id='NCBI:txid10090',
        entity_uri=mouse_uri,
    )
    k2, _, _ = er.add_ref(
        container=data1,
        field='species',
        key='H**o sapiens',
        resource_name=ncbi_name,
        resource_uri=ncbi_uri,
        entity_id='NCBI:txid9606',
        entity_uri=human_uri,
    )

    # Add a second species dataset that uses the same keys as the first
    # dataset and add an additional key
    data2 = Data(name="species", data=['H**o sapiens', 'Mus musculus', 'Pongo abelii'])
    o2 = er._add_object(data2, relative_path='', field='')
    er._add_object_key(o2, k1)
    er._add_object_key(o2, k2)
    # The return value is not used, so it is not bound (the original rebound
    # k2 here, hiding the 'H**o sapiens' key).
    er.add_ref(
        container=data2,
        field='',
        key='Pongo abelii',
        resource_name=ncbi_name,
        resource_uri=ncbi_uri,
        entity_id='NCBI:txid9601',
        entity_uri=orangutan_uri,
    )

    # Add a third data object, this time with 2 entities for a key
    data3 = Data(name="genotypes", data=['Rorb'])
    k3, _, _ = er.add_ref(
        container=data3,
        field='',
        key='Rorb',
        resource_name='MGI Database',
        resource_uri=mgi_uri,
        entity_id='MGI:1346434',
        entity_uri=rorb_mgi_uri,
    )
    er.add_ref(
        container=data3,
        field='',
        key=k3,
        resource_name='Ensembl',
        resource_uri=ensembl_uri,
        entity_id='ENSG00000198963',
        entity_uri=rorb_ensembl_uri,
    )

    # Expected rows, column by column. Each list is turned into the
    # {row_index: value} mapping the original literal spelled out by hand.
    expected_values = {
        'objects_idx': [0, 0, 1, 1, 1, 2, 2],
        'object_id': [data1.object_id, data1.object_id,
                      data2.object_id, data2.object_id, data2.object_id,
                      data3.object_id, data3.object_id],
        'field': ['species', 'species', '', '', '', '', ''],
        'keys_idx': [0, 1, 0, 1, 2, 3, 3],
        'key': ['Mus musculus', 'H**o sapiens',
                'Mus musculus', 'H**o sapiens', 'Pongo abelii',
                'Rorb', 'Rorb'],
        'resources_idx': [0, 0, 0, 0, 0, 1, 2],
        'resource': [ncbi_name, ncbi_name, ncbi_name, ncbi_name, ncbi_name,
                     'MGI Database', 'Ensembl'],
        'resource_uri': [ncbi_uri, ncbi_uri, ncbi_uri, ncbi_uri, ncbi_uri,
                         mgi_uri, ensembl_uri],
        'entities_idx': [0, 1, 0, 1, 2, 3, 4],
        'entity_id': ['NCBI:txid10090', 'NCBI:txid9606',
                      'NCBI:txid10090', 'NCBI:txid9606', 'NCBI:txid9601',
                      'MGI:1346434', 'ENSG00000198963'],
        'entity_uri': [mouse_uri, human_uri,
                       mouse_uri, human_uri, orangutan_uri,
                       rorb_mgi_uri, rorb_ensembl_uri],
    }
    expected_df_data = {
        column: dict(enumerate(values))
        for column, values in expected_values.items()
    }

    # Convert to dataframe and compare against the expected result
    result_df = er.to_dataframe()
    expected_df = pd.DataFrame.from_dict(expected_df_data)
    pd.testing.assert_frame_equal(result_df, expected_df)

    # Convert to dataframe with categories and compare against the expected
    # result
    result_df = er.to_dataframe(use_categories=True)
    cols_with_categories = [
        ('objects', 'objects_idx'),
        ('objects', 'object_id'),
        ('objects', 'field'),
        ('keys', 'keys_idx'),
        ('keys', 'key'),
        ('resources', 'resources_idx'),
        ('resources', 'resource'),
        ('resources', 'resource_uri'),
        ('entities', 'entities_idx'),
        ('entities', 'entity_id'),
        ('entities', 'entity_uri'),
    ]
    # Same values as above, re-keyed by (category, column) tuples.
    expected_by_category = {
        col: expected_df_data[col[1]] for col in cols_with_categories
    }
    expected_df = pd.DataFrame.from_dict(expected_by_category)
    pd.testing.assert_frame_equal(result_df, expected_df)
# Ignore experimental feature warnings in the tutorial to improve rendering import warnings warnings.filterwarnings("ignore", category=UserWarning, message="ExternalResources is experimental*") er = ExternalResources(name='example') ############################################################################### # Using the add_ref method # ------------------------------------------------------ # :py:func:`~hdmf.common.resources.ExternalResources.add_ref` # is a wrapper function provided by the ``ExternalResources`` class that # simplifies adding data. Using ``add_ref`` allows us to treat new entries similar # to adding a new row to a flat table, with ``add_ref`` taking care of populating # the underlying data structures accordingly. data = Data(name="species", data=['H**o sapiens', 'Mus musculus']) er.add_ref( container=data, key='H**o sapiens', resource_name='NCBI_Taxonomy', resource_uri='https://www.ncbi.nlm.nih.gov/taxonomy', entity_id='NCBI:txid9606', entity_uri='https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?id=9606' ) key, resource, entity = er.add_ref( container=data, key='Mus musculus', resource_name='NCBI_Taxonomy', resource_uri='https://www.ncbi.nlm.nih.gov/taxonomy', entity_id='NCBI:txid10090',