def test_piecewise_add(self):
    """Build an ExternalResources table by table using the private ``_add_*`` helpers.

    Adds one key, one resource, one entity and one object, links the object
    to the key, and then checks the raw contents of the keys, entities and
    objects tables.
    """
    resources = ExternalResources('terms')

    # The term the user wants to annotate; the user has to provide it.
    mouse_key = resources._add_key('mouse')

    # The user also has to provide the resource and the entity details, i.e.
    # the information needed to look up the controlled term.
    resource = resources._add_resource(resource='resource0', uri='resource_uri0')
    resources._add_entity(mouse_key, resource, '10090', 'uri')

    # The user can also pass in the container, or this step could be
    # wrapped up under NWBFile.
    species_object = resources._add_object('object', 'species')

    # Linking the object to the key could likewise be wrapped up under NWBFile.
    resources._add_object_key(species_object, mouse_key)

    expected_keys = [('mouse', )]
    expected_entities = [(0, 0, '10090', 'uri')]
    expected_objects = [('object', 'species')]
    self.assertEqual(resources.keys.data, expected_keys)
    self.assertEqual(resources.entities.data, expected_entities)
    self.assertEqual(resources.objects.data, expected_objects)
def test_to_dataframe(self):
    """Check ``ExternalResources.to_dataframe`` with and without ``use_categories``.

    The fixture reuses keys across objects and attaches two entities (from
    two different resources) to a single key. The flattened dataframe is
    compared against a hand-written expected table, first with plain column
    names and then with ``(category, column)`` tuple column names.
    """
    # NOTE(review): the 'H**o sapiens' spelling looks like a text-filter
    # artifact of "Homo sapiens" - confirm. It is kept verbatim because the
    # input data and the expected values below use the same spelling.
    ncbi_name = 'NCBI_Taxonomy'
    ncbi_uri = 'https://www.ncbi.nlm.nih.gov/taxonomy'
    taxon_browser = 'https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?id='
    mouse_uri = taxon_browser + '10090'
    human_uri = taxon_browser + '9606'
    orangutan_uri = taxon_browser + '9601'
    mgi_uri = 'http://www.informatics.jax.org/'
    # NOTE(review): the id in this URI (1343464) differs from the entity_id
    # used below (MGI:1346434) - confirm which is intended. Both are kept
    # verbatim because the expected values repeat them.
    rorb_mgi_uri = 'http://www.informatics.jax.org/marker/MGI:1343464'
    ensembl_uri = 'https://uswest.ensembl.org/index.html'
    rorb_ensembl_uri = \
        'https://uswest.ensembl.org/Homo_sapiens/Gene/Summary?db=core;g=ENSG00000198963'

    er = ExternalResources(name='example')

    # First object: a compound species dataset annotated with two keys.
    species_records = np.array(
        [('Mus musculus', 9, 81.0), ('H**o sapiens', 3, 27.0)],
        dtype=[('species', 'U14'), ('age', 'i4'), ('weight', 'f4')])
    data1 = Data(name='data_name', data=species_records)
    mouse_key, _, _ = er.add_ref(
        container=data1,
        field='species',
        key='Mus musculus',
        resource_name=ncbi_name,
        resource_uri=ncbi_uri,
        entity_id='NCBI:txid10090',
        entity_uri=mouse_uri)
    human_key, _, _ = er.add_ref(
        container=data1,
        field='species',
        key='H**o sapiens',
        resource_name=ncbi_name,
        resource_uri=ncbi_uri,
        entity_id='NCBI:txid9606',
        entity_uri=human_uri)

    # Second object: reuses both existing keys and then adds a new one.
    data2 = Data(name='species', data=['H**o sapiens', 'Mus musculus', 'Pongo abelii'])
    species_obj = er._add_object(data2, relative_path='', field='')
    for shared_key in (mouse_key, human_key):
        er._add_object_key(species_obj, shared_key)
    _, _, _ = er.add_ref(
        container=data2,
        field='',
        key='Pongo abelii',
        resource_name=ncbi_name,
        resource_uri=ncbi_uri,
        entity_id='NCBI:txid9601',
        entity_uri=orangutan_uri)

    # Third object: a single key that ends up with two entities.
    data3 = Data(name='genotypes', data=['Rorb'])
    rorb_key, _, _ = er.add_ref(
        container=data3,
        field='',
        key='Rorb',
        resource_name='MGI Database',
        resource_uri=mgi_uri,
        entity_id='MGI:1346434',
        entity_uri=rorb_mgi_uri)
    # Pass the key returned above (not the string) for the second entity;
    # the expected table lists both Rorb rows under the same keys_idx.
    er.add_ref(
        container=data3,
        field='',
        key=rorb_key,
        resource_name='Ensembl',
        resource_uri=ensembl_uri,
        entity_id='ENSG00000198963',
        entity_uri=rorb_ensembl_uri)

    # Flat dataframe: one expected row per (object, key, entity) combination.
    result_df = er.to_dataframe()
    expected_columns = {
        'objects_idx': [0, 0, 1, 1, 1, 2, 2],
        'object_id': [data1.object_id] * 2 + [data2.object_id] * 3 + [data3.object_id] * 2,
        'field': ['species', 'species', '', '', '', '', ''],
        'keys_idx': [0, 1, 0, 1, 2, 3, 3],
        'key': ['Mus musculus', 'H**o sapiens', 'Mus musculus', 'H**o sapiens',
                'Pongo abelii', 'Rorb', 'Rorb'],
        'resources_idx': [0, 0, 0, 0, 0, 1, 2],
        'resource': [ncbi_name] * 5 + ['MGI Database', 'Ensembl'],
        'resource_uri': [ncbi_uri] * 5 + [mgi_uri, ensembl_uri],
        'entities_idx': [0, 1, 0, 1, 2, 3, 4],
        'entity_id': ['NCBI:txid10090', 'NCBI:txid9606', 'NCBI:txid10090', 'NCBI:txid9606',
                      'NCBI:txid9601', 'MGI:1346434', 'ENSG00000198963'],
        'entity_uri': [mouse_uri, human_uri, mouse_uri, human_uri,
                       orangutan_uri, rorb_mgi_uri, rorb_ensembl_uri],
    }
    # Convert every column to a {row_index: value} mapping before handing it to pandas.
    expected_df_data = {
        column: dict(enumerate(values)) for column, values in expected_columns.items()
    }
    pd.testing.assert_frame_equal(result_df, pd.DataFrame.from_dict(expected_df_data))

    # With categories the same data is expected under (category, column) tuple names.
    result_df = er.to_dataframe(use_categories=True)
    columns_by_category = {
        'objects': ['objects_idx', 'object_id', 'field'],
        'keys': ['keys_idx', 'key'],
        'resources': ['resources_idx', 'resource', 'resource_uri'],
        'entities': ['entities_idx', 'entity_id', 'entity_uri'],
    }
    categorized_data = {
        (category, column): expected_df_data[column]
        for category, columns in columns_by_category.items()
        for column in columns
    }
    pd.testing.assert_frame_equal(result_df, pd.DataFrame.from_dict(categorized_data))