def test_taxonomy_from_list_summarize_group_simple_cached_model(self):
    # NOTE: do not delete this test when converting _alt methods to non
    # _alt methods
    with patch('microsetta_public_api.repo._taxonomy_repo.TaxonomyRepo.'
               'tables', new_callable=PropertyMock) as mock_tables:
        mock_tables.return_value = {
            'some-table': {
                'table': self.table,
                'feature-data-taxonomy': self.taxonomy_df,
                'model': TaxonomyModel(self.table, self.taxonomy_df)
            },
        }
        response, code = _summarize_group(
            self.post_body['sample_ids'],
            "some-table",
            taxonomy_repo=TaxonomyRepo(),
        )

        self.assertEqual(code, 200)
        exp_keys = ['taxonomy', 'features', 'feature_values',
                    'feature_variances']
        obs = json.loads(response)
        self.assertCountEqual(exp_keys, obs.keys())
        self.assertEqual(
            '((((feature-1,((feature-2)e)d)c)b,'
            '(((feature-3)h)g)f)a);',
            obs['taxonomy'])
        self.assertListEqual(['feature-1', 'feature-2', 'feature-3'],
                             obs['features'])
        assert_allclose([1. / 10, 6. / 10, 3. / 10],
                        obs['feature_values'])
        assert_allclose([0, 0, 0],
                        obs['feature_variances'])
def model(self, table_name):
    # Return the TaxonomyModel for `table_name`, preferring a pre-built
    # (cached) model and falling back to constructing one from the table,
    # taxonomy feature data, and variances registered for that resource.
    model = self._get_resource(table_name, component='model')
    if model is None:
        table = self.table(table_name)
        features = self.feature_data_taxonomy(table_name)
        variances = self.variances(table_name)
        model = TaxonomyModel(table, features, variances)
    return model
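# A minimal usage sketch of the accessor above, assuming a TaxonomyRepo whose
# resources were already loaded under the hypothetical name 'some-table'. The
# call returns the cached model when one was pre-built, otherwise it
# constructs one from the table, taxonomy feature data, and variances.
from microsetta_public_api.repo._taxonomy_repo import TaxonomyRepo

repo = TaxonomyRepo()
model = repo.model('some-table')  # cached TaxonomyModel, or built on demand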
@classmethod
def setUpClass(cls):
    cls.post_body = {
        'sample_ids': [
            'sample-1',
            'sample-2',
        ]
    }
    cls.table = biom.Table(np.array([[0, 1, 2],
                                     [2, 4, 6],
                                     [3, 0, 1]]),
                           ['feature-1', 'feature-2', 'feature-3'],
                           ['sample-1', 'sample-2', 'sample-3'])
    cls.taxonomy_df = pd.DataFrame(
        [['feature-1', 'a; b; c', 0.123],
         ['feature-2', 'a; b; c; d; e', 0.345],
         ['feature-3', 'a; f; g; h', 0.678]],
        columns=['Feature ID', 'Taxon', 'Confidence'])
    cls.taxonomy_df.set_index('Feature ID', inplace=True)

    # variances
    cls.table_vars = biom.Table(
        np.array([[0, 1, 2],
                  [2, 4, 6],
                  [3, 0, 1]]),
        ['feature-1', 'feature-2', 'feature-3'],
        ['sample-1', 'sample-2', 'sample-3'])
    cls.taxonomy_model = TaxonomyModel(cls.table, cls.taxonomy_df,
                                       cls.table_vars)
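# Sanity-check sketch for the expectations in the cached-model test above,
# assuming the group covers sample-1 and sample-2 from this fixture: pooling
# the raw counts gives 1, 6, and 3 out of 10, i.e. the asserted
# feature_values. Whether the model pools counts or averages per-sample
# proportions is an assumption here (both agree because each sample totals
# 5 counts); the zero feature_variances follow from that test building its
# model without a variances table.
import numpy as np

group_counts = np.array([[0, 1],   # feature-1 in sample-1, sample-2
                         [2, 4],   # feature-2
                         [3, 0]])  # feature-3
expected_values = group_counts.sum(axis=1) / group_counts.sum()
print(expected_values)  # [0.1 0.6 0.3] == [1/10, 6/10, 3/10]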
def _transform_single_table(dict_, resource_name):
    taxonomy = {
        'feature-data-taxonomy': dict_.pop('feature-data-taxonomy', None)
    }
    supported_table_types = {'qza', 'biom'}
    table_type = dict_.get('table-format', 'qza')

    if table_type not in supported_table_types:
        raise ValueError(f"'table-format'={table_type} not in supported "
                         f"table types: {supported_table_types}.")

    _validate_dict_of_paths(dict_,
                            resource_name, allow_none=True,
                            required_fields=['table'],
                            non_ext_entries=['q2-type', 'table-format',
                                             'cache-taxonomy'],
                            allow_extras=True,
                            extensions=['.' + table_type])
    _validate_dict_of_paths(taxonomy,
                            resource_name,
                            allow_none=True,
                            )

    if taxonomy['feature-data-taxonomy'] is not None:
        dict_.update(taxonomy)

    semantic_types = {
        'feature-data-taxonomy': FeatureData[Taxonomy],
    }
    biom_kws = set()
    if table_type == 'qza':
        semantic_types.update({
            'table': dict_.get('table-type', FeatureTable[Frequency]),
            'variances': FeatureTable[Frequency],
        })
    elif table_type == 'biom':
        biom_kws.update({'table', 'variances'})
    else:
        # shouldn't happen because of the error check earlier, but seems
        # better than silently ignoring...
        raise ValueError(f"'table-format'={table_type} not in supported "
                         f"table types: {supported_table_types}.")

    views = {
        'table': biom.Table,
        'feature-data-taxonomy': pd.DataFrame,
        'variances': biom.Table,
    }

    new_resource = deepcopy(dict_)
    for key, value in dict_.items():
        if key in semantic_types:
            new_resource[key] = _parse_q2_data(value,
                                               semantic_types[key],
                                               view_type=views.get(key, None),
                                               )
        elif key in biom_kws:
            new_resource[key] = biom.load_table(value)

    cache_taxonomy = new_resource.get('cache-taxonomy', True)
    if 'feature-data-taxonomy' in new_resource and cache_taxonomy:
        table = new_resource['table']
        taxonomy = new_resource['feature-data-taxonomy']
        variances = new_resource.get('variances', None)
        # rank_level=5 -> genus
        model = TaxonomyModel(table, taxonomy, variances, rank_level=5)
        new_resource['model'] = model

    return new_resource
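# Hedged sketch of the per-resource dictionary this transform consumes, based
# only on the keys it reads above; the paths and resource names are
# hypothetical placeholders, and running this for real requires the referenced
# artifacts (and QIIME 2 for the .qza entries) to exist.
qza_resource = {
    'table': '/data/table.qza',                     # FeatureTable[Frequency]
    'feature-data-taxonomy': '/data/taxonomy.qza',  # FeatureData[Taxonomy]
    'variances': '/data/variances.qza',
    'table-format': 'qza',     # the default when omitted
    'cache-taxonomy': True,    # pre-build the TaxonomyModel (rank_level=5)
}
biom_resource = {
    'table': '/data/table.biom',
    'feature-data-taxonomy': '/data/taxonomy.qza',
    'table-format': 'biom',    # 'table'/'variances' go through biom.load_table
}

# Copies are passed because the transform pops 'feature-data-taxonomy' from
# its input dictionary.
new_qza = _transform_single_table(dict(qza_resource), 'some-table')
new_biom = _transform_single_table(dict(biom_resource), 'another-table')
# Each result holds a biom.Table under 'table', a pandas DataFrame under
# 'feature-data-taxonomy', and, when cache-taxonomy is true, a ready
# TaxonomyModel under 'model'.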