def test_get_group_raw(self):
    # Raw alpha values are checked twice: once for every sample in the
    # series (no IDs, no name) and once for a named subset of samples.
    #
    # The fixture must exist *before* Alpha is constructed from it;
    # assigning it afterwards would leave the assignment without effect on
    # `adiv` and make the test depend on `self.series` being set elsewhere.
    self.series = pd.Series([0.1, 0.2, 0.8, 0.7, 0.7, 0.6],
                            index=['a', 'b', 'c', 'd', 'e', 'f'],
                            name='shannon')
    adiv = Alpha(self.series)

    # No arguments: all six samples are expected back and the name is None.
    exp_all = GroupAlphaRaw(name=None,
                            alpha_metric='shannon',
                            alpha_diversity={
                                'a': 0.1, 'b': 0.2, 'c': 0.8,
                                'd': 0.7, 'e': 0.7, 'f': 0.6
                            })
    obs_all = adiv.get_group_raw()
    self.assertEqual(obs_all, exp_all)

    # Explicit IDs plus a name: only the requested samples are expected.
    exp_partial = GroupAlphaRaw(name='foo',
                                alpha_metric='shannon',
                                alpha_diversity={
                                    'a': 0.1, 'c': 0.8, 'f': 0.6
                                })
    obs_partial = adiv.get_group_raw(['a', 'c', 'f'], 'foo')
    self.assertEqual(obs_partial, exp_partial)
def _alpha_group(body, alpha_repo, metadata_repo_getter, alpha_metric,
                 percentiles, return_raw, summary_statistics):
    """Assemble the alpha diversity payload for a group of samples.

    The group is taken from ``body['sample_ids']``, from the samples
    matching ``body['metadata_query']``, or from both combined according to
    ``body['condition']`` ('OR' for union, 'AND' for intersection).

    Parameters
    ----------
    body
        Container read for the optional keys ``'sample_ids'`` and
        ``'metadata_query'``; ``'condition'`` is read when both are present.
    alpha_repo
        Object providing ``available_metrics()``, ``exists(id, metric)`` and
        ``get_alpha_diversity(ids, metric)``.
    metadata_repo_getter
        Zero-argument callable; it is only invoked when a metadata query is
        present, and its result must provide ``sample_id_matches(query)``.
    alpha_metric
        Name of the metric to look up.
    percentiles
        Forwarded unchanged to ``Alpha``.
    return_raw
        When truthy, the raw group values are merged into the result.
    summary_statistics
        When truthy, the group summary is stored under ``'group_summary'``.

    Returns
    -------
    dict
        The assembled response data.

    Raises
    ------
    IncompatibleOptions
        If neither ``summary_statistics`` nor ``return_raw`` is truthy.
    """
    if not summary_statistics and not return_raw:
        # swagger cannot express that at least one of these two flags has
        # to be set, so the bad request is reported from here
        raise IncompatibleOptions('Either `summary_statistics`, '
                                  '`return_raw`, or both are required to be '
                                  'true.')

    group_ids = []

    # checks shared by every request: is the metric one we have data for?
    known_metrics = alpha_repo.available_metrics()
    resource_kind = 'metric'
    validate_resource_alt(known_metrics, alpha_metric, resource_kind)

    ids_supplied = 'sample_ids' in body
    if ids_supplied:
        group_ids = body['sample_ids']
        # every explicitly requested sample needs a value for this metric
        absent_ids = [
            sample for sample in group_ids
            if not alpha_repo.exists(sample, alpha_metric)
        ]
        check_missing_ids_alt(absent_ids, alpha_metric, resource_kind)

    if 'metadata_query' in body:
        # resolve the metadata query, keeping only samples that have data
        query = body['metadata_query']
        metadata_repo = metadata_repo_getter()
        query_hits = metadata_repo.sample_id_matches(query)
        query_hits = [
            sample for sample in query_hits
            if alpha_repo.exists(sample, alpha_metric)
        ]
        if not ids_supplied:
            group_ids = query_hits
        else:
            condition = body['condition']
            if condition == 'OR':
                group_ids = list(set(group_ids) | set(query_hits))
            elif condition == 'AND':
                group_ids = list(set(group_ids) & set(query_hits))

    # fetch the alpha diversity of every sample in the group
    series = alpha_repo.get_alpha_diversity(group_ids, alpha_metric)
    alpha_model = Alpha(series, percentiles=percentiles)

    payload = {}
    if return_raw:
        # the name is unused for now; pass a placeholder and strip it again
        raw_values = alpha_model.get_group_raw(name='').to_dict()
        del raw_values['name']
        payload.update(raw_values)
    if summary_statistics:
        # the name is unused for now; pass a placeholder and strip it again
        summary = alpha_model.get_group(name='').to_dict()
        del summary['name']
        # the metric is lifted to the top level, the rest is nested
        payload['alpha_metric'] = summary.pop('alpha_metric')
        payload['group_summary'] = summary

    return payload
def alpha_group(body, alpha_metric, summary_statistics=True,
                percentiles=None, return_raw=False):
    """Return a JSON response with alpha diversity data for a sample group.

    Parameters
    ----------
    body
        Request body; ``body['sample_ids']`` supplies the group's samples.
    alpha_metric
        Name of the metric to look up.
    summary_statistics
        When truthy, the group summary is stored under ``'group_summary'``.
    percentiles
        Forwarded unchanged to ``Alpha``.
    return_raw
        When truthy, the raw group values are merged into the response.

    Returns
    -------
    tuple
        ``(response, 200)`` on success. When both flags are falsy a
        ``(response, 400)`` pair is returned. If ``validate_resource`` or
        ``check_missing_ids`` yields a truthy value, that value is returned
        as-is.
    """
    if not summary_statistics and not return_raw:
        # swagger cannot express that at least one of these two flags has
        # to be set, so the bad request is reported from here
        return jsonify(error=400,
                       text='Either `summary_statistics`, `return_raw`, '
                            'or both are required to be true.'), 400

    requested_ids = body['sample_ids']
    repo = AlphaRepo()

    # is the requested metric one we have data for?
    resource_kind = 'metric'
    metric_error = validate_resource(repo.available_metrics(), alpha_metric,
                                     resource_kind)
    if metric_error:
        return metric_error

    # every requested sample needs a value for this metric
    absent_ids = [
        sample for sample in requested_ids
        if not repo.exists(sample, alpha_metric)
    ]
    ids_error = check_missing_ids(absent_ids, alpha_metric, resource_kind)
    if ids_error:
        return ids_error

    # fetch the alpha diversity of every sample in the group
    series = repo.get_alpha_diversity(requested_ids, alpha_metric)
    alpha_model = Alpha(series, percentiles=percentiles)

    payload = {}
    if return_raw:
        # the name is unused for now; pass a placeholder and strip it again
        raw_values = alpha_model.get_group_raw(name='').to_dict()
        del raw_values['name']
        payload.update(raw_values)
    if summary_statistics:
        # the name is unused for now; pass a placeholder and strip it again
        summary = alpha_model.get_group(name='').to_dict()
        del summary['name']
        # the metric is lifted to the top level, the rest is nested
        payload['alpha_metric'] = summary.pop('alpha_metric')
        payload['group_summary'] = summary

    return jsonify(payload), 200
def _get_alpha(alpha_repo, alpha_metric, sample_id):
    """Look up one sample's alpha diversity value.

    Parameters
    ----------
    alpha_repo
        Object providing ``exists(ids, metric)`` and
        ``get_alpha_diversity(ids, metric)``.
    alpha_metric
        Name of the metric to look up.
    sample_id
        Identifier of the sample.

    Returns
    -------
    dict
        Has the keys ``'sample_id'``, ``'alpha_metric'`` and ``'data'``.

    Raises
    ------
    UnknownID
        If ``alpha_repo.exists`` does not report the sample as present.
    """
    # the repo is queried with a one-element list in both calls
    lookup = [sample_id]
    presence = alpha_repo.exists(lookup, alpha_metric)
    if not all(presence):
        raise UnknownID(f"Sample ID not found. Got: {sample_id}")

    series = alpha_repo.get_alpha_diversity([sample_id], alpha_metric)
    raw = Alpha(series).get_group_raw().to_dict()

    return {
        'sample_id': sample_id,
        'alpha_metric': raw['alpha_metric'],
        'data': raw['alpha_diversity'][sample_id],
    }
def test_get_group_single(self):
    # A one-sample group requested without a name: the expectation below
    # carries the sample ID as the name, a zero std and no percentiles.
    alpha = Alpha(self.series)
    expected = GroupAlpha(
        name='b',
        alpha_metric='shannon',
        mean=0.2,
        median=0.2,
        std=0.0,
        group_size=1,
        percentile=None,
        percentile_values=None,
    )
    observed = alpha.get_group(['b'])
    self.assertEqual(observed, expected)
def get_alpha(sample_id, alpha_metric):
    """Return a JSON response with one sample's alpha diversity value.

    Parameters
    ----------
    sample_id
        Identifier of the sample.
    alpha_metric
        Name of the metric to look up.

    Returns
    -------
    tuple
        ``(response, 200)`` whose payload has the keys ``'sample_id'``,
        ``'alpha_metric'`` and ``'data'``, or ``(response, 404)`` when the
        repo does not report the sample as present.
    """
    repo = AlphaRepo()

    # the repo is queried with a one-element list in both calls
    presence = repo.exists([sample_id], alpha_metric)
    if not all(presence):
        not_found = jsonify(error=404, text="Sample ID not found.")
        return not_found, 404

    series = repo.get_alpha_diversity([sample_id], alpha_metric)
    raw = Alpha(series).get_group_raw().to_dict()

    payload = {
        'sample_id': sample_id,
        'alpha_metric': raw['alpha_metric'],
        'data': raw['alpha_diversity'][sample_id],
    }
    return jsonify(payload), 200
def _get_alpha_info(alpha_metric, matching_ids, percentiles, sample_id):
    """Summarize a group's alpha diversity and optionally fetch one sample.

    Parameters
    ----------
    alpha_metric
        Name of the metric to look up.
    matching_ids
        Sample IDs that make up the group.
    percentiles
        Forwarded unchanged to ``Alpha``.
    sample_id
        When truthy, this sample's value is looked up as well.

    Returns
    -------
    tuple
        ``(alpha_summary, sample_diversity)``; ``sample_diversity`` is
        ``None`` when ``sample_id`` is falsy.
    """
    repo = AlphaRepo()

    # group-level summary over all matching samples
    group_series = repo.get_alpha_diversity(matching_ids, alpha_metric)
    group_alpha = Alpha(group_series, percentiles=percentiles)
    summary = group_alpha.get_group(name='').to_dict()

    if not sample_id:
        return summary, None

    # single-target unpacking: the lookup must yield exactly one value
    (sample_value,) = repo.get_alpha_diversity(sample_id, alpha_metric)
    return summary, sample_value
def test_get_group_multi(self):
    # A named two-sample group ('a' and 'f'): exact fields are compared
    # with assertEqual, floating-point fields with the "almost" variants.
    alpha = Alpha(self.series)
    expected = GroupAlpha(
        name='bar',
        alpha_metric='shannon',
        mean=0.35,
        median=0.35,
        std=0.25,
        group_size=2,
        percentile=[10, 20, 30, 40, 50, 60, 70, 80, 90],
        percentile_values=[
            0.15, 0.2, 0.25, 0.3, 0.35, 0.4, 0.45, 0.5, 0.55
        ],
    )
    observed = alpha.get_group(['a', 'f'], 'bar')

    for field in ('name', 'alpha_metric'):
        self.assertEqual(getattr(observed, field), getattr(expected, field))
    for field in ('mean', 'median', 'std'):
        self.assertAlmostEqual(getattr(observed, field),
                               getattr(expected, field))
    self.assertEqual(observed.group_size, expected.group_size)
    npt.assert_equal(observed.percentile, expected.percentile)
    npt.assert_almost_equal(observed.percentile_values,
                            expected.percentile_values)
def test_get_group_noname(self):
    # Requesting a multi-sample group without a name must raise ValueError.
    alpha = Alpha(self.series)
    self.assertRaises(ValueError, alpha.get_group, ['a', 'b'])
def test_get_group_missing(self):
    # An ID that is not in the series must raise UnknownID with a message
    # matching the pattern below.
    alpha = Alpha(self.series)
    self.assertRaisesRegex(UnknownID, "Identifier not found.",
                           alpha.get_group, ['foobarbaz'], 'asd')
def test_feature_ids(self):
    # feature_ids() is expected to be an empty frozenset for this fixture.
    alpha = Alpha(self.series)
    observed = alpha.feature_ids()
    self.assertEqual(observed, frozenset())
def test_sample_ids(self):
    # sample_ids() is expected to contain exactly the six IDs 'a'..'f'.
    alpha = Alpha(self.series)
    expected = frozenset({'a', 'b', 'c', 'd', 'e', 'f'})
    observed = alpha.sample_ids()
    self.assertEqual(observed, expected)
def test_init(self):
    # The series stored on the instance must equal the one passed in.
    alpha = Alpha(self.series)
    stored = alpha._series
    pdt.assert_series_equal(stored, self.series)
def test_get_group_raw_noname(self):
    # Requesting raw values for specific samples without a name must raise
    # ValueError with a message matching the pattern below.
    alpha = Alpha(self.series)
    self.assertRaisesRegex(ValueError, "Name not specified.",
                           alpha.get_group_raw, ['a', 'c'])