def test_invalid_denom(self, get_mp_data):
    """qarcoal raises ValueError when only the denominator string is unmatched.

    The numerator string "Firm" is expected to match features in the
    fixture's taxonomy, while "beyblade" is expected to match none, so the
    error message must be the denominator-specific one.
    """
    with pytest.raises(ValueError) as excinfo:
        qarcoal(
            get_mp_data.table,
            get_mp_data.taxonomy,
            "Firm",
            "beyblade",
        )

    # The message is compared for exact equality, not as a substring/regex.
    expected_msg = "No feature(s) found matching denominator string!"
    assert str(excinfo.value) == expected_msg
def test_both_invalid(self, get_mp_data):
    """qarcoal raises ValueError when neither search string matches.

    Both "beyblade" and "yugioh" are expected to match no feature in the
    fixture's taxonomy, so the combined numerator/denominator message must
    be raised.
    """
    with pytest.raises(ValueError) as excinfo:
        qarcoal(
            get_mp_data.table,
            get_mp_data.taxonomy,
            "beyblade",
            "yugioh",
        )

    # Adjacent literals are concatenated; the full message is matched exactly.
    expected_msg = (
        "No feature(s) found matching either numerator or "
        "denominator string!"
    )
    assert str(excinfo.value) == expected_msg
def test_shared_features_allowed(self, get_mp_data):
    """qarcoal runs without raising when shared features are permitted.

    "Firmicutes" and "Bacilli" presumably select overlapping features in
    the fixture's taxonomy (confirm against the fixture data). With
    ``allow_shared_features=True`` the call is expected to complete.
    """
    # Smoke test: there is no assertion, the test passes if nothing raises.
    qarcoal(
        get_mp_data.table,
        get_mp_data.taxonomy,
        "Firmicutes",
        "Bacilli",
        allow_shared_features=True,
    )
def test_shared_features_disallowed(self, get_mp_data):
    """qarcoal raises ValueError on shared features when they are forbidden.

    With ``allow_shared_features=False``, searching for "Firmicutes" and
    "Bacilli" is expected to select at least one feature on both sides and
    therefore fail with the shared-features message.
    """
    with pytest.raises(ValueError) as excinfo:
        qarcoal(
            get_mp_data.table,
            get_mp_data.taxonomy,
            "Firmicutes",
            "Bacilli",
            allow_shared_features=False,
        )

    # The message is compared for exact equality, not as a substring/regex.
    expected_msg = "Shared features between num and denom!"
    assert str(excinfo.value) == expected_msg
def test_negative_counts(self):
    """qarcoal raises ValueError when the feature table has negative counts.

    Builds a 4-feature x 3-sample table of random positive integers, then
    overwrites the whole first feature row with -1 so the table is invalid.
    The taxonomy labels are chosen so that "A" and "C" both match at least
    one feature, leaving the negative counts as the only reason to fail.
    """
    samps = ["S{}".format(i) for i in range(3)]
    # Feature IDs get their own "F" prefix so they cannot be confused with
    # the "S"-prefixed sample IDs.
    feats = ["F{}".format(i) for i in range(4)]

    # Draw the matrix directly in (features, samples) shape; values are in
    # [1, 10) so every count is positive before the row is overwritten.
    mat = np.random.randint(1, 10, size=(len(feats), len(samps)))
    mat[0] = -1  # first feature is negative in every sample
    table = biom.table.Table(mat, feats, samps)

    tax_labels = ["AB", "BC", "CD", "DE"]
    confidence = ["0.99"] * len(feats)
    taxonomy = pd.DataFrame(
        {"Taxon": tax_labels, "Confidence": confidence},
        index=pd.Index(feats, name="feature-id"),
    )

    with pytest.raises(ValueError) as excinfo:
        qarcoal(table, taxonomy, "A", "C")
    assert str(excinfo.value) == "Feature table has negative counts!"
def test_samples_to_use(self, get_mp_data):
    """qarcoal returns one row per sample passed via ``samples_to_use``.

    Loads the sample metadata, keeps only rows whose "BodySite" is "gut",
    and checks that the result has exactly as many rows as there are gut
    samples.
    """
    # NOTE(review): the name MP_URL suggests a URL; os.path.join would insert
    # backslashes on Windows if so -- confirm against MP_URL's definition.
    metadata_path = os.path.join(MP_URL, "sample-metadata.tsv")
    all_samples = pd.read_csv(
        metadata_path,
        sep="\t",
        index_col=0,
        # Drops the file's second line; presumably a non-data directive row
        # (TODO confirm against the metadata file).
        skiprows=[1],
        header=0,
    )

    is_gut = all_samples["BodySite"] == "gut"
    gut_frame = all_samples[is_gut]
    expected_rows = gut_frame.shape[0]

    result = qarcoal(
        get_mp_data.table,
        get_mp_data.taxonomy,
        "p__Bacteroidetes",
        "p__Firmicutes",
        samples_to_use=Metadata(gut_frame),
    )
    assert result.shape[0] == expected_rows
def get_mp_results(get_mp_data):
    """Return qarcoal's output for g__Bacteroides over g__Streptococcus.

    ``get_mp_data`` must expose ``table`` and ``taxonomy`` attributes, which
    are forwarded to qarcoal unchanged.
    """
    return qarcoal(
        get_mp_data.table,
        get_mp_data.taxonomy,
        "g__Bacteroides",
        "g__Streptococcus",
    )
def test_large_numbers(self):
    """Test large numbers on which Qurro fails.

    Qurro fails when |x| > 2^53 - 1 due to JS implementation.

    Make a sample feature table with large numbers (each entry below is
    raised to the 53rd power):

        -----------------------------------
        | ** 53           S0    S1    S2  |
        | F0/Charmander   2.0   2.5   2.2 |
        | F1/Charmeleon   3.2   2.6   3.5 |
        | F2/Charizard    4.1   2.9   3.1 |
        | F3/Bulbasaur    6.2   5.2   3.0 |
        | F4/Ivysaur      4.3   2.1   2.2 |
        | F5/Venusaur     3.7   2.5   4.0 |
        -----------------------------------

    Num: Char
    Denom: saur

    Output should be (from WolframAlpha):

        -----------------
        |     log_ratio |
        | S0   -21.9188 |
        | S1   -30.9458 |
        | S2   -7.07556 |
        -----------------
    """
    samps = ["S{}".format(i) for i in range(3)]
    feats = ["F{}".format(i) for i in range(6)]

    # One list per sample (a column of the docstring table), features F0..F5.
    s0 = [x**53 for x in (2.0, 3.2, 4.1, 6.2, 4.3, 3.7)]
    s1 = [x**53 for x in (2.5, 2.6, 2.9, 5.2, 2.1, 2.5)]
    s2 = [x**53 for x in (2.2, 3.5, 3.1, 3.0, 2.2, 4.0)]

    # Plain ndarray rather than np.matrix, which NumPy no longer recommends.
    # Transposing gives the (features, samples) layout used for the table.
    mat = np.array([s0, s1, s2]).T
    table = biom.table.Table(mat, feats, samps)

    tax_labels = [
        "Charmander",
        "Charmeleon",
        "Charizard",
        "Bulbasaur",
        "Ivysaur",
        "Venusaur",
    ]
    confidence = ["0.99"] * len(feats)
    taxonomy = pd.DataFrame(
        {"Taxon": tax_labels, "Confidence": confidence},
        index=pd.Index(feats, name="feature-id"),
    )

    q = qarcoal(table, taxonomy, "Char", "saur")

    wolfram_alpha_vals = np.array([-21.9188, -30.9458, -7.07556])
    # Sort by sample ID so the values line up with S0, S1, S2 above.
    qarcoal_vals = q.sort_index()["log_ratio"].to_numpy()

    # Differences are ~10^-6; comparing the arrays directly (rather than
    # their difference against 0) gives a readable report on failure.
    assert qarcoal_vals == pytest.approx(wolfram_alpha_vals, abs=1e-5)