Пример #1
0
 def test_invalid_denom(self, get_mp_data):
     """Check the error raised for an unmatched denominator string.

     Calls qarcoal with numerator string "Firm" and denominator string
     "beyblade" and expects a ValueError carrying the
     denominator-specific message.
     """
     expected_msg = "No feature(s) found matching denominator string!"
     with pytest.raises(ValueError) as raised:
         qarcoal(get_mp_data.table, get_mp_data.taxonomy, "Firm", "beyblade")
     assert str(raised.value) == expected_msg
Пример #2
0
 def test_both_invalid(self, get_mp_data):
     """Check the error raised when neither search string is matched.

     Calls qarcoal with numerator string "beyblade" and denominator
     string "yugioh" and expects a ValueError carrying the combined
     numerator/denominator message.
     """
     expected_msg = (
         "No feature(s) found matching either numerator or "
         "denominator string!"
     )
     with pytest.raises(ValueError) as raised:
         qarcoal(
             get_mp_data.table, get_mp_data.taxonomy, "beyblade", "yugioh"
         )
     assert str(raised.value) == expected_msg
Пример #3
0
 def test_shared_features_allowed(self, get_mp_data):
     """Smoke test: qarcoal completes with allow_shared_features=True.

     Uses "Firmicutes" as the numerator string and "Bacilli" as the
     denominator string. Nothing is asserted on the result; the test
     passes as long as the call does not raise.
     """
     table, taxonomy = get_mp_data.table, get_mp_data.taxonomy
     qarcoal(
         table, taxonomy, "Firmicutes", "Bacilli",
         allow_shared_features=True,
     )
Пример #4
0
 def test_shared_features_disallowed(self, get_mp_data):
     """Check the error raised when shared features are forbidden.

     Calls qarcoal with "Firmicutes" / "Bacilli" and
     allow_shared_features=False and expects a ValueError carrying the
     shared-features message.
     """
     expected_msg = "Shared features between num and denom!"
     with pytest.raises(ValueError) as raised:
         qarcoal(
             get_mp_data.table, get_mp_data.taxonomy,
             "Firmicutes", "Bacilli",
             allow_shared_features=False,
         )
     assert str(raised.value) == expected_msg
Пример #5
0
    def test_negative_counts(self):
        """Check that a feature table with negative counts is rejected.

        Builds a 4 x 3 matrix of random integers in [1, 10), overwrites
        its first row with -1, wraps it in a biom table together with a
        minimal taxonomy frame, and expects qarcoal to raise a ValueError
        carrying the negative-counts message.
        """
        samps = ["S{}".format(i) for i in range(3)]
        # "F" prefix keeps the feature IDs distinct from the sample IDs.
        feats = ["F{}".format(i) for i in range(4)]

        # Draw the whole matrix in one vectorized call instead of twelve
        # scalar draws followed by a reshape.
        mat = np.random.randint(1, 10, size=(len(feats), len(samps)))
        mat[0] = -1  # the negative entries that must trigger the error
        table = biom.table.Table(mat, feats, samps)

        tax_labels = ["AB", "BC", "CD", "DE"]
        confidence = ["0.99"] * 4
        taxonomy = pd.DataFrame([feats, tax_labels, confidence]).T
        taxonomy.columns = ["feature-id", "Taxon", "Confidence"]
        taxonomy = taxonomy.set_index("feature-id", drop=True)

        with pytest.raises(ValueError) as excinfo:
            qarcoal(table, taxonomy, "A", "C")
        assert "Feature table has negative counts!" == str(excinfo.value)
Пример #6
0
    def test_samples_to_use(self, get_mp_data):
        """Check that samples_to_use restricts the output rows.

        Reads the sample metadata TSV located under MP_URL, keeps the rows
        whose "BodySite" column equals "gut", passes them (wrapped in
        Metadata) as samples_to_use, and asserts that the result has one
        row per selected sample.
        """
        # Join with "/" explicitly: os.path.join uses the platform path
        # separator, which yields a broken URL on Windows.
        metadata_url = "{}/sample-metadata.tsv".format(MP_URL.rstrip("/"))
        # skiprows=[1] drops the line right after the header
        # (presumably a type-directive row -- verify against the file).
        sample_metadata = pd.read_csv(metadata_url,
                                      sep="\t",
                                      index_col=0,
                                      skiprows=[1],
                                      header=0)
        gut_samples = sample_metadata[sample_metadata["BodySite"] == "gut"]
        num_gut_samples = gut_samples.shape[0]
        gut_samples = Metadata(gut_samples)

        num = "p__Bacteroidetes"
        denom = "p__Firmicutes"
        q = qarcoal(
            get_mp_data.table,
            get_mp_data.taxonomy,
            num,
            denom,
            samples_to_use=gut_samples,
        )
        assert q.shape[0] == num_gut_samples
Пример #7
0
def get_mp_results(get_mp_data):
    """Return the qarcoal output for a fixed pair of genus strings.

    Runs qarcoal on the table and taxonomy held by ``get_mp_data`` with
    "g__Bacteroides" as the numerator string and "g__Streptococcus" as
    the denominator string.
    """
    return qarcoal(
        get_mp_data.table,
        get_mp_data.taxonomy,
        "g__Bacteroides",
        "g__Streptococcus",
    )
Пример #8
0
    def test_large_numbers(self):
        """Test large numbers on which Qurro fails.

        Qurro fails when |x| > 2^53 - 1 due to JS implementation.
        Make a sample feature table with large numbers (every entry
        below is raised to the 53rd power):

           ----------------------------------- ** 53
          |                    S0    S1    S2 |
          | F0/Charmander     2.0   2.5   2.2 |
          | F1/Charmeleon     3.2   2.6   3.5 |
          | F2/Charizard      4.1   2.9   3.1 |
          | F3/Bulbasaur      6.2   5.2   3.0 |
          | F4/Ivysaur        4.3   2.1   2.2 |
          | F5/Venusaur       3.7   2.5   4.0 |
           -----------------------------------

        Num: Char
        Denom: saur

        Output should be (from WolframAlpha):

           -----------------
          |       log_ratio |
          | S0     -21.9188 |
          | S1     -30.9458 |
          | S2     -7.07556 |
           -----------------
        """
        samps = ["S{}".format(i) for i in range(3)]
        feats = ["F{}".format(i) for i in range(6)]
        s0 = [x**53 for x in (2.0, 3.2, 4.1, 6.2, 4.3, 3.7)]
        s1 = [x**53 for x in (2.5, 2.6, 2.9, 5.2, 2.1, 2.5)]
        s2 = [x**53 for x in (2.2, 3.5, 3.1, 3.0, 2.2, 4.0)]
        confidence = ["0.99"] * 6
        tax_labels = [
            "Charmander",
            "Charmeleon",
            "Charizard",
            "Bulbasaur",
            "Ivysaur",
            "Venusaur",
        ]
        # Plain ndarray (features x samples); np.matrix is discouraged by
        # NumPy and emits a PendingDeprecationWarning.
        mat = np.array([s0, s1, s2]).T
        table = biom.table.Table(mat, feats, samps)

        taxonomy = pd.DataFrame([feats, tax_labels, confidence]).T
        taxonomy.columns = ["feature-id", "Taxon", "Confidence"]
        taxonomy = taxonomy.set_index("feature-id", drop=True)

        q = qarcoal(
            table,
            taxonomy,
            "Char",
            "saur",
        )
        wolfram_alpha_vals = np.array([-21.9188, -30.9458, -7.07556])
        # sort by sample ID so rows line up with the S0, S1, S2 reference
        qarcoal_vals = q.sort_index()["log_ratio"].to_numpy()

        # differences are ~ 10^-6; purely absolute tolerance (rtol=0)
        np.testing.assert_allclose(
            qarcoal_vals, wolfram_alpha_vals, rtol=0, atol=1e-5
        )