def test_validate_taxrank_taxmap_taxtree_fail2(self):
    # Failure case: the taxmap fixture lists more taxids than are
    # present in the taxonomy tree, so validation must raise ValueError.
    prepped_ranks = _prep_taxranks(self.taxranks)
    prepped_map = _prep_taxmap(self.taxmap)
    with self.assertRaises(ValueError):
        _validate_taxrank_taxmap_taxtree(
            prepped_ranks, prepped_map, self.taxtree)
def test_validate_taxrank_taxmap_taxtree_pass(self):
    # Passing case: all inputs should meet the validation criteria, so
    # the validator is expected to return None without raising.
    input_taxrank = _prep_taxranks(self.taxranks)
    input_taxmap = _prep_taxmap(self.taxmap2)
    # This is the observed result of the call, not an expected value.
    obs = _validate_taxrank_taxmap_taxtree(
        input_taxrank, input_taxmap, self.taxtree)
    # assertIsNone states the intent directly and reports a clearer
    # failure message than comparing against None with assertEqual.
    self.assertIsNone(obs)
def test_prep_taxranks(self):
    # The prepared taxranks are compared against a frame indexed by
    # 'taxid' with 'taxid_taxonomy' and 'taxrank' columns. Both sides are
    # sorted by index so row order does not affect the comparison.
    obs_taxranks = _prep_taxranks(self.taxranks)
    obs_taxranks.sort_index(inplace=True)

    expected_columns = {
        'taxid': [
            '2', '11084', '42913', '42914', '42915',
            '11089', '24228', '24229', '42916', '42917',
        ],
        'taxid_taxonomy': [
            'Archaea',
            'Aenigmarchaeota',
            'Aenigmarchaeia',
            'Aenigmarchaeales',
            'Candidatus_Aenigmarchaeum',
            'Deep_Sea_Euryarchaeotic_Group(DSEG)',
            'Altiarchaeota',
            'Altiarchaeia',
            'Altiarchaeales',
            'Altiarchaeaceae',
        ],
        'taxrank': [
            'domain', 'phylum', 'class', 'order', 'genus',
            'class', 'phylum', 'class', 'order', 'family',
        ],
    }
    exp_taxranks = (
        pd.DataFrame(expected_columns).set_index('taxid').sort_index()
    )

    assert_frame_equal(obs_taxranks, exp_taxranks)
def test_compile_taxonomy_output_default(self):
    # Build the base taxonomy with rank propagation enabled, attach it to
    # the taxmap by taxid, then compile the default-rank output without
    # species labels.
    prepped_ranks = _prep_taxranks(self.taxranks)
    base_taxonomy = _build_base_silva_taxonomy(
        self.taxtree, prepped_ranks, ALLOWED_RANKS, rank_propagation=True)
    prepped_map = _prep_taxmap(self.taxmap2)
    merged_map = pd.merge(
        prepped_map, base_taxonomy, left_on='taxid', right_index=True)
    obs_6r_tax = _compile_taxonomy_output(
        merged_map, ranks=DEFAULT_RANKS, include_species_labels=False)
    obs_6r_tax.sort_index(inplace=True)

    # Expected 6-rank taxonomy for the single mapped feature. The family
    # slot repeats the order name (presumably filled in by rank
    # propagation -- confirm against _build_base_silva_taxonomy).
    expected_taxon = ("d__Archaea; p__Aenigmarchaeota; c__Aenigmarchaeia; "
                      "o__Aenigmarchaeales; f__Aenigmarchaeales; "
                      "g__Candidatus_Aenigmarchaeum")
    expected_index = pd.Index(['AB600437.1.1389'], name='Feature ID')
    exp_6r_tax = pd.Series(
        expected_taxon, index=expected_index, name='Taxon').sort_index()

    assert_series_equal(obs_6r_tax, exp_6r_tax)
def test_validate_taxrank_taxmap_taxtree_fail(self):
    # Failure case: a taxid is missing from the tree fixture (taxtree2),
    # so validation must raise ValueError.
    prepped_ranks = _prep_taxranks(self.taxranks)
    prepped_map = _prep_taxmap(self.taxmap2)
    with self.assertRaises(ValueError):
        _validate_taxrank_taxmap_taxtree(
            prepped_ranks, prepped_map, self.taxtree2)
def test_build_base_silva_taxonomy(self):
    # Compare the base SILVA taxonomy built from the fixture tree against
    # a hand-written table with one row per taxid and one column per rank
    # prefix.
    prepped_ranks = _prep_taxranks(self.taxranks)
    obs_taxonomy = _build_base_silva_taxonomy(
        self.taxtree, prepped_ranks, ALLOWED_RANKS)
    obs_taxonomy.sort_index(inplace=True)

    taxids = [
        '2', '11084', '42913', '42914', '42915',
        '11089', '24228', '24229', '42916', '42917',
    ]

    # In the expected table several consecutive rank prefixes carry
    # identical values, so each distinct column is written once and then
    # assigned to every prefix that shares it. Positions line up with
    # `taxids` above.
    domain_level = [
        'Archaea', 'Archaea', 'Archaea', 'Archaea', 'Archaea',
        'Archaea', 'Archaea', 'Archaea', 'Archaea', 'Archaea',
    ]
    phylum_level = [
        'Archaea',
        'Aenigmarchaeota',
        'Aenigmarchaeota',
        'Aenigmarchaeota',
        'Aenigmarchaeota',
        'Aenigmarchaeota',
        'Altiarchaeota',
        'Altiarchaeota',
        'Altiarchaeota',
        'Altiarchaeota',
    ]
    class_level = [
        'Archaea',
        'Aenigmarchaeota',
        'Aenigmarchaeia',
        'Aenigmarchaeia',
        'Aenigmarchaeia',
        'Deep_Sea_Euryarchaeotic_Group(DSEG)',
        'Altiarchaeota',
        'Altiarchaeia',
        'Altiarchaeia',
        'Altiarchaeia',
    ]
    order_level = [
        'Archaea',
        'Aenigmarchaeota',
        'Aenigmarchaeia',
        'Aenigmarchaeales',
        'Aenigmarchaeales',
        'Deep_Sea_Euryarchaeotic_Group(DSEG)',
        'Altiarchaeota',
        'Altiarchaeia',
        'Altiarchaeales',
        'Altiarchaeales',
    ]
    family_level = [
        'Archaea',
        'Aenigmarchaeota',
        'Aenigmarchaeia',
        'Aenigmarchaeales',
        'Aenigmarchaeales',
        'Deep_Sea_Euryarchaeotic_Group(DSEG)',
        'Altiarchaeota',
        'Altiarchaeia',
        'Altiarchaeales',
        'Altiarchaeaceae',
    ]
    genus_level = [
        'Archaea',
        'Aenigmarchaeota',
        'Aenigmarchaeia',
        'Aenigmarchaeales',
        'Candidatus_Aenigmarchaeum',
        'Deep_Sea_Euryarchaeotic_Group(DSEG)',
        'Altiarchaeota',
        'Altiarchaeia',
        'Altiarchaeales',
        'Altiarchaeaceae',
    ]

    # Group order and prefix order fix the column order of the expected
    # frame, which assert_frame_equal compares.
    column_groups = [
        (('d__', 'sk__', 'k__', 'ks__', 'sp__'), domain_level),
        (('p__', 'ps__', 'pi__', 'sc__'), phylum_level),
        (('c__', 'cs__', 'ci__', 'so__'), class_level),
        (('o__', 'os__', 'sf__'), order_level),
        (('f__', 'fs__'), family_level),
        (('g__',), genus_level),
    ]
    expected_columns = {'taxid': taxids}
    for prefixes, values in column_groups:
        for prefix in prefixes:
            expected_columns[prefix] = list(values)

    exp_taxonomy = (
        pd.DataFrame(expected_columns).set_index('taxid').sort_index()
    )

    assert_frame_equal(obs_taxonomy, exp_taxonomy)