def alpha_plot(opts, sample_ids): """Produces digestable alpha diversity distribution plots per sample Parameters ---------- opts : dict A dict of relevant opts. sample_ids : Iterable of str A list of sample IDs of interest Returns ------- dict A dict containing each sample ID and any errors observed or None if no error was observed for the sample. {str: str or None} """ results = {} alpha_map = pd.read_csv( agu.get_existing_path(opts['collapsed']['100nt']['alpha-map']), sep='\t', dtype=str, ) alpha_metrics = ['shannon_1k', 'PD_whole_tree_1k'] # Checks the alpha_field is in the mapping file for metric in alpha_metrics: if metric not in alpha_map.columns: raise ValueError('%s is not a valid alpha diversity field name.' % metric) # Checks the group_field is in the mapping file if 'SIMPLE_BODY_SITE' not in alpha_map.columns: raise ValueError('SIMPLE_BODY_SITE is not a valid field name.') alpha_map[alpha_metrics] = alpha_map[alpha_metrics].astype(float) alpha_map.set_index('#SampleID', inplace=True) results = {} for id_ in sample_ids: if id_ not in alpha_map.index: results[id_] = 'ID not found' else: results[id_] = None shannon_path = os.path.join(_result_path(opts, id_), 'shannon_%s.png' % id_) _plot_alpha(id_, alpha_map, 'shannon_1k', xlabel='Shannon Diversity', fp=shannon_path) # Generates the pd whole tree diversity figure pd_path = os.path.join(_result_path(opts, id_), 'pd_%s.png' % id_) _plot_alpha(id_, alpha_map, 'PD_whole_tree_1k', xlabel='PD Whole Tree Diversity', fp=pd_path) return results
def test_cat_taxa_summaries(self): cat_taxa_summaries() path = get_existing_path( '../agp_processing/10-populated-templates/taxa') exp = ['ag-oral-flossing-Daily.txt', 'ag-oral-flossing-Never.txt', 'ag-oral-flossing-Occasionally.txt', 'ag-oral-flossing-Rarely.txt', 'ag-oral-flossing-Regularly.txt', 'ag-stool-average.txt'] files = listdir(path) self.assertItemsEqual(files, exp) with open(join(path, 'ag-oral-flossing-Rarely.txt')) as f: obs = f.read() exp = """k__Bacteria; p__Fusobacteria; c__Fusobacteriia; o__Fusobacteriales; f__Fusobacteriaceae; g__Fusobacterium\t0.0110430107527 k__Bacteria; p__Firmicutes; c__Bacilli; o__Bacillales; f__Planococcaceae; g__\t0.00196774193548 k__Bacteria; p__Bacteroidetes; c__Flavobacteriia; o__Flavobacteriales; f__[Weeksellaceae]; g__Chryseobacterium\t0.0125913978495 k__Bacteria; p__Bacteroidetes; c__Sphingobacteriia; o__Sphingobacteriales; f__Sphingobacteriaceae; g__Sphingobacterium\t0.00608602150538 k__Bacteria; p__Firmicutes; c__Erysipelotrichi; o__Erysipelotrichales; f__Erysipelotrichaceae; g__Bulleidia\t0.00269892473118 k__Bacteria; p__Proteobacteria; c__Betaproteobacteria; o__Neisseriales; f__Neisseriaceae; g__Neisseria\t0.074935483871 k__Bacteria; p__Firmicutes; c__Clostridia; o__Clostridiales; f__Lachnospiraceae; g__Oribacterium\t0.00749462365591 k__Bacteria; p__Proteobacteria; c__Gammaproteobacteria; o__Xanthomonadales; f__Xanthomonadaceae; g__\t0.017311827957 k__Bacteria; p__Proteobacteria; c__Gammaproteobacteria; o__Pseudomonadales; f__Moraxellaceae; g__Enhydrobacter\t0.000150537634409 k__Bacteria; p__Firmicutes; c__Clostridia; o__Clostridiales; f__Lachnospiraceae; g__\t0.00667741935484 k__Bacteria; p__Proteobacteria; c__Alphaproteobacteria; o__Caulobacterales; f__Caulobacteraceae; g__Brevundimonas\t0.00996774193548 k__Bacteria; p__Bacteroidetes; c__Flavobacteriia; o__Flavobacteriales; f__[Weeksellaceae]; g__\t0.00139784946237 k__Bacteria; p__Proteobacteria; c__Gammaproteobacteria; o__Enterobacteriales; f__Enterobacteriaceae; g__Klebsiella\t0.00174193548387 k__Bacteria; p__Firmicutes; c__Bacilli; o__Bacillales; f__Staphylococcaceae; g__Staphylococcus\t0.00756989247312 k__Bacteria; p__Bacteroidetes; c__Bacteroidia; o__Bacteroidales; f__Prevotellaceae; g__Prevotella\t0.0509569892473 k__Bacteria; p__Fusobacteria; c__Fusobacteriia; o__Fusobacteriales; f__Leptotrichiaceae; g__Leptotrichia\t0.0143333333333 k__Bacteria; p__Firmicutes; c__Bacilli; o__Lactobacillales; f__Streptococcaceae; g__Streptococcus\t0.138817204301 k__Bacteria; p__Proteobacteria; c__Gammaproteobacteria; o__Pseudomonadales; f__Pseudomonadaceae; g__\t0.0051935483871 k__Bacteria; p__Bacteroidetes; c__Bacteroidia; o__Bacteroidales; f__[Paraprevotellaceae]; g__[Prevotella]\t0.00132258064516 k__Bacteria; p__Proteobacteria; c__Epsilonproteobacteria; o__Campylobacterales; f__Campylobacteraceae; g__Campylobacter\t0.00133333333333 k__Bacteria; p__Firmicutes; c__Bacilli; o__Gemellales; f__Gemellaceae; g__\t0.0097311827957 k__Bacteria; p__Proteobacteria; c__Gammaproteobacteria; o__Enterobacteriales; f__Enterobacteriaceae; g__\t0.0135268817204 k__Bacteria; p__Proteobacteria; c__Gammaproteobacteria; o__Pasteurellales; f__Pasteurellaceae; g__Haemophilus\t0.0668279569892 k__Bacteria; p__Proteobacteria; c__Alphaproteobacteria; o__Rhizobiales; f__Brucellaceae; g__Ochrobactrum\t0.00575268817204 k__Bacteria; p__Firmicutes; c__Clostridia; o__Clostridiales; f__Veillonellaceae; g__Veillonella\t0.0709247311828 k__Bacteria; p__Bacteroidetes; c__Bacteroidia; o__Bacteroidales; f__Porphyromonadaceae; g__Porphyromonas\t0.0165698924731 k__Bacteria; p__Proteobacteria; c__Gammaproteobacteria; o__Pasteurellales; f__Pasteurellaceae; g__Aggregatibacter\t0.00410752688172 k__Bacteria; p__Proteobacteria; c__Betaproteobacteria; o__Neisseriales; f__Neisseriaceae; g__\t0.00394623655914 k__Bacteria; p__Proteobacteria; c__Gammaproteobacteria; o__Pseudomonadales; f__Moraxellaceae; g__Acinetobacter\t0.0247634408602 k__Bacteria; p__Firmicutes; c__Bacilli; o__Lactobacillales; f__Aerococcaceae; g__Alloiococcus\t0.000225806451613 k__Bacteria; p__Firmicutes; c__Clostridia; o__Clostridiales; f__Lachnospiraceae; g__Moryella\t0.000612903225806 k__Bacteria; p__Actinobacteria; c__Actinobacteria; o__Actinomycetales; f__Micrococcaceae; g__Rothia\t0.114602150538 k__Bacteria; p__Actinobacteria; c__Coriobacteriia; o__Coriobacteriales; f__Coriobacteriaceae; g__Atopobium\t0.00761290322581 k__Bacteria; p__Proteobacteria; c__Betaproteobacteria; o__Burkholderiales; f__Comamonadaceae; g__Alicycliphilus\t0.00530107526882 k__Bacteria; p__Actinobacteria; c__Actinobacteria; o__Actinomycetales; f__Actinomycetaceae; g__Actinomyces\t0.0204408602151 k__Bacteria; p__Proteobacteria; c__Gammaproteobacteria; o__Xanthomonadales; f__Xanthomonadaceae; g__Stenotrophomonas\t0.0443655913978 k__Bacteria; p__Actinobacteria; c__Actinobacteria; o__Actinomycetales; f__Corynebacteriaceae; g__Corynebacterium\t0.000440860215054 k__Bacteria; p__Proteobacteria; c__Gammaproteobacteria; o__Enterobacteriales; f__Enterobacteriaceae; g__Morganella\t0.000161290322581 k__Bacteria; p__Firmicutes; c__Bacilli; o__Lactobacillales; f__Carnobacteriaceae; g__Granulicatella\t0.025247311828 k__Bacteria; p__SR1; c__; o__; f__; g__\t0.00222580645161 k__Bacteria; p__Proteobacteria; c__Gammaproteobacteria; o__Pseudomonadales; f__Pseudomonadaceae; g__Pseudomonas\t0.0665268817204 """ self.assertEqual(obs, exp)
def get_bloom_sequences(): """Get the filepath to the bloom sequences Returns ------- str The filepath to the bloom sequences Raises ------ IOError If the path does not exist """ repo = get_repository_dir() return get_existing_path(os.path.join(repo, 'data/AG/BLOOM.fasta'))