def main():
    """Split the taxonomy, assign OTU ids to sequences, then build CM models.

    Options come from ``parse_command_line_parameters(**script_info)``.
    ``opts.output_dir`` is created with mode 0o755 when it is not already a
    directory. An empty ``opts.taxonomy_level`` falls back to level 7;
    otherwise the option value is passed through unchanged.
    """
    option_parser, opts, args = parse_command_line_parameters(**script_info)

    taxonomic_rank_dictionary = create_taxonomic_rank_dictionary(
        opts.input_taxonomy_fps)
    otu_dictionary = create_otu_dictionary(opts.input_fasta_fps)

    # The original repeated the same three calls in four branches; the only
    # things that varied were the level and whether the directory was created.
    taxonomy_level = 7 if opts.taxonomy_level == "" else opts.taxonomy_level

    if not path.isdir(opts.output_dir):
        # 0o755 is the same value as the Python-2-only literal 0755 and is
        # accepted by Python 2.6+ as well as Python 3.
        mkdir(opts.output_dir, 0o755)

    sub_taxonomy_list = [
        split_taxonomy_list(opts.input_taxonomy_fps, taxonomy_level,
                            opts.output_dir)
    ]
    assign_otuID_to_seqs(taxonomic_rank_dictionary, otu_dictionary,
                         sub_taxonomy_list, opts.output_dir)

    # NOTE(review): indentation was lost in the source; build_cm_models is
    # taken to run after every branch rather than only the last one - confirm.
    build_cm_models(opts.output_dir)
def main():
    """Split the taxonomy at the requested level and assign sequence ids.

    Options come from ``parse_command_line_parameters(**script_info)``.
    An empty ``opts.taxonomy_level`` falls back to the string ``"7"``;
    otherwise the option value is passed through unchanged.
    ``opts.output_dir`` is created unconditionally with mode 0o755.
    """
    option_parser, opts, args = parse_command_line_parameters(**script_info)

    taxonomy_level = "7" if opts.taxonomy_level == "" else opts.taxonomy_level

    # 0o755 is the same value as the Python-2-only literal 0755 and is
    # accepted by Python 2.6+ as well as Python 3.
    # NOTE(review): mkdir is not guarded by an existence check here, exactly
    # as in the original - confirm whether an existing directory should be
    # tolerated.
    mkdir(opts.output_dir, 0o755)

    sub_taxonomy_list = [
        split_taxonomy_list(opts.input_taxonomy_fps, taxonomy_level,
                            opts.output_dir)
    ]

    # The empty-level branch used to pass `taxon_list`, a name this function
    # never defines, while the list built above went unused. Both cases now
    # pass the list that was actually built.
    assign_seqID_to_seqs(sub_taxonomy_list, opts.input_fasta_fps,
                         opts.output_dir)
def test_assign_otuID_to_seqs(self):
    """Check the FASTA written for c__Alphaproteobacteria at taxonomy level 5.

    Splits ``self.otu_f`` at level 5 into a fresh temporary directory, runs
    ``assign_otuID_to_seqs`` with ``self.otu_match_f``, and compares the
    contents of ``c_taxonomy/c__Alphaproteobacteria.fasta`` with ``expected``.
    """
    # NOTE(review): the line breaks inside this literal were lost in the
    # source; it is laid out here as FASTA (one header or one sequence per
    # line, trailing newline) - confirm against the real fixture.
    expected = """\
>822577
AGAGTTTGATCCTGGCTCAGGGTGAACGCTAGTAGTATGCTTAACACATGCGAGTTGAACGAAGATTTATCTTAGTGGCAAACGGGTGAGTAATACATAGGAATCTGCCTTTTAGTACGGGAAAAAATCCTGTATAATACAGTAAAAAACTAAAAAAGAAATATTTTGCTAAAAGATGAGCCTATGCAGGATTAGGTAGTTGGTAAGGTTAAGGCTTACCAAGCCTGTGATCCTTAGTTGTTTTGAGAGATTGAACAACCACACTGGGACTGAGACAAGGCCCAGGCTTCAGTAGAGGCCAGCAGTGAGGAATCTTGGGCAATGAGCGAAAGCTTGACCCAGCAATATTACATGAAGGAAGACTGCTCAAAAGTTGTAAACTTCATTAATTGAGGAGGACGATTGACGTTACTTAATTAACAGCCCCGGCTAACTTCGTGCCAGCAGCCGCGGTAAGACGAAGGGGGCGAGCGTTACCCATGATGACTGGGCGTAAAGGGTCCGTAGGCGGCTTTTTATGTTAAAAGTAAAATCAAAAAGCTTTACTTTTTGAGGCTTTTAATACGTTAGAGCTCGGAGTTTGAAGGAAGATAGTAGAATTTCATATGAAGGGGTGAAATCCGTAGAATTATGAAGGAATACTAAAGGCGAAGGCAACTATCTATTTCAATCTGACGTTGAGGGACGAAAGCATGGGGAGCAAACAGGATTAGATACCCTGGTAGTCCATGCCGTCAAAGATGAGTGCTCTTGTTTGAATTATATTTTGGACAATTAGTTAACACATAAAGCACTCCGCCTGGGGAGTACGGCCGCAAGGACGGAACTCAAAGGAATTGGCGGGGGCCTATACGAGTGGTGGAGCATGTGGTTTAATGCGACAATACGCGCGAAACCTTACCAGTTTTTGATATATATTTTGGATAATCTTTGTGGATAATCAAAAGGATTTATACAGGTGCTGCATGGCTGTCGTCAGCCCGTGTCGTAAGATGTTAGGTTCACTCTTTTTAACGGGCGAAACCCCTGTAGTTAGTTACTATTTATATATCTTATATAAAGGACTTTAACTATACTGCTTTATTGTTTAAATATTTGCTAATTGTAAACAATTAGCAACGTTAAGTGTTATTTTTTTTAAATATATTACTTGTCGTTTCAACTTATTGTTGAGTTTTTATTACAATATATAATGAGGAAGGAGGGGATGACGTCAAGTCTTTATGGCCCTTATGAACTGGGCTACACACGTGCTACAATGGTAAAGACAAAAAGAAGCAATAAGGTAACTTGGAGCTAATCTTAAAAATTTACCAAAGTTCGGATTGTGGGCTGCAACTCGCCCACATGAAGGTGGAATCACTAGTAATCACGAATCAGAACGTCGTGGTGAATTTGTACTTAGGCCTTGTACACACCGCCCGTCACGTGCCGGAAGTCGGCTTGGCTAAAAAATATTTGTTGATTATTGTGTTTAATTTTTTTATTTTTTATGTTAACAAATTTCAAAGTTTACTTTAATTTGCATAATTTATAGTCGGGTTGATAACTGGGATGAAGTCGTAACAAGGTAACC
>1837676
AAACAATTAAGAGTTTGATCCTGGCTCTGAGTGAATGCTAGCGGCATGCCTTACACATGCAAGTCGTACGAAATTAATAAAATGATTTCGTGGCGTATAGGTGAGTAAAGTACAAAGCGTGTATGCTAAGTTTAGCTAATACTAAATATATTAACTTAATTAAGTTATTAAAGATTTTTCGCTTAGCAGACATGTTTGTATAGGATTAGGTAGTTGGTGATGGGTTTAGCTCACCAAGCCTACGATCCTTAGCTGGTCTTGGAGGACTATCAGCCACAGCGGGACGGTTACATCCCGAACTTTTTGGCAGCAGTGAGGAATATTGGACAATGGGCGCAAGCTTGATCCAGCAATGCCGCGTGGGTGAGGAAGGCTTAGATTGTAAAATCCTTTCGTCGAAGATGATAATGACAGTATTCGAAAAAGAAGCCCCGGCTAACTTCGTGCCAGCAGCCGCGGTAAAACGGAGGGGGCTAGTGTTACTCAACTTGACTGGGCGTAAAGGGCGTGAAGGTGGTATGGTACGTTTTATTGTAAATACTCAAACATAATTTGAGGAGCTTTAAAATACGGCTATGCTTGAGTTTATTTGAAGAAAGTTGTACTTCTAGTGTAGAGGTGAAATTTGCAAAAATTAGAAGGACAGTCCACGGGCGAAAGCGACTTTCTACTATAAACTGACACTATAGCGCGAAAGCATAGGTAGCAAACGGGATTAGAGACCCCGGTAGTCTATGCAGTGACCGATGAATACTAAATGCTCTTATCCGTAAGAGAGTATTTAAGCTAACGCGTAAGTATTCCGCCTGGGAACTACAACCGCAAGGTTCAAACTTAAAGGAATTGACGGAGGGCTATTTCAGCGGTGGAGCATGTGGTTTAATCCGATATACCGCGTAGAACCTTACCAGCTCTTGATGAGCAAATCTGTTTAGGTTTAGAATCCTGAACATTCGTTTTTTTAGCGAAGTATTGGTTTGCACAGGTGTTGCATGGCTGTCGTCAGCTCGTGTTTTGAGATGTTAGGTTAACTCCTTTAACGGGCGCAACCCCCTAGCTTAATTATAAGTTTACTGCTTAGGAAATCTAAGAGGAGGGCAGGGATGATGTCAAGTCGTTATGACCTTTATGGGCTGGGCTACTCACGTGCTACAACGGTACGTACAAAGAGAAGCGAAAACGTAAGTTTTAGCAATACTCAAAAAACGTATCTCAGTTCAGATTGTACCCTGAAAATTGGATACATGAAGTTGGAATCGCTAGTAATCGTGTATCAGAATGGCACGGTGAATACCCCCTCAGCCCTAGTACACACCGCCCGTCACGCGCTGGAAATTAGTTTTGGTTGACATAAGTAGAGGATTATAATTCTTTAAGTCTAATAAAATTTAGTATCTTTAAATTGTTTTATTTTAGATTAAAATTACTGCTTATAACATTAAAGCTAAGGACTGGTGCGAAGTCGTAACAGGGTAGTCGTAGGGGAACCTGCGGCTGGACCA
"""
    # The directory is stored on the instance; nothing in this method
    # removes it.
    self.output_dir = mkdtemp(prefix="TaxonomyTest_")
    taxonomy_level = 5

    otu_f_list = [self.otu_f]
    otu_match_list = [self.otu_match_f]

    sub_taxonomy_list = [
        split_taxonomy_list(otu_f_list, taxonomy_level, self.output_dir)
    ]
    assign_otuID_to_seqs(sub_taxonomy_list, otu_match_list, self.output_dir)

    output_taxon_fp = os.path.join(self.output_dir, "c_taxonomy",
                                   "c__Alphaproteobacteria.fasta")

    # Read inside a context manager so the handle is closed even when the
    # assertion below fails (the original closed it only after assertEqual).
    # The "U" mode is kept from the original; note that Python 3.11 removed it.
    with open(output_taxon_fp, "U") as result:
        observed = result.read()

    self.assertEqual(observed, expected)