Exemplo n.º 1
0
 def test_search_hmms_input(self):
     """Create a package with explicit search HMMs and check both HMM sets.

     The package is built from unaligned sequences, a taxonomy file, an
     alignment HMM (``hmm=``) and a separate list of search HMMs
     (``search_hmm_files=``).  The test then checks that:

     * the second line of the package's alignment HMM is ``NAME  first10``;
     * the package holds exactly one search HMM;
     * the second line of that search HMM is
       ``NAME  homologs.trimmed.aligned``.
     """
     with tempdir.TempDir() as tmp:
         gpkg = tmp + ".gpkg"
         Create(prerequisites).main(
             sequences=os.path.join(path_to_data, 'create',
                                    'homologs.trimmed.unaligned.faa'),
             taxonomy=os.path.join(
                 path_to_data, 'create',
                 'homologs.tax2tree.rerooted.decorated.tree-consensus-strings'
             ),
             # An HMM created from just the first 5 sequences.
             # NOTE(review): the file is called first5.hmm but the NAME
             # line asserted below is 'first10' -- presumably the name
             # stored inside the fixture; confirm.
             hmm=os.path.join(path_to_data, 'create', 'first5.hmm'),
             search_hmm_files=[
                 os.path.join(path_to_data, 'create', 'homologs.hmm')
             ],
             prefix=gpkg,
             threads=5)

         # Acquire the package once and reuse it for every assertion.
         pkg = GraftMPackageVersion2.acquire(gpkg)

         # Context managers close the HMM files deterministically instead
         # of leaving the handles to the garbage collector.
         with open(pkg.alignment_hmm_path()) as alignment_hmm:
             self.assertEqual('NAME  first10\n',
                              alignment_hmm.readlines()[1])

         search_hmms = pkg.search_hmm_paths()
         self.assertEqual(1, len(search_hmms))
         with open(search_hmms[0]) as search_hmm:
             self.assertEqual('NAME  homologs.trimmed.aligned\n',
                              search_hmm.readlines()[1])
Exemplo n.º 2
0
 def test_search_hmms_input(self):
     """Create a package with explicit search HMMs and check both HMM sets.

     Builds a package from unaligned sequences, a taxonomy file, an
     alignment HMM and a one-element list of search HMMs, then asserts the
     NAME line (second line) of the alignment HMM, the number of search
     HMMs in the package (one), and the NAME line of that search HMM.
     """
     with tempdir.TempDir() as tmp:
         gpkg = tmp+".gpkg"
         Create(prerequisites).main(sequences=os.path.join(path_to_data,'create','homologs.trimmed.unaligned.faa'),
                       taxonomy=os.path.join(path_to_data,'create','homologs.tax2tree.rerooted.decorated.tree-consensus-strings'),
                       hmm=os.path.join(path_to_data, 'create', 'first5.hmm'), # an HMM created from just the first 5 sequences
                       search_hmm_files=[os.path.join(path_to_data, 'create', 'homologs.hmm')],
                       prefix=gpkg,
                       threads=5)

         # Acquire the package once and reuse it for every assertion.
         pkg = GraftMPackageVersion2.acquire(gpkg)

         # Use context managers so the HMM files are closed promptly
         # rather than leaked until garbage collection.
         with open(pkg.alignment_hmm_path()) as alignment_hmm:
             self.assertEqual('NAME  first10\n', alignment_hmm.readlines()[1])

         search_hmms = pkg.search_hmm_paths()
         self.assertEqual(1, len(search_hmms))
         with open(search_hmms[0]) as search_hmm:
             self.assertEqual('NAME  homologs.trimmed.aligned\n', search_hmm.readlines()[1])
Exemplo n.º 3
0
 def test_create_stars(self):
     """Creating a package from the ``with_stars`` FASTA yields an alignment HMM.

     Only the existence of the alignment HMM file in the created package
     is asserted.
     """
     with tempdir.TempDir() as scratch:
         package_path = scratch + ".gpkg"
         create_dir = os.path.join(path_to_data, 'create')
         fasta_path = os.path.join(
             create_dir, 'homologs.trimmed.unaligned.with_stars.faa')
         taxonomy_path = os.path.join(
             create_dir,
             'homologs.tax2tree.rerooted.decorated.tree-consensus-strings')

         Create(prerequisites).main(
             sequences=fasta_path,
             taxonomy=taxonomy_path,
             prefix=package_path)

         package = GraftMPackageVersion2.acquire(package_path)
         self.assertTrue(os.path.exists(package.alignment_hmm_path()))
Exemplo n.º 4
0
    def test_create_dereplication(self):
        reads = '''>r1
MSHFLDRLTYFSAPKESFSGGHGLTNAEDRTWEDAYRNRWAHDKIVRSTHGVNCTGSCSWKIYVKGGIVTWETQQTDYPRTRWDMPNHEPRGCARGASYSWYLYSANRVKYPMVRGRLLERWRAALKVAKSPVDAWALIQQDPEARRDYQQVRGMGGFVRSTWDEVNQLIAAANVYTIKQHGPDRVIGFSPIPAMSMVSYAAGSRYLSLIGGVCMSFYDWYCDLPPSSPQVWGEQTDVPESADWYNSTFIIAWGSNVPQTRTPDAHFFTEVRYKGAKTVAVTPDYSEVAKLSDLWLHPKQGTDAALAMAMGHVALKEFYFPGNGKPRSAYFDDYARRYTDLPLLVMLKELTLPDGKVTLVPDRYVRASDFNGKMGQKSNPEWKTVAFDTAGKAVLPNGSIGFRWGEPGREDEGKWNLESKEARSNEDVKLKLSVLEDGEQTHQVVDVAFPYFGGQHNPHFPTNNQKGDVTVAKVPAVQLRLGKAGEERYAMVATVFDLQVAQYGINRGLGSGAKDFDDNAPYTPAWQESITGVPREQVITVARQFAENADKTHGKSMVIIGAAMNHWYHADMNYRGVINLLMMCGCIGQSGGGWSHYVGQEKLRPQTGWVPLAFATDWIRPSRQQNSTSFFYAHTDQWRYEKLGMGEIVSPTLSAQDRQAYSGSMIDFNVKAERMGWLPSAPQLKTNPLQVVKAAEAAGMDAKDYVVKALKDGTLEMSCEDPDASQNWPRNMFVWRSNLLGSSGKGHEYFCKHLLGTENGVQGKDLGKDDARPTEVKWHKDAPQGKLDLLVTLDFRMSTTCLYSDIVLPTASWYEKNDLNTSDMHPFIHPLSAAVDPVWQARSDWEIYKGFAKAFSDMCVGHLGVEKEVVLTPIMHDTAGEMAQPIDVREWKKGQCDLIPGKTAPQITVVERDYPNTFKRFTALGPLLDKLGNGGKGIGWQTGIEVEQLGQLNGLTQEEGISKGRPRIVSDIDACETILQLAPETNGHVAVKAWEALGKQTGRDHTHLALYREDEKIRYRDIQAQPRKIISSPTWSGIESETVSYNAGYTNVHELIPWRTLTGRQQFYQDHKWMIAFGEGFSTYRPPVNLKTTDALQNKRPNGHKEIVLNFITPHQKWGIHSTYSDNLLMLTLNRGGTVIWLSENDAKVAGIEDNDWIELFNVNGAVAGRAVVSQRVNDGMCLMYHSQEKIINTPGSEITGVRGGIHNSVTRIVTKPTHMIGGYAQLSYGFNYYGTIGTNRDEFVIIRKMVKVDWLDTPADDHLVRAYQSQGENP
>r2
MSHFLDRLRYFTAPRPQFSDGHGAVTDEDRQWENAYRQRWQHDKIVRSTHGVNCTGSCSWKIYVKGGIVTWETQQTDYPRTRPDMPNHEPRGCSRGASYSWYLYSANRLKYPLVRSALVKLWRERRVSLEPVQAWQSIVTDEAARRGYQSRRGLGGFIRSTWDEVNEIVAAANVYTVKQHGPDRVVGFSPIPAMSMVSYAAGSRYLSLIGGVCLSFYDWYCDLPPASPQTWGEQTDVPESADWYNSTFIVMWGSNVPQTRTPDAHFMTEVRYKGAKIVSIFPDYAEGAKFGDIWLHPKQGTDAALALAMGHVILKEFHLTGKSDYFVDYCRRFTDMPCLVRLVPHGDAYVPERLVRASDFDDALGQANNAEWKTVMIDAASNEFVVPLGSVGFRWGQKEGEDQGKWNLKSEAATGEPLMPRLSLANAHDDVVAVLFPYFGNIAHPHFHHTTHGSQLSRKIGVRRIATRNGEMLVATVHDLFVANYGLDQGLGGEHIARDYDDDLPYTPAWQEAITGVKRADVISVARQFAENAHKTQGKSMVIIGAGINHWFHMDMSYRAIINMLIMCGCVGKSGGGWSHYVGQEKLRPQTGWTALAFALDWHRPPRHMNATSFFYAHTDQWRYDPMNPTALLSPLADASRFHGAPIDYNVRAERMGWLPSAPQLAVNPLDYGRALNDPAQAAATVVRDLKSGSLRMACEDPDHPDNFPRNLFVWRSNLLGSSGKGHEYFLKHLLGTINGVQGEEVVKSGHPRPEEIAWHDEAPRGKLDLLVTLDFRMSTTCMYSDVVLPTATWYEKDDMNTSDMHPFIHPLSAAVDPAWQSKSDWEIFKGIAKRFSELSEGHLGVERDIVLAPIAHDSPAELAQPFDVQDWKRGECEPVPGKTMPSVLVVERDYPATYAKFTSLGPLMDKLGNGGKGVSWDTKDEVALLGDLNYRVADAGCAKGRPRIDTALDAAEVILSLAPETNGAVAVKAWSAVTAMTGIEHAHLADARADEKIRFRDIPGPAAKIISSPTWSGIESEHVSYNAGYTNVHELIPWRTLSGRQQLYQDHLWMRAFGESLCVYKPPIETGSYEHMAGARSNGNPEIVLNFITPHQKWGIHSTYTDNLLMLTLSRGGPIVWLSEADAQAIGLVDNDWIECYNANGALCARAVVSQRVPRGMVMMYHAQEKIVNTPGSEITGTRGGIHNSVTRVALKPTHMIGGYAQLAYGFNYYGTVGSNRDEFLIVRKMKHIDWLDDTPPLTMSPPVQPLRQGEAS
>r3
MSHFLDRLKFMSKVKSTFSNGHGAVVKEDRQWEDAYRQRWQHDKVVRSTHGVNCTGSCSWKVYVKNGLITWETQQTDYPRTRADLPNHEPRGCPRGASYSWYVYSAQRVKYPMMRGKLAQMWREARKTMSAVEAWEYISQDPERAREYKSRRGQGGFVRATWEEANEMVAAANAYTIKNYGPDRVIGFSPIPAMSMVSYAAGSRYLSLIGGVPLSFYDWYCDLPPASPQIWGEQTDVAESADWYNSTYLMVWGSNVPMTRTPDAHFYTEVRYKGTKTVAVSSDFGEMAKFGDIWLAPKQGTDAALAMAMGHVIFKEFHLDNPSEYFTDYIRRLTDMPMLVRLKEEGGHYLPEYFLRASQLADNLGEDNNPDWKTLMIDELTGDVVAPNGSIGFRWGQAEGKTGKWNLETREGTTDREVKGQLSLLGRHDEVVGVAFPYFGAEHDDQLTRNIPAKRVQLADGSSALVTTVFDLMAGNYGIDRGLGGGNVATSYMDDVPYTPAWQQKHTGVKPEMVIQVAREFAQNADQTQGKSMVIVGAGLNHWYHMDMSYRGIINMLMLCGCIGQSGGGWCHYVGQEKLRPQSGWAPLAFAQDWNRPARQMNGTSFFYAHTSQWRHEKLGVNEILCPTAAGNMEHMSLIDYNAKAERMGWLPSAPQLETNPLDLTRMAAEAGMDPAAYTVQGLKDGSIDMSCNDPDNPKNFPRNLFVWRSNILGSSGKGHEYFLKYLLGTQNAVMSDETQCIQPSEITVRPAAEGKLDLLVVLDFRMSTTCLYGDIVLPTATWYEKDDLNTSDMHPFIHPLSEAVQPLWQSKTDWEIYKGFAKAISEVGKDYLGVQKDLVLTPLMHDSPQELGQPFDPRDWKKGECDPIPGKTMPAMTVVERDYGAIYQKFTSIGPLMEKVGNGGKGMAWKTEREVEQLRKMNKVVADGVAKGQPRLDTAIDAAEMIMTLAPETNGHVAVKAWESLSKITGRDHTHLAIPREHDHITFRDIQAQPRKIISSPIWSGLESEEVSYNAGYTNVHELIPWRTLTGRQQFYQDHQWMRAFGEGMCVYRPAVDLKTTAAVHNHKPNGNKEILLNFLTPHQKWGIHSTYSENLRMLTLSRGGPHIWISEKDAREAGIVDNDWVEVFNVNGTLTARVVVSQRIPQGMTLMYHAQEKIVNVPGAEMSGKRGGIHNSVTRAVTKPTHMIGGYAQLAYGFNYYGTVGCNRDEFIVMRKMKNVDWMDQPLSK
>r6
MSRNDASHTDETAGESDDKRRAVDQSDDEQPKKRGANEERTDGGPAVGDPPGGDNPSRRGFLKGVGLASVLGIGSASASDDALFSMDGLQPVGDPIGEYPYRDWEDLYREQWDWDSVSRSTHSVNCTGSCSWNVYVKNGQVWREEQSGDYPRFDESLPDPNPRGCQKGACYTDYVNAEQRIKHPLKRVGERGEGKWKRITWDEALTEIAEHVVDEVEAGRYDAISGFTPIPAMSPVSFASGSRLINLLGGVSHSFYDWYSDLPPGQPITWGTQTDNAESADWYNADYIIAWGSNINVTRIPDAKYFLESGYNGTKRVGIFTDYSQTAIHTDEWLSPDPGSDTALALGMAQTIVSEGLYDEAHLKEQTDMPLLVRQDTGKFLRASDVPSVNSSADRPEWMLLMLDSNGQLREAPGSLGERDGQKDYSKSIDLDFDPRLDAETTVQTDDGSVQVRSVWAELRDELAQYDPETVTKMTGVGTETYQRVAREFADVERAKIIHGKGVNDWYHNDLGNRAIQLLVTLTGNLGRQGTGVDHYVGQEKIWTFHGWKTLSFPTGKVRGVPTTLWTYYHAGILDNTDADTAAKIRESIDKGWMPVYPEEREDGSRPDPTTMFVWRGNYFNQAKGNVAVEEELWPKLDLVVDINFRMDSTALYSDIVLPTASHYEKHDLSMTDMHTYVHPFTPAVEPLGESKTDWQIFKDLAEKIQEVATERGVEPISDRTFDREIDLQSVYDDYVRDWETETDGALAEDRAAAEYILEHSAESNPAGTDEQITFADTVEQPQRLLEAGDHWTSDIEDGEAYAPWKDFVQDKNPWPTVTGRQQYYIDHDWFLELGEQLPTHKEGPTNTGGDYPMEYNTPHGRWAIHSTWRDSEKMLRLQRGEPIVYINPDDAAERGIEDGDTVEVFNDLGAVEVQAKIYPSSERGTLRHFFSWEKFQYASRNNFNTLVPMYMKPTQLVQYPEDTGEHLYFFPNYWGPTGVNSDVRVDVRKKGGDGG
>r5
MMNNQELNPKTTRRSFIKASGMTALTVSAGINLPLQVKAETIPIKNMQDDKEVLSLCSVNCGSRCVLRLHVKNDEVRWVETDNSGDDEYGNHQIRACLRGRSMRYRMNHPDRLKYPMKRIGKRGEGKFQRISWDEALDTIADNLKRIVKDYGNEAVYNNYASGIVGGNMTRSSPFASLFTRLMNCYGGLLSYYGSYSTAQIARAMPFTYGSNVGNSTSDIVNSKLVVFFGNNPMETRMSGAGITYHLEQARERSNAKLIVIDPRYTDTASGREDEWIPIRPGTDAALVAGMAHVMITENLVDQVFLDKYCVGYDEKTLPASAPANGHYKAYILGQGDDGVEKTPEWAAKITGIPAAKIIRLAREIGTTKPCYIAQGWGLQRQSNGELACRAVAMLAILTGNVGISGGNSGAREGSFYPPIQRLPVLENPVKAKISVFSWTDAIDHGDQMTALKDGVQGKDKLDVPIKFMWNYAGNCITNQHSDINRTHEILSDDKKCEMIVVIENFMTDSAKYADILLPDLMTTEQEDIVPNDYAGNMGYLIFSQPATSAKFERRGIYDICCEIAKRLGQDVYDKFTEGGRTQEQWLQFLYSRMQEKDATLPSYDELKQMGVYKRQDPNGHLVAYQKFREDPEANPLKTPSGKIEIYSERLAEIAATWELKEDEVIHPLPIYHSGFNGWDDPKREKYPFQLVSFHYKSRTHSTYGNIDVLESACPQEMWINPIDAQRLTLTDGETVRVFNEIGETRIPVKITPRIMPGVLGMGQGAWHKANMFGDKVDHNGCVNVLTTLRPSPLAKGNPQHSNLVRIEKL
>r7
MSKIDLYLDNPAFHIVTGACPHDCPDTCTWQVAVDRITGAAVDIWGHPAHPITQGKLCGKVDRYLERTYHKDRLTVPLRRVGPKGSGRFERVSWEEAIADIARRLQAIIEEYGPEAVLPYSYSGTLGFLQGEGMASRFFNRLGASQLARTICAEAGFQGYLYTIGAAEGMLPEDFAHSRLILIWGSNTLTSNLHLWPFIQQARKQGARVLVIDPANTRTAQAGDEWIPIRPGTDAALALAMMQVIIAEERYDADYVARYTLGFKQLAERVRDWTPARSAEITGIPAERIAALAREYATIRPAGIRINYGLQRHYGGGMAVRTIACLPALVGAWRQHGGGIQLSSSGLFRHLDKRGLHRPDLLEKAKAELQKGNAGAVDQTRPFPRVINMNRLGDALSLDPARLARAHYRPRPIDPLPKPEEAGPPVKALIVYNCNPAAVAPDQTAVLAGLRREELFTVVLEHFQTDTADYADYLLPATTQLEHWDIHRAYGHAYLALNRPAIAPVGESLPNSEIFRRLALAMGYDDPCFYESDEEILRAFVAAQTHERFAGLTWERLLNEAFFRLNLPDPYLPFAEGNFPTPSGKCEFFSARMAEDGYDPLPTYTPPRWQDRSGASALSSFDGSLVCISPPAHSFLNSTFANLPRFLQREDRPMLWIHPQDARPRHIEDGTRVRVCNQHGEVMLTARVTDAVVPGTVLAPGVWWPKLSPDGRNINRTTPPDETDMGAGALFYDATVWVEPLPVMANG
>r4
MSESVVVRAACPHDCPDTCAMEIGVRDGRAVSVRGAADMPFTNGALCTKVAHYLERVYSDQRLMHPLRRVGAKGDGRFERIRWDDALDIIAERFRAIAADDPRAILPYSYAGTMGLVQSESLDRRFFNRLGAARLDRTICAAAGAMGWKATVGASIGADPEAVVDARLILIWGGNPVVSNVHGWRYLQEAKRRGAKLICIDPRRSETAAKCHQHVAPLPGTDGALALAMMQVLIAEDLLDHDYIARCTLGFAELAERVRGCTPEWAAGVTGLDAATIRELAQAYGRSAPTLIRLNYGLNRSGGGGMAVRNIACLPALTGAWRHAGGGALLSTSGNFPLDRHALERPDLYRQDPSRPPRTINMTTIGEALLETRDPPIRALYVYNANPVAVAPYSAKVRAGFRREDLFCVVHELFQTDTADYADILLPATSQLEHHDLHTSYGHFYLVANRPAIDAQGEARSNTEVFRQLAARLGFDDTALFETDEAIAAAALRREDGRSLGVGARLAEHGWARLNLPRPFAPFARGGFPTPSGKCEFLSSFLAAQGLDPLPAWHPPYESVVSNPALAARYPLALISPPARNFLNSSFANLPRFTAGEGGPRLDIHRDDAAARGLTDGMRVRIHNDRGAFHAYARVTEAVRRGVVAAPSIWWQKRSGDGENANAVTSDALTDMGGGAVFYDCLVEVAGAPA\n'''
        taxonomy = '''r1\td__Bacteria;p__Proteobacteria;c__Betaproteobacteria;o__Burkholderiales;f__Comamonadaceae;g__Hydrogenophaga;s__
r3\td__Bacteria;p__Proteobacteria;c__Betaproteobacteria;o__Burkholderiales;f__Comamonadaceae;g__Brachymonas;s__
r2\td__Bacteria;p__Proteobacteria;c__Betaproteobacteria;o__Burkholderiales;f__Burkholderiaceae;g__Burkholderia;s__
r4\td__Bacteria;p__Proteobacteria;c__Betaproteobacteria;o__Rhodocyclales;f__Rhodocyclaceae;g__;s__
r5\td__Bacteria;p__Proteobacteria;c__Gammaproteobacteria;o__Pasteurellales;f__Pasteurellaceae;g__Basfia;s__
r7\td__Bacteria;p__Chloroflexi;c__Caldilineae;o__Caldilineales;f__Caldilineaceae;g__Caldilinea;s__aerophila
r6\td__Archaea;p__Euryarchaeota;c__Methanomicrobia;o__Halobacteriales;f__Halobacteriaceae;g__Halogeometricum;s__\n'''

        with tempfile.NamedTemporaryFile(suffix='.fa', mode='w') as fasta:
            fasta.write(reads)
            fasta.flush()
            with tempfile.NamedTemporaryFile(suffix='.tsv', mode='w') as tax:
                tax.write(taxonomy)
                tax.flush()
                sequences_file = fasta.name
                taxonomy_file = tax.name
                expected_list = [7, 2, 3, 4, 5, 6, 7]
                for expected, i in zip(expected_list, list(range(0, 6))):

                    with tempdir.TempDir() as package:
                        Create(prerequisites).main(sequences=sequences_file,
                                                   taxonomy=taxonomy_file,
                                                   prefix=package,
                                                   dereplication_level=i,
                                                   force=True,
                                                   threads=5)
                        base = os.path.basename(package)
                        gpkg = GraftMPackageVersion2.acquire(package)
                        with open(gpkg.taxtastic_seqinfo_path()) as f:
                            seqinfo = f.readlines()

                        with open(gpkg.search_hmm_paths()[0]) as hmm:
                            nseq = [
                                int(x.strip().split()[1])
                                for x in hmm.readlines()
                                if x.startswith("NSEQ")
                            ][0]

                        self.assertEqual(nseq, expected)
Exemplo n.º 5
0
 def test_dna_package(self):
     """Create a package from the 61_otus fixtures and check its contents.

     The package is built from a FASTA file plus a pre-made taxtastic
     taxonomy, seqinfo and alignment.  The test asserts that the second
     line of the alignment HMM is ``NAME  61_otus.aln`` and that the
     package reports no DIAMOND database path.
     """
     with tempdir.TempDir() as tmp:
         gpkg = tmp+".gpkg"
         Create(prerequisites).main(sequences=os.path.join(path_to_data,'create','61_otus.fasta'),
                       taxtastic_taxonomy=os.path.join(path_to_data,'61_otus.gpkg','61_otus.refpkg','61_otus_taxonomy.csv'),
                       taxtastic_seqinfo=os.path.join(path_to_data,'61_otus.gpkg','61_otus.refpkg','61_otus_seqinfo.csv'),
                       alignment=os.path.join(path_to_data,'61_otus.gpkg','61_otus.refpkg','61_otus.aln.fa'),
                       prefix=gpkg,
                       threads=5)
         pkg = GraftMPackageVersion2.acquire(gpkg)
         # Close the HMM file deterministically instead of leaking the handle.
         with open(pkg.alignment_hmm_path()) as alignment_hmm:
             self.assertEqual('NAME  61_otus.aln\n', alignment_hmm.readlines()[1])
         # Identity check is the idiomatic way to assert a None result.
         self.assertIsNone(pkg.diamond_database_path())
Exemplo n.º 6
0
    def test_create_dereplication(self):
        reads = '''>r1
MSHFLDRLTYFSAPKESFSGGHGLTNAEDRTWEDAYRNRWAHDKIVRSTHGVNCTGSCSWKIYVKGGIVTWETQQTDYPRTRWDMPNHEPRGCARGASYSWYLYSANRVKYPMVRGRLLERWRAALKVAKSPVDAWALIQQDPEARRDYQQVRGMGGFVRSTWDEVNQLIAAANVYTIKQHGPDRVIGFSPIPAMSMVSYAAGSRYLSLIGGVCMSFYDWYCDLPPSSPQVWGEQTDVPESADWYNSTFIIAWGSNVPQTRTPDAHFFTEVRYKGAKTVAVTPDYSEVAKLSDLWLHPKQGTDAALAMAMGHVALKEFYFPGNGKPRSAYFDDYARRYTDLPLLVMLKELTLPDGKVTLVPDRYVRASDFNGKMGQKSNPEWKTVAFDTAGKAVLPNGSIGFRWGEPGREDEGKWNLESKEARSNEDVKLKLSVLEDGEQTHQVVDVAFPYFGGQHNPHFPTNNQKGDVTVAKVPAVQLRLGKAGEERYAMVATVFDLQVAQYGINRGLGSGAKDFDDNAPYTPAWQESITGVPREQVITVARQFAENADKTHGKSMVIIGAAMNHWYHADMNYRGVINLLMMCGCIGQSGGGWSHYVGQEKLRPQTGWVPLAFATDWIRPSRQQNSTSFFYAHTDQWRYEKLGMGEIVSPTLSAQDRQAYSGSMIDFNVKAERMGWLPSAPQLKTNPLQVVKAAEAAGMDAKDYVVKALKDGTLEMSCEDPDASQNWPRNMFVWRSNLLGSSGKGHEYFCKHLLGTENGVQGKDLGKDDARPTEVKWHKDAPQGKLDLLVTLDFRMSTTCLYSDIVLPTASWYEKNDLNTSDMHPFIHPLSAAVDPVWQARSDWEIYKGFAKAFSDMCVGHLGVEKEVVLTPIMHDTAGEMAQPIDVREWKKGQCDLIPGKTAPQITVVERDYPNTFKRFTALGPLLDKLGNGGKGIGWQTGIEVEQLGQLNGLTQEEGISKGRPRIVSDIDACETILQLAPETNGHVAVKAWEALGKQTGRDHTHLALYREDEKIRYRDIQAQPRKIISSPTWSGIESETVSYNAGYTNVHELIPWRTLTGRQQFYQDHKWMIAFGEGFSTYRPPVNLKTTDALQNKRPNGHKEIVLNFITPHQKWGIHSTYSDNLLMLTLNRGGTVIWLSENDAKVAGIEDNDWIELFNVNGAVAGRAVVSQRVNDGMCLMYHSQEKIINTPGSEITGVRGGIHNSVTRIVTKPTHMIGGYAQLSYGFNYYGTIGTNRDEFVIIRKMVKVDWLDTPADDHLVRAYQSQGENP
>r2
MSHFLDRLRYFTAPRPQFSDGHGAVTDEDRQWENAYRQRWQHDKIVRSTHGVNCTGSCSWKIYVKGGIVTWETQQTDYPRTRPDMPNHEPRGCSRGASYSWYLYSANRLKYPLVRSALVKLWRERRVSLEPVQAWQSIVTDEAARRGYQSRRGLGGFIRSTWDEVNEIVAAANVYTVKQHGPDRVVGFSPIPAMSMVSYAAGSRYLSLIGGVCLSFYDWYCDLPPASPQTWGEQTDVPESADWYNSTFIVMWGSNVPQTRTPDAHFMTEVRYKGAKIVSIFPDYAEGAKFGDIWLHPKQGTDAALALAMGHVILKEFHLTGKSDYFVDYCRRFTDMPCLVRLVPHGDAYVPERLVRASDFDDALGQANNAEWKTVMIDAASNEFVVPLGSVGFRWGQKEGEDQGKWNLKSEAATGEPLMPRLSLANAHDDVVAVLFPYFGNIAHPHFHHTTHGSQLSRKIGVRRIATRNGEMLVATVHDLFVANYGLDQGLGGEHIARDYDDDLPYTPAWQEAITGVKRADVISVARQFAENAHKTQGKSMVIIGAGINHWFHMDMSYRAIINMLIMCGCVGKSGGGWSHYVGQEKLRPQTGWTALAFALDWHRPPRHMNATSFFYAHTDQWRYDPMNPTALLSPLADASRFHGAPIDYNVRAERMGWLPSAPQLAVNPLDYGRALNDPAQAAATVVRDLKSGSLRMACEDPDHPDNFPRNLFVWRSNLLGSSGKGHEYFLKHLLGTINGVQGEEVVKSGHPRPEEIAWHDEAPRGKLDLLVTLDFRMSTTCMYSDVVLPTATWYEKDDMNTSDMHPFIHPLSAAVDPAWQSKSDWEIFKGIAKRFSELSEGHLGVERDIVLAPIAHDSPAELAQPFDVQDWKRGECEPVPGKTMPSVLVVERDYPATYAKFTSLGPLMDKLGNGGKGVSWDTKDEVALLGDLNYRVADAGCAKGRPRIDTALDAAEVILSLAPETNGAVAVKAWSAVTAMTGIEHAHLADARADEKIRFRDIPGPAAKIISSPTWSGIESEHVSYNAGYTNVHELIPWRTLSGRQQLYQDHLWMRAFGESLCVYKPPIETGSYEHMAGARSNGNPEIVLNFITPHQKWGIHSTYTDNLLMLTLSRGGPIVWLSEADAQAIGLVDNDWIECYNANGALCARAVVSQRVPRGMVMMYHAQEKIVNTPGSEITGTRGGIHNSVTRVALKPTHMIGGYAQLAYGFNYYGTVGSNRDEFLIVRKMKHIDWLDDTPPLTMSPPVQPLRQGEAS
>r3
MSHFLDRLKFMSKVKSTFSNGHGAVVKEDRQWEDAYRQRWQHDKVVRSTHGVNCTGSCSWKVYVKNGLITWETQQTDYPRTRADLPNHEPRGCPRGASYSWYVYSAQRVKYPMMRGKLAQMWREARKTMSAVEAWEYISQDPERAREYKSRRGQGGFVRATWEEANEMVAAANAYTIKNYGPDRVIGFSPIPAMSMVSYAAGSRYLSLIGGVPLSFYDWYCDLPPASPQIWGEQTDVAESADWYNSTYLMVWGSNVPMTRTPDAHFYTEVRYKGTKTVAVSSDFGEMAKFGDIWLAPKQGTDAALAMAMGHVIFKEFHLDNPSEYFTDYIRRLTDMPMLVRLKEEGGHYLPEYFLRASQLADNLGEDNNPDWKTLMIDELTGDVVAPNGSIGFRWGQAEGKTGKWNLETREGTTDREVKGQLSLLGRHDEVVGVAFPYFGAEHDDQLTRNIPAKRVQLADGSSALVTTVFDLMAGNYGIDRGLGGGNVATSYMDDVPYTPAWQQKHTGVKPEMVIQVAREFAQNADQTQGKSMVIVGAGLNHWYHMDMSYRGIINMLMLCGCIGQSGGGWCHYVGQEKLRPQSGWAPLAFAQDWNRPARQMNGTSFFYAHTSQWRHEKLGVNEILCPTAAGNMEHMSLIDYNAKAERMGWLPSAPQLETNPLDLTRMAAEAGMDPAAYTVQGLKDGSIDMSCNDPDNPKNFPRNLFVWRSNILGSSGKGHEYFLKYLLGTQNAVMSDETQCIQPSEITVRPAAEGKLDLLVVLDFRMSTTCLYGDIVLPTATWYEKDDLNTSDMHPFIHPLSEAVQPLWQSKTDWEIYKGFAKAISEVGKDYLGVQKDLVLTPLMHDSPQELGQPFDPRDWKKGECDPIPGKTMPAMTVVERDYGAIYQKFTSIGPLMEKVGNGGKGMAWKTEREVEQLRKMNKVVADGVAKGQPRLDTAIDAAEMIMTLAPETNGHVAVKAWESLSKITGRDHTHLAIPREHDHITFRDIQAQPRKIISSPIWSGLESEEVSYNAGYTNVHELIPWRTLTGRQQFYQDHQWMRAFGEGMCVYRPAVDLKTTAAVHNHKPNGNKEILLNFLTPHQKWGIHSTYSENLRMLTLSRGGPHIWISEKDAREAGIVDNDWVEVFNVNGTLTARVVVSQRIPQGMTLMYHAQEKIVNVPGAEMSGKRGGIHNSVTRAVTKPTHMIGGYAQLAYGFNYYGTVGCNRDEFIVMRKMKNVDWMDQPLSK
>r6
MSRNDASHTDETAGESDDKRRAVDQSDDEQPKKRGANEERTDGGPAVGDPPGGDNPSRRGFLKGVGLASVLGIGSASASDDALFSMDGLQPVGDPIGEYPYRDWEDLYREQWDWDSVSRSTHSVNCTGSCSWNVYVKNGQVWREEQSGDYPRFDESLPDPNPRGCQKGACYTDYVNAEQRIKHPLKRVGERGEGKWKRITWDEALTEIAEHVVDEVEAGRYDAISGFTPIPAMSPVSFASGSRLINLLGGVSHSFYDWYSDLPPGQPITWGTQTDNAESADWYNADYIIAWGSNINVTRIPDAKYFLESGYNGTKRVGIFTDYSQTAIHTDEWLSPDPGSDTALALGMAQTIVSEGLYDEAHLKEQTDMPLLVRQDTGKFLRASDVPSVNSSADRPEWMLLMLDSNGQLREAPGSLGERDGQKDYSKSIDLDFDPRLDAETTVQTDDGSVQVRSVWAELRDELAQYDPETVTKMTGVGTETYQRVAREFADVERAKIIHGKGVNDWYHNDLGNRAIQLLVTLTGNLGRQGTGVDHYVGQEKIWTFHGWKTLSFPTGKVRGVPTTLWTYYHAGILDNTDADTAAKIRESIDKGWMPVYPEEREDGSRPDPTTMFVWRGNYFNQAKGNVAVEEELWPKLDLVVDINFRMDSTALYSDIVLPTASHYEKHDLSMTDMHTYVHPFTPAVEPLGESKTDWQIFKDLAEKIQEVATERGVEPISDRTFDREIDLQSVYDDYVRDWETETDGALAEDRAAAEYILEHSAESNPAGTDEQITFADTVEQPQRLLEAGDHWTSDIEDGEAYAPWKDFVQDKNPWPTVTGRQQYYIDHDWFLELGEQLPTHKEGPTNTGGDYPMEYNTPHGRWAIHSTWRDSEKMLRLQRGEPIVYINPDDAAERGIEDGDTVEVFNDLGAVEVQAKIYPSSERGTLRHFFSWEKFQYASRNNFNTLVPMYMKPTQLVQYPEDTGEHLYFFPNYWGPTGVNSDVRVDVRKKGGDGG
>r5
MMNNQELNPKTTRRSFIKASGMTALTVSAGINLPLQVKAETIPIKNMQDDKEVLSLCSVNCGSRCVLRLHVKNDEVRWVETDNSGDDEYGNHQIRACLRGRSMRYRMNHPDRLKYPMKRIGKRGEGKFQRISWDEALDTIADNLKRIVKDYGNEAVYNNYASGIVGGNMTRSSPFASLFTRLMNCYGGLLSYYGSYSTAQIARAMPFTYGSNVGNSTSDIVNSKLVVFFGNNPMETRMSGAGITYHLEQARERSNAKLIVIDPRYTDTASGREDEWIPIRPGTDAALVAGMAHVMITENLVDQVFLDKYCVGYDEKTLPASAPANGHYKAYILGQGDDGVEKTPEWAAKITGIPAAKIIRLAREIGTTKPCYIAQGWGLQRQSNGELACRAVAMLAILTGNVGISGGNSGAREGSFYPPIQRLPVLENPVKAKISVFSWTDAIDHGDQMTALKDGVQGKDKLDVPIKFMWNYAGNCITNQHSDINRTHEILSDDKKCEMIVVIENFMTDSAKYADILLPDLMTTEQEDIVPNDYAGNMGYLIFSQPATSAKFERRGIYDICCEIAKRLGQDVYDKFTEGGRTQEQWLQFLYSRMQEKDATLPSYDELKQMGVYKRQDPNGHLVAYQKFREDPEANPLKTPSGKIEIYSERLAEIAATWELKEDEVIHPLPIYHSGFNGWDDPKREKYPFQLVSFHYKSRTHSTYGNIDVLESACPQEMWINPIDAQRLTLTDGETVRVFNEIGETRIPVKITPRIMPGVLGMGQGAWHKANMFGDKVDHNGCVNVLTTLRPSPLAKGNPQHSNLVRIEKL
>r7
MSKIDLYLDNPAFHIVTGACPHDCPDTCTWQVAVDRITGAAVDIWGHPAHPITQGKLCGKVDRYLERTYHKDRLTVPLRRVGPKGSGRFERVSWEEAIADIARRLQAIIEEYGPEAVLPYSYSGTLGFLQGEGMASRFFNRLGASQLARTICAEAGFQGYLYTIGAAEGMLPEDFAHSRLILIWGSNTLTSNLHLWPFIQQARKQGARVLVIDPANTRTAQAGDEWIPIRPGTDAALALAMMQVIIAEERYDADYVARYTLGFKQLAERVRDWTPARSAEITGIPAERIAALAREYATIRPAGIRINYGLQRHYGGGMAVRTIACLPALVGAWRQHGGGIQLSSSGLFRHLDKRGLHRPDLLEKAKAELQKGNAGAVDQTRPFPRVINMNRLGDALSLDPARLARAHYRPRPIDPLPKPEEAGPPVKALIVYNCNPAAVAPDQTAVLAGLRREELFTVVLEHFQTDTADYADYLLPATTQLEHWDIHRAYGHAYLALNRPAIAPVGESLPNSEIFRRLALAMGYDDPCFYESDEEILRAFVAAQTHERFAGLTWERLLNEAFFRLNLPDPYLPFAEGNFPTPSGKCEFFSARMAEDGYDPLPTYTPPRWQDRSGASALSSFDGSLVCISPPAHSFLNSTFANLPRFLQREDRPMLWIHPQDARPRHIEDGTRVRVCNQHGEVMLTARVTDAVVPGTVLAPGVWWPKLSPDGRNINRTTPPDETDMGAGALFYDATVWVEPLPVMANG
>r4
MSESVVVRAACPHDCPDTCAMEIGVRDGRAVSVRGAADMPFTNGALCTKVAHYLERVYSDQRLMHPLRRVGAKGDGRFERIRWDDALDIIAERFRAIAADDPRAILPYSYAGTMGLVQSESLDRRFFNRLGAARLDRTICAAAGAMGWKATVGASIGADPEAVVDARLILIWGGNPVVSNVHGWRYLQEAKRRGAKLICIDPRRSETAAKCHQHVAPLPGTDGALALAMMQVLIAEDLLDHDYIARCTLGFAELAERVRGCTPEWAAGVTGLDAATIRELAQAYGRSAPTLIRLNYGLNRSGGGGMAVRNIACLPALTGAWRHAGGGALLSTSGNFPLDRHALERPDLYRQDPSRPPRTINMTTIGEALLETRDPPIRALYVYNANPVAVAPYSAKVRAGFRREDLFCVVHELFQTDTADYADILLPATSQLEHHDLHTSYGHFYLVANRPAIDAQGEARSNTEVFRQLAARLGFDDTALFETDEAIAAAALRREDGRSLGVGARLAEHGWARLNLPRPFAPFARGGFPTPSGKCEFLSSFLAAQGLDPLPAWHPPYESVVSNPALAARYPLALISPPARNFLNSSFANLPRFTAGEGGPRLDIHRDDAAARGLTDGMRVRIHNDRGAFHAYARVTEAVRRGVVAAPSIWWQKRSGDGENANAVTSDALTDMGGGAVFYDCLVEVAGAPA\n'''
        taxonomy = '''r1\td__Bacteria;p__Proteobacteria;c__Betaproteobacteria;o__Burkholderiales;f__Comamonadaceae;g__Hydrogenophaga;s__
r3\td__Bacteria;p__Proteobacteria;c__Betaproteobacteria;o__Burkholderiales;f__Comamonadaceae;g__Brachymonas;s__
r2\td__Bacteria;p__Proteobacteria;c__Betaproteobacteria;o__Burkholderiales;f__Burkholderiaceae;g__Burkholderia;s__
r4\td__Bacteria;p__Proteobacteria;c__Betaproteobacteria;o__Rhodocyclales;f__Rhodocyclaceae;g__;s__
r5\td__Bacteria;p__Proteobacteria;c__Gammaproteobacteria;o__Pasteurellales;f__Pasteurellaceae;g__Basfia;s__
r7\td__Bacteria;p__Chloroflexi;c__Caldilineae;o__Caldilineales;f__Caldilineaceae;g__Caldilinea;s__aerophila
r6\td__Archaea;p__Euryarchaeota;c__Methanomicrobia;o__Halobacteriales;f__Halobacteriaceae;g__Halogeometricum;s__\n'''
    
        with tempfile.NamedTemporaryFile(suffix='.fa') as fasta:
            fasta.write(reads)
            fasta.flush()
            with tempfile.NamedTemporaryFile(suffix='.tsv') as tax:
                tax.write(taxonomy)
                tax.flush()
                sequences_file = fasta.name
                taxonomy_file  = tax.name
                expected_list = [7, 2 ,3, 4, 5, 6, 7]
                for expected, i in zip( expected_list, range(0,6) ):

                    with tempdir.TempDir() as package:
                        Create(prerequisites).main(sequences = sequences_file,
                                      taxonomy = taxonomy_file,
                                      prefix = package,
                                      dereplication_level = i,
                                      force = True,
                                      threads = 5)
                        base = os.path.basename(package)
                        gpkg = GraftMPackageVersion2.acquire(package)
                        seqinfo = open(gpkg.taxtastic_seqinfo_path())\
                                                                .readlines()

                        hmm = gpkg.search_hmm_paths()[0]
                        nseq = [int(x.strip().split()[1]) 
                                for x in open(hmm).readlines()
                                if x.startswith("NSEQ")][0]

                        self.assertEqual(nseq, expected)
Exemplo n.º 7
0
 def test_create_stars(self):
     """Package creation from the ``with_stars`` FASTA produces an alignment HMM.

     The only assertion is that the created package's alignment HMM path
     exists on disk.
     """
     with tempdir.TempDir() as workdir:
         package_prefix = workdir + ".gpkg"
         fixtures = os.path.join(path_to_data, 'create')
         inputs = {
             'sequences': os.path.join(
                 fixtures, 'homologs.trimmed.unaligned.with_stars.faa'),
             'taxonomy': os.path.join(
                 fixtures,
                 'homologs.tax2tree.rerooted.decorated.tree-consensus-strings'),
         }
         Create(prerequisites).main(prefix=package_prefix, **inputs)

         created = GraftMPackageVersion2.acquire(package_prefix)
         hmm_path = created.alignment_hmm_path()
         self.assertTrue(os.path.exists(hmm_path))
Exemplo n.º 8
0
 def test_dna_package(self):
     """Create a package from the 61_otus fixtures and check its contents.

     The package is built from a FASTA file plus a pre-made taxtastic
     taxonomy, seqinfo and alignment.  The test asserts that the second
     line of the alignment HMM is ``NAME  61_otus.aln`` and that the
     package reports no DIAMOND database path.
     """
     with tempdir.TempDir() as tmp:
         gpkg = tmp + ".gpkg"
         Create(prerequisites).main(
             sequences=os.path.join(path_to_data, 'create',
                                    '61_otus.fasta'),
             taxtastic_taxonomy=os.path.join(path_to_data, '61_otus.gpkg',
                                             '61_otus.refpkg',
                                             '61_otus_taxonomy.csv'),
             taxtastic_seqinfo=os.path.join(path_to_data, '61_otus.gpkg',
                                            '61_otus.refpkg',
                                            '61_otus_seqinfo.csv'),
             alignment=os.path.join(path_to_data, '61_otus.gpkg',
                                    '61_otus.refpkg', '61_otus.aln.fa'),
             prefix=gpkg,
             threads=5)
         pkg = GraftMPackageVersion2.acquire(gpkg)
         with open(pkg.alignment_hmm_path()) as f:
             self.assertEqual('NAME  61_otus.aln\n', f.readlines()[1])
         # Identity check is the idiomatic way to assert a None result.
         self.assertIsNone(pkg.diamond_database_path())