def test_get_percentage_identity():
    """Check that both alignment classes report 50% identity.

    The two ten-letter strings used here agree at every other position
    ('A') and differ at the remaining ones ('V' versus 'T').
    """
    query = 'AVAVAVAVAV'
    subject = 'ATATATATAT'

    blast_hit = BlastAlignment('pdb|1crn|A', query, '',
                               1, 10, query,
                               1, 10, subject)
    eq_(blast_hit.get_percentage_identity(), 50.0)

    pairwise = TargetTemplateAlignment(query, subject)
    eq_(pairwise.get_percentage_identity(), 50.0)
def test_count_aligned_residues():
    """Check that both alignment classes count 5 aligned residues.

    The second string has '-' at every other position, leaving five of
    its ten positions occupied by a letter.
    """
    query = 'AVAVAVAVAV'
    subject = 'A-A-A-A-A-'

    blast_hit = BlastAlignment('pdb|1crn|A', query, '',
                               1, 10, query,
                               1, 10, subject)
    eq_(blast_hit.count_aligned_residues(), 5)

    pairwise = TargetTemplateAlignment(query, subject)
    eq_(pairwise.count_aligned_residues(), 5)
def test_get_percentage_coverage():
    """Check that both alignment classes report 50% coverage.

    The second string has '-' at every other position, so only half of
    the ten positions pair a letter with a letter.
    """
    query = 'AVAVAVAVAV'
    subject = 'A-A-A-A-A-'

    blast_hit = BlastAlignment('pdb|1crn|A', query, '',
                               1, 10, query,
                               1, 10, subject)
    eq_(blast_hit.get_percentage_coverage(), 50.0)

    pairwise = TargetTemplateAlignment(query, subject)
    eq_(pairwise.get_percentage_coverage(), 50.0)
def test_get_query_range():
    """Check the query range reported for a hit on a long query sequence.

    The alignment is constructed with query positions 282 and 475; the
    object returned by ``get_query_range`` is expected to have
    ``start == 281`` and ``end == 475``.
    """
    # The line breaks only keep the literal readable; they are stripped
    # right away so the sequence is one contiguous string.
    full_sequence = """
MSSSHSRAGQSAAGAAPGGGVDTRDAEMPATEKDLAEDAPWKKIQQNTFTRWCNEHLKCVSKRIANLQTDLS
DGLRLIALLEVLSQKKMHRKHNQRPTFRQMQLENVSVALEFLDRESIKLVSIDSKAIVDGNLKLILGLIWTL
ILHYSISMPMWDEEEDEEAKKQTPKQRLLGWIQNKLPQLPITNFSRDWQSGRALGALVDSCAPGLCPDWDSW
DASKPVTNAREAMQQADDWLGIPQVITPEEIVDPNVDEHSVMTYLSQFPKAKLKPGAPLRPKLNPKKARAYG
PGIEPTGNMVKKRAEFTVETRSAGQGEVLVYVEDPAGHQEEAKVTANNDKNRTFSVWYVPEVTGTHKVTVLF
AGQHIAKSPFEVYVDKSQGDASKVTAQGPGLEPSGNIANKTTYFEIFTAGAGTGEVEVVIQDPMGQKGTVEP
QLEARGDSTYRCSYQPTMEGVHTVHVTFAGVPIPRSPYTVTVGQACNPSACRAVGRGLQPKGVRVKETADFK
VYTKGAGSGELKVTVKGPKGEERVKQKDLGDGVYGFEYYPMVPGTYIVTITWGGQNIGRSPFEVKVGTECGN
QKVRAWGPGLEGGVVGKSADFVVEAIGDDVGTLGFSVEGPSQAKIECDDKGDGSCDVRYWPQEAGEYAVHVL
CNSEDIRLSPFMADIRDAPQDFHPDRVKARGPGLEKTGVAVNKPAEFTVDAKHGGKAPLRVQVQDNEGCPVE
ALVKDNGNGTYSCSYVPRKPVKHTAMVSWGGVSIPNSPFRVNVGAGSHPNKVKVYGPGVAKTGLKAHEPTYF
TVDCAEAGQGDVSIGIKCAPGVVGPAEADIDFDIIRNDNDTFTVKYTPRGAGSYTIMVLFADQATPTSPIRV
KVEPSHDASKVKAEGPGLSRTGVELGKPTHFTVNAKAAGKGKLDVQFSGLTKGDAVRDVDIIDHHDNTYTVK
YTPVQQGPVGVNVTYGGDPIPKSPFSVAVSPSLDLSKIKVSGLGEKVDVGKDQEFTVKSKGAGGQGKVASKI
VGPSGAAVPCKVEPGLGADNSVVRFLPREEGPYEVEVTYDGVPVPGSPFPLEAVAPTKPSKVKAFGPGLQGG
SAGSPARFTIDTKGAGTGGLGLTVEGPCEAQLECLDNGDGTCSVSYVPTEPGDYNINILFADTHIPGSPFKA
HVVPCFDASKVKCSGPGLERATAGEVGQFQVDCSSAGSAELTIEICSEAGLPAEVYIQDHGDGTHTITYIPL
CPGAYTVTIKYGGQPVPNFPSKLQVEPAVDTSGVQCYGPGIEGQGVFREATTEFSVDARALTQTGGPHVKAR
VANPSGNLTETYVQDRGDGMYKVEYTPYEEGLHSVDVTYDGSPVPSSPFQVPVTEGCDPSRVRVHGPGIQSG
TTNKPNKFTVETRGAGTGGLGLAVEGPSEAKMSCMDNKDGSCSVEYIPYEAGTYSLNVTYGGHQVPGSPFKV
PVHDVTDASKVKCSGPGLSPGMVRANLPQSFQVDTSKAGVAPLQVKVQGPKGLVEPVDVVDNADGTQTVNYV
PSREGPYSISVLYGDEEVPRSPFKVKVLPTHDASKVKASGPGLNTTGVPASLPVEFTIDAKDAGEGLLAVQI
TDPEGKPKKTHIQDNHDGTYTVAYVPDVTGRYTILIKYGGDEIPFSPYRVRAVPTGDASKCTVTVSIGGHGL
GAGIGPTIQIGEETVITVDTKAAGKGKVTCTVCTPDGSEVDVDVVENEDGTFDIFYTAPQPGKYVICVRFGG
EHVPNSPFQVTALAGDQPSVQPPLRSQQLAPQYTYAQGGQQTWAPERPLVGVNGLDVTSLRPFDLVIPFTIK
KGEITGEVRMPSGKVAQPTITDNKDGTVTVRYAPSEAGLHEMDIRYDNMHIPGSPLQFYVDYVNCGHVTAYG
PGLTHGVVNKPATFTVNTKDAGEGGLSLAIEGPSKAEISCTDNQDGTCSVSYLPVLPGDYSILVKYNEQHVP
GSPFTARVTGDDSMRMSHLKVGSAADIPINISETDLSLLTATVVPPSGREEPCLLKRLRNGHVGISFVPKET
GEHLVHVKKNGQHVASSPIPVVISQSEIGDASRVRVSGQGLHEGHTFEPAEFIIDTRDAGYGGLSLSIEGPS
KVDINTEDLEDGTCRVTYCPTEPGNYIINIKFADQHVPGSPFSVKVTGEGRVKESITRRRRAPSVANVGSHC
DLSLKIPEISIQDMTAQVTSPSGKTHEAEIVEGENHTYCIRFVPAEMGTHTVSVKYKGQHVPGSPFQFTVGP
LGEGGAHKVRAGGPGLERAEAGVPAEFSIWTREAGAGGLAIAVEGPSKAEISFEDRKDGSCGVAYVVQEPGD
YEVSVKFNEEHIPDSPFVVPVASPSGDARRLTVSSLQESGLKVNQPASFAVSLNGAKGAIDAKVHSPSGALE
ECYVTEIDQDKYAVRFIPRENGVYLIDVKFNGTHIPGSPFKIRVGEPGHGGDPGLVSAYGAGLEGGVTGNPA
EFVVNTSNAGAGALSVTIDGPSKVKMDCQECPEGYRVTYTPMAPGSYLISIKYGGPYHIGGSPFKAKVTGPR
LVSNHSLHETSSVFVDSLTKATCAPQHGAPGPGPADASKVVAKGLGLSKAYVGQKSSFTVDCSKAGNNMLLV
GVHGPRTPCEEILVKHVGSRLYSVSYLLKDKGEYTLVVKWGDEHIPGSPYRVVVP
""".replace('\n', '')

    # Aligned query and subject strings, written in 60-column pieces.
    query_alignment = (
        "KKARAYGPGIEPTGNMVKKRAEFTVETRSAGQGEVLVYVEDPAGHQEEAKVTANNDKNRT"
        "FSVWYVPEVTGTHKVTVLFAGQHIAKSPFEVYVDKSQGD--ASKVTAQGPGLEPSGNIAN"
        "KTTYFEIFTAGAGTGEVEVVIQDPMGQKGTVEPQLEARGDSTYRCSYQPTMEGVHTVHVT"
        "FAGVPIPRSPYTVTVG"
    )
    subject_alignment = (
        "QKVRAWGPGLE--GGVVGKSADFVVEAIGDDVGTLGFSVEGPS--Q--AKIECDDKGDGS"
        "CDVRYWPQEAGEYAVHVLCNSEDIRLSPFMADIRDAPQDFHPDRVKARGPGLEKTGVAVN"
        "KPAEFTVDAKHGGKAPLRVQVQDNEG--CPVEALVKDNGNGTYSCSYVPRKPVKHTAMVS"
        "WGGVSIPNSPFRVNVG"
    )

    alignment = BlastAlignment('pdb|4M9P|A', full_sequence, 'pdb',
                               282, 475, query_alignment,
                               100, 287, subject_alignment)

    query_range = alignment.get_query_range()
    eq_(query_range.start, 281)
    eq_(query_range.end, 475)
def test_is_better_than():
    """Check ``domain_aligner._is_better_than`` in both argument orders.

    A hit whose subject alignment equals its ten-'A' query alignment must
    be judged better than a hit whose first subject position is '-' or
    'V', and the reversed comparison must be false.
    """
    all_a = 'AAAAAAAAAA'

    for weaker_subject in ('-AAAAAAAAA', 'VAAAAAAAAA'):
        hit1 = BlastAlignment('', '', '', 1, 10, all_a, 1, 10, all_a)
        hit2 = BlastAlignment('', '', '', 1, 10, all_a, 1, 10, weaker_subject)

        ok_(domain_aligner._is_better_than(hit1, hit2))
        ok_(not domain_aligner._is_better_than(hit2, hit1))
def test_get_hit_type():
    """Check the hit type reported for three styles of hit identifier.

    Pipe-separated ids are expected to yield their first field, and the
    bare ``1crn_A`` form is expected to be reported as ``'pdb'``.
    """
    expected_type_by_hit_id = (
        ('pdb|1crn|A', 'pdb'),
        ('sp|P00395|COX1_HUMAN', 'sp'),
        ('1crn_A', 'pdb'),
    )

    for hit_id, expected_type in expected_type_by_hit_id:
        alignment = BlastAlignment(hit_id, '', '', 0, 0, '', 0, 0, '')
        eq_(alignment.get_hit_type(), expected_type)
def _parse_alignments(self, xml_str, full_query_sequence, databank): hits = {} root = ET.fromstring(xml_str) iterations = root.find('BlastOutput_iterations') for it in iterations.findall('Iteration'): for mem in it.findall('Iteration_hits'): for hit in mem.findall('Hit'): hit_id = hit.find('Hit_def').text hits[hit_id] = [] hsps = hit.find('Hit_hsps') for hsp in hsps.findall('Hsp'): query_start = int(hsp.find('Hsp_query-from').text) query_end = int(hsp.find('Hsp_query-to').text) query_alignment = hsp.find('Hsp_qseq').text subject_start = int(hsp.find('Hsp_hit-from').text) subject_end = int(hsp.find('Hsp_hit-to').text) subject_alignment = hsp.find('Hsp_hseq').text hits[hit_id].append( BlastAlignment(hit_id, full_query_sequence, databank, query_start, query_end, query_alignment, subject_start, subject_end, subject_alignment)) return hits
def test_getters():
    """Check that ``subject_alignment`` exposes the string it was given."""
    query_seq = 'HALAILA'
    subject_seq = 'WALALLA'

    alignment = BlastAlignment('pdb|xxxx|X', query_seq, '',
                               1, 7, query_seq,
                               1, 7, subject_seq)

    eq_(alignment.subject_alignment, subject_seq)
def test_conversion():
    """Check a ``DomainAlignment`` built from a ``BlastAlignment``.

    The domain alignment is constructed from the BLAST alignment's query
    and subject strings plus its query range, and is then expected to
    report target residue 317 as covered.
    """
    # The line breaks only keep the literal readable; they are stripped
    # right away so the sequence is one contiguous string.
    full_sequence = """
MKMASTRCKLARYLEDLEDVDLKKFKMHLEDYPPQKGCIPLPRGQTEKADHVDLATLMIDFNGEEKAWAMAV
WIFAAINRRDLYEKAKRDEPKWGSDNARVSNPTVICQEDSIEEEWMGLLEYLSRISICKMKKDYRKKYRKYV
RSRFQCIEDRNARLGESVSLNKRYTRLRLIKEHRSQQEREQELLAIGKTKTCESPVSPIKMELLFDPDDEHS
EPVHTVVFQGAAGIGKTILARKMMLDWASGTLYQDRFDYLFYIHCREVSLVTQRSLGDLIMSCCPDPNPPIH
KIVRKPSRILFLMDGFDELQGAFDEHIGPLCTDWQKAERGDILLSSLIRKKLLPEASLLITTRPVALEKLQH
LLDHPRHVEILGFSEAKRKEYFFKYFSDEAQARAAFSLIQENEVLFTMCFIPLVCWIVCTGLKQQMESGKSL
AQTSKTTTAVYVFFLSSLLQPRGGSQEHGLCAHLWGLCSLAADGIWNQKILFEESDLRNHGLQKADVSAFLR
MNLFQKEVDCEKFYSFIHMTFQEFFAAMYYLLEEEKEGRTNVPGSRLKLPSRDVTVLLENYGKFEKGYLIFV
VRFLFGLVNQERTSYLEKKLSCKISQQIRLELLKWIEVKAKAKKLQIQPSQLELFYCLYEMQEEDFVQRAMD
YFPKIEINLSTRMDHMVSSFCIENCHRVESLSLGFLHNMPKEEEEEEKEGRHLDMVQCVLPSSSHAACSHGL
VNSHLTSSFCRGLFSVLSTSQSLTELDLSDNSLGDPGMRVLCETLQHPGCNIRRLWLGRCGLSHECCFDISL
VLSSNQKLVELDLSDNALGDFGIRLLCVGLKHLLCNLKKLWLVSCCLTSACCQDLASVLSTSHSLTRLYVGE
NALGDSGVAILCEKAKNPQCNLQKLGLVNSGLTSVCCSALSSVLSTNQNLTHLYLRGNTLGDKGIKLLCEGL
LHPDCKLQVLELDNCNLTSHCCWDLSTLLTSSQSLRKLSLGNNDLGDLGVMMFCEVLKQQSCLLQNLGLSEM
YFNYETKSALETLQEEKPELTVVFEPSW
""".replace('\n', '')

    # Aligned query and subject strings, written in 60-column pieces.
    query_alignment = (
        "SPVSPIKMELLFDPDDEHSEPVHTVVFQGAAGIGKTILARKMMLDWASGTLYQDRFDYLF"
        "YIHCREVSLVTQR-SLGDLIMS-CC-PDPNPP-IHK-IVRKPSRILFLMDGFDELQGAF-"
        "DE--HIGPLCTDWQKAERGDILLSSLIRKKLLPEASLLITTRPVALEKLQHLLDHPRHVE"
        "ILGFSEAKRKEYFFKYFSDEAQARAAFSLIQENEVLFTMCFIPLVCWIVCTGLKQQMESG"
        "KSLAQTSKTTTAVYVFFLSSLLQ"
    )
    subject_alignment = (
        "SPAA-LGLEELFSPNGHLNEDADTVLVVGEAGSGKSTLLQQVHLLWATGQDFQE-FLFVF"
        "PFSCRQLQCVARPLSVMTLLFEHCCWPDVGQQDVFQFLLDHPDRILLTFDGFDEFKFKFT"
        "DHERHCSP--TDPTSVQ---TLLFNLLQGNLLKNARKVLTSRPDAVSAF-LRKYVRTEFN"
        "LKGFSEEGIELYLRKCHREPGVADRLIHLLQTTSALHGLCHLPVFSWMV--S-KCHQELL"
        "LQDGGSPKTTTDMYLLILQHFLR"
    )

    alignment = BlastAlignment('pdb|5IRM|C', full_sequence, 'pdb',
                               198, 452, query_alignment,
                               1, 252, subject_alignment)

    query_range = alignment.get_query_range()
    template_id = TemplateID('5IRM', 'C')
    domain_alignment = DomainAlignment(alignment.query_alignment,
                                       alignment.subject_alignment,
                                       query_range,
                                       template_id)

    ok_(domain_alignment.is_target_residue_covered(317))
def test_get_hit_accession_code():
    """Check the accession code reported for a pipe-separated hit id.

    For ``sp|P00395|COX1_HUMAN`` the expected code is the middle field.
    """
    hit_id = 'sp|P00395|COX1_HUMAN'
    swissprot_hit = BlastAlignment(hit_id, '', '', 0, 0, '', 0, 0, '')

    eq_(swissprot_hit.get_hit_accession_code(), 'P00395')
def test_get_hit_chain_id():
    """Check the chain id reported for a pipe-separated hit id.

    For ``pdb|1crn|A`` the expected chain id is the last field, ``'A'``.
    """
    hit_id = 'pdb|1crn|A'
    pdb_hit = BlastAlignment(hit_id, '', '', 0, 0, '', 0, 0, '')

    eq_(pdb_hit.get_hit_chain_id(), 'A')