Пример #1
0
def test_get_hit_type():
    """Check the hit type BlastAlignment reports for several hit id formats."""
    expected_type_by_hit_id = (
        ('pdb|1crn|A', 'pdb'),
        ('sp|P00395|COX1_HUMAN', 'sp'),
        # Underscore-separated id without a databank prefix.
        ('1crn_A', 'pdb'),
    )
    for hit_id, expected_type in expected_type_by_hit_id:
        blast_hit = BlastAlignment(hit_id, '', '', 0, 0, '', 0, 0, '')
        eq_(blast_hit.get_hit_type(), expected_type)
Пример #2
0
def test_is_better_than():
    """Check ``_is_better_than`` in both argument orders.

    A hit whose subject alignment equals the query alignment must be rated
    better than one with a leading gap or a leading mismatch, and never the
    other way around.
    """
    query_aln = 'AAAAAAAAAA'
    for flawed_subject in ('-AAAAAAAAA', 'VAAAAAAAAA'):
        exact_hit = BlastAlignment('', '', '', 1, 10, query_aln,
                                   1, 10, query_aln)
        flawed_hit = BlastAlignment('', '', '', 1, 10, query_aln,
                                    1, 10, flawed_subject)
        ok_(domain_aligner._is_better_than(exact_hit, flawed_hit))
        ok_(not domain_aligner._is_better_than(flawed_hit, exact_hit))
Пример #3
0
    def _parse_alignments(self, xml_str, full_query_sequence, databank):
        hits = {}
        root = ET.fromstring(xml_str)
        iterations = root.find('BlastOutput_iterations')
        for it in iterations.findall('Iteration'):
            for mem in it.findall('Iteration_hits'):
                for hit in mem.findall('Hit'):
                    hit_id = hit.find('Hit_def').text
                    hits[hit_id] = []

                    hsps = hit.find('Hit_hsps')
                    for hsp in hsps.findall('Hsp'):

                        query_start = int(hsp.find('Hsp_query-from').text)
                        query_end = int(hsp.find('Hsp_query-to').text)
                        query_alignment = hsp.find('Hsp_qseq').text

                        subject_start = int(hsp.find('Hsp_hit-from').text)
                        subject_end = int(hsp.find('Hsp_hit-to').text)
                        subject_alignment = hsp.find('Hsp_hseq').text

                        hits[hit_id].append(
                            BlastAlignment(hit_id, full_query_sequence,
                                           databank, query_start, query_end,
                                           query_alignment, subject_start,
                                           subject_end, subject_alignment))
        return hits
Пример #4
0
def test_getters():
    """Check that ``subject_alignment`` returns the subject string given at construction."""
    query_seq, subject_seq = 'HALAILA', 'WALALLA'
    blast_hit = BlastAlignment(
        'pdb|xxxx|X', query_seq, '',
        1, 7, query_seq,
        1, 7, subject_seq,
    )
    eq_(blast_hit.subject_alignment, subject_seq)
Пример #5
0
def test_count_aligned_residues():
    """Check the aligned-residue count when every second subject position is a gap.

    The same query/subject pair is checked through both BlastAlignment and
    TargetTemplateAlignment; each must report 5.
    """
    query_aln = 'AVAVAVAVAV'
    gapped_subject = 'A-A-A-A-A-'

    blast_hit = BlastAlignment('pdb|1crn|A', query_aln, '',
                               1, 10, query_aln,
                               1, 10, gapped_subject)
    eq_(blast_hit.count_aligned_residues(), 5)

    pairwise = TargetTemplateAlignment(query_aln, gapped_subject)
    eq_(pairwise.count_aligned_residues(), 5)
Пример #6
0
def test_get_percentage_identity():
    """Check percentage identity when every second position differs.

    The same query/subject pair is checked through both BlastAlignment and
    TargetTemplateAlignment; each must report 50.0.
    """
    query_aln = 'AVAVAVAVAV'
    half_matching_subject = 'ATATATATAT'

    blast_hit = BlastAlignment('pdb|1crn|A', query_aln, '',
                               1, 10, query_aln,
                               1, 10, half_matching_subject)
    eq_(blast_hit.get_percentage_identity(), 50.0)

    pairwise = TargetTemplateAlignment(query_aln, half_matching_subject)
    eq_(pairwise.get_percentage_identity(), 50.0)
Пример #7
0
def test_get_percentage_coverage():
    """Check percentage coverage when every second subject position is a gap.

    The same query/subject pair is checked through both BlastAlignment and
    TargetTemplateAlignment; each must report 50.0.
    """
    query_aln = 'AVAVAVAVAV'
    gapped_subject = 'A-A-A-A-A-'

    blast_hit = BlastAlignment('pdb|1crn|A', query_aln, '',
                               1, 10, query_aln,
                               1, 10, gapped_subject)
    eq_(blast_hit.get_percentage_coverage(), 50.0)

    pairwise = TargetTemplateAlignment(query_aln, gapped_subject)
    eq_(pairwise.get_percentage_coverage(), 50.0)
Пример #8
0
def test_get_query_range():
    """Check the range ``get_query_range`` reports for a gapped BLAST alignment.

    The alignment is built with query positions 282-475; the returned range
    is expected to have ``start == 281`` and ``end == 475``.
    """
    # The line breaks in this literal are layout only and are removed below.
    sequence_lines = """
MSSSHSRAGQSAAGAAPGGGVDTRDAEMPATEKDLAEDAPWKKIQQNTFTRWCNEHLKCVSKRIANLQTDLS
DGLRLIALLEVLSQKKMHRKHNQRPTFRQMQLENVSVALEFLDRESIKLVSIDSKAIVDGNLKLILGLIWTL
ILHYSISMPMWDEEEDEEAKKQTPKQRLLGWIQNKLPQLPITNFSRDWQSGRALGALVDSCAPGLCPDWDSW
DASKPVTNAREAMQQADDWLGIPQVITPEEIVDPNVDEHSVMTYLSQFPKAKLKPGAPLRPKLNPKKARAYG
PGIEPTGNMVKKRAEFTVETRSAGQGEVLVYVEDPAGHQEEAKVTANNDKNRTFSVWYVPEVTGTHKVTVLF
AGQHIAKSPFEVYVDKSQGDASKVTAQGPGLEPSGNIANKTTYFEIFTAGAGTGEVEVVIQDPMGQKGTVEP
QLEARGDSTYRCSYQPTMEGVHTVHVTFAGVPIPRSPYTVTVGQACNPSACRAVGRGLQPKGVRVKETADFK
VYTKGAGSGELKVTVKGPKGEERVKQKDLGDGVYGFEYYPMVPGTYIVTITWGGQNIGRSPFEVKVGTECGN
QKVRAWGPGLEGGVVGKSADFVVEAIGDDVGTLGFSVEGPSQAKIECDDKGDGSCDVRYWPQEAGEYAVHVL
CNSEDIRLSPFMADIRDAPQDFHPDRVKARGPGLEKTGVAVNKPAEFTVDAKHGGKAPLRVQVQDNEGCPVE
ALVKDNGNGTYSCSYVPRKPVKHTAMVSWGGVSIPNSPFRVNVGAGSHPNKVKVYGPGVAKTGLKAHEPTYF
TVDCAEAGQGDVSIGIKCAPGVVGPAEADIDFDIIRNDNDTFTVKYTPRGAGSYTIMVLFADQATPTSPIRV
KVEPSHDASKVKAEGPGLSRTGVELGKPTHFTVNAKAAGKGKLDVQFSGLTKGDAVRDVDIIDHHDNTYTVK
YTPVQQGPVGVNVTYGGDPIPKSPFSVAVSPSLDLSKIKVSGLGEKVDVGKDQEFTVKSKGAGGQGKVASKI
VGPSGAAVPCKVEPGLGADNSVVRFLPREEGPYEVEVTYDGVPVPGSPFPLEAVAPTKPSKVKAFGPGLQGG
SAGSPARFTIDTKGAGTGGLGLTVEGPCEAQLECLDNGDGTCSVSYVPTEPGDYNINILFADTHIPGSPFKA
HVVPCFDASKVKCSGPGLERATAGEVGQFQVDCSSAGSAELTIEICSEAGLPAEVYIQDHGDGTHTITYIPL
CPGAYTVTIKYGGQPVPNFPSKLQVEPAVDTSGVQCYGPGIEGQGVFREATTEFSVDARALTQTGGPHVKAR
VANPSGNLTETYVQDRGDGMYKVEYTPYEEGLHSVDVTYDGSPVPSSPFQVPVTEGCDPSRVRVHGPGIQSG
TTNKPNKFTVETRGAGTGGLGLAVEGPSEAKMSCMDNKDGSCSVEYIPYEAGTYSLNVTYGGHQVPGSPFKV
PVHDVTDASKVKCSGPGLSPGMVRANLPQSFQVDTSKAGVAPLQVKVQGPKGLVEPVDVVDNADGTQTVNYV
PSREGPYSISVLYGDEEVPRSPFKVKVLPTHDASKVKASGPGLNTTGVPASLPVEFTIDAKDAGEGLLAVQI
TDPEGKPKKTHIQDNHDGTYTVAYVPDVTGRYTILIKYGGDEIPFSPYRVRAVPTGDASKCTVTVSIGGHGL
GAGIGPTIQIGEETVITVDTKAAGKGKVTCTVCTPDGSEVDVDVVENEDGTFDIFYTAPQPGKYVICVRFGG
EHVPNSPFQVTALAGDQPSVQPPLRSQQLAPQYTYAQGGQQTWAPERPLVGVNGLDVTSLRPFDLVIPFTIK
KGEITGEVRMPSGKVAQPTITDNKDGTVTVRYAPSEAGLHEMDIRYDNMHIPGSPLQFYVDYVNCGHVTAYG
PGLTHGVVNKPATFTVNTKDAGEGGLSLAIEGPSKAEISCTDNQDGTCSVSYLPVLPGDYSILVKYNEQHVP
GSPFTARVTGDDSMRMSHLKVGSAADIPINISETDLSLLTATVVPPSGREEPCLLKRLRNGHVGISFVPKET
GEHLVHVKKNGQHVASSPIPVVISQSEIGDASRVRVSGQGLHEGHTFEPAEFIIDTRDAGYGGLSLSIEGPS
KVDINTEDLEDGTCRVTYCPTEPGNYIINIKFADQHVPGSPFSVKVTGEGRVKESITRRRRAPSVANVGSHC
DLSLKIPEISIQDMTAQVTSPSGKTHEAEIVEGENHTYCIRFVPAEMGTHTVSVKYKGQHVPGSPFQFTVGP
LGEGGAHKVRAGGPGLERAEAGVPAEFSIWTREAGAGGLAIAVEGPSKAEISFEDRKDGSCGVAYVVQEPGD
YEVSVKFNEEHIPDSPFVVPVASPSGDARRLTVSSLQESGLKVNQPASFAVSLNGAKGAIDAKVHSPSGALE
ECYVTEIDQDKYAVRFIPRENGVYLIDVKFNGTHIPGSPFKIRVGEPGHGGDPGLVSAYGAGLEGGVTGNPA
EFVVNTSNAGAGALSVTIDGPSKVKMDCQECPEGYRVTYTPMAPGSYLISIKYGGPYHIGGSPFKAKVTGPR
LVSNHSLHETSSVFVDSLTKATCAPQHGAPGPGPADASKVVAKGLGLSKAYVGQKSSFTVDCSKAGNNMLLV
GVHGPRTPCEEILVKHVGSRLYSVSYLLKDKGEYTLVVKWGDEHIPGSPYRVVVP
""".split('\n')
    full_sequence = ''.join(sequence_lines)

    # Aligned query segment, including gap characters.
    query_aligned = (
        "KKARAYGPGIEPTGNMVKKRAEFTVETRSAGQGEVLVYVEDPAGHQEEAKVTANNDKNRT"
        "FSVWYVPEVTGTHKVTVLFAGQHIAKSPFEVYVDKSQGD--ASKVTAQGPGLEPSGNIAN"
        "KTTYFEIFTAGAGTGEVEVVIQDPMGQKGTVEPQLEARGDSTYRCSYQPTMEGVHTVHVT"
        "FAGVPIPRSPYTVTVG"
    )
    # Aligned subject segment, including gap characters.
    subject_aligned = (
        "QKVRAWGPGLE--GGVVGKSADFVVEAIGDDVGTLGFSVEGPS--Q--AKIECDDKGDGS"
        "CDVRYWPQEAGEYAVHVLCNSEDIRLSPFMADIRDAPQDFHPDRVKARGPGLEKTGVAVN"
        "KPAEFTVDAKHGGKAPLRVQVQDNEG--CPVEALVKDNGNGTYSCSYVPRKPVKHTAMVS"
        "WGGVSIPNSPFRVNVG"
    )

    blast_hit = BlastAlignment('pdb|4M9P|A', full_sequence, 'pdb',
                               282, 475, query_aligned,
                               100, 287, subject_aligned)

    query_range = blast_hit.get_query_range()
    eq_(query_range.start, 281)
    eq_(query_range.end, 475)
Пример #9
0
def test_conversion():
    """Check a DomainAlignment built from a BlastAlignment's data.

    The DomainAlignment is created from the BLAST alignment's query and
    subject alignment strings and its query range, and must report target
    residue 317 as covered.
    """
    # The line breaks in this literal are layout only and are removed below.
    sequence_lines = """
MKMASTRCKLARYLEDLEDVDLKKFKMHLEDYPPQKGCIPLPRGQTEKADHVDLATLMIDFNGEEKAWAMAV
WIFAAINRRDLYEKAKRDEPKWGSDNARVSNPTVICQEDSIEEEWMGLLEYLSRISICKMKKDYRKKYRKYV
RSRFQCIEDRNARLGESVSLNKRYTRLRLIKEHRSQQEREQELLAIGKTKTCESPVSPIKMELLFDPDDEHS
EPVHTVVFQGAAGIGKTILARKMMLDWASGTLYQDRFDYLFYIHCREVSLVTQRSLGDLIMSCCPDPNPPIH
KIVRKPSRILFLMDGFDELQGAFDEHIGPLCTDWQKAERGDILLSSLIRKKLLPEASLLITTRPVALEKLQH
LLDHPRHVEILGFSEAKRKEYFFKYFSDEAQARAAFSLIQENEVLFTMCFIPLVCWIVCTGLKQQMESGKSL
AQTSKTTTAVYVFFLSSLLQPRGGSQEHGLCAHLWGLCSLAADGIWNQKILFEESDLRNHGLQKADVSAFLR
MNLFQKEVDCEKFYSFIHMTFQEFFAAMYYLLEEEKEGRTNVPGSRLKLPSRDVTVLLENYGKFEKGYLIFV
VRFLFGLVNQERTSYLEKKLSCKISQQIRLELLKWIEVKAKAKKLQIQPSQLELFYCLYEMQEEDFVQRAMD
YFPKIEINLSTRMDHMVSSFCIENCHRVESLSLGFLHNMPKEEEEEEKEGRHLDMVQCVLPSSSHAACSHGL
VNSHLTSSFCRGLFSVLSTSQSLTELDLSDNSLGDPGMRVLCETLQHPGCNIRRLWLGRCGLSHECCFDISL
VLSSNQKLVELDLSDNALGDFGIRLLCVGLKHLLCNLKKLWLVSCCLTSACCQDLASVLSTSHSLTRLYVGE
NALGDSGVAILCEKAKNPQCNLQKLGLVNSGLTSVCCSALSSVLSTNQNLTHLYLRGNTLGDKGIKLLCEGL
LHPDCKLQVLELDNCNLTSHCCWDLSTLLTSSQSLRKLSLGNNDLGDLGVMMFCEVLKQQSCLLQNLGLSEM
YFNYETKSALETLQEEKPELTVVFEPSW
""".split('\n')
    full_sequence = ''.join(sequence_lines)

    # Aligned query segment, including gap characters.
    query_aligned = (
        "SPVSPIKMELLFDPDDEHSEPVHTVVFQGAAGIGKTILARKMMLDWASGTLYQDRFDYLF"
        "YIHCREVSLVTQR-SLGDLIMS-CC-PDPNPP-IHK-IVRKPSRILFLMDGFDELQGAF-"
        "DE--HIGPLCTDWQKAERGDILLSSLIRKKLLPEASLLITTRPVALEKLQHLLDHPRHVE"
        "ILGFSEAKRKEYFFKYFSDEAQARAAFSLIQENEVLFTMCFIPLVCWIVCTGLKQQMESG"
        "KSLAQTSKTTTAVYVFFLSSLLQ"
    )
    # Aligned subject segment, including gap characters.
    subject_aligned = (
        "SPAA-LGLEELFSPNGHLNEDADTVLVVGEAGSGKSTLLQQVHLLWATGQDFQE-FLFVF"
        "PFSCRQLQCVARPLSVMTLLFEHCCWPDVGQQDVFQFLLDHPDRILLTFDGFDEFKFKFT"
        "DHERHCSP--TDPTSVQ---TLLFNLLQGNLLKNARKVLTSRPDAVSAF-LRKYVRTEFN"
        "LKGFSEEGIELYLRKCHREPGVADRLIHLLQTTSALHGLCHLPVFSWMV--S-KCHQELL"
        "LQDGGSPKTTTDMYLLILQHFLR"
    )

    blast_hit = BlastAlignment('pdb|5IRM|C', full_sequence, 'pdb',
                               198, 452, query_aligned,
                               1, 252, subject_aligned)

    query_range = blast_hit.get_query_range()
    template = TemplateID('5IRM', 'C')

    converted = DomainAlignment(blast_hit.query_alignment,
                                blast_hit.subject_alignment,
                                query_range, template)

    ok_(converted.is_target_residue_covered(317))
Пример #10
0
def test_get_hit_accession_code():
    """Check that the accession code is read from a ``sp|...|...`` hit id."""
    hit_id = 'sp|P00395|COX1_HUMAN'
    blast_hit = BlastAlignment(hit_id, '', '', 0, 0, '', 0, 0, '')
    accession = blast_hit.get_hit_accession_code()
    eq_(accession, 'P00395')
Пример #11
0
def test_get_hit_chain_id():
    """Check that the chain id is read from a ``pdb|...|...`` hit id."""
    hit_id = 'pdb|1crn|A'
    blast_hit = BlastAlignment(hit_id, '', '', 0, 0, '', 0, 0, '')
    chain = blast_hit.get_hit_chain_id()
    eq_(chain, 'A')