#!/usr/bin/env python3 # -*- coding: utf-8 -*- """ Created on Sun Oct 8 13:24:16 2017 @author: hhuang2 """ from utils import IMGTdbIO from Bio.Seq import Seq from Bio.Alphabet import generic_dna typing1 = 'A*23:17' Refseq1 = IMGTdbIO.readIMGTsql(typing1, db_fp= '../Database/', field = 'Exon1, Exon2, Exon3, Exon4, Exon5, Exon6, Exon7, Exon8') typing2 = 'A*23:01:01' Refseq2 = IMGTdbIO.readIMGTsql(typing2, db_fp= '../Database/', field = 'Exon1, Exon2, Exon3, Exon4, Exon5, Exon6, Exon7, Exon8') coding_dna = Seq(Refseq1, generic_dna) coding_dna.translate() str(coding_dna.translate()) seq = '' for i in range(len(Refseq1)): seq += Refseq1[i] typing1 = 'A*23:17' typing2 = 'A*23:01:01' HLAtyping = typing1+'_'+typing2 Exons = 'Exon1, Exon2, Exon3, Exon4, Exon5, Exon6, Exon7, Exon8'
def check_twoBlock_seq(seq_count, tplist, unique_Query, unique_HLATyping_list, ID, version="3310"):
    """Assign two-block, one-phase query sequences to the two given typings.

    Each query sequence is tested, in this order, for containing the
    reference Exon2 of ``tplist[0]``, the reference Exon3 of ``tplist[0]``,
    the reference Exon2 of ``tplist[1]`` and the reference Exon3 of
    ``tplist[1]``. The first hit decides the phase set ("PS1" for
    ``tplist[0]``, "PS2" for ``tplist[1]``) and the block (1 for Exon2,
    2 for Exon3).

    Parameters
    ----------
    seq_count : number of sequences for this ID/locus; only used to print a
        warning when it exceeds 4.
    tplist : the two typing names; ``tplist[0]`` and ``tplist[1]`` are looked
        up with ``IMGTdbIO.readIMGTsql``.
    unique_Query : iterable of query sequences (tested with ``in``).
    unique_HLATyping_list : GL strings; element 0 is stored for PS1 and
        element 1 for PS2 (element 1 is only read when a PS2 match occurs).
    ID : sample identifier used in printed messages. Floats are converted
        via ``int`` first; any other non-string value is passed to ``str``.
    version : database version forwarded to ``IMGTdbIO.readIMGTsql``.

    Returns
    -------
    dict mapping "PS1" / "PS2" / "PS3" to
    ``{"GLstring": ..., "Sequence": [...], "blockIDs": [...]}``.
    "PS3" holds a sequence that matched neither typing; only the last such
    sequence is kept. When only "PS1" was found, "PS2" is set to the very
    same dict object (homozygous case).
    """
    # isinstance instead of ``type(ID) == float`` so that float subclasses
    # (e.g. numpy.float64 coming out of a pandas column) are normalised too.
    if isinstance(ID, float):
        ID = str(int(ID))
    elif not isinstance(ID, str):
        # the messages below are built by string concatenation
        ID = str(ID)

    Locus = tplist[0].split("*")[0]
    ARS0seq = IMGTdbIO.readIMGTsql(tplist[0], field='Exon2, Exon3', version=version)
    ARS1seq = IMGTdbIO.readIMGTsql(tplist[1], field='Exon2, Exon3', version=version)

    if seq_count > 4:
        print("Please check the ID: " + ID + " Locus " + Locus +
              "! More sequences than expected.")

    # (phase key, index into unique_HLATyping_list, reference exons,
    #  index of the exon inside the reference, block ID).
    # Order matters: the first matching candidate wins. The exon and the
    # GL string are only looked up when a candidate is actually tested/hit.
    candidates = (
        ("PS1", 0, ARS0seq, 0, 1),
        ("PS1", 0, ARS0seq, 1, 2),
        ("PS2", 1, ARS1seq, 0, 1),
        ("PS2", 1, ARS1seq, 1, 2),
    )

    QueryTyping = {}
    for seq_item in unique_Query:
        for phase, typing_idx, ref_exons, exon_idx, block_id in candidates:
            if ref_exons[exon_idx] not in seq_item:
                continue
            if phase not in QueryTyping:
                QueryTyping[phase] = {
                    "GLstring": unique_HLATyping_list[typing_idx],
                    "Sequence": [seq_item],
                    "blockIDs": [block_id],
                }
            elif block_id == 1:
                # Altered block order: the Exon2 block arrived after another
                # block of this phase, so it is moved to the front.
                QueryTyping[phase] = {
                    "GLstring": unique_HLATyping_list[typing_idx],
                    "Sequence": [seq_item, QueryTyping[phase]["Sequence"][0]],
                    "blockIDs": [1, 2],
                }
            else:
                QueryTyping[phase]["Sequence"].append(seq_item)
                QueryTyping[phase]["blockIDs"].append(2)
            break
        else:
            # No reference exon was found in this sequence.
            QueryTyping["PS3"] = {
                "GLstring": unique_HLATyping_list,
                "Sequence": [seq_item],
                "blockIDs": [1],
            }
            print(ID + ": The sequence at Locus " + Locus +
                  " doesn't match to either of the Typings")

    # Homozygous: PS2 shares the PS1 entry (same object, not a copy).
    if "PS1" in QueryTyping and "PS2" not in QueryTyping:
        QueryTyping["PS2"] = QueryTyping["PS1"]

    return QueryTyping
def check_DQB102_Block_seq(seq_count, tplist, unique_Query, unique_HLATyping_list, ID, version="3310"):
    """Assign query sequences to two typings when DQB1*02 alleles use two blocks.

    Each query sequence is tested, in this order, for containing the
    reference Exon2 of ``tplist[0]``, the reference Exon3 of ``tplist[0]``,
    the reference Exon2 of ``tplist[1]`` and the reference Exon3 of
    ``tplist[1]``. A hit on ``tplist[0]`` goes to phase set "PS1", a hit on
    ``tplist[1]`` to "PS2".

    How the hit is recorded depends on whether the matched typing starts with
    ``"DQB1*02"`` (text before the first ":"):

    * DQB1*02 typing (two blocks): an Exon2 hit is block 1 and is placed
      first even if it arrives second; an Exon3 hit is block 2.
    * any other typing (one block): the first hit is block 1, later hits
      are appended as block 2.

    Parameters
    ----------
    seq_count : number of sequences for this ID/locus; only used to print a
        warning when it exceeds 3.
    tplist : the two typing names; ``tplist[0]`` and ``tplist[1]`` are looked
        up with ``IMGTdbIO.readIMGTsql``.
    unique_Query : iterable of query sequences (tested with ``in``).
    unique_HLATyping_list : GL strings; element 0 is stored for PS1 and
        element 1 for PS2 (element 1 is only read when a PS2 match occurs).
    ID : sample identifier used in printed messages. Floats are converted
        via ``int`` first; any other non-string value is passed to ``str``.
    version : database version forwarded to ``IMGTdbIO.readIMGTsql``.

    Returns
    -------
    dict mapping "PS1" / "PS2" / "PS3" to
    ``{"GLstring": ..., "Sequence": [...], "blockIDs": [...]}``.
    "PS3" holds a sequence that matched neither typing; only the last such
    sequence is kept. When only "PS1" was found, "PS2" is set to the very
    same dict object (homozygous case).
    """
    # isinstance instead of ``type(ID) == float`` so that float subclasses
    # (e.g. numpy.float64 coming out of a pandas column) are normalised too.
    if isinstance(ID, float):
        ID = str(int(ID))
    elif not isinstance(ID, str):
        # the messages below are built by string concatenation
        ID = str(ID)

    Locus = tplist[0].split("*")[0]
    ARS0seq = IMGTdbIO.readIMGTsql(tplist[0], field='Exon2, Exon3', version=version)
    ARS1seq = IMGTdbIO.readIMGTsql(tplist[1], field='Exon2, Exon3', version=version)
    serotype = [tp.split(":")[0] for tp in tplist]

    if seq_count > 3:
        print(
            "Please check the ID: " + ID + " Locus " + Locus +
            ", have heterozygotic DQB1*02 types or have more sequences than expected."
        )

    # (phase key, index into tplist / unique_HLATyping_list, reference exons,
    #  index of the exon inside the reference).
    # Order matters: the first matching candidate wins. The exon and the
    # GL string are only looked up when a candidate is actually tested/hit.
    candidates = (
        ("PS1", 0, ARS0seq, 0),
        ("PS1", 0, ARS0seq, 1),
        ("PS2", 1, ARS1seq, 0),
        ("PS2", 1, ARS1seq, 1),
    )

    QueryTyping = {}
    for seq_item in unique_Query:
        for phase, typing_idx, ref_exons, exon_idx in candidates:
            if ref_exons[exon_idx] not in seq_item:
                continue

            # Use the serotype of the typing that matched. The PS2 branches
            # previously looked at serotype[0], i.e. at the other allele.
            two_blocks = serotype[typing_idx] == "DQB1*02"
            is_exon2 = exon_idx == 0

            if phase not in QueryTyping:
                # Only the Exon3 block of a two-block typing starts as block 2.
                first_block = 2 if (two_blocks and not is_exon2) else 1
                QueryTyping[phase] = {
                    "GLstring": unique_HLATyping_list[typing_idx],
                    "Sequence": [seq_item],
                    "blockIDs": [first_block],
                }
            elif two_blocks and is_exon2:
                # Altered block order: the Exon2 block arrived after another
                # block of this phase, so it is moved to the front.
                QueryTyping[phase] = {
                    "GLstring": unique_HLATyping_list[typing_idx],
                    "Sequence": [seq_item, QueryTyping[phase]["Sequence"][0]],
                    "blockIDs": [1, 2],
                }
            else:
                QueryTyping[phase]["Sequence"].append(seq_item)
                QueryTyping[phase]["blockIDs"].append(2)
            break
        else:
            # No reference exon was found in this sequence.
            QueryTyping["PS3"] = {
                "GLstring": unique_HLATyping_list,
                "Sequence": [seq_item],
                "blockIDs": [1],
            }
            print(ID + ": The sequence at Locus " + Locus +
                  " doesn't match to either of the Typings")

    # Homozygous: PS2 shares the PS1 entry (same object, not a copy).
    if "PS1" in QueryTyping and "PS2" not in QueryTyping:
        QueryTyping["PS2"] = QueryTyping["PS1"]

    return QueryTyping
def check_oneBlock_seq(seq_count, tplist, unique_Query, unique_HLATyping_list, ID, version="3310"):
    """Assign one-block, one-phase query sequences to the two given typings.

    The two typings are told apart by the first set of reference exons in
    which they differ:

    1. Exon2 + Exon3 -- a sequence must contain both exons of a typing;
    2. otherwise Exon1, Exon4, Exon5, Exon6 -- all four must be contained;
    3. otherwise Exon7 -- it must be contained;
    4. otherwise the typings cannot be distinguished: the first sequence
       becomes "PS1", the second "PS2", any further one is appended to
       "PS1", and a message is printed for every sequence.

    The exon sets for steps 2 and 3 are only read from the database when
    needed, and only once per call (on the first query sequence).

    Parameters
    ----------
    seq_count : number of sequences for this ID/locus; only used to print a
        warning when it exceeds 2.
    tplist : the two typing names; ``tplist[0]`` and ``tplist[1]`` are looked
        up with ``IMGTdbIO.readIMGTsql``.
    unique_Query : iterable of query sequences (tested with ``in``).
    unique_HLATyping_list : GL strings; element 0 is stored for PS1 and
        element 1 for PS2 (element 1 is only read when PS2 is created).
    ID : sample identifier used in printed messages. Floats are converted
        via ``int`` first; any other non-string value is passed to ``str``.
    version : database version forwarded to ``IMGTdbIO.readIMGTsql``.

    Returns
    -------
    dict mapping "PS1" / "PS2" / "PS3" to
    ``{"GLstring": ..., "Sequence": [...], "blockIDs": [...]}``.
    "PS3" collects sequences that matched neither typing; in steps 1 and 2
    they accumulate, in step 3 only the last one is kept. When only "PS1"
    was found, "PS2" is set to the very same dict object (homozygous case).
    """
    # isinstance instead of ``type(ID) == float`` so that float subclasses
    # (e.g. numpy.float64 coming out of a pandas column) are normalised too.
    if isinstance(ID, float):
        ID = str(int(ID))
    elif not isinstance(ID, str):
        # the messages below are built by string concatenation
        ID = str(ID)

    Locus = tplist[0].split("*")[0]
    ARS0seq = IMGTdbIO.readIMGTsql(tplist[0], field='Exon2, Exon3', version=version)
    ARS1seq = IMGTdbIO.readIMGTsql(tplist[1], field='Exon2, Exon3', version=version)

    if seq_count > 2:
        print("Please check the ID: " + ID + " Locus " + Locus +
              "! More sequences than expected.")

    QueryTyping = {}

    def _pick_distinguishing_exons():
        # Return (refs of typing 0, refs of typing 1, number of exons that
        # must all match, whether unmatched sequences accumulate in PS3),
        # or None when every compared exon is identical.
        if ARS0seq != ARS1seq:
            return ARS0seq, ARS1seq, 2, True
        exons1456_0 = IMGTdbIO.readIMGTsql(
            tplist[0], field='Exon1, Exon4, Exon5, Exon6', version=version)
        exons1456_1 = IMGTdbIO.readIMGTsql(
            tplist[1], field='Exon1, Exon4, Exon5, Exon6', version=version)
        if exons1456_0 != exons1456_1:
            return exons1456_0, exons1456_1, 4, True
        exon7_0 = IMGTdbIO.readIMGTsql(tplist[0], field='Exon7', version=version)
        exon7_1 = IMGTdbIO.readIMGTsql(tplist[1], field='Exon7', version=version)
        if exon7_0 != exon7_1:
            return exon7_0, exon7_1, 1, False
        return None

    def _record(phase, gl_string_of, seq_item):
        # First sequence of a phase is block 1; later ones are appended as
        # block 2. ``gl_string_of`` is called only when the entry is created.
        if phase not in QueryTyping:
            QueryTyping[phase] = {
                "GLstring": gl_string_of(),
                "Sequence": [seq_item],
                "blockIDs": [1],
            }
        else:
            QueryTyping[phase]["Sequence"].append(seq_item)
            QueryTyping[phase]["blockIDs"].append(2)

    def _contains_all(ref_exons, count, seq_item):
        # Short-circuits on the first missing exon, like a chained ``and``.
        return all(ref_exons[k] in seq_item for k in range(count))

    _UNRESOLVED = object()
    plan = _UNRESOLVED

    for seq_item in unique_Query:
        if plan is _UNRESOLVED:
            # Loop-invariant: decided once instead of once per sequence.
            plan = _pick_distinguishing_exons()

        if plan is None:
            # All compared exons are the same for both typings.
            if "PS1" not in QueryTyping:
                _record("PS1", lambda: unique_HLATyping_list[0], seq_item)
            elif "PS2" not in QueryTyping:
                _record("PS2", lambda: unique_HLATyping_list[1], seq_item)
            else:
                _record("PS1", lambda: unique_HLATyping_list[0], seq_item)
            print(
                ID + ": The sequence at Locus " + Locus +
                " two typings have exactly the same Exon sequences. Cannot distinguish by Exons."
            )
            continue

        refs0, refs1, required, accumulate_unmatched = plan
        if _contains_all(refs0, required, seq_item):
            _record("PS1", lambda: unique_HLATyping_list[0], seq_item)
        elif _contains_all(refs1, required, seq_item):
            _record("PS2", lambda: unique_HLATyping_list[1], seq_item)
        else:
            if accumulate_unmatched:
                _record("PS3", lambda: unique_HLATyping_list, seq_item)
            else:
                # Exon7 path: an unmatched sequence replaces any earlier one.
                QueryTyping["PS3"] = {
                    "GLstring": unique_HLATyping_list,
                    "Sequence": [seq_item],
                    "blockIDs": [1],
                }
            print(ID + ": The sequence at Locus " + Locus +
                  " doesn't match to either of the Typings")

    # Homozygous: PS2 shares the PS1 entry (same object, not a copy).
    if "PS1" in QueryTyping and "PS2" not in QueryTyping:
        QueryTyping["PS2"] = QueryTyping["PS1"]

    return QueryTyping