示例#1
0
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Sun Oct  8 13:24:16 2017

@author: hhuang2
"""
from utils import IMGTdbIO
from Bio.Seq import Seq
from Bio.Alphabet import generic_dna

# Scratch / demo statements. They run at import time and define module-level
# names; none of the functions below read these names.

# Fetch Exon1..Exon8 for two A*23 typings via the project's IMGT DB helper
# (presumably a database stored under '../Database/' -- confirm).
typing1 = 'A*23:17'
Refseq1 = IMGTdbIO.readIMGTsql(typing1, db_fp= '../Database/', field = 'Exon1, Exon2, Exon3, Exon4, Exon5, Exon6, Exon7, Exon8')

typing2 = 'A*23:01:01'
Refseq2 = IMGTdbIO.readIMGTsql(typing2, db_fp= '../Database/', field = 'Exon1, Exon2, Exon3, Exon4, Exon5, Exon6, Exon7, Exon8')

# NOTE(review): `generic_dna` comes from Bio.Alphabet, which was removed in
# Biopython 1.78 -- confirm the Biopython version this script is pinned to.
coding_dna = Seq(Refseq1, generic_dna)
# The next two expression statements discard their results; they only show
# something when typed into an interactive session.
coding_dna.translate()

str(coding_dna.translate())

# Concatenate the items of Refseq1, in index order, into a single string.
seq = ''
for i in range(len(Refseq1)):
    seq += Refseq1[i]
    
    
# typing1/typing2 are re-assigned to the same values used above.
typing1 = 'A*23:17'
typing2 = 'A*23:01:01'
HLAtyping = typing1+'_'+typing2  # "<typing1>_<typing2>"
Exons = 'Exon1, Exon2, Exon3, Exon4, Exon5, Exon6, Exon7, Exon8'  # same field list as passed to readIMGTsql above
def check_twoBlock_seq(seq_count,
                       tplist,
                       unique_Query,
                       unique_HLATyping_list,
                       ID,
                       version="3310"):
    '''
    Assign two-block, one-phase query sequences to the two expected typings.

    Every query sequence is compared, by substring search, against the
    reference Exon2 and Exon3 sequences of ``tplist[0]`` and ``tplist[1]``.
    The checks run in a fixed order (typing 1 Exon2, typing 1 Exon3,
    typing 2 Exon2, typing 2 Exon3) and the first hit decides the assignment.

    Args:
        seq_count: Number of sequences for this locus; only used to print a
            warning when it is greater than 4.
        tplist: Indexable holding the two typing names; ``tplist[0]`` must
            contain the locus before a ``*``.
        unique_Query: Iterable of query sequences.
        unique_HLATyping_list: Typing strings; item 0 is stored as the
            "GLstring" of PS1, item 1 as that of PS2, and the whole object
            as that of PS3.
        ID: Sample identifier, used only in printed messages. Floats are
            truncated to an integer string; anything else goes through
            ``str()``.
        version: Database version forwarded to ``IMGTdbIO.readIMGTsql``.

    Returns:
        dict with any of the keys "PS1", "PS2", "PS3". Each value is a dict
        with the keys "GLstring", "Sequence" (list of query sequences) and
        "blockIDs" (list of ints, parallel to "Sequence"). "PS3" collects
        every sequence that matched neither typing. When only "PS1" was
        found, "PS2" refers to the very same dict object as "PS1".
    '''
    # isinstance() also covers float subclasses (e.g. numpy.float64), which
    # the former `type(ID) == float` test missed; non-string IDs would then
    # crash in the string concatenations below.
    if isinstance(ID, float):
        try:
            ID = str(int(ID))
        except (ValueError, OverflowError):  # NaN / inf cannot be truncated
            ID = str(ID)
    else:
        ID = str(ID)

    Locus = tplist[0].split("*")[0]
    # Presumably one sequence per requested field, in field order
    # (index 0 = Exon2, index 1 = Exon3) -- confirm against IMGTdbIO.
    ARS0seq = IMGTdbIO.readIMGTsql(tplist[0],
                                   field='Exon2, Exon3',
                                   version=version)
    ARS1seq = IMGTdbIO.readIMGTsql(tplist[1],
                                   field='Exon2, Exon3',
                                   version=version)

    if seq_count > 4:
        print("Please check the ID: " + ID + " Locus " + Locus +
              "! More sequences than expected.")

    QueryTyping = {}

    def store(phase, gl_index, seq_item, first_block, exon2_first=False):
        # Record seq_item under `phase`. The GL string is looked up only
        # here, so a missing second typing is not touched unless needed.
        entry = QueryTyping.get(phase)
        if entry is None:
            QueryTyping[phase] = {
                "GLstring": unique_HLATyping_list[gl_index],
                "Sequence": [seq_item],
                "blockIDs": [first_block]
            }
        elif exon2_first:
            # Altered block order: the Exon2 block arrived after another
            # block, so it is placed in front of the first stored sequence.
            QueryTyping[phase] = {
                "GLstring": unique_HLATyping_list[gl_index],
                "Sequence": [seq_item, entry["Sequence"][0]],
                "blockIDs": [1, 2]
            }
        else:
            entry['Sequence'].append(seq_item)
            entry['blockIDs'].append(2)

    for seq_item in unique_Query:
        if ARS0seq[0] in seq_item:  # the first type; block 1; exon2
            store("PS1", 0, seq_item, 1, exon2_first=True)
        elif ARS0seq[1] in seq_item:  # the first type; block 2; exon3
            store("PS1", 0, seq_item, 2)
        elif ARS1seq[0] in seq_item:  # second type; block 1; exon2
            store("PS2", 1, seq_item, 1, exon2_first=True)
        elif ARS1seq[1] in seq_item:  # second type; block 2; exon3
            store("PS2", 1, seq_item, 2)
        else:
            # Keep every unmatched sequence (previously each new one
            # overwrote the last), using the same bookkeeping as
            # check_oneBlock_seq.
            if "PS3" not in QueryTyping:
                QueryTyping["PS3"] = {
                    "GLstring": unique_HLATyping_list,
                    "Sequence": [seq_item],
                    "blockIDs": [1]
                }
            else:
                QueryTyping["PS3"]['Sequence'].append(seq_item)
                QueryTyping["PS3"]['blockIDs'].append(2)
            print(ID + ": The sequence at Locus " + Locus +
                  " doesn't match to either of the Typings")

    if "PS1" in QueryTyping and "PS2" not in QueryTyping:  ## Homozygous
        # Intentionally the same object, not a copy (original behaviour).
        QueryTyping["PS2"] = QueryTyping["PS1"]

    return QueryTyping
def check_DQB102_Block_seq(seq_count,
                           tplist,
                           unique_Query,
                           unique_HLATyping_list,
                           ID,
                           version="3310"):
    '''
    Assign query sequences to two typings where DQB1*02 typings come as two
    blocks and any other typing comes as one block.

    Every query sequence is compared, by substring search, against the
    reference Exon2 and Exon3 sequences of ``tplist[0]`` and ``tplist[1]``.
    The checks run in a fixed order (typing 1 Exon2, typing 1 Exon3,
    typing 2 Exon2, typing 2 Exon3) and the first hit decides the assignment.
    How a hit is recorded depends on whether *that* typing's first field
    (the text before the first ``:``) equals ``"DQB1*02"``.

    Args:
        seq_count: Number of sequences for this locus; only used to print a
            warning when it is greater than 3.
        tplist: Indexable holding the two typing names, e.g. values whose
            text before the first ``:`` may be ``"DQB1*02"``.
        unique_Query: Iterable of query sequences.
        unique_HLATyping_list: Typing strings; item 0 is stored as the
            "GLstring" of PS1, item 1 as that of PS2, and the whole object
            as that of PS3.
        ID: Sample identifier, used only in printed messages. Floats are
            truncated to an integer string; anything else goes through
            ``str()``.
        version: Database version forwarded to ``IMGTdbIO.readIMGTsql``.

    Returns:
        dict with any of the keys "PS1", "PS2", "PS3". Each value is a dict
        with the keys "GLstring", "Sequence" (list of query sequences) and
        "blockIDs" (list of ints, parallel to "Sequence"). "PS3" collects
        every sequence that matched neither typing. When only "PS1" was
        found, "PS2" refers to the very same dict object as "PS1".
    '''
    # isinstance() also covers float subclasses (e.g. numpy.float64), which
    # the former `type(ID) == float` test missed; non-string IDs would then
    # crash in the string concatenations below.
    if isinstance(ID, float):
        try:
            ID = str(int(ID))
        except (ValueError, OverflowError):  # NaN / inf cannot be truncated
            ID = str(ID)
    else:
        ID = str(ID)

    Locus = tplist[0].split("*")[0]
    # Presumably one sequence per requested field, in field order
    # (index 0 = Exon2, index 1 = Exon3) -- confirm against IMGTdbIO.
    ARS0seq = IMGTdbIO.readIMGTsql(tplist[0],
                                   field='Exon2, Exon3',
                                   version=version)
    ARS1seq = IMGTdbIO.readIMGTsql(tplist[1],
                                   field='Exon2, Exon3',
                                   version=version)

    serotype = [tp.split(":")[0] for tp in tplist]
    ps1_two_blocks = serotype[0] == "DQB1*02"
    # The PS2 branches used to test serotype[0]; PS2 belongs to tplist[1],
    # so its block layout must follow the second typing.
    ps2_two_blocks = serotype[1] == "DQB1*02"

    if seq_count > 3:
        print(
            "Please check the ID: " + ID + " Locus " + Locus +
            ", have heterozygotic DQB1*02 types or have more sequences than expected."
        )

    QueryTyping = {}

    def store(phase, gl_index, seq_item, first_block, exon2_first=False):
        # Record seq_item under `phase`. The GL string is looked up only
        # here, so a missing second typing is not touched unless needed.
        entry = QueryTyping.get(phase)
        if entry is None:
            QueryTyping[phase] = {
                "GLstring": unique_HLATyping_list[gl_index],
                "Sequence": [seq_item],
                "blockIDs": [first_block]
            }
        elif exon2_first:
            # Altered block order: the Exon2 block arrived after another
            # block, so it is placed in front of the first stored sequence.
            QueryTyping[phase] = {
                "GLstring": unique_HLATyping_list[gl_index],
                "Sequence": [seq_item, entry["Sequence"][0]],
                "blockIDs": [1, 2]
            }
        else:
            entry['Sequence'].append(seq_item)
            entry['blockIDs'].append(2)

    for seq_item in unique_Query:
        if ARS0seq[0] in seq_item:  # PS1 Exon 2
            # Two-block typing: Exon2 block goes first; otherwise append.
            store("PS1", 0, seq_item, 1, exon2_first=ps1_two_blocks)
        elif ARS0seq[1] in seq_item:  # PS1 Exon 3
            # Two-block typing: a lone Exon3 hit is block 2; for a one-block
            # typing the first stored sequence is always block 1.
            store("PS1", 0, seq_item, 2 if ps1_two_blocks else 1)
        elif ARS1seq[0] in seq_item:  # PS2 Exon 2
            store("PS2", 1, seq_item, 1, exon2_first=ps2_two_blocks)
        elif ARS1seq[1] in seq_item:  # PS2 Exon 3
            store("PS2", 1, seq_item, 2 if ps2_two_blocks else 1)
        else:
            # Keep every unmatched sequence (previously each new one
            # overwrote the last), using the same bookkeeping as
            # check_oneBlock_seq.
            if "PS3" not in QueryTyping:
                QueryTyping["PS3"] = {
                    "GLstring": unique_HLATyping_list,
                    "Sequence": [seq_item],
                    "blockIDs": [1]
                }
            else:
                QueryTyping["PS3"]['Sequence'].append(seq_item)
                QueryTyping["PS3"]['blockIDs'].append(2)
            print(ID + ": The sequence at Locus " + Locus +
                  " doesn't match to either of the Typings")

    if "PS1" in QueryTyping and "PS2" not in QueryTyping:  ## Homozygous
        # Intentionally the same object, not a copy (original behaviour).
        QueryTyping["PS2"] = QueryTyping["PS1"]

    return QueryTyping
def check_oneBlock_seq(seq_count,
                       tplist,
                       unique_Query,
                       unique_HLATyping_list,
                       ID,
                       version="3310"):
    '''
    Assign one-block, one-phase query sequences to the two expected typings.

    The two typings are compared exon group by exon group, in this order:
    Exon2+Exon3, then Exon1+Exon4+Exon5+Exon6, then Exon7. The first group in
    which their reference sequences differ is used to tell them apart: a
    query sequence belongs to a typing when it contains every reference exon
    of that group (substring search). Typing 1 is tested before typing 2.
    If none of the groups differ, sequences are handed out in arrival order
    (first to PS1, second to PS2, the rest appended to PS1) and a message is
    printed for each.

    The reference exons are read once, before the loop, so the extra
    database reads now happen even when ``unique_Query`` is empty.

    Args:
        seq_count: Number of sequences for this locus; only used to print a
            warning when it is greater than 2.
        tplist: Indexable holding the two typing names; ``tplist[0]`` must
            contain the locus before a ``*``.
        unique_Query: Iterable of query sequences.
        unique_HLATyping_list: Typing strings; item 0 is stored as the
            "GLstring" of PS1, item 1 as that of PS2, and the whole object
            as that of PS3.
        ID: Sample identifier, used only in printed messages. Floats are
            truncated to an integer string; anything else goes through
            ``str()``.
        version: Database version forwarded to ``IMGTdbIO.readIMGTsql``.

    Returns:
        dict with any of the keys "PS1", "PS2", "PS3". Each value is a dict
        with the keys "GLstring", "Sequence" (list of query sequences) and
        "blockIDs" (list of ints, parallel to "Sequence": 1 for the first
        stored sequence, 2 for each later one). "PS3" collects every
        sequence that matched neither typing. When only "PS1" was found,
        "PS2" refers to the very same dict object as "PS1".
    '''
    # isinstance() also covers float subclasses (e.g. numpy.float64), which
    # the former `type(ID) == float` test missed; non-string IDs would then
    # crash in the string concatenations below.
    if isinstance(ID, float):
        try:
            ID = str(int(ID))
        except (ValueError, OverflowError):  # NaN / inf cannot be truncated
            ID = str(ID)
    else:
        ID = str(ID)

    Locus = tplist[0].split("*")[0]

    # Presumably one sequence per requested field, in field order
    # -- confirm against IMGTdbIO.
    ARS0seq = IMGTdbIO.readIMGTsql(tplist[0],
                                   field='Exon2, Exon3',
                                   version=version)
    ARS1seq = IMGTdbIO.readIMGTsql(tplist[1],
                                   field='Exon2, Exon3',
                                   version=version)

    if seq_count > 2:
        print("Please check the ID: " + ID + " Locus " + Locus +
              "! More sequences than expected.")

    # Choose, once, the reference exons that distinguish the two typings.
    # None of this depends on the query sequence, so it is hoisted out of
    # the loop instead of re-reading the database per sequence.
    if ARS0seq != ARS1seq:  # the two types have different ARS regions
        ref0 = (ARS0seq[0], ARS0seq[1])
        ref1 = (ARS1seq[0], ARS1seq[1])
    else:  # same Exon2/Exon3: fall back to Exon1, 4, 5, 6
        ARS0seq1456 = IMGTdbIO.readIMGTsql(
            tplist[0], field='Exon1, Exon4, Exon5, Exon6', version=version)
        ARS1seq1456 = IMGTdbIO.readIMGTsql(
            tplist[1], field='Exon1, Exon4, Exon5, Exon6', version=version)
        if ARS0seq1456 != ARS1seq1456:
            ref0 = (ARS0seq1456[0], ARS0seq1456[1], ARS0seq1456[2],
                    ARS0seq1456[3])
            ref1 = (ARS1seq1456[0], ARS1seq1456[1], ARS1seq1456[2],
                    ARS1seq1456[3])
        else:  # still identical: last resort is Exon7
            ARS0seq7 = IMGTdbIO.readIMGTsql(tplist[0],
                                            field='Exon7',
                                            version=version)
            ARS1seq7 = IMGTdbIO.readIMGTsql(tplist[1],
                                            field='Exon7',
                                            version=version)
            if ARS0seq7 != ARS1seq7:
                ref0 = (ARS0seq7[0], )
                ref1 = (ARS1seq7[0], )
            else:
                # Exons 1-7 are identical (Exon8 is never compared).
                ref0 = ref1 = None

    QueryTyping = {}

    def store(phase, seq_item):
        # First sequence of a phase is block 1, every later one block 2.
        entry = QueryTyping.get(phase)
        if entry is None:
            if phase == "PS1":
                glstring = unique_HLATyping_list[0]
            elif phase == "PS2":
                glstring = unique_HLATyping_list[1]
            else:  # PS3 is not tied to a single typing
                glstring = unique_HLATyping_list
            QueryTyping[phase] = {
                "GLstring": glstring,
                "Sequence": [seq_item],
                "blockIDs": [1]
            }
        else:
            entry['Sequence'].append(seq_item)
            entry['blockIDs'].append(2)

    for seq_item in unique_Query:
        if ref0 is None:
            # Indistinguishable typings: fill PS1, then PS2, then keep
            # appending to PS1.
            if "PS1" in QueryTyping and "PS2" not in QueryTyping:
                store("PS2", seq_item)
            else:
                store("PS1", seq_item)
            print(
                ID + ": The sequence at Locus " + Locus +
                " two typings have exactly the same Exon sequences. Cannot distinguish by Exons."
            )
        elif all(exon in seq_item for exon in ref0):  # the first type
            store("PS1", seq_item)
        elif all(exon in seq_item for exon in ref1):  # second type
            store("PS2", seq_item)
        else:
            # Unmatched sequences accumulate in every branch; the Exon7
            # branch used to overwrite PS3 instead.
            store("PS3", seq_item)
            print(ID + ": The sequence at Locus " + Locus +
                  " doesn't match to either of the Typings")

    if "PS1" in QueryTyping and "PS2" not in QueryTyping:  ## Homozygous
        # Intentionally the same object, not a copy (original behaviour).
        QueryTyping["PS2"] = QueryTyping["PS1"]

    return QueryTyping