Пример #1
0
def SNP_MIP_Gap(chrome,start,end,min_hom_length,max_hom_length,tm_min,tm_max,gc_threshold_min,gc_threshold_max,ref,alt):
    """Build design rows with the SNP placed at each position of a multi-base gap fill.

    For every gap-fill size ``n`` in ``1..gap_num`` and every offset ``j`` in
    ``range(n)``, the SNP occupies index ``n - j - 1`` of the gap fill
    (writing ``+`` for the SNP base: n=2 yields ``-+`` then ``+-``; n=3 yields
    ``--+``, ``-+-``, ``+--``).

    For each (n, j) the function fetches ``max_hom_length`` bases on each side
    of the gap, filters them through ``get_seq``, and calls ``make_Hom_pairs``
    once for the "+" strand and once for the "-" strand (the latter with
    reverse-complemented gap-fill bases).  After each call, trailing newlines
    are stripped from the accumulated text and a tab-separated suffix of
    upstream position, downstream position and ``getName(n, j)`` is appended,
    followed by a newline.

    ``chrome`` is used verbatim in the position labels and with every "chr"
    removed for sequence lookups.  ``ref`` is accepted but not used here.

    Returns the accumulated text.
    """
    chrom_id = chrome.replace("chr","")
    filter_args = dict(MIN_LENGTH=min_hom_length, MIN_TM=tm_min, MAX_TM=tm_max,
                       GC_MIN=gc_threshold_min, GC_MAX=gc_threshold_max)
    rows = ""

    # NOTE(review): gap_num is neither a parameter nor a local; presumably a
    # module-level setting -- confirm it is defined before this is called.
    for n in range(1, gap_num + 1):
        for j in range(n):
            snp_index = n - j - 1           # index of the SNP inside the gap fill
            gap_start = start - snp_index   # first base of the gap fill
            down_start = end + 1 + j        # first base after the gap fill

            up_seq = nibFragger(chrom_id, gap_start - max_hom_length, max_hom_length).lower()
            down_seq = nibFragger(chrom_id, down_start, max_hom_length).lower()
            up_arms = get_seq(up_seq, right2left=False, **filter_args)
            down_arms = get_seq(down_seq, right2left=True, **filter_args)

            # Wild-type gap fill as fetched; the mutant copy carries alt at the SNP index.
            wild_gap = nibFragger(chrom_id, gap_start, n)
            bases = list(wild_gap)
            bases[snp_index] = alt
            mutant_gap = "".join(bases)

            # Element [0][3] of each arm list is used as the arm length for the
            # position labels (assumes both lists are non-empty -- TODO confirm).
            up_pos = "%s:%s-%s" % (chrome, gap_start - up_arms[0][3], gap_start - 1)
            down_pos = "%s:%s-%s" % (chrome, down_start, down_start + down_arms[0][3] - 1)

            for strand in ("+", "-"):
                if strand == "+":
                    gap_m, gap_w = mutant_gap, wild_gap
                else:
                    gap_m, gap_w = revcomp(mutant_gap), revcomp(wild_gap)
                rows += make_Hom_pairs(up_arms, down_arms, hom_strand=strand,
                                       MIP_Mut_Alignment="SNP_on_GF",
                                       GapFillBase_M=gap_m, GapFillBase_W=gap_w)
                # Strip the trailing newline(s) so the suffix lands on the last line.
                rows = rows.rstrip("\n")
                rows += "\t".join(("", up_pos, down_pos, getName(n, j))) + "\n"
    return rows
Пример #2
0
def MNP_MIP(chrome, start, end, min_hom_length, max_hom_length, tm_min, tm_max,
            gc_threshold_min, gc_threshold_max, ref, alt):
    """Build design rows for a multi-nucleotide variant (MNP).

    Three groups of designs are emitted, in this order:

    1. The whole MNP lies at the end of the upstream arm: the upstream
       sequence ends at ``end`` and has ``alt`` written over its tail; the
       gap-fill base is the one at ``end + 1``.
    2. The whole MNP lies at the start of the downstream arm: the downstream
       sequence starts at ``start`` and has ``alt`` written over its head; the
       gap-fill base is the one at ``start - 1``.
    3. One design per base of ``ref`` where that base is the gap fill
       (``ref[k]`` wild type, ``alt[k]`` mutant); the ``alt`` bases before it
       are written over the tail of the upstream arm and the ``alt`` bases
       after it over the head of the downstream arm.

    Every design is emitted for both strands via ``make_Hom_pairs``; the "-"
    strand receives reverse-complemented gap-fill bases.  Arm candidates are
    selected by ``get_seq`` using the length, Tm and GC limits passed in.

    Returns the concatenation of all ``make_Hom_pairs`` results.
    """
    chrom_id = chrome.replace("chr", "")
    arm_filters = {
        "MIN_LENGTH": min_hom_length,
        "MIN_TM": tm_min,
        "MAX_TM": tm_max,
        "GC_MIN": gc_threshold_min,
        "GC_MAX": gc_threshold_max,
    }

    def fetch_arm(position):
        # Lower-cased reference window of max_hom_length bases at `position`.
        return nibFragger(chrom_id, position, max_hom_length).lower()

    def candidate_arms(up_seq, down_seq):
        # Upstream candidates first, then downstream (scanned right-to-left).
        ups = get_seq(up_seq, right2left=False, **arm_filters)
        downs = get_seq(down_seq, right2left=True, **arm_filters)
        return ups, downs

    def pairs(ups, downs, strand, label, gap_m, gap_w):
        return make_Hom_pairs(
            ups, downs, hom_strand=strand, MIP_Mut_Alignment=label,
            GapFillBase_M=gap_m, GapFillBase_W=gap_w)

    chunks = []

    # --- 1. MNP at the tail of the upstream arm (H2 on "+", H1 on "-") ---
    up_seq = fetch_arm(end - max_hom_length + 1)
    down_seq = fetch_arm(end + 2)
    gap_base = nibFragger(chrom_id, end + 1, 1)
    up_seq = replaceString(up_seq, alt, first=False)
    ups, downs = candidate_arms(up_seq, down_seq)
    chunks.append(pairs(ups, downs, "+", "MNP_on_H2", gap_base, gap_base))
    chunks.append(pairs(ups, downs, "-", "MNP_on_H1",
                        revcomp(gap_base), revcomp(gap_base)))

    # --- 2. MNP at the head of the downstream arm (H2 on "-", H1 on "+") ---
    up_seq = fetch_arm(start - max_hom_length - 1)
    down_seq = fetch_arm(start)
    gap_base = nibFragger(chrom_id, start - 1, 1)
    down_seq_mut = replaceString(down_seq, alt, first=True)
    ups, downs = candidate_arms(up_seq, down_seq_mut)
    chunks.append(pairs(ups, downs, "-", "MNP_on_H2",
                        revcomp(gap_base), revcomp(gap_base)))
    chunks.append(pairs(ups, downs, "+", "MNP_on_H1", gap_base, gap_base))

    # --- 3. Each MNP base in turn as the gap fill ---
    mnp_len = len(ref)
    for k in range(mnp_len):
        up_seq = fetch_arm(start - max_hom_length + k)
        down_seq = fetch_arm(start + k + 1)
        gap_mut = alt[k]
        gap_wild = ref[k]

        bases_in_up = k                  # mutated bases covered by the upstream arm
        bases_in_down = mnp_len - k - 1  # mutated bases covered by the downstream arm
        if bases_in_up > 0:
            up_seq = replaceString(up_seq, alt[:k], first=False)
        if bases_in_down > 0:
            down_seq = replaceString(down_seq, alt[k + 1:], first=True)

        ups, downs = candidate_arms(up_seq, down_seq)

        # "+" strand: upstream overlap is reported as H2, downstream as H1.
        plus_label = ("MNP_on_"
                      + ("H2" if bases_in_up > 0 else "")
                      + "Gap"
                      + ("H1" if bases_in_down > 0 else ""))
        chunks.append(pairs(ups, downs, "+", plus_label, gap_mut, gap_wild))

        # "-" strand: the H1/H2 roles are swapped.
        minus_label = ("MNP_on_"
                       + ("H2" if bases_in_down > 0 else "")
                       + "Gap"
                       + ("H1" if bases_in_up > 0 else ""))
        chunks.append(pairs(ups, downs, "-", minus_label,
                            revcomp(gap_mut), revcomp(gap_wild)))

    return "".join(chunks)
Пример #3
0
def SNP_MIP_original(chrome,start,end,min_hom_length,max_hom_length,tm_min,tm_max,gc_threshold_min,gc_threshold_max,ref,alt):
    """Build design rows for a single-nucleotide variant (SNP).

    Designs are emitted in this order:

    1. SNP on the gap fill ("SNP_on_GF"), only when ``ref``/``alt`` pair an
       A/T base with a C/G base.  Gap-fill bases are ``alt`` (mutant) and
       ``ref`` (wild type).
    2. SNP as the last base of the upstream arm, mutant then wild type
       ("SNP_on_H2_*" on "+", "SNP_on_H1_*" on "-").  The gap-fill base is
       the reference base at ``end + 1``.
    3. SNP as the first base of the downstream arm, mutant then wild type
       ("SNP_on_H2_*" on "-", "SNP_on_H1_*" on "+").  The gap-fill base is
       the reference base at ``end - 1``.

    Arm candidates are selected by ``get_seq`` with the supplied length, Tm
    and GC limits; every design is written for both strands through
    ``make_Hom_pairs``, with reverse-complemented gap-fill bases on "-".

    ``chrome`` has every "chr" removed before sequence lookups.

    Returns the concatenation of all ``make_Hom_pairs`` results.
    """
    chrom = chrome.replace("chr","")
    seq_filters = dict(MIN_LENGTH=min_hom_length, MIN_TM=tm_min, MAX_TM=tm_max,
                       GC_MIN=gc_threshold_min, GC_MAX=gc_threshold_max)

    def design(upstream_seq, downstream_seq, strand_labels, gap_m, gap_w):
        # Filter both arms once, then emit one block per (strand, label) in order.
        upstream_list = get_seq(upstream_seq, right2left=False, **seq_filters)
        downstream_list = get_seq(downstream_seq, right2left=True, **seq_filters)
        out = ""
        for strand, label in strand_labels:
            if strand == "+":
                base_m, base_w = gap_m, gap_w
            else:
                base_m, base_w = revcomp(gap_m), revcomp(gap_w)
            out += make_Hom_pairs(upstream_list, downstream_list, hom_strand=strand,
                                  MIP_Mut_Alignment=label,
                                  GapFillBase_M=base_m, GapFillBase_W=base_w)
        return out

    returnStr = ""

    # SNP on the gap fill: only for A/T <-> C/G substitutions.
    if (re.search(r"[AT]",ref) and re.search(r"[CG]",alt)) or (re.search(r"[AT]",alt) and re.search(r"[CG]",ref)):
        upstream_seq = nibFragger(chrom, start-max_hom_length, max_hom_length).lower()
        downstream_seq = nibFragger(chrom, end+1, max_hom_length).lower()
        returnStr += design(upstream_seq, downstream_seq,
                            (("+", "SNP_on_GF"), ("-", "SNP_on_GF")),
                            alt, ref)

    # SNP on the H2 forward strand / H1 reverse strand: the SNP is the last
    # base of the upstream arm and the gap is the base right after it.
    upstream_seq = nibFragger(chrom, start-max_hom_length+1, max_hom_length).lower()
    downstream_seq = nibFragger(chrom, end+2, max_hom_length).lower()
    gapfill = nibFragger(chrom, end+1, 1)
    for allele, tag in ((alt, "M"), (ref, "W")):
        # Overwrite the last character of the upstream arm with the allele.
        upstream_seq_replaced = replaceString(upstream_seq, allele, first=False)
        returnStr += design(upstream_seq_replaced, downstream_seq,
                            (("+", "SNP_on_H2_" + tag), ("-", "SNP_on_H1_" + tag)),
                            gapfill, gapfill)

    # SNP on the H2 reverse strand / H1 forward strand: the SNP is the first
    # base of the downstream arm and the gap is the base right before it.
    upstream_seq = nibFragger(chrom, start-max_hom_length-1, max_hom_length).lower()
    downstream_seq = nibFragger(chrom, end, max_hom_length).lower()
    # Use the base at end-1 as the gap fill.  Previously this base was fetched
    # into an unused variable and the stale end+1 base from the section above
    # was emitted instead, which does not match the arms built here.
    gapfill = nibFragger(chrom, end-1, 1)
    for allele, tag in ((alt, "M"), (ref, "W")):
        # Overwrite the first character of the downstream arm with the allele.
        downstream_seq_replaced = replaceString(downstream_seq, allele, first=True)
        returnStr += design(upstream_seq, downstream_seq_replaced,
                            (("-", "SNP_on_H2_" + tag), ("+", "SNP_on_H1_" + tag)),
                            gapfill, gapfill)

    return returnStr