示例#1
0
def getAltExonLength(variant,
                     exonicPortionSize,
                     intronicPortionSize,
                     deNovoDonorInRefAcc=False,
                     donor=True):
    """
    Computes the length of the alternate exon that results when splicing
    occurs at the position implied by the max MaxEntScan sliding window
    exonicPortionSize and intronicPortionSize are forwarded to the sliding window
      and new splice position helpers
    donor=True evaluates a de novo donor, donor=False a de novo acceptor
    deNovoDonorInRefAcc=True if looking for a de novo donor in a ref acceptor site
    Returns the alternate exon length
    """
    # work out which exon ("exonN") the variant is associated with
    if varInExon(variant):
        exonName = getVarExonNumberSNS(variant)
    elif varInSpliceRegion(variant, donor=donor, deNovo=False):
        exonName = getVarSpliceRegionBounds(variant,
                                            donor=donor,
                                            deNovo=False)["exonName"]
    else:
        exonName = getClosestExonNumberIntronicSNS(variant,
                                                   "enigma",
                                                   donor=donor)
    allExonBounds = extract.getExonBoundaries(variant)
    windowInfo = getMaxMaxEntScanScoreSlidingWindowSNS(
        variant,
        exonicPortionSize,
        STD_DE_NOVO_LENGTH,
        donor=donor,
        deNovoDonorInRefAcc=deNovoDonorInRefAcc)
    splicePos = extract.getNewSplicePosition(
        variant["Pos"],
        verify.getVarStrand(variant),
        windowInfo["varWindowPosition"],
        windowInfo["inExonicPortion"],
        exonicPortionSize,
        intronicPortionSize,
        donor=donor)

    if verify.getVarStrand(variant) == "-":
        # minus strand: genomic numbering decreases along the transcript
        if donor:
            # +1 so that splicePos itself is counted
            return int(allExonBounds[exonName]["exonStart"]) - splicePos + 1
        # -1 offset kept exactly as in the minus strand acceptor formula
        return splicePos - int(allExonBounds[exonName]["exonEnd"]) - 1

    refExonEnd = int(allExonBounds[exonName]["exonEnd"])
    if donor:
        # plus strand donor: shorten the reference exon by the distance
        # between the reference exon end and the new splice position
        return getRefExonLength(variant, donor=donor) - (refExonEnd - splicePos)
    return refExonEnd - splicePos
示例#2
0
def getClosestExonNumberIntronicSNS(variant, boundaries, donor=True):
    """
    Given a variant and boundaries (either priors or enigma),
    1. Checks that variant is in an intron or UTR (and not in an exon) and is a substitution variant
    2. Determines the exon whose splice edge is closest to that variant
       donor=True measures from each exon end, donor=False measures to each exon start;
       only exons lying on the appropriate side of the variant (positive distance) are candidates
    Returns the closest exon in the format "exonN"
    If variant does not pass the checks in step 1, returns "exon0"
    Raises ValueError if the variant passes the checks but no exon lies on the required side
    """
    varLoc = getVarLocation(variant, boundaries)
    if varLoc not in ("intron_variant", "UTR_variant"):
        return "exon0"
    if extract.getVarType(variant) != "substitution" or varInExon(variant):
        return "exon0"

    varGenPos = int(variant["Pos"])
    # strand does not change between exons, so look it up once instead of per iteration
    plusStrand = verify.getVarStrand(variant) == "+"
    exonIntronDiffs = {}
    for exon, bounds in extract.getExonBoundaries(variant).items():
        if donor:
            exonIntronDiff = varGenPos - int(bounds["exonEnd"])
        else:
            exonIntronDiff = int(bounds["exonStart"]) - varGenPos
        if not plusStrand:
            # genomic numbering runs in the opposite direction on the minus strand
            exonIntronDiff = -exonIntronDiff
        if exonIntronDiff > 0:
            exonIntronDiffs[exon] = exonIntronDiff

    if not exonIntronDiffs:
        # min() would raise a ValueError with an uninformative message here
        raise ValueError("no exon found on the %s side of variant at position %s"
                         % ("donor" if donor else "acceptor", varGenPos))
    # ties resolve to the first exon encountered, as with min() over the items
    return min(exonIntronDiffs, key=exonIntronDiffs.get)
示例#3
0
def varInCIDomain(variant, boundaries):
    """
    Reports whether an exonic variant lies inside a clinically important (CI) domain
    boundaries selects which set of CI domain boundaries (ENIGMA or PRIORS) is consulted
    Only BRCA1 and BRCA2 have domain tables here; any other gene symbol gives False
    Returns True if variant position is within any CI domain, False otherwise
    """
    position = int(variant["Pos"])
    gene = variant["Gene_Symbol"]
    strand = verify.getVarStrand(variant)
    # variants outside of exons are never reported as CI domain variants
    if not varInExon(variant):
        return False

    if gene == "BRCA1":
        domainTable = brca1CIDomains[boundaries]
    elif gene == "BRCA2":
        domainTable = brca2CIDomains[boundaries]
    else:
        return False

    # stops at the first domain that contains the variant
    return any(
        verify.checkWithinBoundaries(strand, position, dom["domStart"], dom["domEnd"])
        for dom in domainTable.values())
示例#4
0
def varInCIDomain(variant, boundaries):
    """
    Reports whether an exonic variant lies inside a clinically important (CI) domain
    boundaries selects which set of CI domain boundaries (ENIGMA or PRIORS) is consulted
    Only BRCA1 and BRCA2 have domain tables here; any other gene symbol gives False
    Returns True if variant position is within any CI domain, False otherwise
    """
    position = int(variant["Pos"])
    gene = variant["Gene_Symbol"]
    strand = verify.getVarStrand(variant)
    # variants outside of exons are never reported as CI domain variants
    if not varInExon(variant):
        return False

    if gene == "BRCA1":
        domainTable = brca1CIDomains[boundaries]
    elif gene == "BRCA2":
        domainTable = brca2CIDomains[boundaries]
    else:
        return False

    # stops at the first domain that contains the variant
    return any(
        verify.checkWithinBoundaries(strand, position, dom["domStart"], dom["domEnd"])
        for dom in domainTable.values())
示例#5
0
def getRefSpliceDonorBoundaries(variant, intronicLength, exonicLength):
    """
    Builds the reference splice donor region for the exons of the variant's transcript
    intronicLength = number of intronic bp counted as part of the splice donor region
    exonicLength = number of exonic bp counted as part of the splice donor region
    The region is the last exonicLength bp of the exon plus the first intronicLength bp of the intron
    exon24 (BRCA1) and exon27 (BRCA2) are excluded - presumably the final exon, which has no donor
    Returns a dictionary in the format:
    key = exon name, value = dictionary with "donorStart" and "donorEnd" for that exon
    """
    # copy so that removing an exon does not touch the dictionary returned by getExonBoundaries
    exonsWithDonor = getExonBoundaries(variant).copy()
    gene = variant["Gene_Symbol"]
    if gene == "BRCA1":
        exonsWithDonor.pop("exon24")
    elif gene == "BRCA2":
        exonsWithDonor.pop("exon27")

    onPlusStrand = verify.getVarStrand(variant) == "+"
    regions = {}
    for exonName, bounds in exonsWithDonor.items():
        exonEnd = int(bounds["exonEnd"])
        if onPlusStrand:
            # the +1 sits on the 5' (exonic) edge of the region, following the
            # RefSeq numbering adjustment used for plus strand transcripts
            regions[exonName] = {
                "donorStart": exonEnd - exonicLength + 1,
                "donorEnd": exonEnd + intronicLength,
            }
        else:
            # minus strand: coordinates decrease along the transcript, so the
            # +1 adjustment is applied to the intronic edge instead
            regions[exonName] = {
                "donorStart": exonEnd + exonicLength,
                "donorEnd": exonEnd - intronicLength + 1,
            }

    return regions
示例#6
0
def getVarSpliceRegionBounds(variant, donor=False, deNovo=False):
    """
    Locates the splice region that contains the variant and returns its boundaries
    If donor=True, looks in the reference splice donor regions
      *function CANNOT be used to return de novo donor splice region bounds*
    If donor=False and deNovo=False, looks in the reference splice acceptor regions
    If donor=False and deNovo=True, looks in the de novo splice acceptor regions
    Returns a dictionary with "exonName" plus the region start and end
      (keys "donorStart"/"donorEnd" or "acceptorStart"/"acceptorEnd")
    Returns None if the variant is not in a matching splice region
    """
    if not varInSpliceRegion(variant, donor=donor, deNovo=deNovo):
        return None

    if donor:
        startKey, endKey = "donorStart", "donorEnd"
        candidateRegions = extract.getRefSpliceDonorBoundaries(
            variant, STD_DONOR_INTRONIC_LENGTH, STD_DONOR_EXONIC_LENGTH)
    else:
        startKey, endKey = "acceptorStart", "acceptorEnd"
        # de novo acceptor regions only differ in how much of the exon is included
        exonicLength = STD_DE_NOVO_LENGTH if deNovo else STD_ACC_EXONIC_LENGTH
        candidateRegions = extract.getSpliceAcceptorBoundaries(
            variant, STD_ACC_INTRONIC_LENGTH, exonicLength)

    for exonName, region in candidateRegions.items():
        start = region[startKey]
        end = region[endKey]
        if verify.checkWithinBoundaries(verify.getVarStrand(variant), int(variant["Pos"]), start, end):
            return {"exonName": exonName,
                    startKey: start,
                    endKey: end}
    # no region contained the variant position
    return None
示例#7
0
def getSpliceAcceptorBoundaries(variant, intronicLength, exonicLength):
    """
    Builds the splice acceptor region for the exons of the variant's transcript
    intronicLength = number of intronic bp counted as part of the splice acceptor region
    exonicLength = number of exonic bp counted as part of the splice acceptor region
    The region is the last intronicLength bp of the intron plus the first exonicLength bp of the exon
    exon1 is excluded for BRCA1 and BRCA2 - presumably because the first exon has no acceptor
    Returns a dictionary in the format:
    key = exon name, value = dictionary with "acceptorStart" and "acceptorEnd" for that exon
    """
    # copy so that removing an exon does not touch the dictionary returned by getExonBoundaries
    exonsWithAcceptor = getExonBoundaries(variant).copy()
    if variant["Gene_Symbol"] in ("BRCA1", "BRCA2"):
        exonsWithAcceptor.pop("exon1")

    onPlusStrand = verify.getVarStrand(variant) == "+"
    regions = {}
    for exonName, bounds in exonsWithAcceptor.items():
        exonStart = int(bounds["exonStart"])
        if onPlusStrand:
            # the +1 sits on the 5' (intronic) edge of the region, following the
            # RefSeq numbering adjustment used for plus strand transcripts
            regions[exonName] = {
                "acceptorStart": exonStart - intronicLength + 1,
                "acceptorEnd": exonStart + exonicLength,
            }
        else:
            # minus strand: coordinates decrease along the transcript, so the
            # +1 adjustment is applied to the exonic edge instead
            regions[exonName] = {
                "acceptorStart": exonStart + intronicLength,
                "acceptorEnd": exonStart - exonicLength + 1,
            }

    return regions
示例#8
0
def getRefExonLength(variant, donor=True):
    """
    Returns the length of the reference exon associated with a variant
    Variant in an exon: that exon is used
    Variant in a reference splice region: the exon belonging to that splice region is used
    Variant elsewhere: the closest exon is used, chosen via the donor argument
      (donor=True measures from exon ends, donor=False measures to exon starts)
    """
    if varInExon(variant):
        exonName = getVarExonNumberSNS(variant)
    elif varInSpliceRegion(variant, donor=donor, deNovo=False):
        exonName = getVarSpliceRegionBounds(variant, donor=donor, deNovo=False)["exonName"]
    else:
        exonName = getClosestExonNumberIntronicSNS(variant, "enigma", donor=donor)

    allExonBounds = extract.getExonBoundaries(variant)
    onMinusStrand = verify.getVarStrand(variant) == "-"
    start = int(allExonBounds[exonName]["exonStart"])
    end = int(allExonBounds[exonName]["exonEnd"])
    # no +1 in either formula: per the RefSeq numbering used here one of the two
    # stored bounds is already an intronic base (exonEnd on the minus strand,
    # exonStart on the plus strand), so the plain difference is the exon length
    if onMinusStrand:
        return start - end
    return end - start
示例#9
0
def varInSpliceRegion(variant, donor=False, deNovo=False):
    """
    Determines whether a variant lies in a splice donor/acceptor region of the reference transcript
    donor=True, deNovo=False: reference splice donor region
    donor=True, deNovo=True: de novo splice donor region (reference donor region or anywhere in an exon)
    donor=False, deNovo=False: reference splice acceptor region
    donor=False, deNovo=True: de novo splice acceptor region
    Returns True if variant is in the requested splice region, False otherwise
    """
    if donor:
        # reference donor regions; the exon itself is added below for the de novo case
        startKey, endKey = "donorStart", "donorEnd"
        regionBounds = extract.getRefSpliceDonorBoundaries(variant, STD_DONOR_INTRONIC_LENGTH, STD_DONOR_EXONIC_LENGTH)
    elif deNovo:
        startKey, endKey = "acceptorStart", "acceptorEnd"
        regionBounds = extract.getSpliceAcceptorBoundaries(variant, STD_ACC_INTRONIC_LENGTH, STD_DE_NOVO_LENGTH)
    else:
        startKey, endKey = "acceptorStart", "acceptorEnd"
        regionBounds = extract.getSpliceAcceptorBoundaries(variant, STD_ACC_INTRONIC_LENGTH, STD_ACC_EXONIC_LENGTH)

    for region in regionBounds.values():
        inRegion = verify.checkWithinBoundaries(verify.getVarStrand(variant), int(variant["Pos"]),
                                                region[startKey], region[endKey])
        # being inside a region is sufficient for every donor/deNovo combination
        if inRegion:
            return True
        # de novo donor region also covers the entirety of the exon
        if donor and deNovo and varInExon(variant):
            return True
    return False
示例#10
0
def getRefSpliceDonorBoundaries(variant, intronicLength, exonicLength):
    """
    Builds the reference splice donor region for the exons of the variant's transcript
    intronicLength = number of intronic bp counted as part of the splice donor region
    exonicLength = number of exonic bp counted as part of the splice donor region
    The region is the last exonicLength bp of the exon plus the first intronicLength bp of the intron
    exon24 (BRCA1) and exon27 (BRCA2) are excluded - presumably the final exon, which has no donor
    Returns a dictionary in the format:
    key = exon name, value = dictionary with "donorStart" and "donorEnd" for that exon
    """
    # copy so that removing an exon does not touch the dictionary returned by getExonBoundaries
    exonsWithDonor = getExonBoundaries(variant).copy()
    gene = variant["Gene_Symbol"]
    if gene == "BRCA1":
        exonsWithDonor.pop("exon24")
    elif gene == "BRCA2":
        exonsWithDonor.pop("exon27")

    onPlusStrand = verify.getVarStrand(variant) == "+"
    regions = {}
    for exonName, bounds in exonsWithDonor.items():
        exonEnd = int(bounds["exonEnd"])
        if onPlusStrand:
            # the +1 sits on the 5' (exonic) edge of the region, following the
            # RefSeq numbering adjustment used for plus strand transcripts
            regions[exonName] = {"donorStart": exonEnd - exonicLength + 1,
                                 "donorEnd": exonEnd + intronicLength}
        else:
            # minus strand: coordinates decrease along the transcript, so the
            # +1 adjustment is applied to the intronic edge instead
            regions[exonName] = {"donorStart": exonEnd + exonicLength,
                                 "donorEnd": exonEnd - intronicLength + 1}

    return regions
示例#11
0
def getSpliceAcceptorBoundaries(variant, intronicLength, exonicLength):
    """
    Builds the splice acceptor region for the exons of the variant's transcript
    intronicLength = number of intronic bp counted as part of the splice acceptor region
    exonicLength = number of exonic bp counted as part of the splice acceptor region
    The region is the last intronicLength bp of the intron plus the first exonicLength bp of the exon
    exon1 is excluded for BRCA1 and BRCA2 - presumably because the first exon has no acceptor
    Returns a dictionary in the format:
    key = exon name, value = dictionary with "acceptorStart" and "acceptorEnd" for that exon
    """
    # copy so that removing an exon does not touch the dictionary returned by getExonBoundaries
    exonsWithAcceptor = getExonBoundaries(variant).copy()
    if variant["Gene_Symbol"] in ("BRCA1", "BRCA2"):
        exonsWithAcceptor.pop("exon1")

    onPlusStrand = verify.getVarStrand(variant) == "+"
    regions = {}
    for exonName, bounds in exonsWithAcceptor.items():
        exonStart = int(bounds["exonStart"])
        if onPlusStrand:
            # the +1 sits on the 5' (intronic) edge of the region, following the
            # RefSeq numbering adjustment used for plus strand transcripts
            regions[exonName] = {"acceptorStart": exonStart - intronicLength + 1,
                                 "acceptorEnd": exonStart + exonicLength}
        else:
            # minus strand: coordinates decrease along the transcript, so the
            # +1 adjustment is applied to the exonic edge instead
            regions[exonName] = {"acceptorStart": exonStart + intronicLength,
                                 "acceptorEnd": exonStart - exonicLength + 1}

    return regions
示例#12
0
def getDeNovoFrameshiftAndCIStatus(variant, boundaries, donor=True, deNovoDonorInRefAcc=False):
    """
    Given a variant, boundaries (enigma or priors), donor argument, and deNovoDonorInRefAcc argument:
      donor argument = True for de novo donors, False for de novo acceptors
      deNovoDonorInRefAcc argument = True if looking for de novo donor in ref acceptor site, False otherwise
    Determines if new splice position causes a frameshift and would disrupt a CI Domain
    If de novo splicing would cause a frameshift, returns False
    Else, checks to see if new splice position would splice out (skip) a CI domain
    If variant de novo splice position does not cause a frameshift and does not disrupt a CI domain, returns True
      Returns False otherwise
    """
    # a frameshift rules the variant out before any CI domain check is needed
    if getDeNovoSpliceFrameshiftStatus(variant, donor=donor, deNovoDonorInRefAcc=deNovoDonorInRefAcc):
        return False

    # determine which exon ("exonN") the variant is associated with
    if varInExon(variant):
        varExonNum = getVarExonNumberSNS(variant)
    elif varInSpliceRegion(variant, donor=donor, deNovo=False):
        spliceBounds = getVarSpliceRegionBounds(variant, donor=donor, deNovo=False)
        varExonNum = spliceBounds["exonName"]
    elif donor:
        # if a variant is in an intron de novo donor cannot splice out any of the exon
        # so no part of a CI domain will be spliced out
        return True
    else:
        # previously varExonNum was left unassigned on this path (de novo acceptor,
        # variant outside exons and reference acceptor regions), which raised an
        # UnboundLocalError below; fall back to the closest exon exactly as the
        # sibling exon-length functions do
        varExonNum = getClosestExonNumberIntronicSNS(variant, "enigma", donor=donor)

    varWindowPos = getVarWindowPosition(variant, donor=donor, deNovoDonorInRefAcc=deNovoDonorInRefAcc)
    inExonicPortion = varInExonicPortion(variant, STD_EXONIC_PORTION, STD_DE_NOVO_LENGTH, donor=donor,
                                         deNovoDonorInRefAcc=deNovoDonorInRefAcc)
    # region that would be affected starts at the new splice position
    regionStart = extract.getNewSplicePosition(variant["Pos"], verify.getVarStrand(variant), varWindowPos,
                                               inExonicPortion, STD_EXONIC_PORTION, STD_ACC_INTRONIC_LENGTH,
                                               donor=donor)
    # varExonNum is a string in the format "exonN"; [4:] strips "exon" to leave N
    exonIndex = int(varExonNum[4:])
    if donor:
        # region ends at the reference acceptor of the next exon "exonN+1"
        nextExonNum = "exon" + str(exonIndex + 1)
        # skips to exon 5 for any variants in BRCA1 exon 3 because exon 4 does not exist in BRCA1 RefSeq transcript
        if variant["Gene_Symbol"] == "BRCA1" and nextExonNum == "exon4":
            nextExonNum = "exon5"
        refSpliceAccBounds = extract.getSpliceAcceptorBoundaries(variant, STD_ACC_INTRONIC_LENGTH,
                                                                 STD_ACC_EXONIC_LENGTH)
        regionEnd = refSpliceAccBounds[nextExonNum]["acceptorStart"]
    else:
        # region ends at the reference donor of the previous exon "exonN-1"
        prevExonNum = "exon" + str(exonIndex - 1)
        # skips back to exon 3 for BRCA1 exon 5 because exon 4 does not exist in BRCA1 RefSeq transcript
        if variant["Gene_Symbol"] == "BRCA1" and prevExonNum == "exon4":
            prevExonNum = "exon3"
        refSpliceDonorBounds = extract.getRefSpliceDonorBoundaries(variant, STD_DONOR_INTRONIC_LENGTH,
                                                                   STD_DONOR_EXONIC_LENGTH)
        regionEnd = refSpliceDonorBounds[prevExonNum]["donorEnd"]

    CIDomainInRegion = verify.isCIDomainInRegion(regionStart, regionEnd, boundaries, variant["Gene_Symbol"])
    # True only when no CI domain falls inside the affected region
    return not CIDomainInRegion
示例#13
0
def getAltExonLength(variant, exonicPortionSize, intronicPortionSize, deNovoDonorInRefAcc=False, donor=True):
    """
    Computes the length of the alternate exon that results when splicing
    occurs at the position implied by the max MaxEntScan sliding window
    exonicPortionSize and intronicPortionSize are forwarded to the sliding window
      and new splice position helpers
    donor=True evaluates a de novo donor, donor=False a de novo acceptor
    deNovoDonorInRefAcc=True if looking for a de novo donor in a ref acceptor site
    Returns the alternate exon length
    """
    # work out which exon ("exonN") the variant is associated with
    if varInExon(variant):
        exonName = getVarExonNumberSNS(variant)
    elif varInSpliceRegion(variant, donor=donor, deNovo=False):
        exonName = getVarSpliceRegionBounds(variant, donor=donor, deNovo=False)["exonName"]
    else:
        exonName = getClosestExonNumberIntronicSNS(variant, "enigma", donor=donor)
    allExonBounds = extract.getExonBoundaries(variant)
    windowInfo = getMaxMaxEntScanScoreSlidingWindowSNS(variant, exonicPortionSize, STD_DE_NOVO_LENGTH,
                                                       donor=donor,
                                                       deNovoDonorInRefAcc=deNovoDonorInRefAcc)
    splicePos = extract.getNewSplicePosition(variant["Pos"], verify.getVarStrand(variant),
                                             windowInfo["varWindowPosition"], windowInfo["inExonicPortion"],
                                             exonicPortionSize, intronicPortionSize, donor=donor)

    if verify.getVarStrand(variant) == "-":
        # minus strand: genomic numbering decreases along the transcript
        if donor:
            # +1 so that splicePos itself is counted
            return int(allExonBounds[exonName]["exonStart"]) - splicePos + 1
        # -1 offset kept exactly as in the minus strand acceptor formula
        return splicePos - int(allExonBounds[exonName]["exonEnd"]) - 1

    refExonEnd = int(allExonBounds[exonName]["exonEnd"])
    if donor:
        # plus strand donor: shorten the reference exon by the distance
        # between the reference exon end and the new splice position
        return getRefExonLength(variant, donor=donor) - (refExonEnd - splicePos)
    return refExonEnd - splicePos
示例#14
0
def getMaxEntScanScoresSlidingWindowSNS(variant, windowSize, donor=False):
    """
    Scores every window of length windowSize that contains the variant
    If donor=True, MaxEntScan scores are calculated for splice donors
    If donor=False, MaxEntScan scores are calculated for splice acceptors
    Returns a dictionary containing (each keyed by the variant's position in the window, 1-windowSize):
        1. windowSeqs - ref and alt sequence for each window
        2. windowScores - ref and alt MaxEntScan scores and zscores for each window
        3. windowAltMaxEntScanScores - alt MaxEntScan score only for each window
    """
    varGenPos = int(variant["Pos"])
    varStrand = verify.getVarStrand(variant)
    # windowSize - 1 bases on either side of the variant give a region of
    # (windowSize*2 - 1) bp, enough for the variant to occupy every window position
    flank = windowSize - 1
    if varStrand == "-":
        # start/end are swapped for the minus strand to preserve the sequence returned by getRefAltSeqs
        regionStart, regionEnd = varGenPos + flank, varGenPos - flank
    else:
        regionStart, regionEnd = varGenPos - flank, varGenPos + flank
    regionSeqs = getRefAltSeqs(variant, regionStart, regionEnd)
    refRegion = regionSeqs["refSeq"]
    altRegion = regionSeqs["altSeq"]

    windowSeqs = {}
    windowScores = {}
    windowAltMaxEntScanScores = {}
    for shift in range(windowSize):
        # the first window has the variant in its last position; each shift moves it one position earlier
        varPos = windowSize - shift
        refWindow = refRegion[shift:shift + windowSize]
        altWindow = altRegion[shift:shift + windowSize]
        windowSeqs[varPos] = {"refSeq": refWindow,
                              "altSeq": altWindow}
        scores = getRefAltScores(refWindow, altWindow, donor=donor)
        windowScores[varPos] = {"refMaxEntScanScore": scores["refScores"]["maxEntScanScore"],
                                "refZScore": scores["refScores"]["zScore"],
                                "altMaxEntScanScore": scores["altScores"]["maxEntScanScore"],
                                "altZScore": scores["altScores"]["zScore"]}
        windowAltMaxEntScanScores[varPos] = scores["altScores"]["maxEntScanScore"]

    return {"windowSeqs": windowSeqs,
            "windowScores": windowScores,
            "windowAltMaxEntScanScores": windowAltMaxEntScanScores}
示例#15
0
def isDeNovoWildTypeSplicePosDistanceDivisibleByThree(variant, exonicPortionSize, intronicPortionSize,
                                                      deNovoDonorInRefAcc=False, donor=True):
    """
    Given a variant, compares de novo splicing position with wild-type splicing position
    exonicPortionSize refers to length in bp that is considered to be in exonic portion of splice site
    intronicPortionSize refers to length in bp that is considered to be in intronic portion of splice site
    deNovoDonorInRefAcc argument=True if looking for de novo donor in reference splice acceptor region, False otherwise
    Donor argument determines if function is used for de novo donor (donor=True) or de novo acceptor (donor=False)
    If distance between de novo and wild-type splice positions is divisible by 3, returns True
    returns False otherwise
    This function is another way to check if a de novo splice site would cause a frameshift mutation
       If it returns True, then de novo splicing would not cause a frameshift
       If it returns False, then de novo splicing would cause a frameshift
    """
    # determine which exon ("exonN") the variant is associated with
    if varInExon(variant):
        varExonNum = getVarExonNumberSNS(variant)
    elif varInSpliceRegion(variant, donor=donor, deNovo=False):
        spliceBounds = getVarSpliceRegionBounds(variant, donor=donor, deNovo=False)
        varExonNum = spliceBounds["exonName"]
    else:
        varExonNum = getClosestExonNumberIntronicSNS(variant, "enigma", donor=donor)
    varStrand = verify.getVarStrand(variant)
    refExonBounds = extract.getExonBoundaries(variant)
    slidingWindowInfo = getMaxMaxEntScanScoreSlidingWindowSNS(variant, exonicPortionSize, STD_DE_NOVO_LENGTH,
                                                              donor=donor,
                                                              deNovoDonorInRefAcc=deNovoDonorInRefAcc)
    deNovoSplicePos = extract.getNewSplicePosition(variant["Pos"], varStrand, slidingWindowInfo["varWindowPosition"],
                                                   slidingWindowInfo["inExonicPortion"], exonicPortionSize,
                                                   intronicPortionSize, donor=donor)
    # exon bounds are coerced with int() as in the other exon-length helpers,
    # so the arithmetic below does not depend on how the bounds are stored
    if donor:
        wildTypeSplicePos = int(refExonBounds[varExonNum]["exonEnd"])
        if varStrand == "+":
            distanceBetween = wildTypeSplicePos - deNovoSplicePos
        else:
            # +1 for minus strand donor because splice donor position is to the right of splice cut position
            distanceBetween = deNovoSplicePos - (wildTypeSplicePos + 1)
    else:
        wildTypeSplicePos = int(refExonBounds[varExonNum]["exonStart"])
        if varStrand == "+":
            distanceBetween = abs(deNovoSplicePos - wildTypeSplicePos)
        else:
            # +1 for minus strand acceptor because splice acceptor position is to the left of splice cut position
            distanceBetween = abs((wildTypeSplicePos + 1) - deNovoSplicePos)

    # a distance that is a multiple of 3 keeps the reading frame intact
    return distanceBetween % 3 == 0
示例#16
0
def getRefAltSeqs(variant, rangeStart, rangeStop):
    """
    Builds the reference and alternate sequences for a variant over a genomic range

    rangeStart and rangeStop are forwarded unchanged to getFastaSeq and getSeqLocDict
    The reference sequence is requested with plusStrandSeq=False when the variant strand is "-"
      and with plusStrandSeq=True for any other strand value
    Returns a dictionary with keys "refSeq" and "altSeq"
    """
    chrom = getVarChrom(variant)
    strand = verify.getVarStrand(variant)
    # one fetch serves both strands; only the orientation flag differs
    referenceSeq = getFastaSeq(chrom, rangeStart, rangeStop,
                               plusStrandSeq=(strand != "-"))
    refLocDict = getSeqLocDict(chrom, strand, rangeStart, rangeStop)
    alternateSeq = getAltSeq(getAltSeqDict(variant, refLocDict), strand)
    return {"refSeq": referenceSeq, "altSeq": alternateSeq}
示例#17
0
def getVarExonNumberSNS(variant):
    """
    Looks up which exon a single nucleotide substitution variant falls in

    Returns the matching key of the exon boundary dictionary (format "exonN")
    Returns None when varInExon reports the variant is not in an exon,
      or when no exon's bounds contain the variant position
    """
    if not varInExon(variant):
        return None
    position = int(variant["Pos"])
    exonBounds = extract.getExonBoundaries(variant)
    onPlusStrand = verify.getVarStrand(variant) == "+"
    for exonName, bounds in exonBounds.items():
        start = bounds["exonStart"]
        end = bounds["exonEnd"]
        if onPlusStrand:
            # plus strand: exonStart is exclusive, exonEnd is inclusive
            containsVariant = start < position <= end
        else:
            # any other strand: exonStart is inclusive, exonEnd is exclusive
            containsVariant = end < position <= start
        if containsVariant:
            return exonName
    return None
示例#18
0
def getVarExonNumberSNS(variant):
    """
    Given a SNS variant that varInExon places inside an exon,
    returns the exon boundary dictionary key (format "exonN") of the exon containing it
    Returns None if varInExon is False or no exon's bounds contain the variant position
    """
    if varInExon(variant):
        genomicPos = int(variant["Pos"])
        boundsByExon = extract.getExonBoundaries(variant)
        strand = verify.getVarStrand(variant)
        for exonName in boundsByExon:
            lower = boundsByExon[exonName]["exonStart"]
            upper = boundsByExon[exonName]["exonEnd"]
            if strand != "+":
                # for non-plus strands exonStart is the larger coordinate, so swap the bounds
                lower, upper = upper, lower
            # lower bound is exclusive, upper bound is inclusive
            if lower < genomicPos <= upper:
                return exonName
    return None
示例#19
0
def getVarDict(variant, boundaries):
    """
    Summarizes a single variant as a flat dictionary

    variant must provide "Gene_Symbol", "Chr", "Pos" and "HGVS_cDNA"
    boundaries is forwarded to getVarLocation when determining the variant location
    Returns a dictionary with keys varGene, varChrom, varStrand, varGenCoordinate,
      varType, varLoc and varHGVScDNA
    """
    # derived values are computed before any field of the variant is read
    strand = getVarStrand(variant)
    variantType = getVarType(variant)
    location = getVarLocation(variant, boundaries)

    summary = {"varGene": variant["Gene_Symbol"],
               "varChrom": variant["Chr"]}
    summary["varStrand"] = strand
    summary["varGenCoordinate"] = variant["Pos"]
    summary["varType"] = variantType
    summary["varLoc"] = location
    summary["varHGVScDNA"] = variant["HGVS_cDNA"]
    return summary
示例#20
0
def varInExon(variant):
    """
    Reports whether a variant's genomic position lies inside one of its transcript's exons

    Returns False immediately when verify.varOutsideBoundaries reports the variant as out of bounds
    Otherwise returns True as soon as one exon's bounds contain the position, False if none do
    """
    if verify.varOutsideBoundaries(variant):
        return False
    position = int(variant["Pos"])
    exonBounds = extract.getExonBoundaries(variant)
    onPlusStrand = verify.getVarStrand(variant) == "+"
    for bounds in exonBounds.values():
        start = int(bounds["exonStart"])
        end = int(bounds["exonEnd"])
        if onPlusStrand:
            # plus strand: exonStart is exclusive, exonEnd is inclusive
            if start < position <= end:
                return True
        elif end < position <= start:
            # any other strand: exonStart is inclusive, exonEnd is exclusive
            return True
    return False
示例#21
0
def varInExon(variant):
    """
    Given a variant, returns True when the variant is not outside the transcript boundaries
    AND its genomic position falls within the bounds of at least one exon
    Returns False otherwise
    """
    outsideTranscript = verify.varOutsideBoundaries(variant)
    if outsideTranscript:
        return False
    genomicPos = int(variant["Pos"])
    boundsByExon = extract.getExonBoundaries(variant)
    strand = verify.getVarStrand(variant)

    def holdsVariant(exonName):
        # True when the named exon's bounds contain the variant position
        first = int(boundsByExon[exonName]["exonStart"])
        last = int(boundsByExon[exonName]["exonEnd"])
        if strand == "+":
            return first < genomicPos <= last
        # for non-plus strands exonStart is the larger, inclusive coordinate
        return last < genomicPos <= first

    # any() stops at the first exon that contains the variant
    return any(holdsVariant(exonName) for exonName in boundsByExon.keys())
示例#22
0
def getRefAltSeqs(variant, rangeStart, rangeStop):
    """
    Given a variant, rangeStart, and rangeStop:
    Returns a dictionary holding the reference sequence ("refSeq") and the
    alternate sequence ("altSeq") for the specified variant and range
    """
    chromosome = getVarChrom(variant)
    strandSymbol = verify.getVarStrand(variant)
    # only a "-" strand switches the fetch away from the plus strand sequence
    readPlusStrand = True
    if strandSymbol == "-":
        readPlusStrand = False
    sequences = {}
    sequences["refSeq"] = getFastaSeq(chromosome,
                                      rangeStart,
                                      rangeStop,
                                      plusStrandSeq=readPlusStrand)
    refLocDict = getSeqLocDict(chromosome, strandSymbol, rangeStart, rangeStop)
    altLocDict = getAltSeqDict(variant, refLocDict)
    sequences["altSeq"] = getAltSeq(altLocDict, strandSymbol)
    return sequences
示例#23
0
def getRefExonLength(variant, donor=True):
    """
    Returns the length of the reference exon associated with a variant

    The exon is chosen as follows:
      variant in an exon: that exon (getVarExonNumberSNS)
      variant in a reference splice region: the exon named by getVarSpliceRegionBounds
      otherwise: the exon returned by getClosestExonNumberIntronicSNS for the "enigma" boundaries
    donor selects the donor (True) or acceptor (False) variants of the splice region and closest exon lookups
    """
    if varInExon(variant):
        exonName = getVarExonNumberSNS(variant)
    elif varInSpliceRegion(variant, donor=donor, deNovo=False):
        exonName = getVarSpliceRegionBounds(variant,
                                            donor=donor,
                                            deNovo=False)["exonName"]
    else:
        exonName = getClosestExonNumberIntronicSNS(variant,
                                                   "enigma",
                                                   donor=donor)
    allExonBounds = extract.getExonBoundaries(variant)
    onMinusStrand = verify.getVarStrand(variant) == "-"
    start = int(allExonBounds[exonName]["exonStart"])
    end = int(allExonBounds[exonName]["exonEnd"])
    # No +1 is added to either difference: due to RefSeq numbering one bound is 1 bp too long
    #   minus strand: exonEnd is the first intronic base (+1 position)
    #   plus strand: exonStart is the last intronic base (-1 position)
    if onMinusStrand:
        return start - end
    return end - start
示例#24
0
def varInSpliceRegion(variant, donor=False, deNovo=False):
    """
    Reports whether a variant lies in a splice donor or splice acceptor region

    donor=False, deNovo=False: reference splice acceptor regions
    donor=False, deNovo=True: de novo splice acceptor regions (exonic portion of STD_DE_NOVO_LENGTH)
    donor=True, deNovo=False: reference splice donor regions
    donor=True, deNovo=True: reference splice donor regions, plus any position for which varInExon is True
    Returns True if the variant is in such a region, False otherwise
    """
    if donor:
        # reference donor bounds are used for de novo donors too; the exon itself is handled in the loop
        regionBounds = extract.getRefSpliceDonorBoundaries(
            variant, STD_DONOR_INTRONIC_LENGTH, STD_DONOR_EXONIC_LENGTH)
        startKey, endKey = "donorStart", "donorEnd"
    else:
        exonicLength = STD_DE_NOVO_LENGTH if deNovo else STD_ACC_EXONIC_LENGTH
        regionBounds = extract.getSpliceAcceptorBoundaries(
            variant, STD_ACC_INTRONIC_LENGTH, exonicLength)
        startKey, endKey = "acceptorStart", "acceptorEnd"

    # a de novo donor region also covers the entirety of the exon
    wholeExonCounts = donor and deNovo
    for bounds in regionBounds.values():
        inRegion = verify.checkWithinBoundaries(verify.getVarStrand(variant),
                                                int(variant["Pos"]),
                                                bounds[startKey],
                                                bounds[endKey])
        if inRegion:
            return True
        if wholeExonCounts and varInExon(variant):
            return True
    return False
示例#25
0
def getClosestExonNumberIntronicSNS(variant, boundaries, donor=True):
    """
    Finds the exon whose boundary is closest to a non-exonic substitution variant

    boundaries is forwarded to getVarLocation
    Returns "exon0" unless the variant location is "intron_variant" or "UTR_variant",
      the variant type is "substitution", and varInExon is False
    Otherwise measures the distance to each exon's exonEnd (donor=True) or exonStart (donor=False),
      keeps only exons with a positive distance in the direction fixed by the strand,
      and returns the exon key (format "exonN") with the smallest such distance
    """
    varLoc = getVarLocation(variant, boundaries)
    if varLoc not in ("intron_variant", "UTR_variant"):
        return "exon0"
    if extract.getVarType(variant) != "substitution":
        return "exon0"
    if varInExon(variant):
        return "exon0"

    exonBounds = extract.getExonBoundaries(variant)
    rawPos = variant["Pos"]
    boundaryKey = "exonEnd" if donor else "exonStart"
    distances = {}
    for exonName in exonBounds.keys():
        if verify.getVarStrand(variant) == "+":
            if donor:
                offset = int(rawPos) - int(exonBounds[exonName][boundaryKey])
            else:
                offset = int(exonBounds[exonName][boundaryKey]) - int(rawPos)
        else:
            # signs flip for the other strand
            if donor:
                offset = int(exonBounds[exonName][boundaryKey]) - int(rawPos)
            else:
                offset = int(rawPos) - int(exonBounds[exonName][boundaryKey])
        if offset > 0:
            distances[exonName] = offset
    # smallest positive distance wins; ties resolve to the first exon encountered
    return min(distances, key=distances.get)
示例#26
0
def getVarSpliceRegionBounds(variant, donor=False, deNovo=False):
    """
    Returns the bounds of the splice region that contains a variant

    donor=True: reference splice donor regions are searched
      *function CANNOT be used to return de novo donor splice region bounds*
    donor=False, deNovo=False: reference splice acceptor regions are searched
    donor=False, deNovo=True: de novo splice acceptor regions are searched
    Returns a dictionary with "exonName" plus the region's start and end
      ("donorStart"/"donorEnd" or "acceptorStart"/"acceptorEnd")
    Returns None if varInSpliceRegion is False or no searched region contains the variant
    """
    if not varInSpliceRegion(variant, donor=donor, deNovo=deNovo):
        return None
    if donor:
        regionBounds = extract.getRefSpliceDonorBoundaries(
            variant, STD_DONOR_INTRONIC_LENGTH, STD_DONOR_EXONIC_LENGTH)
        startKey, endKey = "donorStart", "donorEnd"
    else:
        exonicLength = STD_DE_NOVO_LENGTH if deNovo else STD_ACC_EXONIC_LENGTH
        regionBounds = extract.getSpliceAcceptorBoundaries(
            variant, STD_ACC_INTRONIC_LENGTH, exonicLength)
        startKey, endKey = "acceptorStart", "acceptorEnd"
    for exonName, bounds in regionBounds.items():
        start = bounds[startKey]
        end = bounds[endKey]
        if verify.checkWithinBoundaries(verify.getVarStrand(variant),
                                        int(variant["Pos"]), start, end):
            return {"exonName": exonName, startKey: start, endKey: end}
    return None
示例#27
0
def isDeNovoWildTypeSplicePosDistanceDivisibleByThree(
        variant,
        exonicPortionSize,
        intronicPortionSize,
        deNovoDonorInRefAcc=False,
        donor=True):
    """
    Compares a variant's de novo splice position with the wild-type splice position of its exon

    exonicPortionSize: length in bp considered to be the exonic portion of the splice site
    intronicPortionSize: length in bp considered to be the intronic portion of the splice site
    deNovoDonorInRefAcc: True if looking for a de novo donor in a reference splice acceptor region
    donor: True for de novo donors (wild-type position is exonEnd), False for de novo acceptors (exonStart)
    Returns True if the distance between the two positions is divisible by 3, False otherwise
      True means de novo splicing would not cause a frameshift
      False means de novo splicing would cause a frameshift
    """
    if varInExon(variant):
        exonName = getVarExonNumberSNS(variant)
    elif varInSpliceRegion(variant, donor=donor, deNovo=False):
        exonName = getVarSpliceRegionBounds(variant,
                                            donor=donor,
                                            deNovo=False)["exonName"]
    else:
        exonName = getClosestExonNumberIntronicSNS(variant,
                                                   "enigma",
                                                   donor=donor)
    strand = verify.getVarStrand(variant)
    exonBounds = extract.getExonBoundaries(variant)
    windowInfo = getMaxMaxEntScanScoreSlidingWindowSNS(
        variant,
        exonicPortionSize,
        STD_DE_NOVO_LENGTH,
        donor=donor,
        deNovoDonorInRefAcc=deNovoDonorInRefAcc)
    newSplicePos = extract.getNewSplicePosition(
        variant["Pos"],
        strand,
        windowInfo["varWindowPosition"],
        windowInfo["inExonicPortion"],
        exonicPortionSize,
        intronicPortionSize,
        donor=donor)

    onPlusStrand = strand == "+"
    refSplicePos = exonBounds[exonName]["exonEnd" if donor else "exonStart"]
    # +1 off the plus strand: the splice donor position is to the right of the splice cut position
    # and the splice acceptor position is to the left of it
    cutPos = refSplicePos if onPlusStrand else refSplicePos + 1
    if donor:
        gap = cutPos - newSplicePos if onPlusStrand else newSplicePos - cutPos
    elif onPlusStrand:
        gap = abs(newSplicePos - cutPos)
    else:
        gap = abs(cutPos - newSplicePos)
    return gap % 3 == 0
示例#28
0
def getDeNovoFrameshiftAndCIStatus(variant,
                                   boundaries,
                                   donor=True,
                                   deNovoDonorInRefAcc=False):
    """
    Determines whether de novo splicing at a variant avoids both a frameshift and a CI domain

    variant: variant record; "Pos" and "Gene_Symbol" are read here
    boundaries: boundary set name (enigma or priors), forwarded to verify.isCIDomainInRegion
      and to getClosestExonNumberIntronicSNS
    donor: True for de novo donors, False for de novo acceptors
    deNovoDonorInRefAcc: True if looking for a de novo donor in a ref acceptor site, False otherwise

    Returns False if getDeNovoSpliceFrameshiftStatus reports a frameshift
    Returns True for a de novo donor when the variant is neither in an exon nor in a reference
      donor region (no part of the exon, and so no CI domain, can be spliced out)
    Otherwise returns True if no CI domain lies in the region that the new splicing would skip,
      False if one does
    """
    frameshiftStatus = getDeNovoSpliceFrameshiftStatus(
        variant, donor=donor, deNovoDonorInRefAcc=deNovoDonorInRefAcc)
    # a frameshift rules the variant out before any CI domain check
    if frameshiftStatus:
        return False

    # determine the exon (string in the format "exonN") the de novo splice site belongs to
    if varInExon(variant):
        varExonNum = getVarExonNumberSNS(variant)
    elif varInSpliceRegion(variant, donor=donor, deNovo=False):
        spliceBounds = getVarSpliceRegionBounds(variant,
                                                donor=donor,
                                                deNovo=False)
        varExonNum = spliceBounds["exonName"]
    elif donor:
        # if a variant is in an intron de novo donor cannot splice out any of the exon
        # so no part of a CI domain will be spliced out
        return True
    else:
        # de novo acceptor outside every exon and reference acceptor region:
        # previously varExonNum was left unassigned here and the lookup below raised
        # UnboundLocalError; fall back to the closest exon like the sibling functions do
        varExonNum = getClosestExonNumberIntronicSNS(variant,
                                                     boundaries,
                                                     donor=donor)

    varWindowPos = getVarWindowPosition(
        variant, donor=donor, deNovoDonorInRefAcc=deNovoDonorInRefAcc)
    inExonicPortion = varInExonicPortion(
        variant,
        STD_EXONIC_PORTION,
        STD_DE_NOVO_LENGTH,
        donor=donor,
        deNovoDonorInRefAcc=deNovoDonorInRefAcc)
    # the skipped region starts at the new splice position
    regionStart = extract.getNewSplicePosition(
        variant["Pos"],
        verify.getVarStrand(variant),
        varWindowPos,
        inExonicPortion,
        STD_EXONIC_PORTION,
        STD_ACC_INTRONIC_LENGTH,
        donor=donor)
    if donor:
        # nextExonNum parses out N from varExonNum and adds 1 to get next exon number "exonN+1"
        # uses [4:] to remove "exon" from "exonN" so can add 1 to N to get N+1
        nextExonNum = "exon" + str(int(varExonNum[4:]) + 1)
        # skips to exon 5 for any variants in BRCA1 exon 3 because exon 4 does not exist in BRCA1 RefSeq transcript
        if variant["Gene_Symbol"] == "BRCA1" and nextExonNum == "exon4":
            nextExonNum = "exon5"
        refSpliceAccBounds = extract.getSpliceAcceptorBoundaries(
            variant, STD_ACC_INTRONIC_LENGTH, STD_ACC_EXONIC_LENGTH)
        # a de novo donor skips everything up to the next exon's reference acceptor
        regionEnd = refSpliceAccBounds[nextExonNum]["acceptorStart"]
    else:
        # prevExonNum parses out N from varExonNum and subtracts 1 to get previous exon number "exonN-1"
        # uses [4:] to remove "exon" from "exonN" so can subtract 1 from N to get N-1
        prevExonNum = "exon" + str(int(varExonNum[4:]) - 1)
        # steps back to exon 3 for variants in BRCA1 exon 5, mirroring the exon 4 skip above
        if variant["Gene_Symbol"] == "BRCA1" and prevExonNum == "exon4":
            prevExonNum = "exon3"
        refSpliceDonorBounds = extract.getRefSpliceDonorBoundaries(
            variant, STD_DONOR_INTRONIC_LENGTH, STD_DONOR_EXONIC_LENGTH)
        # a de novo acceptor skips everything back to the previous exon's reference donor
        regionEnd = refSpliceDonorBounds[prevExonNum]["donorEnd"]
    CIDomainInRegion = verify.isCIDomainInRegion(regionStart, regionEnd,
                                                 boundaries,
                                                 variant["Gene_Symbol"])
    return not CIDomainInRegion
示例#29
0
def getMaxMaxEntScanScoreSlidingWindowSNS(variant,
                                          exonicPortionSize,
                                          deNovoLength,
                                          donor=True,
                                          deNovo=False,
                                          deNovoDonorInRefAcc=False):
    """
    Given a variant, determines the maximum alt MaxEntScan score in
       a sliding window of size STD_DONOR_SIZE with the variant in each position (1-STD_DONOR_SIZE) if donor = True
       a sliding window of size STD_ACC_SIZE with the variant in each position (1-STD_ACC_SIZE) if donor = False
    This function should be used to determine the window in which de novo splicing is most likely to occur
    Function can only return highest scoring window details for either de novo donor OR de novo acceptor, not both
    If donor=True, function determines highest scoring window for potential de novo donor
    If donor=False, function determines highest scoring window for potential de novo acceptor
    When the variant is in a reference splice donor or acceptor region and deNovoDonorInRefAcc is False,
       any window whose ref seq equals the native splice site sequence is removed before the maximum is taken
    Returns a dictionary containing the ref and alt MaxEntScan score and z-score for the highest scoring window,
       the ref and alt seqs for that window, varWindowPosition (position of variant in the window),
       varStart (0-based index of variant for formatting) and
       varLength (equal to 1, because this function only works for single nucleotide substitution variants)
    Dictionary also contains "inExonicPortion" with value either True or False
       True if the variant is within the exonicPortionSize bp exonic portion of the highest scoring window
       False otherwise
    deNovoLength refers to the length of the exonic portion of a de novo splice acceptor
    deNovo is accepted but not used by this function
    deNovoDonorInRefAcc = False if NOT checking for de novo splice donors in reference splice acceptor sites
    deNovoDonorInRefAcc = True if checking for de novo splice donors in reference splice acceptor sites
    """
    # uses default window size for a splice donor or splice acceptor region
    windowSize = STD_DONOR_SIZE if donor else STD_ACC_SIZE
    slidingWindowInfo = extract.getMaxEntScanScoresSlidingWindowSNS(
        variant, windowSize, donor=donor)

    windowAltMaxEntScanScores = slidingWindowInfo["windowAltMaxEntScanScores"]

    # checks to see if variant is within reference splice donor region
    inRefSpliceDonorRegion = varInSpliceRegion(variant,
                                               donor=True,
                                               deNovo=False)
    # checks to see if variant is within reference splice acceptor region
    inRefSpliceAccRegion = varInSpliceRegion(variant,
                                             donor=False,
                                             deNovo=False)
    # if variant in ref splice donor region (for de novo donor) or in ref splice acceptor region (for de novo acceptor),
    # then need to remove native splicing window from consideration for highest scoring window
    if (inRefSpliceDonorRegion
            or inRefSpliceAccRegion) and not deNovoDonorInRefAcc:
        onPlusStrand = verify.getVarStrand(variant) == "+"
        if donor:
            refSpliceBounds = getVarSpliceRegionBounds(variant,
                                                       donor=donor,
                                                       deNovo=False)
            refSeqStart = refSpliceBounds["donorStart"]
            refSeqEnd = refSpliceBounds["donorEnd"]
        else:
            refSpliceBounds = getVarSpliceRegionBounds(variant,
                                                       donor=donor,
                                                       deNovo=True)
            # de novo splice acceptor region is deNovoOffset bp longer than reference splice acceptor region,
            # so acceptorEnd is pulled back by deNovoOffset to reduce refSeq to the correct length
            deNovoOffset = deNovoLength - exonicPortionSize
            refSeqStart = refSpliceBounds["acceptorStart"]
            if onPlusStrand:
                # genomic position increases from left to right on plus strand
                refSeqEnd = refSpliceBounds["acceptorEnd"] - deNovoOffset
            else:
                # genomic position decreases from left to right on minus strand
                refSeqEnd = refSpliceBounds["acceptorEnd"] + deNovoOffset
        refSpliceSeq = extract.getFastaSeq(extract.getVarChrom(variant),
                                           refSeqStart,
                                           refSeqEnd,
                                           plusStrandSeq=onPlusStrand)
        # .items() (not .iteritems()) so the loop runs on both Python 2 and Python 3
        for position, seqs in slidingWindowInfo["windowSeqs"].items():
            if seqs["refSeq"] == refSpliceSeq:
                # removes reference splice window so it is not considered for de novo splicing
                del windowAltMaxEntScanScores[position]

    # (position, score) tuple for the window with the maximum alt MaxEntScan score
    maxAltWindowScore = max(windowAltMaxEntScanScores.items(),
                            key=lambda k: k[1])
    maxVarPosition = maxAltWindowScore[0]
    maxScores = slidingWindowInfo["windowScores"][maxVarPosition]
    maxSeqs = slidingWindowInfo["windowSeqs"][maxVarPosition]

    # determines if variant is in the exonic portion specified by exonicPortionSize
    if donor:
        # variant is in first exonicPortionSize bp of the donor region
        inExonicPortion = maxVarPosition <= exonicPortionSize
    else:
        # variant is in the last exonicPortionSize bp of the acceptor region
        inExonicPortion = (STD_ACC_SIZE - maxVarPosition) < exonicPortionSize

    return {
        "refMaxEntScanScore": maxScores["refMaxEntScanScore"],
        "refZScore": maxScores["refZScore"],
        "altMaxEntScanScore": maxScores["altMaxEntScanScore"],
        "altZScore": maxScores["altZScore"],
        "refSeq": maxSeqs["refSeq"],
        "altSeq": maxSeqs["altSeq"],
        "varStart": maxVarPosition - 1,
        "varLength": 1,
        "varWindowPosition": maxVarPosition,
        "inExonicPortion": inExonicPortion
    }
示例#30
0
def getClosestSpliceSiteScores(variant,
                               deNovoOffset,
                               donor=True,
                               deNovo=False,
                               deNovoDonorInRefAcc=False,
                               testMode=False):
    """
    Finds the reference splice site closest to a variant, fetches its sequence and scores it.

    Arguments:
       variant: variant record; "Pos" is read here, the rest is handed to helper functions
       deNovoOffset: difference between the de novo acceptor length and the exonic portion size;
          only used when donor=False (use deNovoOffset=0 when looking for the closest
          reference acceptor of an intronic variant)
       donor: True to look for the closest splice donor, False for the closest splice acceptor
       deNovo: True to accommodate de novo splicing
          *Only pass deNovo=True together with donor=False, the donor sequence is not
           correct when donor=True and deNovo=True
       deNovoDonorInRefAcc: True when checking for de novo donors inside reference acceptor
          sites; in that case the bounds of the splice region containing the variant are NOT used
       testMode: True skips MaxEntScan scoring

    How the splice site is chosen:
       1. exonic variant (and not deNovo), intronic variant or UTR variant:
          bounds of the reference splice site that belongs to the variant's exon
          (exonic variant) or to the closest exon (intronic / UTR variant)
       2. variant inside a splice region (and not deNovoDonorInRefAcc):
          bounds of that splice region, this choice overrides 1.

    Returns a dictionary:
       testMode=True: exonName, sequence (upper case), genomicSplicePos
       testMode=False: additionally exonStart and intronStart (formatting offsets into the
          sequence), maxEntScanScore and zScore
       testMode=False and exonName is "exon0": every value is the string "N/A"
    """
    # value is not read again below; the conversion is retained so that a missing or
    # non-numeric "Pos" still raises at this point
    varGenPos = int(variant["Pos"])
    chrom = extract.getVarChrom(variant)
    location = getVarLocation(variant, "enigma")
    outsideExons = location in ("intron_variant", "UTR_variant")
    inExon = varInExon(variant)

    if outsideExons or (inExon and not deNovo):
        if inExon:
            exonNumber = getVarExonNumberSNS(variant)
        else:
            # intronic or UTR variant: fall back to the nearest exon
            exonNumber = getClosestExonNumberIntronicSNS(variant,
                                                         "enigma",
                                                         donor=donor)
        exonName = exonNumber
        if donor:
            allSiteBounds = extract.getRefSpliceDonorBoundaries(
                variant, STD_DONOR_INTRONIC_LENGTH, STD_DONOR_EXONIC_LENGTH)
        else:
            allSiteBounds = extract.getSpliceAcceptorBoundaries(
                variant, STD_ACC_INTRONIC_LENGTH, STD_ACC_EXONIC_LENGTH)
        siteBounds = allSiteBounds[exonNumber]

    if varInSpliceRegion(variant, donor=donor,
                         deNovo=deNovo) and not deNovoDonorInRefAcc:
        # variant sits inside a splice region, so that region is the closest site
        siteBounds = getVarSpliceRegionBounds(variant,
                                              donor=donor,
                                              deNovo=deNovo)
        exonName = siteBounds["exonName"]

    onPlusStrand = verify.getVarStrand(variant) == "+"
    if donor:
        refSeq = extract.getFastaSeq(chrom,
                                     siteBounds["donorStart"],
                                     siteBounds["donorEnd"],
                                     plusStrandSeq=onPlusStrand)
        # the splice site lies 3 bp into the donor window (the exon end is 3 bp past donorStart);
        # genomic numbering increases along the plus strand and decreases along the minus strand
        if onPlusStrand:
            genomicSplicePos = siteBounds["donorStart"] + 3
        else:
            genomicSplicePos = siteBounds["donorStart"] - 3
        exonStart = 0
        intronStart = STD_EXONIC_PORTION
    else:
        # a de novo acceptor region is deNovoOffset bp longer than a reference acceptor region,
        # so acceptorEnd is pulled back by deNovoOffset to get a sequence of reference length
        # (subtract on the plus strand, add on the minus strand where numbering decreases)
        if onPlusStrand:
            refSeq = extract.getFastaSeq(
                chrom,
                siteBounds["acceptorStart"],
                (siteBounds["acceptorEnd"] - deNovoOffset),
                plusStrandSeq=True)
            # splice site is 3 bp before the reference acceptor end
            genomicSplicePos = siteBounds["acceptorEnd"] - 3 - deNovoOffset
        else:
            refSeq = extract.getFastaSeq(
                chrom,
                siteBounds["acceptorStart"],
                (siteBounds["acceptorEnd"] + deNovoOffset),
                plusStrandSeq=False)
            # splice site is 3 bp before the reference acceptor end
            genomicSplicePos = siteBounds["acceptorEnd"] + 3 + deNovoOffset
        exonStart = len(refSeq) - STD_EXONIC_PORTION
        intronStart = 0

    if testMode:
        # unit tests skip MaxEntScan entirely
        return {
            "exonName": exonName,
            "sequence": refSeq.upper(),
            "genomicSplicePos": genomicSplicePos
        }

    if exonName == "exon0":
        # "exon0" is never scored, every field is reported as "N/A"
        return {
            "exonName": "N/A",
            "sequence": "N/A",
            "exonStart": "N/A",
            "intronStart": "N/A",
            "maxEntScanScore": "N/A",
            "zScore": "N/A",
            "genomicSplicePos": "N/A"
        }

    siteScore = calcMaxEntScanMeanStd.runMaxEntScan(refSeq, donor=donor)
    siteZScore = extract.getZScore(siteScore, donor=donor)
    return {
        "exonName": exonName,
        "sequence": refSeq.upper(),
        "exonStart": exonStart,
        "intronStart": intronStart,
        "maxEntScanScore": siteScore,
        "zScore": siteZScore,
        "genomicSplicePos": genomicSplicePos
    }
示例#31
0
def getClosestSpliceSiteScores(variant, deNovoOffset, donor=True, deNovo=False, deNovoDonorInRefAcc=False,
                               testMode=False):
    """
    Given a variant, returns the sequence, genomic splice position and scores of the closest reference splice site
    donor=True looks for the closest splice donor, donor=False for the closest splice acceptor
    deNovoOffset is the difference between de novo acceptor length and exonic portion size (acceptor lookups only)
       *Note if looking for closest ref acceptor for a variant in an intron, use deNovoOffset=0
    deNovo=True accommodates de novo splicing
       *Note only use deNovo=True together with donor=False
       *The returned sequence is not correct if donor=True and deNovo=True
    deNovoDonorInRefAcc=True if checking for de novo splice donors in reference splice acceptor sites,
       in which case the bounds of the splice region containing the variant are not used
    Splice site selection:
       exonic variant (deNovo=False): reference splice site of the variant's exon
       intronic or UTR variant: reference splice site of the closest exon
       variant in a splice region (deNovoDonorInRefAcc=False): that splice region, taking precedence over the above
    Return value is a dictionary
       testMode=True: exonName, sequence, genomicSplicePos (no MaxEntScan run)
       testMode=False: exonName, sequence, exonStart, intronStart, maxEntScanScore, zScore, genomicSplicePos
          exonStart and intronStart are the formatting offsets for the splice site sequence
          all seven values are "N/A" when the exon name is "exon0"
    """
    # not used further down, conversion kept so an invalid "Pos" still raises here
    varGenPos = int(variant["Pos"])
    varChrom = extract.getVarChrom(variant)
    varLoc = getVarLocation(variant, "enigma")
    isIntronOrUTR = varLoc == "intron_variant" or varLoc == "UTR_variant"
    variantIsExonic = varInExon(variant)

    if (variantIsExonic and not deNovo) or isIntronOrUTR:
        # exonic variants use their own exon, intronic and UTR variants use the closest exon
        exonNumber = (getVarExonNumberSNS(variant) if variantIsExonic
                      else getClosestExonNumberIntronicSNS(variant, "enigma", donor=donor))
        exonName = exonNumber
        if donor:
            closestSpliceBounds = extract.getRefSpliceDonorBoundaries(
                variant, STD_DONOR_INTRONIC_LENGTH, STD_DONOR_EXONIC_LENGTH)[exonNumber]
        else:
            closestSpliceBounds = extract.getSpliceAcceptorBoundaries(
                variant, STD_ACC_INTRONIC_LENGTH, STD_ACC_EXONIC_LENGTH)[exonNumber]

    useVariantSpliceRegion = varInSpliceRegion(variant, donor=donor, deNovo=deNovo) and not deNovoDonorInRefAcc
    if useVariantSpliceRegion:
        closestSpliceBounds = getVarSpliceRegionBounds(variant, donor=donor, deNovo=deNovo)
        exonName = closestSpliceBounds["exonName"]

    isPlusStrand = verify.getVarStrand(variant) == "+"
    if donor:
        donorStart = closestSpliceBounds["donorStart"]
        refSeq = extract.getFastaSeq(varChrom, donorStart, closestSpliceBounds["donorEnd"],
                                     plusStrandSeq=isPlusStrand)
        # exon end (the splice site) is 3 bp past donorStart in transcript direction:
        # +3 on the plus strand, -3 on the minus strand where genomic numbering runs backwards
        genomicSplicePos = donorStart + 3 if isPlusStrand else donorStart - 3
        exonStart, intronStart = 0, STD_EXONIC_PORTION
    else:
        acceptorEnd = closestSpliceBounds["acceptorEnd"]
        # de novo acceptor region is deNovoOffset bp longer than the reference acceptor region,
        # shifting acceptorEnd (minus on plus strand, plus on minus strand) trims refSeq to reference length
        trimmedEnd = acceptorEnd - deNovoOffset if isPlusStrand else acceptorEnd + deNovoOffset
        refSeq = extract.getFastaSeq(varChrom, closestSpliceBounds["acceptorStart"], trimmedEnd,
                                     plusStrandSeq=isPlusStrand)
        # splice site is 3 bp before the reference acceptor end, again mirrored on the minus strand
        genomicSplicePos = (acceptorEnd - 3 - deNovoOffset if isPlusStrand
                            else acceptorEnd + 3 + deNovoOffset)
        exonStart, intronStart = len(refSeq) - STD_EXONIC_PORTION, 0

    if testMode:
        # unittests do not run MaxEntScan
        return {"exonName": exonName,
                "sequence": refSeq.upper(),
                "genomicSplicePos": genomicSplicePos}

    if exonName == "exon0":
        # "exon0" is reported without scores
        return dict.fromkeys(("exonName", "sequence", "exonStart", "intronStart",
                              "maxEntScanScore", "zScore", "genomicSplicePos"), "N/A")

    closestMaxEntScanScore = calcMaxEntScanMeanStd.runMaxEntScan(refSeq, donor=donor)
    closestZScore = extract.getZScore(closestMaxEntScanScore, donor=donor)
    return {"exonName": exonName,
            "sequence": refSeq.upper(),
            "exonStart": exonStart,
            "intronStart": intronStart,
            "maxEntScanScore": closestMaxEntScanScore,
            "zScore": closestZScore,
            "genomicSplicePos": genomicSplicePos}
示例#32
0
def getMaxMaxEntScanScoreSlidingWindowSNS(variant, exonicPortionSize, deNovoLength, donor=True, deNovo=False,
                                          deNovoDonorInRefAcc=False):
    """
    Given a variant, determines the maximum alt MaxEntScan score in
       a sliding window of size STD_DONOR_SIZE with the variant in each position (1-STD_DONOR_SIZE) if donor = True
       a sliding window of size STD_ACC_SIZE with the variant in each position (1-STD_ACC_SIZE) if donor = False
    This function should be used to determine the window in which de novo splicing is most likely to occur
    Function can only return highest scoring window details for either de novo donor OR de novo acceptor, not both
       If donor=True, function determines highest scoring window for potential de novo donor
       If donor=False, function determines highest scoring window for potential de novo acceptor
    exonicPortionSize is the number of bp of the window that count as its exonic portion
    deNovoLength refers to the length of the exonic portion of a de novo splice acceptor
    deNovo is accepted for interface compatibility but is not read by this function
    deNovoDonorInRefAcc = False if NOT checking for de novo splice donors in reference splice acceptor sites
    deNovoDonorInRefAcc = True if checking for de novo splice donors in reference splice acceptor sites
       When False and the variant lies in a reference splice region, every window whose ref sequence equals
       the native splice site sequence is removed before the maximum is taken
    Returns a dictionary containing:
       refMaxEntScanScore, refZScore, altMaxEntScanScore, altZScore for the highest scoring window
       refSeq and altSeq for the highest scoring window
       varStart (0-based index of variant in the window, for formatting)
       varLength (always 1, because this function only works for single nucleotide substitution variants)
       varWindowPosition (1-based position of variant in the window)
       inExonicPortion (True if variant is within the exonicPortionSize bp exonic portion of the window)
    Raises ValueError (from max) if no window is left to choose from
    """
    # default window size depends on whether a splice donor or splice acceptor region is scanned
    windowSize = STD_DONOR_SIZE if donor else STD_ACC_SIZE
    slidingWindowInfo = extract.getMaxEntScanScoresSlidingWindowSNS(variant, windowSize, donor=donor)
    windowAltMaxEntScanScores = slidingWindowInfo["windowAltMaxEntScanScores"]

    # checks to see if variant is within reference splice donor region
    inRefSpliceDonorRegion = varInSpliceRegion(variant, donor=True, deNovo=False)
    # checks to see if variant is within reference splice acceptor region
    inRefSpliceAccRegion = varInSpliceRegion(variant, donor=False, deNovo=False)
    # if variant in ref splice donor region (for de novo donor) or in ref splice acceptor region (for de novo acceptor),
    # then need to remove native splicing window from consideration for highest scoring window
    # NOTE(review): the check does not tie the region type to the donor flag; presumably callers only
    # reach this with a matching region (or pass deNovoDonorInRefAcc=True) - confirm against callers
    if (inRefSpliceDonorRegion or inRefSpliceAccRegion) and not deNovoDonorInRefAcc:
        if donor:
            refSpliceBounds = getVarSpliceRegionBounds(variant, donor=donor, deNovo=False)
            plusStrand = verify.getVarStrand(variant) == "+"
            seqStart = refSpliceBounds["donorStart"]
            seqEnd = refSpliceBounds["donorEnd"]
        else:
            refSpliceBounds = getVarSpliceRegionBounds(variant, donor=donor, deNovo=True)
            # de novo splice acceptor region is deNovoOffset bp longer than reference splice acceptor region
            deNovoOffset = deNovoLength - exonicPortionSize
            plusStrand = verify.getVarStrand(variant) == "+"
            seqStart = refSpliceBounds["acceptorStart"]
            if plusStrand:
                # genomic position increases from left to right on plus strand, subtraction trims refSeq
                seqEnd = refSpliceBounds["acceptorEnd"] - deNovoOffset
            else:
                # genomic position decreases from left to right on minus strand, addition trims refSeq
                seqEnd = refSpliceBounds["acceptorEnd"] + deNovoOffset
        refSpliceSeq = extract.getFastaSeq(extract.getVarChrom(variant), seqStart, seqEnd,
                                           plusStrandSeq=plusStrand)
        # .items() instead of the Python 2 only .iteritems() so this path also runs on Python 3;
        # the dict being iterated (windowSeqs) is not the one entries are deleted from
        for position, seqs in slidingWindowInfo["windowSeqs"].items():
            if seqs["refSeq"] == refSpliceSeq:
                # removes reference splice window so it is not considered for de novo splicing
                del windowAltMaxEntScanScores[position]

    # (variant position, alt score) pair of the window with the maximum alt MaxEntScan score
    maxAltWindowScore = max(windowAltMaxEntScanScores.items(), key=lambda k: k[1])
    maxVarPosition = maxAltWindowScore[0]
    maxScores = slidingWindowInfo["windowScores"][maxVarPosition]
    maxSeqs = slidingWindowInfo["windowSeqs"][maxVarPosition]

    # determines if variant is in the exonic portion specified by exonicPortionSize
    if donor:
        # exonic portion is the first exonicPortionSize bp of the donor region
        inExonicPortion = maxVarPosition <= exonicPortionSize
    else:
        # exonic portion is the last exonicPortionSize bp of the acceptor region
        inExonicPortion = (STD_ACC_SIZE - maxVarPosition) < exonicPortionSize

    return {"refMaxEntScanScore": maxScores["refMaxEntScanScore"],
            "refZScore": maxScores["refZScore"],
            "altMaxEntScanScore": maxScores["altMaxEntScanScore"],
            "altZScore": maxScores["altZScore"],
            "refSeq": maxSeqs["refSeq"],
            "altSeq": maxSeqs["altSeq"],
            "varStart": maxVarPosition - 1,
            "varLength": 1,
            "varWindowPosition": maxVarPosition,
            "inExonicPortion": inExonicPortion}
示例#33
0
def getMaxEntScanScoresSlidingWindowSNS(variant, windowSize, donor=False):
    """
    Scores every window of windowSize bp that contains the variant, for both ref and alt sequence
    If donor=True, MaxEntScan scores are calculated for splice donors
    If donor=False, MaxEntScan scores are calculated for splice acceptors
    Returns a dictionary of three dictionaries, each keyed by the 1-based position of the
      variant inside the window (windowSize for the first window down to 1 for the last):
        1. windowSeqs - {"refSeq", "altSeq"} for each window
        2. windowScores - ref and alt MaxEntScan scores and z-scores for each window
        3. windowAltMaxEntScanScores - only the alt MaxEntScan score for each window
    """
    genomicPos = int(variant["Pos"])
    strand = verify.getVarStrand(variant)
    # (windowSize - 1) bp on either side of the variant gives a (windowSize*2 - 1) bp region,
    # enough for one window with the variant at each position (1-windowSize)
    flank = windowSize - 1
    # start and end are swapped on the minus strand to preserve the sequence returned by getRefAltSeqs
    if strand == "-":
        regionStart, regionEnd = genomicPos + flank, genomicPos - flank
    else:
        regionStart, regionEnd = genomicPos - flank, genomicPos + flank
    regionSeqs = getRefAltSeqs(variant, regionStart, regionEnd)
    regionRef = regionSeqs["refSeq"]
    regionAlt = regionSeqs["altSeq"]

    windowSeqs = {}
    windowScores = {}
    windowAltMaxEntScanScores = {}
    for sliceStart in range(windowSize):
        sliceEnd = sliceStart + windowSize
        # key for this window: windowSize for the first slice, counting down to 1 for the last
        varPos = windowSize - sliceStart
        refWindow = regionRef[sliceStart:sliceEnd]
        altWindow = regionAlt[sliceStart:sliceEnd]
        windowSeqs[varPos] = {"refSeq": refWindow, "altSeq": altWindow}
        scored = getRefAltScores(refWindow, altWindow, donor=donor)
        refScores = scored["refScores"]
        altScores = scored["altScores"]
        windowScores[varPos] = {
            "refMaxEntScanScore": refScores["maxEntScanScore"],
            "refZScore": refScores["zScore"],
            "altMaxEntScanScore": altScores["maxEntScanScore"],
            "altZScore": altScores["zScore"]
        }
        windowAltMaxEntScanScores[varPos] = altScores["maxEntScanScore"]

    return {
        "windowSeqs": windowSeqs,
        "windowScores": windowScores,
        "windowAltMaxEntScanScores": windowAltMaxEntScanScores
    }