Пример #1
0
def isNonContiguous(lst, isSorted=True):
    ''' Return True iff no two neighbouring Beds in lst overlap or touch.

    lst -- sequence of objects exposing chrom, chromStart and chromEnd.
    isSorted -- when false, lst is first passed to sortByChromStartEnd
        (defined elsewhere; presumably sorts lst in place -- confirm).

    Returns False, after printing the earlier Bed of the offending pair to
    stdout, as soon as an entry starts at or before the end of the previous
    entry on the same chrom.  Calls errAbort if the list is found to be out
    of (chrom, chromStart, chromEnd) order.  Lists with fewer than two
    entries are trivially non-contiguous.
    '''
    if not isSorted:
        sortByChromStartEnd(lst)

    # range() and the single-argument print() below are valid on both
    # Python 2 and Python 3 and produce the same output on each.
    for i in range(1, len(lst)):
        prev = lst[i - 1]
        curr = lst[i]

        # Lexicographic tuple comparison: chrom first, then start, then end.
        if ((curr.chrom, curr.chromStart, curr.chromEnd) <
                (prev.chrom, prev.chromStart, prev.chromEnd)):
            errAbort("Bed list is not sorted.")

        # "<=" rather than "<": an entry starting exactly where the previous
        # one ends is contiguous and therefore also rejected.
        if curr.chrom == prev.chrom and curr.chromStart <= prev.chromEnd:
            print("%s %s %s" % (prev.chrom, prev.chromStart, prev.chromEnd))
            return False
    return True
Пример #2
0
def codonValue(dna):
    ''' Look up the codonTable entry for a 3-character DNA string.

    The lookup is case-insensitive (dna is upper-cased first).  Returns
    NON_CODON when the upper-cased string is not a key of codonTable.
    Calls errAbort when dna is not exactly three characters long.
    '''
    if len(dna) != 3:
        message = "codonValue takes a 3-bp dna string as argument: %s" % dna
        errAbort(message)

    key = dna.upper()
    return codonTable.get(key, NON_CODON)
Пример #3
0
def getDNA(chrom, start, end, fn, noMask=False):
    ''' Return the DNA associated with the BED-style position as a single long string.

    Runs the external twoBitToFa program on the file fn, asking for the
    range chrom:start-end, discards the first (header) line of its output
    and returns the remaining lines joined together with all whitespace
    removed.

    chrom, start, end -- sequence name and integer coordinates (start and
        end are formatted with %d).
    fn -- path handed to twoBitToFa as its input file.
    noMask -- when true, the -noMask flag is passed to twoBitToFa.

    Calls errAbort if the program cannot be started or if its output has
    fewer than two lines.
    '''
    # Build an argument vector and run it without a shell, so that spaces or
    # shell metacharacters in fn / chrom are passed through literally instead
    # of being interpreted.
    cmd = ["twoBitToFa", fn, "stdout"]
    if noMask:
        cmd.append("-noMask")
    cmd.append("-seq=%s:%d-%d" % (chrom, start, end))

    try:
        # universal_newlines=True yields text on both Python 2 and Python 3;
        # calling str() on Python 3 bytes would give a "b'...'" repr instead.
        proc = subprocess.Popen(cmd,
                                stdout=subprocess.PIPE,
                                universal_newlines=True)
    except OSError as err:
        errAbort("Unable to run twoBitToFa: %s" % err)
        # Only reached if errAbort returns; never continue without a process.
        raise

    lines = proc.communicate()[0].strip().split('\n')
    if len(lines) < 2:
        errAbort("Must be at least a header line and one line of DNA.")

    # Skip the header line; split()/join drops every remaining whitespace char.
    sequence = "".join(lines[1:])
    return "".join(sequence.split())
Пример #4
0
def getOverlappingRegionDict(bd1, bd2, debug=True):
    ''' Return a dictionary of overlapping regions within two dictionaries of Bed regions.

    bd1, bd2 -- dictionaries mapping a chrom name to a list of objects
        exposing chrom, chromStart and chromEnd.  The sweep below advances
        through both lists in step, so each list is assumed to be ordered
        by position.
    debug -- when true, both lists of every shared chrom are first checked
        with isNonContiguous and errAbort is called if either check fails.

    The result maps chrom to a list of Bed.Bed objects, one per pair of
    regions whose intersection is non-empty.  Chroms that are missing from
    bd2, or that produce no overlap, do not appear in the result.
    '''
    retval = {}
    for chrom, bedList1 in bd1.items():
        bedList2 = bd2.get(chrom, [])
        if not bedList2:
            continue

        if debug:
            for bedList in (bedList1, bedList2):
                if not isNonContiguous(bedList):
                    errAbort(
                        "Calculating overlapping regions must have non-contiguous input elements."
                    )

        b1Len = len(bedList1)
        b2Len = len(bedList2)
        b1Idx = 0
        b2Idx = 0

        while (b1Idx < b1Len) and (b2Idx < b2Len):
            b1Curr = bedList1[b1Idx]
            b2Curr = bedList2[b2Idx]
            assert b1Curr.chrom == b2Curr.chrom

            maxStart = max(b1Curr.chromStart, b2Curr.chromStart)
            minEnd = min(b1Curr.chromEnd, b2Curr.chromEnd)
            if maxStart < minEnd:
                # Append in place; rebuilding the list with "+" on every hit
                # copies everything collected so far for this chrom.
                retval.setdefault(b1Curr.chrom, []).append(
                    Bed.Bed("%s\t%d\t%d" % (b1Curr.chrom, maxStart, minEnd)))

            # Advance whichever region ends first; on a tie advance both.
            if b1Curr.chromEnd < b2Curr.chromEnd:
                b1Idx += 1
            elif b1Curr.chromEnd > b2Curr.chromEnd:
                b2Idx += 1
            else:
                b1Idx += 1
                b2Idx += 1
    return retval
Пример #5
0
def exonMutationType(wtDNA, mutantDNA):
    ''' Classify the change from wtDNA to mutantDNA.  Assumes both lie fully within exons.

    Lengths are compared after removing '-' characters:
      * mutant longer  -> INFRAME_INS if the difference is a multiple of 3,
                          otherwise FS_INS
      * mutant shorter -> INFRAME_DEL if the difference is a multiple of 3,
                          otherwise FS_DEL
      * equal lengths  -> both strings are passed to codonValue (errAbort is
                          called if either yields NON_CODON) and the result is
                          NONSENSE_MUT, GAIN_MUT, SILENT_MUT or MISSENSE_MUT
                          depending on isStopCodon and the two codon values.
    '''
    wtBases = len(wtDNA.replace('-', ''))
    mutantBases = len(mutantDNA.replace('-', ''))

    # Insertions and deletions: only the frame of the length change matters.
    if wtBases < mutantBases:
        inFrame = (mutantBases - wtBases) % 3 == 0
        return INFRAME_INS if inFrame else FS_INS
    if wtBases > mutantBases:
        inFrame = (wtBases - mutantBases) % 3 == 0
        return INFRAME_DEL if inFrame else FS_DEL

    # Same length: compare the two codons.
    wtCodon = codonValue(wtDNA)
    mutantCodon = codonValue(mutantDNA)
    if (wtCodon == NON_CODON) or (mutantCodon == NON_CODON):
        errAbort(
            "Invalid DNA codons tested for mutation type.  Wt: %s, mutant: %s"
            % (wtDNA, mutantDNA))

    wtIsStop = isStopCodon(wtDNA)
    mutantIsStop = isStopCodon(mutantDNA)

    if wtIsStop:
        # Stop -> stop is reported as silent; stop -> non-stop as GAIN_MUT.
        return SILENT_MUT if mutantIsStop else GAIN_MUT
    if mutantIsStop:
        return NONSENSE_MUT
    return SILENT_MUT if wtCodon == mutantCodon else MISSENSE_MUT