Пример #1
0
def readFile(inFile, afCutoff, aoCutoff):
    """Print the records of a VCF-style file that pass both cutoffs.

    A tab-separated header row is printed first. Then, for every data line
    (a line that contains 'chr' and no '#'), the line is parsed with
    ``seqRead`` and printed as one row when ``af() >= afCutoff`` and
    ``ao() >= aoCutoff``.

    Args:
        inFile: Path of the text file to read.
        afCutoff: Inclusive lower bound compared against the record's af().
        aoCutoff: Inclusive lower bound compared against the record's ao().

    Returns:
        None. All results go to standard output.
    """
    # The context manager guarantees the handle is released even when
    # parsing or formatting a record raises part-way through the file.
    with open(inFile, 'r') as target:
        print('Chrom-Loc\tAO\t\tDP\t\tAF')
        for line in target:
            # Skip VCF meta/header lines and anything without a chromosome.
            if '#' in line or 'chr' not in line:
                continue
            # NOTE(review): seqRead is not imported inside this function; it
            # must already be available at module scope.
            lineObj = seqRead(line)
            # ao() is only evaluated when the af() test passes (short-circuit).
            if lineObj.af() >= afCutoff and lineObj.ao() >= aoCutoff:
                print('%s-%s\t%f\t%f\t%f' % (lineObj.chrom(), lineObj.loc(),
                                             lineObj.ao(), lineObj.dp(),
                                             lineObj.af()))
Пример #2
0
def elimBadAligns(inFile, outFile):
    """Copy a VCF-style file, dropping records outside the probed locations.

    Every line containing '#' is copied to the output unchanged. A data line
    (contains 'chr' and no '#') is parsed with ``seqRead`` and copied only
    when its chrom() is a key of the target table and at least one of that
    chromosome's target strings occurs inside its loc(). Any other line is
    dropped.

    Args:
        inFile: Path of the file to filter.
        outFile: Path of the filtered copy; opened in 'w' mode, so an
            existing file is overwritten.

    Returns:
        None.
    """
    from parseLine import seqRead

    # Digit strings, per chromosome, that a record's location is tested against.
    targetLocs = {
        'chr1': ['1152278', '1152279', '1152564', '1152565', '1152566',
                 '1152587', '1152588'],
        'chr2': ['254572', '254573', '2091130', '2091131', '2091132',
                 '1982668', '1982669', '2231906', '2231907', '2231908',
                 '2290411', '2290412'],
        'chr4': ['1061972', '1061973', '1061974', '1061551', '1061552',
                 '1105411', '1105412', '1105413', '1129972', '1129973',
                 '1211677', '1211678', '1235477', '1235478', '1235479',
                 '1244286', '1244287'],
        'chr9': ['50737', '50738'],
        'chr11': ['21262', '21263', '21264', '23899', '2390', '2593', '2594',
                  '114865', '114866', '114867', '5342', '5343'],
        'chr12': ['253982', '253983', '253984', '253802', '253803'],
        'chr15': ['925270', '925271', '906318', '906319'],
        'chr16': ['733796', '733797', '733798', '824550', '824551', '859491',
                  '859492'],
        'chr17': ['75775', '75776', '75783', '75784', '75785', '75770',
                  '75771', '75772'],
        'chrX': ['486496', '486497', '486498'],
    }

    # Nested context managers close both handles even if parsing or writing
    # raises; the input is opened first so a missing input never truncates
    # an existing output file.
    with open(inFile, 'r') as inTarget:
        with open(outFile, 'w') as outTarget:
            for line in inTarget:
                if '#' in line:
                    # Header / meta information is always preserved.
                    outTarget.write(line)
                    continue
                if 'chr' not in line:
                    continue

                lineObj = seqRead(line)
                chrom = lineObj.chrom()
                if chrom not in targetLocs:  # chromosome is not probed
                    continue

                loc = lineObj.loc()
                # NOTE(review): `in` is a substring test (this assumes loc()
                # returns a string), so a target such as '50737' matches any
                # location that merely contains those digits, not only ones
                # that start with them. Kept as-is; confirm whether a prefix
                # match (str.startswith) was intended.
                if any(probe in loc for probe in targetLocs[chrom]):
                    outTarget.write(line)
Пример #3
0
def mutationsPerProbe(inFile, outputDir):
    """Tally variants per capture probe and plot the tallies into one PDF.

    Each data line of ``inFile`` (contains 'chr' and no '#') is parsed with
    ``seqRead`` and mapped to a probe name via ``identifyProbe(loc())``.
    Records for which ``identifyProbe`` returns a falsy value are ignored.
    Three per-probe tallies are collected, passed through
    ``getMeanCoverage`` / ``normalizeCounts`` and drawn with ``plotTally``
    into ``<outputDir>/probeBias.pdf``.

    Args:
        inFile: Path of the VCF-style text file to read.
        outputDir: Directory that receives 'probeBias.pdf'; it is also
            forwarded to plotTally.

    Returns:
        None.

    Raises:
        KeyError: If identifyProbe returns a truthy name that is not one of
            the probe names listed below.
    """
    from parseLine import seqRead

    # Single source of truth for the probe names shared by all three tallies.
    probeNames = (
        'TIIIa', 'NRAS-1', 'NRAS-2', 'DNMT3a', 'IDH1', 'SF3B1', 'TIIIb',
        'TIIIc', 'TET2-1', 'TET2-2', 'TIIId', 'TIIIe', 'TIIIf', 'TIIIg',
        'TIIIh', 'JAK2', 'TIIIj', 'TIIIk', 'TIIIl', 'TIIIm', 'HRAS',
        'KRAS-1', 'KRAS-2', 'TIIIn', 'IDH2', 'TIIIo', 'TIIIp', 'TIIIq',
        'p53-1', 'p53-2', 'p53-3', 'GATA1',
    )

    # Number of variant records seen within each capture region
    uniqVars = dict.fromkeys(probeNames, 0)
    # Sum of ao() over the records of each capture region (af() < 0.4 only)
    totalVars = dict.fromkeys(probeNames, 0)
    # dp() of every record within each capture region; each key needs its
    # own list, so dict.fromkeys (which would share one list) is not used
    totalCoverage = dict((name, []) for name in probeNames)

    # The context manager closes the input as soon as tallying is done, and
    # also when a record fails to parse.
    with open(inFile, 'r') as target:
        for line in target:
            if '#' in line or 'chr' not in line:
                continue
            lineObj = seqRead(line)
            probeNum = identifyProbe(lineObj.loc())
            if not probeNum:
                continue
            uniqVars[probeNum] += 1
            if lineObj.af() < 0.4:  # Don't want germline bias
                totalVars[probeNum] += lineObj.ao()
            totalCoverage[probeNum].append(lineObj.dp())

    print(totalCoverage)
    from matplotlib.backends.backend_pdf import PdfPages
    pdf = PdfPages(outputDir + '/probeBias.pdf')

    # try/finally so the PDF is finalised even if a helper raises mid-way.
    try:
        totalCoverage = getMeanCoverage(totalCoverage)
        pdf = plotTally(outputDir, totalCoverage, pdf, 'Probe Bias',
                        'Avg Num of Captures')

        uniqVars = normalizeCounts(uniqVars, totalCoverage)
        pdf = plotTally(outputDir, uniqVars, pdf,
                        'Normalized Unique Variants Per Probe',
                        'Number of Variants')

        totalVars = normalizeCounts(totalVars, totalCoverage)
        pdf = plotTally(outputDir, totalVars, pdf,
                        'Normalized Total Variants Per Probe',
                        'Number of Variants')
    finally:
        pdf.close()
Пример #4
0
#!/usr/bin/python

from parseLine import seqRead

# Smoke test for seqRead: parse one hard-coded, tab-separated VCF record and
# print the result of each accessor on its own line.
line = (
    'chr18\t22343095\t.\tG\tA\t13777\t.\t'
    'AB=0;ABP=0;AC=2;AF=1;AN=2;AO=636;CIGAR=1X;DP=637;DPB=637;DPRA=0;'
    'EPP=1384.07;EPPR=5.18177;GTI=0;LEN=1;MEANALT=1;MQM=60;MQMR=60;NS=1;'
    'NUMALT=1;ODDS=875.012;PAIRED=0;PAIREDR=0;PAO=0;PQA=0;PQR=0;PRO=0;'
    'QA=21725;QR=36;RO=1;RPL=636;RPP=1384.07;RPPR=5.18177;RPR=0;RUN=1;'
    'SAF=636;SAP=1384.07;SAR=0;SRF=1;SRP=5.18177;SRR=0;TYPE=snp\t'
    'GT:DP:DPR:RO:QR:AO:QA:GL\t'
    '1/1:637:637,636:1:36:636:21725:-1951.16,-188.158,0\n'
)

x = seqRead(line)
# Each accessor is looked up and called just before its value is printed,
# in this fixed order.
for accessor in ('chrom', 'loc', 'wt', 'var', 'ao', 'dp', 'af'):
    print(getattr(x, accessor)())