Example #1
def runHalfLifes(bams, timepoints, outputDirectory) :
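    # Compute RNA half-lives from a series of timepoint BAMs; the output .tsv/.log names derive from the first BAM in the list.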
    outputCSV = os.path.join(outputDirectory, replaceExtension(basename(bams[0]), ".tsv", "_halflifes"))
    outputLOG = os.path.join(outputDirectory, replaceExtension(basename(bams[0]), ".log", "_halflifes"))
    log = getLogFile(outputLOG)
    stats.halflifes(",".join(bams), outputCSV, timepoints, log)
    closeLogFile(log)
    stepFinished()
Example #2
def runCollapse(tid, tcount, outputDirectory) :
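    # Collapse UTR-level tcount entries (alleyoop 'collapse'); outputs take the input name with a _collapsed suffix.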
    outputTCOUNT = os.path.join(outputDirectory, replaceExtension(basename(tcount), ".csv", "_collapsed"))
    outputLOG = os.path.join(outputDirectory, replaceExtension(basename(tcount), ".log", "_collapsed"))
    log = getLogFile(outputLOG)
    tcounter.collapse(tcount, outputTCOUNT, log)
    closeLogFile(log)
    stepFinished()
Example #3
def runDedup(tid, bam, outputDirectory, tcMutations) :
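    # Deduplicate the BAM; tcMutations optionally restricts processing to reads with a given number of T>C mutations.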
    outputBAM = os.path.join(outputDirectory, replaceExtension(basename(bam), ".bam", "_dedup"))
    outputLOG = os.path.join(outputDirectory, replaceExtension(basename(bam), ".log", "_dedup"))
    log = getLogFile(outputLOG)
    deduplicator.Dedup(bam, outputBAM, tcMutations, log)
    closeLogFile(log)
    stepFinished()
Example #4
def runStatsRatesUTR(tid, bam, referenceFile, minMQ, strictTCs,
                     outputDirectory, utrFile, maxReadLength):
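    # Per-UTR overall mutation rates; maxReadLength is estimated from the BAM below when not supplied.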
    outputCSV = os.path.join(
        outputDirectory,
        replaceExtension(basename(bam), ".csv", "_mutationrates_utr"))
    outputPDF = os.path.join(
        outputDirectory,
        replaceExtension(basename(bam), ".pdf", "_mutationrates_utr"))
    outputLOG = os.path.join(
        outputDirectory,
        replaceExtension(basename(bam), ".log", "_mutationrates_utr"))

    if (maxReadLength == None):
        maxReadLength = estimateMaxReadLength(bam)
    if (maxReadLength < 0):
        print(
            "Could not reliably estimate maximum read length. Please specify the --max-read-length parameter."
        )
        sys.exit(0)

    log = getLogFile(outputLOG)

    print("Using " + str(maxReadLength) + " as maximum read length.", file=log)

    stats.statsComputeOverallRatesPerUTR(referenceFile, bam, minMQ, strictTCs,
                                         outputCSV, outputPDF, utrFile,
                                         maxReadLength, log)
    closeLogFile(log)
    stepFinished()
Example #5
def runTcPerUtr(tid, bam, referenceFile, bed, minMQ, maxReadLength,
                outputDirectory, snpDirectory):
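    # Conversion rates per UTR position; SNP masking is applied only when a SNP directory is supplied.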
    outputCSV = os.path.join(
        outputDirectory, replaceExtension(basename(bam), ".csv", "_tcperutr"))
    outputPDF = os.path.join(
        outputDirectory, replaceExtension(basename(bam), ".pdf", "_tcperutr"))
    outputLOG = os.path.join(
        outputDirectory, replaceExtension(basename(bam), ".log", "_tcperutr"))
    if (snpDirectory != None):
        inputSNP = os.path.join(
            snpDirectory, replaceExtension(basename(bam), ".vcf", "_snp"))
    else:
        inputSNP = None

    if (maxReadLength == None):
        maxReadLength = estimateMaxReadLength(bam)
    if (maxReadLength < 0):
        print(
            "Could not reliably estimate maximum read length. Please specify the --max-read-length parameter."
        )
        sys.exit(0)

    log = getLogFile(outputLOG)

    print("Using " + str(maxReadLength) + " as maximum read length.", file=log)

    stats.tcPerUtr(referenceFile, bed, bam, minMQ, maxReadLength, outputCSV,
                   outputPDF, inputSNP, log, False, True, True)

    closeLogFile(log)
    stepFinished()
Example #6
def runSNPeval(tid, bam, ref, bed, maxLength, minQual, coverageCutoff,
               variantFraction, strictTCs, outputDirectory, snpDirectory):
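    # Evaluate SNP calling: compute SNP-masked conversion rates using the matching _snp.vcf from snpDirectory.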

    outputCSV = os.path.join(
        outputDirectory, replaceExtension(basename(bam), ".csv", "_SNPeval"))
    outputPDF = os.path.join(
        outputDirectory, replaceExtension(basename(bam), ".pdf", "_SNPeval"))
    outputLOG = os.path.join(
        outputDirectory, replaceExtension(basename(bam), ".log", "_SNPeval"))

    if (not os.path.isdir(snpDirectory)):
        print("SNP directory does not exist. Aborting.")
        sys.exit(0)

    inputSNP = os.path.join(snpDirectory,
                            replaceExtension(basename(bam), ".vcf", "_snp"))

    if (maxLength == None):
        maxLength = estimateMaxReadLength(bam)
    if (maxLength < 0):
        print(
            "Could not reliably estimate maximum read length. Please specify the --max-read-length parameter."
        )
        sys.exit(0)

    log = getLogFile(outputLOG)

    print("Using " + str(maxLength) + " as maximum read length.", file=log)

    stats.computeSNPMaskedRates(ref, bed, inputSNP, bam, maxLength, minQual,
                                coverageCutoff, variantFraction, outputCSV,
                                outputPDF, strictTCs, log)
    stepFinished()
Example #7
File: splash.py Project: pforai/slamdunk
def reads(outputDirectory, bed, sampleName, readLenght, readNumber, readCoverage, seqError, pulseTimePoint, chaseTimePoint, conversionRate, sampleInfo, labledTranscripots = -1.0):
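    # Simulate reads for one sample from the prepared .bed12/.fa/.eplv companions of 'bed', then add T>C conversions.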
    message("Simulating read sample: " + sampleName)
         
    bed12File = replaceExtension(bed, ".bed12")
    bed12FastaFile = replaceExtension(bed, ".fa")
    explvFile = replaceExtension(bed, ".eplv")
     
    bedReads = os.path.join(outputDirectory, sampleName + "_reads_tmp.bed")
    faReads = os.path.join(outputDirectory, sampleName + "_reads_tmp.fa")
     
    totalUTRlength = simulator.getTotalUtrLength(bed12File)
     
    if(readNumber == 0):
        readNumber = (totalUTRlength / readLenght) *  readCoverage
        readNumber = int(readNumber * (random.uniform(-0.2, 0.2) + 1)) 
     
    #message("Simulating " + str(readNumber) + " reads with sequencing error of " + str(seqError))
    simulator.simulateReads(bed12File, bed12FastaFile, explvFile, bedReads, faReads, readLenght, readNumber, seqError)
     
    bamReadsWithTC = os.path.join(outputDirectory, sampleName + "_reads.bam")
    utrSummary = os.path.join(outputDirectory, sampleName + "_utrsummary.tsv")
     
    simulator.addTcConversions(bed, faReads, bamReadsWithTC, pulseTimePoint, chaseTimePoint, utrSummary, conversionRate, readNumber, sampleInfo, labledTranscripots)
     
    os.unlink(faReads)
    os.unlink(bedReads)
Example #8
def runCount(tid, bam, ref, bed, maxLength, minQual, strictTCs,
             outputDirectory, snpDirectory):
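    # Count T>C conversions per UTR; writes a tcount .tsv plus per-strand bedgraph tracks and returns the tcount path.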
    outputCSV = os.path.join(
        outputDirectory, replaceExtension(basename(bam), ".tsv", "_tcount"))
    outputBedgraphPlus = os.path.join(
        outputDirectory,
        replaceExtension(basename(bam), ".bedgraph", "_tcount_plus"))
    outputBedgraphMinus = os.path.join(
        outputDirectory,
        replaceExtension(basename(bam), ".bedgraph", "_tcount_mins"))
    outputLOG = os.path.join(
        outputDirectory, replaceExtension(basename(bam), ".log", "_tcount"))
    if (snpDirectory != None):
        inputSNP = os.path.join(
            snpDirectory, replaceExtension(basename(bam), ".vcf", "_snp"))
    else:
        inputSNP = None

    if (maxLength == None):
        maxLength = estimateMaxReadLength(bam)
    if (maxLength < 0):
        print(
            "Could not reliably estimate maximum read length. Please specify the --max-read-length parameter."
        )
        sys.exit(0)

    log = getLogFile(outputLOG)

    print("Using " + str(maxLength) + " as maximum read length.", file=log)

    tcounter.computeTconversions(ref, bed, inputSNP, bam, maxLength, minQual,
                                 outputCSV, outputBedgraphPlus,
                                 outputBedgraphMinus, strictTCs, log)
    stepFinished()
    return outputCSV
Example #9
def runStatsTCContext(tid, bam, referenceFile, minMQ, outputDirectory) :
    outputCSV = os.path.join(outputDirectory, replaceExtension(basename(bam), ".csv", "_tccontext"))
    outputPDF = os.path.join(outputDirectory, replaceExtension(basename(bam), ".pdf", "_tccontext"))
    outputLOG = os.path.join(outputDirectory, replaceExtension(basename(bam), ".log", "_tccontext"))
    log = getLogFile(outputLOG)
    stats.statsComputeTCContext(referenceFile, bam, minMQ, outputCSV, outputPDF, log)
    closeLogFile(log)
    stepFinished()
Example #10
def runFilter(tid, bam, bed, mq, minIdentity, maxNM, outputDirectory):
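    # Filter mapped reads by mapping quality, identity and mismatch count (NM); 'bed' may be None (see runAll's multimap handling).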
    outputBAM = os.path.join(
        outputDirectory, replaceExtension(basename(bam), ".bam", "_filtered"))
    outputLOG = os.path.join(
        outputDirectory, replaceExtension(basename(bam), ".log", "_filtered"))
    filter.Filter(bam, outputBAM, getLogFile(outputLOG), bed, mq, minIdentity,
                  maxNM, printOnly, verbose)
    stepFinished()
Example #11
def runStatsRates(tid, bam, referenceFile, minMQ, outputDirectory) :
    outputCSV = os.path.join(outputDirectory, replaceExtension(basename(bam), ".csv", "_overallrates"))
    outputPDF = os.path.join(outputDirectory, replaceExtension(basename(bam), ".pdf", "_overallrates"))
    outputLOG = os.path.join(outputDirectory, replaceExtension(basename(bam), ".log", "_overallrates"))
    log = getLogFile(outputLOG)
    stats.statsComputeOverallRates(referenceFile, bam, minMQ, outputCSV, outputPDF, log)
    closeLogFile(log)
    stepFinished()
Example #12
def runSnp(tid, referenceFile, minCov, minVarFreq, minQual, inputBAM,
           outputDirectory):
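    # Call SNPs on the BAM against the reference; minCov/minVarFreq/minQual are the variant-calling cutoffs.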
    outputSNP = os.path.join(
        outputDirectory, replaceExtension(basename(inputBAM), ".vcf", "_snp"))
    outputLOG = os.path.join(
        outputDirectory, replaceExtension(basename(inputBAM), ".log", "_snp"))
    snps.SNPs(inputBAM, outputSNP, referenceFile, minVarFreq, minCov, minQual,
              getLogFile(outputLOG), printOnly, verbose, False)
    stepFinished()
Example #13
File: splash.py Project: pforai/slamdunk
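# Prepare the simulated UTR set: writes .bed12/.fa/.eplv/.vcf companions of the input bed via simulator.prepareUTRs.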
def Utrs(outputDirectory, bed, referenceFasta, readLength, polyALength, snpRate):
    message("Simulating UTRs")
    createDir(outputDirectory)
    bed12 = os.path.join(outputDirectory, replaceExtension(basename(bed), ".bed12", "_utrs"))
    bed12Fasta = os.path.join(outputDirectory, replaceExtension(basename(bed), ".fa", "_utrs"))
    explv = os.path.join(outputDirectory, replaceExtension(basename(bed), ".eplv", "_utrs"))
    vcfFile = os.path.join(outputDirectory, replaceExtension(basename(bed), ".vcf", "_utrs"))

    totalUTRlength = simulator.prepareUTRs(bed, bed12, bed12Fasta, referenceFasta, readLength, polyALength, explv, snpRate, vcfFile)
Example #14
def runMap(tid, inputBAM, referenceFile, threads, trim5p, maxPolyA,
           quantseqMapping, endtoendMapping, topn, sampleDescription,
           outputDirectory, skipSAM):
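    # Map one sample; with skipSAM the mapper writes a .bam directly, otherwise a .sam for the later sam2bam step.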
    if skipSAM:
        outputSAM = os.path.join(
            outputDirectory,
            replaceExtension(basename(inputBAM), ".bam", "_slamdunk_mapped"))
    else:
        outputSAM = os.path.join(
            outputDirectory,
            replaceExtension(basename(inputBAM), ".sam", "_slamdunk_mapped"))
    outputLOG = os.path.join(
        outputDirectory,
        replaceExtension(basename(inputBAM), ".log", "_slamdunk_mapped"))

    sampleName = "sample_" + str(tid)
    sampleType = "NA"
    sampleTime = "-1"
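    # sampleDescription format: "name:type:time"; type accepts p/c/pulse/chase, anything else is kept as-is.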
    if (sampleDescription != ""):
        sampleDescriptions = sampleDescription.split(":")
        if (len(sampleDescriptions) >= 1):
            sampleName = sampleDescriptions[0]
        if (len(sampleDescriptions) >= 2):
            typeDict = {
                'p': 'pulse',
                'c': 'chase',
                'pulse': 'pulse',
                'chase': 'chase',
                '': 'NA'
            }
            if sampleDescriptions[1] in typeDict:
                sampleType = typeDict[sampleDescriptions[1]]
            else:
                sampleType = sampleDescriptions[1]
        if (len(sampleDescriptions) >= 3):
            sampleTime = sampleDescriptions[2]

    mapper.Map(inputBAM,
               referenceFile,
               outputSAM,
               getLogFile(outputLOG),
               quantseqMapping,
               endtoendMapping,
               threads=threads,
               trim5p=trim5p,
               maxPolyA=maxPolyA,
               topn=topn,
               sampleId=tid,
               sampleName=sampleName,
               sampleType=sampleType,
               sampleTime=sampleTime,
               printOnly=printOnly,
               verbose=verbose)
    stepFinished()
Example #15
def runReadSeparator(tid, bam, ref, minQual, conversionThreshold, outputDirectory, snpDirectory) :
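    # Separate T>C reads from background reads genome-wide, masking known SNP positions when a SNP directory is given.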
    outputBAM = os.path.join(outputDirectory, replaceExtension(basename(bam), "", ""))
    outputLOG = os.path.join(outputDirectory, replaceExtension(basename(bam), ".log", "_read_separator"))
    if(snpDirectory != None):
        inputSNP = os.path.join(snpDirectory, replaceExtension(basename(bam), ".vcf", "_snp"))
    else:
        inputSNP = None
    
    log = getLogFile(outputLOG)
        
    tcounter.genomewideReadSeparation(ref, inputSNP, bam, minQual, outputBAM, conversionThreshold, log)
    stepFinished()
Example #16
def runPositionalRates(tid, bam, ref, minQual, conversionThreshold, coverageCutoff, outputDirectory, snpDirectory) :
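    # Write genome-wide per-position conversion-rate tracks (bedgraph) under outputBedGraphPrefix.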
    outputBedGraphPrefix = os.path.join(outputDirectory, replaceExtension(basename(bam), "", "_positional_rates"))
    outputLOG = os.path.join(outputDirectory, replaceExtension(basename(bam), ".log", "_positional_rates"))
    if(snpDirectory != None):
        inputSNP = os.path.join(snpDirectory, replaceExtension(basename(bam), ".vcf", "_snp"))
    else:
        inputSNP = None
    
    log = getLogFile(outputLOG)
        
    tcounter.genomewideConversionRates(ref, inputSNP, bam, minQual, outputBedGraphPrefix, conversionThreshold, coverageCutoff, log)
    stepFinished()
Example #17
def runDumpReadInfo(tid, bam, referenceFile, minMQ, outputDirectory, snpDirectory):
    outputCSV = os.path.join(outputDirectory, replaceExtension(basename(bam), ".sdunk", "_readinfo"))
    outputLOG = os.path.join(outputDirectory, replaceExtension(basename(bam), ".log", "_readinfo"))
    if(snpDirectory != None):
        inputSNP = os.path.join(snpDirectory, replaceExtension(basename(bam), ".vcf", "_snp"))
    else:
        inputSNP = None
    log = getLogFile(outputLOG)
    
    dump.dumpReadInfo(referenceFile, bam, minMQ, outputCSV, inputSNP, log)
    
    closeLogFile(log)
    stepFinished()
Example #18
def runSam2Bam(tid, bam, threads, outputDirectory):
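    # Sort and convert the mapper's SAM output into a BAM, keeping the _slamdunk_mapped naming.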
    inputSAM = os.path.join(
        outputDirectory,
        replaceExtension(basename(bam), ".sam", "_slamdunk_mapped"))
    outputBAM = os.path.join(
        outputDirectory,
        replaceExtension(basename(bam), ".bam", "_slamdunk_mapped"))
    outputLOG = os.path.join(
        outputDirectory,
        replaceExtension(basename(bam), ".log", "_slamdunk_mapped"))
    mapper.sort(inputSAM, outputBAM, getLogFile(outputLOG), threads, False,
                printOnly, verbose)
    stepFinished()
Example #19
def prepareBed(outputDirectory, bed, readLength):

    createDir(outputDirectory)
    slamSimBed = os.path.join(
        outputDirectory,
        replaceExtension(basename(bed), ".bed", "_original"))
    simulator.prepareBED(bed, slamSimBed, readLength)
Example #20
File: splash.py Project: pforai/slamdunk
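# Assign simulated turnover (half-lives) to UTRs; with skipTurnover the input bed is copied through unchanged.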
def turnOver(outputDirectory, bed, minHalflife, maxHalfLife, skipTurnover=False):
    message("Simulating turnover")
    createDir(outputDirectory)
    trunoverBed = os.path.join(outputDirectory, replaceExtension(basename(bed), ".bed", "_utrs"))
    if not skipTurnover:
        simulator.simulateTurnOver(bed, trunoverBed, minHalflife, maxHalfLife)
    else:
        copyfile(bed, trunoverBed)
Example #21
def runAll(args):
    message("slamdunk all")

    if args.sampleIndex > -1:
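        # Random 0.2-2 s delay; with an explicit sample index, this likely avoids racing parallel per-sample launches.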
        sec = random.randrange(200, 2000) / 1000.0
        message("Waiting " + str(sec) + " seconds")
        sleep(sec)

    # Setup slamdunk run folder

    outputDirectory = args.outputDir
    createDir(outputDirectory)

    n = args.threads
    referenceFile = args.referenceFile

    # Run mapper dunk

    dunkPath = os.path.join(outputDirectory, "map")
    createDir(dunkPath)

    samples, samplesInfos = getSamples(args.files, runOnly=args.sampleIndex)

    message("Running slamDunk map for " + str(len(samples)) + " files (" +
            str(n) + " threads)")

    for i in xrange(0, len(samples)):
        bam = samples[i]
        sampleInfo = samplesInfos[i]
        tid = i
        if args.sampleIndex > -1:
            tid = args.sampleIndex
        runMap(tid, bam, referenceFile, n, args.trim5, args.maxPolyA,
               args.quantseq, args.endtoend, args.topn, sampleInfo, dunkPath,
               args.skipSAM)

    dunkFinished()

    if (not args.skipSAM):
        message("Running slamDunk sam2bam for " + str(len(samples)) +
                " files (" + str(n) + " threads)")
        results = Parallel(n_jobs=1, verbose=verbose)(
            delayed(runSam2Bam)(tid, samples[tid], n, dunkPath)
            for tid in range(0, len(samples)))
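        # Note: n_jobs=1 above serializes sam2bam, likely because each sort call already uses n threads.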
        dunkFinished()

    dunkbufferIn = []

    for file in samples:
        dunkbufferIn.append(
            os.path.join(
                dunkPath,
                replaceExtension(basename(file), ".bam", "_slamdunk_mapped")))

    # Run filter dunk

    bed = args.bed

    if args.filterbed:
        bed = args.filterbed

    if (not args.multimap):
        bed = None

    dunkPath = os.path.join(outputDirectory, "filter")
    createDir(dunkPath)

    message("Running slamDunk filter for " + str(len(samples)) + " files (" +
            str(n) + " threads)")
    results = Parallel(n_jobs=n, verbose=verbose)(
        delayed(runFilter)(tid, dunkbufferIn[tid], bed, args.mq, args.identity,
                           args.nm, dunkPath)
        for tid in range(0, len(samples)))

    dunkFinished()

    # Collect filtered BAM paths as input for the next dunk

    dunkbufferOut = []

    for file in dunkbufferIn:
        dunkbufferOut.append(
            os.path.join(dunkPath,
                         replaceExtension(basename(file), ".bam",
                                          "_filtered")))

    dunkbufferIn = dunkbufferOut

    dunkbufferOut = []

    dunkFinished()

    # Run snps dunk

    dunkPath = os.path.join(outputDirectory, "snp")
    createDir(dunkPath)

    minCov = args.cov
    minVarFreq = args.var

    snpThread = n
    if (snpThread > 1):
        snpThread = snpThread / 2

    #if (args.minQual == 0) :
    #    snpqual = 13
    #else :
    snpqual = args.minQual

    message("Running slamDunk SNP for " + str(len(samples)) + " files (" +
            str(snpThread) + " threads)")
    results = Parallel(n_jobs=snpThread, verbose=verbose)(
        delayed(runSnp)(tid, referenceFile, minCov, minVarFreq, snpqual,
                        dunkbufferIn[tid], dunkPath)
        for tid in range(0, len(samples)))

    dunkFinished()

    # Run count dunk

    dunkPath = os.path.join(outputDirectory, "count")
    createDir(dunkPath)

    snpDirectory = os.path.join(outputDirectory, "snp")

    message("Running slamDunk tcount for " + str(len(samples)) + " files (" +
            str(n) + " threads)")
    results = Parallel(n_jobs=n, verbose=verbose)(
        delayed(runCount)(tid, dunkbufferIn[tid], referenceFile, args.bed,
                          args.maxLength, args.minQual, args.strictTCs,
                          dunkPath, snpDirectory)
        for tid in range(0, len(samples)))

    dunkFinished()
Example #22
def runSummary(bam, outputFile, countDirectory):
    
    outputLog = replaceExtension(outputFile, ".log")
    stats.readSummary(bam, countDirectory, outputFile, getLogFile(outputLog))
Example #23
def run():

    ########################################################################
    # Argument parsing
    ########################################################################

    # Info
    usage = "AlleyOop utility tools and diagnostics for SLAMSeq data"

    # Main Parsers
    parser = ArgumentParser(description=usage,
                            formatter_class=ArgumentDefaultsHelpFormatter)
    parser.add_argument('--version',
                        action='version',
                        version='%(prog)s ' + __version__)

    # Initialize Subparsers
    subparsers = parser.add_subparsers(help="", dest="command")

    # dedup command
    dedupparser = subparsers.add_parser(
        'dedup',
        help='Deduplicate SLAM-seq aligned data',
        formatter_class=ArgumentDefaultsHelpFormatter)
    dedupparser.add_argument("-o",
                             "--outputDir",
                             type=str,
                             required=True,
                             dest="outputDir",
                             default=SUPPRESS,
                             help="Output directory for mapped BAM files.")
    dedupparser.add_argument(
        "-tc",
        "--tcMutations",
        type=int,
        required=False,
        default=0,
        dest="tcMutations",
        help="Only select reads with x number of T>C mutations.")
    dedupparser.add_argument("-t",
                             "--threads",
                             type=int,
                             required=False,
                             default=1,
                             dest="threads",
                             help="Thread number")
    dedupparser.add_argument('bam',
                             action='store',
                             help='Bam file(s)',
                             nargs="+")

    # collapse command
    collapseparser = subparsers.add_parser(
        'collapse',
        help='Collapse UTRs',
        formatter_class=ArgumentDefaultsHelpFormatter)
    collapseparser.add_argument("-o",
                                "--outputDir",
                                type=str,
                                required=True,
                                dest="outputDir",
                                default=SUPPRESS,
                                help="Output directory for mapped BAM files.")
    collapseparser.add_argument("-t",
                                "--threads",
                                type=int,
                                required=False,
                                default=1,
                                dest="threads",
                                help="Thread number")
    collapseparser.add_argument('tcount',
                                action='store',
                                help='Tcount file(s)',
                                nargs="+")

    # positional-rates command
    posratesparser = subparsers.add_parser(
        'positional-tracks',
        help='Genome-wide positional tracks as bedgraph',
        formatter_class=ArgumentDefaultsHelpFormatter)
    posratesparser.add_argument('bam',
                                action='store',
                                help='Bam file(s)',
                                nargs="+")
    posratesparser.add_argument("-o",
                                "--outputDir",
                                type=str,
                                required=True,
                                dest="outputDir",
                                default=SUPPRESS,
                                help="Output directory for bedGraph files.")
    posratesparser.add_argument("-s",
                                "--snp-directory",
                                type=str,
                                required=False,
                                dest="snpDir",
                                default=SUPPRESS,
                                help="Directory containing SNP files.")
    posratesparser.add_argument("-r",
                                "--reference",
                                type=str,
                                required=True,
                                dest="ref",
                                default=SUPPRESS,
                                help="Reference fasta file")
    posratesparser.add_argument(
        "-c",
        "--conversion-threshold",
        type=int,
        dest="conversionThreshold",
        required=False,
        default=1,
        help=
        "Number of T>C conversions required to count read as T>C read (default: %(default)d)"
    )
    posratesparser.add_argument(
        "-a",
        "--coverage-cutoff",
        type=int,
        dest="coverageCutoff",
        required=False,
        default=1,
        help=
        "Minimum coverage required to report nucleotide-conversion rate (default: %(default)d). Anything less than 1 will be set to 1 to avoid division by zero."
    )
    posratesparser.add_argument(
        "-q",
        "--min-base-qual",
        type=int,
        default=27,
        required=False,
        dest="minQual",
        help="Min base quality for T -> C conversions (default: %(default)d)")
    posratesparser.add_argument("-t",
                                "--threads",
                                type=int,
                                required=False,
                                default=1,
                                dest="threads",
                                help="Thread number (default: %(default)d)")

    # TC read separator
    readseparatorparser = subparsers.add_parser(
        'read-separator',
        help='Separate TC-reads from background reads genome-wide',
        formatter_class=ArgumentDefaultsHelpFormatter)
    readseparatorparser.add_argument('bam',
                                     action='store',
                                     help='Bam file(s)',
                                     nargs="+")
    readseparatorparser.add_argument("-o",
                                     "--outputDir",
                                     type=str,
                                     required=True,
                                     dest="outputDir",
                                     default=SUPPRESS,
                                     help="Output directory for bam files.")
    readseparatorparser.add_argument("-s",
                                     "--snp-directory",
                                     type=str,
                                     required=False,
                                     dest="snpDir",
                                     default=SUPPRESS,
                                     help="Directory containing SNP files.")
    readseparatorparser.add_argument("-r",
                                     "--reference",
                                     type=str,
                                     required=True,
                                     dest="ref",
                                     default=SUPPRESS,
                                     help="Reference fasta file")
    readseparatorparser.add_argument(
        "-c",
        "--conversion-threshold",
        type=int,
        dest="conversionThreshold",
        required=False,
        default=1,
        help=
        "Number of T>C conversions required to count read as T>C read (default: %(default)d)"
    )
    readseparatorparser.add_argument(
        "-q",
        "--min-base-qual",
        type=int,
        default=27,
        required=False,
        dest="minQual",
        help="Min base quality for T -> C conversions (default: %(default)d)")
    readseparatorparser.add_argument(
        "-t",
        "--threads",
        type=int,
        required=False,
        default=1,
        dest="threads",
        help="Thread number (default: %(default)d)")

    # stats command
    statsparser = subparsers.add_parser(
        'rates',
        help='Calculate overall conversion rates on SLAM-seq datasets',
        formatter_class=ArgumentDefaultsHelpFormatter)
    statsparser.add_argument('bam',
                             action='store',
                             help='Bam file(s)',
                             nargs="+")
    statsparser.add_argument("-o",
                             "--outputDir",
                             type=str,
                             required=True,
                             dest="outputDir",
                             default=SUPPRESS,
                             help="Output directory for mapped BAM files.")
    statsparser.add_argument("-r",
                             "--reference",
                             type=str,
                             required=True,
                             dest="referenceFile",
                             default=SUPPRESS,
                             help="Reference fasta file")
    statsparser.add_argument("-mq",
                             "--min-basequality",
                             type=int,
                             required=False,
                             default=27,
                             dest="mq",
                             help="Minimal base quality for SNPs")
    #statsparser.add_argument('-R', "--compute-rates", dest="overallRates", action='store_true', help="Compute overall conversion rates.")
    statsparser.add_argument("-t",
                             "--threads",
                             type=int,
                             required=False,
                             default=1,
                             dest="threads",
                             help="Thread number")

    # context command
    tccontextparser = subparsers.add_parser(
        'tccontext',
        help='Calculate T->C conversion context on SLAM-seq datasets',
        formatter_class=ArgumentDefaultsHelpFormatter)
    tccontextparser.add_argument('bam',
                                 action='store',
                                 help='Bam file(s)',
                                 nargs="+")
    #tccontextparser.add_argument("-b", "--bed", type=str, required=True, dest="bed", help="BED file")
    tccontextparser.add_argument("-o",
                                 "--outputDir",
                                 type=str,
                                 required=True,
                                 dest="outputDir",
                                 default=SUPPRESS,
                                 help="Output directory for mapped BAM files.")
    tccontextparser.add_argument("-r",
                                 "--reference",
                                 type=str,
                                 required=True,
                                 dest="referenceFile",
                                 default=SUPPRESS,
                                 help="Reference fasta file")
    tccontextparser.add_argument("-mq",
                                 "--min-basequality",
                                 type=int,
                                 required=False,
                                 default=0,
                                 dest="mq",
                                 help="Minimal base quality for SNPs")
    tccontextparser.add_argument("-t",
                                 "--threads",
                                 type=int,
                                 required=False,
                                 default=1,
                                 dest="threads",
                                 help="Thread number")

    # stats rates utr command
    statsutrrateparser = subparsers.add_parser(
        'utrrates',
        help='Calculate conversion rates per UTR on SLAM-seq datasets')
    statsutrrateparser.add_argument('bam',
                                    action='store',
                                    help='Bam file(s)',
                                    nargs="+")
    statsutrrateparser.add_argument(
        "-o",
        "--outputDir",
        type=str,
        required=True,
        dest="outputDir",
        help="Output directory for mapped BAM files.")
    statsutrrateparser.add_argument("-r",
                                    "--reference",
                                    type=str,
                                    required=True,
                                    dest="referenceFile",
                                    help="Reference fasta file")
    statsutrrateparser.add_argument(
        "-mq",
        "--min-basequality",
        type=int,
        required=False,
        default=27,
        dest="mq",
        help="Minimal base quality for SNPs (default: %(default)s)")
    statsutrrateparser.add_argument("-m",
                                    "--multiTCStringency",
                                    dest="strictTCs",
                                    action='store_true',
                                    required=False,
                                    help="")
    statsutrrateparser.add_argument(
        "-t",
        "--threads",
        type=int,
        required=False,
        default=1,
        dest="threads",
        help="Thread number (default: %(default)s)")
    statsutrrateparser.add_argument("-b",
                                    "--bed",
                                    type=str,
                                    required=True,
                                    dest="bed",
                                    help="BED file")
    statsutrrateparser.add_argument(
        "-l",
        "--max-read-length",
        type=int,
        required=False,
        dest="maxLength",
        help="Max read length in BAM file (default: %(default)s)")

    # SNPeval command
    snpevalparser = subparsers.add_parser('snpeval',
                                          help='Evaluate SNP calling')
    snpevalparser.add_argument('bam',
                               action='store',
                               help='Bam file(s)',
                               nargs="+")
    snpevalparser.add_argument("-o",
                               "--outputDir",
                               type=str,
                               required=True,
                               dest="outputDir",
                               help="Output directory for mapped BAM files.")
    snpevalparser.add_argument("-s",
                               "--snp-directory",
                               type=str,
                               required=True,
                               dest="snpDir",
                               help="Directory containing SNP files.")
    snpevalparser.add_argument("-r",
                               "--reference",
                               type=str,
                               required=True,
                               dest="ref",
                               help="Reference fasta file")
    snpevalparser.add_argument("-b",
                               "--bed",
                               type=str,
                               required=True,
                               dest="bed",
                               help="BED file")
    snpevalparser.add_argument(
        "-c",
        "--min-coverage",
        required=False,
        dest="cov",
        type=int,
        help="Minimum coverage to call variant (default: %(default)s)",
        default=10)
    snpevalparser.add_argument(
        "-f",
        "--var-fraction",
        required=False,
        dest="var",
        type=float,
        help="Minimum variant fraction to call variant (default: %(default)s)",
        default=0.8)
    snpevalparser.add_argument("-m",
                               "--multiTCStringency",
                               dest="strictTCs",
                               action='store_true',
                               required=False,
                               help="")
    snpevalparser.add_argument(
        "-l",
        "--max-read-length",
        type=int,
        required=False,
        dest="maxLength",
        help="Max read length in BAM file (default: %(default)s)")
    snpevalparser.add_argument(
        "-q",
        "--min-base-qual",
        type=int,
        default=27,
        required=False,
        dest="minQual",
        help="Min base quality for T -> C conversions (default: %(default)s)")
    snpevalparser.add_argument("-t",
                               "--threads",
                               type=int,
                               required=False,
                               default=1,
                               dest="threads",
                               help="Thread number (default: %(default)s)")

    # stats summary command
    statsSumParser = subparsers.add_parser(
        'summary',
        help='Display summary information and statistics on read numbers')
    statsSumParser.add_argument(
        'bam',
        action='store',
        help='Filtered BAM files (produced by slamdunk filter or all)',
        nargs="+")
    statsSumParser.add_argument("-o",
                                "--output",
                                type=str,
                                required=True,
                                dest="outputFile",
                                help="Output file")
    statsSumParser.add_argument("-t",
                                "--tcountDir",
                                type=str,
                                required=False,
                                dest="countDirectory",
                                help="Folder containing tcount files")

    # merge command
    statsMergeParser = subparsers.add_parser(
        'merge',
        help='Merge T->C rates from multiple samples into one TSV file',
        formatter_class=ArgumentDefaultsHelpFormatter)
    statsMergeParser.add_argument('countFiles',
                                  action='store',
                                  help='tCount files',
                                  nargs="+")
    statsMergeParser.add_argument("-o",
                                  "--output",
                                  type=str,
                                  required=True,
                                  dest="outputFile",
                                  default=SUPPRESS,
                                  help="Output file")
    statsMergeParser.add_argument(
        '-c',
        "--column",
        dest="column",
        type=str,
        required=False,
        default="TcReadCount / ReadCount",
        help="Column or expression used to summarize files.")
    statsMergeParser.add_argument(
        '-n',
        "--columnname",
        dest="columnName",
        type=int,
        required=False,
        default=2,
        help="Index of meta data field to use as column name.")

    # stats read info command
    conversionRateParser = subparsers.add_parser(
        'tcperreadpos',
        help='Calculate conversion rates per read position on SLAM-seq datasets'
    )
    conversionRateParser.add_argument('bam',
                                      action='store',
                                      help='Bam file(s)',
                                      nargs="+")
    conversionRateParser.add_argument("-r",
                                      "--reference",
                                      type=str,
                                      required=True,
                                      dest="referenceFile",
                                      help="Reference fasta file")
    conversionRateParser.add_argument("-s",
                                      "--snp-directory",
                                      type=str,
                                      required=False,
                                      dest="snpDir",
                                      help="Directory containing SNP files.")
    conversionRateParser.add_argument("-l",
                                      "--max-read-length",
                                      type=int,
                                      required=False,
                                      dest="maxLength",
                                      help="Max read length in BAM file")
    conversionRateParser.add_argument(
        "-o",
        "--outputDir",
        type=str,
        required=True,
        dest="outputDir",
        help="Output directory for mapped BAM files."
    )  #conversionRateParser.add_argument("-5", "--trim-5p", type=int, required=False, dest="trim5", help="Number of bp removed from 5' end of all reads.")
    conversionRateParser.add_argument(
        "-mq",
        "--min-basequality",
        type=int,
        required=False,
        default=27,
        dest="mq",
        help="Minimal base quality for SNPs (default: %(default)s)")
    conversionRateParser.add_argument(
        "-t",
        "--threads",
        type=int,
        required=False,
        dest="threads",
        default=1,
        help="Thread number (default: %(default)s)")

    # stats utr info command
    utrRateParser = subparsers.add_parser(
        'tcperutrpos',
        help='Calculate conversion rates per UTR position on SLAM-seq datasets'
    )
    utrRateParser.add_argument('bam',
                               action='store',
                               help='Bam file(s)',
                               nargs="+")
    utrRateParser.add_argument("-r",
                               "--reference",
                               type=str,
                               required=True,
                               dest="referenceFile",
                               help="Reference fasta file")
    utrRateParser.add_argument("-b",
                               "--bed",
                               type=str,
                               required=True,
                               dest="bed",
                               help="BED file")
    utrRateParser.add_argument("-s",
                               "--snp-directory",
                               type=str,
                               required=False,
                               dest="snpDir",
                               help="Directory containing SNP files.")
    utrRateParser.add_argument("-l",
                               "--max-read-length",
                               type=int,
                               required=False,
                               dest="maxLength",
                               help="Max read length in BAM file")
    utrRateParser.add_argument(
        "-o",
        "--outputDir",
        type=str,
        required=True,
        dest="outputDir",
        help="Output directory for mapped BAM files."
    )  #conversionRateParser.add_argument("-5", "--trim-5p", type=int, required=False, dest="trim5", help="Number of bp removed from 5' end of all reads.")
    utrRateParser.add_argument(
        "-mq",
        "--min-basequality",
        type=int,
        required=False,
        default=27,
        dest="mq",
        help="Minimal base quality for SNPs (default: %(default)s)")
    utrRateParser.add_argument("-t",
                               "--threads",
                               type=int,
                               required=False,
                               dest="threads",
                               default=1,
                               help="Thread number (default: %(default)s)")

    # dump read info command
    dumpReadInfo = subparsers.add_parser(
        'dump',
        help='Print all info available for slamdunk reads',
        formatter_class=ArgumentDefaultsHelpFormatter)
    dumpReadInfo.add_argument('bam',
                              action='store',
                              help='Bam file(s)',
                              nargs="+")
    dumpReadInfo.add_argument("-r",
                              "--reference",
                              type=str,
                              required=True,
                              dest="referenceFile",
                              default=SUPPRESS,
                              help="Reference fasta file")
    dumpReadInfo.add_argument("-s",
                              "--snp-directory",
                              type=str,
                              required=True,
                              dest="snpDir",
                              default=SUPPRESS,
                              help="Directory containing SNP files.")
    dumpReadInfo.add_argument(
        "-o",
        "--outputDir",
        type=str,
        required=True,
        dest="outputDir",
        default=SUPPRESS,
        help="Output directory for mapped BAM files."
    )  #conversionRateParser.add_argument("-5", "--trim-5p", type=int, required=False, dest="trim5", help="Number of bp removed from 5' end of all reads.")
    dumpReadInfo.add_argument("-mq",
                              "--min-basequality",
                              type=int,
                              required=False,
                              default=0,
                              dest="mq",
                              help="Minimal base quality for SNPs")
    dumpReadInfo.add_argument("-t",
                              "--threads",
                              type=int,
                              required=False,
                              dest="threads",
                              default=1,
                              help="Thread number")

    args = parser.parse_args()

    ########################################################################
    # Routine selection
    ########################################################################

    command = args.command

    if (command == "dedup"):
        outputDirectory = args.outputDir
        createDir(outputDirectory)
        n = args.threads
        tcMutations = args.tcMutations
        message("Running alleyoop dedup for " + str(len(args.bam)) +
                " files (" + str(n) + " threads)")
        results = Parallel(n_jobs=n, verbose=verbose)(
            delayed(runDedup)(tid, args.bam[tid], outputDirectory, tcMutations)
            for tid in range(0, len(args.bam)))
        dunkFinished()

    elif (command == "collapse"):
        outputDirectory = args.outputDir
        createDir(outputDirectory)
        n = args.threads
        message("Running alleyoop collapse for " + str(len(args.tcount)) +
                " files (" + str(n) + " threads)")
        results = Parallel(n_jobs=n, verbose=verbose)(
            delayed(runCollapse)(tid, args.tcount[tid], outputDirectory)
            for tid in range(0, len(args.tcount)))
        dunkFinished()

    elif (command == "positional-tracks"):
        outputDirectory = args.outputDir
        createDir(outputDirectory)
        snpDirectory = args.snpDir
        n = args.threads
        message("Running alleyoop positional-tracks for " +
                str(len(args.bam)) + " files (" + str(n) + " threads)")
        results = Parallel(n_jobs=n,
                           verbose=verbose)(delayed(runPositionalRates)(
                               tid, args.bam[tid], args.ref, args.minQual,
                               args.conversionThreshold, args.coverageCutoff,
                               outputDirectory, snpDirectory)
                                            for tid in range(0, len(args.bam)))
        dunkFinished()

    elif (command == "read-separator"):
        outputDirectory = args.outputDir
        createDir(outputDirectory)
        snpDirectory = args.snpDir
        n = args.threads
        message("Running alleyoop read-separator for " + str(len(args.bam)) +
                " files (" + str(n) + " threads)")
        results = Parallel(n_jobs=n, verbose=verbose)(
            delayed(runReadSeparator)(tid, args.bam[tid], args.ref,
                                      args.minQual, args.conversionThreshold,
                                      outputDirectory, snpDirectory)
            for tid in range(0, len(args.bam)))
        dunkFinished()

    elif (command == "half-lifes"):

        outputDirectory = args.outputDir
        createDir(outputDirectory)

        timepoints = args.timepoints

        message("Running alleyoop half-lifes for " + str(len(args.bam)) +
                " files")
        runHalfLifes(args.bam, timepoints, outputDirectory)
        dunkFinished()

    elif (command == "rates"):
        outputDirectory = args.outputDir
        createDir(outputDirectory)
        n = args.threads
        referenceFile = args.referenceFile
        minMQ = args.mq
        message("Running alleyoop rates for " + str(len(args.bam)) +
                " files (" + str(n) + " threads)")
        results = Parallel(n_jobs=n, verbose=verbose)(
            delayed(runStatsRates)(tid, args.bam[tid], referenceFile, minMQ,
                                   outputDirectory)
            for tid in range(0, len(args.bam)))
        dunkFinished()

    elif (command == "snpeval"):
        outputDirectory = args.outputDir
        createDir(outputDirectory)
        snpDirectory = args.snpDir
        n = args.threads
        message("Running alleyoop SNPeval for " + str(len(args.bam)) +
                " files (" + str(n) + " threads)")
        results = Parallel(n_jobs=n, verbose=verbose)(
            delayed(runSNPeval)(tid, args.bam[tid], args.ref, args.bed, args.
                                maxLength, args.minQual, args.cov, args.var,
                                args.strictTCs, outputDirectory, snpDirectory)
            for tid in range(0, len(args.bam)))
        dunkFinished()

    elif (command == "tccontext"):
        outputDirectory = args.outputDir
        createDir(outputDirectory)
        n = args.threads
        referenceFile = args.referenceFile
        minMQ = args.mq
        message("Running alleyoop TC context for " + str(len(args.bam)) +
                " files (" + str(n) + " threads)")
        results = Parallel(n_jobs=n, verbose=verbose)(
            delayed(runStatsTCContext)(tid, args.bam[tid], referenceFile,
                                       minMQ, outputDirectory)
            for tid in range(0, len(args.bam)))
        dunkFinished()

    elif (command == "utrrates"):
        outputDirectory = args.outputDir
        createDir(outputDirectory)
        n = args.threads
        referenceFile = args.referenceFile
        minMQ = args.mq

        message("Running alleyoop utrrates for " + str(len(args.bam)) +
                " files (" + str(n) + " threads)")
        results = Parallel(n_jobs=n, verbose=verbose)(
            delayed(runStatsRatesUTR)(tid, args.bam[tid], referenceFile, minMQ,
                                      args.strictTCs, outputDirectory,
                                      args.bed, args.maxLength)
            for tid in range(0, len(args.bam)))
        dunkFinished()

    elif (command == "summary"):
        message("Running alleyoop summary for " + str(len(args.bam)) +
                " files")
        runSummary(args.bam, args.outputFile, args.countDirectory)
        dunkFinished()

    elif (command == "merge"):
        message("Running alleyoop merge for " + str(len(args.countFiles)) +
                " files")
        outputLog = replaceExtension(args.outputFile, ".log")
        stats.mergeRates(",".join(args.countFiles), args.outputFile,
                         args.column, args.columnName, getLogFile(outputLog))
        dunkFinished()

    elif (command == "tcperreadpos"):
        outputDirectory = args.outputDir
        createDir(outputDirectory)
        n = args.threads
        snpDirectory = args.snpDir
        referenceFile = args.referenceFile
        minMQ = args.mq
        message("Running alleyoop tcperreadpos for " + str(len(args.bam)) +
                " files (" + str(n) + " threads)")
        results = Parallel(n_jobs=n, verbose=verbose)(delayed(runTcPerReadPos)(
            tid, args.bam[tid], referenceFile, minMQ, args.maxLength,
            outputDirectory, snpDirectory) for tid in range(0, len(args.bam)))
        dunkFinished()

    elif (command == "tcperutrpos"):
        outputDirectory = args.outputDir
        createDir(outputDirectory)
        n = args.threads
        snpDirectory = args.snpDir
        referenceFile = args.referenceFile
        minMQ = args.mq
        message("Running alleyoop tcperutrpos for " + str(len(args.bam)) +
                " files (" + str(n) + " threads)")
        results = Parallel(n_jobs=n, verbose=verbose)(delayed(runTcPerUtr)(
            tid, args.bam[tid], referenceFile, args.bed, minMQ, args.maxLength,
            outputDirectory, snpDirectory) for tid in range(0, len(args.bam)))
        dunkFinished()

    elif (command == "dump"):
        outputDirectory = args.outputDir
        createDir(outputDirectory)
        n = args.threads
        snpDirectory = args.snpDir
        referenceFile = args.referenceFile
        minMQ = args.mq
        message("Running alleyoop dump for " + str(len(args.bam)) +
                " files (" + str(n) + " threads)")
        results = Parallel(n_jobs=n, verbose=verbose)(
            delayed(runDumpReadInfo)(tid, args.bam[tid], referenceFile, minMQ,
                                     outputDirectory, snpDirectory)
            for tid in range(0, len(args.bam)))
        dunkFinished()

    else:
        parser.error("Too few arguments.")
Example #33
0
def run():
    
    ########################################################################
    # Argument parsing
    ########################################################################
    
    # Info
    usage = "AlleyOop utility tools and diagnostics for SLAMSeq data"
    
    # Main Parsers
    parser = ArgumentParser(description=usage, formatter_class=ArgumentDefaultsHelpFormatter)
    parser.add_argument('--version', action='version', version='%(prog)s ' + __version__)
    
    # Initialize Subparsers
    subparsers = parser.add_subparsers(help="", dest="command")
    
    # dedup command
    dedupparser = subparsers.add_parser('dedup', help='Deduplicate SLAM-seq aligned data', formatter_class=ArgumentDefaultsHelpFormatter)
    dedupparser.add_argument("-o", "--outputDir", type=str, required=True, dest="outputDir", default=SUPPRESS, help="Output directory for mapped BAM files.")
    dedupparser.add_argument("-tc", "--tcMutations", type=int, required=False, default = 0, dest="tcMutations", help="Only select reads with x number of T>C mutations.")
    dedupparser.add_argument("-t", "--threads", type=int, required=False, default=1, dest="threads", help="Thread number")
    dedupparser.add_argument('bam', action='store', help='Bam file(s)' , nargs="+")
    
    # collapse command
    collapseparser = subparsers.add_parser('collapse', help='Collapse UTRs', formatter_class=ArgumentDefaultsHelpFormatter)
    collapseparser.add_argument("-o", "--outputDir", type=str, required=True, dest="outputDir", default=SUPPRESS, help="Output directory for mapped BAM files.")
    collapseparser.add_argument("-t", "--threads", type=int, required=False, default=1, dest="threads", help="Thread number")
    collapseparser.add_argument('tcount', action='store', help='Tcount file(s)' , nargs="+")
    
    # positional-tracks command
    posratesparser = subparsers.add_parser('positional-tracks', help='Genome-wide positional tracks as bedgraph', formatter_class=ArgumentDefaultsHelpFormatter)
    posratesparser.add_argument('bam', action='store', help='Bam file(s)' , nargs="+")
    posratesparser.add_argument("-o", "--outputDir", type=str, required=True, dest="outputDir", default=SUPPRESS, help="Output directory for bedGraph files.")
    posratesparser.add_argument("-s", "--snp-directory", type=str, required=False, dest="snpDir", default=SUPPRESS, help="Directory containing SNP files.")
    posratesparser.add_argument("-r", "--reference", type=str, required=True, dest="ref", default=SUPPRESS, help="Reference fasta file")
    posratesparser.add_argument("-c", "--conversion-threshold", type=int, dest="conversionThreshold", required=False, default=1,help="Number of T>C conversions required to count read as T>C read (default: %(default)d)")
    posratesparser.add_argument("-a", "--coverage-cutoff", type=int, dest="coverageCutoff", required=False, default=1,help="Minimum coverage required to report nucleotide-conversion rate (default: %(default)d). Anything less than 1 will be set to 1 to avoid division by zero.")
    posratesparser.add_argument("-q", "--min-base-qual", type=int, default=27, required=False, dest="minQual", help="Min base quality for T -> C conversions (default: %(default)d)")
    posratesparser.add_argument("-t", "--threads", type=int, required=False, default=1, dest="threads", help="Thread number (default: %(default)d)")
    
    # TC read separator
    readseparatorparser = subparsers.add_parser('read-separator', help='Separate TC-reads from background reads genome-wide', formatter_class=ArgumentDefaultsHelpFormatter)
    readseparatorparser.add_argument('bam', action='store', help='Bam file(s)' , nargs="+")
    readseparatorparser.add_argument("-o", "--outputDir", type=str, required=True, dest="outputDir", default=SUPPRESS, help="Output directory for bam files.")
    readseparatorparser.add_argument("-s", "--snp-directory", type=str, required=False, dest="snpDir", default=SUPPRESS, help="Directory containing SNP files.")
    readseparatorparser.add_argument("-r", "--reference", type=str, required=True, dest="ref", default=SUPPRESS, help="Reference fasta file")
    readseparatorparser.add_argument("-c", "--conversion-threshold", type=int, dest="conversionThreshold", required=False, default=1,help="Number of T>C conversions required to count read as T>C read (default: %(default)d)")
    readseparatorparser.add_argument("-q", "--min-base-qual", type=int, default=27, required=False, dest="minQual", help="Min base quality for T -> C conversions (default: %(default)d)")
    readseparatorparser.add_argument("-t", "--threads", type=int, required=False, default=1, dest="threads", help="Thread number (default: %(default)d)")
    
    
    # rates command
    statsparser = subparsers.add_parser('rates', help='Calculate overall conversion rates on SLAM-seq datasets', formatter_class=ArgumentDefaultsHelpFormatter)
    statsparser.add_argument('bam', action='store', help='Bam file(s)' , nargs="+")
    statsparser.add_argument("-o", "--outputDir", type=str, required=True, dest="outputDir", default=SUPPRESS, help="Output directory for mapped BAM files.")
    statsparser.add_argument("-r", "--reference", type=str, required=True, dest="referenceFile", default=SUPPRESS, help="Reference fasta file")
    statsparser.add_argument("-mq", "--min-basequality", type=int, required=False, default=27, dest="mq", help="Minimal base quality for SNPs")
    #statsparser.add_argument('-R', "--compute-rates", dest="overallRates", action='store_true', help="Compute overall conversion rates.")
    statsparser.add_argument("-t", "--threads", type=int, required=False, default=1, dest="threads", help="Thread number")
    
    # context command
    tccontextparser = subparsers.add_parser('tccontext', help='Calculate T->C conversion context on SLAM-seq datasets', formatter_class=ArgumentDefaultsHelpFormatter)
    tccontextparser.add_argument('bam', action='store', help='Bam file(s)' , nargs="+")
    #tccontextparser.add_argument("-b", "--bed", type=str, required=True, dest="bed", help="BED file")
    tccontextparser.add_argument("-o", "--outputDir", type=str, required=True, dest="outputDir", default=SUPPRESS, help="Output directory for mapped BAM files.")
    tccontextparser.add_argument("-r", "--reference", type=str, required=True, dest="referenceFile", default=SUPPRESS, help="Reference fasta file")
    tccontextparser.add_argument("-mq", "--min-basequality", type=int, required=False, default=0, dest="mq", help="Minimal base quality for SNPs")
    tccontextparser.add_argument("-t", "--threads", type=int, required=False, default=1, dest="threads", help="Thread number")

    # stats rates utr command
    statsutrrateparser = subparsers.add_parser('utrrates', help='Calculate conversion rates per UTR on SLAM-seq datasets')
    statsutrrateparser.add_argument('bam', action='store', help='Bam file(s)' , nargs="+")
    statsutrrateparser.add_argument("-o", "--outputDir", type=str, required=True, dest="outputDir", help="Output directory for mapped BAM files.")
    statsutrrateparser.add_argument("-r", "--reference", type=str, required=True, dest="referenceFile", help="Reference fasta file")
    statsutrrateparser.add_argument("-mq", "--min-basequality", type=int, required=False, default=27, dest="mq", help="Minimal base quality for SNPs (default: %(default)s)")
    statsutrrateparser.add_argument("-m", "--multiTCStringency", dest="strictTCs", action='store_true', required=False, help="")
    statsutrrateparser.add_argument("-t", "--threads", type=int, required=False, default=1, dest="threads", help="Thread number (default: %(default)s)")
    statsutrrateparser.add_argument("-b", "--bed", type=str, required=True, dest="bed", help="BED file")
    statsutrrateparser.add_argument("-l", "--max-read-length", type=int, required=False, dest="maxLength", help="Max read length in BAM file (default: %(default)s)")
    
    # SNPeval command
    snpevalparser = subparsers.add_parser('snpeval', help='Evaluate SNP calling')
    snpevalparser.add_argument('bam', action='store', help='Bam file(s)' , nargs="+")
    snpevalparser.add_argument("-o", "--outputDir", type=str, required=True, dest="outputDir", help="Output directory for mapped BAM files.")
    snpevalparser.add_argument("-s", "--snp-directory", type=str, required=True, dest="snpDir", help="Directory containing SNP files.")
    snpevalparser.add_argument("-r", "--reference", type=str, required=True, dest="ref", help="Reference fasta file")
    snpevalparser.add_argument("-b", "--bed", type=str, required=True, dest="bed", help="BED file")
    snpevalparser.add_argument("-c", "--min-coverage", required=False, dest="cov", type=int, help="Minimum coverage to call variant (default: %(default)s)", default=10)
    snpevalparser.add_argument("-f", "--var-fraction", required=False, dest="var", type=float, help="Minimum variant fraction to call variant (default: %(default)s)", default=0.8)
    snpevalparser.add_argument("-m", "--multiTCStringency", dest="strictTCs", action='store_true', required=False, help="")
    snpevalparser.add_argument("-l", "--max-read-length", type=int, required=False, dest="maxLength", help="Max read length in BAM file (default: %(default)s)")
    snpevalparser.add_argument("-q", "--min-base-qual", type=int, default=27, required=False, dest="minQual", help="Min base quality for T -> C conversions (default: %(default)s)")
    snpevalparser.add_argument("-t", "--threads", type=int, required=False, default=1, dest="threads", help="Thread number (default: %(default)s)")
    
    # stats summary command
    statsSumParser = subparsers.add_parser('summary', help='Display summary information and statistics on read numbers')
    statsSumParser.add_argument('bam', action='store', help='Filtered BAM files (produced by slamdunk filter or all)' , nargs="+")
    statsSumParser.add_argument("-o", "--output", type=str, required=True, dest="outputFile", help="Output file")
    statsSumParser.add_argument("-t", "--tcountDir", type=str, required=False, dest="countDirectory", help="Folder containing tcount files")

    # merge command
    statsMergeParser = subparsers.add_parser('merge', help='Merge T->C rates from multiple samples into one TSV file', formatter_class=ArgumentDefaultsHelpFormatter)
    statsMergeParser.add_argument('countFiles', action='store', help='tCount files' , nargs="+")
    statsMergeParser.add_argument("-o", "--output", type=str, required=True, dest="outputFile", default=SUPPRESS, help="Output file")
    statsMergeParser.add_argument('-c', "--column", dest="column", type=str, required=False, default="TcReadCount / ReadCount", help="Column or expression used to summarize files.")
    statsMergeParser.add_argument('-n', "--columnname", dest="columnName", type=int, required=False, default=2, help="Index of meta data field to use as column name.")    
    
    # stats read info command
    conversionRateParser = subparsers.add_parser('tcperreadpos', help='Calculate conversion rates per read position on SLAM-seq datasets')
    conversionRateParser.add_argument('bam', action='store', help='Bam file(s)' , nargs="+")
    conversionRateParser.add_argument("-r", "--reference", type=str, required=True, dest="referenceFile", help="Reference fasta file")
    conversionRateParser.add_argument("-s", "--snp-directory", type=str, required=False, dest="snpDir", help="Directory containing SNP files.")
    conversionRateParser.add_argument("-l", "--max-read-length", type=int, required=False, dest="maxLength", help="Max read length in BAM file")
    conversionRateParser.add_argument("-o", "--outputDir", type=str, required=True, dest="outputDir", help="Output directory for mapped BAM files.")#conversionRateParser.add_argument("-5", "--trim-5p", type=int, required=False, dest="trim5", help="Number of bp removed from 5' end of all reads.")
    conversionRateParser.add_argument("-mq", "--min-basequality", type=int, required=False, default=27, dest="mq", help="Minimal base quality for SNPs (default: %(default)s)")
    conversionRateParser.add_argument("-t", "--threads", type=int, required=False, dest="threads", default=1, help="Thread number (default: %(default)s)")
    
    # stats utr info command
    utrRateParser = subparsers.add_parser('tcperutrpos', help='Calculate conversion rates per UTR position on SLAM-seq datasets')
    utrRateParser.add_argument('bam', action='store', help='Bam file(s)' , nargs="+")
    utrRateParser.add_argument("-r", "--reference", type=str, required=True, dest="referenceFile", help="Reference fasta file")
    utrRateParser.add_argument("-b", "--bed", type=str, required=True, dest="bed", help="BED file")
    utrRateParser.add_argument("-s", "--snp-directory", type=str, required=False, dest="snpDir", help="Directory containing SNP files.")
    utrRateParser.add_argument("-l", "--max-read-length", type=int, required=False, dest="maxLength", help="Max read length in BAM file")
    utrRateParser.add_argument("-o", "--outputDir", type=str, required=True, dest="outputDir", help="Output directory for mapped BAM files.")#conversionRateParser.add_argument("-5", "--trim-5p", type=int, required=False, dest="trim5", help="Number of bp removed from 5' end of all reads.")
    utrRateParser.add_argument("-mq", "--min-basequality", type=int, required=False, default=27, dest="mq", help="Minimal base quality for SNPs (default: %(default)s)")
    utrRateParser.add_argument("-t", "--threads", type=int, required=False, dest="threads", default=1, help="Thread number (default: %(default)s)")
    
    # dump read info command
    dumpReadInfo = subparsers.add_parser('dump', help='Print all info available for slamdunk reads', formatter_class=ArgumentDefaultsHelpFormatter)
    dumpReadInfo.add_argument('bam', action='store', help='Bam file(s)' , nargs="+")
    dumpReadInfo.add_argument("-r", "--reference", type=str, required=True, dest="referenceFile", default=SUPPRESS, help="Reference fasta file")
    dumpReadInfo.add_argument("-s", "--snp-directory", type=str, required=True, dest="snpDir", default=SUPPRESS, help="Directory containing SNP files.")
    dumpReadInfo.add_argument("-o", "--outputDir", type=str, required=True, dest="outputDir", default=SUPPRESS, help="Output directory for mapped BAM files.")#conversionRateParser.add_argument("-5", "--trim-5p", type=int, required=False, dest="trim5", help="Number of bp removed from 5' end of all reads.")
    dumpReadInfo.add_argument("-mq", "--min-basequality", type=int, required=False, default=0, dest="mq", help="Minimal base quality for SNPs")
    dumpReadInfo.add_argument("-t", "--threads", type=int, required=False, dest="threads", default=1, help="Thread number")
    
    args = parser.parse_args()
    
    ########################################################################
    # Routine selection
    ########################################################################
    
    command = args.command

    if (command == "dedup") :
        outputDirectory = args.outputDir
        createDir(outputDirectory)
        n = args.threads
        tcMutations = args.tcMutations
        message("Running alleyoop dedup for " + str(len(args.bam)) + " files (" + str(n) + " threads)")
        results = Parallel(n_jobs=n, verbose=verbose)(delayed(runDedup)(tid, args.bam[tid], outputDirectory, tcMutations) for tid in range(0, len(args.bam)))
        dunkFinished()
        
    elif (command == "collapse") :
        outputDirectory = args.outputDir
        createDir(outputDirectory)
        n = args.threads
        message("Running alleyoop collapse for " + str(len(args.tcount)) + " files (" + str(n) + " threads)")
        results = Parallel(n_jobs=n, verbose=verbose)(delayed(runCollapse)(tid, args.tcount[tid], outputDirectory) for tid in range(0, len(args.tcount)))
        dunkFinished()
        
    elif (command == "positional-tracks") :
        outputDirectory = args.outputDir
        createDir(outputDirectory)
        snpDirectory = args.snpDir
        n = args.threads
        message("Running alleyoop positional-tracks for " + str(len(args.bam)) + " files (" + str(n) + " threads)")
        results = Parallel(n_jobs=n, verbose=verbose)(delayed(runPositionalRates)(tid, args.bam[tid], args.ref, args.minQual, args.conversionThreshold, args.coverageCutoff, outputDirectory, snpDirectory) for tid in range(0, len(args.bam)))
        dunkFinished()
        
    elif (command == "read-separator") :
        outputDirectory = args.outputDir
        createDir(outputDirectory)
        snpDirectory = args.snpDir
        n = args.threads
        message("Running alleyoop read-separator for " + str(len(args.bam)) + " files (" + str(n) + " threads)")
        results = Parallel(n_jobs=n, verbose=verbose)(delayed(runReadSeparator)(tid, args.bam[tid], args.ref, args.minQual, args.conversionThreshold, outputDirectory, snpDirectory) for tid in range(0, len(args.bam)))
        dunkFinished()       
        
    elif (command == "half-lifes") :
        
        outputDirectory = args.outputDir
        createDir(outputDirectory)
        
        timepoints = args.timepoints
        
        message("Running alleyoop half-lifes for " + str(len(args.bam)) + " files")
        runHalfLifes(args.bam, timepoints, outputDirectory)
        dunkFinished()
        
    elif (command == "rates") :  
        outputDirectory = args.outputDir
        createDir(outputDirectory)
        n = args.threads
        referenceFile = args.referenceFile
        minMQ = args.mq
        message("Running alleyoop rates for " + str(len(args.bam)) + " files (" + str(n) + " threads)")
        results = Parallel(n_jobs=n, verbose=verbose)(delayed(runStatsRates)(tid, args.bam[tid], referenceFile, minMQ, outputDirectory) for tid in range(0, len(args.bam)))
        dunkFinished()
        
    elif (command == "snpeval") :
        outputDirectory = args.outputDir
        createDir(outputDirectory)
        snpDirectory = args.snpDir
        n = args.threads
        message("Running alleyoop SNPeval for " + str(len(args.bam)) + " files (" + str(n) + " threads)")
        results = Parallel(n_jobs=n, verbose=verbose)(delayed(runSNPeval)(tid, args.bam[tid], args.ref, args.bed, args.maxLength, args.minQual, args.cov, args.var, args.strictTCs, outputDirectory, snpDirectory) for tid in range(0, len(args.bam)))
        dunkFinished()
        
    elif (command == "tccontext") :  
        outputDirectory = args.outputDir
        createDir(outputDirectory)
        n = args.threads
        referenceFile = args.referenceFile
        minMQ = args.mq
        message("Running alleyoop TC context for " + str(len(args.bam)) + " files (" + str(n) + " threads)")
        results = Parallel(n_jobs=n, verbose=verbose)(delayed(runStatsTCContext)(tid, args.bam[tid], referenceFile, minMQ, outputDirectory) for tid in range(0, len(args.bam)))
        dunkFinished()
    
    elif (command == "utrrates") :  
        outputDirectory = args.outputDir
        createDir(outputDirectory)
        n = args.threads
        referenceFile = args.referenceFile
        minMQ = args.mq
        
        message("Running alleyoop utrrates for " + str(len(args.bam)) + " files (" + str(n) + " threads)")
        results = Parallel(n_jobs=n, verbose=verbose)(delayed(runStatsRatesUTR)(tid, args.bam[tid], referenceFile, minMQ, args.strictTCs, outputDirectory, args.bed, args.maxLength) for tid in range(0, len(args.bam)))
        dunkFinished()
    
    elif (command == "summary") :
        message("Running alleyoop summary for " + str(len(args.bam)) + " files")
        runSummary(args.bam, args.outputFile, args.countDirectory)
        dunkFinished() 
    
    elif (command == "merge") :
        message("Running alleyoop merge for " + str(len(args.countFiles)) + " files")
        outputLog = replaceExtension(args.outputFile, ".log")
        stats.mergeRates(",".join(args.countFiles), args.outputFile, args.column, args.columnName, getLogFile(outputLog))
        dunkFinished() 
    
    elif (command == "tcperreadpos") :
        outputDirectory = args.outputDir
        createDir(outputDirectory)
        n = args.threads
        snpDirectory = args.snpDir
        referenceFile = args.referenceFile
        minMQ = args.mq
        message("Running alleyoop tcperreadpos for " + str(len(args.bam)) + " files (" + str(n) + " threads)")
        results = Parallel(n_jobs=n, verbose=verbose)(delayed(runTcPerReadPos)(tid, args.bam[tid], referenceFile, minMQ, args.maxLength, outputDirectory, snpDirectory) for tid in range(0, len(args.bam)))
        dunkFinished()
        
    elif (command == "tcperutrpos") :
        outputDirectory = args.outputDir
        createDir(outputDirectory)
        n = args.threads
        snpDirectory = args.snpDir
        referenceFile = args.referenceFile
        minMQ = args.mq
        message("Running alleyoop tcperutrpos for " + str(len(args.bam)) + " files (" + str(n) + " threads)")
        results = Parallel(n_jobs=n, verbose=verbose)(delayed(runTcPerUtr)(tid, args.bam[tid], referenceFile, args.bed, minMQ, args.maxLength, outputDirectory, snpDirectory) for tid in range(0, len(args.bam)))
        dunkFinished()
    
    elif (command == "dump") :
        outputDirectory = args.outputDir
        createDir(outputDirectory)
        n = args.threads
        snpDirectory = args.snpDir
        referenceFile = args.referenceFile
        minMQ = args.mq
        message("Running alleyoop dump for " + str(len(args.bam)) + " files (" + str(n) + " threads)")
        results = Parallel(n_jobs=n, verbose=verbose)(delayed(runDumpReadInfo)(tid, args.bam[tid], referenceFile, minMQ, outputDirectory, snpDirectory) for tid in range(0, len(args.bam)))
        dunkFinished()
    
    else:
        parser.error("Too few arguments.")
Example #34
0
def runSummary(bam, outputFile, countDirectory):

    outputLog = replaceExtension(outputFile, ".log")
    stats.readSummary(bam, countDirectory, outputFile, getLogFile(outputLog))
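
Output-file naming in all of these examples goes through basename and
replaceExtension. A minimal sketch of what replaceExtension plausibly does,
inferred from its call sites; the real slamdunk utility may differ:

import os

def replaceExtension(filename, newExtension, suffix=""):
    # e.g. replaceExtension("sample.bam", ".csv", "_tcperutr")
    #      -> "sample_tcperutr.csv"
    root, _ = os.path.splitext(filename)
    return root + suffix + newExtension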