def runCollapse(tid, tcount, outputDirectory):
    outputTCOUNT = os.path.join(outputDirectory, replaceExtension(basename(tcount), ".csv", "_collapsed"))
    outputLOG = os.path.join(outputDirectory, replaceExtension(basename(tcount), ".log", "_collapsed"))
    log = getLogFile(outputLOG)
    tcounter.collapse(tcount, outputTCOUNT, log)
    closeLogFile(log)
    stepFinished()

def runSNPeval(tid, bam, ref, bed, maxLength, minQual, coverageCutoff, variantFraction, strictTCs, outputDirectory, snpDirectory):
    outputCSV = os.path.join(outputDirectory, replaceExtension(basename(bam), ".csv", "_SNPeval"))
    outputPDF = os.path.join(outputDirectory, replaceExtension(basename(bam), ".pdf", "_SNPeval"))
    outputLOG = os.path.join(outputDirectory, replaceExtension(basename(bam), ".log", "_SNPeval"))

    if not os.path.isdir(snpDirectory):
        print("SNP directory does not exist. Abort.")
        sys.exit(0)

    inputSNP = os.path.join(snpDirectory, replaceExtension(basename(bam), ".vcf", "_snp"))

    if maxLength is None:
        maxLength = estimateMaxReadLength(bam)
    if maxLength < 0:
        print("Could not reliably estimate maximum read length. Please specify --max-read-length parameter.")
        sys.exit(0)

    log = getLogFile(outputLOG)
    print("Using " + str(maxLength) + " as maximum read length.", file=log)

    stats.computeSNPMaskedRates(ref, bed, inputSNP, bam, maxLength, minQual, coverageCutoff, variantFraction, outputCSV, outputPDF, strictTCs, log)
    stepFinished()

def runTcPerUtr(tid, bam, referenceFile, bed, minMQ, maxReadLength, outputDirectory, snpDirectory, vcfFile):
    outputCSV = os.path.join(outputDirectory, replaceExtension(basename(bam), ".csv", "_tcperutr"))
    outputPDF = os.path.join(outputDirectory, replaceExtension(basename(bam), ".pdf", "_tcperutr"))
    outputLOG = os.path.join(outputDirectory, replaceExtension(basename(bam), ".log", "_tcperutr"))

    if vcfFile is not None:
        inputSNP = vcfFile
    elif snpDirectory is not None:
        inputSNP = os.path.join(snpDirectory, replaceExtension(basename(bam), ".vcf", "_snp"))
    else:
        inputSNP = None

    if maxReadLength is None:
        maxReadLength = estimateMaxReadLength(bam)
    if maxReadLength < 0:
        print("Could not reliably estimate maximum read length. Please specify --max-read-length parameter.")
        sys.exit(0)

    log = getLogFile(outputLOG)
    print("Using " + str(maxReadLength) + " as maximum read length.", file=log)

    stats.tcPerUtr(referenceFile, bed, bam, minMQ, maxReadLength, outputCSV, outputPDF, inputSNP, log, False, True, True)

    closeLogFile(log)
    stepFinished()

def runCount(tid, bam, ref, bed, maxLength, minQual, conversionThreshold, outputDirectory, snpDirectory):
    outputCSV = os.path.join(outputDirectory, replaceExtension(basename(bam), ".tsv", "_tcount"))
    outputBedgraphPlus = os.path.join(outputDirectory, replaceExtension(basename(bam), ".bedgraph", "_tcount_plus"))
    outputBedgraphMinus = os.path.join(outputDirectory, replaceExtension(basename(bam), ".bedgraph", "_tcount_mins"))
    outputLOG = os.path.join(outputDirectory, replaceExtension(basename(bam), ".log", "_tcount"))

    if snpDirectory is not None:
        inputSNP = os.path.join(snpDirectory, replaceExtension(basename(bam), ".vcf", "_snp"))
    else:
        inputSNP = None

    if maxLength is None:
        maxLength = estimateMaxReadLength(bam)
    if maxLength < 0:
        print("Difference between minimum and maximum read length is > 10. Please specify --max-read-length parameter.")
        sys.exit(0)

    log = getLogFile(outputLOG)
    print("Using " + str(maxLength) + " as maximum read length.", file=log)

    tcounter.computeTconversions(ref, bed, inputSNP, bam, maxLength, minQual, outputCSV, outputBedgraphPlus, outputBedgraphMinus, conversionThreshold, log)
    stepFinished()
    return outputCSV

def runStatsRatesUTR(tid, bam, referenceFile, minMQ, strictTCs, outputDirectory, utrFile, maxReadLength):
    outputCSV = os.path.join(outputDirectory, replaceExtension(basename(bam), ".csv", "_mutationrates_utr"))
    outputPDF = os.path.join(outputDirectory, replaceExtension(basename(bam), ".pdf", "_mutationrates_utr"))
    outputLOG = os.path.join(outputDirectory, replaceExtension(basename(bam), ".log", "_mutationrates_utr"))

    if maxReadLength is None:
        maxReadLength = estimateMaxReadLength(bam)
    if maxReadLength < 0:
        print("Could not reliably estimate maximum read length. Please specify --max-read-length parameter.")
        sys.exit(0)

    log = getLogFile(outputLOG)
    print("Using " + str(maxReadLength) + " as maximum read length.", file=log)

    stats.statsComputeOverallRatesPerUTR(referenceFile, bam, minMQ, strictTCs, outputCSV, outputPDF, utrFile, maxReadLength, log)

    closeLogFile(log)
    stepFinished()

def reads(outputDirectory, bed, sampleName, readLength, readNumber, readCoverage, seqError, pulseTimePoint, chaseTimePoint, conversionRate, sampleInfo, labeledTranscripts=-1.0):
    message("Simulating read sample: " + sampleName)

    bed12File = replaceExtension(bed, ".bed12")
    bed12FastaFile = replaceExtension(bed, ".fa")
    explvFile = replaceExtension(bed, ".eplv")

    bedReads = os.path.join(outputDirectory, sampleName + "_reads_tmp.bed")
    faReads = os.path.join(outputDirectory, sampleName + "_reads_tmp.fa")

    totalUTRlength = simulator.getTotalUtrLength(bed12File)

    if readNumber == 0:
        readNumber = (totalUTRlength / readLength) * readCoverage
        readNumber = int(readNumber * (random.uniform(-0.2, 0.2) + 1))

    #message("Simulating " + str(readNumber) + " reads with sequencing error of " + str(seqError))
    simulator.simulateReads(bed12File, bed12FastaFile, explvFile, bedReads, faReads, readLength, readNumber, seqError)

    bamReadsWithTC = os.path.join(outputDirectory, sampleName + "_reads.bam")
    utrSummary = os.path.join(outputDirectory, sampleName + "_utrsummary.tsv")

    simulator.addTcConversions(bed, faReads, bamReadsWithTC, pulseTimePoint, chaseTimePoint, utrSummary, conversionRate, readNumber, sampleInfo, labeledTranscripts)

    os.unlink(faReads)
    os.unlink(bedReads)

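# Worked example of the coverage-based read number computed in reads() above
# (illustrative numbers, not from the slamdunk docs): with
# totalUTRlength = 1,000,000 bp, readLength = 100 and readCoverage = 30, the
# base count is (1000000 / 100) * 30 = 300000 reads, which is then jittered
# by a uniform factor between 0.8 and 1.2.
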
def runDedup(tid, bam, outputDirectory, tcMutations):
    outputBAM = os.path.join(outputDirectory, replaceExtension(basename(bam), ".bam", "_dedup"))
    outputLOG = os.path.join(outputDirectory, replaceExtension(basename(bam), ".log", "_dedup"))
    log = getLogFile(outputLOG)
    deduplicator.Dedup(bam, outputBAM, tcMutations, log)
    closeLogFile(log)
    stepFinished()

def runHalfLifes(bams, timepoints, outputDirectory):
    outputCSV = os.path.join(outputDirectory, replaceExtension(basename(bams[0]), ".tsv", "_halflifes"))
    outputLOG = os.path.join(outputDirectory, replaceExtension(basename(bams[0]), ".log", "_halflifes"))
    log = getLogFile(outputLOG)
    stats.halflifes(",".join(bams), outputCSV, timepoints, log)
    closeLogFile(log)
    stepFinished()

def readSummary(filteredFiles, countDirectory, outputFile, log, printOnly=False, verbose=True, force=False):
    # Print rows sorted by sample ID
    contentDict = {}

    tsvFile = open(outputFile, "w")
    print("# slamdunk summary v" + __version__, file=tsvFile)

    if countDirectory is not None:
        f = tempfile.NamedTemporaryFile(delete=False)

    for bam in filteredFiles:
        slamseqInfo = SlamSeqInfo(bam)
        sampleInfo = getSampleInfo(bam)

        if countDirectory is not None:
            countedReads = 0
            countFile = os.path.join(countDirectory, replaceExtension(os.path.basename(bam), ".tsv", "_tcount"))
            if not os.path.exists(countFile):
                print("TCount directory does not seem to contain tcount file for:\t" + countFile)
            else:
                print(sampleInfo.Name, countFile, sep='\t', file=f)
                countedReads = sumCounts(countFile)

            if int(sampleInfo.ID) in contentDict:
                ID = len(contentDict) + 1
            else:
                ID = sampleInfo.ID
            contentDict[int(ID)] = "\t".join([bam, sampleInfo.Name, sampleInfo.Type, sampleInfo.Time, str(slamseqInfo.SequencedReads), str(slamseqInfo.MappedReads), str(slamseqInfo.DedupReads), str(slamseqInfo.MQFilteredReads), str(slamseqInfo.IdFilteredReads), str(slamseqInfo.NmFilteredReads), str(slamseqInfo.MultimapperReads), str(slamseqInfo.FilteredReads), str(countedReads), slamseqInfo.AnnotationName])
        else:
            if int(sampleInfo.ID) in contentDict:
                ID = len(contentDict) + 1
            else:
                ID = sampleInfo.ID
            contentDict[int(ID)] = "\t".join([bam, sampleInfo.Name, sampleInfo.Type, sampleInfo.Time, str(slamseqInfo.SequencedReads), str(slamseqInfo.MappedReads), str(slamseqInfo.DedupReads), str(slamseqInfo.MQFilteredReads), str(slamseqInfo.IdFilteredReads), str(slamseqInfo.NmFilteredReads), str(slamseqInfo.MultimapperReads), str(slamseqInfo.FilteredReads), slamseqInfo.AnnotationName])

    if countDirectory is not None:
        f.close()
        callR(getPlotter("PCAPlotter") + " -f " + f.name + " -O " + replaceExtension(outputFile, ".pdf", "_PCA") + " -P " + replaceExtension(outputFile, ".txt", "_PCA"), log, dry=printOnly, verbose=verbose)
        print("FileName", "SampleName", "SampleType", "SampleTime", "Sequenced", "Mapped", "Deduplicated", "MQ-Filtered", "Identity-Filtered", "NM-Filtered", "Multimap-Filtered", "Retained", "Counted", "Annotation", sep="\t", file=tsvFile)
    else:
        print("FileName", "SampleName", "SampleType", "SampleTime", "Sequenced", "Mapped", "Deduplicated", "MQ-Filtered", "Identity-Filtered", "NM-Filtered", "Multimap-Filtered", "Retained", "Annotation", sep="\t", file=tsvFile)

    for key in sorted(contentDict):
        print(contentDict[key], file=tsvFile)

    tsvFile.close()

def runStatsTCContext(tid, bam, referenceFile, minMQ, outputDirectory):
    outputCSV = os.path.join(outputDirectory, replaceExtension(basename(bam), ".csv", "_tccontext"))
    outputPDF = os.path.join(outputDirectory, replaceExtension(basename(bam), ".pdf", "_tccontext"))
    outputLOG = os.path.join(outputDirectory, replaceExtension(basename(bam), ".log", "_tccontext"))
    log = getLogFile(outputLOG)
    stats.statsComputeTCContext(referenceFile, bam, minMQ, outputCSV, outputPDF, log)
    closeLogFile(log)
    stepFinished()

def runFilter(tid, bam, bed, mq, minIdentity, maxNM, outputDirectory):
    outputBAM = os.path.join(outputDirectory, replaceExtension(basename(bam), ".bam", "_filtered"))
    outputLOG = os.path.join(outputDirectory, replaceExtension(basename(bam), ".log", "_filtered"))
    filter.Filter(bam, outputBAM, getLogFile(outputLOG), bed, mq, minIdentity, maxNM, printOnly, verbose)
    stepFinished()

def runMap(tid, inputBAMrun1, inputBAMrun2, referenceFile, threads, trim5p, maxPolyA, quantseqMapping, endtoendMapping, topn, sampleDescription, outputDirectory, skipSAM):
    if skipSAM:
        outputSAM = os.path.join(outputDirectory, replaceExtension(basename(inputBAMrun1), ".bam", "_slamdunk_mapped"))
    else:
        outputSAM = os.path.join(outputDirectory, replaceExtension(basename(inputBAMrun1), ".sam", "_slamdunk_mapped"))
    outputLOG = os.path.join(outputDirectory, replaceExtension(basename(inputBAMrun1), ".log", "_slamdunk_mapped"))

    #sampleName = "sample_" + str(tid)
    sampleName = replaceExtension(basename(inputBAMrun1), ".bam", "")
    sampleType = "NA"
    sampleTime = "-1"

    if sampleDescription != "":
        sampleDescriptions = sampleDescription.split(":")
        if len(sampleDescriptions) >= 1:
            sampleName = sampleDescriptions[0]
        if len(sampleDescriptions) >= 2:
            typeDict = {'p': 'pulse', 'c': 'chase', 'pulse': 'pulse', 'chase': 'chase', '': 'NA'}
            if sampleDescriptions[1] in typeDict:
                sampleType = typeDict[sampleDescriptions[1]]
            else:
                sampleType = sampleDescriptions[1]
        if len(sampleDescriptions) >= 3:
            sampleTime = sampleDescriptions[2]

    mapper.Map(inputBAMrun1, inputBAMrun2, referenceFile, outputSAM, getLogFile(outputLOG), quantseqMapping, endtoendMapping, threads=threads, trim5p=trim5p, maxPolyA=maxPolyA, topn=topn, sampleId=tid, sampleName=sampleName, sampleType=sampleType, sampleTime=sampleTime, printOnly=printOnly, verbose=verbose)
    stepFinished()

def Utrs(outputDirectory, bed, referenceFasta, readLength, polyALength, snpRate):
    message("Simulating UTRs")
    createDir(outputDirectory)

    bed12 = os.path.join(outputDirectory, replaceExtension(basename(bed), ".bed12", "_utrs"))
    bed12Fasta = os.path.join(outputDirectory, replaceExtension(basename(bed), ".fa", "_utrs"))
    explv = os.path.join(outputDirectory, replaceExtension(basename(bed), ".eplv", "_utrs"))
    vcfFile = os.path.join(outputDirectory, replaceExtension(basename(bed), ".vcf", "_utrs"))

    totalUTRlength = simulator.prepareUTRs(bed, bed12, bed12Fasta, referenceFasta, readLength, polyALength, explv, snpRate, vcfFile)

def runSnp(tid, referenceFile, minCov, minVarFreq, minQual, inputBAM, outputDirectory):
    outputSNP = os.path.join(outputDirectory, replaceExtension(basename(inputBAM), ".vcf", "_snp"))
    outputLOG = os.path.join(outputDirectory, replaceExtension(basename(inputBAM), ".log", "_snp"))
    snps.SNPs(inputBAM, outputSNP, referenceFile, minVarFreq, minCov, minQual, getLogFile(outputLOG), printOnly, verbose, False)
    stepFinished()

def runReadSeparator(tid, bam, ref, minQual, conversionThreshold, outputDirectory, snpDirectory):
    outputBAM = os.path.join(outputDirectory, replaceExtension(basename(bam), "", ""))
    outputLOG = os.path.join(outputDirectory, replaceExtension(basename(bam), ".log", "_read_separator"))

    if snpDirectory is not None:
        inputSNP = os.path.join(snpDirectory, replaceExtension(basename(bam), ".vcf", "_snp"))
    else:
        inputSNP = None

    log = getLogFile(outputLOG)
    tcounter.genomewideReadSeparation(ref, inputSNP, bam, minQual, outputBAM, conversionThreshold, log)
    stepFinished()

def runPositionalRates(tid, bam, ref, minQual, conversionThreshold, coverageCutoff, outputDirectory, snpDirectory):
    outputBedGraphPrefix = os.path.join(outputDirectory, replaceExtension(basename(bam), "", "_positional_rates"))
    outputLOG = os.path.join(outputDirectory, replaceExtension(basename(bam), ".log", "_positional_rates"))

    if snpDirectory is not None:
        inputSNP = os.path.join(snpDirectory, replaceExtension(basename(bam), ".vcf", "_snp"))
    else:
        inputSNP = None

    log = getLogFile(outputLOG)
    tcounter.genomewideConversionRates(ref, inputSNP, bam, minQual, outputBedGraphPrefix, conversionThreshold, coverageCutoff, log)
    stepFinished()

def runSam2Bam(tid, bam, threads, outputDirectory):
    inputSAM = os.path.join(outputDirectory, replaceExtension(basename(bam), ".sam", "_slamdunk_mapped"))
    outputBAM = os.path.join(outputDirectory, replaceExtension(basename(bam), ".bam", "_slamdunk_mapped"))
    outputLOG = os.path.join(outputDirectory, replaceExtension(basename(bam), ".log", "_slamdunk_mapped"))
    mapper.sort(inputSAM, outputBAM, getLogFile(outputLOG), threads, False, printOnly, verbose)
    stepFinished()

def runDumpReadInfo(tid, bam, referenceFile, minMQ, outputDirectory, snpDirectory):
    outputCSV = os.path.join(outputDirectory, replaceExtension(basename(bam), ".sdunk", "_readinfo"))
    outputLOG = os.path.join(outputDirectory, replaceExtension(basename(bam), ".log", "_readinfo"))

    if snpDirectory is not None:
        inputSNP = os.path.join(snpDirectory, replaceExtension(basename(bam), ".vcf", "_snp"))
    else:
        inputSNP = None

    log = getLogFile(outputLOG)
    dump.dumpReadInfo(referenceFile, bam, minMQ, outputCSV, inputSNP, log)
    closeLogFile(log)
    stepFinished()

def Map(inputBAM, inputReference, outputSAM, log, quantseqMapping, endtoendMapping, threads=1, parameter="--no-progress --slam-seq 2", outputSuffix="_ngm_slamdunk", trim5p=0, maxPolyA=-1, topn=1, sampleId=None, sampleName="NA", sampleType="NA", sampleTime=0, printOnly=False, verbose=True, force=False):
    if quantseqMapping is True:
        parameter = "--no-progress"

    if trim5p > 0:
        parameter = parameter + " -5 " + str(trim5p)

    if maxPolyA > -1:
        parameter = parameter + " --max-polya " + str(maxPolyA)

    if endtoendMapping is True:
        parameter = parameter + " -e "
    else:
        parameter = parameter + " -l "

    if sampleId is not None:
        parameter = parameter + " --rg-id " + str(sampleId)
    if sampleName != "":
        parameter = parameter + " --rg-sm " + sampleName + ":" + sampleType + ":" + str(sampleTime)

    if topn > 1:
        parameter = parameter + " -n " + str(topn) + " --strata "

    if checkStep([inputReference, inputBAM], [replaceExtension(outputSAM, ".bam")], force):
        if outputSAM.endswith(".sam"):
            # Output SAM
            run(getBinary("ngm") + " -r " + inputReference + " -q " + inputBAM + " -t " + str(threads) + " " + parameter + " -o " + outputSAM, log, verbose=verbose, dry=printOnly)
        else:
            # Output BAM directly
            run(getBinary("ngm") + " -b -r " + inputReference + " -q " + inputBAM + " -t " + str(threads) + " " + parameter + " -o " + outputSAM, log, verbose=verbose, dry=printOnly)
    else:
        print("Skipped mapping for " + inputBAM, file=log)

def turnOver(outputDirectory, bed, minHalflife, maxHalfLife, skipTurnover=False):
    message("Simulating turnover")
    createDir(outputDirectory)
    turnoverBed = os.path.join(outputDirectory, replaceExtension(basename(bed), ".bed", "_utrs"))
    if not skipTurnover:
        simulator.simulateTurnOver(bed, turnoverBed, minHalflife, maxHalfLife)
    else:
        copyfile(bed, turnoverBed)

def Map(inputBAM, inputReference, outputSAM, log, quantseqMapping, endtoendMapping, threads=1, parameter="--no-progress --slam-seq 2", outputSuffix="_ngm_slamdunk", trim5p=0, maxPolyA=-1, topn=1, sampleId=None, sampleName="NA", sampleType="NA", sampleTime=0, printOnly=False, verbose=True, force=False, isPaired=False):
    if quantseqMapping:
        parameter = "--no-progress"

    if trim5p > 0:
        parameter = parameter + " -5 " + str(trim5p)

    if maxPolyA > -1:
        parameter = parameter + " --max-polya " + str(maxPolyA)

    if endtoendMapping:
        parameter = parameter + " -e "
    else:
        parameter = parameter + " -l "

    if sampleId is not None:
        parameter = parameter + " --rg-id " + str(sampleId)
    if sampleName != "":
        parameter = parameter + " --rg-sm " + sampleName + ":" + sampleType + ":" + str(sampleTime)

    if topn > 1:
        parameter = parameter + " -n " + str(topn) + " --strata "

    # For paired-end data inputBAM is a list of two files, otherwise a single file.
    files = [inputReference]
    if not isPaired:
        files.append(inputBAM)
    else:
        files.extend(inputBAM)
    files = [os.path.expanduser(p) for p in files]

    if checkStep(files, [replaceExtension(outputSAM, ".bam")], force):
        cmd = "ngm %s -r %s %s -t %s %s -o %s" % (
            "" if outputSAM.endswith(".sam") else "-b",
            files[0],
            "-q %s" % files[1] if not isPaired else "-1 %s -2 %s" % (files[1], files[2]),
            threads,
            parameter,
            outputSAM)
        run(cmd, log, verbose=verbose, dry=printOnly)
    else:
        print("Skipped mapping for " + (inputBAM if not isPaired else inputBAM[0]), file=log)

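# Hypothetical usage sketch of the paired-end aware Map() wrapper above
# (file names are illustrative; `log` is an open log-file handle, e.g. from
# getLogFile()):
#
#   Map(["sample_R1.fastq.gz", "sample_R2.fastq.gz"], "genome.fa",
#       "out/sample_slamdunk_mapped.sam", log,
#       quantseqMapping=False, endtoendMapping=False,
#       threads=4, isPaired=True)
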
def processFilter(bam, mq, identity, nm, bed, paired, outputDirectory, n):
    dunkPath = os.path.join(outputDirectory, "filter")
    createDir(dunkPath)
    message("Running slamDunk filter for %s files (%s threads)" % (len(bam), n))
    _ = Parallel(n_jobs=n, verbose=verbose)(
        delayed(runFilter)(bam[tid], bed, mq, identity, nm, paired, dunkPath)
        for tid in range(0, len(bam)))
    dunkFinished()
    return dunkPath, [os.path.join(dunkPath, replaceExtension(basename(b), ".bam", "_filtered")) for b in bam]

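# Hypothetical usage sketch of processFilter() above (paths and thresholds
# are illustrative; note it forwards a `paired` flag to runFilter, so it
# targets a paired-end-aware variant of that worker):
#
#   filterDir, filteredBams = processFilter(
#       ["out/map/s1_slamdunk_mapped.bam", "out/map/s2_slamdunk_mapped.bam"],
#       mq=2, identity=0.95, nm=-1, bed="utrs.bed", paired=False,
#       outputDirectory="out", n=4)
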
def computeTconversions(ref, bed, snpsFile, bam, maxReadLength, minQual, outputCSV, outputBedgraphPlus, outputBedgraphMinus, conversionThreshold, log, mle=False):

    referenceFile = pysam.FastaFile(ref)

    sampleInfo = getSampleInfo(bam)

    slamseqInfo = SlamSeqInfo(bam)
    #readNumber = slamseqInfo.MappedReads
    readNumber = slamseqInfo.FilteredReads

    bedMD5 = md5(bed)

    if mle:
        fileNameTest = replaceExtension(outputCSV, ".tsv", "_perread")
        fileTest = open(fileNameTest, 'w')
        print("#slamdunk v" + __version__, __count_version__, "sample info:", sampleInfo.Name, sampleInfo.ID, sampleInfo.Type, sampleInfo.Time, sep="\t", file=fileTest)
        print("#annotation:", os.path.basename(bed), bedMD5, sep="\t", file=fileTest)
        #print("utr", "n", "k", file=fileTest)
        print(SlamSeqInterval.Header, file=fileTest)

    fileCSV = open(outputCSV, 'w')
    print("#slamdunk v" + __version__, __count_version__, "sample info:", sampleInfo.Name, sampleInfo.ID, sampleInfo.Type, sampleInfo.Time, sep="\t", file=fileCSV)
    print("#annotation:", os.path.basename(bed), bedMD5, sep="\t", file=fileCSV)
    print(SlamSeqInterval.Header, file=fileCSV)

    snps = SNPtools.SNPDictionary(snpsFile)
    snps.read()

    # Go through one chr after the other
    testFile = SlamSeqBamFile(bam, ref, snps)
    if testFile.bamVersion != __bam_version__:
        raise RuntimeError("Wrong filtered BAM file version detected (" + testFile.bamVersion + "). Expected version " + __bam_version__ + ". Please rerun slamdunk filter.")

    bedMD5 = md5(bed)
    if slamseqInfo.AnnotationMD5 != bedMD5:
        print("Warning: MD5 checksum of annotation (" + bedMD5 + ") does not match MD5 in filtered BAM files (" + slamseqInfo.AnnotationMD5 + "). Most probably the annotation file changed after the filtered BAM files were created.", file=log)

    conversionBedGraph = {}

    for utr in BedIterator(bed):
        Tcontent = 0
        slamSeqUtr = SlamSeqInterval(utr.chromosome, utr.start, utr.stop, utr.strand, utr.name, Tcontent, 0, 0, 0, 0, 0, 0, 0)
        slamSeqUtrMLE = SlamSeqInterval(utr.chromosome, utr.start, utr.stop, utr.strand, utr.name, Tcontent, 0, 0, 0, 0, 0, 0, 0)

        if not utr.hasStrand():
            raise RuntimeError("Input BED file does not contain stranded intervals.")

        if utr.start < 0:
            raise RuntimeError("Negative start coordinate found. Please check the following entry in your BED file: " + str(utr))

        # Retrieve reference sequence
        region = utr.chromosome + ":" + str(utr.start + 1) + "-" + str(utr.stop)

        if utr.chromosome in list(referenceFile.references):
            #print(refRegion, file=sys.stderr)
            # pysam-0.15.0.1
            #refSeq = referenceFile.fetch(region=region).upper()
            refSeq = referenceFile.fetch(reference=utr.chromosome, start=utr.start, end=utr.stop).upper()

            if utr.strand == "-":
                #refSeq = complement(refSeq[::-1])
                Tcontent = refSeq.count("A")
            else:
                Tcontent = refSeq.count("T")

            slamSeqUtr._Tcontent = Tcontent

        readIterator = testFile.readInRegion(utr.chromosome, utr.start, utr.stop, utr.strand, maxReadLength, minQual, conversionThreshold)

        tcCountUtr = [0] * utr.getLength()
        coverageUtr = [0] * utr.getLength()

        tInReads = []
        tcInRead = []

        countFwd = 0
        tcCountFwd = 0
        countRev = 0
        tCountRev = 0

        multiMapFwd = 0
        multiMapRev = 0

        for read in readIterator:

            # Overwrite any conversions for non-TC reads (reads with < 2 TC conversions)
            if not read.isTcRead:
                read.tcCount = 0
                read.mismatches = []
                read.conversionRates = 0.0
                read.tcRate = 0.0

            if read.direction == ReadDirection.Reverse:
                countRev += 1
                if read.tcCount > 0:
                    tCountRev += 1
                if read.isMultimapper:
                    multiMapRev += 1
            else:
                countFwd += 1
                if read.tcCount > 0:
                    tcCountFwd += 1
                if read.isMultimapper:
                    multiMapFwd += 1

            for mismatch in read.mismatches:
                if mismatch.isTCMismatch(read.direction == ReadDirection.Reverse) and mismatch.referencePosition >= 0 and mismatch.referencePosition < utr.getLength():
                    tcCountUtr[mismatch.referencePosition] += 1

            testN = read.getTcount()
            testk = 0
            for mismatch in read.mismatches:
                if mismatch.referencePosition >= 0 and mismatch.referencePosition < utr.getLength():
                    if mismatch.isT(read.direction == ReadDirection.Reverse):
                        testN += 1
                    if mismatch.isTCMismatch(read.direction == ReadDirection.Reverse):
                        testk += 1
            #print(utr.name, read.name, read.direction, testN, testk, read.sequence, sep="\t")
            tInReads.append(testN)
            tcInRead.append(testk)
            #print(utr.name, testN, testk, sep="\t", file=fileTest)

            for i in xrange(read.startRefPos, read.endRefPos):
                if i >= 0 and i < utr.getLength():
                    coverageUtr[i] += 1

        if (utr.strand == "+" and countFwd > 0) or (utr.strand == "-" and countRev > 0):
            tcRateUtr = [x * 100.0 / y if y > 0 else 0 for x, y in zip(tcCountUtr, coverageUtr)]

            readCount = countFwd
            tcReadCount = tcCountFwd
            multiMapCount = multiMapFwd

            if utr.strand == "-":
                readCount = countRev
                tcReadCount = tCountRev
                multiMapCount = multiMapRev

            if (utr.strand == "-" and countFwd > countRev) or (utr.strand == "+" and countRev > countFwd):
                print("Warning: " + utr.name + " is located on the " + utr.strand + " strand but read counts are higher for the opposite strand (fwd: " + str(countFwd) + ", rev: " + str(countRev) + ")", file=sys.stderr)

            refSeq = readIterator.getRefSeq()

            # Get number of covered Ts/As in the UTR and compute average conversion rate for all covered Ts/As
            coveredTcount = 0
            avgConversionRate = 0
            coveredPositions = 0
            # Get number of reads on T positions and number of reads with T->C conversions on T positions
            coverageOnTs = 0
            conversionsOnTs = 0

            for position in xrange(0, len(coverageUtr)):
                if coverageUtr[position] > 0 and ((utr.strand == "+" and refSeq[position] == "T") or (utr.strand == "-" and refSeq[position] == "A")):
                    coveredTcount += 1
                    avgConversionRate += tcRateUtr[position]

                    coverageOnTs += coverageUtr[position]
                    conversionsOnTs += tcCountUtr[position]

                    conversionBedGraph[utr.chromosome + ":" + str(utr.start + position) + ":" + str(utr.strand)] = tcRateUtr[position]
                if coverageUtr[position] > 0:
                    coveredPositions += 1

            if coveredTcount > 0:
                avgConversionRate = avgConversionRate / coveredTcount
            else:
                avgConversionRate = 0

            # Reads per million mapped to the UTR
            readsCPM = 0
            if readNumber > 0:
                readsCPM = readCount * 1000000.0 / readNumber

            # Convert to SlamSeqInterval and print
            conversionRate = 0
            if coverageOnTs > 0:
                conversionRate = float(conversionsOnTs) / float(coverageOnTs)

            slamSeqUtr = SlamSeqInterval(utr.chromosome, utr.start, utr.stop, utr.strand, utr.name, Tcontent, readsCPM, coverageOnTs, conversionsOnTs, conversionRate, readCount, tcReadCount, multiMapCount)
            slamSeqUtrMLE = SlamSeqInterval(utr.chromosome, utr.start, utr.stop, utr.strand, utr.name, Tcontent, readsCPM, coverageOnTs, conversionsOnTs, conversionRate, ",".join(str(x) for x in tInReads), ",".join(str(x) for x in tcInRead), multiMapCount)

        print(slamSeqUtr, file=fileCSV)
        if mle:
            print(slamSeqUtrMLE, file=fileTest)

    fileCSV.close()
    if mle:
        fileTest.close()

    fileBedgraphPlus = open(outputBedgraphPlus, 'w')
    fileBedgraphMinus = open(outputBedgraphMinus, 'w')

    for position in conversionBedGraph:
        positionData = position.split(":")
        if positionData[2] == "+":
            print(positionData[0], positionData[1], int(positionData[1]) + 1, conversionBedGraph[position], file=fileBedgraphPlus)
        else:
            print(positionData[0], positionData[1], int(positionData[1]) + 1, conversionBedGraph[position], file=fileBedgraphMinus)

    fileBedgraphPlus.close()
    fileBedgraphMinus.close()

    if mle:
        fileNameMLE = replaceExtension(outputCSV, ".tsv", "_mle")
        callR(getPlotter("compute_conversion_rate_mle") + " -f " + fileNameTest + " -r " + "0.024" + " -o " + fileNameMLE + " &> /dev/null")

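# Worked example of the reads-per-million normalization used in
# computeTconversions() above (illustrative numbers): with
# readNumber = 2000000 retained reads in the sample and readCount = 500
# reads assigned to a UTR, readsCPM = 500 * 1000000.0 / 2000000 = 250.0.
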
def readSummary(filteredFiles, countDirectory, outputFile, log, printOnly=False, verbose=True, force=False):
    # Print rows sorted by sample ID
    contentDict = {}

    tsvFile = open(outputFile, "w")
    print("# slamdunk summary v" + __version__, file=tsvFile)

    if countDirectory is not None:
        f = tempfile.NamedTemporaryFile(delete=False)

    for bam in filteredFiles:
        slamseqInfo = SlamSeqInfo(bam)
        sampleInfo = getSampleInfo(bam)

        if countDirectory is not None:
            countedReads = 0
            countFile = os.path.join(countDirectory, replaceExtension(os.path.basename(bam), ".tsv", "_tcount"))
            if not os.path.exists(countFile):
                print("TCount directory does not seem to contain tcount file for:\t" + countFile)
            else:
                print(sampleInfo.Name, countFile, sep='\t', file=f)
                countedReads = sumCounts(countFile)

            if sampleInfo.ID in contentDict:
                ID = len(contentDict) + 1
            else:
                ID = sampleInfo.ID
            contentDict[int(ID)] = "\t".join([bam, sampleInfo.Name, sampleInfo.Type, sampleInfo.Time, str(slamseqInfo.SequencedReads), str(slamseqInfo.MappedReads), str(slamseqInfo.DedupReads), str(slamseqInfo.MQFilteredReads), str(slamseqInfo.IdFilteredReads), str(slamseqInfo.NmFilteredReads), str(slamseqInfo.MultimapperReads), str(slamseqInfo.FilteredReads), str(countedReads), slamseqInfo.AnnotationName])
        else:
            if sampleInfo.ID in contentDict:
                ID = len(contentDict) + 1
            else:
                ID = sampleInfo.ID
            contentDict[int(ID)] = "\t".join([bam, sampleInfo.Name, sampleInfo.Type, sampleInfo.Time, str(slamseqInfo.SequencedReads), str(slamseqInfo.MappedReads), str(slamseqInfo.DedupReads), str(slamseqInfo.MQFilteredReads), str(slamseqInfo.IdFilteredReads), str(slamseqInfo.NmFilteredReads), str(slamseqInfo.MultimapperReads), str(slamseqInfo.FilteredReads), slamseqInfo.AnnotationName])

    if countDirectory is not None:
        f.close()
        callR(getPlotter("PCAPlotter") + " -f " + f.name + " -O " + replaceExtension(outputFile, ".pdf", "_PCA") + " -P " + replaceExtension(outputFile, ".txt", "_PCA"), log, dry=printOnly, verbose=verbose)
        print("FileName", "SampleName", "SampleType", "SampleTime", "Sequenced", "Mapped", "Deduplicated", "MQ-Filtered", "Identity-Filtered", "NM-Filtered", "Multimap-Filtered", "Retained", "Counted", "Annotation", sep="\t", file=tsvFile)
    else:
        print("FileName", "SampleName", "SampleType", "SampleTime", "Sequenced", "Mapped", "Deduplicated", "MQ-Filtered", "Identity-Filtered", "NM-Filtered", "Multimap-Filtered", "Retained", "Annotation", sep="\t", file=tsvFile)

    for key in sorted(contentDict):
        print(contentDict[key], file=tsvFile)

    tsvFile.close()

def runCount(bam, ref, bed, maxLength, minQual, conversionThreshold, is_inverse, outputDirectory, snpDirectory, vcfFile):
    outputCSV = os.path.join(outputDirectory, replaceExtension(basename(bam), ".tsv", "_tcount"))
    outputBedgraphPlus = os.path.join(outputDirectory, replaceExtension(basename(bam), ".bedgraph", "_tcount_plus"))
    outputBedgraphMinus = os.path.join(outputDirectory, replaceExtension(basename(bam), ".bedgraph", "_tcount_mins"))
    outputLOG = os.path.join(outputDirectory, replaceExtension(basename(bam), ".log", "_tcount"))

    if vcfFile is not None:
        inputSNP = vcfFile
    elif snpDirectory is not None:
        inputSNP = os.path.join(snpDirectory, replaceExtension(basename(bam), ".vcf", "_snp"))
    else:
        inputSNP = None

    if maxLength is None:
        maxLength = estimateMaxReadLength(bam)
    if maxLength < 0:
        print("Difference between minimum and maximum read length is > 10. "
              "Please specify --max-read-length parameter.")
        sys.exit(0)

    log = getLogFile(outputLOG)
    print("Using " + str(maxLength) + " as maximum read length.", file=log)

    if bed is not None:
        message("Bed file detected.")
        tcounter.computeTconversions(ref, bed, inputSNP, bam, maxLength, minQual, outputCSV, outputBedgraphPlus, outputBedgraphMinus, conversionThreshold, log)
    else:
        message("No bed file passed. Count w.r.t. the full genome.")
        outputBedgraphPlusNew = os.path.join(outputDirectory, replaceExtension(basename(bam), ".bedgraph", "_tcount_plus_new"))
        outputBedgraphMinusNew = os.path.join(outputDirectory, replaceExtension(basename(bam), ".bedgraph", "_tcount_mins_new"))
        tcounter.computeTconversionsAll(ref, inputSNP, bam, outputBedgraphPlus, outputBedgraphPlusNew, outputBedgraphMinus, outputBedgraphMinusNew, conversionThreshold, minQual, is_inverse, log)

    stepFinished()
    return outputCSV

def runAll(args):
    message("slamdunk all")

    if args.sampleIndex > -1:
        sec = random.randrange(200, 2000) / 1000.0
        message("Waiting " + str(sec) + " seconds")
        sleep(sec)

    # Set up slamdunk run folder
    outputDirectory = args.outputDir
    createDir(outputDirectory)

    n = args.threads
    referenceFile = args.referenceFile

    # Run mapper dunk
    dunkPath = os.path.join(outputDirectory, "map")
    createDir(dunkPath)

    samples, samplesInfos = getSamples(args.files, runOnly=args.sampleIndex)

    message("Running slamDunk map for " + str(len(samples)) + " files (" + str(n) + " threads)")

    for i in range(0, len(samples)):
        bam = samples[i]
        sampleInfo = samplesInfos[i]
        tid = i
        if args.sampleIndex > -1:
            tid = args.sampleIndex
        runMap(tid, bam, referenceFile, n, args.trim5, args.maxPolyA, args.quantseq, args.endtoend, args.topn, sampleInfo, dunkPath, args.skipSAM)

    dunkFinished()

    if not args.skipSAM:
        message("Running slamDunk sam2bam for " + str(len(samples)) + " files (" + str(n) + " threads)")
        results = Parallel(n_jobs=1, verbose=verbose)(delayed(runSam2Bam)(tid, samples[tid], n, dunkPath) for tid in range(0, len(samples)))
        dunkFinished()

    dunkbufferIn = []
    for file in samples:
        dunkbufferIn.append(os.path.join(dunkPath, replaceExtension(basename(file), ".bam", "_slamdunk_mapped")))

    # Run filter dunk
    bed = args.bed
    if args.filterbed:
        bed = args.filterbed
        args.multimap = True

    if not args.multimap:
        bed = None

    dunkPath = os.path.join(outputDirectory, "filter")
    createDir(dunkPath)

    message("Running slamDunk filter for " + str(len(samples)) + " files (" + str(n) + " threads)")
    results = Parallel(n_jobs=n, verbose=verbose)(delayed(runFilter)(tid, dunkbufferIn[tid], bed, args.mq, args.identity, args.nm, dunkPath) for tid in range(0, len(samples)))
    dunkFinished()

    dunkbufferOut = []
    for file in dunkbufferIn:
        dunkbufferOut.append(os.path.join(dunkPath, replaceExtension(basename(file), ".bam", "_filtered")))
    dunkbufferIn = dunkbufferOut
    dunkbufferOut = []
    dunkFinished()

    # Run snps dunk
    dunkPath = os.path.join(outputDirectory, "snp")
    createDir(dunkPath)

    minCov = args.cov
    minVarFreq = args.var

    snpThread = n
    if snpThread > 1:
        snpThread = int(snpThread / 2)

    #if (args.minQual == 0):
    #    snpqual = 13
    #else:
    snpqual = args.minQual

    message("Running slamDunk SNP for " + str(len(samples)) + " files (" + str(snpThread) + " threads)")
    results = Parallel(n_jobs=snpThread, verbose=verbose)(delayed(runSnp)(tid, referenceFile, minCov, minVarFreq, snpqual, dunkbufferIn[tid], dunkPath) for tid in range(0, len(samples)))
    dunkFinished()

    # Run count dunk
    dunkPath = os.path.join(outputDirectory, "count")
    createDir(dunkPath)

    snpDirectory = os.path.join(outputDirectory, "snp")
    message("Running slamDunk tcount for " + str(len(samples)) + " files (" + str(n) + " threads)")
    results = Parallel(n_jobs=n, verbose=verbose)(delayed(runCount)(tid, dunkbufferIn[tid], referenceFile, args.bed, args.maxLength, args.minQual, args.conversionThreshold, dunkPath, snpDirectory) for tid in range(0, len(samples)))
    dunkFinished()

def prepareBed(outputDirectory, bed, readLength):
    createDir(outputDirectory)
    slamSimBed = os.path.join(outputDirectory, replaceExtension(basename(bed), ".bed", "_original"))
    simulator.prepareBED(bed, slamSimBed, readLength)

def computeTconversionsAll(ref, snpsFile, bam, outputBedgraphPlus, outputBedgraphPlusNew, outputBedgraphMinus, outputBedgraphMinusNew, conversionThreshold, minQual, is_inverse, log):

    def to_bed_graph(c, data, bedgraph, rn):
        # Normalize coverage to reads per million and write one bedGraph line per position
        data /= rn
        data *= 1000000.0
        for i, d in enumerate(data):
            print(c, i, i + 1, d, file=bedgraph)

    # Per-chromosome coverage vectors (hard-coded sacCer3 chromosome lengths)
    chroms_fw = {
        'chrI': np.zeros(230218).astype('float32'),
        'chrII': np.zeros(813184).astype('float32'),
        'chrIII': np.zeros(316620).astype('float32'),
        'chrIV': np.zeros(1531933).astype('float32'),
        'chrIX': np.zeros(439888).astype('float32'),
        'chrM': np.zeros(85779).astype('float32'),
        'chrV': np.zeros(576874).astype('float32'),
        'chrVI': np.zeros(270161).astype('float32'),
        'chrVII': np.zeros(1090940).astype('float32'),
        'chrVIII': np.zeros(562643).astype('float32'),
        'chrX': np.zeros(745751).astype('float32'),
        'chrXI': np.zeros(666816).astype('float32'),
        'chrXII': np.zeros(1078177).astype('float32'),
        'chrXIII': np.zeros(924431).astype('float32'),
        'chrXIV': np.zeros(784333).astype('float32'),
        'chrXV': np.zeros(1091291).astype('float32'),
        'chrXVI': np.zeros(948066).astype('float32')
    }
    chroms_bw = copy.deepcopy(chroms_fw)
    chroms_fw_new = copy.deepcopy(chroms_fw)
    chroms_bw_new = copy.deepcopy(chroms_fw)

    readNumber, positiveCount, negativeCount, positiveCountNew, negativeCountNew = 0, 0, 0, 0, 0

    bamFile = pysam.AlignmentFile(bam, "rb")
    if bamFile.header['HD']['SO'] != 'queryname':
        # Sort BAM file by query name
        sbam = replaceExtension(bam, '.bam', '_sorted')
        if not os.path.exists(sbam):
            run('samtools sort -n %s -o %s' % (bam, sbam), log)
    else:
        sbam = bam
    bamFile = pysam.AlignmentFile(sbam, "rb")

    snps = SNPtools.SNPDictionary(snpsFile)
    snps.read()

    # Go through one chr after the other
    seqIter = SlamSeqIter(bamFile, ref, snps, conversionThreshold, minQual)

    read1 = None
    read2 = None
    for read in seqIter:
        if not read.isPaired or read.unmappedMate or read.duplicate:
            continue
        if read.isSecondRead:
            read2 = read
        else:
            read1 = read
            read2 = None
            continue
        if read1 is None or read2 is None or read1.queryName != read2.queryName:
            continue

        readNumber += 1
        chrom = read1.chromosome
        start = np.minimum(read1.startRefPos, read2.startRefPos)
        end = np.maximum(read1.endRefPos, read2.endRefPos)
        is_tc_read = read1.isTcRead or read2.isTcRead
        direction_read = read1 if not is_inverse else read2

        if direction_read.direction == ReadDirection.Forward:
            positiveCount += 1
            chroms_fw[chrom][start:end] += 1
            if is_tc_read:
                positiveCountNew += 1
                chroms_fw_new[chrom][start:end] += 1
        else:
            negativeCount += 1
            chroms_bw[chrom][start:end] += 1
            if is_tc_read:
                negativeCountNew += 1
                chroms_bw_new[chrom][start:end] += 1

    print("Total reads: %s\n"
          "Positive reads: %s\n"
          "Positive reads new: %s\n"
          "Negative reads: %s\n"
          "Negative reads new: %s" % (readNumber, positiveCount, positiveCountNew, negativeCount, negativeCountNew), file=log)

    fileBedgraphPlus = open(outputBedgraphPlus, 'w')
    fileBedgraphPlusNew = open(outputBedgraphPlusNew, 'w')
    fileBedgraphMinus = open(outputBedgraphMinus, 'w')
    fileBedgraphMinusNew = open(outputBedgraphMinusNew, 'w')

    for chrom in chroms_fw.keys():
        to_bed_graph(chrom, chroms_fw[chrom], fileBedgraphPlus, readNumber)
        to_bed_graph(chrom, chroms_bw[chrom], fileBedgraphMinus, readNumber)
        to_bed_graph(chrom, chroms_fw_new[chrom], fileBedgraphPlusNew, readNumber)
        to_bed_graph(chrom, chroms_bw_new[chrom], fileBedgraphMinusNew, readNumber)

    fileBedgraphPlus.close()
    fileBedgraphPlusNew.close()
    fileBedgraphMinus.close()
    fileBedgraphMinusNew.close()

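# The chromosome dictionary in computeTconversionsAll() above hard-codes
# sacCer3 (S. cerevisiae) chromosome lengths. A minimal sketch, assuming
# pysam and numpy are available, of deriving the same per-chromosome zero
# vectors from the BAM header instead (chromVectorsFromHeader is a
# hypothetical helper, not part of slamdunk):

import numpy as np
import pysam

def chromVectorsFromHeader(bamPath):
    # One float32 zero vector per reference sequence declared in the header.
    with pysam.AlignmentFile(bamPath, "rb") as bamFile:
        return {name: np.zeros(length, dtype='float32')
                for name, length in zip(bamFile.references, bamFile.lengths)}
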
def run():
    ########################################################################
    # Argument parsing
    ########################################################################

    # Info
    usage = "SLAMdunk software for analyzing SLAM-seq data"

    # Main Parsers
    parser = ArgumentParser(description=usage, formatter_class=ArgumentDefaultsHelpFormatter)
    parser.add_argument('--version', action='version', version='%(prog)s ' + __version__)

    # Initialize Subparsers
    subparsers = parser.add_subparsers(help="", dest="command")

    # map command
    mapparser = subparsers.add_parser('map', help='Map SLAM-seq read data', formatter_class=ArgumentDefaultsHelpFormatter)
    mapparser.add_argument('files', action='store', help='Single csv/tsv file (recommended) containing all sample files and sample info or a list of all sample BAM/FASTA(gz)/FASTQ(gz) files', nargs="+")
    mapparser.add_argument("-r", "--reference", type=str, required=True, dest="referenceFile", default=SUPPRESS, help="Reference fasta file")
    mapparser.add_argument("-o", "--outputDir", type=str, required=True, dest="outputDir", default=SUPPRESS, help="Output directory for mapped BAM files.")
    mapparser.add_argument("-5", "--trim-5p", type=int, required=False, dest="trim5", default=12, help="Number of bp removed from 5' end of all reads.")
    mapparser.add_argument("-n", "--topn", type=int, required=False, dest="topn", default=1, help="Max. number of alignments to report per read")
    mapparser.add_argument("-a", "--max-polya", type=int, required=False, dest="maxPolyA", default=4, help="Max number of As at the 3' end of a read.")
    mapparser.add_argument("-t", "--threads", type=int, required=False, dest="threads", default=1, help="Thread number")
    mapparser.add_argument("-q", "--quantseq", dest="quantseq", action='store_true', required=False, help="Run plain Quantseq alignment without SLAM-seq scoring")
    mapparser.add_argument('-e', "--endtoend", action='store_true', dest="endtoend", help="Use an end-to-end alignment algorithm for mapping.")
    mapparser.add_argument('-sn', "--sampleName", type=str, dest="sampleName", required=False, help="Use this sample name for all supplied samples")
    mapparser.add_argument('-sy', "--sampleType", type=str, dest="sampleType", required=False, default="pulse", help="Use this sample type for all supplied samples")
    mapparser.add_argument('-st', "--sampleTime", type=int, dest="sampleTime", required=False, default=0, help="Use this sample time for all supplied samples")
    mapparser.add_argument("-i", "--sample-index", type=int, required=False, default=-1, dest="sampleIndex", help="Run analysis only for sample <i>. Use for distributing slamdunk analysis on a cluster (index is 1-based).")
    mapparser.add_argument('-ss', "--skip-sam", action='store_true', dest="skipSAM", help="Output BAM while mapping. Slower, but uses less disk space.")

    # filter command
    filterparser = subparsers.add_parser('filter', help='Filter SLAM-seq aligned data')
    filterparser.add_argument('bam', action='store', help='Bam file(s)', nargs="+")
    filterparser.add_argument("-o", "--outputDir", type=str, required=True, dest="outputDir", help="Output directory for mapped BAM files.")
    filterparser.add_argument("-b", "--bed", type=str, required=False, dest="bed", help="BED file, overrides MQ filter to 0")
    filterparser.add_argument("-mq", "--min-mq", type=int, required=False, default=2, dest="mq", help="Minimum mapping quality (default: %(default)d)")
    filterparser.add_argument("-mi", "--min-identity", type=float, required=False, default=0.95, dest="identity", help="Minimum alignment identity (default: %(default)s)")
    filterparser.add_argument("-nm", "--max-nm", type=int, required=False, default=-1, dest="nm", help="Maximum NM for alignments (default: %(default)d)")
    filterparser.add_argument("-t", "--threads", type=int, required=False, dest="threads", default=1, help="Thread number (default: %(default)d)")

    # snp command
    snpparser = subparsers.add_parser('snp', help='Call SNPs on SLAM-seq aligned data', formatter_class=ArgumentDefaultsHelpFormatter)
    snpparser.add_argument('bam', action='store', help='Bam file(s)', nargs="+")
    snpparser.add_argument("-o", "--outputDir", type=str, required=True, dest="outputDir", default=SUPPRESS, help="Output directory for mapped BAM files.")
    snpparser.add_argument("-r", "--reference", required=True, dest="fasta", type=str, default=SUPPRESS, help="Reference fasta file")
    snpparser.add_argument("-c", "--min-coverage", required=False, dest="cov", type=int, help="Minimum coverage to call variant", default=10)
    #snpparser.add_argument("-q", "--min-base-qual", type=int, default=13, required=False, dest="minQual", help="Min base quality for T -> C conversions (default: %(default)d)")
    snpparser.add_argument("-f", "--var-fraction", required=False, dest="var", type=float, help="Minimum variant fraction to call variant", default=0.8)
    snpparser.add_argument("-t", "--threads", type=int, required=False, default=1, dest="threads", help="Thread number")

    # count command
    countparser = subparsers.add_parser('count', help='Count T/C conversions in SLAM-seq aligned data')
    countparser.add_argument('bam', action='store', help='Bam file(s)', nargs="+")
    countparser.add_argument("-o", "--outputDir", type=str, required=True, dest="outputDir", default=SUPPRESS, help="Output directory for mapped BAM files.")
    countparser.add_argument("-s", "--snp-directory", type=str, required=False, dest="snpDir", default=SUPPRESS, help="Directory containing SNP files.")
    countparser.add_argument("-v", "--vcf", type=str, required=False, dest="vcfFile", default=SUPPRESS, help="Externally provided custom variant file.")
    countparser.add_argument("-r", "--reference", type=str, required=True, dest="ref", default=SUPPRESS, help="Reference fasta file")
    countparser.add_argument("-b", "--bed", type=str, required=True, dest="bed", default=SUPPRESS, help="BED file")
    countparser.add_argument("-c", "--conversion-threshold", type=int, dest="conversionThreshold", required=False, default=1, help="Number of T>C conversions required to count read as T>C read (default: %(default)d)")
    countparser.add_argument("-l", "--max-read-length", type=int, required=False, dest="maxLength", help="Max read length in BAM file")
    countparser.add_argument("-q", "--min-base-qual", type=int, default=27, required=False, dest="minQual", help="Min base quality for T -> C conversions (default: %(default)d)")
    countparser.add_argument("-t", "--threads", type=int, required=False, default=1, dest="threads", help="Thread number (default: %(default)d)")

    # all command
    allparser = subparsers.add_parser('all', help='Run entire SLAMdunk analysis')
    allparser.add_argument('files', action='store', help='Single csv/tsv file (recommended) containing all sample files and sample info or a list of all sample BAM/FASTA(gz)/FASTQ(gz) files', nargs="+")
    allparser.add_argument("-r", "--reference", type=str, required=True, dest="referenceFile", help="Reference fasta file")
    allparser.add_argument("-b", "--bed", type=str, required=True, dest="bed", help="BED file with 3'UTR coordinates")
    allparser.add_argument("-fb", "--filterbed", type=str, required=False, dest="filterbed", help="BED file with 3'UTR coordinates to filter multimappers (activates -m)")
    allparser.add_argument("-v", "--vcf", type=str, required=False, dest="vcfFile", default=SUPPRESS, help="Skip SNP step and provide custom variant file.")
    allparser.add_argument("-o", "--outputDir", type=str, required=True, dest="outputDir", help="Output directory for slamdunk run.")
    allparser.add_argument("-5", "--trim-5p", type=int, required=False, dest="trim5", default=12, help="Number of bp removed from 5' end of all reads (default: %(default)s)")
    allparser.add_argument("-a", "--max-polya", type=int, required=False, dest="maxPolyA", default=4, help="Max number of As at the 3' end of a read (default: %(default)s)")
    allparser.add_argument("-n", "--topn", type=int, required=False, dest="topn", default=1, help="Max. number of alignments to report per read (default: %(default)s)")
    allparser.add_argument("-t", "--threads", type=int, required=False, default=1, dest="threads", help="Thread number (default: %(default)s)")
    allparser.add_argument("-q", "--quantseq", dest="quantseq", action='store_true', required=False, help="Run plain Quantseq alignment without SLAM-seq scoring")
    allparser.add_argument('-e', "--endtoend", action='store_true', dest="endtoend", help="Use an end-to-end alignment algorithm for mapping.")
    allparser.add_argument('-m', "--multimap", action='store_true', dest="multimap", help="Use reference to resolve multimappers (requires -n > 1).")
    allparser.add_argument("-mq", "--min-mq", type=int, required=False, default=2, dest="mq", help="Minimum mapping quality (default: %(default)s)")
    allparser.add_argument("-mi", "--min-identity", type=float, required=False, default=0.95, dest="identity", help="Minimum alignment identity (default: %(default)s)")
    allparser.add_argument("-nm", "--max-nm", type=int, required=False, default=-1, dest="nm", help="Maximum NM for alignments (default: %(default)s)")
    allparser.add_argument("-mc", "--min-coverage", required=False, dest="cov", type=int, help="Minimum coverage to call variant (default: %(default)s)", default=10)
    allparser.add_argument("-mv", "--var-fraction", required=False, dest="var", type=float, help="Minimum variant fraction to call variant (default: %(default)s)", default=0.8)
    allparser.add_argument("-c", "--conversion-threshold", type=int, dest="conversionThreshold", required=False, default=1, help="Number of T>C conversions required to count read as T>C read (default: %(default)d)")
    allparser.add_argument("-rl", "--max-read-length", type=int, required=False, dest="maxLength", help="Max read length in BAM file")
    allparser.add_argument("-mbq", "--min-base-qual", type=int, default=27, required=False, dest="minQual", help="Min base quality for T -> C conversions (default: %(default)d)")
    allparser.add_argument('-sn', "--sampleName", type=str, dest="sampleName", required=False, help="Use this sample name for all supplied samples")
    allparser.add_argument('-sy', "--sampleType", type=str, dest="sampleType", required=False, default="pulse", help="Use this sample type for all supplied samples")
    allparser.add_argument('-st', "--sampleTime", type=int, dest="sampleTime", required=False, default=0, help="Use this sample time for all supplied samples")
    allparser.add_argument("-i", "--sample-index", type=int, required=False, default=-1, dest="sampleIndex", help="Run analysis only for sample <i>. Use for distributing slamdunk analysis on a cluster (index is 1-based).")
    allparser.add_argument("-ss", "--skip-sam", action='store_true', dest="skipSAM", help="Output BAM while mapping. Slower, but uses less disk space.")

    args = parser.parse_args()

    ########################################################################
    # Routine selection
    ########################################################################

    command = args.command

    if command == "map":
        mapper.checkNextGenMapVersion()
        outputDirectory = args.outputDir
        if args.sampleIndex > -1:
            sec = random.randrange(0, 2000) / 1000
            message("Waiting " + str(sec) + " seconds")
            sleep(sec)
        createDir(outputDirectory)
        n = args.threads
        referenceFile = args.referenceFile

        samples, samplesInfos = getSamples(args.files, runOnly=args.sampleIndex)

        message("Running slamDunk map for " + str(len(samples)) + " files (" + str(n) + " threads)")

        for i in range(0, len(samples)):
            bam = samples[i]
            if not args.sampleName or len(samples) > 1:
                sampleName = replaceExtension(basename(bam), "", "")
            else:
                sampleName = args.sampleName
            sampleInfo = samplesInfos[i]
            if sampleInfo == "":
                sampleInfo = sampleName + ":" + args.sampleType + ":" + str(args.sampleTime)
            tid = i
            if args.sampleIndex > -1:
                tid = args.sampleIndex
            runMap(tid, bam, referenceFile, n, args.trim5, args.maxPolyA, args.quantseq, args.endtoend, args.topn, sampleInfo, outputDirectory, args.skipSAM)
        dunkFinished()

        if not args.skipSAM:
            message("Running slamDunk sam2bam for " + str(len(samples)) + " files (" + str(n) + " threads)")
            results = Parallel(n_jobs=1, verbose=verbose)(delayed(runSam2Bam)(tid, samples[tid], n, outputDirectory) for tid in range(0, len(samples)))
            dunkFinished()

    elif command == "filter":
        outputDirectory = args.outputDir
        createDir(outputDirectory)
        n = args.threads
        message("Running slamDunk filter for " + str(len(args.bam)) + " files (" + str(n) + " threads)")
        results = Parallel(n_jobs=n, verbose=verbose)(delayed(runFilter)(tid, args.bam[tid], args.bed, args.mq, args.identity, args.nm, outputDirectory) for tid in range(0, len(args.bam)))
        dunkFinished()

    elif command == "snp":
        outputDirectory = args.outputDir
        createDir(outputDirectory)
        fasta = args.fasta
        minCov = args.cov
        minVarFreq = args.var
        #minQual = args.minQual
        minQual = 15
        n = args.threads
        if n > 1:
            n = int(n / 2)
        message("Running slamDunk SNP for " + str(len(args.bam)) + " files (" + str(n) + " threads)")
        results = Parallel(n_jobs=n, verbose=verbose)(delayed(runSnp)(tid, fasta, minCov, minVarFreq, minQual, args.bam[tid], outputDirectory) for tid in range(0, len(args.bam)))
        dunkFinished()

    elif command == "count":
        outputDirectory = args.outputDir
        createDir(outputDirectory)
        if "snpDir" in args:
            snpDirectory = args.snpDir
        else:
            snpDirectory = None
        if "vcfFile" in args:
            vcfFile = args.vcfFile
        else:
            vcfFile = None
        n = args.threads
        message("Running slamDunk tcount for " + str(len(args.bam)) + " files (" + str(n) + " threads)")
        results = Parallel(n_jobs=n, verbose=verbose)(delayed(runCount)(tid, args.bam[tid], args.ref, args.bed, args.maxLength, args.minQual, args.conversionThreshold, outputDirectory, snpDirectory, vcfFile) for tid in range(0, len(args.bam)))
        dunkFinished()

    elif command == "all":
        runAll(args)
        dunkFinished()

    else:
        parser.error("Too few arguments.")

def runSummary(bam, outputFile, countDirectory):
    outputLog = replaceExtension(outputFile, ".log")
    stats.readSummary(bam, countDirectory, outputFile, getLogFile(outputLog))

def run(): ######################################################################## # Argument parsing ######################################################################## # Info usage = "AlleyOop utility tools and diagnostics for SLAMSeq data" # Main Parsers parser = ArgumentParser(description=usage, formatter_class=ArgumentDefaultsHelpFormatter) parser.add_argument('--version', action='version', version='%(prog)s ' + __version__) # Initialize Subparsers subparsers = parser.add_subparsers(help="", dest="command") # dedup command dedupparser = subparsers.add_parser('dedup', help='Deduplicate SLAM-seq aligned data', formatter_class=ArgumentDefaultsHelpFormatter) dedupparser.add_argument("-o", "--outputDir", type=str, required=True, dest="outputDir", default=SUPPRESS, help="Output directory for mapped BAM files.") dedupparser.add_argument("-tc", "--tcMutations", type=int, required=False, default = 0, dest="tcMutations", help="Only select reads with x number of T>C mutations.") dedupparser.add_argument("-t", "--threads", type=int, required=False, default=1, dest="threads", help="Thread number") dedupparser.add_argument('bam', action='store', help='Bam file(s)' , nargs="+") # collapse command collapseparser = subparsers.add_parser('collapse', help='Collapse UTRs', formatter_class=ArgumentDefaultsHelpFormatter) collapseparser.add_argument("-o", "--outputDir", type=str, required=True, dest="outputDir", default=SUPPRESS, help="Output directory for mapped BAM files.") collapseparser.add_argument("-t", "--threads", type=int, required=False, default=1, dest="threads", help="Thread number") collapseparser.add_argument('tcount', action='store', help='Tcount file(s)' , nargs="+") # positional-rates command posratesparser = subparsers.add_parser('positional-tracks', help='Genome-wide positional tracks as bedgraph', formatter_class=ArgumentDefaultsHelpFormatter) posratesparser.add_argument('bam', action='store', help='Bam file(s)' , nargs="+") posratesparser.add_argument("-o", "--outputDir", type=str, required=True, dest="outputDir", default=SUPPRESS, help="Output directory for bedGraph files.") posratesparser.add_argument("-s", "--snp-directory", type=str, required=False, dest="snpDir", default=SUPPRESS, help="Directory containing SNP files.") posratesparser.add_argument("-r", "--reference", type=str, required=True, dest="ref", default=SUPPRESS, help="Reference fasta file") posratesparser.add_argument("-c", "--conversion-threshold", type=int, dest="conversionThreshold", required=False, default=1,help="Number of T>C conversions required to count read as T>C read (default: %(default)d)") posratesparser.add_argument("-a", "--coverage-cutoff", type=int, dest="coverageCutoff", required=False, default=1,help="Minimum coverage required to report nucleotide-conversion rate (default: %(default)d). 
Anything less than 1 will be set to 1 to avoid division by zero.") posratesparser.add_argument("-q", "--min-base-qual", type=int, default=27, required=False, dest="minQual", help="Min base quality for T -> C conversions (default: %(default)d)") posratesparser.add_argument("-t", "--threads", type=int, required=False, default=1, dest="threads", help="Thread number (default: %(default)d)") # TC read separator readseparatorparser = subparsers.add_parser('read-separator', help='Separate TC-reads from background reads genome-wide', formatter_class=ArgumentDefaultsHelpFormatter) readseparatorparser.add_argument('bam', action='store', help='Bam file(s)' , nargs="+") readseparatorparser.add_argument("-o", "--outputDir", type=str, required=True, dest="outputDir", default=SUPPRESS, help="Output directory for bam files.") readseparatorparser.add_argument("-s", "--snp-directory", type=str, required=False, dest="snpDir", default=SUPPRESS, help="Directory containing SNP files.") readseparatorparser.add_argument("-r", "--reference", type=str, required=True, dest="ref", default=SUPPRESS, help="Reference fasta file") readseparatorparser.add_argument("-c", "--conversion-threshold", type=int, dest="conversionThreshold", required=False, default=1,help="Number of T>C conversions required to count read as T>C read (default: %(default)d)") readseparatorparser.add_argument("-q", "--min-base-qual", type=int, default=27, required=False, dest="minQual", help="Min base quality for T -> C conversions (default: %(default)d)") readseparatorparser.add_argument("-t", "--threads", type=int, required=False, default=1, dest="threads", help="Thread number (default: %(default)d)") # stats command statsparser = subparsers.add_parser('rates', help='Calculate overall conversion rates on SLAM-seq datasets', formatter_class=ArgumentDefaultsHelpFormatter) statsparser.add_argument('bam', action='store', help='Bam file(s)' , nargs="+") statsparser.add_argument("-o", "--outputDir", type=str, required=True, dest="outputDir", default=SUPPRESS, help="Output directory for mapped BAM files.") statsparser.add_argument("-r", "--reference", type=str, required=True, dest="referenceFile", default=SUPPRESS, help="Reference fasta file") statsparser.add_argument("-mq", "--min-basequality", type=int, required=False, default=27, dest="mq", help="Minimal base quality for SNPs") #statsparser.add_argument('-R', "--compute-rates", dest="overallRates", action='store_true', help="Compute overall conversion rates.") statsparser.add_argument("-t", "--threads", type=int, required=False, default=1, dest="threads", help="Thread number") # context command tccontextparser = subparsers.add_parser('tccontext', help='Calculate T->C conversion context on SLAM-seq datasets', formatter_class=ArgumentDefaultsHelpFormatter) tccontextparser.add_argument('bam', action='store', help='Bam file(s)' , nargs="+") #tccontextparser.add_argument("-b", "--bed", type=str, required=True, dest="bed", help="BED file") tccontextparser.add_argument("-o", "--outputDir", type=str, required=True, dest="outputDir", default=SUPPRESS, help="Output directory for mapped BAM files.") tccontextparser.add_argument("-r", "--reference", type=str, required=True, dest="referenceFile", default=SUPPRESS, help="Reference fasta file") tccontextparser.add_argument("-mq", "--min-basequality", type=int, required=False, default=0, dest="mq", help="Minimal base quality for SNPs") tccontextparser.add_argument("-t", "--threads", type=int, required=False, default=1, dest="threads", help="Thread number") # stats 
    # stats rates utr command
    statsutrrateparser = subparsers.add_parser('utrrates', help='Calculate conversion rates per UTR on SLAM-seq datasets')
    statsutrrateparser.add_argument('bam', action='store', help='Bam file(s)', nargs="+")
    statsutrrateparser.add_argument("-o", "--outputDir", type=str, required=True, dest="outputDir", help="Output directory for mapped BAM files.")
    statsutrrateparser.add_argument("-r", "--reference", type=str, required=True, dest="referenceFile", help="Reference fasta file")
    statsutrrateparser.add_argument("-mq", "--min-basequality", type=int, required=False, default=27, dest="mq", help="Minimal base quality for SNPs (default: %(default)s)")
    statsutrrateparser.add_argument("-m", "--multiTCStringency", dest="strictTCs", action='store_true', required=False, help="")
    statsutrrateparser.add_argument("-t", "--threads", type=int, required=False, default=1, dest="threads", help="Thread number (default: %(default)s)")
    statsutrrateparser.add_argument("-b", "--bed", type=str, required=True, dest="bed", help="BED file")
    statsutrrateparser.add_argument("-l", "--max-read-length", type=int, required=False, dest="maxLength", help="Max read length in BAM file (default: %(default)s)")

    # SNPeval command
    snpevalparser = subparsers.add_parser('snpeval', help='Evaluate SNP calling')
    snpevalparser.add_argument('bam', action='store', help='Bam file(s)', nargs="+")
    snpevalparser.add_argument("-o", "--outputDir", type=str, required=True, dest="outputDir", help="Output directory for mapped BAM files.")
    snpevalparser.add_argument("-s", "--snp-directory", type=str, required=True, dest="snpDir", help="Directory containing SNP files.")
    snpevalparser.add_argument("-r", "--reference", type=str, required=True, dest="ref", help="Reference fasta file")
    snpevalparser.add_argument("-b", "--bed", type=str, required=True, dest="bed", help="BED file")
    snpevalparser.add_argument("-c", "--min-coverage", required=False, dest="cov", type=int, help="Minimum coverage to call variant (default: %(default)s)", default=10)
    snpevalparser.add_argument("-f", "--var-fraction", required=False, dest="var", type=float, help="Minimum variant fraction to call variant (default: %(default)s)", default=0.8)
    snpevalparser.add_argument("-m", "--multiTCStringency", dest="strictTCs", action='store_true', required=False, help="")
    snpevalparser.add_argument("-l", "--max-read-length", type=int, required=False, dest="maxLength", help="Max read length in BAM file (default: %(default)s)")
    snpevalparser.add_argument("-q", "--min-base-qual", type=int, default=27, required=False, dest="minQual", help="Min base quality for T -> C conversions (default: %(default)s)")
    snpevalparser.add_argument("-t", "--threads", type=int, required=False, default=1, dest="threads", help="Thread number (default: %(default)s)")

    # stats summary command
    statsSumParser = subparsers.add_parser('summary', help='Display summary information and statistics on read numbers')
    statsSumParser.add_argument('bam', action='store', help='Filtered BAM files (produced by slamdunk filter or all)', nargs="+")
    statsSumParser.add_argument("-o", "--output", type=str, required=True, dest="outputFile", help="Output file")
    statsSumParser.add_argument("-t", "--tcountDir", type=str, required=False, dest="countDirectory", help="Folder containing tcount files")
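    # Example invocations for the commands above (file names hypothetical):
    #   alleyoop snpeval -o outdir -s snpdir -r genome.fa -b utrs.bed -c 10 -f 0.8 sample1.bam
    #   alleyoop summary -o summary.txt -t countdir sample1_filtered.bam sample2_filtered.bam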
    # merge command
    statsMergeParser = subparsers.add_parser('merge', help='Merge T->C rates from multiple samples in one TSV file', formatter_class=ArgumentDefaultsHelpFormatter)
    statsMergeParser.add_argument('countFiles', action='store', help='tCount files', nargs="+")
    statsMergeParser.add_argument("-o", "--output", type=str, required=True, dest="outputFile", default=SUPPRESS, help="Output file")
    statsMergeParser.add_argument('-c', "--column", dest="column", type=str, required=False, default="TcReadCount / ReadCount", help="Column or expression used to summarize files.")
    statsMergeParser.add_argument('-n', "--columnname", dest="columnName", type=int, required=False, default=2, help="Index of meta data field to use as column name.")

    # stats read info command
    conversionRateParser = subparsers.add_parser('tcperreadpos', help='Calculate conversion rates per read position on SLAM-seq datasets')
    conversionRateParser.add_argument('bam', action='store', help='Bam file(s)', nargs="+")
    conversionRateParser.add_argument("-r", "--reference", type=str, required=True, dest="referenceFile", help="Reference fasta file")
    conversionRateParser.add_argument("-s", "--snp-directory", type=str, required=False, dest="snpDir", help="Directory containing SNP files.")
    conversionRateParser.add_argument("-l", "--max-read-length", type=int, required=False, dest="maxLength", help="Max read length in BAM file")
    conversionRateParser.add_argument("-o", "--outputDir", type=str, required=True, dest="outputDir", help="Output directory for mapped BAM files.")
    # conversionRateParser.add_argument("-5", "--trim-5p", type=int, required=False, dest="trim5", help="Number of bp removed from 5' end of all reads.")
    conversionRateParser.add_argument("-mq", "--min-basequality", type=int, required=False, default=27, dest="mq", help="Minimal base quality for SNPs (default: %(default)s)")
    conversionRateParser.add_argument("-t", "--threads", type=int, required=False, dest="threads", default=1, help="Thread number (default: %(default)s)")

    # stats utr info command
    utrRateParser = subparsers.add_parser('tcperutrpos', help='Calculate conversion rates per UTR position on SLAM-seq datasets')
    utrRateParser.add_argument('bam', action='store', help='Bam file(s)', nargs="+")
    utrRateParser.add_argument("-r", "--reference", type=str, required=True, dest="referenceFile", help="Reference fasta file")
    utrRateParser.add_argument("-b", "--bed", type=str, required=True, dest="bed", help="BED file")
    utrRateParser.add_argument("-s", "--snp-directory", type=str, required=False, dest="snpDir", help="Directory containing SNP files.")
    utrRateParser.add_argument("-l", "--max-read-length", type=int, required=False, dest="maxLength", help="Max read length in BAM file")
    utrRateParser.add_argument("-o", "--outputDir", type=str, required=True, dest="outputDir", help="Output directory for mapped BAM files.")
    # conversionRateParser.add_argument("-5", "--trim-5p", type=int, required=False, dest="trim5", help="Number of bp removed from 5' end of all reads.")
    utrRateParser.add_argument("-mq", "--min-basequality", type=int, required=False, default=27, dest="mq", help="Minimal base quality for SNPs (default: %(default)s)")
    utrRateParser.add_argument("-t", "--threads", type=int, required=False, dest="threads", default=1, help="Thread number (default: %(default)s)")
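    # Example: merge per-sample T>C read fractions into one table, using the
    # default -c expression defined above (file names hypothetical):
    #   alleyoop merge -o rates.tsv -c "TcReadCount / ReadCount" s1_tcount.tsv s2_tcount.tsv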
required=True, dest="snpDir", default=SUPPRESS, help="Directory containing SNP files.") dumpReadInfo.add_argument("-o", "--outputDir", type=str, required=True, dest="outputDir", default=SUPPRESS, help="Output directory for mapped BAM files.")#conversionRateParser.add_argument("-5", "--trim-5p", type=int, required=False, dest="trim5", help="Number of bp removed from 5' end of all reads.") dumpReadInfo.add_argument("-mq", "--min-basequality", type=int, required=False, default=0, dest="mq", help="Minimal base quality for SNPs") dumpReadInfo.add_argument("-t", "--threads", type=int, required=False, dest="threads", default=1, help="Thread number") args = parser.parse_args() ######################################################################## # Routine selection ######################################################################## command = args.command if (command == "dedup") : outputDirectory = args.outputDir createDir(outputDirectory) n = args.threads tcMutations = args.tcMutations message("Running alleyoop dedup for " + str(len(args.bam)) + " files (" + str(n) + " threads)") results = Parallel(n_jobs=n, verbose=verbose)(delayed(runDedup)(tid, args.bam[tid], outputDirectory, tcMutations) for tid in range(0, len(args.bam))) dunkFinished() elif (command == "collapse") : outputDirectory = args.outputDir createDir(outputDirectory) n = args.threads message("Running alleyoop collapse for " + str(len(args.tcount)) + " files (" + str(n) + " threads)") results = Parallel(n_jobs=n, verbose=verbose)(delayed(runCollapse)(tid, args.tcount[tid], outputDirectory) for tid in range(0, len(args.tcount))) dunkFinished() elif (command == "positional-tracks") : outputDirectory = args.outputDir createDir(outputDirectory) snpDirectory = args.snpDir n = args.threads message("Running alleyoop positional-tracks for " + str(len(args.bam)) + " files (" + str(n) + " threads)") results = Parallel(n_jobs=n, verbose=verbose)(delayed(runPositionalRates)(tid, args.bam[tid], args.ref, args.minQual, args.conversionThreshold, args.coverageCutoff, outputDirectory, snpDirectory) for tid in range(0, len(args.bam))) dunkFinished() elif (command == "read-separator") : outputDirectory = args.outputDir createDir(outputDirectory) snpDirectory = args.snpDir n = args.threads message("Running alleyoop read-separator for " + str(len(args.bam)) + " files (" + str(n) + " threads)") results = Parallel(n_jobs=n, verbose=verbose)(delayed(runReadSeparator)(tid, args.bam[tid], args.ref, args.minQual, args.conversionThreshold, outputDirectory, snpDirectory) for tid in range(0, len(args.bam))) dunkFinished() elif (command == "half-lifes") : outputDirectory = args.outputDir createDir(outputDirectory) timepoints = args.timepoints message("Running alleyoop half-lifes for " + str(len(args.bam)) + " files") runHalfLifes(args.bam, timepoints, outputDirectory) dunkFinished() elif (command == "rates") : outputDirectory = args.outputDir createDir(outputDirectory) n = args.threads referenceFile = args.referenceFile minMQ = args.mq message("Running alleyoop rates for " + str(len(args.bam)) + " files (" + str(n) + " threads)") results = Parallel(n_jobs=n, verbose=verbose)(delayed(runStatsRates)(tid, args.bam[tid], referenceFile, minMQ, outputDirectory) for tid in range(0, len(args.bam))) dunkFinished() elif (command == "snpeval") : outputDirectory = args.outputDir createDir(outputDirectory) snpDirectory = args.snpDir n = args.threads message("Running alleyoop SNPeval for " + str(len(args.bam)) + " files (" + str(n) + " threads)") results = 
    elif (command == "tccontext"):
        outputDirectory = args.outputDir
        createDir(outputDirectory)
        n = args.threads
        referenceFile = args.referenceFile
        minMQ = args.mq
        message("Running alleyoop TC context for " + str(len(args.bam)) + " files (" + str(n) + " threads)")
        results = Parallel(n_jobs=n, verbose=verbose)(delayed(runStatsTCContext)(tid, args.bam[tid], referenceFile, minMQ, outputDirectory) for tid in range(0, len(args.bam)))
        dunkFinished()

    elif (command == "utrrates"):
        outputDirectory = args.outputDir
        createDir(outputDirectory)
        n = args.threads
        referenceFile = args.referenceFile
        minMQ = args.mq
        message("Running alleyoop utrrates for " + str(len(args.bam)) + " files (" + str(n) + " threads)")
        results = Parallel(n_jobs=n, verbose=verbose)(delayed(runStatsRatesUTR)(tid, args.bam[tid], referenceFile, minMQ, args.strictTCs, outputDirectory, args.bed, args.maxLength) for tid in range(0, len(args.bam)))
        dunkFinished()

    elif (command == "summary"):
        message("Running alleyoop summary for " + str(len(args.bam)) + " files")
        runSummary(args.bam, args.outputFile, args.countDirectory)
        dunkFinished()

    elif (command == "merge"):
        message("Running alleyoop merge for " + str(len(args.countFiles)) + " files")
        outputLog = replaceExtension(args.outputFile, ".log")
        stats.mergeRates(",".join(args.countFiles), args.outputFile, args.column, args.columnName, getLogFile(outputLog))
        dunkFinished()

    elif (command == "tcperreadpos"):
        outputDirectory = args.outputDir
        createDir(outputDirectory)
        n = args.threads
        snpDirectory = args.snpDir
        referenceFile = args.referenceFile
        minMQ = args.mq
        message("Running alleyoop tcperreadpos for " + str(len(args.bam)) + " files (" + str(n) + " threads)")
        results = Parallel(n_jobs=n, verbose=verbose)(delayed(runTcPerReadPos)(tid, args.bam[tid], referenceFile, minMQ, args.maxLength, outputDirectory, snpDirectory) for tid in range(0, len(args.bam)))
        dunkFinished()

    elif (command == "tcperutrpos"):
        outputDirectory = args.outputDir
        createDir(outputDirectory)
        n = args.threads
        snpDirectory = args.snpDir
        referenceFile = args.referenceFile
        minMQ = args.mq
        message("Running alleyoop tcperutrpos for " + str(len(args.bam)) + " files (" + str(n) + " threads)")
        results = Parallel(n_jobs=n, verbose=verbose)(delayed(runTcPerUtr)(tid, args.bam[tid], referenceFile, args.bed, minMQ, args.maxLength, outputDirectory, snpDirectory) for tid in range(0, len(args.bam)))
        dunkFinished()

    elif (command == "dump"):
        outputDirectory = args.outputDir
        createDir(outputDirectory)
        n = args.threads
        snpDirectory = args.snpDir
        referenceFile = args.referenceFile
        minMQ = args.mq
        message("Running alleyoop dump for " + str(len(args.bam)) + " files (" + str(n) + " threads)")
        results = Parallel(n_jobs=n, verbose=verbose)(delayed(runDumpReadInfo)(tid, args.bam[tid], referenceFile, minMQ, outputDirectory, snpDirectory) for tid in range(0, len(args.bam)))
        dunkFinished()

    else:
        parser.error("Too few arguments.")
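
# Illustrative sketch (not part of the original tool): a minimal, self-contained
# example of the joblib fan-out pattern used by every dispatch branch above,
# one worker per input file. `_demoTask` and the file names are hypothetical
# stand-ins for the real per-file entry points such as runDedup or runSNPeval.
def _parallelFanOutSketch():
    from joblib import Parallel, delayed

    def _demoTask(tid, path):
        # Placeholder for per-file work; the real tasks write their results
        # into outputDirectory and call stepFinished().
        return (tid, path)

    bams = ["sample1.bam", "sample2.bam"]  # hypothetical inputs
    # One delayed call per file, exactly like the dispatch branches above.
    return Parallel(n_jobs=2, verbose=0)(
        delayed(_demoTask)(tid, bams[tid]) for tid in range(0, len(bams)))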