def newPipeline():
    """Run the single-FASTQ marker-splitting pipeline from ``sys.argv``.

    Arguments are read positionally: ``sys.argv[1]`` is the IUPAC file,
    ``sys.argv[2]`` the oligos file, ``sys.argv[3]`` the output directory and
    ``sys.argv[4]`` the FASTQ file. The threshold passed to
    ``readSingleFastQFile`` is fixed at 10 and the log file is ``log.txt``.
    """
    # Presumably validates the command line before it is indexed below;
    # the helper is defined outside this block.
    checkArgumentMarkerReducer()

    iupacMap = DnaUtils.readIUPACFile(sys.argv[1])
    oligos = DnaUtils.readOligosFile(sys.argv[2], iupacMap)

    fastqPath = sys.argv[4]
    threshold = 10
    logFileName = "log.txt"
    outputDir = sys.argv[3]
    readSingleFastQFile(fastqPath, threshold, logFileName, oligos, outputDir)
def setSequence(self, value, forced=False):
    """Store ``value`` as ``self.sequence`` and log the stored result.

    The value is stored unchanged when ``forced`` is true or when
    ``self.readDirection`` equals ``FastqSequence.READ_DIRECTION_LEFT_TO_RIGHT``.
    Otherwise the result of ``DnaUtils.complementAndReverseDnaSequence(value)``
    is stored instead.
    """
    # Short-circuit: the read direction is not consulted when forced is set.
    keepAsIs = forced or (
        self.readDirection == FastqSequence.READ_DIRECTION_LEFT_TO_RIGHT
    )
    if keepAsIs:
        self.sequence = value
    else:
        self.sequence = DnaUtils.complementAndReverseDnaSequence(value)
    self.logger.debug("self.sequence : " + self.sequence)
def readSingleFastQFile(fileName, threshold, logFileName, mapOligos, outputDir):
    """Parse a FASTQ file and pass each record to ``DnaUtils.splitMarker1``.

    Lines are classified by position. A line starting with "@" that does not
    directly follow a "+" separator opens a new record; the line after a
    header is the sequence; the line after a separator is the quality string.
    Every completed record goes through ``applyDrasticThreshold(threshold)``
    and is then handed to ``DnaUtils.splitMarker1``.

    Args:
        fileName: path of the FASTQ file to read.
        threshold: value forwarded to ``applyDrasticThreshold``.
        logFileName: path of a log file. It is created/truncated, but nothing
            is written to it here (the merge step that used it is disabled).
        mapOligos: oligo map forwarded to ``DnaUtils.splitMarker1``.
        outputDir: output directory forwarded to ``DnaUtils.splitMarker1``.

    Returns:
        None. An empty input file yields no records and no error.
    """
    lineNumber = 0
    lastHeaderLineNumber = 0
    lastSeparatorLineNumber = 0
    fastqSequence = None

    # Context managers close both files even if a record fails to process.
    with open(fileName, "r") as f, open(logFileName, "w"):
        for line in f:
            # Strip only the newline: slicing off the last character would
            # eat real data on a final line that has no trailing newline.
            line = line.rstrip("\n")
            # startswith() is safe on empty lines, unlike line[0].
            if line.startswith("@") and lastSeparatorLineNumber != lineNumber - 1:
                if fastqSequence is not None:
                    # A new header means the previous record is complete.
                    logger.debug("before threshold " + fastqSequence.sequence)
                    fastqSequence.applyDrasticThreshold(threshold)
                    logger.debug("after threshold " + fastqSequence.sequence)
                    logger.debug("after threshold " + DnaUtils.complementAndReverseDnaSequence(fastqSequence.sequence))
                    DnaUtils.splitMarker1(mapOligos, outputDir, fastqSequence, fileName)
                fastqSequence = FastqSequence.readFasqSequenceHeader(line)
                lastHeaderLineNumber = lineNumber
                logging.debug(line)
            elif lastHeaderLineNumber == lineNumber - 1:
                logging.debug("seq in file : " + line)
                fastqSequence.setSequence(line)
            elif line.startswith("+"):
                lastSeparatorLineNumber = lineNumber
            elif lastSeparatorLineNumber == lineNumber - 1:
                fastqSequence.setQuality(line)
            lineNumber += 1

        # Flush the last record; there is none when the file was empty.
        if fastqSequence is not None:
            fastqSequence.applyDrasticThreshold(threshold)
            DnaUtils.splitMarker1(mapOligos, outputDir, fastqSequence, fileName)
def readSingleFastQFile(fileName, threshold, logFileName, mapOligos, outputDir):
    """Parse a FASTQ file and pass each record to ``DnaUtils.splitMarker1``.

    Lines are classified by position. A line starting with "@" that does not
    directly follow a "+" separator opens a new record; the line after a
    header is the sequence; the line after a separator is the quality string.
    Every completed record goes through ``applyDrasticThreshold(threshold)``
    and is then handed to ``DnaUtils.splitMarker1``.

    Args:
        fileName: path of the FASTQ file to read.
        threshold: value forwarded to ``applyDrasticThreshold``.
        logFileName: path of a log file. It is created/truncated, but nothing
            is written to it here (the merge step that used it is disabled).
        mapOligos: oligo map forwarded to ``DnaUtils.splitMarker1``.
        outputDir: output directory forwarded to ``DnaUtils.splitMarker1``.

    Returns:
        None. An empty input file yields no records and no error.
    """
    lineNumber = 0
    lastHeaderLineNumber = 0
    lastSeparatorLineNumber = 0
    fastqSequence = None

    # Context managers close both files even if a record fails to process.
    with open(fileName, "r") as f, open(logFileName, "w"):
        for line in f:
            # Strip only the newline: slicing off the last character would
            # eat real data on a final line that has no trailing newline.
            line = line.rstrip("\n")
            # startswith() is safe on empty lines, unlike line[0].
            if line.startswith("@") and lastSeparatorLineNumber != lineNumber - 1:
                if fastqSequence is not None:
                    # A new header means the previous record is complete.
                    logger.debug("before threshold " + fastqSequence.sequence)
                    fastqSequence.applyDrasticThreshold(threshold)
                    logger.debug("after threshold " + fastqSequence.sequence)
                    logger.debug("after threshold " + DnaUtils.complementAndReverseDnaSequence(fastqSequence.sequence))
                    DnaUtils.splitMarker1(mapOligos, outputDir, fastqSequence, fileName)
                fastqSequence = FastqSequence.readFasqSequenceHeader(line)
                lastHeaderLineNumber = lineNumber
                logging.debug(line)
            elif lastHeaderLineNumber == lineNumber - 1:
                logging.debug("seq in file : " + line)
                fastqSequence.setSequence(line)
            elif line.startswith("+"):
                lastSeparatorLineNumber = lineNumber
            elif lastSeparatorLineNumber == lineNumber - 1:
                fastqSequence.setQuality(line)
            lineNumber += 1

        # Flush the last record; there is none when the file was empty.
        if fastqSequence is not None:
            fastqSequence.applyDrasticThreshold(threshold)
            DnaUtils.splitMarker1(mapOligos, outputDir, fastqSequence, fileName)
def cutFusionAndMerge(headerF, dataF, headerR, dataR, wellThresholds, log, outputFile):
    """Trim a forward/reverse read pair, match them and log the outcome.

    The forward data is cut to its first ``wellThresholds[0]`` characters.
    The reverse data is first passed through
    ``DnaUtils.complementAndReverseDnaSequence`` and the result is then cut to
    its first ``wellThresholds[1]`` characters. Both are wrapped in
    ``FastqSequence`` objects (quality cleared) and handed to
    ``FastqSequence.matchFastqSequence``.

    Args:
        headerF: header line of the forward read.
        dataF: sequence data of the forward read.
        headerR: header line of the reverse read.
        dataR: sequence data of the reverse read.
        wellThresholds: indexable pair of cut lengths (forward, reverse).
        log: writable object that receives one line describing the outcome.
        outputFile: unused here; writing the merged record is disabled.

    Returns:
        The object returned by ``FastqSequence.matchFastqSequence``.
    """
    logger.debug("[cutFusionAndMerge] " + headerF + ", " + headerR + ", " + str(wellThresholds))

    forwardIndex, reverseIndex = 0, 1
    trimmedForward = dataF[:wellThresholds[forwardIndex]]
    trimmedReverse = DnaUtils.complementAndReverseDnaSequence(dataR)[:wellThresholds[reverseIndex]]

    forwardRead = FastqSequence.createFasqSequenceHeader(headerF)
    # forced=True: the forward data is stored exactly as trimmed.
    forwardRead.setSequence(trimmedForward, True)
    forwardRead.quality = None

    reverseRead = FastqSequence.createFasqSequenceHeader(headerR)
    reverseRead.setSequence(trimmedReverse)
    reverseRead.quality = None

    merged = FastqSequence.matchFastqSequence(forwardRead, reverseRead)
    logging.debug(merged.getLineHeader() + "\t" + str(merged.typeFusion))

    # Pick the log prefix for the fusion outcome, then write a single line.
    if merged.typeFusion == merged.TYPE_FUSION_OK:
        prefix = "Merge "
    elif merged.typeFusion == merged.TYPE_FUSION_NO_MATCH:
        prefix = "Forced merge "
    elif merged.typeFusion == merged.TYPE_FUSION_PARTIAL:
        prefix = "Partial merge "
    else:
        prefix = "Can't merge "
    log.write(prefix + forwardRead.getLineHeader() + " with " + reverseRead.getLineHeader() + "\n")

    return merged
def generatePlot():
    """Write per-file and aggregated marker tables for a directory of inputs.

    Command-line arguments read from ``sys.argv``:
        2: IUPAC file.
        3: forward marker (oligos) file.
        4: reverse marker (oligos) file.
        5: directory walked recursively for input files.
        6: value forwarded to ``DnaUtils.readmarker`` and used as the prefix
           of the aggregated ``<prefix>.f.csv`` / ``<prefix>.r.csv`` outputs.

    For every file found, ``DnaUtils.readmarker`` returns a forward and a
    reverse map. Each is passed to ``DnaUtils.mapToCsv`` under
    ``<stem>.f.csv`` / ``<stem>.r.csv`` (relative names, so presumably written
    to the current working directory) and merged into the aggregated maps,
    which are passed to ``DnaUtils.mapToCsv`` at the end.
    """
    checkArgumentMarkerReducer()
    logger.debug("IUPAC File : " + sys.argv[2])
    iupacMap = DnaUtils.readIUPACFile(sys.argv[2])
    logger.debug("forward MarkerMap File : " + sys.argv[3])
    logger.debug("reverse MarkerMap File : " + sys.argv[4])
    forwardMarkerMap = DnaUtils.readOligosFile(sys.argv[3], iupacMap)
    reverseMarkerMap = DnaUtils.readOligosFile(sys.argv[4], iupacMap, True)
    rAllOutputFileName = sys.argv[6] + ".r.csv"
    fAllOutputFileName = sys.argv[6] + ".f.csv"
    fAll = {}
    rAll = {}
    for root, _dirs, files in os.walk(sys.argv[5]):
        for f1le in files:
            # os.walk descends into subdirectories, so the path must be built
            # from the directory being visited, not from the top directory.
            filePath = os.path.join(root, f1le)
            logger.debug("file : " + filePath)
            f, r = DnaUtils.readmarker(filePath, forwardMarkerMap, reverseMarkerMap, sys.argv[6], False)
            mergeMap(rAll, r)
            mergeMap(fAll, f)
            # Take the stem from the bare file name so that a dot in a
            # directory name can never be mistaken for the extension.
            stem = os.path.splitext(f1le)[0]
            DnaUtils.mapToCsv(r, stem + ".r.csv")
            DnaUtils.mapToCsv(f, stem + ".f.csv")
    DnaUtils.mapToCsv(rAll, rAllOutputFileName)
    DnaUtils.mapToCsv(fAll, fAllOutputFileName)
def setSequence(self, value):
    """Store ``value`` as ``self.sequence``, adjusted for the read direction.

    The value is stored unchanged when ``self.readDirection`` equals
    ``FastqSequence.READ_DIRECTION_LEFT_TO_RIGHT``. Otherwise the result of
    ``DnaUtils.complementAndReverseDnaSequence(value)`` is stored.
    """
    leftToRight = self.readDirection == FastqSequence.READ_DIRECTION_LEFT_TO_RIGHT
    self.sequence = value if leftToRight else DnaUtils.complementAndReverseDnaSequence(value)
def generatePlot():
    """Write per-file and aggregated marker tables for a directory of inputs.

    Command-line arguments read from ``sys.argv``:
        2: IUPAC file.
        3: forward marker (oligos) file.
        4: reverse marker (oligos) file.
        5: directory walked recursively for input files.
        6: value forwarded to ``DnaUtils.readmarker`` and used as the prefix
           of the aggregated ``<prefix>.f.csv`` / ``<prefix>.r.csv`` outputs.

    For every file found, ``DnaUtils.readmarker`` returns a forward and a
    reverse map. Each is passed to ``DnaUtils.mapToCsv`` under
    ``<stem>.f.csv`` / ``<stem>.r.csv`` (relative names, so presumably written
    to the current working directory) and merged into the aggregated maps,
    which are passed to ``DnaUtils.mapToCsv`` at the end.
    """
    checkArgumentMarkerReducer()
    logger.debug("IUPAC File : " + sys.argv[2])
    iupacMap = DnaUtils.readIUPACFile(sys.argv[2])
    logger.debug("forward MarkerMap File : " + sys.argv[3])
    logger.debug("reverse MarkerMap File : " + sys.argv[4])
    forwardMarkerMap = DnaUtils.readOligosFile(sys.argv[3], iupacMap)
    reverseMarkerMap = DnaUtils.readOligosFile(sys.argv[4], iupacMap, True)
    rAllOutputFileName = sys.argv[6] + ".r.csv"
    fAllOutputFileName = sys.argv[6] + ".f.csv"
    fAll = {}
    rAll = {}
    for root, _dirs, files in os.walk(sys.argv[5]):
        for f1le in files:
            # os.walk descends into subdirectories, so the path must be built
            # from the directory being visited, not from the top directory.
            filePath = os.path.join(root, f1le)
            logger.debug("file : " + filePath)
            f, r = DnaUtils.readmarker(filePath, forwardMarkerMap, reverseMarkerMap, sys.argv[6], False)
            mergeMap(rAll, r)
            mergeMap(fAll, f)
            # Take the stem from the bare file name so that a dot in a
            # directory name can never be mistaken for the extension.
            stem = os.path.splitext(f1le)[0]
            DnaUtils.mapToCsv(r, stem + ".r.csv")
            DnaUtils.mapToCsv(f, stem + ".f.csv")
    DnaUtils.mapToCsv(rAll, rAllOutputFileName)
    DnaUtils.mapToCsv(fAll, fAllOutputFileName)