def readFastQFile(fileName, reverseFileName, threshold, logFileName):
    """Read a forward FASTQ file and try to merge every record with its mate.

    Each record found in ``fileName`` is handed to ``tryToFusion`` together
    with ``reverseFileName``, ``threshold`` and the opened log file.

    Parameters:
        fileName: path of the forward FASTQ file to read.
        reverseFileName: path of the reverse FASTQ file, passed through to
            ``tryToFusion`` unchanged.
        threshold: quality threshold, passed through to ``tryToFusion``.
        logFileName: path of the log file; it is created/truncated even when
            the input holds no record.

    Returns:
        None.
    """
    fastqSequence = None
    # Role of the previously read line: "header", "separator" or None.
    previousKind = None

    # ``with`` guarantees both files are closed even if a record fails.
    with open(fileName, "r") as f, open(logFileName, "w") as log:
        for rawLine in f:
            # Remove only the line terminator: slicing off the last character
            # would eat a real symbol when the final line has no newline.
            line = rawLine.rstrip("\n")

            if previousKind == "separator":
                # The line right after "+" is the quality string, whatever
                # its first character is ("@" and "+" are valid qualities).
                if fastqSequence is not None:
                    fastqSequence.setQuality(line)
                previousKind = None
            elif line.startswith("@"):
                # A new header closes the record read so far.
                if fastqSequence is not None:
                    tryToFusion(fastqSequence, reverseFileName, threshold, log)
                fastqSequence = FastqSequence.readFasqSequenceHeader(line)
                previousKind = "header"
            elif previousKind == "header":
                fastqSequence.setSequence(line)
                previousKind = None
            elif line.startswith("+"):
                previousKind = "separator"
            else:
                # Blank or unexpected line: ignored.
                previousKind = None

        # The last record has no following header to trigger its processing.
        # An empty input leaves nothing to process.
        if fastqSequence is not None:
            tryToFusion(fastqSequence, reverseFileName, threshold, log)
def testQualityThreshold(fastqFileName, threshold):
    """Apply the quality threshold to every record of a FASTQ file and log the effect.

    For each record, the sequence length is measured before and after
    ``applyDrasticThreshold(threshold)`` and both lengths are written to the
    debug log. Nothing is written to disk.

    Parameters:
        fastqFileName: path of the FASTQ file to read.
        threshold: value handed to ``applyDrasticThreshold`` of each record.

    Returns:
        None.
    """

    def reportThreshold(record):
        # Log the sequence length before/after thresholding one record.
        avLen = len(record.sequence)
        record.applyDrasticThreshold(threshold)
        logger.debug("Avant : " + str(avLen) + " / Apres : " + str(len(record.sequence)))

    fastqSequence = None
    # Role of the previously read line: "header", "separator" or None.
    previousKind = None

    with open(fastqFileName, "r") as f:
        for rawLine in f:
            # Remove only the line terminator, never a real character.
            line = rawLine.rstrip("\n")

            if previousKind == "separator":
                # The line right after "+" is the quality string, even when
                # it starts with "@" or "+".
                if fastqSequence is not None:
                    fastqSequence.setQuality(line)
                previousKind = None
            elif line.startswith("@"):
                if fastqSequence is not None:
                    reportThreshold(fastqSequence)
                fastqSequence = FastqSequence.readFasqSequenceHeader(line)
                logging.debug(line)
                previousKind = "header"
            elif previousKind == "header":
                fastqSequence.setSequence(line)
                previousKind = None
            elif line.startswith("+"):
                previousKind = "separator"
            else:
                # Blank or unexpected line: ignored.
                previousKind = None

    # The final record is not followed by a header, so it is reported here;
    # previously it was silently skipped.
    if fastqSequence is not None:
        reportThreshold(fastqSequence)
def readFastQFile(fileName, reverseFileName, threshold, seqOutputName, logFileName):
    """Read a forward FASTQ file and try to merge every record with its mate.

    Each record found in ``fileName`` is handed to ``tryToFusion`` together
    with ``reverseFileName``, ``threshold``, the opened log file and the
    opened sequence output file.

    Parameters:
        fileName: path of the forward FASTQ file to read.
        reverseFileName: path of the reverse FASTQ file, passed through to
            ``tryToFusion`` unchanged.
        threshold: quality threshold, passed through to ``tryToFusion``.
        seqOutputName: path of the sequence output file (created/truncated).
        logFileName: path of the log file (created/truncated).

    Returns:
        None.
    """
    fastqSequence = None
    # Role of the previously read line: "header", "separator" or None.
    previousKind = None

    # ``with`` guarantees the three files are closed even if a record fails.
    with open(fileName, "r") as f, \
            open(logFileName, "w") as log, \
            open(seqOutputName, "w") as seqOutput:
        for rawLine in f:
            # Remove only the line terminator: slicing off the last character
            # would eat a real symbol when the final line has no newline.
            line = rawLine.rstrip("\n")

            if previousKind == "separator":
                # The line right after "+" is the quality string, whatever
                # its first character is ("@" and "+" are valid qualities).
                if fastqSequence is not None:
                    fastqSequence.setQuality(line)
                previousKind = None
            elif line.startswith("@"):
                # A new header closes the record read so far.
                if fastqSequence is not None:
                    tryToFusion(fastqSequence, reverseFileName, threshold, log, seqOutput)
                fastqSequence = FastqSequence.readFasqSequenceHeader(line)
                logging.debug(line)
                previousKind = "header"
            elif previousKind == "header":
                fastqSequence.setSequence(line)
                previousKind = None
            elif line.startswith("+"):
                previousKind = "separator"
            else:
                # Blank or unexpected line: ignored.
                previousKind = None

        # The last record has no following header to trigger its processing.
        # An empty input leaves nothing to process.
        if fastqSequence is not None:
            tryToFusion(fastqSequence, reverseFileName, threshold, log, seqOutput)
def tryToFusion(fastqSequence, reverseFileName, threshold, log, seqOutput):
    """Look up the mate of ``fastqSequence`` in the reverse file and merge them.

    The forward record is thresholded first. The reverse file is then scanned
    for every record whose header line equals
    ``fastqSequence.getReverseLineHeader()``. Each such record is
    thresholded and passed to ``FastqSequence.matchFastqSequence``. When a
    merged sequence comes back it is written to ``seqOutput`` as a
    ``>header`` line followed by the sequence, and the kind of merge is
    written to ``log``. Otherwise a ``*Can't merge`` line is logged.

    Parameters:
        fastqSequence: forward record (modified by ``applyDrasticThreshold``).
        reverseFileName: path of the reverse FASTQ file to scan.
        threshold: value handed to ``applyDrasticThreshold`` of both records.
        log: writable text stream receiving one line per merge attempt.
        seqOutput: writable text stream receiving the merged sequences.

    Returns:
        None.
    """

    def mergeWithMate(rFastqSequence):
        # Threshold the mate, attempt the merge and report the outcome.
        rFastqSequence.applyDrasticThreshold(threshold)
        s = FastqSequence.matchFastqSequence(fastqSequence, rFastqSequence)
        pair = (fastqSequence.getLineHeader() + " with "
                + rFastqSequence.getLineHeader() + "\n")
        if s is None:
            log.write("*Can't merge " + pair)
            return
        seqOutput.write(">" + s.getLineHeader() + "\n")
        seqOutput.write(s.sequence + "\n")
        logging.debug(s.getLineHeader() + "\t" + str(s.typeFusion))
        if s.typeFusion == s.TYPE_FUSION_OK:
            log.write("Merge " + pair)
        elif s.typeFusion == s.TYPE_FUSION_NO_MATCH:
            log.write("Forced merge " + pair)
        elif s.typeFusion == s.TYPE_FUSION_PARTIAL:
            log.write("Partial merge " + pair)
        else:
            log.write("Can't merge " + pair)

    fastqSequence.applyDrasticThreshold(threshold)
    reverseLineHeader = fastqSequence.getReverseLineHeader()
    logging.debug("Search " + reverseLineHeader)

    # Mate record currently being filled in, or None while scanning others.
    rFastqSequence = None
    # Role of the previously read line: "header", "separator" or None.
    # It is tracked for every record, not only the mate, so that quality
    # strings starting with "@" are never mistaken for headers.
    previousKind = None

    with open(reverseFileName, "r") as f:
        for rawLine in f:
            # Remove only the line terminator, never a real character.
            line = rawLine.rstrip("\n")

            if previousKind == "separator":
                # The line right after "+" is the quality string, even when
                # it starts with "@" or "+".
                if rFastqSequence is not None:
                    rFastqSequence.setQuality(line)
                previousKind = None
            elif line.startswith("@"):
                logging.debug(line + " est un header")
                # A new header means the mate read so far is complete.
                if rFastqSequence is not None:
                    mergeWithMate(rFastqSequence)
                    rFastqSequence = None
                if line == reverseLineHeader:
                    rFastqSequence = FastqSequence.readFasqSequenceHeader(line)
                previousKind = "header"
            elif previousKind == "header":
                if rFastqSequence is not None:
                    rFastqSequence.setSequence(line)
                previousKind = None
            elif line.startswith("+"):
                previousKind = "separator"
            else:
                # Blank or unexpected line: ignored.
                previousKind = None

    # A mate that is the last record of the file has no following header;
    # without this flush it would never be merged.
    if rFastqSequence is not None:
        mergeWithMate(rFastqSequence)
def readSingleFastQFile(fileName, threshold, logFileName, mapOligos, outputDir):
    """Threshold every record of a FASTQ file and dispatch it with ``splitMarker1``.

    Each record read from ``fileName`` goes through
    ``applyDrasticThreshold(threshold)`` and is then passed to
    ``DnaUtils.splitMarker1(mapOligos, outputDir, record, fileName)``.

    Parameters:
        fileName: path of the FASTQ file to read; also forwarded to
            ``DnaUtils.splitMarker1``.
        threshold: value handed to ``applyDrasticThreshold`` of each record.
        logFileName: path of a log file; it is created/truncated, but this
            function writes nothing to it.
        mapOligos: forwarded to ``DnaUtils.splitMarker1`` unchanged.
        outputDir: forwarded to ``DnaUtils.splitMarker1`` unchanged.

    Returns:
        None.
    """
    fastqSequence = None
    # Role of the previously read line: "header", "separator" or None.
    previousKind = None

    # The log handle is unused below but is still opened so the file is
    # created/truncated exactly as before.
    with open(fileName, "r") as f, open(logFileName, "w"):
        for rawLine in f:
            # Remove only the line terminator: slicing off the last character
            # would eat a real symbol when the final line has no newline.
            line = rawLine.rstrip("\n")

            if previousKind == "separator":
                # The line right after "+" is the quality string, whatever
                # its first character is ("@" and "+" are valid qualities).
                if fastqSequence is not None:
                    fastqSequence.setQuality(line)
                previousKind = None
            elif line.startswith("@"):
                # A new header closes the record read so far.
                if fastqSequence is not None:
                    logger.debug("before threshold " + fastqSequence.sequence)
                    fastqSequence.applyDrasticThreshold(threshold)
                    logger.debug("after threshold " + fastqSequence.sequence)
                    logger.debug("after threshold " + DnaUtils.complementAndReverseDnaSequence(fastqSequence.sequence))
                    DnaUtils.splitMarker1(mapOligos, outputDir, fastqSequence, fileName)
                fastqSequence = FastqSequence.readFasqSequenceHeader(line)
                logging.debug(line)
                previousKind = "header"
            elif previousKind == "header":
                logging.debug("seq in file : " + line)
                fastqSequence.setSequence(line)
                previousKind = None
            elif line.startswith("+"):
                previousKind = "separator"
            else:
                # Blank or unexpected line: ignored.
                previousKind = None

        # The last record has no following header to trigger its processing.
        # An empty input leaves nothing to process.
        if fastqSequence is not None:
            fastqSequence.applyDrasticThreshold(threshold)
            DnaUtils.splitMarker1(mapOligos, outputDir, fastqSequence, fileName)
def tryToFusion(fastqSequence, reverseFileName, threshold, log, seqOutput=None):
    """Look up the mate of ``fastqSequence`` in the reverse file and merge them.

    The reverse file is scanned for every record whose header line equals
    ``fastqSequence.getReverseLineHeader()``. Each such record is
    thresholded and passed to ``FastqSequence.matchFastqSequence``. A merged
    sequence is printed (header, then sequence) when ``seqOutput`` is
    omitted, or written to ``seqOutput`` as a ``>header`` line followed by
    the sequence when it is given. A failed merge is reported in ``log``.

    Parameters:
        fastqSequence: forward record.
        reverseFileName: path of the reverse FASTQ file to scan.
        threshold: value handed to ``applyDrasticThreshold`` of the mate.
        log: writable text stream receiving the "Can't merge" lines.
        seqOutput: optional writable text stream for merged sequences. It is
            accepted so that callers passing five arguments also work; the
            four-argument behaviour (printing) is unchanged.

    Returns:
        None.
    """
    # NOTE(review): the forward record is not thresholded here, only the
    # mate is. The original contained a bare ``fastqSequence`` statement at
    # this point, possibly a truncated ``applyDrasticThreshold`` call.
    # Confirm the intended behaviour with the author.

    def mergeWithMate(rFastqSequence):
        # Threshold the mate, attempt the merge and report the outcome.
        rFastqSequence.applyDrasticThreshold(threshold)
        s = FastqSequence.matchFastqSequence(fastqSequence, rFastqSequence)
        if s is None:
            log.write("Can't merge " + fastqSequence.getLineHeader()
                      + " with " + rFastqSequence.getLineHeader() + "\n")
        elif seqOutput is None:
            print(s.getLineHeader())
            print(s.sequence)
        else:
            seqOutput.write(">" + s.getLineHeader() + "\n")
            seqOutput.write(s.sequence + "\n")

    reverseLineHeader = fastqSequence.getReverseLineHeader()

    # Mate record currently being filled in, or None while scanning others.
    rFastqSequence = None
    # Role of the previously read line: "header", "separator" or None.
    # Starting from None (rather than a line number of 0) lets the very
    # first line of the file be recognised as a header.
    previousKind = None

    with open(reverseFileName, "r") as f:
        for rawLine in f:
            # Remove only the line terminator, never a real character.
            line = rawLine.rstrip("\n")

            if previousKind == "separator":
                # The line right after "+" is the quality string, even when
                # it starts with "@" or "+".
                if rFastqSequence is not None:
                    rFastqSequence.setQuality(line)
                previousKind = None
            elif line.startswith("@"):
                # A new header means the mate read so far is complete.
                if rFastqSequence is not None:
                    mergeWithMate(rFastqSequence)
                    rFastqSequence = None
                if line == reverseLineHeader:
                    rFastqSequence = FastqSequence.readFasqSequenceHeader(line)
                previousKind = "header"
            elif previousKind == "header":
                if rFastqSequence is not None:
                    rFastqSequence.setSequence(line)
                previousKind = None
            elif line.startswith("+"):
                previousKind = "separator"
            else:
                # Blank or unexpected line: ignored.
                previousKind = None

    # A mate that is the last record of the file has no following header;
    # without this flush it would never be merged.
    if rFastqSequence is not None:
        mergeWithMate(rFastqSequence)
def readSingleFastQFile(fileName, threshold, logFileName, mapOligos, outputDir):
    """Threshold every record of a FASTQ file and dispatch it with ``splitMarker1``.

    Each record read from ``fileName`` goes through
    ``applyDrasticThreshold(threshold)`` and is then passed to
    ``DnaUtils.splitMarker1(mapOligos, outputDir, record, fileName)``.

    Parameters:
        fileName: path of the FASTQ file to read; also forwarded to
            ``DnaUtils.splitMarker1``.
        threshold: value handed to ``applyDrasticThreshold`` of each record.
        logFileName: path of a log file; it is created/truncated, but this
            function writes nothing to it.
        mapOligos: forwarded to ``DnaUtils.splitMarker1`` unchanged.
        outputDir: forwarded to ``DnaUtils.splitMarker1`` unchanged.

    Returns:
        None.
    """
    fastqSequence = None
    # Role of the previously read line: "header", "separator" or None.
    previousKind = None

    # The log handle is unused below but is still opened so the file is
    # created/truncated exactly as before.
    with open(fileName, "r") as f, open(logFileName, "w"):
        for rawLine in f:
            # Remove only the line terminator: slicing off the last character
            # would eat a real symbol when the final line has no newline.
            line = rawLine.rstrip("\n")

            if previousKind == "separator":
                # The line right after "+" is the quality string, whatever
                # its first character is ("@" and "+" are valid qualities).
                if fastqSequence is not None:
                    fastqSequence.setQuality(line)
                previousKind = None
            elif line.startswith("@"):
                # A new header closes the record read so far.
                if fastqSequence is not None:
                    logger.debug("before threshold " + fastqSequence.sequence)
                    fastqSequence.applyDrasticThreshold(threshold)
                    logger.debug("after threshold " + fastqSequence.sequence)
                    logger.debug("after threshold " + DnaUtils.complementAndReverseDnaSequence(fastqSequence.sequence))
                    DnaUtils.splitMarker1(mapOligos, outputDir, fastqSequence, fileName)
                fastqSequence = FastqSequence.readFasqSequenceHeader(line)
                logging.debug(line)
                previousKind = "header"
            elif previousKind == "header":
                logging.debug("seq in file : " + line)
                fastqSequence.setSequence(line)
                previousKind = None
            elif line.startswith("+"):
                previousKind = "separator"
            else:
                # Blank or unexpected line: ignored.
                previousKind = None

        # The last record has no following header to trigger its processing.
        # An empty input leaves nothing to process.
        if fastqSequence is not None:
            fastqSequence.applyDrasticThreshold(threshold)
            DnaUtils.splitMarker1(mapOligos, outputDir, fastqSequence, fileName)