Exemplo n.º 1
0
def readFastQFile(fileName, reverseFileName, threshold, logFileName):
    """Read a FASTQ file and pass every record to ``tryToFusion``.

    A record is built with ``FastqSequence``: a header line starting with
    "@", the sequence line right after it, a separator line starting with
    "+", and the quality line right after the separator. A record is handed
    to ``tryToFusion`` when the next header is met, and once more after the
    last line for the final record.

    Parameters:
        fileName: path of the FASTQ file to read.
        reverseFileName: forwarded unchanged to ``tryToFusion``.
        threshold: forwarded unchanged to ``tryToFusion``.
        logFileName: path of the log file; it is created or truncated and
            the open handle is forwarded to ``tryToFusion``.
    """
    # ``with`` closes both files even when parsing or merging raises.
    with open(fileName, "r") as f, open(logFileName, "w") as log:
        # None means "not seen yet"; unlike 0 it never equals a line number.
        lastHeaderLineNumber = None
        lastSeparatorLineNumber = None
        fastqSequence = None

        for lineNumber, rawLine in enumerate(f):
            # Remove only the line terminator: slicing off the last character
            # would truncate a final line that has no trailing newline.
            line = rawLine.rstrip("\r\n")
            previousLineNumber = lineNumber - 1

            # A line directly after the separator is the quality line, even
            # when it starts with "@". startswith() also tolerates blank
            # lines, where indexing line[0] would raise IndexError.
            if (line.startswith("@")
                    and lastSeparatorLineNumber != previousLineNumber):
                if fastqSequence is not None:
                    tryToFusion(fastqSequence, reverseFileName, threshold, log)

                fastqSequence = FastqSequence.readFasqSequenceHeader(line)
                lastHeaderLineNumber = lineNumber
            elif lastHeaderLineNumber == previousLineNumber:
                fastqSequence.setSequence(line)
            elif lastSeparatorLineNumber == previousLineNumber:
                # Tested before the separator check because a quality line
                # may itself start with "+".
                fastqSequence.setQuality(line)
            elif line.startswith("+"):
                lastSeparatorLineNumber = lineNumber

        # The final record has no following header to trigger its merge.
        # An input without any header leaves nothing to merge.
        if fastqSequence is not None:
            tryToFusion(fastqSequence, reverseFileName, threshold, log)
Exemplo n.º 2
0
def testQualityThreshold(fastqFileName, threshold):
    """Log the effect of the quality threshold on each record of a FASTQ file.

    For every record, ``applyDrasticThreshold(threshold)`` is called and the
    length of its ``sequence`` attribute before and after the call is written
    to the debug log. Each header line is also written to the debug log.

    Parameters:
        fastqFileName: path of the FASTQ file to read.
        threshold: value passed to ``applyDrasticThreshold``.
    """
    # NOTE(review): both ``logger`` and ``logging`` are used, as in the
    # original code; confirm that ``logger`` is defined at module level.

    def logThresholdEffect(record):
        # Apply the threshold and log the sequence length before and after.
        avLen = len(record.sequence)
        record.applyDrasticThreshold(threshold)
        logger.debug("Avant : " + str(avLen) + " / Apres : " +
                     str(len(record.sequence)))

    # ``with`` closes the file even when parsing raises.
    with open(fastqFileName, "r") as f:
        # None means "not seen yet"; unlike 0 it never equals a line number.
        lastHeaderLineNumber = None
        lastSeparatorLineNumber = None
        fastqSequence = None

        for lineNumber, rawLine in enumerate(f):
            # Remove only the line terminator: slicing off the last character
            # would truncate a final line that has no trailing newline.
            line = rawLine.rstrip("\r\n")
            previousLineNumber = lineNumber - 1

            # A line directly after the separator is the quality line, even
            # when it starts with "@". startswith() also tolerates blank
            # lines, where indexing line[0] would raise IndexError.
            if (line.startswith("@")
                    and lastSeparatorLineNumber != previousLineNumber):
                if fastqSequence is not None:
                    logThresholdEffect(fastqSequence)

                fastqSequence = FastqSequence.readFasqSequenceHeader(line)
                lastHeaderLineNumber = lineNumber

                logging.debug(line)
            elif lastHeaderLineNumber == previousLineNumber:
                fastqSequence.setSequence(line)
            elif lastSeparatorLineNumber == previousLineNumber:
                # Tested before the separator check because a quality line
                # may itself start with "+".
                fastqSequence.setQuality(line)
            elif line.startswith("+"):
                lastSeparatorLineNumber = lineNumber

        # The final record has no following header, so it is evaluated here;
        # previously it was silently skipped.
        if fastqSequence is not None:
            logThresholdEffect(fastqSequence)
Exemplo n.º 3
0
def readFastQFile(fileName, reverseFileName, threshold, seqOutputName,
                  logFileName):
    """Read a FASTQ file and pass every record to ``tryToFusion``.

    A record is built with ``FastqSequence``: a header line starting with
    "@", the sequence line right after it, a separator line starting with
    "+", and the quality line right after the separator. Each header line is
    written to the debug log. A record is handed to ``tryToFusion`` when the
    next header is met, and once more after the last line for the final
    record.

    Parameters:
        fileName: path of the FASTQ file to read.
        reverseFileName: forwarded unchanged to ``tryToFusion``.
        threshold: forwarded unchanged to ``tryToFusion``.
        seqOutputName: path of the sequence output file; it is created or
            truncated and the open handle is forwarded to ``tryToFusion``.
        logFileName: path of the log file; it is created or truncated and
            the open handle is forwarded to ``tryToFusion``.
    """
    # Opened in the original order (input, log, output); ``with`` closes all
    # three even when parsing or merging raises.
    with open(fileName, "r") as f, \
            open(logFileName, "w") as log, \
            open(seqOutputName, "w") as seqOutput:
        # None means "not seen yet"; unlike 0 it never equals a line number.
        lastHeaderLineNumber = None
        lastSeparatorLineNumber = None
        fastqSequence = None

        for lineNumber, rawLine in enumerate(f):
            # Remove only the line terminator: slicing off the last character
            # would truncate a final line that has no trailing newline.
            line = rawLine.rstrip("\r\n")
            previousLineNumber = lineNumber - 1

            # A line directly after the separator is the quality line, even
            # when it starts with "@". startswith() also tolerates blank
            # lines, where indexing line[0] would raise IndexError.
            if (line.startswith("@")
                    and lastSeparatorLineNumber != previousLineNumber):
                if fastqSequence is not None:
                    tryToFusion(fastqSequence, reverseFileName, threshold, log,
                                seqOutput)

                fastqSequence = FastqSequence.readFasqSequenceHeader(line)
                lastHeaderLineNumber = lineNumber

                logging.debug(line)
            elif lastHeaderLineNumber == previousLineNumber:
                fastqSequence.setSequence(line)
            elif lastSeparatorLineNumber == previousLineNumber:
                # Tested before the separator check because a quality line
                # may itself start with "+".
                fastqSequence.setQuality(line)
            elif line.startswith("+"):
                lastSeparatorLineNumber = lineNumber

        # The final record has no following header to trigger its merge.
        # An input without any header leaves nothing to merge.
        if fastqSequence is not None:
            tryToFusion(fastqSequence, reverseFileName, threshold, log,
                        seqOutput)
Exemplo n.º 4
0
Arquivo: test.py Projeto: chaby/dana
def testQualityThreshold(fastqFileName, threshold):
    """Report, per FASTQ record, the sequence length around thresholding.

    Every record read from ``fastqFileName`` goes through
    ``applyDrasticThreshold(threshold)``; the length of its ``sequence``
    attribute before and after that call is sent to the debug log, as is
    every header line.

    Parameters:
        fastqFileName: path of the FASTQ file to read.
        threshold: value passed to ``applyDrasticThreshold``.
    """
    # NOTE(review): ``logger`` and ``logging`` are both used here, exactly as
    # in the original; confirm ``logger`` exists at module level.

    def reportRecord(record):
        # Length before, threshold, length after -- all in one debug line.
        avLen = len(record.sequence)
        record.applyDrasticThreshold(threshold)
        logger.debug("Avant : " + str(avLen) + " / Apres : " + str(len(record.sequence)))

    # The context manager guarantees the file is closed on any exit path.
    with open(fastqFileName, "r") as f:
        # None (rather than 0) so the markers cannot match line number 0.
        lastHeaderLineNumber = None
        lastSeparatorLineNumber = None
        fastqSequence = None

        for lineNumber, rawLine in enumerate(f):
            # Drop the terminator only; ``rawLine[:-1]`` would eat a real
            # character when the last line has no trailing newline.
            line = rawLine.rstrip("\r\n")
            afterHeader = lastHeaderLineNumber == lineNumber - 1
            afterSeparator = lastSeparatorLineNumber == lineNumber - 1

            # The line after a separator is always quality data, even if it
            # begins with "@". startswith() is safe on blank lines, where
            # line[0] would raise IndexError.
            if line.startswith("@") and not afterSeparator:
                if fastqSequence is not None:
                    reportRecord(fastqSequence)

                fastqSequence = FastqSequence.readFasqSequenceHeader(line)
                lastHeaderLineNumber = lineNumber

                logging.debug(line)
            elif afterHeader:
                fastqSequence.setSequence(line)
            elif afterSeparator:
                # Must come before the "+" test: quality strings may start
                # with "+" too.
                fastqSequence.setQuality(line)
            elif line.startswith("+"):
                lastSeparatorLineNumber = lineNumber

        # No header follows the last record, so report it explicitly; it was
        # previously left out of the report.
        if fastqSequence is not None:
            reportRecord(fastqSequence)
Exemplo n.º 5
0
Arquivo: test.py Projeto: chaby/dana
def tryToFusion(fastqSequence, reverseFileName, threshold, log, seqOutput):
    """Find the matching record in the reverse file and write the merge.

    ``fastqSequence`` is thresholded first. The reverse file is then scanned
    for a header line equal to ``fastqSequence.getReverseLineHeader()``. The
    record under that header is thresholded and combined with
    ``FastqSequence.matchFastqSequence``. A successful merge is written to
    ``seqOutput`` as a ">" header line followed by the sequence, and the
    outcome is written to ``log``.

    Parameters:
        fastqSequence: record to merge; it is modified by
            ``applyDrasticThreshold(threshold)``.
        reverseFileName: path of the FASTQ file to search.
        threshold: value passed to ``applyDrasticThreshold`` for both records.
        log: writable file-like object receiving one line per merge attempt.
        seqOutput: writable file-like object receiving merged sequences.
    """

    def reportFusion(rFastqSequence):
        # Merge both records and record the outcome in seqOutput and log.
        rFastqSequence.applyDrasticThreshold(threshold)

        s = FastqSequence.matchFastqSequence(fastqSequence, rFastqSequence)
        if s is None:
            log.write("*Can't merge " + fastqSequence.getLineHeader() + " with " + rFastqSequence.getLineHeader() + "\n")
            return

        seqOutput.write(">" + s.getLineHeader() + "\n")
        seqOutput.write(s.sequence + "\n")
        logging.debug(s.getLineHeader() + "\t" + str(s.typeFusion))
        if s.typeFusion == s.TYPE_FUSION_OK:
            outcome = "Merge "
        elif s.typeFusion == s.TYPE_FUSION_NO_MATCH:
            outcome = "Forced merge "
        elif s.typeFusion == s.TYPE_FUSION_PARTIAL:
            outcome = "Partial merge "
        else:
            outcome = "Can't merge "
        log.write(outcome + fastqSequence.getLineHeader() + " with " + rFastqSequence.getLineHeader() + "\n")

    fastqSequence.applyDrasticThreshold(threshold)
    # ``with`` closes the reverse file even when merging raises.
    with open(reverseFileName, "r") as f:
        reverseLineHeader = fastqSequence.getReverseLineHeader()
        logging.debug("Search " + reverseLineHeader)

        # None means "not seen yet", so the header test needs no special case
        # for the first line.
        lastHeaderLineNumber = None
        lastSeparatorLineNumber = None
        parseFastq = False
        rFastqSequence = None

        # NOTE(review): separators are tracked only inside the matching
        # record, so in other records a quality line starting with "@" is
        # still examined as a header candidate (unchanged behaviour).
        for lineNumber, rawLine in enumerate(f, 1):
            # Remove only the line terminator: slicing off the last character
            # would truncate a final line that has no trailing newline.
            line = rawLine.rstrip("\r\n")
            previousLineNumber = lineNumber - 1

            # startswith() tolerates blank lines, where line[0] would raise.
            if (line.startswith("@")
                    and lastSeparatorLineNumber != previousLineNumber):
                logging.debug(line + " est un header")
                if parseFastq:
                    # The next header marks the end of the matching record.
                    reportFusion(rFastqSequence)
                    parseFastq = False

                if line == reverseLineHeader:
                    parseFastq = True
                    rFastqSequence = FastqSequence.readFasqSequenceHeader(line)
                    lastHeaderLineNumber = lineNumber
            elif parseFastq:
                if lastHeaderLineNumber == previousLineNumber:
                    rFastqSequence.setSequence(line)
                elif lastSeparatorLineNumber == previousLineNumber:
                    # Tested before the separator check because a quality
                    # line may itself start with "+".
                    rFastqSequence.setQuality(line)
                elif line.startswith("+"):
                    lastSeparatorLineNumber = lineNumber

        # A matching record at the very end of the file has no following
        # header; without this it would never be merged.
        if parseFastq:
            reportFusion(rFastqSequence)
Exemplo n.º 6
0
def readSingleFastQFile(fileName, threshold, logFileName, mapOligos,
                        outputDir):
    """Read a FASTQ file, threshold every record and pass it to splitMarker1.

    A record is built with ``FastqSequence``: a header line starting with
    "@", the sequence line right after it, a separator line starting with
    "+", and the quality line right after the separator. Each complete
    record goes through ``applyDrasticThreshold(threshold)`` and is then
    given to ``DnaUtils.splitMarker1``.

    Parameters:
        fileName: path of the FASTQ file to read; also forwarded to
            ``DnaUtils.splitMarker1``.
        threshold: value passed to ``applyDrasticThreshold``.
        logFileName: path of a log file; it is created or truncated, but
            this function writes nothing to it.
        mapOligos: forwarded unchanged to ``DnaUtils.splitMarker1``.
        outputDir: forwarded unchanged to ``DnaUtils.splitMarker1``.
    """
    # NOTE(review): both ``logger`` and ``logging`` are used, as in the
    # original code; confirm that ``logger`` is defined at module level.

    # ``with`` closes both files even when parsing or splitting raises. The
    # log file is still opened so that it is created/truncated as before.
    with open(fileName, "r") as f, open(logFileName, "w"):
        # None means "not seen yet"; unlike 0 it never equals a line number.
        lastHeaderLineNumber = None
        lastSeparatorLineNumber = None
        fastqSequence = None

        for lineNumber, rawLine in enumerate(f):
            # Remove only the line terminator: slicing off the last character
            # would truncate a final line that has no trailing newline.
            line = rawLine.rstrip("\r\n")
            previousLineNumber = lineNumber - 1

            # A line directly after the separator is the quality line, even
            # when it starts with "@". startswith() also tolerates blank
            # lines, where indexing line[0] would raise IndexError.
            if (line.startswith("@")
                    and lastSeparatorLineNumber != previousLineNumber):
                if fastqSequence is not None:
                    logger.debug("before threshold " + fastqSequence.sequence)
                    fastqSequence.applyDrasticThreshold(threshold)
                    logger.debug("after threshold " + fastqSequence.sequence)
                    logger.debug("after threshold " +
                                 DnaUtils.complementAndReverseDnaSequence(
                                     fastqSequence.sequence))
                    DnaUtils.splitMarker1(mapOligos, outputDir, fastqSequence,
                                          fileName)

                fastqSequence = FastqSequence.readFasqSequenceHeader(line)
                lastHeaderLineNumber = lineNumber

                logging.debug(line)
            elif lastHeaderLineNumber == previousLineNumber:
                logging.debug("seq in file : " + line)
                fastqSequence.setSequence(line)
            elif lastSeparatorLineNumber == previousLineNumber:
                # Tested before the separator check because a quality line
                # may itself start with "+".
                fastqSequence.setQuality(line)
            elif line.startswith("+"):
                lastSeparatorLineNumber = lineNumber

        # The final record has no following header to trigger its handling.
        # An input without any header leaves nothing to process.
        if fastqSequence is not None:
            fastqSequence.applyDrasticThreshold(threshold)
            DnaUtils.splitMarker1(mapOligos, outputDir, fastqSequence,
                                  fileName)
Exemplo n.º 7
0
def tryToFusion(fastqSequence, reverseFileName, threshold, log):
    """Find the matching record in the reverse file and print the merge.

    The reverse file is scanned for a header line equal to
    ``fastqSequence.getReverseLineHeader()``. The record under that header is
    thresholded with ``applyDrasticThreshold(threshold)`` and combined with
    ``FastqSequence.matchFastqSequence``. A successful merge is printed to
    standard output (header line, then sequence); a failed one is reported in
    ``log``.

    Parameters:
        fastqSequence: record to merge. It is not thresholded here.
        reverseFileName: path of the FASTQ file to search.
        threshold: value passed to ``applyDrasticThreshold`` of the matching
            reverse record.
        log: writable file-like object receiving failed merge attempts.
    """
    # NOTE(review): the original body began with a bare ``fastqSequence``
    # expression that had no effect; it has been removed. Confirm whether a
    # threshold on ``fastqSequence`` was intended at that point.

    def reportFusion(rFastqSequence):
        # Merge both records; print the result or log the failure.
        rFastqSequence.applyDrasticThreshold(threshold)
        s = FastqSequence.matchFastqSequence(fastqSequence, rFastqSequence)
        if s is not None:
            print(s.getLineHeader())
            print(s.sequence)
        else:
            log.write("Can't merge " + fastqSequence.getLineHeader() + " with " + rFastqSequence.getLineHeader() + "\n")

    # ``with`` closes the reverse file even when merging raises.
    with open(reverseFileName, "r") as f:
        reverseLineHeader = fastqSequence.getReverseLineHeader()

        # None means "not seen yet". With the former initial value 0 and line
        # numbers starting at 1, a header on the first line was mistaken for
        # a line following a separator and never recognised.
        lastHeaderLineNumber = None
        lastSeparatorLineNumber = None
        parseFastq = False
        rFastqSequence = None

        for lineNumber, rawLine in enumerate(f, 1):
            # Remove only the line terminator: slicing off the last character
            # would truncate a final line that has no trailing newline.
            line = rawLine.rstrip("\r\n")
            previousLineNumber = lineNumber - 1

            # startswith() tolerates blank lines, where line[0] would raise.
            if (line.startswith("@")
                    and lastSeparatorLineNumber != previousLineNumber):
                if parseFastq:
                    # The next header marks the end of the matching record.
                    reportFusion(rFastqSequence)
                    parseFastq = False

                if line == reverseLineHeader:
                    parseFastq = True
                    rFastqSequence = FastqSequence.readFasqSequenceHeader(line)
                    lastHeaderLineNumber = lineNumber
            elif parseFastq:
                if lastHeaderLineNumber == previousLineNumber:
                    rFastqSequence.setSequence(line)
                elif lastSeparatorLineNumber == previousLineNumber:
                    # Tested before the separator check because a quality
                    # line may itself start with "+".
                    rFastqSequence.setQuality(line)
                elif line.startswith("+"):
                    lastSeparatorLineNumber = lineNumber

        # A matching record at the very end of the file has no following
        # header; without this it would never be merged.
        if parseFastq:
            reportFusion(rFastqSequence)
Exemplo n.º 8
0
Arquivo: test.py Projeto: chaby/dana
def readSingleFastQFile(fileName, threshold, logFileName, mapOligos, outputDir):
    """Threshold each record of a FASTQ file and hand it to splitMarker1.

    Records are assembled with ``FastqSequence`` from four kinds of line: a
    header starting with "@", the sequence on the next line, a separator
    starting with "+", and the quality on the line after the separator. Once
    a record is complete, ``applyDrasticThreshold(threshold)`` is applied and
    the record is passed to ``DnaUtils.splitMarker1``.

    Parameters:
        fileName: path of the FASTQ file to read; also forwarded to
            ``DnaUtils.splitMarker1``.
        threshold: value passed to ``applyDrasticThreshold``.
        logFileName: path of a log file; it is created or truncated, but
            this function writes nothing to it.
        mapOligos: forwarded unchanged to ``DnaUtils.splitMarker1``.
        outputDir: forwarded unchanged to ``DnaUtils.splitMarker1``.
    """
    # NOTE(review): ``logger`` and ``logging`` are both used here, exactly as
    # in the original; confirm ``logger`` exists at module level.

    # Context managers close both files on every exit path. The log file is
    # opened only for its create/truncate side effect, as before.
    with open(fileName, "r") as f, open(logFileName, "w"):
        # None (rather than 0) so the markers cannot match line number 0.
        lastHeaderLineNumber = None
        lastSeparatorLineNumber = None
        fastqSequence = None

        for lineNumber, rawLine in enumerate(f):
            # Drop the terminator only; ``rawLine[:-1]`` would eat a real
            # character when the last line has no trailing newline.
            line = rawLine.rstrip("\r\n")
            afterHeader = lastHeaderLineNumber == lineNumber - 1
            afterSeparator = lastSeparatorLineNumber == lineNumber - 1

            # The line after a separator is always quality data, even if it
            # begins with "@". startswith() is safe on blank lines, where
            # line[0] would raise IndexError.
            if line.startswith("@") and not afterSeparator:
                if fastqSequence is not None:
                    logger.debug("before threshold " + fastqSequence.sequence)
                    fastqSequence.applyDrasticThreshold(threshold)
                    logger.debug("after threshold " + fastqSequence.sequence)
                    logger.debug("after threshold " + DnaUtils.complementAndReverseDnaSequence(fastqSequence.sequence))
                    DnaUtils.splitMarker1(mapOligos, outputDir, fastqSequence, fileName)

                fastqSequence = FastqSequence.readFasqSequenceHeader(line)
                lastHeaderLineNumber = lineNumber

                logging.debug(line)
            elif afterHeader:
                logging.debug("seq in file : " + line)
                fastqSequence.setSequence(line)
            elif afterSeparator:
                # Must come before the "+" test: quality strings may start
                # with "+" too.
                fastqSequence.setQuality(line)
            elif line.startswith("+"):
                lastSeparatorLineNumber = lineNumber

        # No header follows the last record, so process it explicitly. With
        # no header at all there is no record and nothing to do.
        if fastqSequence is not None:
            fastqSequence.applyDrasticThreshold(threshold)
            DnaUtils.splitMarker1(mapOligos, outputDir, fastqSequence, fileName)
Exemplo n.º 9
0
def tryToFusion(fastqSequence, reverseFileName, threshold, log, seqOutput):
    """Merge ``fastqSequence`` with its counterpart from the reverse file.

    After thresholding ``fastqSequence``, the reverse file is searched for a
    header line equal to ``fastqSequence.getReverseLineHeader()``. The record
    found there is thresholded as well and both are passed to
    ``FastqSequence.matchFastqSequence``. When that returns a result, it is
    written to ``seqOutput`` (">" header line, then sequence) and the kind of
    merge is written to ``log``; otherwise only the failure is logged.

    Parameters:
        fastqSequence: record to merge; it is modified by
            ``applyDrasticThreshold(threshold)``.
        reverseFileName: path of the FASTQ file to search.
        threshold: value passed to ``applyDrasticThreshold`` for both records.
        log: writable file-like object receiving one line per merge attempt.
        seqOutput: writable file-like object receiving merged sequences.
    """

    def mergeWith(rFastqSequence):
        # Threshold the reverse record, merge, then write output and log.
        rFastqSequence.applyDrasticThreshold(threshold)

        s = FastqSequence.matchFastqSequence(fastqSequence,
                                             rFastqSequence)
        if s is None:
            log.write("*Can't merge " + fastqSequence.getLineHeader() +
                      " with " + rFastqSequence.getLineHeader() + "\n")
            return

        seqOutput.write(">" + s.getLineHeader() + "\n")
        seqOutput.write(s.sequence + "\n")
        logging.debug(s.getLineHeader() + "\t" + str(s.typeFusion))

        # Only the leading words of the log line depend on the merge type.
        if s.typeFusion == s.TYPE_FUSION_OK:
            verdict = "Merge "
        elif s.typeFusion == s.TYPE_FUSION_NO_MATCH:
            verdict = "Forced merge "
        elif s.typeFusion == s.TYPE_FUSION_PARTIAL:
            verdict = "Partial merge "
        else:
            verdict = "Can't merge "
        log.write(verdict + fastqSequence.getLineHeader() + " with " +
                  rFastqSequence.getLineHeader() + "\n")

    fastqSequence.applyDrasticThreshold(threshold)
    # The context manager closes the reverse file on every exit path.
    with open(reverseFileName, "r") as f:
        reverseLineHeader = fastqSequence.getReverseLineHeader()
        logging.debug("Search " + reverseLineHeader)

        # None stands for "not seen yet"; because it never equals a line
        # number, the first line needs no special case in the header test.
        lastHeaderLineNumber = None
        lastSeparatorLineNumber = None
        parseFastq = False
        rFastqSequence = None

        # NOTE(review): separators are tracked only while inside the matching
        # record, so elsewhere a quality line starting with "@" is still
        # examined as a header candidate (unchanged behaviour).
        for lineNumber, rawLine in enumerate(f, 1):
            # Drop the terminator only; ``rawLine[:-1]`` would eat a real
            # character when the last line has no trailing newline.
            line = rawLine.rstrip("\r\n")
            afterHeader = lastHeaderLineNumber == lineNumber - 1
            afterSeparator = lastSeparatorLineNumber == lineNumber - 1

            # startswith() is safe on blank lines, where line[0] would raise.
            if line.startswith("@") and not afterSeparator:
                logging.debug(line + " est un header")
                if parseFastq:
                    # A new header closes the matching record: merge it now.
                    mergeWith(rFastqSequence)
                    parseFastq = False

                if line == reverseLineHeader:
                    parseFastq = True
                    rFastqSequence = FastqSequence.readFasqSequenceHeader(line)
                    lastHeaderLineNumber = lineNumber
            elif parseFastq:
                if afterHeader:
                    rFastqSequence.setSequence(line)
                elif afterSeparator:
                    # Must come before the "+" test: quality strings may
                    # start with "+" too.
                    rFastqSequence.setQuality(line)
                elif line.startswith("+"):
                    lastSeparatorLineNumber = lineNumber

        # If the matching record is the last one in the file, no header
        # follows it; merge it here instead of silently dropping it.
        if parseFastq:
            mergeWith(rFastqSequence)