def main(argv):
    """Trim low-quality bases from both ends of each read and write the result.

    The last element of ``argv`` is taken as the input file path; everything
    between the program name and that last element is parsed as options:

        -h, --help   print ``helpString`` and exit
        -o FILE      write to FILE instead of standard output
        -q VALUE     minimum quality score to keep (parsed as float, default 30)

    For every read returned by ``FASTQParser.nextRead()`` the quality string is
    converted with ``encoding2num``. Positions before the first score that is
    >= the threshold and after the last such score are removed from both the
    bases and the quality string. Reads without any score at or above the
    threshold are dropped.

    Exits through ``sys.exit`` on help, on option errors and when the input
    path is missing or not a regular file.
    """
    try:
        opts, _ = getopt.getopt(argv[1:-1], 'ho:q:', longopts=['help'])
    except getopt.GetoptError as err:
        # getopt raises on unknown options (or a missing option argument)
        # instead of returning them, so the error has to be reported here.
        sys.exit('Unsupported parameter. ' + str(err))

    qmin = 30
    outputName = ''
    for option, val in opts:
        if option in ('-h', '--help'):
            print(helpString)
            sys.exit()
        elif option == '-o':
            outputName = val
        elif option == '-q':
            qmin = float(val)

    # deal with invalid paths
    if len(argv) == 1 or not os.path.isfile(argv[-1]):
        print(helpString)
        # The last argument is reserved for the input path and never reaches
        # getopt, so a help flag given on its own ends up here; treat it as a
        # normal help request rather than an error.
        if len(argv) > 1 and argv[-1] in ('-h', '--help'):
            sys.exit()
        sys.exit('Valid input file required.')
    inputName = argv[-1]

    parser = FASTQParser(inputName)
    try:
        outFile = open(outputName, 'w') if outputName != '' else sys.stdout
        try:
            # read file and write back the trimmed reads that pass the filter
            while True:
                read = parser.nextRead()
                # stop at EOF
                if read['quals'] == '':
                    break

                numQual = encoding2num(read['quals'], parser.encoding)
                passing = [idx for idx, baseq in enumerate(numQual) if baseq >= qmin]
                # only write reads where there was at least one decent base call
                if not passing:
                    continue

                # slice end is exclusive, so step one past the last good index
                start = passing[0]
                stop = passing[-1] + 1
                outFile.writelines([
                    read['header'],
                    read['bases'][start:stop], '\n',
                    read['qheader'],
                    read['quals'][start:stop], '\n',
                ])
        finally:
            if outFile is not sys.stdout:
                outFile.close()
    finally:
        parser.file.close()
def indexFile(fileName):
    """Build a lookup from read ID to the file offset where that read begins.

    The file is walked once with a ``FASTQParser``. Before each read is
    consumed the current offset of the parser's underlying file is recorded;
    the read's key is the first match of the module-level ``regex`` pattern
    against its header (per the original comment, the tile X/Y position).
    If two reads produce the same key, the later offset replaces the earlier.

    Returns the dictionary mapping key -> offset. The parser is closed even
    when an error interrupts the scan.
    """
    reader = FASTQParser(fileName)
    offsets = {}
    try:
        # the offset has to be taken before the read is consumed
        offset = reader.file.tell()
        record = reader.nextRead()
        # an empty quality string signals EOF
        while record["quals"] != "":
            readID = regex.findall(record["header"])[0]
            offsets[readID] = offset
            offset = reader.file.tell()
            record = reader.nextRead()
    finally:
        reader.close()
    return offsets
def main(argv):
    """Keep only the reads whose mean quality score reaches a threshold.

    The last element of ``argv`` is taken as the input file path; everything
    between the program name and that last element is parsed as options:

        -h, --help   print ``helpString`` and exit
        -o FILE      write to FILE instead of standard output
        -q VALUE     minimum mean quality (parsed as float, default 30)

    For every read returned by ``FASTQParser.nextRead()`` the quality string is
    converted with ``encoding2num`` and averaged with ``mean``. Reads whose
    mean is >= the threshold are written back unchanged: the four fields are
    emitted back to back with no separator added (so they are presumably
    already newline-terminated -- confirm against ``FASTQParser``).

    Exits through ``sys.exit`` on help, on option errors and when the input
    path is missing or not a regular file.
    """
    try:
        opts, _ = getopt.getopt(argv[1:-1], 'ho:q:', longopts=['help'])
    except getopt.GetoptError as err:
        # getopt raises on unknown options (or a missing option argument)
        # instead of returning them, so the error has to be reported here.
        sys.exit('Unsupported parameter. ' + str(err))

    qmin = 30
    outputName = ''
    for option, val in opts:
        if option in ('-h', '--help'):
            print(helpString)
            sys.exit()
        elif option == '-o':
            outputName = val
        elif option == '-q':
            qmin = float(val)

    # deal with invalid paths
    if len(argv) == 1 or not os.path.isfile(argv[-1]):
        print(helpString)
        # The last argument is reserved for the input path and never reaches
        # getopt, so a help flag given on its own ends up here; treat it as a
        # normal help request rather than an error.
        if len(argv) > 1 and argv[-1] in ('-h', '--help'):
            sys.exit()
        sys.exit('Valid input file required.')
    inputName = argv[-1]

    parser = FASTQParser(inputName)
    try:
        outFile = open(outputName, 'w') if outputName != '' else sys.stdout
        try:
            # read file and write back reads that pass the filter
            while True:
                read = parser.nextRead()
                # stop at EOF
                if read['quals'] == '':
                    break

                meanQual = mean(encoding2num(read['quals'], parser.encoding))
                if meanQual >= qmin:
                    outFile.writelines(
                        [read['header'], read['bases'], read['qheader'], read['quals']]
                    )
        finally:
            if outFile is not sys.stdout:
                outFile.close()
    finally:
        parser.file.close()
def _readLines(read):
    """Return the four fields of a parsed read in the order they are written."""
    return [read["header"], read["bases"], read["qheader"], read["quals"]]


def matchReads(fastq1, fastq2):
    """Split two read files into reads shared by both and reads unique to each.

    ``fastq2`` is indexed with ``indexFile`` (read ID -> file offset). Then
    ``fastq1`` is streamed read by read; the ID of each read is the first match
    of the module-level ``regex`` pattern against its header.

    Four files are written next to the inputs:

        fastq1 + ".common"   reads of fastq1 whose ID is also in fastq2
        fastq2 + ".common"   the matching reads of fastq2, in fastq1 order
        fastq1 + ".unique"   reads of fastq1 with no match in the index
        fastq2 + ".unique"   reads of fastq2 never matched by fastq1

    A matched ID is removed from the index, so a second read in ``fastq1``
    with the same ID is treated as unique. All parsers and output files are
    closed even if an error interrupts processing.
    """
    idxStore = indexFile(fastq2)

    fastq1_parser = FASTQParser(fastq1)
    try:
        fastq2_parser = FASTQParser(fastq2)
        try:
            with open(fastq1 + ".common", "w") as fastq1_common, \
                    open(fastq1 + ".unique", "w") as fastq1_unique, \
                    open(fastq2 + ".common", "w") as fastq2_common:
                while True:
                    read = fastq1_parser.nextRead()
                    # EOF
                    if read["quals"] == "":
                        break

                    ID = regex.findall(read["header"])[0]
                    if ID in idxStore:
                        # write both reads out to the common files and drop
                        # the key so it is not reported as unique later
                        fastq1_common.writelines(_readLines(read))
                        fastq2_parser.file.seek(idxStore.pop(ID))
                        fastq2_common.writelines(_readLines(fastq2_parser.nextRead()))
                    else:
                        fastq1_unique.writelines(_readLines(read))

            # all remaining keys in the index are the unique reads for fastq2
            with open(fastq2 + ".unique", "w") as fastq2_unique:
                for remaining in idxStore:
                    fastq2_parser.file.seek(idxStore[remaining])
                    fastq2_unique.writelines(_readLines(fastq2_parser.nextRead()))
        finally:
            fastq2_parser.close()
    finally:
        fastq1_parser.close()