def filterChimericReads(IOstream, distance):
    """Write to stdout the mappings that are not part of a split (chimeric) pair.

    Consecutive lines whose first whitespace-separated field (the read ID) is
    identical are collected into one group.  Inside a group, two mappings
    ``i`` and ``j`` are flagged as a split pair when
    ``j_begin - i_end >= -distance`` or ``i_begin - j_end >= -distance``,
    where ``(begin, end)`` is what ``common.beg_end_of_seq`` returns for
    field index 5 of the line.  Every flagged line is dropped; the remaining
    lines of the group are written unchanged, in input order.

    Args:
        IOstream: Readable text stream; it is handed to
            ``common.outputHeader`` first and then read with ``readline()``.
        distance: Tolerance used in the two comparisons above.

    Returns:
        None.  Output goes to ``sys.stdout``.
    """
    # NOTE(review): common.outputHeader presumably emits the header lines and
    # returns the first alignment line -- confirm in the common module.
    line = common.outputHeader(IOstream)
    while line != "":
        # Collect the run of consecutive lines sharing the current read ID.
        readID = line.split()[0]
        multiMaps = [line]
        line = IOstream.readline()
        while line != "" and line.split()[0] == readID:
            multiMaps.append(line)
            line = IOstream.readline()

        # Parse each mapping exactly once rather than once per compared pair.
        # Groups with a single mapping are never compared, so (as before)
        # they are not parsed at all.
        spans = []
        if len(multiMaps) > 1:
            for mapping in multiMaps:
                # NOTE(review): field 5 is the CIGAR column in standard SAM;
                # assumes beg_end_of_seq has no side effects -- confirm.
                pos_b, pos_e = common.beg_end_of_seq(mapping.split()[5])
                spans.append((pos_b, pos_e))

        # The set is keyed by line text, so identical duplicate lines are
        # flagged together.
        splitMappings = set()
        for i, (ipos_b, ipos_e) in enumerate(spans):
            for j in range(i + 1, len(spans)):
                jpos_b, jpos_e = spans[j]
                if ((jpos_b - ipos_e) >= -distance
                        or (ipos_b - jpos_e) >= -distance):
                    splitMappings.add(multiMaps[i])
                    splitMappings.add(multiMaps[j])

        for outputLine in multiMaps:
            if outputLine not in splitMappings:
                sys.stdout.write(outputLine)
def filterByQuality(IOstream, quality):
    """Copy to stdout the lines whose fifth field is at least ``quality``.

    The fifth whitespace-separated field (index 4) of each line is converted
    with ``int`` and compared against ``quality``; lines that pass are written
    unchanged to ``sys.stdout``.

    Args:
        IOstream: Readable text stream; it is handed to
            ``common.outputHeader`` first and then read with ``readline()``.
        quality: Minimum accepted value for the integer in field index 4.

    Returns:
        None.
    """
    # NOTE(review): common.outputHeader presumably emits the header lines and
    # returns the first alignment line -- confirm in the common module.
    record = common.outputHeader(IOstream)
    while True:
        if record == "":
            # An empty string marks the end of the stream.
            break
        fields = record.split()
        if int(fields[4]) >= quality:
            sys.stdout.write(record)
        record = IOstream.readline()
def outputMultimappedReads(IOstream, multimappedReadsSet):
    """Copy to stdout the lines whose read ID belongs to ``multimappedReadsSet``.

    The stream is first repositioned with ``seek(0)``, then handed to
    ``common.outputHeader``.  Each following line is written unchanged to
    ``sys.stdout`` when its first whitespace-separated field is contained in
    ``multimappedReadsSet``.

    Args:
        IOstream: Seekable, readable text stream.
        multimappedReadsSet: Container of read IDs, queried with ``in``.

    Returns:
        None.
    """
    IOstream.seek(0)
    # NOTE(review): common.outputHeader presumably emits the header lines and
    # returns the first alignment line -- confirm in the common module.
    record = common.outputHeader(IOstream)
    while True:
        if record == '':
            # An empty string marks the end of the stream.
            break
        if record.split()[0] in multimappedReadsSet:
            sys.stdout.write(record)
        record = IOstream.readline()
def filterByReference(IOstream):
    """Write to stdout the mappings that compare equal to another mapping of the same read.

    Consecutive lines whose first whitespace-separated field (the read ID) is
    identical form one group.  Every pair of lines in a group is passed to
    ``common.compareReadIDReferences``; when that call returns ``0`` both
    lines are kept.  Kept lines are written unchanged, in input order; all
    other lines of the group are discarded.

    Args:
        IOstream: Readable text stream; it is handed to
            ``common.outputHeader`` first and then read with ``readline()``.

    Returns:
        None.  Output goes to ``sys.stdout``.
    """
    # NOTE(review): common.outputHeader presumably emits the header lines and
    # returns the first alignment line -- confirm in the common module.
    pending = common.outputHeader(IOstream)
    while pending != "":
        # Gather the run of consecutive lines carrying the same read ID.
        group = [pending]
        current_id = pending.split()[0]
        pending = IOstream.readline()
        while True:
            if pending == "":
                break
            if pending.split()[0] != current_id:
                break
            group.append(pending)
            pending = IOstream.readline()

        # Compare every unordered pair once, earlier line first.
        # NOTE(review): a result of 0 presumably means "same read ID and same
        # reference" -- confirm against common.compareReadIDReferences.
        readsMultimappedSameRef = set()
        for first_idx, first in enumerate(group[:-1]):
            for second in group[first_idx + 1:]:
                if common.compareReadIDReferences(first, second) == 0:
                    readsMultimappedSameRef.add(first)
                    readsMultimappedSameRef.add(second)

        kept = [entry for entry in group if entry in readsMultimappedSameRef]
        for entry in kept:
            sys.stdout.write(entry)
import functools import argparse if __name__=='__main__': parser = argparse.ArgumentParser(description="Sort a SAM file by several criteria, for now you cannot sort by pos or ref only, only id, id/pos, id/ref, id/pos/ref. The order used between read IDs is the alphabetic order. For sorting using samtools use the sortSam2 script.") parser.add_argument("-id", dest="id", action="store_true", help="Allows sorting of SAM file by read ID") parser.add_argument("-pos", dest="pos", action="store_true", help="Allows sorting of SAM file by position number") parser.add_argument("-ref", dest="ref", action="store_true", help="Allows sorting of SAM file by reference number") args = parser.parse_args() toBeSorted = list() line = common.outputHeader(sys.stdin) while line!="": toBeSorted.append(line) line = sys.stdin.readline() if args.pos and args.ref and args.id: toBeSorted.sort(key=functools.cmp_to_key(common.compareReadIDReferencesPositions)) elif args.id and args.pos: toBeSorted.sort(key=functools.cmp_to_key(common.compareReadIDPositions)) elif args.id and args.ref: toBeSorted.sort(key=functools.cmp_to_key(common.compareReadIDReferences)) elif args.id: