def parseCommandLineOptions(args, returnSignificantOffsets=True):
    """
    Process the command-line options that C{addCommandLineOptions} added to
    an C{ArgumentParser} instance.

    @param args: The result of calling C{parse_args} on an C{ArgumentParser}
        instance (normally the one that was passed to
        C{addCommandLineOptions}, though tests may pass something else).
    @param returnSignificantOffsets: If C{True}, compute the significant
        offsets, set them on every aligned read, and return them as a
        C{list}. Otherwise that element of the return value is C{None}.
    @raise ValueError: If more than one reference id was given, or if the
        SAM filter does not report exactly one reference length.
    @return: A C{tuple}: (genomeLength, alignedReads, paddedSAM,
        readCountAtOffset, baseCountAtOffset, readsAtOffset,
        significantOffsets).
    """
    samFilter = SAMFilter.parseFilteringOptions(args)

    if samFilter.referenceIds and len(samFilter.referenceIds) > 1:
        raise ValueError('Only one reference id can be given.')

    lengthsById = samFilter.referenceLengths()

    # Guard clause: anything other than exactly one reference is an error.
    if len(lengthsById) != 1:
        raise ValueError(
            'If you do not specify a reference sequence with '
            '--referenceId, the SAM/BAM file must contain exactly one '
            'reference. But %s contains %d.' %
            (args.samfile, len(lengthsById)))

    # Only the length of the single reference is used below; its id is not.
    _, genomeLength = lengthsById.popitem()

    paddedSAM = PaddedSAM(samFilter)
    alignedReads = [AlignedRead(query.id, query.sequence)
                    for query in paddedSAM.queries()]

    readCountAtOffset, baseCountAtOffset, readsAtOffset = gatherData(
        genomeLength, alignedReads)

    significantOffsets = None
    if returnSignificantOffsets:
        significantOffsets = list(findSignificantOffsets(
            baseCountAtOffset, readCountAtOffset, args.minReads,
            args.homogeneousCutoff))
        # Every read is told about the same list of significant offsets.
        for alignedRead in alignedReads:
            alignedRead.setSignificantOffsets(significantOffsets)

    return (genomeLength, alignedReads, paddedSAM, readCountAtOffset,
            baseCountAtOffset, readsAtOffset, significantOffsets)
'that is provided by the SAMFilter.addFilteringOptions will be ' 'silently ignored!')) args = parser.parse_args() if args.noOffsets and args.noStats: print( 'You have used both --noOffsets and --noStats, so there is no ' 'output!', file=sys.stderr) sys.exit(1) # We don't have a file of reads, we just want a read filter that we can use # to filter the SAM file query sequences and to get reference lengths from. reads = parseFASTAFilteringCommandLineOptions(args, Reads()) samFilter = SAMFilter.parseFilteringOptions(args, reads.filterRead) printOffsets = not args.noOffsets printStats = not args.noStats if samFilter.referenceIds and len(samFilter.referenceIds) > 1: print( 'Only one reference id can be given. To calculate coverage for more ' 'than one reference, run this script multiple times.', file=sys.stderr) sys.exit(1) try: referenceLengths = samFilter.referenceLengths() except UnknownReference: referenceId = samFilter.referenceIds.pop()
help='If given, write (gzip compressed) BAM output.') parser.add_argument( '--checkResultCount', type=int, help=('The number of alignments expected in the output. If this ' 'number is not seen, the script exits with status 1 (and an ' 'error message is also printed, unless --quiet was used).')) addFASTAFilteringCommandLineOptions(parser) SAMFilter.addFilteringOptions(parser) args = parser.parse_args() reads = parseFASTAFilteringCommandLineOptions(args, Reads()) samFilter = SAMFilter.parseFilteringOptions(args, reads.filterRead, storeQueryIds=True) # The following 'if' has a False in it to make it always fail. That's # because pysam issue 716 (see below) did not fix the problem as I had # hoped. Instead it throws an error if you pass a header that has a # modified SQ key with reference ids and there's a difference it # doesn't like. It's always safe to use the 'else' below, with the # slight downside being that its header will mention all sequence ids, # even if you only want a lesser number (via --referenceId). I'm # leaving the code here because this is how you would do it, and it # might be possible to just copy the 'header' dict below and further # adjust it to avoid the pysam error. if False and samFilter.referenceIds: # Make a header that only includes the wanted reference ids (if # any).
'we cut the inserted bases out of the aligned query and save the ' 'information about what would have been inserted and where. That ' 'information is printed by this option. The output gives the ' '0-based offset where the inserted base would be placed, followed ' 'by a list of the nucleotides that were suggested as being ' 'inserted and the number of times each nucleotide was suggested. ' 'So for example the output might contain "27: T:3, G:10" which ' 'indicates that 13 query (3 with T and 10 with G) matches would ' 'insert a nucleotide into the reference at offset 27.')) SAMFilter.addFilteringOptions(parser) addFASTAFilteringCommandLineOptions(parser) args = parser.parse_args() reads = parseFASTAFilteringCommandLineOptions(args, Reads()) samFilter = SAMFilter.parseFilteringOptions( args, filterRead=reads.filterRead) paddedSAM = PaddedSAM(samFilter) for read in paddedSAM.queries(rcSuffix=args.rcSuffix, rcNeeded=args.rcNeeded): print(read.toString('fasta'), end='') if args.listReferenceInsertions: if paddedSAM.referenceInsertions: print('(0-based) insertions into the reference:\n%s' % nucleotidesToStr(paddedSAM.referenceInsertions, ' '), file=sys.stderr) else: print('No matches required an insertion into the reference.', file=sys.stderr)