示例#1
0
def parseCommandLineOptions(args, returnSignificantOffsets=True):
    """
    Process the command-line options that C{addCommandLineOptions} added to
    an C{ArgumentParser} instance and gather per-offset data on the aligned
    reads.

    @param args: The value returned by C{parse_args} on the C{ArgumentParser}
        instance that was given to C{addCommandLineOptions} (tests may pass
        something else).
    @param returnSignificantOffsets: If C{True}, find the significant
        offsets, set them on every aligned read, and return them as a
        C{list}. Otherwise the last element of the returned tuple is
        C{None}.
    @raise ValueError: If more than one reference id was given, or if the
        reference lengths obtained from the SAM filter do not describe
        exactly one reference.
    @return: A C{tuple}: (genomeLength, alignedReads, paddedSAM,
        readCountAtOffset, baseCountAtOffset, readsAtOffset,
        significantOffsets).
    """
    samFilter = SAMFilter.parseFilteringOptions(args)

    referenceIds = samFilter.referenceIds
    if referenceIds and len(referenceIds) > 1:
        raise ValueError('Only one reference id can be given.')

    referenceLengths = samFilter.referenceLengths()
    referenceCount = len(referenceLengths)

    if referenceCount != 1:
        raise ValueError(
            'If you do not specify a reference sequence with '
            '--referenceId, the SAM/BAM file must contain exactly one '
            'reference. But %s contains %d.' %
            (args.samfile, referenceCount))

    # There is exactly one reference and only its length is needed.
    _, genomeLength = referenceLengths.popitem()

    paddedSAM = PaddedSAM(samFilter)
    alignedReads = [AlignedRead(query.id, query.sequence)
                    for query in paddedSAM.queries()]

    readCountAtOffset, baseCountAtOffset, readsAtOffset = gatherData(
        genomeLength, alignedReads)

    significantOffsets = None
    if returnSignificantOffsets:
        significantOffsets = list(findSignificantOffsets(
            baseCountAtOffset, readCountAtOffset, args.minReads,
            args.homogeneousCutoff))
        # Every read is told about the same list of significant offsets.
        for alignedRead in alignedReads:
            alignedRead.setSignificantOffsets(significantOffsets)

    return (genomeLength, alignedReads, paddedSAM, readCountAtOffset,
            baseCountAtOffset, readsAtOffset, significantOffsets)
示例#2
0
          'that is provided by the SAMFilter.addFilteringOptions will be '
          'silently ignored!'))

args = parser.parse_args()

# Each kind of output is printed unless its --no* option was given.
printOffsets = not args.noOffsets
printStats = not args.noStats

if not (printOffsets or printStats):
    # Both kinds of output were switched off, so nothing would be printed.
    print('You have used both --noOffsets and --noStats, so there is no '
          'output!', file=sys.stderr)
    sys.exit(1)

# There is no file of reads here. The empty Reads instance is only wanted
# for its read filter, which is used to filter the SAM file query sequences.
# The resulting SAM filter is also where reference lengths are obtained.
reads = parseFASTAFilteringCommandLineOptions(args, Reads())
samFilter = SAMFilter.parseFilteringOptions(args, reads.filterRead)

# Coverage is calculated for one reference at a time, so refuse several ids.
if len(samFilter.referenceIds or ()) > 1:
    print('Only one reference id can be given. To calculate coverage for '
          'more than one reference, run this script multiple times.',
          file=sys.stderr)
    sys.exit(1)

try:
    referenceLengths = samFilter.referenceLengths()
except UnknownReference:
    referenceId = samFilter.referenceIds.pop()
示例#3
0
                        help='If given, write (gzip compressed) BAM output.')

    parser.add_argument(
        '--checkResultCount',
        type=int,
        help=('The number of alignments expected in the output. If this '
              'number is not seen, the script exits with status 1 (and an '
              'error message is also printed, unless --quiet was used).'))

    addFASTAFilteringCommandLineOptions(parser)
    SAMFilter.addFilteringOptions(parser)

    args = parser.parse_args()
    reads = parseFASTAFilteringCommandLineOptions(args, Reads())
    samFilter = SAMFilter.parseFilteringOptions(args,
                                                reads.filterRead,
                                                storeQueryIds=True)

    # The following 'if' has a False in it to make it always fail. That's
    # because pysam issue 716 (see below) did not fix the problem as I had
    # hoped. Instead it throws an error if you pass a header that has a
    # modified SQ key with reference ids and there's a difference it
    # doesn't like. It's always safe to use the 'else' below, with the
    # slight downside being that its header will mention all sequence ids,
    # even if you only want a lesser number (via --referenceId). I'm
    # leaving the code here because this is how you would do it, and it
    # might be possible to just copy the 'header' dict below and further
    # adjust it to avoid the pysam error.
    if False and samFilter.referenceIds:
        # Make a header that only includes the wanted reference ids (if
        # any).
          'we cut the inserted bases out of the aligned query and save the '
          'information about what would have been inserted and where. That '
          'information is printed by this option. The output gives the '
          '0-based offset where the inserted base would be placed, followed '
          'by a list of the nucleotides that were suggested as being '
          'inserted and the number of times each nucleotide was suggested. '
          'So for example the output might contain "27: T:3, G:10" which '
          'indicates that 13 query (3 with T and 10 with G) matches would '
          'insert a nucleotide into the reference at offset 27.'))

# Register the SAM and the FASTA filtering options on the parser before the
# command line is parsed.
SAMFilter.addFilteringOptions(parser)
addFASTAFilteringCommandLineOptions(parser)

args = parser.parse_args()

# The empty Reads instance is only wanted for its read filter, which is
# passed on to the SAM filter.
reads = parseFASTAFilteringCommandLineOptions(args, Reads())
samFilter = SAMFilter.parseFilteringOptions(args, filterRead=reads.filterRead)
paddedSAM = PaddedSAM(samFilter)

# Print each query as FASTA. No line ending is added by print itself.
for read in paddedSAM.queries(rcSuffix=args.rcSuffix,
                              rcNeeded=args.rcNeeded):
    print(read.toString('fasta'), end='')

# The insertion report goes to standard error, keeping it apart from the
# FASTA printed on standard output.
if args.listReferenceInsertions and paddedSAM.referenceInsertions:
    print('(0-based) insertions into the reference:\n%s' %
          nucleotidesToStr(paddedSAM.referenceInsertions, '  '),
          file=sys.stderr)
elif args.listReferenceInsertions:
    print('No matches required an insertion into the reference.',
          file=sys.stderr)