Exemplo n.º 1
0
def parseFASTAFilteringCommandLineOptions(args, reads):
    """
    Apply the FASTA filtering options given on the command line to a set of
    reads.

    @param args: An argparse namespace, as returned by the argparse
        C{parse_args} function.
    @param reads: A C{Reads} instance to filter.
    @return: The result of calling C{reads.filter} with the filtering
        options taken from C{args}.
    """
    # Range expressions are converted to sets of zero-based numbers. A
    # missing (falsy) expression is passed on as None.
    keepSequences = removeSequences = None
    if args.keepSequences:
        keepSequences = parseRangeExpression(
            args.keepSequences, convertToZeroBased=True)
    if args.removeSequences:
        removeSequences = parseRangeExpression(
            args.removeSequences, convertToZeroBased=True)

    # White/black lists are passed as sets, or as None when not given.
    whitelist = blacklist = None
    if args.whitelist:
        whitelist = set(args.whitelist)
    if args.blacklist:
        blacklist = set(args.blacklist)

    filterOptions = dict(
        minLength=args.minLength,
        maxLength=args.maxLength,
        maxNFraction=args.maxNFraction,
        whitelist=whitelist,
        blacklist=blacklist,
        whitelistFile=args.whitelistFile,
        blacklistFile=args.blacklistFile,
        titleRegex=args.titleRegex,
        negativeTitleRegex=args.negativeTitleRegex,
        keepSequences=keepSequences,
        removeSequences=removeSequences,
        head=args.head,
        removeDuplicates=args.removeDuplicates,
        removeDuplicatesById=args.removeDuplicatesById,
        removeDuplicatesUseMD5=args.removeDuplicatesUseMD5,
        randomSubset=args.randomSubset,
        trueLength=args.trueLength,
        sampleFraction=args.sampleFraction,
        sequenceNumbersFile=args.sequenceNumbersFile,
    )

    return reads.filter(**filterOptions)
Exemplo n.º 2
0
def _addSitesFromFile(filename, sites, errorFormat):
    """
    Add the sites described by the range expressions in a file to a set.

    @param filename: The C{str} name of a file, each line of which is passed
        to C{parseRangeExpression} (with conversion to zero-based values).
    @param sites: A C{set} to add to, or a falsy value (e.g., C{None}) to
        start from a new empty set.
    @param errorFormat: A C{str} %-format taking the file name, the 1-based
        line number, and the parse error, used to build the error message.
    @raise ValueError: If a line of the file cannot be parsed.
    @return: The C{set} of sites, including those found in the file.
    """
    sites = sites or set()
    with open(filename) as fp:
        # Count lines from 1 so the number in an error message matches the
        # line number a user sees in an editor.
        for lineNumber, line in enumerate(fp, start=1):
            try:
                sites.update(
                    parseRangeExpression(line, convertToZeroBased=True))
            except ValueError as e:
                raise ValueError(errorFormat % (filename, lineNumber, e))
    return sites


def parseFASTAEditingCommandLineOptions(args, reads):
    """
    Examine parsed FASTA editing command-line options and return edited
    reads.

    Sites to keep or remove may be given as a range expression on the
    command line, in a file with one range expression per line, or both (in
    which case the sites from both sources are combined).

    @param args: An argparse namespace, as returned by the argparse
        C{parse_args} function.
    @param reads: A C{Reads} instance to filter.
    @raise ValueError: If a line in a keep sites or remove sites file cannot
        be parsed. The message gives the file name and 1-based line number.
    @return: The result of calling C{reads.filter} with the editing options
        taken from C{args}.
    """
    keepSites = (parseRangeExpression(args.keepSites, convertToZeroBased=True)
                 if args.keepSites else None)

    if args.keepSitesFile:
        keepSites = _addSitesFromFile(
            args.keepSitesFile, keepSites,
            'Keep sites file %r line %d could not be parsed: %s')

    removeSites = (parseRangeExpression(args.removeSites,
                                        convertToZeroBased=True)
                   if args.removeSites else None)

    if args.removeSitesFile:
        removeSites = _addSitesFromFile(
            args.removeSitesFile, removeSites,
            'Remove sites file %r line %d parse error: %s')

    return reads.filter(removeGaps=args.removeGaps,
                        truncateTitlesAfter=args.truncateTitlesAfter,
                        removeDescriptions=args.removeDescriptions,
                        idLambda=args.idLambda,
                        readLambda=args.readLambda,
                        keepSites=keepSites,
                        removeSites=removeSites,
                        reverse=args.reverse,
                        reverseComplement=args.reverseComplement)
Exemplo n.º 3
0
 def testDoubleParens(self):
     """
     An intersection of two parenthesized unions, such as
     '(3-5 | 7-9) & (5-7 | 9-11)', must produce the expected set.
     """
     expression = '(3-5 | 7-9) & (5-7 | 9-11)'
     expected = {5, 7, 9}
     self.assertEqual(expected, parseRangeExpression(expression))
Exemplo n.º 4
0
 def testParens(self):
     """
     An intersection whose left side is a parenthesized union, such as
     '(3-5 | 7-9) & 5-7', must produce the expected set.
     """
     expression = '(3-5 | 7-9) & 5-7'
     expected = {5, 7}
     self.assertEqual(expected, parseRangeExpression(expression))
Exemplo n.º 5
0
 def testDifferenceWithSpaces(self):
     """
     A set difference written with surrounding spaces, such as
     '6-10 - 7-8', must produce the expected set.
     """
     expression = '6-10 - 7-8'
     expected = {6, 9, 10}
     self.assertEqual(expected, parseRangeExpression(expression))
Exemplo n.º 6
0
 def testIntersection(self):
     """
     Two ranges joined by '&', such as '3-4 & 4-8', must produce the set
     of values common to both.
     """
     expression = '3-4 & 4-8'
     expected = {4}
     self.assertEqual(expected, parseRangeExpression(expression))
Exemplo n.º 7
0
 def testUnion(self):
     """
     Two ranges joined by '|', such as '3-4 | 6-8', must produce the set
     of values found in either.
     """
     expression = '3-4 | 6-8'
     expected = {3, 4, 6, 7, 8}
     self.assertEqual(expected, parseRangeExpression(expression))
Exemplo n.º 8
0
 def testTwoRangesWithSpace(self):
     """
     Two comma-separated ranges with a space after the comma, such as
     '3-4, 6-8', must produce the expected set.
     """
     expression = '3-4, 6-8'
     expected = {3, 4, 6, 7, 8}
     self.assertEqual(expected, parseRangeExpression(expression))
Exemplo n.º 9
0
 def testCommasAndRange(self):
     """
     Comma-separated numbers followed by a range, such as '3,4,5-7', must
     produce the expected set.
     """
     expression = '3,4,5-7'
     expected = {3, 4, 5, 6, 7}
     self.assertEqual(expected, parseRangeExpression(expression))
Exemplo n.º 10
0
 def testCommas(self):
     """
     Comma-separated numbers, such as '3,4,5', must produce the expected
     set.
     """
     expression = '3,4,5'
     expected = {3, 4, 5}
     self.assertEqual(expected, parseRangeExpression(expression))
Exemplo n.º 11
0
 def testOneRangeZeroBased(self):
     """
     A single range, such as '3-4', must produce a set of values that are
     each one lower when convertToZeroBased is True.
     """
     expression = '3-4'
     expected = {2, 3}
     self.assertEqual(expected, parseRangeExpression(expression, True))
Exemplo n.º 12
0
 def testOneRange(self):
     """
     A single range, such as '3-4', must produce the set of values it
     spans.
     """
     expression = '3-4'
     expected = {3, 4}
     self.assertEqual(expected, parseRangeExpression(expression))
Exemplo n.º 13
0
 def testEmptyString(self):
     """
     Parsing the empty string must result in an empty set.
     """
     expression = ''
     expected = set()
     self.assertEqual(expected, parseRangeExpression(expression))
Exemplo n.º 14
0
if args.align:
    # Report the lengths of the two reads before they are aligned.
    len1, len2 = map(len, reads)
    if len1 == len2:
        print('Pre-alignment, sequence lengths were identical: %s' % len1)
    else:
        print('Pre-alignment, sequence lengths: %d, %d (difference %d)' %
              (len1, len2, abs(len1 - len2)))

    # Align.
    reads = needle(reads)

    if args.alignmentFile:
        # Do the save outside the assert statement. Assertions are removed
        # when Python runs with -O, which would otherwise silently skip
        # writing the alignment file.
        # NOTE(review): the return value is presumably the number of reads
        # saved - confirm against the save implementation.
        savedCount = reads.save(args.alignmentFile)
        assert savedCount == 2

# The sites to compare, as zero-based offsets, or None if no sites were
# given on the command line.
offsets = (parseRangeExpression(args.sites, convertToZeroBased=True)
           if args.sites else None)

read1, read2 = reads
len1, len2 = map(len, reads)
identicalLengths = len1 == len2

# Sanity check.
if args.align:
    assert identicalLengths

match = compareAaReads(read1, read2, offsets=offsets)

# Prefix for the length report, depending on whether alignment was done.
x = 'Post-alignment, sequence' if args.align else 'Sequence'
if identicalLengths:
    print('%s lengths are identical: %s' % (x, len1))
Exemplo n.º 15
0
parser.add_argument(
    'range',
    metavar='NUMBER,RANGE,...',
    help=('The ranges of directory inc numbers that should be printed. E.g., '
          '1-3,5 will output just the 1st, 2nd, 3rd, and 5th directory '
          'names. All others will be omitted. This option can include '
          'parentheses and Python set operators, e.g. '
          '"(3-5 | 10-12) - 5-10".'))

args = parser.parse_args()

# Names of the directories found, one per requested number.
result = []
# Requested numbers for which exactly one match was not found.
missing = []

# parseRangeExpression returns a set, whose iteration order is unspecified.
# Sort the numbers so directory names are always printed in ascending
# numeric order.
for n in sorted(parseRangeExpression(args.range)):
    files = glob('[DW]_[0-9][0-9][0-9][0-9][0-9][0-9]_%d_*' % n)
    # Only an unambiguous (single) match counts as found.
    if len(files) == 1:
        result.append(files[0])
    else:
        missing.append(n)

if missing:
    # Print nothing to standard output if any requested number could not
    # be resolved, and exit with a non-zero status.
    print('Found no matching directories for %s %s.' %
          ('index' if len(missing) == 1 else 'indices', ', '.join(
              map(str, sorted(missing)))),
          file=sys.stderr)
    sys.exit(1)
else:
    print('\n'.join(result))