Exemplo n.º 1
0
 def testMedianFourStrings(self):
     # Four reads of lengths 12, 5, 7 and 20. With an even number of
     # reads the expected median is 9.5, halfway between 7 and 12.
     #
     # The FASTA text is assembled with implicit literal concatenation.
     # A backslash continuation *inside* the string literal would embed
     # the next source line's indentation in the final sequence.
     seq = ('>hey\nagtcagtcagtc\n>you\nacctg\n>how\natgggtc\n>are\n'
            'atggctattgaactgtatct')
     result = summarize_reads(StringIO(seq), 'fasta')
     self.assertEqual(result['median_length'], 9.5)
Exemplo n.º 2
0
 def testMedianOneString(self):
     # One 12-base read: the expected median is that read's own length.
     handle = StringIO('>hey\nagtcagtcagtc')
     median = summarize_reads(handle, 'fasta')['median_length']
     self.assertEqual(median, 12)
Exemplo n.º 3
0
 def testMedianThreeStrings(self):
     # Three reads of lengths 12, 5 and 7: the expected median is the
     # middle length, 7.
     handle = StringIO('>hey\nagtcagtcagtc\n>you\nacctg\n>how\natgggtc')
     median = summarize_reads(handle, 'fasta')['median_length']
     self.assertEqual(median, 7)
Exemplo n.º 4
0
 def testMinLengthListTwoStrings(self):
     # Two reads of lengths 12 and 5: the shorter one sets min_length.
     handle = StringIO('>hey\nagtcagtcagtc\n>you\nacctg')
     shortest = summarize_reads(handle, 'fasta')['min_length']
     self.assertEqual(shortest, 5)
Exemplo n.º 5
0
 def testMedianEmptyInput(self):
     # With no reads at all, the median length is expected to be 0.
     empty = StringIO()
     summary = summarize_reads(empty, 'fasta')
     self.assertEqual(summary['median_length'], 0)
Exemplo n.º 6
0
 def testBaseCountsTwoReads(self):
     # Base totals are expected to be pooled over both reads
     # ('agtcagtcagtc' and 'acctg').
     expected = {'a': 4, 'c': 5, 't': 4, 'g': 4}
     handle = StringIO('>hey\nagtcagtcagtc\n>you\nacctg')
     counts = summarize_reads(handle, 'fasta')['base_counts']
     self.assertEqual(counts, expected)
Exemplo n.º 7
0
 def testMinLengthListEmptyInput(self):
     # With no reads at all, min_length is expected to be 0.
     empty = StringIO()
     summary = summarize_reads(empty, 'fasta')
     self.assertEqual(summary['min_length'], 0)
Exemplo n.º 8
0
 def testBaseCountsEmptyImput(self):
     # With no reads at all, the base count mapping is expected to be
     # empty.
     empty = StringIO()
     summary = summarize_reads(empty, 'fasta')
     self.assertEqual(summary['base_counts'], {})
Exemplo n.º 9
0
 def testBaseCountsOneRead(self):
     # 'agtc' repeated three times: each base is expected three times.
     expected = {'a': 3, 'c': 3, 't': 3, 'g': 3}
     handle = StringIO('>hey\nagtcagtcagtc')
     counts = summarize_reads(handle, 'fasta')['base_counts']
     self.assertEqual(counts, expected)
Exemplo n.º 10
0
 def testTotalLengthOneString(self):
     # One 12-base read: the total length is that read's length.
     handle = StringIO('>hey\nagtcagtcagtc')
     total = summarize_reads(handle, 'fasta')['total_length']
     self.assertEqual(total, 12)
Exemplo n.º 11
0
 def testTotalLengthTwoStrings(self):
     # Reads of 12 and 5 bases: the expected total length is 17.
     handle = StringIO('>hey\nagtcagtcagtc\n>you\nacctg')
     total = summarize_reads(handle, 'fasta')['total_length']
     self.assertEqual(total, 17)
Exemplo n.º 12
0
 def testTotalLengthEmptyInput(self):
     # With no reads at all, the total length is expected to be 0.
     empty = StringIO()
     summary = summarize_reads(empty, 'fasta')
     self.assertEqual(summary['total_length'], 0)
Exemplo n.º 13
0
 def testReadNumberTwoSequencesCount(self):
     # Two FASTA records in the input: read_number is expected to be 2.
     handle = StringIO('>hey\nagtcagtcagtc\n>you\nacctg')
     read_count = summarize_reads(handle, 'fasta')['read_number']
     self.assertEqual(read_count, 2)
Exemplo n.º 14
0
 def testReadNumberOneSequenceCount(self):
     # One FASTA record in the input: read_number is expected to be 1.
     handle = StringIO('>hey\nagtcagtcagtc')
     read_count = summarize_reads(handle, 'fasta')['read_number']
     self.assertEqual(read_count, 1)
Exemplo n.º 15
0
 def testReadNumberEmptyInput(self):
     # With no reads at all, read_number is expected to be 0.
     empty = StringIO()
     summary = summarize_reads(empty, 'fasta')
     self.assertEqual(summary['read_number'], 0)
Exemplo n.º 16
0
#!/usr/bin/env python

from dark.summarize import summarize_reads
import sys


# Command-line entry point: print summary statistics for the reads in a
# single FASTA or FASTQ file.
#
# Exactly one argument (the file name) is required. Checking for "!= 2"
# rather than "> 2" also catches the no-argument case, which would
# otherwise fail with an IndexError on sys.argv[1] instead of showing
# the usage message.
if len(sys.argv) != 2:
    sys.stderr.write(
        "Usage: %s file.fasta / file.fastq\n" % sys.argv[0])
    sys.exit(1)

filename = sys.argv[1]
# Crude format detection: any file name ending in 'a' is read as FASTA,
# everything else as FASTQ.
filetype = 'fasta' if filename.endswith('a') else 'fastq'

result = summarize_reads(filename, filetype)
read_number = result["read_number"]

# Each print takes one pre-formatted string, so the output is the same
# whether print is a statement (Python 2) or a function (Python 3).
print("Number of reads: %s" % (read_number,))
print("Total length: %s bases" % (result["total_length"],))
print("The average read length: %s bases" % (result["average_length"],))
print("Longest read: %s" % (result["max_length"],))
print("Shortest read: %s" % (result["min_length"],))
print("Median length: %s" % (result["median_length"],))

for base, count in result["base_counts"].items():
    # float() forces true division so that the per-read average is not
    # truncated to an integer under Python 2.
    print("%s: Total: %s; Average per read: %s" % (
        base, count, float(count) / read_number))