Exemplo n.º 1
0
 def test_gc_content(self):
     """Check seq.gc_content against rejected inputs and known GC fractions."""
     # The empty sequence and an all-N sequence are both expected to raise
     # NullSequenceError.
     for rejected in ("", "NNNNN"):
         with pytest.raises(seq.NullSequenceError):
             seq.gc_content(rejected)

     # (sequence, expected GC fraction) pairs, checked in order.
     expectations = (
         ("AAATTT", 0.0),
         ("GGGGCC", 1.0),
         ("ACGTACGT", 0.5),
         ("ACCTACGT", 0.5),
     )
     for bases, fraction in expectations:
         assert seq.gc_content(bases) == fraction
Exemplo n.º 2
0
 def test_gc_content(self):
     """Exercise seq.gc_content on inputs it rejects and on known fractions."""
     # Inputs expected to raise NullSequenceError: empty, then all-N.
     with pytest.raises(seq.NullSequenceError):
         seq.gc_content("")
     with pytest.raises(seq.NullSequenceError):
         seq.gc_content("NNNNN")

     # Extremes: no G/C at all, then nothing but G/C.
     no_gc, all_gc = "AAATTT", "GGGGCC"
     assert seq.gc_content(no_gc) == 0.0
     assert seq.gc_content(all_gc) == 1.0

     # Two different sequences that are each half G/C.
     for half_gc in ("ACGTACGT", "ACCTACGT"):
         assert seq.gc_content(half_gc) == 0.5
Exemplo n.º 3
0
def summary_statistics(reads):
    """Return a dictionary of summary statistics of the reads.

    The keys of the returned dictionary are:

        "rnames":       a Counter of the rnames of the reads
        "flags":        a Counter of the bitflags of the reads
        "cigars":       a Counter of the string representations of the
                        cigars of the reads
        "gc":           the average GC content of the sequences
                        (0 when there are no reads)
        "read_count":   the number of sam reads
        "hashes":       a Counter of the qnames of the reads

    Args:
        reads: an iterable of read objects exposing ``rname``, ``flag``,
            ``cigar``, ``seq`` and ``qname`` attributes. It is iterated
            exactly once, so a generator is acceptable.

    Returns:
        The summary dictionary described above.

    Raises:
        Any exception raised by ``seq.gc_content`` for a read's sequence
        is propagated unchanged.
    """
    summary = {
        "rnames": Counter(),
        "flags": Counter(),
        "cigars": Counter(),
        "gc": 0,
        "read_count": 0,
        "hashes": Counter(),
    }

    for read in reads:
        summary["rnames"][read.rname] += 1
        summary["flags"][read.flag] += 1
        summary["cigars"][str(read.cigar)] += 1
        # "gc" holds the running sum here; it becomes the mean after the loop.
        summary["gc"] += seq.gc_content(read.seq)
        summary["read_count"] += 1
        summary["hashes"][read.qname] += 1

    # With no reads the division would raise ZeroDivisionError; leave "gc"
    # at its initial 0 in that case.
    if summary["read_count"]:
        summary["gc"] /= summary["read_count"]
    return summary
Exemplo n.º 4
0
def summary_statistics(reads):
    """Return a dictionary of summary statistics of the reads.

    The keys of the returned dictionary are:

        "rnames":       a Counter of the rnames of the reads
        "flags":        a Counter of the bitflags of the reads
        "cigars":       a Counter of the string representations of the
                        cigars of the reads
        "gc":           the average GC content of the sequences
                        (0 when there are no reads)
        "read_count":   the number of sam reads
        "hashes":       a Counter of the qnames of the reads

    Args:
        reads: an iterable of read objects exposing ``rname``, ``flag``,
            ``cigar``, ``seq`` and ``qname`` attributes. It is iterated
            exactly once, so a generator is acceptable.

    Returns:
        The summary dictionary described above.

    Raises:
        Any exception raised by ``seq.gc_content`` for a read's sequence
        is propagated unchanged.
    """
    rnames = Counter()
    flags = Counter()
    cigars = Counter()
    qnames = Counter()
    gc_total = 0
    read_count = 0

    for read in reads:
        rnames[read.rname] += 1
        flags[read.flag] += 1
        cigars[str(read.cigar)] += 1
        gc_total += seq.gc_content(read.seq)
        read_count += 1
        qnames[read.qname] += 1

    # An empty input has no mean; report 0 instead of dividing by zero.
    if read_count:
        gc_mean = gc_total / read_count
    else:
        gc_mean = 0

    return {"rnames": rnames,
            "flags": flags,
            "cigars": cigars,
            "gc": gc_mean,
            "read_count": read_count,
            "hashes": qnames}