Пример #1
0
def summarizeReads(file_handle, file_type):
    """
    Collect summary statistics for the reads in a sequence file.

    The records come from SeqIO.parse(file_handle, file_type). Nothing is
    printed: the statistics are handed back to the caller in a dict with
    the following keys:

        read_number:    how many records were parsed.
        total_length:   the sum of len(record) over all records.
        average_length: total_length / read_number, or 0 with no records.
        max_length:     the largest record length, or 0 with no records.
        min_length:     the smallest record length, or 0 with no records.
        median_length:  median() of the record lengths, or 0 with no
                        records.
        base_counts:    a defaultdict(int) mapping every item met while
                        iterating the records to its number of
                        occurrences.

    file_handle and file_type are passed to SeqIO.parse unchanged
    (file_type is presumably 'fasta' or 'fastq' - confirm with callers).
    """
    lengths = []
    per_base = defaultdict(int)

    for read in SeqIO.parse(file_handle, file_type):
        lengths.append(len(read))
        for letter in read:
            per_base[letter] += 1

    n_reads = len(lengths)
    n_bases = sum(lengths)

    if lengths:
        mean_len = n_bases / n_reads
        longest = max(lengths)
        shortest = min(lengths)
        middle = median(lengths)
    else:
        # No records at all: every length statistic falls back to zero.
        mean_len = longest = shortest = middle = 0

    return {
        "read_number": n_reads,
        "total_length": n_bases,
        "average_length": mean_len,
        "max_length": longest,
        "min_length": shortest,
        "median_length": middle,
        "base_counts": per_base,
    }
Пример #2
0
def summarizeReads(file_handle, file_type):
    """
    Parse a sequence file and return a dict of read statistics.

    Records are produced by SeqIO.parse(file_handle, file_type); both
    arguments are forwarded untouched. The function prints nothing.

    The returned dict holds: "read_number" (record count),
    "total_length" (sum of record lengths), "average_length",
    "max_length", "min_length", "median_length" (all 0 when no records
    were parsed) and "base_counts" (a defaultdict(int) counting each item
    yielded when iterating over the records).
    """
    counts = defaultdict(int)
    lengths = []
    bases_seen = 0

    for entry in SeqIO.parse(file_handle, file_type):
        size = len(entry)
        bases_seen += size
        lengths.append(size)
        for symbol in entry:
            counts[symbol] += 1

    # Start from the "no reads" answer; the length statistics are only
    # overwritten when at least one record was parsed.
    summary = {
        "read_number": len(lengths),
        "total_length": bases_seen,
        "average_length": 0,
        "max_length": 0,
        "min_length": 0,
        "median_length": 0,
        "base_counts": counts,
    }

    if lengths:
        summary["average_length"] = bases_seen / len(lengths)
        summary["max_length"] = max(lengths)
        summary["min_length"] = min(lengths)
        summary["median_length"] = median(lengths)

    return summary
Пример #3
0
    def medianScore(self):
        """
        Compute the median of the scores of all HSPs returned by
        C{self.hsps()}.

        @raise ValueError: If there are no HSPs (expected to be raised by
            C{median} when given an empty list - confirm against its
            implementation).
        @return: The C{float} median score of HSPs in alignments matching the
            title.
        """
        scores = [hsp.score.score for hsp in self.hsps()]
        return median(scores)
Пример #4
0
    def medianScore(self):
        """
        Return the median over the C{score.score} value of every HSP
        yielded by C{self.hsps()}.

        @raise ValueError: If there are no HSPs (presumably propagated from
            C{median} on an empty list - confirm against its implementation).
        @return: The C{float} median score of HSPs in alignments matching the
            title.
        """
        allScores = []
        for hsp in self.hsps():
            allScores.append(hsp.score.score)
        return median(allScores)
Пример #5
0
 def testMedianOfTwo(self):
     """
     Given a two-element list, median must return 4.5 for [3.1, 5.9].
     """
     values = [3.1, 5.9]
     self.assertEqual(4.5, median(values))
Пример #6
0
 def testMedianOfOne(self):
     """
     Given a single-element list, median must return that element.
     """
     values = [3]
     self.assertEqual(3, median(values))
Пример #7
0
 def testMedianOfFive(self):
     """
     Given an unsorted five-element list, median must return 5.9 for
     [3.1, 1.3, 7.6, 9.9, 5.9].
     """
     values = [3.1, 1.3, 7.6, 9.9, 5.9]
     self.assertEqual(5.9, median(values))
Пример #8
0
 def testMedianOfFour(self):
     """
     Given an unsorted four-element list, median must return 4.5 for
     [3.1, 1.3, 7.6, 5.9].
     """
     values = [3.1, 1.3, 7.6, 5.9]
     self.assertEqual(4.5, median(values))
Пример #9
0
 def testMedianOfThree(self):
     """
     Given an unsorted three-element list, median must return 5.9 for
     [3.1, 7.6, 5.9].
     """
     values = [3.1, 7.6, 5.9]
     self.assertEqual(5.9, median(values))
Пример #10
0
 def testMedianOfFive(self):
     """
     The median of the five unsorted values below must be 5.9.
     """
     expected = 5.9
     actual = median([3.1, 1.3, 7.6, 9.9, 5.9])
     self.assertEqual(expected, actual)
Пример #11
0
 def testMedianOfFour(self):
     """
     The median of the four unsorted values below must be 4.5.
     """
     expected = 4.5
     actual = median([3.1, 1.3, 7.6, 5.9])
     self.assertEqual(expected, actual)
Пример #12
0
 def testMedianOfThree(self):
     """
     The median of the three unsorted values below must be 5.9.
     """
     expected = 5.9
     actual = median([3.1, 7.6, 5.9])
     self.assertEqual(expected, actual)
Пример #13
0
 def testMedianOfTwo(self):
     """
     The median of the two values below must be 4.5.
     """
     expected = 4.5
     actual = median([3.1, 5.9])
     self.assertEqual(expected, actual)
Пример #14
0
 def testMedianOfOne(self):
     """
     The median of a list holding only the value 3 must be 3.
     """
     expected = 3
     actual = median([3])
     self.assertEqual(expected, actual)