def test_pileup():
    """Check the bases, qualities and most common base reported by a small Pileup."""
    # (base, quality) for each element; the last one carries no quality value.
    observations = [('A', 40), ('A', 20), ('G', None)]
    pileup = Pileup([PileupElement(base, qual) for base, qual in observations])

    expected_bases = ['A', 'A', 'G']
    expected_quals = [40, 20, None]

    assert pileup.bases() == expected_bases
    assert pileup.quals() == expected_quals
    # 'A' occurs twice, 'G' once.
    assert pileup.most_common_base() == 'A'
def merge(reads, max_qual=46, fraction_agree=0.75):
    """
    Collapse a group of reads into a single read, one position at a time.

    A lone read is returned untouched. Otherwise a Pileup is built for every
    position up to the length of the longest read, each pileup is merged, and
    the resulting bases and the strings returned by ``phredqual()`` are joined
    and written onto the first read of the group. That first read is modified
    in place and returned.

    :param reads: the reads to merge; indexing the first read fails on an empty list
    :param max_qual: passed through to ``Pileup.merge`` for every position
    :param fraction_agree: passed through to ``Pileup.merge`` for every position
    :type reads: list[pysam.AlignedSegment]
    :type max_qual: int
    :type fraction_agree: float
    :return: pysam.AlignedSegment
    """
    logging.debug("Merging {} read(s).".format(len(reads)))

    # Nothing to merge: hand the only read back unchanged.
    if len(reads) == 1:
        return reads[0]

    # The first read is reused as the carrier for the merged sequence/qualities.
    target = reads[0]
    longest = max(len(read.seq) for read in reads)

    bases = []
    qual_chars = []
    for position in range(longest):
        element = Pileup.from_reads(reads, position).merge(max_qual, fraction_agree)
        bases.append(element.base)
        qual_chars.append(element.phredqual())

    merged_seq = "".join(bases)
    logging.debug("new seq is {}".format(merged_seq))
    merged_qual = "".join(qual_chars)
    logging.debug("new qual is {}".format(merged_qual))

    # Sequence is assigned before qualities, as in the original ordering.
    target.seq = merged_seq
    target.qual = merged_qual
    return target
def test_merge():
    """
    Exercise merging at three levels.

    1. Per-position ``Pileup.merge`` over four reads (ties, majority vote,
       merged qualities, pileup sizes).
    2. Pairwise merging of reads ``a`` and ``b``.
    3. A full ``ReadMerger`` run that writes to a temporary BAM file.

    The reads ``my_read`` .. ``my_read4``, ``a``, ``b`` and ``header`` are not
    defined here; they are expected to be provided elsewhere in the module.
    """
    # :type: list[PileupElement]
    merged_pileup_elements = []
    # :type: list[Pileup]
    pileups = []

    reads = [my_read, my_read2, my_read3, my_read4]
    for k in range(0, 4):
        pile = Pileup.from_reads(reads, k)
        pileups.append(pile)
        merged_pileup_elements.append(pile.merge(max_qual=45, fraction_agree=0.75))

    # test that ties gives N
    assert merged_pileup_elements[0].base == 'N'  # tie, 2xA 2xG

    # test that majority vote works
    assert merged_pileup_elements[1].base == 'G'  # G wins from 3xG 1xA

    # test all 4 bases that were merged
    assert "".join([pe.base for pe in merged_pileup_elements]) == 'NGCT'

    # test that qualities were properly merged
    assert [pe.qual for pe in merged_pileup_elements] == [2, 45, 45, 45]

    # test that the size of each pileup works
    assert [len(p.bases()) for p in pileups] == [4, 4, 4, 3]

    assert ReadMerger.key(a) == ReadMerger.key(b)
    assert Pileup.from_reads([a, b], 0).merge(45, .75).base == 'N'
    assert Pileup.from_reads([a, b], 0).merge(45, .75).qual == 2
    assert Pileup.from_reads([a, b], 1).merge(45, .75).base == 'G'

    # mkstemp returns an already-open OS-level descriptor together with the
    # path. Only the path is needed, so close the descriptor instead of
    # leaking it.
    fd, tmpfilename = tempfile.mkstemp('.bam')
    os.close(fd)

    outfile = None
    try:
        outfile = pysam.AlignmentFile(tmpfilename, "wb", header=header)
        readmerger = ReadMerger([a, b], outfile, fraction_agree=0.75, max_qual=45,
                                reads_between_logs=1)
        exitcode = readmerger.do_work()
        assert exitcode == 0

        metrics = readmerger.metrics.dict()
        assert 'L1' in metrics
    finally:
        # Clean up even when an assertion above fails: release the BAM handle
        # first, then delete the temporary file it points at.
        if outfile is not None:
            outfile.close()
        os.remove(tmpfilename)