def test_find_multiple_single_base_deletion(self):
    # Two separate '*' marks in the read must each be reported as a
    # one-base deletion anchored on the reference base just before it.
    reference = ReferenceChromosome("TTAAAAAGAAAAT")
    found = Sequence(reference, "..*.....*....").variants
    expected = {
        Variant(reference.chrom, 1, "TA", "T"),
        Variant(reference.chrom, 7, "GA", "G"),
    }
    self.assertEqual(found, expected)
def test_should_find_multiple_snps(self):
    # Two substituted bases in the read, far apart from each other, must
    # come back as two independent SNPs at positions 1 and 11.
    reference = ReferenceChromosome("AAAAAAAAAAAAA")
    found = Sequence(reference, ".C.........T.").variants
    expected = {
        Variant(reference.chrom, 1, "A", "C"),
        Variant(reference.chrom, 11, "A", "T"),
    }
    self.assertEqual(found, expected)
def test_find_adjacent_insertion_and_snp(self):
    # The '*' column in the reference receives an inserted "C"; the very
    # next column carries a substitution. The expected SNP position (1)
    # shows that the padding column is not counted as a reference base.
    reference = ReferenceChromosome("T*ATAAAAAAAT")
    found = Sequence(reference, ".CG.........").variants
    expected = {
        Variant(reference.chrom, 0, "T", "TC"),
        Variant(reference.chrom, 1, "A", "G"),
    }
    self.assertEqual(found, expected)
def test_find_adjacent_snp_and_deletion(self):
    # A substitution at position 1 is immediately followed by a deleted
    # base; both events are expected, and both are reported at position 1
    # (the deletion uses the substituted position as its anchor).
    reference = ReferenceChromosome("TTAAAAAAAAAT")
    found = Sequence(reference, ".G*.........").variants
    expected = {
        Variant(reference.chrom, 1, "T", "G"),
        Variant(reference.chrom, 1, "TA", "T"),
    }
    self.assertEqual(found, expected)
def test_find_multiple_variants(self):
    # Mixed scenario: a SNP and an insertion sharing position 1, a later
    # SNP at position 5, and a two-base deletion anchored at position 8.
    reference = ReferenceChromosome("TA*AAAGCTAACT")
    found = Sequence(reference, ".GC...T...**.").variants
    expected = {
        Variant(reference.chrom, 1, "A", "G"),
        Variant(reference.chrom, 1, "A", "AC"),
        Variant(reference.chrom, 5, "G", "T"),
        Variant(reference.chrom, 8, "AAC", "A"),
    }
    self.assertEqual(found, expected)
def __get_expected_calls_from_haplotypes(ascii_strings, reference):
    """Derive expected genotype calls from two ASCII haplotype strings.

    Variants found on both haplotypes map to ``GenotypeCall("1/1")``;
    variants found on exactly one haplotype map to ``GenotypeCall("0/1")``.

    Args:
        ascii_strings: Exactly two haplotype strings, each as long as
            ``reference.length_with_deletions()``.
        reference: Reference object handed to ``Sequence`` for both
            haplotypes.

    Returns:
        A dict keyed by variant with the matching ``GenotypeCall`` value.

    Raises:
        weCallException: If the number of haplotypes is not two, or any
            haplotype length differs from the padded reference length.
    """
    if len(ascii_strings) != 2:
        raise weCallException(
            "Expected calls have to be defined as a diploid.")

    # Stop at the first haplotype whose length does not match.
    for haplotype in ascii_strings:
        if len(haplotype) != reference.length_with_deletions():
            raise weCallException(
                "Ascii haplotypes have to be of the same length as the reference")

    first_variants = Sequence(reference, ascii_strings[0]).variants
    second_variants = Sequence(reference, ascii_strings[1]).variants

    # Shared variants first, then the ones unique to a single haplotype.
    expected_calls = {
        variant: GenotypeCall("1/1")
        for variant in first_variants.intersection(second_variants)
    }
    expected_calls.update({
        variant: GenotypeCall("0/1")
        for variant in first_variants.symmetric_difference(second_variants)
    })
    return expected_calls
def build_annotated_pair(
        fwd, rev, n_fwd, n_rev, mapping_quality, insert_size, read_id,
        read_flags, cigar_string, read_start, read_mate_start):
    """Assemble one ``ReadPairWithCoverage`` from a forward and reverse mate.

    Each mate supplies its own ``reference_string``, ``pos_from``,
    ``sequence_string`` and ``quality_string``. All remaining annotations
    (``cigar_string``, ``mapping_quality``, ``insert_size``, ``read_id``,
    ``read_flags``, ``read_start``, ``read_mate_start``) are applied
    identically to both mates.

    Returns:
        A single-element list holding the ``ReadPairWithCoverage`` built
        from the two read sequences together with ``n_fwd`` and ``n_rev``.
    """
    mates = (fwd, rev)

    # Build stage by stage (references, sequences, qualities, reads), always
    # handling the forward mate before the reverse one.
    references = [
        ReferenceChromosome(mate.reference_string, mate.pos_from)
        for mate in mates
    ]
    sequences = [
        # ',' is normalised to '.' and bases are upper-cased before parsing.
        Sequence(
            mate_reference,
            mate.sequence_string.replace(",", ".").upper(),
            cigar_string)
        for mate_reference, mate in zip(references, mates)
    ]
    qualities = [SequenceQuality(mate.quality_string) for mate in mates]

    fwd_read, rev_read = [
        ReadSequence(
            mate_sequence, mate_quality, mapping_quality, insert_size,
            read_id, read_flags, read_start, read_mate_start)
        for mate_sequence, mate_quality in zip(sequences, qualities)
    ]
    return [ReadPairWithCoverage(fwd_read, rev_read, n_fwd, n_rev)]
def build_annotated_seq(
        self, n_fwd, n_rev, mapping_quality, insert_size, read_id,
        read_flags, cigar_string, read_start, read_mate_start):
    """Turn this raw sequence into a one-element coverage-annotated list.

    When ``n_fwd`` is given (not ``None``), ``n_fwd`` and ``n_rev`` are
    used as supplied. Otherwise the coverage is ``(0, 1)`` if
    ``is_reverse_seq()`` is true, or ``(1, 0)`` if ``is_forward_seq()``
    is true.

    Returns:
        ``[ReadSequenceWithCoverage(read_sequence, forward, reverse)]``.

    Raises:
        weCallException: If ``n_fwd`` is ``None`` and the sequence is
            neither reverse nor forward.
    """
    reference = ReferenceChromosome(self.reference_string, self.pos_from)
    # ',' is normalised to '.' and bases are upper-cased before parsing.
    normalised_bases = self.sequence_string.replace(",", ".").upper()
    read_sequence = ReadSequence(
        Sequence(reference, normalised_bases, cigar_string),
        SequenceQuality(self.quality_string),
        mapping_quality, insert_size, read_id, read_flags,
        read_start, read_mate_start)

    # Pick the (forward, reverse) counts; explicit counts take priority.
    if n_fwd is not None:
        coverage = (n_fwd, n_rev)
    elif self.is_reverse_seq():
        coverage = (0, 1)
    elif self.is_forward_seq():
        coverage = (1, 0)
    else:
        raise weCallException(
            "Raw sequence: {} is neither forward or reverse".format(self))

    return [ReadSequenceWithCoverage(read_sequence, *coverage)]
def test_should_find_multi_base_insertion(self):
    # Two consecutive '*' columns in the reference filled with "GC" must
    # be merged into one insertion anchored on the preceding "T".
    reference = ReferenceChromosome("CT**AAAAAAAAT")
    found = Sequence(reference, "..GC.........").variants
    expected = {Variant(reference.chrom, 1, "T", "TGC")}
    self.assertEqual(found, expected)
def test_finds_snp(self):
    # A single substituted base in the read is reported as one SNP.
    reference = ReferenceChromosome("AAAAAAAAAAAAA")
    found = Sequence(reference, ".C...........").variants
    expected = {Variant(reference.chrom, 1, "A", "C")}
    self.assertEqual(found, expected)
def test_should_not_find_deletion_on_left_edge(self):
    # A '*' in the very first column of the read must not be reported as
    # a deletion: the expected variant set is empty.
    # (The leftover debug print of the variants was dropped so the test
    # no longer writes to stdout on every run.)
    ref = ReferenceChromosome("TAGCAAAAAAAT")
    seq = Sequence(ref, "*...........")
    self.assertEqual(len(seq.variants), 0)
def test_should_not_find_long_deletion_on_right_edge(self):
    # A run of '*' that reaches the last column of the read must not be
    # reported as a deletion: the expected variant set is empty.
    reference = ReferenceChromosome("TTAGCAAAAAACT")
    found = Sequence(reference, "..........***").variants
    self.assertEqual(len(found), 0)
def test_should_get_empty_cigar(self):
    # An empty read against an empty reference renders as an empty CIGAR.
    reference = ReferenceChromosome("")
    cigar_text = str(Sequence(reference, "").cigar)
    self.assertEqual(cigar_text, "")
def test_find_multi_base_deletion_with_deletion_in_reference(self):
    # The read has '*' in two adjacent columns, but the second of those
    # columns is also '*' in the reference, so only a single-base
    # deletion ("TA" -> "T") is expected.
    reference = ReferenceChromosome("TTA*AAAAAAAAT")
    found = Sequence(reference, "..**.........").variants
    expected = {Variant(reference.chrom, 1, "TA", "T")}
    self.assertEqual(found, expected)
def test_finds_snp_at_the_ref_start(self):
    # A substitution in the very first column is reported at position 0.
    reference = ReferenceChromosome("CATAAAAAAAA")
    found = Sequence(reference, "T..........").variants
    expected = {Variant(reference.chrom, 0, "C", "T")}
    self.assertEqual(found, expected)
def test_finds_snp_at_the_ref_end(self):
    # A substitution in the very last column is reported at position 10.
    reference = ReferenceChromosome("CATAAAAAAAT")
    found = Sequence(reference, "..........C").variants
    expected = {Variant(reference.chrom, 10, "T", "C")}
    self.assertEqual(found, expected)
def test_finds_snp_after_asterix(self):
    # Reference and read both have '*' in the second column. The SNP in
    # the fourth column is expected at position 2, i.e. the shared '*'
    # column is not counted as a reference base.
    reference = ReferenceChromosome("T*CATAAAAAAAA")
    found = Sequence(reference, ".*.C.........").variants
    expected = {Variant(reference.chrom, 2, "A", "C")}
    self.assertEqual(found, expected)
def test_should_get_correct_cigar_for_dots(self):
    # Four reference-matching columns collapse into a single "4M".
    reference = ReferenceChromosome("CCAA")
    cigar_text = str(Sequence(reference, "....").cigar)
    self.assertEqual(cigar_text, "4M")
def test_should_get_correct_cigar_for_snp(self):
    # A substituted base still counts towards the match run: "3M".
    reference = ReferenceChromosome("TTT")
    cigar_text = str(Sequence(reference, ".G.").cigar)
    self.assertEqual(cigar_text, "3M")
def test_should_get_correct_cigar_for_multiple_events(self):
    # Substitutions, deletions, insertions and columns where both the
    # reference and the read are '*' are combined in one alignment; the
    # expected CIGAR has no entry for the shared '*' column.
    reference = ReferenceChromosome("CCC***AAATTT")
    cigar_text = str(Sequence(reference, "A*.T*T...**C").cigar)
    self.assertEqual(cigar_text, "1M1D1M2I3M2D1M")
def test_should_get_correct_cigar_for_padding_and_insertion(self):
    # A base placed in a reference '*' column is rendered as "1I"
    # between the two flanking matches.
    reference = ReferenceChromosome("T*T")
    cigar_text = str(Sequence(reference, ".C.").cigar)
    self.assertEqual(cigar_text, "1M1I1M")