def test_read_genotype_likelihoods():
    """Parse genotype-likelihoods.vcf with likelihood reading switched on.

    Checks the single resulting table: chromosome, sample names, variant
    count, the genotypes of both samples and, per sample, the four genotype
    likelihood entries (the third entry is expected to be ``None``).
    """
    reader = VcfReader("tests/data/genotype-likelihoods.vcf", genotype_likelihoods=True)
    tables = list(reader)
    assert len(tables) == 1

    table = tables[0]
    assert table.chromosome == "chrA"
    assert table.samples == ["sample1", "sample2"]
    assert len(table.variants) == 4

    assert len(table.genotypes) == 2
    for row, indices in enumerate(([2, 1, 1, 1], [1, 0, 0, 1])):
        assert list(table.genotypes[row]) == canonic_index_list_to_biallelic_gt_list(indices)

    gl0 = GenotypeLikelihoods([-2.1206, -0.8195, -0.07525])
    gl1 = GenotypeLikelihoods([-10.3849, 0, -5.99143])
    gl2 = GenotypeLikelihoods([-2.1, -0.8, -0.8])
    gl3 = GenotypeLikelihoods([0, -10.0, -0.6])
    expected_by_sample = (
        ("sample1", [gl0, gl2, None, gl0]),
        ("sample2", [gl1, gl3, None, gl1]),
    )

    # Check the lengths up front: zip() below stops at the shorter sequence.
    for sample, _ in expected_by_sample:
        assert len(table.genotype_likelihoods_of(sample)) == 4

    for sample, expected in expected_by_sample:
        for actual_gl, expected_gl in zip(table.genotype_likelihoods_of(sample), expected):
            assert_genotype_likelihoods(actual_gl, expected_gl)
def test_read_tetraploid_unphased():
    """Read an unphased polyploid VCF and check variants and genotypes.

    Verifies chromosome, sample name, variant count, the reference and
    alternative alleles of the first four variants, and that the sample's
    genotypes equal the expected ploidy-4 genotype list.

    The "Got:"/"Exp:" output is diagnostic only. The expected list is built
    once, with ploidy 4, and used for both the printout and the assertion,
    so the printed values are the ones actually compared.
    """
    tables = list(VcfReader("tests/data/polyploid.chr22.unphased.vcf", phases=False))
    assert len(tables) == 1
    table = tables[0]
    assert table.chromosome == "chr22"
    assert table.samples == ["HG00514_NA19240"]
    assert len(table.variants) == 8

    expected_alleles = [("A", "C"), ("G", "A"), ("G", "T"), ("G", "C")]
    for variant, (ref, alt) in zip(table.variants, expected_alleles):
        assert variant.reference_allele == ref
        assert variant.alternative_allele == alt

    expected_genotypes = canonic_index_list_to_biallelic_gt_list([3, 2, 0, 3, 3, 1, 1, 1], 4)

    print("Got:")
    for genotype in table.genotypes[0]:
        print(genotype)
    print("Exp:")
    # The loop variable must be the one printed; the previous version bound a
    # misspelled name and re-printed the last "Got" genotype on every line.
    for genotype in expected_genotypes:
        print(genotype)

    assert table.genotypes[0] == expected_genotypes
def test_read_multisample_vcf():
    """Read multisample.vcf and check both per-chromosome tables.

    The second table is only checked for chromosome and samples; the first
    is checked for variants, alleles and the genotypes of both samples,
    accessed by index and by sample name.
    """
    tables = list(VcfReader("tests/data/multisample.vcf"))
    assert len(tables) == 2
    table, table_b = tables

    sample_names = ["sample1", "sample2"]
    assert table_b.chromosome == "chrB"
    assert table_b.samples == sample_names

    assert table.chromosome == "chrA"
    assert len(table.variants) == 3
    assert table.samples == sample_names

    expected_alleles = [("A", "T"), ("C", "G"), ("G", "T")]
    for variant, (ref, alt) in zip(table.variants, expected_alleles):
        assert variant.reference_allele == ref
        assert variant.alternative_allele == alt

    assert len(table.genotypes) == 2
    expected_indices = ([1, 1, 1], [1, 1, 0])
    for row, indices in enumerate(expected_indices):
        assert list(table.genotypes[row]) == canonic_index_list_to_biallelic_gt_list(indices)
    for sample, indices in zip(sample_names, expected_indices):
        assert list(table.genotypes_of(sample)) == canonic_index_list_to_biallelic_gt_list(indices)
def test_phase_doubletrio_pure_genetic():
    """Phase a five-person, two-trio pedigree from genotypes only (no reads).

    Individuals A and B are the parents of C; C and D are the parents of E.
    Asserts a cost of 0, a constant transmission vector, the expected
    haplotypes, and the allele order within each trio.
    """
    reads = ""
    pedigree = Pedigree(NumericSampleIds())
    members = (
        ("individualA", [1, 2, 1, 0]),
        ("individualB", [1, 0, 1, 1]),
        ("individualC", [2, 1, 1, 0]),
        ("individualD", [1, 2, 2, 1]),
        ("individualE", [1, 1, 1, 0]),
    )
    for name, indices in members:
        pedigree.add_individual(name, canonic_index_list_to_biallelic_gt_list(indices))
    pedigree.add_relationship("individualA", "individualB", "individualC")
    pedigree.add_relationship("individualC", "individualD", "individualE")

    recombcost = [2, 2, 2]
    superreads_list, transmission_vector, cost = phase_pedigree(
        reads, recombcost, pedigree, positions=[10, 20, 30, 40]
    )

    assert cost == 0
    assert len(set(transmission_vector)) == 1
    all_expected_haplotypes = [
        ("0100", "1110"),
        ("0011", "1000"),
        ("1110", "1000"),
        ("1111", "0110"),
        ("1000", "0110"),
    ]
    assert_haplotypes(superreads_list, all_expected_haplotypes, 4)

    per_trio = get_trio_transmission_vectors(transmission_vector, 4)
    first_trio = superreads_list[:3]
    second_trio = superreads_list[2:]
    assert_trio_allele_order(first_trio, per_trio[0], 4)
    assert_trio_allele_order(second_trio, per_trio[1], 4)
def test_phase_trio5():
    """Phase a trio in which every individual has the same genotype list.

    Asserts a cost of 3, a constant transmission vector, the expected
    haplotypes and the trio allele order.
    """
    # NOTE(review): literal reproduced exactly as it appears in the source; confirm row layout.
    reads = """ B 101 B 101 B 101 A 111 A 111 A 111 C 111 C 111 C 111 """
    pedigree = Pedigree(NumericSampleIds())
    for name in ("individual0", "individual1", "individual2"):
        pedigree.add_individual(name, canonic_index_list_to_biallelic_gt_list([1, 1, 1]))
    pedigree.add_relationship("individual0", "individual1", "individual2")

    recombcost = [2, 2, 2]
    superreads_list, transmission_vector, cost = phase_pedigree(reads, recombcost, pedigree)

    assert cost == 3
    assert len(set(transmission_vector)) == 1
    all_expected_haplotypes = [("111", "000"), ("111", "000"), ("111", "000")]
    assert_haplotypes(superreads_list, all_expected_haplotypes, 3)
    assert_trio_allele_order(superreads_list, transmission_vector, 3)
def test_phase_quartet3():
    """Phase a quartet (two parents, two children) over six variants.

    Asserts a cost of 8, the expected haplotypes, and the allele order of
    both trios formed by the parents with each child. Cost and transmission
    vector are printed for diagnosis.
    """
    # NOTE(review): literal reproduced exactly as it appears in the source; confirm row layout.
    reads = """ A 1111 A 0000 B 1010 C 111000 C 010101 D 000000 D 010 B 0101 C 1100 D 10010 A 0000 A 1111 B 1010 B 0101 """
    pedigree = Pedigree(NumericSampleIds())
    members = (
        ("individual0", [1, 1, 1, 1, 1, 1]),
        ("individual1", [1, 1, 1, 1, 1, 1]),
        ("individual2", [1, 2, 1, 1, 0, 1]),
        ("individual3", [0, 1, 0, 0, 1, 0]),
    )
    for name, indices in members:
        pedigree.add_individual(name, canonic_index_list_to_biallelic_gt_list(indices))
    for child in ("individual2", "individual3"):
        pedigree.add_relationship("individual0", "individual1", child)

    recombcost = [3, 3, 3, 4, 3, 3]
    superreads_list, transmission_vector, cost = phase_pedigree(reads, recombcost, pedigree)
    print(cost)
    print(transmission_vector)
    assert cost == 8
    # TODO: expect transmission in both trio relations. Update once transmission
    # vectors are returned per trio relationship; the intended check is that
    # transmission_vector is one of
    # [0,0,0,1,1,1], [1,1,1,0,0,0], [2,2,2,3,3,3], [3,3,3,2,2,2].

    all_expected_haplotypes = [
        ("111111", "000000"),
        ("010101", "101010"),
        ("111000", "010101"),
        ("000000", "010010"),
    ]
    assert_haplotypes(superreads_list, all_expected_haplotypes, 6)

    per_trio = get_trio_transmission_vectors(transmission_vector, 6)
    assert_trio_allele_order(superreads_list[:3], per_trio[0], 6)
    second_trio = [superreads_list[0], superreads_list[1], superreads_list[3]]
    assert_trio_allele_order(second_trio, per_trio[1], 6)
def test_read_phased_vcf():
    """Read two phased VCFs and check that both give the same tables.

    For each input file the test checks chromosome, variant count, samples
    and genotypes (by index and by sample name) of both tables, then the
    phase information: chrA carries phase records for both samples, chrB
    carries none.
    """
    vcf_paths = ["tests/data/phased-via-HP.vcf", "tests/data/phased-via-PS.vcf"]
    sample_names = ["sample1", "sample2"]
    for filename in vcf_paths:
        print("Testing", filename)
        tables = list(VcfReader(filename, phases=True))
        assert len(tables) == 2
        table_a, table_b = tables

        assert table_a.chromosome == "chrA"
        assert len(table_a.variants) == 4
        assert table_a.samples == sample_names

        assert table_b.chromosome == "chrB"
        assert len(table_b.variants) == 2
        assert table_b.samples == sample_names

        # Genotypes: first through positional access, then through the sample name.
        for table, expected_indices in (
            (table_a, ([1, 2, 1, 1], [1, 1, 1, 1])),
            (table_b, ([0, 1], [1, 2])),
        ):
            assert len(table.genotypes) == 2
            for row, indices in enumerate(expected_indices):
                assert list(table.genotypes[row]) == canonic_index_list_to_biallelic_gt_list(
                    indices
                )
            for sample, indices in zip(sample_names, expected_indices):
                assert list(table.genotypes_of(sample)) == canonic_index_list_to_biallelic_gt_list(
                    indices
                )

        print(table_a.phases)
        assert len(table_a.phases) == 2
        expected_phase_sample1 = [
            None,
            None,
            VariantCallPhase(block_id=300, phase=(1, 0), quality=23),
            VariantCallPhase(block_id=300, phase=(0, 1), quality=42),
        ]
        expected_phase_sample2 = [
            VariantCallPhase(block_id=100, phase=(0, 1), quality=10),
            VariantCallPhase(block_id=100, phase=(1, 0), quality=20),
            VariantCallPhase(block_id=300, phase=(0, 1), quality=30),
            VariantCallPhase(block_id=300, phase=(0, 1), quality=None),
        ]
        assert list(table_a.phases[0]) == expected_phase_sample1
        assert list(table_a.phases[1]) == expected_phase_sample2
        assert list(table_a.phases_of("sample1")) == expected_phase_sample1
        assert list(table_a.phases_of("sample2")) == expected_phase_sample2

        assert len(table_b.phases) == 2
        unphased = [None, None]
        assert list(table_b.phases[0]) == unphased
        assert list(table_b.phases[1]) == unphased
        assert list(table_b.phases_of("sample1")) == unphased
        assert list(table_b.phases_of("sample2")) == unphased
def test_phase_quartet2():
    """Phase a quartet whose two children share the same genotype list.

    Asserts a cost of 0, a constant transmission vector, the expected
    haplotypes, and the allele order of both parent-parent-child trios.
    """
    # NOTE(review): literal reproduced exactly as it appears in the source; confirm row layout.
    reads = """ A 111111 A 000000 B 010101 B 101010 C 000000 C 010101 D 000000 D 010101 """
    pedigree = Pedigree(NumericSampleIds())
    members = (
        ("individual0", [1, 1, 1, 1, 1, 1]),
        ("individual1", [1, 1, 1, 1, 1, 1]),
        ("individual2", [0, 1, 0, 1, 0, 1]),
        ("individual3", [0, 1, 0, 1, 0, 1]),
    )
    for name, indices in members:
        pedigree.add_individual(name, canonic_index_list_to_biallelic_gt_list(indices))
    for child in ("individual2", "individual3"):
        pedigree.add_relationship("individual0", "individual1", child)

    recombcost = [3, 3, 3, 3, 3, 3]
    superreads_list, transmission_vector, cost = phase_pedigree(reads, recombcost, pedigree)

    assert cost == 0
    assert len(set(transmission_vector)) == 1
    all_expected_haplotypes = [
        ("111111", "000000"),
        ("010101", "101010"),
        ("000000", "010101"),
        ("000000", "010101"),
    ]
    assert_haplotypes(superreads_list, all_expected_haplotypes, 6)

    per_trio = get_trio_transmission_vectors(transmission_vector, 6)
    assert_trio_allele_order(superreads_list[:3], per_trio[0], 6)
    second_trio = [superreads_list[0], superreads_list[1], superreads_list[3]]
    assert_trio_allele_order(second_trio, per_trio[1], 6)
def test_weighted_genotyping4():
    """Genotype one individual from weighted reads over six variants.

    Every variant is expected to receive canonic genotype index 1.
    """
    # NOTE(review): literals reproduced exactly as they appear in the source; confirm row layout.
    reads = """ 00 00 0000 000 111 111101 111111 111110 000 1111 """
    weights = """ 11 11 1111 111 111 111111 111111 111111 111 1111 """
    expected = canonic_index_list_to_biallelic_gt_list([1, 1, 1, 1, 1, 1])
    check_genotyping_single_individual(reads, weights, None, expected, 10)
def test_small_example():
    """Genotype one individual from two complementary reads over eight variants.

    Every variant is expected to receive canonic genotype index 1.
    """
    # NOTE(review): literal reproduced exactly as it appears in the source; confirm row layout.
    reads = """ 11111111 00000000 """
    expected = canonic_index_list_to_biallelic_gt_list([1, 1, 1, 1, 1, 1, 1, 1])
    check_genotyping_single_individual(reads, None, None, expected, 1000)
def bipartition(reads):
    """Phase a single individual and return the phasing with its read partition.

    All positions reported by ``reads.get_positions()`` are treated as
    heterozygous with no genotype likelihoods. A one-person pedigree is
    phased with ``PedigreeDPTable`` and a few summary figures are written
    through ``eprint``.

    Returns:
        A ``(phasing, partition)`` tuple: the super reads from
        ``get_super_reads()`` and the result of
        ``get_optimal_partitioning()``.
    """
    n_positions = len(reads.get_positions())

    # One individual, heterozygous (=1) everywhere, without likelihoods.
    genotypes = canonic_index_list_to_biallelic_gt_list([1] * n_positions)
    genotype_likelihoods = [None] * n_positions
    pedigree = Pedigree(NumericSampleIds())
    pedigree.add_individual('individual0', genotypes, genotype_likelihoods)

    # A recombination cost vector is still required, even with one individual.
    recombcost = [1] * n_positions

    dp_table = PedigreeDPTable(reads, recombcost, pedigree, distrust_genotypes=False)
    phasing, _transmission_vector = dp_table.get_super_reads()

    mec_score = dp_table.get_optimal_cost()
    eprint("MEC Score:", mec_score)
    # NOTE(review): readset_length is not defined inside this function; it is
    # presumably a module-level name set elsewhere -- confirm.
    eprint("MEC Score / readset length:", float(mec_score) / float(readset_length))

    partition = dp_table.get_optimal_partitioning()
    eprint("partition fraction:", sum(partition) / float(len(partition)))

    return phasing, partition
def test_geno1():
    """Genotype one individual from two reads over ten variants.

    The first five variants are expected at canonic index 1, the last five
    at canonic index 2.
    """
    # NOTE(review): literal reproduced exactly as it appears in the source; confirm row layout.
    reads = """ 1111111111 0000011111 """
    expected = canonic_index_list_to_biallelic_gt_list([1, 1, 1, 1, 1, 2, 2, 2, 2, 2])
    check_genotyping_single_individual(reads, None, None, expected, 10)
def test_geno8():
    """Genotype one individual from three reads over two variants.

    The expected canonic genotype indices are 2 and 1.
    """
    # NOTE(review): literal reproduced exactly as it appears in the source; confirm row layout.
    reads = """ 11 11 10 """
    expected = canonic_index_list_to_biallelic_gt_list([2, 1])
    check_genotyping_single_individual(reads, None, None, expected, 10)
def test_genotyping_empty_readset():
    """Construct a GenotypeDPTable from an empty ReadSet.

    There are no assertions; the test passes if construction does not raise.
    """
    numeric_sample_ids = NumericSampleIds()
    pedigree = Pedigree(numeric_sample_ids)
    pedigree.add_individual(
        "individual0",
        canonic_index_list_to_biallelic_gt_list([1, 1]),
        [None, None],
    )
    empty_reads = ReadSet()
    recombcost = [1, 1]
    _ = GenotypeDPTable(numeric_sample_ids, empty_reads, recombcost, pedigree)
def test_phase_trio3():
    """Phase a trio over six variants where a transmission switch is expected.

    Asserts a cost of 4, a transmission vector that changes value after the
    third variant, the expected haplotypes and the trio allele order.
    """
    # NOTE(review): literal reproduced exactly as it appears in the source; confirm row layout.
    reads = """ A 1111 B 1010 C 111000 C 010101 B 0101 A 0000 B 1010 C 1010 C 1100 A 0000 A 1111 B 1010 B 010 """
    pedigree = Pedigree(NumericSampleIds())
    members = (
        ("individual0", [1, 1, 1, 1, 1, 1]),
        ("individual1", [1, 1, 1, 1, 1, 1]),
        ("individual2", [1, 2, 1, 1, 0, 1]),
    )
    for name, indices in members:
        pedigree.add_individual(name, canonic_index_list_to_biallelic_gt_list(indices))
    pedigree.add_relationship("individual0", "individual1", "individual2")

    recombcost = [3, 3, 3, 4, 3, 3]
    superreads_list, transmission_vector, cost = phase_pedigree(reads, recombcost, pedigree)

    assert cost == 4
    acceptable_vectors = (
        [0, 0, 0, 1, 1, 1],
        [1, 1, 1, 0, 0, 0],
        [2, 2, 2, 3, 3, 3],
        [3, 3, 3, 2, 2, 2],
    )
    assert transmission_vector in acceptable_vectors

    all_expected_haplotypes = [
        ("111111", "000000"),
        ("010101", "101010"),
        ("111000", "010101"),
    ]
    assert_haplotypes(superreads_list, all_expected_haplotypes, 6)
    assert_trio_allele_order(superreads_list, transmission_vector, 6)
def test_phase_trio_genotype_likelihoods():
    """Phase a trio whose individuals carry Phred genotype likelihoods.

    All three are registered with genotype index 0 at every variant;
    individual0 has its own likelihood list, while individual1 and
    individual2 share one list. ``phase_pedigree`` is called with ``True``
    as its fourth positional argument. Asserts a cost of 3, a constant
    transmission vector, the expected haplotypes and the trio allele order.
    """
    # NOTE(review): literal reproduced exactly as it appears in the source; confirm row layout.
    reads = """ A 111 A 010 A 110 B 001 B 110 B 101 C 001 C 010 C 010 """
    pedigree = Pedigree(NumericSampleIds())

    genotype_likelihoods_mother = [
        PhredGenotypeLikelihoods(values) for values in ([0, 0, 0], [0, 0, 1], [5, 0, 5])
    ]
    # The same list object is deliberately handed to both remaining individuals.
    genotype_likelihoods0 = [PhredGenotypeLikelihoods([0, 0, 0])] * 3

    members = (
        ("individual0", genotype_likelihoods_mother),
        ("individual1", genotype_likelihoods0),
        ("individual2", genotype_likelihoods0),
    )
    for name, likelihoods in members:
        pedigree.add_individual(
            name, canonic_index_list_to_biallelic_gt_list([0, 0, 0]), likelihoods
        )
    pedigree.add_relationship("individual0", "individual1", "individual2")

    recombcost = [10, 10, 10]
    superreads_list, transmission_vector, cost = phase_pedigree(
        reads, recombcost, pedigree, True
    )

    assert cost == 3
    assert len(set(transmission_vector)) == 1
    all_expected_haplotypes = [("111", "010"), ("001", "110"), ("001", "010")]
    assert_haplotypes(superreads_list, all_expected_haplotypes, 3)
    assert_trio_allele_order(superreads_list, transmission_vector, 3)
def test_genotyping_trio13():
    """Genotype a trio over six variants with reads for the parents only.

    Both parents get ``[0, 1, 0]`` likelihoods and the child
    ``[0.25, 0.5, 0.25]``; recombination cost is 1000000 everywhere. All
    three individuals are expected at canonic index 1 for every variant.
    """
    # NOTE(review): literal reproduced exactly as it appears in the source; confirm row layout.
    reads = """ A 1111 A 0000 B 1111 B 0000 """
    expected_genotypes = [
        canonic_index_list_to_biallelic_gt_list([1, 1, 1, 1, 1, 1]) for _ in range(3)
    ]

    numeric_sample_ids = NumericSampleIds()
    pedigree = Pedigree(numeric_sample_ids)
    members = (
        ("individual0", [0, 1, 0]),
        ("individual1", [0, 1, 0]),
        ("individual2", [0.25, 0.5, 0.25]),
    )
    for name, prior in members:
        pedigree.add_individual(
            name,
            canonic_index_list_to_biallelic_gt_list([0, 0, 0, 0, 0, 0]),
            [PhredGenotypeLikelihoods(prior)] * 6,
        )
    pedigree.add_relationship("individual0", "individual1", "individual2")

    recombcost = [1000000] * 6
    genotype_pedigree(
        numeric_sample_ids,
        reads,
        recombcost,
        pedigree,
        expected_genotypes,
        scaling=1000,
    )
def test_genotyping_trio1():
    """Genotype a trio over two variants starting from uniform likelihoods.

    The expected canonic indices are 0 for individual0, 2 for individual1
    and 1 for individual2, at both variants.
    """
    # NOTE(review): literal reproduced exactly as it appears in the source; confirm row layout.
    reads = """ A 00 A 00 B 11 B 11 C 11 C 00 """
    expected_genotypes = [
        canonic_index_list_to_biallelic_gt_list(indices)
        for indices in ([0, 0], [2, 2], [1, 1])
    ]

    numeric_sample_ids = NumericSampleIds()
    pedigree = Pedigree(numeric_sample_ids)
    for name in ("individual0", "individual1", "individual2"):
        pedigree.add_individual(
            name,
            canonic_index_list_to_biallelic_gt_list([1, 1]),
            [PhredGenotypeLikelihoods([1.0 / 3.0, 1.0 / 3.0, 1.0 / 3.0])] * 2,
        )
    pedigree.add_relationship("individual0", "individual1", "individual2")

    recombcost = [10, 10]
    genotype_pedigree(numeric_sample_ids, reads, recombcost, pedigree, expected_genotypes)
def test_genotyping_quartet4():
    """Genotype a quartet over six variants starting from uniform likelihoods.

    individual0 and individual1 are the parents of individual2 and
    individual3. The expected genotypes are listed per individual in
    pedigree order.
    """
    # NOTE(review): literal reproduced exactly as it appears in the source; confirm row layout.
    reads = """ A 1111 A 0000 B 1010 C 111000 C 010101 D 000000 D 010 B 0101 C 1100 D 10010 A 0000 A 1111 B 1010 B 0101 """
    expected_indices = (
        [1, 1, 1, 1, 1, 1],
        [1, 1, 1, 1, 1, 1],
        [1, 2, 1, 1, 0, 1],
        [0, 1, 0, 0, 1, 0],
    )
    expected_genotypes = [
        canonic_index_list_to_biallelic_gt_list(indices) for indices in expected_indices
    ]

    numeric_sample_ids = NumericSampleIds()
    pedigree = Pedigree(numeric_sample_ids)
    for name in ("individual0", "individual1", "individual2", "individual3"):
        pedigree.add_individual(
            name,
            canonic_index_list_to_biallelic_gt_list([0, 0, 0, 0, 0, 0]),
            [PhredGenotypeLikelihoods([1.0 / 3.0, 1.0 / 3.0, 1.0 / 3.0])] * 6,
        )
    for child in ("individual2", "individual3"):
        pedigree.add_relationship("individual0", "individual1", child)

    recombcost = [3, 3, 3, 4, 3, 3]
    genotype_pedigree(numeric_sample_ids, reads, recombcost, pedigree, expected_genotypes)
def test_geno_exact1():
    """Genotype one individual and compare against exact expected likelihoods.

    Both the expected genotypes (canonic indices 2, 1, 2) and the three
    expected likelihood triples are passed to the checker.
    """
    # NOTE(review): literal reproduced exactly as it appears in the source; confirm row layout.
    reads = """ 11 01 """
    outer = [0.06666666666666667, 0.3333333333333333, 0.6]
    middle = [0.20930232558139536, 0.5813953488372093, 0.20930232558139536]
    expected_likelihoods = [
        PhredGenotypeLikelihoods(list(outer)),
        PhredGenotypeLikelihoods(middle),
        PhredGenotypeLikelihoods(list(outer)),
    ]
    expected = canonic_index_list_to_biallelic_gt_list([2, 1, 2])
    check_genotyping_single_individual(reads, None, expected_likelihoods, expected, 10)
def test_weighted_genotyping():
    """Genotype a trio from weighted reads with a very high recombination cost.

    Passes the expected genotypes as well as expected per-individual
    likelihood triples to ``genotype_pedigree``, using ``scaling=500``.
    """
    # NOTE(review): literals reproduced exactly as they appear in the source; confirm row layout.
    reads = """ B 00 B 11 A 11 A 00 C 11 C 11 """
    weights = """ 99 99 99 99 99 99 """
    expected_genotypes = [
        canonic_index_list_to_biallelic_gt_list(indices)
        for indices in ([1, 1], [1, 1], [2, 2])
    ]

    numeric_sample_ids = NumericSampleIds()
    pedigree = Pedigree(numeric_sample_ids)
    for name in ("individual0", "individual1", "individual2"):
        pedigree.add_individual(
            name,
            canonic_index_list_to_biallelic_gt_list([0, 0, 0, 0]),
            [PhredGenotypeLikelihoods([0.25, 0.5, 0.25])] * 4,
        )
    pedigree.add_relationship("individual0", "individual1", "individual2")

    # A cost this large makes recombination extremely unlikely.
    recombcost = [1000, 1000, 1000, 1000]

    expected = {
        0: [[0, 1, 0], [0, 1, 0]],
        1: [[0, 1, 0], [0, 1, 0]],
        2: [[0, 1.0 / 3.0, 2 / 3.0], [0, 1.0 / 3.0, 2 / 3.0]],
    }
    genotype_pedigree(
        numeric_sample_ids,
        reads,
        recombcost,
        pedigree,
        expected_genotypes,
        weights,
        expected,
        scaling=500,
    )
def test_phase_empty_readset(algorithm):
    """Request super reads from a phasing core built on an empty ReadSet.

    Args:
        algorithm: ``"hapchat"`` selects ``HapChatCore``; any other value
            selects ``PedigreeDPTable``. Presumably supplied by a pytest
            parametrization or fixture that is not visible here.

    There are no assertions; the test passes if nothing raises.
    """
    rs = ReadSet()
    recombcost = [1, 1]
    pedigree = Pedigree(NumericSampleIds())
    pedigree.add_individual(
        "individual0",
        canonic_index_list_to_biallelic_gt_list([1, 1]),
        [None, None],
    )
    dp_table = (
        HapChatCore(rs)
        if algorithm == "hapchat"
        else PedigreeDPTable(rs, recombcost, pedigree)
    )
    _ = dp_table.get_super_reads()
def test_geno_exact2():
    """Genotype one individual from weighted reads and compare exact likelihoods.

    Both variants are expected at canonic index 2 with the same likelihood
    triple.
    """
    # NOTE(review): literals reproduced exactly as they appear in the source; confirm row layout.
    reads = """ 11 11 """
    weights = """ 11 11 """
    expected_likelihoods = [
        PhredGenotypeLikelihoods([0.00914139256727894, 0.25040580948312685, 0.7404527979495942])
        for _ in range(2)
    ]
    expected = canonic_index_list_to_biallelic_gt_list([2, 2])
    check_genotyping_single_individual(reads, weights, expected_likelihoods, expected, 10)
def test_geno_10():
    """Genotype one individual over six variants with per-variant priors.

    Variants expected at canonic index 1 get the prior ``[0.1, 0.8, 0.1]``;
    those expected at index 0 get ``[0.7, 0.2, 0.1]``.
    """
    # NOTE(review): literal reproduced exactly as it appears in the source; confirm row layout.
    reads = """ 001100 000000 000000 110011 110011 111111 """
    expected = canonic_index_list_to_biallelic_gt_list([1, 1, 0, 0, 1, 1])
    prior_values = (
        [0.1, 0.8, 0.1],
        [0.1, 0.8, 0.1],
        [0.7, 0.2, 0.1],
        [0.7, 0.2, 0.1],
        [0.1, 0.8, 0.1],
        [0.1, 0.8, 0.1],
    )
    genotype_priors = [PhredGenotypeLikelihoods(values) for values in prior_values]
    check_genotyping_single_individual(reads, None, None, expected, 50, genotype_priors)
def test_geno6():
    """Genotype one individual from a larger read set over thirteen variants."""
    # NOTE(review): literal reproduced exactly as it appears in the source; the
    # reads contain gaps, so confirm the intended row and column layout.
    reads = """ 0100000000000 0100010000000 1110000000010 0100000000000 0101000001000 0100010 000 0 10000000100 1111111011100 0100111010011 1111111000111 1111110011111 11110000 000 1110011011111 1111001011111 0111111110 1 """
    expected_indices = [1, 2, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1]
    expected = canonic_index_list_to_biallelic_gt_list(expected_indices)
    check_genotyping_single_individual(reads, None, None, expected, 60)
def test_genotyping_trio10():
    """Genotype a trio over four variants when only the parents have reads.

    The expected canonic indices are 2 for individual0, 0 for individual1
    and 1 for individual2 at every variant.
    """
    # NOTE(review): literal reproduced exactly as it appears in the source; confirm row layout.
    reads = """ B 0000 B 0000 B 0000 B 0000 B 0000 B 0000 A 1111 A 1111 A 1111 A 1111 A 1111 A 1111 """
    # The child has no reads; its expected genotype (index 1 at each position)
    # has to follow from inheritance.
    expected_genotypes = [
        canonic_index_list_to_biallelic_gt_list(indices)
        for indices in ([2, 2, 2, 2], [0, 0, 0, 0], [1, 1, 1, 1])
    ]

    numeric_sample_ids = NumericSampleIds()
    pedigree = Pedigree(numeric_sample_ids)
    for name in ("individual0", "individual1", "individual2"):
        pedigree.add_individual(
            name,
            canonic_index_list_to_biallelic_gt_list([0, 0, 0, 0]),
            [PhredGenotypeLikelihoods([1.0 / 3.0, 1.0 / 3.0, 1.0 / 3.0])] * 4,
        )
    pedigree.add_relationship("individual0", "individual1", "individual2")

    recombcost = [10, 10, 10, 10]
    genotype_pedigree(numeric_sample_ids, reads, recombcost, pedigree, expected_genotypes)