def check_genotyping_single_individual(
    reads,
    weights=None,
    expected=None,
    genotypes=None,
    scaling=None,
    genotype_priors=None,
):
    """Genotype a single individual from ``reads`` and check the outcome.

    The reads are turned into a read set via ``string_to_readset`` (with
    ``weights`` and with ``scaling`` passed as ``scale_quality``). A pedigree
    containing only ``"individual0"`` is created, a ``GenotypeDPTable`` is
    built from it, and the table is handed to ``compare_to_expected``
    together with the read set positions, ``expected`` and ``genotypes``.

    When ``genotype_priors`` is not ``None`` it is used as the per-position
    genotype likelihoods; otherwise every position shares one
    ``PhredGenotypeLikelihoods([1/3, 1/3, 1/3])`` instance.
    """
    # 0) set up the read set
    read_set = string_to_readset(s=reads, w=weights, scale_quality=scaling)
    positions = read_set.get_positions()
    num_positions = len(positions)

    # 1) genotype using the forward-backward algorithm
    sample_ids = NumericSampleIds()
    ped = Pedigree(sample_ids)
    uniform_likelihoods = [
        PhredGenotypeLikelihoods([1.0 / 3.0, 1.0 / 3.0, 1.0 / 3.0])
    ] * num_positions
    likelihoods = uniform_likelihoods if genotype_priors is None else genotype_priors
    initial_genotypes = [
        canonic_index_to_biallelic_gt(1) for _ in range(num_positions)
    ]
    ped.add_individual("individual0", initial_genotypes, likelihoods)

    unit_recombination_costs = [1] * num_positions
    genotyping_table = GenotypeDPTable(
        sample_ids, read_set, unit_recombination_costs, ped
    )

    # check the results
    compare_to_expected(genotyping_table, positions, expected, genotypes)
def test_read_phased():
    """Read ``tests/data/phasedinput.vcf`` with ``phases=True`` and check it.

    Expects exactly one table for chromosome ``"ref"`` with the single sample
    ``"sample"``, two variants (A->C and G->T), and both genotypes of the
    first sample equal to ``canonic_index_to_biallelic_gt(1)``.
    """
    parsed_tables = list(VcfReader("tests/data/phasedinput.vcf", phases=True))
    assert len(parsed_tables) == 1

    table = parsed_tables[0]
    assert table.chromosome == "ref"
    assert table.samples == ["sample"]
    assert len(table.variants) == 2

    # (reference allele, alternative allele) expected at each variant index
    expected_alleles = (("A", "C"), ("G", "T"))
    for index, (ref_allele, alt_allele) in enumerate(expected_alleles):
        assert table.variants[index].reference_allele == ref_allele
        assert table.variants[index].alternative_allele == alt_allele

    first_gt = table.genotypes[0][0]
    second_gt = table.genotypes[0][1]
    assert first_gt == second_gt == canonic_index_to_biallelic_gt(1)
def verify(rs, all_heterozygous=False):
    """Build a one-individual ``PedigreeDPTable`` over ``rs`` and verify it.

    The pedigree holds only ``"individual0"``, with the genotype
    ``canonic_index_to_biallelic_gt(1)`` at every position (all genotypes
    heterozygous). If ``all_heterozygous`` is true, the genotype likelihoods
    are ``None`` at every position and genotypes are not distrusted;
    otherwise each position shares one ``PhredGenotypeLikelihoods([0, 0, 0])``
    and ``distrust_genotypes`` is enabled. The finished table is passed to
    ``verify_mec_score_and_partitioning`` along with ``rs``.
    """
    num_positions = len(rs.get_positions())

    if all_heterozygous:
        per_position_likelihood = None
    else:
        per_position_likelihood = PhredGenotypeLikelihoods([0, 0, 0])

    ped = Pedigree(NumericSampleIds())
    ped.add_individual(
        "individual0",
        # all genotypes heterozygous
        [canonic_index_to_biallelic_gt(1) for _ in range(num_positions)],
        [per_position_likelihood] * num_positions,
    )

    table = PedigreeDPTable(
        rs,
        [1] * num_positions,  # recombination costs 1, should not occur
        ped,
        distrust_genotypes=not all_heterozygous,
    )
    verify_mec_score_and_partitioning(table, rs)
def check_phasing_single_individual(reads, algorithm="whatshap", weights=None):
    """Phase ``reads`` for one individual and compare against brute force.

    With ``algorithm == "hapchat"`` the read set is phased by ``HapChatCore``
    and checked once. Otherwise the read set is phased with
    ``PedigreeDPTable`` twice for each value of ``all_heterozygous``
    (``False`` then ``True``): first as a single individual, then as a trio
    whose two additional individuals have no reads. Every result is passed
    to ``compare_phasing_brute_force``.
    """
    # 0) set up read set
    read_set = string_to_readset(reads, weights)
    positions = read_set.get_positions()

    # for hapchat
    if algorithm == "hapchat":
        hapchat_table = HapChatCore(read_set)
        hapchat_superreads = hapchat_table.get_super_reads()
        hapchat_cost = hapchat_table.get_optimal_cost()
        hapchat_partition = hapchat_table.get_optimal_partitioning()
        compare_phasing_brute_force(
            hapchat_superreads[0][0],
            hapchat_cost,
            hapchat_partition,
            read_set,
            True,
            weights,
            algorithm,
        )
        return

    num_positions = len(positions)

    def fresh_genotypes():
        # all genotypes heterozygous; a new list for every individual
        return [canonic_index_to_biallelic_gt(1) for _ in range(num_positions)]

    def fresh_likelihoods(all_het):
        # no likelihoods when all-heterozygous, otherwise one shared zero entry
        entry = None if all_het else PhredGenotypeLikelihoods([0, 0, 0])
        return [entry] * num_positions

    # 1) Phase using PedMEC code for single individual
    for all_heterozygous in (False, True):
        unit_costs = [1] * num_positions  # recombination costs 1, should not occur
        ped = Pedigree(NumericSampleIds())
        likelihoods = fresh_likelihoods(all_heterozygous)
        ped.add_individual("individual0", fresh_genotypes(), likelihoods)
        table = PedigreeDPTable(
            read_set, unit_costs, ped, distrust_genotypes=not all_heterozygous
        )
        super_reads, transmission = table.get_super_reads()
        optimal_cost = table.get_optimal_cost()
        # TODO: transmission vectors not returned properly, see issue 73
        assert len(set(transmission)) == 1
        optimal_partition = table.get_optimal_partitioning()
        compare_phasing_brute_force(
            super_reads[0],
            optimal_cost,
            optimal_partition,
            read_set,
            all_heterozygous,
            weights,
        )

    # 2) Phase using PedMEC code for trios with two "empty" individuals
    #    (i.e. having no reads)
    for all_heterozygous in (False, True):
        unit_costs = [1] * num_positions  # recombination costs 1, should not occur
        ped = Pedigree(NumericSampleIds())
        # the same likelihood list is handed to all three individuals
        shared_likelihoods = fresh_likelihoods(all_heterozygous)
        for individual in ("individual0", "individual1", "individual2"):
            ped.add_individual(individual, fresh_genotypes(), shared_likelihoods)
        ped.add_relationship("individual0", "individual1", "individual2")
        table = PedigreeDPTable(
            read_set, unit_costs, ped, distrust_genotypes=not all_heterozygous
        )
        optimal_cost = table.get_optimal_cost()
        super_reads, transmission = table.get_super_reads()
        assert len(set(transmission)) == 1
        optimal_partition = table.get_optimal_partitioning()
        compare_phasing_brute_force(
            super_reads[0],
            optimal_cost,
            optimal_partition,
            read_set,
            all_heterozygous,
            weights,
        )