def bipartition(reads):
    """Phase a read set as one individual and return phasing and bipartition.

    A single-sample pedigree ('individual0') is built in which every variant
    position returned by ``reads.get_positions()`` gets genotype index 1
    (all heterozygous) and no genotype likelihoods. ``PedigreeDPTable`` is
    then run on it, and the optimal cost plus two summary ratios are written
    via ``eprint``.

    Args:
        reads: read set exposing ``get_positions()``; handed unchanged to
            ``PedigreeDPTable``.

    Returns:
        A tuple ``(phasing, partition)``: the first value returned by
        ``dp_table.get_super_reads()`` and the result of
        ``dp_table.get_optimal_partitioning()``.
    """
    positions = reads.get_positions()
    n_positions = len(positions)

    # Genotypes over all variants: every position heterozygous (index 1).
    genotypes = canonic_index_list_to_biallelic_gt_list([1] * n_positions)
    # No genotype likelihoods are supplied.
    genotype_likelihoods = [None] * n_positions

    # Pedigree containing exactly one individual.
    pedigree = Pedigree(NumericSampleIds())
    pedigree.add_individual('individual0', genotypes, genotype_likelihoods)

    # Recombination cost vector; irrelevant when only one individual is used.
    recombcost = [1] * n_positions

    # Run the core phasing algorithm, creating the DP table.
    dp_table = PedigreeDPTable(reads, recombcost, pedigree, distrust_genotypes=False)
    # The transmission vector is not needed for a single individual.
    phasing, _ = dp_table.get_super_reads()

    mec_score = dp_table.get_optimal_cost()
    eprint("MEC Score:", mec_score)

    # NOTE(review): `readset_length` is not defined inside this function; it
    # is presumably a module-level value set elsewhere -- confirm.
    # A zero denominator is reported as nan so that a diagnostic line cannot
    # abort the function before the result is returned.
    length = float(readset_length)
    mec_per_read = float(mec_score) / length if length else float("nan")
    eprint("MEC Score / readset length:", mec_per_read)

    # The bi-partition of the reads.
    partition = dp_table.get_optimal_partitioning()
    n_assigned = len(partition)
    # Same zero-denominator guard for an empty partition.
    fraction = sum(partition) / float(n_assigned) if n_assigned else float("nan")
    eprint("partition fraction:", fraction)

    return phasing, partition
def phase_pedigree(reads, recombcost, pedigree, distrust_genotypes=False, positions=None):
    """Run the pedigree DP on a textual read set and print the outcome.

    ``reads`` is converted with ``string_to_readset_pedigree`` and passed,
    together with the remaining arguments, to ``PedigreeDPTable``. Every
    super read, the optimal cost, the transmission vector and the optimal
    partitioning are printed to standard output.

    Args:
        reads: input accepted by ``string_to_readset_pedigree``.
        recombcost: forwarded to ``PedigreeDPTable``.
        pedigree: forwarded to ``PedigreeDPTable``.
        distrust_genotypes: forwarded to ``PedigreeDPTable``.
        positions: forwarded to ``PedigreeDPTable``.

    Returns:
        A tuple ``(superreads_list, transmission_vector, cost)`` taken from
        ``get_super_reads()`` and ``get_optimal_cost()``.
    """
    readset = string_to_readset_pedigree(reads)
    table = PedigreeDPTable(readset, recombcost, pedigree, distrust_genotypes, positions)

    result = table.get_super_reads()
    all_superreads, transmissions = result
    optimal_cost = table.get_optimal_cost()

    # Print every super read of every group, one per line.
    for group in all_superreads:
        for super_read in group:
            print(super_read)

    # The cost is queried again for display, as in the original sequence of calls.
    print('Cost:', table.get_optimal_cost())
    print('Transmission vector:', transmissions)
    print('Partition:', table.get_optimal_partitioning())

    return all_superreads, transmissions, optimal_cost