示例#1
0
def phase_pedigree(reads, recombcost, pedigree, distrust_genotypes=False, positions=None):
    """Run the pedigree DP on ``reads`` and echo the solution to stdout.

    The textual ``reads`` are converted with ``string_to_readset_pedigree``
    and handed, together with the remaining arguments, to
    ``PedigreeDPTable``. Every super read, the optimal cost, the
    transmission vector and the optimal partitioning are printed.

    Args:
        reads: Input forwarded to ``string_to_readset_pedigree``.
        recombcost: Recombination costs forwarded to ``PedigreeDPTable``.
        pedigree: Pedigree object forwarded to ``PedigreeDPTable``.
        distrust_genotypes: Forwarded to ``PedigreeDPTable``.
        positions: Forwarded to ``PedigreeDPTable``.

    Returns:
        A ``(superreads_list, transmission_vector, cost)`` tuple taken from
        ``get_super_reads()`` and ``get_optimal_cost()``.
    """
    readset = string_to_readset_pedigree(reads)
    table = PedigreeDPTable(readset, recombcost, pedigree, distrust_genotypes, positions)

    all_superreads, transmission = table.get_super_reads()
    optimal_cost = table.get_optimal_cost()

    # Walk the nested super-read lists in order, one super read per line.
    for superread in (sr for group in all_superreads for sr in group):
        print(superread)

    print('Cost:', table.get_optimal_cost())
    print('Transmission vector:', transmission)
    print('Partition:', table.get_optimal_partitioning())

    return all_superreads, transmission, optimal_cost
示例#2
0
def bipartition(reads):
    """Phase the reads of a single individual and return phasing and partition.

    All variant positions of ``reads`` are declared heterozygous, a pedigree
    containing the single individual ``'individual0'`` is built, and the
    pedigree DP table is solved with ``distrust_genotypes=False``.
    Diagnostics (optimal cost, cost per read, mean of the partition vector)
    are reported through ``eprint``.

    Args:
        reads: Read set exposing ``get_positions()``; it is passed unchanged
            to ``PedigreeDPTable``.

    Returns:
        A ``(phasing, partition)`` tuple: the super reads from
        ``get_super_reads()`` and the result of
        ``get_optimal_partitioning()``.
    """
    positions = reads.get_positions()
    n_positions = len(positions)

    # Genotypes over all variants: every position heterozygous (=1).
    genotypes = canonic_index_list_to_biallelic_gt_list([1] * n_positions)
    # No genotype likelihoods are supplied.
    genotype_likelihoods = [None] * n_positions

    # Pedigree holding exactly one individual.
    pedigree = Pedigree(NumericSampleIds())
    pedigree.add_individual('individual0', genotypes, genotype_likelihoods)

    # Recombination cost vector; irrelevant when only one individual is used.
    recombcost = [1] * n_positions

    # Run the core phasing algorithm, creating a DP table.
    dp_table = PedigreeDPTable(reads,
                               recombcost,
                               pedigree,
                               distrust_genotypes=False)
    phasing, transmission_vector = dp_table.get_super_reads()

    # NOTE(review): the original read an undefined-in-scope name
    # `readset_length`; it is taken here to be the number of reads in the
    # read set -- confirm this matches the intended denominator.
    readset_length = len(reads)

    mec_score = dp_table.get_optimal_cost()
    eprint("MEC Score:", mec_score)
    # Report NaN rather than raising ZeroDivisionError on an empty read set.
    if readset_length:
        mec_per_read = float(mec_score) / float(readset_length)
    else:
        mec_per_read = float("nan")
    eprint("MEC Score / readset length:", mec_per_read)

    # In case the bi-partition of reads is of interest:
    partition = dp_table.get_optimal_partitioning()
    if len(partition):
        partition_fraction = sum(partition) / float(len(partition))
    else:
        partition_fraction = float("nan")
    eprint("partition fraction:", partition_fraction)

    return phasing, partition
示例#3
0
def phase_MAV(reads, n_alleles, all_het, genos, genotypes, weights=None):
    """Solve the pedigree DP for one individual under each ``all_het`` setting.

    ``reads`` is converted with ``string_to_readset(reads, n_alleles)``. For
    every value in ``all_het`` a fresh single-individual pedigree is built
    and a ``PedigreeDPTable`` is solved:

    * truthy value: genotype likelihoods are ``None`` and the table is built
      with ``distrust_genotypes=False``;
    * falsy value: every position shares one ``PhredGenotypeLikelihoods(genos)``
      instance and the table is built with ``distrust_genotypes=True``.

    Only the result of the LAST setting in ``all_het`` is returned; earlier
    settings are still solved but their results are discarded.

    Args:
        reads: Input forwarded to ``string_to_readset``.
        n_alleles: Forwarded to ``string_to_readset``.
        all_het: Non-empty iterable of flags, one DP run per flag.
        genos: Argument for ``PhredGenotypeLikelihoods`` (falsy flags only).
        genotypes: Genotypes passed to ``Pedigree.add_individual``.
        weights: Accepted for interface compatibility; not used by this
            function.

    Returns:
        A ``(superreads_list, transmission_vector, cost)`` tuple from the
        last DP run.

    Raises:
        ValueError: If ``all_het`` yields no values (previously this surfaced
            as an ``UnboundLocalError`` at the ``return`` statement).
    """
    readset = string_to_readset(reads, n_alleles)
    positions = readset.get_positions()
    n_positions = len(positions)

    result = None
    for all_heterozygous in all_het:
        # Recombination costs 1; recombination should not occur.
        recombcost = [1] * n_positions
        pedigree = Pedigree(NumericSampleIds())
        genotype_likelihoods = [
            None if all_heterozygous else PhredGenotypeLikelihoods(genos)
        ] * n_positions
        pedigree.add_individual('individual0', genotypes, genotype_likelihoods)
        dp_table = PedigreeDPTable(readset,
                                   recombcost,
                                   pedigree,
                                   distrust_genotypes=not all_heterozygous)
        superreads_list, transmission_vector = dp_table.get_super_reads()
        cost = dp_table.get_optimal_cost()
        result = (superreads_list, transmission_vector, cost)

    if result is None:
        raise ValueError("all_het must contain at least one value")
    return result