示例#1
0
def test_phase_trio5():
	"""Phase three related samples that all carry genotypes [1,1,1] at recombination cost 2.

	Checks that the reported cost is 3, that the transmission vector holds a
	single distinct value, and that every sample gets the haplotypes 111/000.
	"""
	reads = """
	  B 101
	  B 101
	  B 101
	  A 111
	  A 111
	  A 111
	  C 111
	  C 111
	  C 111
	"""
	ped = Pedigree(NumericSampleIds())
	for sample in ('individual0', 'individual1', 'individual2'):
		ped.add_individual(sample, [1,1,1])
	ped.add_relationship('individual0', 'individual1', 'individual2')
	superreads, transmission, total_cost = phase_pedigree(reads, [2,2,2], ped)
	assert total_cost == 3
	assert len(set(transmission)) == 1
	# One expected haplotype pair per sample.
	expected = [('111','000') for _ in range(3)]
	assert_haplotypes(superreads, expected, 3)
示例#2
0
def create_pedigree(
    default_gq,
    distrust_genotypes,
    family,
    gl_regularizer,
    numeric_sample_ids,
    phasable_variant_table,
    trios,
):
    """Build a Pedigree holding every sample of *family* and every trio of *trios*.

    Each sample is added with its genotypes from *phasable_variant_table*.
    When *distrust_genotypes* is true, per-variant phred genotype likelihoods
    are passed along as well; otherwise ``None`` is passed in their place.
    A variant without likelihoods gets *default_gq* for every genotype except
    the called one, which gets 0. Existing likelihoods are converted with
    ``as_phred(regularizer=gl_regularizer)``.

    Returns the populated Pedigree.
    """

    def phred_likelihoods_of(sample):
        """Collect one phred likelihood object per variant of *sample*."""
        collected = []
        pairs = zip(
            phasable_variant_table.genotypes_of(sample),
            phasable_variant_table.genotype_likelihoods_of(sample),
        )
        for genotype, likelihood in pairs:
            assert genotype.is_diploid_and_biallelic()
            if likelihood is not None:
                collected.append(likelihood.as_phred(regularizer=gl_regularizer))
                continue
            # All genotypes get default_gq as genotype likelihood, except the
            # called genotype, which gets a 0.
            phred = [default_gq] * 3
            phred[genotype.get_index()] = 0
            collected.append(PhredGenotypeLikelihoods(phred))
        return collected

    pedigree = Pedigree(numeric_sample_ids)
    for sample in family:
        # If distrusting genotypes, genotype likelihoods are handed to the pedigree.
        sample_likelihoods = phred_likelihoods_of(sample) if distrust_genotypes else None
        pedigree.add_individual(
            sample, phasable_variant_table.genotypes_of(sample), sample_likelihoods
        )
    for trio in trios:
        pedigree.add_relationship(
            father_id=trio.father, mother_id=trio.mother, child_id=trio.child
        )
    return pedigree
示例#3
0
def test_phase_trio1():
	"""Phase three related samples with differing genotype lists at recombination cost 10.

	Checks for a cost of 2, a transmission vector with a single distinct value,
	and the three expected haplotype pairs.
	"""
	reads = """
	  A 111
	  A 010
	  A 110
	  B 001
	  B 110
	  B 101
	  C 001
	  C 010
	  C 010
	"""
	ped = Pedigree(NumericSampleIds())
	genotypes_by_sample = (
		('individual0', [1,2,1]),
		('individual1', [1,1,1]),
		('individual2', [0,1,1]),
	)
	for sample, genotypes in genotypes_by_sample:
		ped.add_individual(sample, genotypes)
	ped.add_relationship('individual0', 'individual1', 'individual2')
	superreads, transmission, total_cost = phase_pedigree(reads, [10,10,10], ped)
	assert total_cost == 2
	assert len(set(transmission)) == 1
	expected = [('111','010'), ('001','110'), ('001','010')]
	assert_haplotypes(superreads, expected, 3)
示例#4
0
def test_phase_trio_genotype_likelihoods():
	"""Phase three related samples while passing genotype likelihoods to the pedigree.

	Every sample is added with genotypes [0,0,0]; the first one gets its own
	likelihood list and the other two share one list. ``phase_pedigree`` is
	called with ``True`` as its fourth positional argument. Checks for a cost
	of 3, a transmission vector with a single distinct value, and the three
	expected haplotype pairs.
	"""
	reads = """
	  A 111
	  A 010
	  A 110
	  B 001
	  B 110
	  B 101
	  C 001
	  C 010
	  C 010
	"""
	ped = Pedigree(NumericSampleIds())
	first_sample_likelihoods = [
		PhredGenotypeLikelihoods(0,0,0),
		PhredGenotypeLikelihoods(0,0,1),
		PhredGenotypeLikelihoods(5,0,5)
	]
	flat_likelihoods = [PhredGenotypeLikelihoods(0,0,0)] * 3
	for sample, likelihoods in (
		('individual0', first_sample_likelihoods),
		('individual1', flat_likelihoods),
		('individual2', flat_likelihoods),
	):
		ped.add_individual(sample, [0,0,0], likelihoods)
	ped.add_relationship('individual0', 'individual1', 'individual2')
	superreads, transmission, total_cost = phase_pedigree(reads, [10,10,10], ped, True)
	assert total_cost == 3
	assert len(set(transmission)) == 1
	expected = [('111','010'), ('001','110'), ('001','010')]
	assert_haplotypes(superreads, expected, 3)
示例#5
0
def test_genotyping_trio10():
    """Genotype three related samples when only read groups A and B have reads.

    All samples start with genotypes [0, 0, 0, 0] and equal likelihoods of
    1/3; the expected genotypes are handed to ``genotype_pedigree``.
    """
    reads = """
	  B 0000
	  B 0000
	  B 0000
	  B 0000
	  B 0000
	  B 0000
	  A 1111
	  A 1111
	  A 1111
	  A 1111
	  A 1111
	  A 1111
	"""

    # There are no reads for the child, yet inheritance forces genotype 1/0
    # at every position.
    expected_genotypes = [[2, 2, 2, 2], [0, 0, 0, 0], [1, 1, 1, 1]]
    sample_ids = NumericSampleIds()
    ped = Pedigree(sample_ids)
    third = 1.0 / 3.0
    for sample in ('individual0', 'individual1', 'individual2'):
        ped.add_individual(
            sample, [0, 0, 0, 0],
            [PhredGenotypeLikelihoods(third, third, third)] * 4)
    ped.add_relationship('individual0', 'individual1', 'individual2')
    genotype_pedigree(sample_ids, reads, [10, 10, 10, 10], ped,
                      expected_genotypes)
示例#6
0
def test_phase_trio4():
	"""Phase three related samples that all carry genotypes [1,1,1] at recombination cost 1.

	Checks for a cost of 2, for a transmission vector equal to one of four
	alternating patterns, and for the three expected haplotype pairs.
	"""
	reads = """
	  B 101
	  B 101
	  B 101
	  A 111
	  A 111
	  A 111
	  C 111
	  C 111
	  C 111
	"""
	ped = Pedigree(NumericSampleIds())
	for sample in ('individual0', 'individual1', 'individual2'):
		ped.add_individual(sample, [1,1,1])
	ped.add_relationship('individual0', 'individual1', 'individual2')
	superreads, transmission, total_cost = phase_pedigree(reads, [1,1,1], ped)
	assert total_cost == 2
	accepted_transmissions = ([0,2,0], [2,0,2], [1,3,1], [3,1,3])
	assert transmission in accepted_transmissions
	expected = [('111','000'), ('101','010'), ('111','000')]
	assert_haplotypes(superreads, expected, 3)
示例#7
0
def test_phase_doubletrio_pure_genetic():
    """Phase five samples joined by two relationships using an empty reads string.

    individualC takes part in both relationships. Checks for a cost of 0, a
    transmission vector with a single distinct value, the five expected
    haplotype pairs, and the allele order of each trio.
    """
    reads = ""
    ped = Pedigree(NumericSampleIds())
    canonic_indices = (
        ("individualA", [1, 2, 1, 0]),
        ("individualB", [1, 0, 1, 1]),
        ("individualC", [2, 1, 1, 0]),
        ("individualD", [1, 2, 2, 1]),
        ("individualE", [1, 1, 1, 0]),
    )
    for sample, indices in canonic_indices:
        ped.add_individual(sample, canonic_index_list_to_biallelic_gt_list(indices))
    ped.add_relationship("individualA", "individualB", "individualC")
    ped.add_relationship("individualC", "individualD", "individualE")
    # NOTE(review): three recombination costs are given for four positions —
    # confirm this is intended.
    superreads, transmission, total_cost = phase_pedigree(
        reads, [2, 2, 2], ped, positions=[10, 20, 30, 40]
    )
    assert total_cost == 0
    assert len(set(transmission)) == 1
    expected = [
        ("0100", "1110"),
        ("0011", "1000"),
        ("1110", "1000"),
        ("1111", "0110"),
        ("1000", "0110"),
    ]
    assert_haplotypes(superreads, expected, 4)
    per_trio = get_trio_transmission_vectors(transmission, 4)
    # The first trio is samples A, B, C; the second is C, D, E.
    assert_trio_allele_order(superreads[:3], per_trio[0], 4)
    assert_trio_allele_order(superreads[2:], per_trio[1], 4)
示例#8
0
def test_phase_trio3():
	"""Phase three related samples over six positions with staggered reads.

	Checks for a cost of 4, for a transmission vector that switches value
	once after the third position, and for the three expected haplotype pairs.
	"""
	reads = """
	  A 1111
	  B 1010
	  C 111000
	  C 010101
	  B 0101
	  A  0000
	  B  1010
	  C  1010
	  C  1100
	  A   0000
	  A   1111
	  B   1010
	  B    010
	"""
	ped = Pedigree(NumericSampleIds())
	genotypes_by_sample = (
		('individual0', [1,1,1,1,1,1]),
		('individual1', [1,1,1,1,1,1]),
		('individual2', [1,2,1,1,0,1]),
	)
	for sample, genotypes in genotypes_by_sample:
		ped.add_individual(sample, genotypes)
	ped.add_relationship('individual0', 'individual1', 'individual2')
	superreads, transmission, total_cost = phase_pedigree(reads, [3,3,3,4,3,3], ped)
	assert total_cost == 4
	accepted_transmissions = (
		[0,0,0,1,1,1],
		[1,1,1,0,0,0],
		[2,2,2,3,3,3],
		[3,3,3,2,2,2],
	)
	assert transmission in accepted_transmissions
	expected = [('111111','000000'), ('010101','101010'), ('111000','010101')]
	assert_haplotypes(superreads, expected, 6)
示例#9
0
def test_genotyping_trio14():
    """Genotype three related samples over six positions at recombination cost 1000000.

    All samples start with genotypes of 0 and equal likelihoods of 1/3;
    ``genotype_pedigree`` is called with ``scaling=1000``.
    """
    reads = """
	  A 111111
	  A 111111
	  B 111111
	  B 000000
	  C 000000
	"""

    expected_genotypes = [[2, 2, 2, 2, 2, 2], [1, 1, 1, 1, 1, 1],
                          [1, 1, 1, 1, 1, 1]]
    sample_ids = NumericSampleIds()
    ped = Pedigree(sample_ids)
    third = 1 / 3.0
    for sample in ('individual0', 'individual1', 'individual2'):
        ped.add_individual(
            sample, [0, 0, 0, 0, 0, 0],
            [PhredGenotypeLikelihoods(third, third, third)] * 6)
    ped.add_relationship('individual0', 'individual1', 'individual2')
    recomb_costs = [1000000] * 6
    genotype_pedigree(sample_ids, reads, recomb_costs, ped,
                      expected_genotypes, scaling=1000)
示例#10
0
def test_phase_trio5():
    """Phase three related samples built from canonic indices [1, 1, 1] at recombination cost 2.

    Checks for a cost of 3, a transmission vector with a single distinct
    value, haplotypes 111/000 for every sample, and the trio allele order.
    """
    reads = """
      B 101
      B 101
      B 101
      A 111
      A 111
      A 111
      C 111
      C 111
      C 111
    """
    ped = Pedigree(NumericSampleIds())
    for sample in ("individual0", "individual1", "individual2"):
        ped.add_individual(sample, canonic_index_list_to_biallelic_gt_list([1, 1, 1]))
    ped.add_relationship("individual0", "individual1", "individual2")
    superreads, transmission, total_cost = phase_pedigree(reads, [2, 2, 2], ped)
    assert total_cost == 3
    assert len(set(transmission)) == 1
    expected = [("111", "000") for _ in range(3)]
    assert_haplotypes(superreads, expected, 3)
    assert_trio_allele_order(superreads, transmission, 3)
示例#11
0
def test_phase_doubletrio_pure_genetic():
    """Phase five samples joined by two relationships using an empty reads string.

    individualC takes part in both relationships. Checks for a cost of 0, a
    transmission vector with a single distinct value, the five expected
    haplotype pairs, and the allele order of each trio.
    """
    reads = ""
    ped = Pedigree(NumericSampleIds())
    genotypes_by_sample = (
        ('individualA', [1, 2, 1, 0]),
        ('individualB', [1, 0, 1, 1]),
        ('individualC', [2, 1, 1, 0]),
        ('individualD', [1, 2, 2, 1]),
        ('individualE', [1, 1, 1, 0]),
    )
    for sample, genotypes in genotypes_by_sample:
        ped.add_individual(sample, genotypes)
    ped.add_relationship('individualA', 'individualB', 'individualC')
    ped.add_relationship('individualC', 'individualD', 'individualE')
    # NOTE(review): three recombination costs are given for four positions —
    # confirm this is intended.
    superreads, transmission, total_cost = phase_pedigree(
        reads, [2, 2, 2], ped, positions=[10, 20, 30, 40])
    assert total_cost == 0
    assert len(set(transmission)) == 1
    expected = [
        ('0100', '1110'),
        ('0011', '1000'),
        ('1110', '1000'),
        ('1111', '0110'),
        ('1000', '0110'),
    ]
    assert_haplotypes(superreads, expected, 4)
    per_trio = get_trio_transmission_vectors(transmission, 4)
    # The first trio is samples A, B, C; the second is C, D, E.
    assert_trio_allele_order(superreads[:3], per_trio[0], 4)
    assert_trio_allele_order(superreads[2:], per_trio[1], 4)
示例#12
0
def test_genotyping_trio1():
    """Genotype three related samples over two positions at recombination cost 10.

    All samples start with genotypes [1, 1] and equal likelihoods of 1/3; the
    expected genotypes are handed to ``genotype_pedigree``.
    """
    reads = """
	  A 00
	  A 00
	  B 11
	  B 11
	  C 11
	  C 00
	"""

    expected_genotypes = [[0, 0], [2, 2], [1, 1]]
    sample_ids = NumericSampleIds()
    ped = Pedigree(sample_ids)
    third = 1.0 / 3.0
    for sample in ('individual0', 'individual1', 'individual2'):
        ped.add_individual(
            sample, [1, 1],
            [PhredGenotypeLikelihoods(third, third, third)] * 2)
    ped.add_relationship('individual0', 'individual1', 'individual2')
    genotype_pedigree(sample_ids, reads, [10, 10], ped, expected_genotypes)
def test_genotyping_trio1():
    """Genotype three related samples over two positions at recombination cost 10.

    All samples start from canonic indices [1, 1] with equal likelihoods of
    1/3; the expected genotypes are handed to ``genotype_pedigree``.
    """
    reads = """
      A 00
      A 00
      B 11
      B 11
      C 11
      C 00
    """

    expected_genotypes = [
        canonic_index_list_to_biallelic_gt_list(indices)
        for indices in ([0, 0], [2, 2], [1, 1])
    ]
    sample_ids = NumericSampleIds()
    ped = Pedigree(sample_ids)
    third = 1.0 / 3.0
    for sample in ("individual0", "individual1", "individual2"):
        ped.add_individual(
            sample,
            canonic_index_list_to_biallelic_gt_list([1, 1]),
            [PhredGenotypeLikelihoods([third, third, third])] * 2,
        )
    ped.add_relationship("individual0", "individual1", "individual2")
    genotype_pedigree(sample_ids, reads, [10, 10], ped, expected_genotypes)
def test_genotyping_trio13():
    """Genotype three related samples at recombination cost 1000000.

    The first two samples get likelihoods [0, 1, 0] and the third gets
    [0.25, 0.5, 0.25]; canonic index 1 is expected everywhere.
    ``genotype_pedigree`` is called with ``scaling=1000``.
    """
    # NOTE(review): the reads span 4 columns while the genotype and cost lists
    # below have 6 entries — confirm this mismatch is intended.
    reads = """
      A 1111
      A 0000
      B 1111
      B 0000
    """

    expected_genotypes = [
        canonic_index_list_to_biallelic_gt_list([1, 1, 1, 1, 1, 1])
        for _ in range(3)
    ]
    sample_ids = NumericSampleIds()
    ped = Pedigree(sample_ids)
    likelihoods_by_sample = (
        ("individual0", [0, 1, 0]),
        ("individual1", [0, 1, 0]),
        ("individual2", [0.25, 0.5, 0.25]),
    )
    for sample, likelihoods in likelihoods_by_sample:
        ped.add_individual(
            sample,
            canonic_index_list_to_biallelic_gt_list([0, 0, 0, 0, 0, 0]),
            [PhredGenotypeLikelihoods(likelihoods)] * 6,
        )
    ped.add_relationship("individual0", "individual1", "individual2")
    recomb_costs = [1000000] * 6
    genotype_pedigree(
        sample_ids, reads, recomb_costs, ped, expected_genotypes, scaling=1000,
    )
示例#15
0
def test_genotyping_trio5():
    """Genotype three related samples over three positions at recombination cost 2.

    All samples start with genotypes [0, 0, 0] and equal likelihoods of 1/3;
    the expected genotypes are handed to ``genotype_pedigree``.
    """
    reads = """
	  B 101
	  B 101
	  B 101
	  A 111
	  A 111
	  A 111
	  C 111
	  C 111
	  C 101
	  C 101
	"""
    expected_genotypes = [[2, 2, 2], [2, 0, 2], [2, 1, 2]]
    sample_ids = NumericSampleIds()
    ped = Pedigree(sample_ids)
    third = 1.0 / 3.0
    for sample in ('individual0', 'individual1', 'individual2'):
        ped.add_individual(
            sample, [0, 0, 0],
            [PhredGenotypeLikelihoods(third, third, third)] * 3)
    ped.add_relationship('individual0', 'individual1', 'individual2')
    genotype_pedigree(sample_ids, reads, [2, 2, 2], ped, expected_genotypes)
示例#16
0
def test_phase_quartet2():
	"""Phase four samples where individual2 and individual3 relate to the same two others.

	Checks for a cost of 0, a transmission vector with a single distinct
	value, and the four expected haplotype pairs.
	"""
	reads = """
	  A 111111
	  A 000000
	  B 010101
	  B 101010
	  C 000000
	  C 010101
	  D 000000
	  D 010101
	"""
	ped = Pedigree(NumericSampleIds())
	genotypes_by_sample = (
		('individual0', [1,1,1,1,1,1]),
		('individual1', [1,1,1,1,1,1]),
		('individual2', [0,1,0,1,0,1]),
		('individual3', [0,1,0,1,0,1]),
	)
	for sample, genotypes in genotypes_by_sample:
		ped.add_individual(sample, genotypes)
	for third_member in ('individual2', 'individual3'):
		ped.add_relationship('individual0', 'individual1', third_member)

	superreads, transmission, total_cost = phase_pedigree(reads, [3,3,3,3,3,3], ped)
	assert total_cost == 0
	assert len(set(transmission)) == 1
	expected = [
		('111111','000000'),
		('010101','101010'),
		('000000','010101'),
		('000000','010101'),
	]
	assert_haplotypes(superreads, expected, 6)
示例#17
0
def test_genotyping_quartet3():
    """Genotype four samples where individual2 and individual3 relate to the same two others.

    All samples start with genotypes of 0 and equal likelihoods of 1/3; the
    recombination cost is 3 at each of the six positions.
    """
    reads = """
	  A 111111
	  A 000000
	  B 010101
	  B 101010
	  C 000000
	  C 010101
	  D 000000
	  D 010101
	"""
    expected_genotypes = [[1, 1, 1, 1, 1, 1], [1, 1, 1, 1, 1, 1],
                          [0, 1, 0, 1, 0, 1], [0, 1, 0, 1, 0, 1]]
    sample_ids = NumericSampleIds()
    ped = Pedigree(sample_ids)
    third = 1.0 / 3.0
    for sample in ('individual0', 'individual1', 'individual2', 'individual3'):
        ped.add_individual(
            sample, [0, 0, 0, 0, 0, 0],
            [PhredGenotypeLikelihoods(third, third, third)] * 6)
    for third_member in ('individual2', 'individual3'):
        ped.add_relationship('individual0', 'individual1', third_member)
    genotype_pedigree(sample_ids, reads, [3, 3, 3, 3, 3, 3], ped,
                      expected_genotypes)
def test_weighted_genotyping():
    """Genotype three related samples with a weights string passed next to the reads.

    Passes the expected genotypes and a per-sample table of expected
    likelihood triples to ``genotype_pedigree``, called with ``scaling=500``.
    """
    # NOTE(review): the reads span 2 columns while the genotype and cost lists
    # below have 4 entries — confirm this mismatch is intended.
    reads = """
      B 00
      B 11
      A 11
      A 00
      C 11
      C 11
    """
    weights = """
      99
      99
      99
      99
      99
      99
    """
    expected_genotypes = [
        canonic_index_list_to_biallelic_gt_list(indices)
        for indices in ([1, 1], [1, 1], [2, 2])
    ]
    sample_ids = NumericSampleIds()
    ped = Pedigree(sample_ids)
    for sample in ("individual0", "individual1", "individual2"):
        ped.add_individual(
            sample,
            canonic_index_list_to_biallelic_gt_list([0, 0, 0, 0]),
            [PhredGenotypeLikelihoods([0.25, 0.5, 0.25])] * 4,
        )
    ped.add_relationship("individual0", "individual1", "individual2")
    # Recombination is extremely unlikely.
    recomb_costs = [1000] * 4

    # Expected likelihood triples, keyed by sample index, one per read column.
    expected_likelihoods = {
        0: [[0, 1, 0] for _ in range(2)],
        1: [[0, 1, 0] for _ in range(2)],
        2: [[0, 1.0 / 3.0, 2 / 3.0] for _ in range(2)],
    }
    genotype_pedigree(
        sample_ids,
        reads,
        recomb_costs,
        ped,
        expected_genotypes,
        weights,
        expected_likelihoods,
        scaling=500,
    )
示例#19
0
def test_phase_quartet3():
    """Phase four samples where individual2 and individual3 relate to the same two others.

    Prints the cost and transmission vector, then checks for a cost of 8, the
    four expected haplotype pairs, and the allele order of both trios.
    """
    reads = """
      A 1111
      A 0000
      B 1010
      C 111000
      C 010101
      D 000000
      D 010
      B 0101
      C  1100
      D  10010
      A   0000
      A   1111
      B   1010
      B   0101
    """
    ped = Pedigree(NumericSampleIds())
    canonic_indices = (
        ("individual0", [1, 1, 1, 1, 1, 1]),
        ("individual1", [1, 1, 1, 1, 1, 1]),
        ("individual2", [1, 2, 1, 1, 0, 1]),
        ("individual3", [0, 1, 0, 0, 1, 0]),
    )
    for sample, indices in canonic_indices:
        ped.add_individual(sample, canonic_index_list_to_biallelic_gt_list(indices))
    for third_member in ("individual2", "individual3"):
        ped.add_relationship("individual0", "individual1", third_member)
    superreads, transmission, total_cost = phase_pedigree(
        reads, [3, 3, 3, 4, 3, 3], ped
    )
    print(total_cost)
    print(transmission)
    assert total_cost == 8
    # TODO: expect transmission in both trio relations. Update once transmission vectors
    #       are returned per trio relationship
    # assert transmission_vector in ([0,0,0,1,1,1], [1,1,1,0,0,0], [2,2,2,3,3,3], [3,3,3,2,2,2])
    expected = [
        ("111111", "000000"),
        ("010101", "101010"),
        ("111000", "010101"),
        ("000000", "010010"),
    ]
    assert_haplotypes(superreads, expected, 6)
    per_trio = get_trio_transmission_vectors(transmission, 6)
    assert_trio_allele_order(superreads[:3], per_trio[0], 6)
    # The second trio reuses the first two samples and swaps in the fourth.
    second_trio = [superreads[0], superreads[1], superreads[3]]
    assert_trio_allele_order(second_trio, per_trio[1], 6)
def test_genotyping_empty_trio():
    """Construct a GenotypeDPTable from an empty read set and three empty related samples."""
    empty_reads = ReadSet()
    no_costs = []
    sample_ids = NumericSampleIds()
    ped = Pedigree(sample_ids)
    for sample in ("individual0", "individual1", "individual2"):
        ped.add_individual(sample, [], [])
    ped.add_relationship("individual0", "individual1", "individual2")
    # The test only requires that construction completes; the table is not inspected.
    _ = GenotypeDPTable(sample_ids, empty_reads, no_costs, ped)
示例#21
0
def test_phase_empty_trio():
	"""Build a PedigreeDPTable from an empty read set and unpack its super reads."""
	empty_reads = ReadSet()
	no_costs = []
	ped = Pedigree(NumericSampleIds())
	for sample in ('individual0', 'individual1', 'individual2'):
		ped.add_individual(sample, [])
	ped.add_relationship('individual0', 'individual1', 'individual2')
	table = PedigreeDPTable(empty_reads, no_costs, ped)
	# The nested unpacking fails unless exactly three super-read entries and a
	# transmission vector come back.
	(superreads_a, superreads_b, superreads_c), transmission = table.get_super_reads()
示例#22
0
def test_genotyping_empty_trio():
    """Construct a GenotypeDPTable from an empty read set and three empty related samples."""
    empty_reads = ReadSet()
    no_costs = []
    sample_ids = NumericSampleIds()
    ped = Pedigree(sample_ids)
    for sample in ('individual0', 'individual1', 'individual2'):
        ped.add_individual(sample, [], [])
    ped.add_relationship('individual0', 'individual1', 'individual2')
    # The test only requires that construction completes; the table is not inspected.
    _ = GenotypeDPTable(sample_ids, empty_reads, no_costs, ped)
def test_genotyping_quartet4():
    """Genotype four samples where individual2 and individual3 relate to the same two others.

    All samples start from canonic index 0 with equal likelihoods of 1/3;
    the expected genotypes are handed to ``genotype_pedigree``.
    """
    reads = """
      A 1111
      A 0000
      B 1010
      C 111000
      C 010101
      D 000000
      D 010
      B 0101
      C  1100
      D  10010
      A   0000
      A   1111
      B   1010
      B   0101
    """
    expected_genotypes = [
        canonic_index_list_to_biallelic_gt_list(indices)
        for indices in (
            [1, 1, 1, 1, 1, 1],
            [1, 1, 1, 1, 1, 1],
            [1, 2, 1, 1, 0, 1],
            [0, 1, 0, 0, 1, 0],
        )
    ]
    sample_ids = NumericSampleIds()
    ped = Pedigree(sample_ids)
    third = 1.0 / 3.0
    for sample in ("individual0", "individual1", "individual2", "individual3"):
        ped.add_individual(
            sample,
            canonic_index_list_to_biallelic_gt_list([0, 0, 0, 0, 0, 0]),
            [PhredGenotypeLikelihoods([third, third, third])] * 6,
        )
    for third_member in ("individual2", "individual3"):
        ped.add_relationship("individual0", "individual1", third_member)
    genotype_pedigree(sample_ids, reads, [3, 3, 3, 4, 3, 3], ped, expected_genotypes)
示例#24
0
def test_phase_quartet2():
    """Phase four samples where individual2 and individual3 relate to the same two others.

    Checks for a cost of 0, a transmission vector with a single distinct
    value, the four expected haplotype pairs, and the allele order of both trios.
    """
    reads = """
      A 111111
      A 000000
      B 010101
      B 101010
      C 000000
      C 010101
      D 000000
      D 010101
    """
    ped = Pedigree(NumericSampleIds())
    canonic_indices = (
        ("individual0", [1, 1, 1, 1, 1, 1]),
        ("individual1", [1, 1, 1, 1, 1, 1]),
        ("individual2", [0, 1, 0, 1, 0, 1]),
        ("individual3", [0, 1, 0, 1, 0, 1]),
    )
    for sample, indices in canonic_indices:
        ped.add_individual(sample, canonic_index_list_to_biallelic_gt_list(indices))
    for third_member in ("individual2", "individual3"):
        ped.add_relationship("individual0", "individual1", third_member)

    superreads, transmission, total_cost = phase_pedigree(
        reads, [3, 3, 3, 3, 3, 3], ped
    )
    assert total_cost == 0
    assert len(set(transmission)) == 1
    expected = [
        ("111111", "000000"),
        ("010101", "101010"),
        ("000000", "010101"),
        ("000000", "010101"),
    ]
    assert_haplotypes(superreads, expected, 6)
    per_trio = get_trio_transmission_vectors(transmission, 6)
    assert_trio_allele_order(superreads[:3], per_trio[0], 6)
    # The second trio reuses the first two samples and swaps in the fourth.
    second_trio = [superreads[0], superreads[1], superreads[3]]
    assert_trio_allele_order(second_trio, per_trio[1], 6)
示例#25
0
def test_phase_trio_pure_genetic():
	"""Phase three related samples using an empty reads string and explicit positions.

	Checks for a cost of 0, a transmission vector with a single distinct
	value, and the three expected haplotype pairs.
	"""
	reads = ""
	ped = Pedigree(NumericSampleIds())
	genotypes_by_sample = (
		('individual0', [2,1,1,0]),
		('individual1', [1,2,2,1]),
		('individual2', [1,1,1,0]),
	)
	for sample, genotypes in genotypes_by_sample:
		ped.add_individual(sample, genotypes)
	ped.add_relationship('individual0', 'individual1', 'individual2')
	# NOTE(review): three recombination costs are given for four positions —
	# confirm this is intended.
	superreads, transmission, total_cost = phase_pedigree(
		reads, [2,2,2], ped, positions=[10,20,30,40])
	assert total_cost == 0
	assert len(set(transmission)) == 1
	expected = [('1110','1000'), ('1111','0110'), ('1000','0110')]
	assert_haplotypes(superreads, expected, 4)
示例#26
0
def test_phase_trio3():
    """Phase three related samples over six positions with staggered reads.

    Checks for a cost of 4, for a transmission vector that switches value
    once after the third position, for the three expected haplotype pairs,
    and for the trio allele order.
    """
    reads = """
      A 1111
      B 1010
      C 111000
      C 010101
      B 0101
      A  0000
      B  1010
      C  1010
      C  1100
      A   0000
      A   1111
      B   1010
      B    010
    """
    ped = Pedigree(NumericSampleIds())
    canonic_indices = (
        ("individual0", [1, 1, 1, 1, 1, 1]),
        ("individual1", [1, 1, 1, 1, 1, 1]),
        ("individual2", [1, 2, 1, 1, 0, 1]),
    )
    for sample, indices in canonic_indices:
        ped.add_individual(sample, canonic_index_list_to_biallelic_gt_list(indices))
    ped.add_relationship("individual0", "individual1", "individual2")
    superreads, transmission, total_cost = phase_pedigree(
        reads, [3, 3, 3, 4, 3, 3], ped
    )
    assert total_cost == 4
    accepted_transmissions = (
        [0, 0, 0, 1, 1, 1],
        [1, 1, 1, 0, 0, 0],
        [2, 2, 2, 3, 3, 3],
        [3, 3, 3, 2, 2, 2],
    )
    assert transmission in accepted_transmissions
    expected = [
        ("111111", "000000"),
        ("010101", "101010"),
        ("111000", "010101"),
    ]
    assert_haplotypes(superreads, expected, 6)
    assert_trio_allele_order(superreads, transmission, 6)
示例#27
0
def test_phase_trio_genotype_likelihoods():
    """Phase three related samples while passing genotype likelihoods to the pedigree.

    Every sample is built from canonic indices [0, 0, 0]; the first one gets
    its own likelihood list and the other two share one list.
    ``phase_pedigree`` is called with ``True`` as its fourth positional
    argument. Checks for a cost of 3, a transmission vector with a single
    distinct value, the three expected haplotype pairs, and the trio allele order.
    """
    reads = """
      A 111
      A 010
      A 110
      B 001
      B 110
      B 101
      C 001
      C 010
      C 010
    """
    ped = Pedigree(NumericSampleIds())
    first_sample_likelihoods = [
        PhredGenotypeLikelihoods(phred)
        for phred in ([0, 0, 0], [0, 0, 1], [5, 0, 5])
    ]
    flat_likelihoods = [PhredGenotypeLikelihoods([0, 0, 0])] * 3
    for sample, likelihoods in (
        ("individual0", first_sample_likelihoods),
        ("individual1", flat_likelihoods),
        ("individual2", flat_likelihoods),
    ):
        ped.add_individual(
            sample,
            canonic_index_list_to_biallelic_gt_list([0, 0, 0]),
            likelihoods,
        )
    ped.add_relationship("individual0", "individual1", "individual2")
    superreads, transmission, total_cost = phase_pedigree(
        reads, [10, 10, 10], ped, True
    )
    assert total_cost == 3
    assert len(set(transmission)) == 1
    expected = [("111", "010"), ("001", "110"), ("001", "010")]
    assert_haplotypes(superreads, expected, 3)
    assert_trio_allele_order(superreads, transmission, 3)
示例#28
0
def test_phase_quartet3():
    """Phase four samples where individual2 and individual3 relate to the same two others.

    Prints the cost and transmission vector, then checks for a cost of 8, the
    four expected haplotype pairs, and the allele order of both trios.
    """
    reads = """
	  A 1111
	  A 0000
	  B 1010
	  C 111000
	  C 010101
	  D 000000
	  D 010
	  B 0101
	  C  1100
	  D  10010
	  A   0000
	  A   1111
	  B   1010
	  B   0101
	"""
    ped = Pedigree(NumericSampleIds())
    genotypes_by_sample = (
        ('individual0', [1, 1, 1, 1, 1, 1]),
        ('individual1', [1, 1, 1, 1, 1, 1]),
        ('individual2', [1, 2, 1, 1, 0, 1]),
        ('individual3', [0, 1, 0, 0, 1, 0]),
    )
    for sample, genotypes in genotypes_by_sample:
        ped.add_individual(sample, genotypes)
    for third_member in ('individual2', 'individual3'):
        ped.add_relationship('individual0', 'individual1', third_member)
    superreads, transmission, total_cost = phase_pedigree(
        reads, [3, 3, 3, 4, 3, 3], ped)
    print(total_cost)
    print(transmission)
    assert total_cost == 8
    # TODO: expect transmission in both trio relations. Update once transmission vectors
    #       are returned per trio relationship
    # assert transmission_vector in ([0,0,0,1,1,1], [1,1,1,0,0,0], [2,2,2,3,3,3], [3,3,3,2,2,2])
    expected = [
        ('111111', '000000'),
        ('010101', '101010'),
        ('111000', '010101'),
        ('000000', '010010'),
    ]
    assert_haplotypes(superreads, expected, 6)
    per_trio = get_trio_transmission_vectors(transmission, 6)
    assert_trio_allele_order(superreads[:3], per_trio[0], 6)
    # The second trio reuses the first two samples and swaps in the fourth.
    second_trio = [superreads[0], superreads[1], superreads[3]]
    assert_trio_allele_order(second_trio, per_trio[1], 6)
示例#29
0
def test_weighted_genotyping():
    """Genotype three related samples with a weights string passed next to the reads.

    Passes the expected genotypes and a per-sample table of expected
    likelihood triples to ``genotype_pedigree``, called with ``scaling=500``.
    """
    # NOTE(review): the reads span 2 columns while the genotype and cost lists
    # below have 4 entries — confirm this mismatch is intended.
    reads = """
	  B 00
	  B 11
	  A 11
	  A 00
	  C 11
	  C 11
	"""
    weights = """
	  99
	  99
	  99
	  99
	  99
	  99
	"""
    expected_genotypes = [[1, 1], [1, 1], [2, 2]]
    sample_ids = NumericSampleIds()
    ped = Pedigree(sample_ids)
    for sample in ('individual0', 'individual1', 'individual2'):
        ped.add_individual(sample, [0, 0, 0, 0],
                           [PhredGenotypeLikelihoods(0.25, 0.5, 0.25)] * 4)
    ped.add_relationship('individual0', 'individual1', 'individual2')
    # Recombination is extremely unlikely.
    recomb_costs = [1000] * 4

    # Expected likelihood triples, keyed by sample index, one per read column.
    expected_likelihoods = {
        0: [[0, 1, 0] for _ in range(2)],
        1: [[0, 1, 0] for _ in range(2)],
        2: [[0, 1.0 / 3.0, 2 / 3.0] for _ in range(2)],
    }
    genotype_pedigree(sample_ids,
                      reads,
                      recomb_costs,
                      ped,
                      expected_genotypes,
                      weights,
                      expected_likelihoods,
                      scaling=500)
def test_genotyping_trio10():
    """Genotype three related samples when only read groups A and B have reads.

    All samples start from canonic index 0 with equal likelihoods of 1/3;
    the expected genotypes are handed to ``genotype_pedigree``.
    """
    reads = """
      B 0000
      B 0000
      B 0000
      B 0000
      B 0000
      B 0000
      A 1111
      A 1111
      A 1111
      A 1111
      A 1111
      A 1111
    """

    # There are no reads for the child, yet inheritance forces genotype 1/0
    # at every position.
    expected_genotypes = [
        canonic_index_list_to_biallelic_gt_list(indices)
        for indices in ([2, 2, 2, 2], [0, 0, 0, 0], [1, 1, 1, 1])
    ]
    sample_ids = NumericSampleIds()
    ped = Pedigree(sample_ids)
    third = 1.0 / 3.0
    for sample in ("individual0", "individual1", "individual2"):
        ped.add_individual(
            sample,
            canonic_index_list_to_biallelic_gt_list([0, 0, 0, 0]),
            [PhredGenotypeLikelihoods([third, third, third])] * 4,
        )
    ped.add_relationship("individual0", "individual1", "individual2")
    genotype_pedigree(sample_ids, reads, [10, 10, 10, 10], ped, expected_genotypes)