def test_ibd_segments_in_family(self):
    '''Verify the IBD segments found within a family and the genotype errors recorded with them.

    The segments produced by ip.ibd_segments_in_family() for self.haplotype and
    self.family are concatenated into one set and compared against hard-coded
    reference values.
    '''
    family_segments = ibd.concatenate_segments(
        ip.ibd_segments_in_family(self.haplotype, self.family,
                                  PhaseParam.HET_FILL_THRESHOLD))

    # Reference segments. NOTE(review): each entry looks like
    # (snp range, (bp start, bp stop, length, error count), haplotype pairs) --
    # confirm against assert_segments_almost_equal().
    expected_segments = [
        ((3, 172), (17080378, 18541497, 1.461, 0), ((1, 1), (2, 1))),
        ((193, 3218), (18996226, 51156934, 32.161, 1), ((1, 0), (2, 1))),
        ((0, 3218), (16484792, 51156934, 34.672, 4), ((1, 0), (3, 1))),
        ((3, 240), (17080378, 19593301, 2.513, 0), ((1, 0), (4, 1))),
        ((256, 3218), (19873357, 51156934, 31.284, 0), ((4, 1), (1, 1))),
        ((0, 3218), (16484792, 51156934, 34.672, 0), ((5, 1), (1, 0))),
        ((3, 2696), (17080378, 46630634, 29.550, 0), ((6, 1), (1, 1))),
        ((2718, 3218), (46853292, 51156934, 4.304, 1), ((1, 0), (6, 1))),
        ((0, 3218), (16484792, 51156934, 34.672, 0), ((1, 0), (7, 1))),
        ((3, 1908), (17080378, 36572515, 19.492, 0), ((8, 1), (1, 0))),
        ((1939, 3218), (36804282, 51156934, 14.353, 0), ((8, 1), (1, 1))),
    ]
    assert_segments_almost_equal(family_segments, expected_segments,
                                 full_data=True, decimal=3,
                                 err_msg='Wrong IBD segments')

    # Reference value of the set's ``errors`` attribute: two parallel rows of
    # equal length.
    expected_errors = [
        [2997, 2997, 132, 1364, 2493, 2800, 132, 1364, 2493, 2800, 2997, 2997],
        [1, 2, 1, 1, 1, 1, 3, 3, 3, 3, 1, 6],
    ]
    assert_equal(family_segments.errors, expected_errors, 'Wrong genotype errors')
def test_ibd_segments(self):
    '''Verify the IBD segments derived from child recombinations.

    Runs self.child_recombinations() for the paternal side with template 2 and
    error removal enabled, phases the parent from that result, and compares
    the concatenated info.ibs_segments() against hard-coded reference values.
    '''
    _, _, recombination_info = self.child_recombinations(PATERNAL, 2, remove_errors=True)
    self.comparator.phase_parent_by_template(recombination_info)
    # NOTE: the original left ibd.phase_by_ibd(self.problem, info.ibs_segments())
    # disabled at this point; it is still not called.
    found = ibd.concatenate_segments(x for x in recombination_info.ibs_segments())
    # Debugging aid kept from the original (disabled): found.pprint_segments(True)

    # Every reference entry carries the same second field, so it is written once.
    bp_fields = (-1, -1, -1.000, 0)
    snp_and_samples = [
        ((0, 2800), ((2, 0), (0, 0))),
        ((2800, 3218), ((0, 1), (2, 0))),
        ((0, 3218), ((3, 0), (0, 0))),
        ((0, 3218), ((0, 0), (4, 0))),
        ((0, 3218), ((0, 1), (5, 0))),
        ((0, 3218), ((0, 0), (6, 0))),
        ((0, 55), ((0, 0), (7, 0))),
        ((55, 2981), ((0, 1), (7, 0))),
        ((2981, 3218), ((0, 0), (7, 0))),
        ((0, 637), ((8, 0), (0, 0))),
        ((637, 2447), ((0, 1), (8, 0))),
        ((2447, 3218), ((8, 0), (0, 0))),
        ((0, 3218), ((9, 0), (0, 0))),
        ((0, 3218), ((10, 0), (0, 0))),
        ((0, 3046), ((11, 0), (0, 0))),
        ((3046, 3218), ((0, 1), (11, 0))),
        ((0, 3081), ((0, 1), (12, 0))),
        ((3081, 3218), ((0, 0), (12, 0))),
        ((0, 199), ((0, 0), (13, 0))),
        ((199, 3089), ((0, 1), (13, 0))),
        ((3089, 3218), ((0, 0), (13, 0))),
        ((0, 860), ((0, 0), (14, 0))),
        ((860, 3218), ((0, 1), (14, 0))),
    ]
    expected = [(snp, bp_fields, samples) for (snp, samples) in snp_and_samples]

    assert_segments_almost_equal(found, expected, decimal=3,
                                 err_msg='Wrong IBD segments')
def test_ibd_segments_in_family(self):
    '''Check family IBD segments and their genotype errors against reference data.

    Expectations are declared first; the segments are then computed with
    ip.ibd_segments_in_family() and concatenated before the two comparisons.
    '''
    # Reference segments, in the order the comparison helper expects them.
    reference = [
        ((3, 172), (17080378, 18541497, 1.461, 0), ((1, 1), (2, 1))),
        ((193, 3218), (18996226, 51156934, 32.161, 1), ((1, 0), (2, 1))),
        ((0, 3218), (16484792, 51156934, 34.672, 4), ((1, 0), (3, 1))),
        ((3, 240), (17080378, 19593301, 2.513, 0), ((1, 0), (4, 1))),
        ((256, 3218), (19873357, 51156934, 31.284, 0), ((4, 1), (1, 1))),
        ((0, 3218), (16484792, 51156934, 34.672, 0), ((5, 1), (1, 0))),
        ((3, 2696), (17080378, 46630634, 29.550, 0), ((6, 1), (1, 1))),
        ((2718, 3218), (46853292, 51156934, 4.304, 1), ((1, 0), (6, 1))),
        ((0, 3218), (16484792, 51156934, 34.672, 0), ((1, 0), (7, 1))),
        ((3, 1908), (17080378, 36572515, 19.492, 0), ((8, 1), (1, 0))),
        ((1939, 3218), (36804282, 51156934, 14.353, 0), ((8, 1), (1, 1))),
    ]
    # Reference for the ``errors`` attribute, split into its two rows.
    error_row_0 = [2997, 2997, 132, 1364, 2493, 2800, 132, 1364, 2493, 2800, 2997, 2997]
    error_row_1 = [1, 2, 1, 1, 1, 1, 3, 3, 3, 3, 1, 6]

    per_family = ip.ibd_segments_in_family(self.haplotype, self.family,
                                           PhaseParam.HET_FILL_THRESHOLD)
    combined = ibd.concatenate_segments(per_family)

    assert_segments_almost_equal(combined, reference, full_data=True, decimal=3,
                                 err_msg='Wrong IBD segments')
    assert_equal(combined.errors, [error_row_0, error_row_1], 'Wrong genotype errors')
def test_ibd_segments(self):
    '''Check IBD segments computed from child recombinations against reference data.

    The recombination info for the paternal parent (template 2, errors removed)
    is used to phase the parent; its ibs_segments() are then concatenated and
    compared with the reference list below.
    '''
    side = PATERNAL
    template_child = 2
    result = self.child_recombinations(side, template_child, remove_errors=True)
    (_, _, info) = result
    self.comparator.phase_parent_by_template(info)
    # NOTE: a call to ibd.phase_by_ibd(self.problem, info.ibs_segments()) was
    # commented out here in the original and remains disabled.
    concatenated = ibd.concatenate_segments(x for x in info.ibs_segments())
    # NOTE: the original also carried a disabled debug print of
    # concatenated.pprint_segments(True) at this point.

    reference = [
        ((0, 2800), (-1, -1, -1.000, 0), ((2, 0), (0, 0))),
        ((2800, 3218), (-1, -1, -1.000, 0), ((0, 1), (2, 0))),
        ((0, 3218), (-1, -1, -1.000, 0), ((3, 0), (0, 0))),
        ((0, 3218), (-1, -1, -1.000, 0), ((0, 0), (4, 0))),
        ((0, 3218), (-1, -1, -1.000, 0), ((0, 1), (5, 0))),
        ((0, 3218), (-1, -1, -1.000, 0), ((0, 0), (6, 0))),
        ((0, 55), (-1, -1, -1.000, 0), ((0, 0), (7, 0))),
        ((55, 2981), (-1, -1, -1.000, 0), ((0, 1), (7, 0))),
        ((2981, 3218), (-1, -1, -1.000, 0), ((0, 0), (7, 0))),
        ((0, 637), (-1, -1, -1.000, 0), ((8, 0), (0, 0))),
        ((637, 2447), (-1, -1, -1.000, 0), ((0, 1), (8, 0))),
        ((2447, 3218), (-1, -1, -1.000, 0), ((8, 0), (0, 0))),
        ((0, 3218), (-1, -1, -1.000, 0), ((9, 0), (0, 0))),
        ((0, 3218), (-1, -1, -1.000, 0), ((10, 0), (0, 0))),
        ((0, 3046), (-1, -1, -1.000, 0), ((11, 0), (0, 0))),
        ((3046, 3218), (-1, -1, -1.000, 0), ((0, 1), (11, 0))),
        ((0, 3081), (-1, -1, -1.000, 0), ((0, 1), (12, 0))),
        ((3081, 3218), (-1, -1, -1.000, 0), ((0, 0), (12, 0))),
        ((0, 199), (-1, -1, -1.000, 0), ((0, 0), (13, 0))),
        ((199, 3089), (-1, -1, -1.000, 0), ((0, 1), (13, 0))),
        ((3089, 3218), (-1, -1, -1.000, 0), ((0, 0), (13, 0))),
        ((0, 860), (-1, -1, -1.000, 0), ((0, 0), (14, 0))),
        ((860, 3218), (-1, -1, -1.000, 0), ((0, 1), (14, 0))),
    ]
    assert_segments_almost_equal(concatenated, reference, decimal=3,
                                 err_msg='Wrong IBD segments')
def test_ibd_segments_in_family(self):
    '''Verify family IBD segments, their disjoint regrouping and sibling-only views.

    Steps, each compared against hard-coded reference data:
    1. Concatenate the segments from ip.ibd_segments_in_family().
    2. Regroup the set in place with group_to_disjoint(True) and check the
       segments, the recorded genotype errors, and that no SNP range occurs
       in more than 4 consecutive segments.
    3. For haplotypes (3, 0) and (3, 1), keep only the segments containing the
       haplotype, drop the haplotypes of samples 0 and 1, merge consecutive
       segments and check the result.
    '''
    family_segments = ibd.concatenate_segments(
        ip.ibd_segments_in_family(self.problem.haplotype, self.family,
                                  PhaseParam.HET_FILL_THRESHOLD))

    expected_raw = [
        ((4, 1411), (17087656, 32978753, 15.891, 0), ((2, 0), (0, 0))),
        ((1413, 3215), (33013062, 51103692, 18.091, 1), ((0, 1), (2, 0))),
        ((0, 3218), (16484792, 51156934, 34.672, 1), ((0, 1), (3, 0))),
        ((4, 2551), (17087656, 45007348, 27.920, 1), ((0, 1), (4, 0))),
        ((2569, 3215), (45198494, 51103692, 5.905, 0), ((0, 0), (4, 0))),
        ((0, 3218), (16484792, 51156934, 34.672, 1), ((0, 0), (5, 0))),
        ((4, 795), (17087656, 27011420, 9.924, 0), ((0, 0), (6, 0))),
        ((805, 3215), (27119061, 51103692, 23.985, 1), ((0, 1), (6, 0))),
        ((2, 2650), (17075353, 45892433, 28.817, 0), ((1, 1), (2, 1))),
        ((2657, 3218), (45940934, 51156934, 5.216, 2), ((1, 0), (2, 1))),
        ((1, 576), (17065079, 25228375, 8.163, 0), ((3, 1), (1, 1))),
        ((600, 3218), (25444874, 51156934, 25.712, 2), ((1, 0), (3, 1))),
        ((2, 1978), (17075353, 37228277, 20.153, 2), ((1, 0), (4, 1))),
        ((2019, 3218), (37509844, 51156934, 13.647, 0), ((4, 1), (1, 1))),
        ((0, 3218), (16484792, 51156934, 34.672, 3), ((5, 1), (1, 0))),
        ((2, 301), (17075353, 20993766, 3.918, 0), ((6, 1), (1, 1))),
        ((334, 3218), (21363960, 51156934, 29.793, 2), ((1, 0), (6, 1))),
    ]
    assert_segments_almost_equal(family_segments, expected_raw, full_data=True,
                                 decimal=3, err_msg='Wrong IBD segments')

    # Check that there are at most 4 distinct haplotypes at each SNP.
    # group_to_disjoint() changes family_segments in place; everything below
    # reads the regrouped set.
    family_segments.group_to_disjoint(True)
    expected_disjoint = [
        ((0, 1), (16484792, 17065079, 0.580, 0), ((5, 1), (1, 0))),
        ((0, 1), (16484792, 17065079, 0.580, 0), ((0, 1), (3, 0))),
        ((0, 1), (16484792, 17065079, 0.580, 0), ((0, 0), (5, 0))),
        ((1, 2), (17065079, 17075353, 0.010, 0), ((5, 1), (1, 0))),
        ((1, 2), (17065079, 17075353, 0.010, 0), ((3, 1), (1, 1))),
        ((1, 2), (17065079, 17075353, 0.010, 0), ((0, 1), (3, 0))),
        ((1, 2), (17065079, 17075353, 0.010, 0), ((0, 0), (5, 0))),
        ((2, 4), (17075353, 17087656, 0.012, 0), ((0, 0), (5, 0))),
        ((2, 4), (17075353, 17087656, 0.012, 0), ((0, 1), (3, 0))),
        ((2, 4), (17075353, 17087656, 0.012, 0), ((6, 1), (3, 1), (1, 1), (2, 1))),
        ((2, 4), (17075353, 17087656, 0.012, 0), ((5, 1), (1, 0), (4, 1))),
        ((4, 301), (17087656, 20993766, 3.906, 0), ((2, 0), (0, 0), (6, 0), (5, 0))),
        ((4, 301), (17087656, 20993766, 3.906, 0), ((0, 1), (3, 0), (4, 0))),
        ((4, 301), (17087656, 20993766, 3.906, 0), ((6, 1), (3, 1), (1, 1), (2, 1))),
        ((4, 301), (17087656, 20993766, 3.906, 0), ((5, 1), (1, 0), (4, 1))),
        ((301, 334), (20993766, 21363960, 0.370, 0), ((2, 0), (0, 0), (6, 0), (5, 0))),
        ((301, 334), (20993766, 21363960, 0.370, 0), ((0, 1), (3, 0), (4, 0))),
        ((301, 334), (20993766, 21363960, 0.370, 0), ((3, 1), (1, 1), (2, 1))),
        ((301, 334), (20993766, 21363960, 0.370, 0), ((5, 1), (1, 0), (4, 1))),
        ((334, 576), (21363960, 25228375, 3.864, 0), ((2, 0), (0, 0), (6, 0), (5, 0))),
        ((334, 576), (21363960, 25228375, 3.864, 0), ((0, 1), (3, 0), (4, 0))),
        ((334, 576), (21363960, 25228375, 3.864, 0), ((3, 1), (1, 1), (2, 1))),
        ((334, 576), (21363960, 25228375, 3.864, 0), ((5, 1), (6, 1), (4, 1), (1, 0))),
        ((576, 600), (25228375, 25444874, 0.216, 0), ((1, 1), (2, 1))),
        ((576, 600), (25228375, 25444874, 0.216, 0), ((2, 0), (0, 0), (6, 0), (5, 0))),
        ((576, 600), (25228375, 25444874, 0.216, 0), ((0, 1), (3, 0), (4, 0))),
        ((576, 600), (25228375, 25444874, 0.216, 0), ((5, 1), (6, 1), (4, 1), (1, 0))),
        ((600, 795), (25444874, 27011420, 1.567, 0), ((5, 1), (6, 1), (3, 1), (4, 1), (1, 0))),
        ((600, 795), (25444874, 27011420, 1.567, 0), ((2, 0), (0, 0), (6, 0), (5, 0))),
        ((600, 795), (25444874, 27011420, 1.567, 0), ((0, 1), (3, 0), (4, 0))),
        ((600, 795), (25444874, 27011420, 1.567, 0), ((1, 1), (2, 1))),
        ((795, 805), (27011420, 27119061, 0.108, 0), ((5, 1), (6, 1), (3, 1), (4, 1), (1, 0))),
        ((795, 805), (27011420, 27119061, 0.108, 0), ((2, 0), (0, 0), (5, 0))),
        ((795, 805), (27011420, 27119061, 0.108, 0), ((0, 1), (3, 0), (4, 0))),
        ((795, 805), (27011420, 27119061, 0.108, 0), ((1, 1), (2, 1))),
        ((805, 1411), (27119061, 32978753, 5.860, 0), ((5, 1), (6, 1), (3, 1), (4, 1), (1, 0))),
        ((805, 1411), (27119061, 32978753, 5.860, 0), ((2, 0), (0, 0), (5, 0))),
        ((805, 1411), (27119061, 32978753, 5.860, 0), ((0, 1), (3, 0), (6, 0), (4, 0))),
        ((805, 1411), (27119061, 32978753, 5.860, 0), ((1, 1), (2, 1))),
        ((1411, 1413), (32978753, 33013062, 0.034, 0), ((5, 1), (6, 1), (3, 1), (4, 1), (1, 0))),
        ((1411, 1413), (32978753, 33013062, 0.034, 0), ((1, 1), (2, 1))),
        ((1411, 1413), (32978753, 33013062, 0.034, 0), ((0, 1), (3, 0), (6, 0), (4, 0))),
        ((1411, 1413), (32978753, 33013062, 0.034, 0), ((0, 0), (5, 0))),
        ((1413, 1978), (33013062, 37228277, 4.215, 0), ((5, 1), (6, 1), (3, 1), (4, 1), (1, 0))),
        ((1413, 1978), (33013062, 37228277, 4.215, 0), ((0, 1), (3, 0), (4, 0), (6, 0), (2, 0))),
        ((1413, 1978), (33013062, 37228277, 4.215, 0), ((1, 1), (2, 1))),
        ((1413, 1978), (33013062, 37228277, 4.215, 0), ((0, 0), (5, 0))),
        ((1978, 2019), (37228277, 37509844, 0.282, 0), ((1, 1), (2, 1))),
        ((1978, 2019), (37228277, 37509844, 0.282, 0), ((0, 1), (3, 0), (4, 0), (6, 0), (2, 0))),
        ((1978, 2019), (37228277, 37509844, 0.282, 0), ((5, 1), (6, 1), (3, 1), (1, 0))),
        ((1978, 2019), (37228277, 37509844, 0.282, 0), ((0, 0), (5, 0))),
        ((2019, 2551), (37509844, 45007348, 7.498, 0), ((0, 1), (3, 0), (4, 0), (6, 0), (2, 0))),
        ((2019, 2551), (37509844, 45007348, 7.498, 0), ((0, 0), (5, 0))),
        ((2019, 2551), (37509844, 45007348, 7.498, 0), ((5, 1), (6, 1), (3, 1), (1, 0))),
        ((2019, 2551), (37509844, 45007348, 7.498, 0), ((4, 1), (1, 1), (2, 1))),
        ((2551, 2569), (45007348, 45198494, 0.191, 0), ((0, 0), (5, 0))),
        ((2551, 2569), (45007348, 45198494, 0.191, 0), ((0, 1), (3, 0), (6, 0), (2, 0))),
        ((2551, 2569), (45007348, 45198494, 0.191, 0), ((5, 1), (6, 1), (3, 1), (1, 0))),
        ((2551, 2569), (45007348, 45198494, 0.191, 0), ((4, 1), (1, 1), (2, 1))),
        ((2569, 2650), (45198494, 45892433, 0.694, 0), ((0, 0), (5, 0), (4, 0))),
        ((2569, 2650), (45198494, 45892433, 0.694, 0), ((0, 1), (3, 0), (6, 0), (2, 0))),
        ((2569, 2650), (45198494, 45892433, 0.694, 0), ((5, 1), (6, 1), (3, 1), (1, 0))),
        ((2569, 2650), (45198494, 45892433, 0.694, 0), ((4, 1), (1, 1), (2, 1))),
        ((2650, 2657), (45892433, 45940934, 0.049, 0), ((0, 0), (5, 0), (4, 0))),
        ((2650, 2657), (45892433, 45940934, 0.049, 0), ((0, 1), (3, 0), (6, 0), (2, 0))),
        ((2650, 2657), (45892433, 45940934, 0.049, 0), ((5, 1), (6, 1), (3, 1), (1, 0))),
        ((2650, 2657), (45892433, 45940934, 0.049, 0), ((4, 1), (1, 1))),
        ((2657, 3215), (45940934, 51103692, 5.163, 0), ((5, 1), (6, 1), (3, 1), (1, 0), (2, 1))),
        ((2657, 3215), (45940934, 51103692, 5.163, 0), ((0, 1), (3, 0), (6, 0), (2, 0))),
        ((2657, 3215), (45940934, 51103692, 5.163, 0), ((0, 0), (5, 0), (4, 0))),
        ((2657, 3215), (45940934, 51103692, 5.163, 0), ((4, 1), (1, 1))),
        ((3215, 3218), (51103692, 51156934, 0.053, 0), ((5, 1), (6, 1), (3, 1), (1, 0), (2, 1))),
        ((3215, 3218), (51103692, 51156934, 0.053, 0), ((4, 1), (1, 1))),
        ((3215, 3218), (51103692, 51156934, 0.053, 0), ((0, 1), (3, 0))),
        ((3215, 3218), (51103692, 51156934, 0.053, 0), ((0, 0), (5, 0))),
    ]
    assert_segments_almost_equal(family_segments, expected_disjoint, full_data=True,
                                 decimal=3, err_msg='Wrong IBD segments')

    # Reference value of the set's ``errors`` attribute: two parallel rows of
    # equal length.
    expected_errors = [
        [2162, 2162, 2162, 2162, 2162, 2162, 2162, 2162, 2162, 2162,
         2846, 3202, 2846, 3202, 1092, 3202, 1092, 3202,
         3, 1092, 3, 1092, 3, 1092, 3202, 3, 1092, 3202,
         1092, 3202, 1092, 3202],
        [0, 2, 0, 3, 0, 4, 0, 5, 0, 6,
         1, 1, 2, 2, 1, 1, 3, 3,
         1, 1, 4, 4, 1, 1, 1, 5, 5, 5,
         1, 1, 6, 6],
    ]
    assert_equal(family_segments.errors, expected_errors, 'Wrong genotype errors')

    # groupby() only groups adjacent items, so this counts runs of consecutive
    # segments that share the same ``snp`` value.
    run_lengths = [len(list(members))
                   for (_, members) in itertools.groupby(family_segments,
                                                         lambda seg: seg.snp)]
    self.assertTrue(max(run_lengths) <= 4,
                    'Too many distinct haplotypes in nuclear family')

    # Extract IBD segments of child's haplotype vs. sibs
    # Haplotypes of samples 0 and 1 are removed from every extracted segment.
    # NOTE(review): samples 0 and 1 are presumably the two parents -- confirm.
    excluded_haps = set([(0, 0), (0, 1), (1, 0), (1, 1)])

    def merged_segments_with(haplotype):
        '''Return the merged segments containing haplotype, minus excluded_haps.'''
        subset = SegmentSet(
            Segment(seg.snp, seg.samples - excluded_haps, seg.bp,
                    error_snps=seg.error_snps)
            for seg in family_segments if haplotype in seg.samples)
        subset.merge_consecutive()
        return subset

    segments_of_30 = merged_segments_with((3, 0))
    expected_30 = [
        ((0, 4), (16484792, 17087656, 0.603, 0), ((3, 0),)),
        ((4, 805), (17087656, 27119061, 10.031, 0), ((3, 0), (4, 0))),
        ((805, 1413), (27119061, 33013062, 5.894, 0), ((3, 0), (6, 0), (4, 0))),
        ((1413, 2551), (33013062, 45007348, 11.994, 0), ((3, 0), (2, 0), (6, 0), (4, 0))),
        ((2551, 3215), (45007348, 51103692, 6.096, 0), ((3, 0), (2, 0), (6, 0))),
        ((3215, 3218), (51103692, 51156934, 0.053, 0), ((3, 0),)),
    ]
    assert_segments_almost_equal(segments_of_30, expected_30, full_data=True,
                                 decimal=3, err_msg='Wrong IBD segments')

    segments_of_31 = merged_segments_with((3, 1))
    expected_31 = [
        ((1, 2), (17065079, 17075353, 0.010, 0), ((3, 1),)),
        ((2, 301), (17075353, 20993766, 3.918, 0), ((6, 1), (3, 1), (2, 1))),
        ((301, 576), (20993766, 25228375, 4.235, 0), ((3, 1), (2, 1))),
        ((600, 1978), (25444874, 37228277, 11.783, 0), ((5, 1), (6, 1), (3, 1), (4, 1))),
        ((1978, 2657), (37228277, 45940934, 8.713, 0), ((5, 1), (6, 1), (3, 1))),
        ((2657, 3218), (45940934, 51156934, 5.216, 0), ((5, 1), (6, 1), (3, 1), (2, 1))),
    ]
    assert_segments_almost_equal(segments_of_31, expected_31, full_data=True,
                                 decimal=3, err_msg='Wrong IBD segments')