예제 #1
0
def run_snip_centromere(args):
    """
    Snip the topology spanning a chromosome's centromere out of a tree
    sequence and write the result to a file.

    The row for ``args.chrom`` is looked up in the CSV file
    ``args.centromeres`` (columns ``chrom``, ``start`` and ``end``). The tree
    sequence at ``args.input`` is loaded, the widest gap between adjacent
    sites near those coordinates is located, that gap is handed to
    ``tsinfer.snip_centromere`` and the output is dumped to ``args.output``.

    :param args: Object providing the ``centromeres``, ``chrom``, ``input``
        and ``output`` attributes.
    :raises ValueError: If no CSV row matches ``args.chrom``, or if fewer
        than two sites fall in the search window so that no gap exists.
    """
    # The csv module asks for files to be opened with newline="" so that it
    # can perform its own newline handling.
    with open(args.centromeres, newline="") as csvfile:
        for row in csv.DictReader(csvfile):
            if row["chrom"] == args.chrom:
                start = int(row["start"])
                end = int(row["end"])
                break
        else:
            raise ValueError("Did not find row")
    ts = tskit.load(args.input)
    position = ts.tables.sites.position
    s_index = np.searchsorted(position, start)
    e_index = np.searchsorted(position, end)
    # We have a bunch of sites within the centromere. Get the largest
    # distance between these and call these the start and end. Probably
    # pointless having the centromere coordinates as input in the first place,
    # since we're just searching for the largest gap anyway. However, it can
    # be useful in UKBB, since it's perfectly possible that the largest
    # gap between sites isn't in the centromere.
    X = position[s_index:e_index + 1]
    if X.shape[0] < 2:
        # Without this check np.argmax fails on an empty array with a
        # message that says nothing about the actual problem.
        raise ValueError(
            "Need at least two sites around the centromere to find a gap")
    j = np.argmax(np.diff(X))
    real_start = X[j] + 1
    real_end = X[j + 1]
    print("Centromere at", start, end, "Snipping topology from ", real_start,
          real_end)
    snipped_ts = tsinfer.snip_centromere(ts, real_start, real_end)
    snipped_ts.dump(args.output)
예제 #2
0
 def test_two_populations_high_migration_no_centromere(self):
     """
     Run the two-population high-migration check on a tree sequence after
     the interval (0.4, 0.6) has been passed to ``tsinfer.snip_centromere``.
     """
     simulated = self.two_populations_high_migration_example(mutation_rate=0)
     snipped = tsinfer.snip_centromere(simulated, 0.4, 0.6)
     # Simplifying drops the nodes that are no longer referenced.
     simplified = snipped.simplify()
     sample_sets = [simplified.samples(0), simplified.samples(1)]
     A = self.verify(simplified, sample_sets)
     totals = np.sum(A, axis=0)
     # Only the non-zero totals are required to be (approximately) one.
     nonzero_totals = totals[totals != 0]
     self.assertTrue(np.allclose(nonzero_totals, 1))
예제 #3
0
 def verify(self, ts, left, right):
     """
     Snip ``[left, right)`` out of ``ts`` with both ``self.snip_centromere``
     and ``tsinfer.snip_centromere`` and check the two results agree
     (ignoring provenance). Also check that a tree with exactly that
     interval exists and that no node has a parent in it.

     Returns the tree sequence produced by ``self.snip_centromere``.
     """
     local_result = self.snip_centromere(ts, left, right)
     library_result = tsinfer.snip_centromere(ts, left, right)
     assert local_result.equals(library_result, ignore_provenance=True)
     snipped_interval = (left, right)
     found_snipped_tree = False
     for tree in local_result.trees():
         if tree.interval != snipped_interval:
             continue
         found_snipped_tree = True
         # Inside the snipped interval every node must be parentless.
         assert all(
             tree.parent(node.id) == tskit.NULL
             for node in local_result.nodes())
         break
     assert found_snipped_tree
     return local_result
예제 #4
0
 def verify(self, ts, left, right):
     """
     Snip ``[left, right)`` out of ``ts`` with both ``self.snip_centromere``
     and ``tsinfer.snip_centromere`` and check that their tables are equal
     once provenances are cleared. Also check that a tree with exactly that
     interval exists and that no node has a parent in it.

     Returns the tree sequence produced by ``self.snip_centromere``.
     """
     local_result = self.snip_centromere(ts, left, right)
     library_result = tsinfer.snip_centromere(ts, left, right)
     # Provenance records are excluded from the comparison.
     compared_tables = []
     for result in (local_result, library_result):
         tables = result.dump_tables()
         tables.provenances.clear()
         compared_tables.append(tables)
     self.assertEqual(compared_tables[0], compared_tables[1])
     snipped_interval = (left, right)
     found_snipped_tree = False
     for tree in local_result.trees():
         if tree.interval != snipped_interval:
             continue
         found_snipped_tree = True
         # Inside the snipped interval every node must be parentless.
         for node in local_result.nodes():
             self.assertEqual(tree.parent(node.id), tskit.NULL)
         break
     self.assertTrue(found_snipped_tree)
     return local_result
예제 #5
0
 def verify(self, ts, left, right):
     """
     Snip ``[left, right)`` out of ``ts`` with both ``self.snip_centromere``
     and ``tsinfer.snip_centromere`` and check that their tables are equal
     once provenances are cleared. Also check that a tree with exactly that
     interval exists and that no node has a parent in it.

     Returns the tree sequence produced by ``self.snip_centromere``.
     """
     def tables_without_provenance(tree_sequence):
         # Provenance records are excluded from the comparison.
         tables = tree_sequence.dump_tables()
         tables.provenances.clear()
         return tables

     local_result = self.snip_centromere(ts, left, right)
     library_result = tsinfer.snip_centromere(ts, left, right)
     local_tables = tables_without_provenance(local_result)
     library_tables = tables_without_provenance(library_result)
     assert local_tables == library_tables
     snipped_interval = (left, right)
     found_snipped_tree = False
     for tree in local_result.trees():
         if tree.interval != snipped_interval:
             continue
         found_snipped_tree = True
         # Inside the snipped interval every node must be parentless.
         for node in local_result.nodes():
             assert tree.parent(node.id) == tskit.NULL
         break
     assert found_snipped_tree
     return local_result
예제 #6
0
 def test_position_errors(self):
     """
     Check that ``tsinfer.snip_centromere`` raises ``ValueError`` for
     intervals that touch or straddle site positions.
     """
     ts = msprime.simulate(
         2, length=10, recombination_rate=1, random_seed=1, mutation_rate=2)
     site_positions = ts.tables.sites.position
     assert site_positions.shape[0] > 3
     first_site = site_positions[0]
     third_site = site_positions[2]
     rejected_intervals = [
         # Left cannot be on a site position.
         (first_site, first_site + 0.001),
         # Cannot go either side of a position
         (first_site - 0.001, first_site + 0.001),
         # Cannot cover multiple positions
         (first_site - 0.001, third_site + 0.001),
     ]
     for left, right in rejected_intervals:
         with pytest.raises(ValueError):
             tsinfer.snip_centromere(ts, left, right)
예제 #7
0
 def test_coordinate_errors(self):
     """
     Check that ``tsinfer.snip_centromere`` raises ``ValueError`` for each
     of a set of ``(left, right)`` pairs on a tree sequence simulated with
     ``length=10``.
     """
     ts = msprime.simulate(
         2, length=10, recombination_rate=1, random_seed=1)
     rejected_intervals = [
         (-1, 5),
         (0, 5),
         (1, 10),
         (1, 11),
         (6, 5),
         (5, 5),
     ]
     for left, right in rejected_intervals:
         with pytest.raises(ValueError):
             tsinfer.snip_centromere(ts, left, right)