def run_snip_centromere(args):
    """
    Snip the topology out of the largest site-free gap around a centromere.

    Reads ``args.centromeres`` as a CSV file with ``chrom``, ``start`` and
    ``end`` columns and uses the first row whose ``chrom`` equals
    ``args.chrom``. The tree sequence at ``args.input`` is loaded, the widest
    gap between consecutive sites in the centromere window is located, and
    ``tsinfer.snip_centromere`` is called on that gap. The result is written
    to ``args.output``.

    :param args: Object providing the ``centromeres``, ``chrom``, ``input``
        and ``output`` attributes (presumably an ``argparse`` namespace).
    :raises ValueError: If no CSV row matches ``args.chrom``, or if fewer
        than two sites fall in the centromere window, so no gap exists.
    """
    # The csv module asks for newline="" when it is handed a file object.
    with open(args.centromeres, newline="") as csvfile:
        for row in csv.DictReader(csvfile):
            if row["chrom"] == args.chrom:
                start = int(row["start"])
                end = int(row["end"])
                break
        else:
            # Only reached when the loop finished without hitting ``break``.
            raise ValueError("Did not find row")

    ts = tskit.load(args.input)
    position = ts.tables.sites.position
    # searchsorted (default side="left") gives the index of the first site at
    # or after each coordinate; this assumes site positions are sorted.
    s_index = np.searchsorted(position, start)
    e_index = np.searchsorted(position, end)
    # We have a bunch of sites within the centromere. Get the largest
    # distance between these and call these the start and end. Probably
    # pointless having the centromere coordinates as input in the first place,
    # since we're just searching for the largest gap anyway. However, it can
    # be useful in UKBB, since it's perfectly possible that the largest
    # gap between sites isn't in the centromere.
    # The "+ 1" also pulls in the first site at or after ``end``.
    X = position[s_index : e_index + 1]
    if X.shape[0] < 2:
        # Without this guard np.argmax fails on an empty array with a message
        # that says nothing about the centromere window.
        raise ValueError(
            f"Need at least two sites around the centromere window "
            f"({start}, {end}) to locate a gap; found {X.shape[0]}"
        )
    j = np.argmax(np.diff(X))
    # Start one unit past the site on the left of the gap and stop at the
    # site on its right.
    real_start = X[j] + 1
    real_end = X[j + 1]
    print("Centromere at", start, end, "Snipping topology from ", real_start, real_end)
    snipped_ts = tsinfer.snip_centromere(ts, real_start, real_end)
    snipped_ts.dump(args.output)
def test_two_populations_high_migration_no_centromere(self):
    """
    Run ``self.verify`` on the two-population high-migration example after
    snipping the interval from 0.4 to 0.6 out of it.

    Every non-zero column total of the array returned by ``self.verify``
    must be (numerically) equal to one.
    """
    simulated = self.two_populations_high_migration_example(mutation_rate=0)
    snipped = tsinfer.snip_centromere(simulated, 0.4, 0.6)
    # simplify the output to get rid of unreferenced nodes.
    ts = snipped.simplify()
    sample_sets = [ts.samples(0), ts.samples(1)]
    A = self.verify(ts, sample_sets)
    column_totals = np.sum(A, axis=0)
    nonzero_totals = column_totals[column_totals != 0]
    self.assertTrue(np.allclose(nonzero_totals, 1))
def verify(self, ts, left, right):
    """
    Snip ``(left, right)`` out of ``ts`` and check the result.

    The interval is snipped with both ``self.snip_centromere`` and
    ``tsinfer.snip_centromere``; the two results must be equal when
    provenance is ignored. A tree whose interval is exactly
    ``(left, right)`` must exist, and in it every node must have no parent.

    :return: The tree sequence produced by ``self.snip_centromere``.
    """
    local_ts = self.snip_centromere(ts, left, right)
    library_ts = tsinfer.snip_centromere(ts, left, right)
    assert local_ts.equals(library_ts, ignore_provenance=True)

    tree_found = False
    for tree in local_ts.trees():
        if tree.interval != (left, right):
            continue
        # This is the snipped region: no node may have a parent here.
        tree_found = True
        for node in local_ts.nodes():
            assert tree.parent(node.id) == tskit.NULL
        break
    assert tree_found
    return local_ts
def verify(self, ts, left, right):
    """
    Snip ``(left, right)`` out of ``ts`` and check the result.

    The interval is snipped with both ``self.snip_centromere`` and
    ``tsinfer.snip_centromere``; their tables must be equal once the
    provenance tables are cleared. A tree whose interval is exactly
    ``(left, right)`` must exist, and in it every node must have no parent.

    :return: The tree sequence produced by ``self.snip_centromere``.
    """
    local_ts = self.snip_centromere(ts, left, right)
    library_ts = tsinfer.snip_centromere(ts, left, right)

    # Compare everything except provenance.
    local_tables = local_ts.dump_tables()
    library_tables = library_ts.dump_tables()
    for tables in (local_tables, library_tables):
        tables.provenances.clear()
    self.assertEqual(local_tables, library_tables)

    tree_found = False
    for tree in local_ts.trees():
        if tree.interval != (left, right):
            continue
        # This is the snipped region: no node may have a parent here.
        tree_found = True
        for node in local_ts.nodes():
            self.assertEqual(tree.parent(node.id), tskit.NULL)
        break
    self.assertTrue(tree_found)
    return local_ts
def verify(self, ts, left, right):
    """
    Snip ``(left, right)`` out of ``ts`` and check the result.

    The interval is snipped with both ``self.snip_centromere`` and
    ``tsinfer.snip_centromere``; their tables must be equal once the
    provenance tables are cleared. A tree whose interval is exactly
    ``(left, right)`` must exist, and in it every node must have no parent.

    :return: The tree sequence produced by ``self.snip_centromere``.
    """
    local_ts = self.snip_centromere(ts, left, right)
    library_ts = tsinfer.snip_centromere(ts, left, right)

    # Compare everything except provenance.
    local_tables = local_ts.dump_tables()
    library_tables = library_ts.dump_tables()
    for tables in (local_tables, library_tables):
        tables.provenances.clear()
    assert local_tables == library_tables

    tree_found = False
    for tree in local_ts.trees():
        if tree.interval != (left, right):
            continue
        # This is the snipped region: no node may have a parent here.
        tree_found = True
        for node in local_ts.nodes():
            assert tree.parent(node.id) == tskit.NULL
        break
    assert tree_found
    return local_ts
def test_position_errors(self):
    """
    ``tsinfer.snip_centromere`` must raise ``ValueError`` for intervals
    that touch or span site positions of the simulated tree sequence.
    """
    ts = msprime.simulate(
        2, length=10, recombination_rate=1, random_seed=1, mutation_rate=2
    )
    sites = ts.tables.sites.position
    # The cases below index up to the third site.
    assert sites.shape[0] > 3

    eps = 0.001
    bad_intervals = [
        # Left cannot be on a site position.
        (sites[0], sites[0] + eps),
        # Cannot go either side of a position
        (sites[0] - eps, sites[0] + eps),
        # Cannot cover multiple positions
        (sites[0] - eps, sites[2] + eps),
    ]
    for left, right in bad_intervals:
        with pytest.raises(ValueError):
            tsinfer.snip_centromere(ts, left, right)
def test_coordinate_errors(self):
    """
    ``tsinfer.snip_centromere`` must raise ``ValueError`` for each of a set
    of invalid ``(left, right)`` pairs on a tree sequence simulated with
    ``length=10``.
    """
    ts = msprime.simulate(2, length=10, recombination_rate=1, random_seed=1)
    bad_intervals = [
        (-1, 5),  # left is negative
        (0, 5),  # left is zero
        (1, 10),  # right equals the simulated length
        (1, 11),  # right is beyond the simulated length
        (6, 5),  # left is greater than right
        (5, 5),  # left equals right
    ]
    for left, right in bad_intervals:
        with pytest.raises(ValueError):
            tsinfer.snip_centromere(ts, left, right)