def upgrade_slim_provenance(tables):
    """
    Copies the last provenance entry from a previous SLiM file version to that
    required by the current file version.

    Only file versions "0.1" and "0.2" are upgraded. For any other version a
    warning is issued and the tables are left untouched.

    :param TableCollection tables: the table collection
    :raises ValueError: if no provenance record is recognised as a SLiM entry.
    """
    provlist = [json.loads(x.record) for x in tables.provenances]
    prov_info = [(_slim_provenance_version(u), u) for u in provlist]
    # Keep only the records flagged as SLiM entries (first element of the
    # version tuple), so no second "is this SLiM?" check is needed later.
    slim_prov = [x for x in prov_info if x[0][0]]
    if not slim_prov:
        raise ValueError("Tree sequence contains no SLiM provenance entries.")
    (_is_slim, file_version), record = slim_prov[-1]
    if file_version not in ("0.1", "0.2"):
        warnings.warn("File version is not v0.1 or v0.2; not doing anything.")
        # Actually do nothing, as the warning promises, instead of falling
        # through and appending a converted record.
        return
    if file_version == "0.1":
        # v0.1 records keep these fields at the top level.
        model_type = record['model_type']
        generation = record['generation']
    else:
        # v0.2 records nest them under 'parameters' and 'slim'.
        model_type = record['parameters']['model_type']
        generation = record['slim']['generation']
    new_record = make_slim_provenance_dict(model_type, generation)
    new_record['parameters']['command'] = ['pyslim', 'convert']
    tskit.validate_provenance(new_record)
    tables.provenances.add_row(json.dumps(new_record))
def upgrade_slim_provenance(tables):
    """
    Copies the last provenance entry from a previous SLiM file version to that
    required by the current file version.

    Only file versions older than 0.4 are upgraded. For any other version a
    warning is issued and the tables are left untouched.

    DEPRECATED: this method will disappear at some point in the future, and
    this information is now stored in the tree sequence metadata.

    :param TableCollection tables: the table collection
    :raises ValueError: if no provenance record is recognised as a SLiM entry.
    """
    prov_info = [
        (slim_provenance_version(p), json.loads(p.record))
        for p in tables.provenances
    ]
    # Keep only the records flagged as SLiM entries (first element of the
    # version tuple), so no second "is this SLiM?" check is needed later.
    slim_prov = [x for x in prov_info if x[0][0]]
    if not slim_prov:
        raise ValueError("Tree sequence contains no SLiM provenance entries.")
    (_is_slim, file_version), record = slim_prov[-1]
    if not (float(file_version) < 0.4):
        warnings.warn(
            "File version is not older than 0.4; not doing anything.")
        # Actually do nothing, as the warning promises, instead of falling
        # through and appending a converted record.
        return
    if file_version == "0.1":
        # v0.1 records keep these fields at the top level.
        model_type = record['model_type']
        generation = record['generation']
    else:
        # Later records nest them under 'parameters' and 'slim'.
        model_type = record['parameters']['model_type']
        generation = record['slim']['generation']
    new_record = make_slim_provenance_dict(model_type, generation)
    new_record['parameters']['command'] = ['pyslim', 'convert']
    tskit.validate_provenance(new_record)
    tables.provenances.add_row(json.dumps(new_record))
def test_software_empty_strings(self):
    """An empty software name or version string must fail validation."""
    for field in ("software_name", "software_version"):
        doc = get_provenance(**{field: ""})
        with self.assertRaises(tskit.ProvenanceValidationError):
            tskit.validate_provenance(doc)
def test_simplify(self):
    """Check the provenance record appended by ``simplify``."""
    simplified = msprime.simulate(5, random_seed=1).simplify()
    # Record 0 comes from the simulation; record 1 is the simplify step.
    record = json.loads(simplified.provenance(1).record)
    tskit.validate_provenance(record)
    command = record["parameters"]["command"]
    assert command == "simplify"
    assert record["environment"] == provenance.get_environment(include_tskit=False)
    assert record["software"] == {"name": "tskit", "version": tskit.__version__}
def test_minimal(self):
    """A document with only the four top-level keys below validates."""
    software = {"name": "x", "version": "y"}
    document = dict(
        schema_version="1",
        software=software,
        environment={},
        parameters={},
    )
    tskit.validate_provenance(document)
def test_software_types(self):
    """Non-string software name or version values must fail validation."""
    bad_values = [0, [1, 2, 3], {}]
    for bad_type in bad_values:
        # Name first, then version, for each offending value.
        for field in ("software_name", "software_version"):
            doc = get_provenance(**{field: bad_type})
            with self.assertRaises(tskit.ProvenanceValidationError):
                tskit.validate_provenance(doc)
def _upgrade_old_tables(tables):
    """
    Upgrade, in place, tables written by an older SLiM file version.

    A warning announcing the conversion is always emitted. For file versions
    "0.1" and "0.2" the mutation metadata is re-encoded; for "0.1" the node
    and migration times are additionally shifted by the SLiM generation.
    Finally a provenance record describing the upgrade is appended.

    :param tables: the table collection to modify.
    """
    # Reading the provenance may itself warn; silence that and emit our own
    # warning below instead.
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        old_provenance = get_provenance(tables)
    file_version = old_provenance.file_version
    slim_generation = old_provenance.slim_generation
    warnings.warn(
        f"This is an version {file_version} SLiM tree sequence."
        " When you write this out, "
        f"it will be converted to version {slim_file_version}.")

    if file_version in ("0.1", "0.2"):
        # add empty nucleotide slots to metadata
        mutations = tables.mutations
        raw_metadata = tskit.unpack_bytes(
            mutations.metadata, mutations.metadata_offset)
        upgraded = [_decode_mutation_pre_nucleotides(md) for md in raw_metadata]
        metadata, metadata_offset = tskit.pack_bytes(upgraded)
        mutations.set_columns(
            site=mutations.site,
            node=mutations.node,
            parent=mutations.parent,
            derived_state=mutations.derived_state,
            derived_state_offset=mutations.derived_state_offset,
            metadata=metadata,
            metadata_offset=metadata_offset)

    if file_version == "0.1":
        # shift times
        nodes = tables.nodes
        shifted_node_times = nodes.time + slim_generation
        nodes.set_columns(
            flags=nodes.flags,
            time=shifted_node_times,
            population=nodes.population,
            individual=nodes.individual,
            metadata=nodes.metadata,
            metadata_offset=nodes.metadata_offset)
        migrations = tables.migrations
        shifted_migration_times = migrations.time + slim_generation
        migrations.set_columns(
            left=migrations.left,
            right=migrations.right,
            node=migrations.node,
            source=migrations.source,
            dest=migrations.dest,
            time=shifted_migration_times)

    # Key order is kept as-is so the serialised JSON is unchanged.
    software = {"name": "pyslim", "version": pyslim_version}
    parameters = {
        "command": ["_upgrade_old_tables"],
        "old_file_version": file_version,
        "new_file_version": slim_file_version,
    }
    new_record = {
        "schema_version": "1.0.0",
        "software": software,
        "parameters": parameters,
        "environment": get_environment(),
    }
    tskit.validate_provenance(new_record)
    tables.provenances.add_row(json.dumps(new_record))
def test_extra_stuff(self):
    """Extra keys at the top level and inside each section still validate."""
    software = dict(put="anything", name="x", version="y")
    environment = dict(extra=["you", "want"])
    parameters = dict(so=["long", "its", "JSON", 0])
    document = dict(
        you="can",
        schema_version="1",
        software=software,
        environment=environment,
        parameters=parameters,
    )
    tskit.validate_provenance(document)
def verify_round_trip(self, ts, version):
    """
    Write ``ts`` in the legacy format ``version``, read it back, re-dump it in
    the current format and reload it. Both loaded copies are compared with
    ``ts``, and every provenance record of the final copy is validated.
    """
    tskit.dump_legacy(ts, self.temp_file, version=version)
    from_legacy = tskit.load_legacy(self.temp_file)
    # Comparison goes through simplify only for legacy versions below 10.
    simplify = version < 10
    self.verify_tree_sequences_equal(ts, from_legacy, simplify=simplify)
    from_legacy.dump(self.temp_file)
    reloaded = tskit.load(self.temp_file)
    self.verify_tree_sequences_equal(ts, reloaded, simplify=simplify)
    for row in reloaded.provenances():
        document = json.loads(row.record)
        tskit.validate_provenance(document)
def test_basic(self):
    """The command, seed and call arguments all end up in ``parameters``."""
    # The helper is left untouched: its frame is what gets inspected.
    def somefunc(a, b):
        frame = inspect.currentframe()
        return ancestry._build_provenance("cmd", 1234, frame)

    d = somefunc(42, 43)
    tskit.validate_provenance(d)
    params = d["parameters"]
    expected = {"command": "cmd", "random_seed": 1234, "a": 42, "b": 43}
    for key, value in expected.items():
        assert params[key] == value
def test_basic(self):
    """The command, seed and call arguments all end up in ``parameters``."""
    # The helper is left untouched: its frame is what gets inspected.
    def somefunc(a, b):
        frame = inspect.currentframe()
        return ancestry._build_provenance("cmd", 1234, frame)

    d = somefunc(42, 43)
    tskit.validate_provenance(d)
    params = d["parameters"]
    expected_pairs = (
        ("command", "cmd"),
        ("random_seed", 1234),
        ("a", 42),
        ("b", 43),
    )
    for key, value in expected_pairs:
        self.assertEqual(params[key], value)
def verify(self, cmd, num_samples):
    """
    Run the stdpopsim CLI with ``cmd``, load the tree sequence it writes, and
    check the sample count and the arguments stored in the last provenance
    record.
    """
    with tempfile.TemporaryDirectory() as tmpdir:
        filename = pathlib.Path(tmpdir) / "output.trees"
        full_cmd = " ".join(["python3 -m stdpopsim -q", cmd, str(filename)])
        subprocess.run(full_cmd, shell=True, check=True)
        # TODO converting to str isn't necessary in tskit 0.1.5. Remove.
        ts = tskit.load(str(filename))
    self.assertEqual(ts.num_samples, num_samples)
    last_record = ts.provenance(ts.num_provenances - 1).record
    provenance = json.loads(last_record)
    tskit.validate_provenance(provenance)
    stored_cmd = provenance["parameters"]["args"]
    self.assertEqual(stored_cmd[0], "-q")
    # Everything between the leading "-q" and the trailing output path.
    self.assertEqual(stored_cmd[1:-1], cmd.split())
def test_replicates(self):
    """
    Keyword arguments are stored in ``parameters``, while the replicate
    bookkeeping arguments ``num_replicates`` and ``replicate_index`` are not.
    """
    # The helper is left untouched: its frame is what gets inspected.
    def somefunc(*, a, b, num_replicates, replicate_index):
        frame = inspect.currentframe()
        return ancestry._build_provenance("the_cmd", 42, frame)

    d = somefunc(b="b", a="a", num_replicates=100, replicate_index=1234)
    tskit.validate_provenance(d)
    params = d["parameters"]
    assert params["command"] == "the_cmd"
    assert params["random_seed"] == 42
    assert params["a"] == "a"
    assert params["b"] == "b"
    # Call arguments are recorded under ``parameters`` (see the assertions
    # above), so the exclusion is checked there. Checking the top-level
    # document ``d`` would pass vacuously.
    assert "num_replicates" not in params
    assert "replicate_index" not in params
def verify(self, cmd, num_samples, seed=1):
    """
    Run the stdpopsim CLI with ``cmd`` under the current interpreter, load the
    tree sequence it writes, and check the sample count and the arguments
    stored in the last provenance record.
    """
    with tempfile.TemporaryDirectory() as tmpdir:
        filename = pathlib.Path(tmpdir) / "output.trees"
        pieces = [
            sys.executable, "-m", "stdpopsim", "-q", cmd,
            "-o", filename, "-s", seed,
        ]
        full_cmd = " ".join(str(piece) for piece in pieces)
        subprocess.run(full_cmd, shell=True, check=True)
        ts = tskit.load(str(filename))
    assert ts.num_samples == num_samples
    last_record = ts.provenance(ts.num_provenances - 1).record
    provenance = json.loads(last_record)
    tskit.validate_provenance(provenance)
    stored_cmd = provenance["parameters"]["args"]
    assert stored_cmd[0] == "-q"
    assert stored_cmd[-1] == str(seed)
    assert stored_cmd[-2] == "-s"
    # Strip the leading "-q" and the trailing "-o <file> -s <seed>".
    assert stored_cmd[1:-4] == cmd.split()
def verify(self, cmd, num_samples, seed=1):
    """
    Run the stdpopsim CLI with ``cmd``, load the tree sequence it writes, and
    check the sample count, the arguments stored in the last provenance
    record, and the random seed stored in the first one.
    """
    with tempfile.TemporaryDirectory() as tmpdir:
        filename = pathlib.Path(tmpdir) / "output.trees"
        pieces = ["python3 -m stdpopsim", cmd, filename, "-s", seed, "-q"]
        full_cmd = " ".join(str(piece) for piece in pieces)
        subprocess.run(full_cmd, shell=True, check=True)
        ts = tskit.load(str(filename))
    self.assertEqual(ts.num_samples, num_samples)

    last_record = ts.provenance(ts.num_provenances - 1).record
    provenance = json.loads(last_record)
    tskit.validate_provenance(provenance)
    stored_cmd = provenance["parameters"]["args"]
    self.assertEqual(stored_cmd[-1], "-q")
    self.assertEqual(stored_cmd[-2], str(seed))
    self.assertEqual(stored_cmd[-3], "-s")
    # Strip the trailing "<file> -s <seed> -q".
    self.assertEqual(stored_cmd[:-4], cmd.split())

    first_record = ts.provenance(0).record
    provenance = json.loads(first_record)
    prov_seed = provenance["parameters"]["random_seed"]
    self.assertEqual(prov_seed, seed)
def test_missing_keys(self):
    """
    Removing any single top-level key, or either of the software ``name`` /
    ``version`` keys, from an otherwise valid document must fail validation.
    """
    minimal = get_provenance()
    tskit.validate_provenance(minimal)
    for key in minimal:
        trimmed = {k: v for k, v in minimal.items() if k != key}
        with self.assertRaises(tskit.ProvenanceValidationError):
            tskit.validate_provenance(trimmed)
    # Build a fresh "software" mapping for each case. ``dict(minimal)`` is
    # only a shallow copy, so deleting from its nested dict would also strip
    # the key from ``minimal``, and the "version" case would then be tested
    # on a document that is already missing "name".
    for key in ("name", "version"):
        trimmed = dict(minimal)
        trimmed["software"] = {
            k: v for k, v in minimal["software"].items() if k != key
        }
        with self.assertRaises(tskit.ProvenanceValidationError):
            tskit.validate_provenance(trimmed)
def validate_file(self, data):
    """Validate the record of every (timestamp, record) pair in ``data``."""
    for _timestamp, document in data.provenances():
        tskit.validate_provenance(document)
def test_mutate(self):
    """Check the provenance record appended by ``msprime.mutate``."""
    base_ts = msprime.simulate(5, random_seed=1)
    mutated_ts = msprime.mutate(base_ts, rate=1, random_seed=1)
    # Record 0 comes from the simulation; record 1 is the mutate step.
    record = json.loads(mutated_ts.provenance(1).record)
    tskit.validate_provenance(record)
    command = record["parameters"]["command"]
    self.assertEqual(command, "mutate")
def test_schema_version_empth(self):
    """An empty ``schema_version`` string must fail validation."""
    document = get_provenance(schema_version="")
    expected_error = tskit.ProvenanceValidationError
    with self.assertRaises(expected_error):
        tskit.validate_provenance(document)
def test_simplify(self):
    """Check the provenance record appended by ``simplify``."""
    simplified = msprime.simulate(5, random_seed=1).simplify()
    # Record 0 comes from the simulation; record 1 is the simplify step.
    record = json.loads(simplified.provenance(1).record)
    tskit.validate_provenance(record)
    command = record["parameters"]["command"]
    self.assertEqual(command, "simplify")
def test_empty(self):
    """An empty document must fail validation."""
    empty_document = {}
    expected_error = tskit.ProvenanceValidationError
    with self.assertRaises(expected_error):
        tskit.validate_provenance(empty_document)
def test_empty(self):
    """An empty document must fail validation."""
    empty_document = {}
    expected_error = tskit.ProvenanceValidationError
    with pytest.raises(expected_error):
        tskit.validate_provenance(empty_document)
def verify_round_trip_no_legacy(self, ts):
    """
    Dump ``ts`` to the temporary file, reload it, compare the copy with the
    original (without simplifying) and validate every provenance record.
    """
    ts.dump(self.temp_file)
    reloaded = tskit.load(self.temp_file)
    self.verify_tree_sequences_equal(ts, reloaded, simplify=False)
    for row in reloaded.provenances():
        document = json.loads(row.record)
        tskit.validate_provenance(document)
def verify_provenance(self, ts):
    """Decode and validate every provenance record of ``ts``."""
    for row in ts.provenances():
        document = json.loads(row.record)
        tskit.validate_provenance(document)
def test_schema_validates(self):
    """Documents built from several parameter shapes all validate."""
    parameter_sets = ({}, {"a": "a"}, {"a": {"a": 1}})
    for params in parameter_sets:
        document = provenance.get_provenance_dict(params)
        tskit.validate_provenance(document)
def test_simulation(self):
    """Check the provenance record written by ``msprime.simulate``."""
    simulated = msprime.simulate(5, random_seed=1)
    record = json.loads(simulated.provenance(0).record)
    tskit.validate_provenance(record)
    command = record["parameters"]["command"]
    assert command == "simulate"
def test_simplify(self):
    """Check the provenance record appended by ``simplify``."""
    simulated = msprime.simulate(5, random_seed=1)
    simplified = simulated.simplify()
    # Record 0 comes from the simulation; record 1 is the simplify step.
    raw_record = simplified.provenance(1).record
    document = json.loads(raw_record)
    tskit.validate_provenance(document)
    self.assertEqual(document["parameters"]["command"], "simplify")
def validate_ts(self, ts):
    """Decode and validate every provenance record of ``ts``."""
    for row in ts.provenances():
        tskit.validate_provenance(json.loads(row.record))
def test_schema_validates(self):
    """The provenance document built by the CLI validates."""
    document = cli.get_provenance_dict()
    tskit.validate_provenance(document)
def test_mutate(self):
    """Check the provenance record appended by ``msprime.mutate``."""
    mutated = msprime.mutate(
        msprime.simulate(5, random_seed=1), rate=1, random_seed=1
    )
    # Record 0 comes from the simulation; record 1 is the mutate step.
    raw_record = mutated.provenance(1).record
    document = json.loads(raw_record)
    tskit.validate_provenance(document)
    self.assertEqual(document["parameters"]["command"], "mutate")