def test_fetch_metadata_function_with_indices(tmpdir, sample_run):
    """Check ``TinyDbReader.fetch_metadata`` when runs are selected by index."""
    # Populate the database under ``root`` with three experiments.
    root = tmpdir.strpath
    experiments = (
        ('experiment 1 alpha', '1234'),
        ('experiment 2 beta', '5678'),
        ('experiment 3 alpha', '9990'),
    )
    for exp_name, exp_id in experiments:
        tinydb_obs = run_test_experiment(exp_name=exp_name,
                                         exp_id=exp_id,
                                         root_dir=root)

    tinydb_reader = TinyDbReader(root)

    # A scalar index and a one-element list must select the same run.
    by_scalar = tinydb_reader.fetch_metadata(indices=-1)
    by_list = tinydb_reader.fetch_metadata(indices=[-1])
    assert strip_file_handles(by_scalar) == strip_file_handles(by_list)

    first_and_last = tinydb_reader.fetch_metadata(indices=[0, -1])
    assert len(first_and_last) == 2

    exp1_res = tinydb_reader.fetch_metadata(indices=0)
    assert len(exp1_res) == 1
    first_entry = exp1_res[0]
    assert first_entry['experiment']['name'] == 'experiment 1 alpha'
    assert first_entry['_id'] == '1234'

    # Index 4 must be rejected with a ValueError.
    with pytest.raises(ValueError):
        tinydb_reader.fetch_metadata(indices=4)

    # Compare the complete entry of the first experiment.
    exp1 = strip_file_handles(exp1_res)[0]

    ex_info = sample_run['ex_info']
    ex_info['name'] = 'experiment 1 alpha'
    ex_info['sources'] = [['setup.py', get_digest('setup.py')]]

    expected = {
        '_id': '1234',
        'experiment': sample_run['ex_info'],
        'format': tinydb_obs.VERSION,
        'command': sample_run['command'],
        'host': sample_run['host_info'],
        'start_time': sample_run['start_time'],
        'heartbeat': datetime.datetime(1999, 5, 5, 5, 5, 5, 5),
        'info': {'my_info': [1, 2, 3], 'nr': 7},
        'captured_out': 'some output',
        'artifacts': [
            ['about', 'sacred/__about__.py',
             get_digest('sacred/__about__.py')],
        ],
        'config': sample_run['config'],
        'meta': sample_run['meta_info'],
        'status': 'COMPLETED',
        'resources': [
            ['sacred/__init__.py', get_digest('sacred/__init__.py')],
        ],
        'result': 42,
        'stop_time': datetime.datetime(1999, 5, 5, 6, 6, 6, 6),
    }
    assert exp1 == expected
def resource_event(self, filename):
    """Record ``filename`` and its MD5 digest in the run entry's resources.

    When ``self.fs`` already reports a file with this name and digest, the
    ``(filename, digest)`` pair is appended only if it is not listed yet.
    Otherwise the pair is appended unconditionally. The run entry is saved
    after every append.
    """
    if self.fs.exists(filename=filename):
        digest = get_digest(filename)
        if self.fs.exists(filename=filename, md5=digest):
            entry = (filename, digest)
            recorded = self.run_entry["resources"]
            if entry not in recorded:
                recorded.append(entry)
                self.save()
            return

    # Pymongo 4.0: GridFS removed support for md5, so the digest has to be
    # computed manually.
    digest = get_digest(filename)
    self.run_entry["resources"].append((filename, digest))
    self.save()
def sample_run():
    """Return a dict describing a sample run.

    The dict holds the run id, experiment info, command, host info, start
    time, config and meta info. The experiment's ``sources`` list contains
    ``setup.py`` together with its digest.
    """
    started_at = datetime.datetime(1999, 5, 4, 3, 2, 1, 0)
    base_dir = os.path.join(os.path.dirname(__file__), "..", "..")

    run = {
        "_id": "FED235DA13",
        "ex_info": {
            "name": "test_exp",
            "sources": [],
            "doc": "",
            "base_dir": base_dir,
            "dependencies": ["sacred==0.7b0"],
        },
        "command": "run",
        "host_info": {
            "hostname": "test_host",
            "cpu_count": 1,
            "python_version": "3.4",
        },
        "start_time": started_at,
        "config": {"config": "True", "foo": "bar", "answer": 42},
        "meta_info": {"comment": "test run"},
    }

    source_file = "setup.py"
    run["ex_info"]["sources"] = [[source_file, get_digest(source_file)]]
    return run
def sample_run():
    """Return a dict describing a sample run.

    The dict holds the run id, experiment info, command, host info, start
    time, config and meta info. The experiment's ``sources`` list contains
    ``setup.py`` together with its digest.
    """
    started_at = datetime.datetime(1999, 5, 4, 3, 2, 1, 0)
    base_dir = os.path.join(os.path.dirname(__file__), '..', '..')

    run = {
        '_id': 'FED235DA13',
        'ex_info': {
            'name': 'test_exp',
            'sources': [],
            'doc': '',
            'base_dir': base_dir,
            'dependencies': ['sacred==0.7b0'],
        },
        'command': 'run',
        'host_info': {
            'hostname': 'test_host',
            'cpu_count': 1,
            'python_version': '3.4',
        },
        'start_time': started_at,
        'config': {'config': 'True', 'foo': 'bar', 'answer': 42},
        'meta_info': {'comment': 'test run'},
    }

    source_file = 'setup.py'
    run['ex_info']['sources'] = [[source_file, get_digest(source_file)]]
    return run
def resource_event(self, filename):
    """Track the digest of ``filename`` and publish it as neptune properties.

    The digest is computed only the first time a filename is seen. Afterwards
    the list of known resource names and the digest stored for ``filename``
    are written via ``neptune.set_property``.
    """
    if filename not in self.resources:
        self.resources[filename] = get_digest(filename)

    # NOTE(review): the original nesting of the two calls below could not be
    # recovered from the flattened source; they are assumed to run on every
    # call, not only for new files -- confirm.
    resource_names = list(self.resources.keys())
    neptune.set_property('resources', str(resource_names))
    neptune.set_property(filename, self.resources[filename])
def sample_run():
    """Return a dict describing a sample run rooted at ``/tmp``.

    The dict holds the run id, experiment info, command, host info, start
    time, config and meta info. The experiment's ``sources`` list contains
    ``setup.py`` together with its digest.
    """
    started_at = datetime.datetime(1999, 5, 4, 3, 2, 1, 0)

    run = {
        '_id': 'FED235DA13',
        'ex_info': {
            'name': 'test_exp',
            'sources': [],
            'doc': '',
            'base_dir': '/tmp',
            'dependencies': ['sacred==0.7b0'],
        },
        'command': 'run',
        'host_info': {
            'hostname': 'test_host',
            'cpu_count': 1,
            'python_version': '3.4',
        },
        'start_time': started_at,
        'config': {'config': 'True', 'foo': 'bar', 'answer': 42},
        'meta_info': {'comment': 'test run'},
    }

    source_file = 'setup.py'
    run['ex_info']['sources'] = [[source_file, get_digest(source_file)]]
    return run
def resource_event(self, filename):
    """Remember the digest of ``filename`` the first time it is seen.

    TODO: Maintain resources list
    """
    if filename in self.resources:
        return
    self.resources[filename] = get_digest(filename)
def find_or_save(self, filename, store_dir):
    """Store ``filename`` under a digest-based name unless already present.

    The stored name is ``<stem>_<md5><ext>`` joined onto ``store_dir`` with
    ``s3_join``. The file is saved only when listing that path as a prefix
    yields nothing.

    Returns:
        A ``(store_path, md5sum)`` tuple.
    """
    base_name = os.path.basename(filename)
    source_name, ext = os.path.splitext(base_name)
    md5sum = get_digest(filename)
    store_path = s3_join(store_dir, source_name + "_" + md5sum + ext)

    existing = self._list_s3_subdirs(prefix=store_path)
    if len(existing) == 0:
        self.save_file(filename, store_path)
    return store_path, md5sum
def get_or_create(cls, filename, session):
    """Return the stored ``cls`` row for ``filename`` or build a new one.

    The lookup matches on both the filename and the file's current digest.
    When no row is found, a new ``cls`` instance carrying the raw file
    content is returned.
    """
    md5sum = get_digest(filename)
    query = session.query(cls).filter_by(filename=filename, md5sum=md5sum)
    existing = query.first()
    if existing:
        return existing

    with open(filename, "rb") as source_file:
        return cls(filename=filename, md5sum=md5sum,
                   content=source_file.read())
def get_or_create(cls, filename, session):
    """Return the stored ``cls`` row for ``filename`` or build a new one.

    The lookup matches on both the filename and the file's current digest.
    When no row is found, a new ``cls`` instance carrying the raw file
    content is returned.
    """
    md5sum = get_digest(filename)
    query = session.query(cls).filter_by(filename=filename, md5sum=md5sum)
    existing = query.first()
    if existing:
        return existing

    with open(filename, 'rb') as source_file:
        return cls(filename=filename, md5sum=md5sum,
                   content=source_file.read())
def resource_event(self, filename):
    """Register a new resource and publish its path and digest to neptune.

    A filename that is already tracked is ignored. Otherwise a fresh prefix
    is created and stored for it, and two properties are set:
    ``<prefix>data_path`` (the filename) and ``<prefix>data_version`` (the
    file's digest).
    """
    if filename in self.resources:
        return

    prefix = self._create_new_prefix()
    self.resources[filename] = prefix
    digest = get_digest(filename)

    neptune.set_property('{}data_path'.format(prefix), filename)
    neptune.set_property('{}data_version'.format(prefix), digest)
def find_or_save(self, filename, store_dir: Path):
    """Copy ``filename`` into ``store_dir`` under a digest-based name.

    ``store_dir`` is created if needed. The target name is
    ``<stem>_<md5><ext>``, and the copy is skipped when that target already
    exists.

    Returns:
        A ``(store_path, md5sum)`` tuple.
    """
    os.makedirs(str(store_dir), exist_ok=True)

    base_name = os.path.basename(filename)
    source_name, ext = os.path.splitext(base_name)
    md5sum = get_digest(filename)

    store_path = store_dir / (source_name + "_" + md5sum + ext)
    if not store_path.exists():
        copyfile(filename, str(store_path))
    return store_path, md5sum
def find_or_save(self, filename, store_dir):
    """Copy ``filename`` into ``store_dir`` under a digest-based name.

    ``store_dir`` is created through ``self._makedirs``. The target name is
    ``<stem>_<md5><ext>``, and the copy is skipped when that target already
    exists.

    Returns:
        A ``(store_path, md5sum)`` tuple.
    """
    self._makedirs(store_dir, exist_ok=True)

    base_name = os.path.basename(filename)
    source_name, ext = os.path.splitext(base_name)
    md5sum = get_digest(filename)

    store_path = os.path.join(store_dir, source_name + '_' + md5sum + ext)
    if not os.path.exists(store_path):
        copyfile(filename, store_path)
    return store_path, md5sum
def find_or_save(self, filename, store_dir):
    """Copy ``filename`` into ``store_dir`` under a digest-based name.

    ``store_dir`` is created if needed. The target name is
    ``<stem>_<md5><ext>``, and the copy is skipped when that target already
    exists.

    Returns:
        A ``(store_path, md5sum)`` tuple.
    """
    os.makedirs(store_dir, exist_ok=True)

    base_name = os.path.basename(filename)
    source_name, ext = os.path.splitext(base_name)
    md5sum = get_digest(filename)

    store_path = os.path.join(store_dir, source_name + '_' + md5sum + ext)
    if not os.path.exists(store_path):
        copyfile(filename, store_path)
    return store_path, md5sum
def get_or_create(cls, filename, md5sum, session):
    """Return the stored ``cls`` row for ``filename``/``md5sum`` or build one.

    When no matching row exists, the file's digest is recomputed and checked
    against ``md5sum`` before a new ``cls`` instance with the file's text
    content is returned.

    Raises:
        AssertionError: if the recomputed digest differs from ``md5sum``.
    """
    query = session.query(cls).filter_by(filename=filename, md5sum=md5sum)
    existing = query.first()
    if existing:
        return existing

    actual_md5 = get_digest(filename)
    mismatch_msg = 'Weird: found md5 mismatch for {}: {} != {}'.format(
        filename, md5sum, actual_md5)
    assert actual_md5 == md5sum, mismatch_msg

    with open(filename, 'r') as source_file:
        return cls(filename=filename, md5sum=md5sum,
                   content=source_file.read())
def test_mongo_observer_resource_event(mongo_obs, sample_run):
    """A resource event must store the file name and digest in the run."""
    mongo_obs.started_event(**sample_run)

    filename = "setup.py"
    expected_resources = [[filename, get_digest(filename)]]

    mongo_obs.resource_event(filename)

    db_run = mongo_obs.runs.find_one()
    assert db_run["resources"] == expected_resources
def get_or_create(cls, filename, md5sum, session, basedir):
    """Return the stored ``cls`` row for ``filename``/``md5sum`` or build one.

    When no matching row exists, the file is located relative to ``basedir``,
    its digest is recomputed and checked against ``md5sum``, and a new
    ``cls`` instance with the file's text content is returned.

    Raises:
        AssertionError: if the recomputed digest differs from ``md5sum``.
    """
    query = session.query(cls).filter_by(filename=filename, md5sum=md5sum)
    existing = query.first()
    if existing:
        return existing

    full_path = os.path.join(basedir, filename)
    actual_md5 = get_digest(full_path)
    mismatch_msg = 'found md5 mismatch for {}: {} != {}'.format(
        filename, md5sum, actual_md5)
    assert actual_md5 == md5sum, mismatch_msg

    with open(full_path, 'r') as source_file:
        return cls(filename=filename, md5sum=md5sum,
                   content=source_file.read())
def get_or_create(cls, filename, md5sum, session, basedir):
    """Return the stored ``cls`` row for ``filename``/``md5sum`` or build one.

    When no matching row exists, the file is located relative to ``basedir``,
    its digest is recomputed and checked against ``md5sum``, and a new
    ``cls`` instance with the file's text content is returned.

    Raises:
        AssertionError: if the recomputed digest differs from ``md5sum``.
    """
    query = session.query(cls).filter_by(filename=filename, md5sum=md5sum)
    existing = query.first()
    if existing:
        return existing

    full_path = os.path.join(basedir, filename)
    actual_md5 = get_digest(full_path)
    mismatch_msg = "found md5 mismatch for {}: {} != {}".format(
        filename, md5sum, actual_md5)
    assert actual_md5 == md5sum, mismatch_msg

    with open(full_path, "r") as source_file:
        return cls(filename=filename, md5sum=md5sum,
                   content=source_file.read())
def test_mongo_observer_resource_event(mongo_obs, sample_run):
    """A resource event must query the file store and record the resource."""
    mongo_obs.started_event(**sample_run)

    filename = "setup.py"
    expected_resources = [(filename, get_digest(filename))]

    mongo_obs.resource_event(filename)

    fs_exists = mongo_obs.fs.exists
    assert fs_exists.called
    fs_exists.assert_any_call(filename=filename)

    db_run = mongo_obs.runs.find_one()
    assert db_run['resources'] == expected_resources
def test_tinydb_observer_resource_event_when_resource_present(tinydb_obs, sample_run):
    """A resource already in the file store must still be recorded in the run."""
    tinydb_obs.started_event(**sample_run)

    filename = "setup.py"
    expected_head = [filename, get_digest(filename)]

    # Put the file into the store before the resource event fires.
    tinydb_obs.fs.put(filename)
    tinydb_obs.resource_event(filename)

    db_run = tinydb_obs.runs.get(eid=1)
    first_resource = db_run["resources"][0]
    assert first_resource[:2] == expected_head
def test_tinydb_observer_resource_event_when_resource_present(tinydb_obs, sample_run):
    """A resource already in the file store must still be recorded in the run."""
    tinydb_obs.started_event(**sample_run)

    filename = "setup.py"
    expected_head = [filename, get_digest(filename)]

    # Put the file into the store before the resource event fires.
    tinydb_obs.fs.put(filename)
    tinydb_obs.resource_event(filename)

    db_run = tinydb_obs.runs.get(eid=1)
    first_resource = db_run['resources'][0]
    assert first_resource[:2] == expected_head
def resource_event(self, filename):
    """Record ``filename`` as a resource of the run, uploading it if needed.

    When ``self.fs`` already reports a file with this name and digest, the
    ``(filename, digest)`` pair is appended only if it is not listed yet.
    Otherwise the file is put into ``self.fs`` and the md5 reported by the
    store for the new entry is recorded. The run entry is saved after every
    append.
    """
    if self.fs.exists(filename=filename):
        digest = get_digest(filename)
        if self.fs.exists(filename=filename, md5=digest):
            entry = (filename, digest)
            recorded = self.run_entry['resources']
            if entry not in recorded:
                recorded.append(entry)
                self.save()
            return

    with open(filename, 'rb') as resource_file:
        file_id = self.fs.put(resource_file, filename=filename)

    stored_md5 = self.fs.get(file_id).md5
    self.run_entry['resources'].append((filename, stored_md5))
    self.save()
def test_mongo_observer_resource_event(mongo_obs, sample_run):
    """A resource event must query the file store and record the resource."""
    mongo_obs.started_event(**sample_run)

    filename = "setup.py"
    expected_resources = [(filename, get_digest(filename))]

    mongo_obs.resource_event(filename)

    fs_exists = mongo_obs.fs.exists
    assert fs_exists.called
    fs_exists.assert_any_call(filename=filename)

    db_run = mongo_obs.runs.find_one()
    # For some reason py27 returns the entries as tuples and py36 as lists,
    # so normalise to tuples before comparing.
    stored_resources = [tuple(r) for r in db_run['resources']]
    assert stored_resources == expected_resources
def test_tinydb_observer_resource_event(tinydb_obs, sample_run):
    """A resource event must store the file and record it in the run."""
    tinydb_obs.started_event(**sample_run)

    filename = "setup.py"
    expected_head = [filename, get_digest(filename)]

    tinydb_obs.resource_event(filename)
    assert tinydb_obs.fs.exists(filename)

    db_run = tinydb_obs.runs.get(eid=1)
    first_resource = db_run['resources'][0]
    assert first_resource[:2] == expected_head

    # The third element is a readable handle yielding the file's bytes.
    with open(filename, 'rb') as resource_file:
        file_content = resource_file.read()
    assert first_resource[2].read() == file_content
def test_mongo_observer_resource_event(mongo_obs):
    """A resource event must query the file store and record the resource."""
    exp = {"name": "test_exp", "sources": [], "doc": ""}
    host = {"hostname": "test_host", "cpu_count": 1, "python_version": "3.4"}
    config = {"config": "True", "foo": "bar", "answer": 42}
    mongo_obs.started_event(exp, host, T1, config, "comment")

    filename = "setup.py"
    expected_resources = [(filename, get_digest(filename))]

    mongo_obs.resource_event(filename)

    fs_exists = mongo_obs.fs.exists
    assert fs_exists.called
    fs_exists.assert_any_call(filename=filename)

    db_run = mongo_obs.runs.find_one()
    assert db_run["resources"] == expected_resources
def test_mongo_observer_resource_event(mongo_obs):
    """A resource event must query the file store and record the resource."""
    exp = {'name': 'test_exp', 'sources': [], 'doc': ''}
    host = {'hostname': 'test_host', 'cpu_count': 1, 'python_version': '3.4'}
    config = {'config': 'True', 'foo': 'bar', 'answer': 42}
    mongo_obs.started_event(exp, host, T1, config, 'comment')

    filename = "setup.py"
    expected_resources = [(filename, get_digest(filename))]

    mongo_obs.resource_event(filename)

    fs_exists = mongo_obs.fs.exists
    assert fs_exists.called
    fs_exists.assert_any_call(filename=filename)

    db_run = mongo_obs.runs.find_one()
    assert db_run['resources'] == expected_resources
def test_mongo_observer_resource_event(mongo_obs):
    """A resource event must query the file store and record the resource."""
    exp = {'name': 'test_exp', 'sources': [], 'doc': ''}
    host = {'hostname': 'test_host', 'cpu_count': 1, 'python_version': '3.4'}
    config = {'config': 'True', 'foo': 'bar', 'answer': 42}
    mongo_obs.started_event(exp, host, T1, config)

    filename = "setup.py"
    expected_resources = [(filename, get_digest(filename))]

    mongo_obs.resource_event(filename)

    fs_exists = mongo_obs.fs.exists
    assert fs_exists.called
    fs_exists.assert_any_call(filename=filename)

    db_run = mongo_obs.runs.find_one()
    assert db_run['resources'] == expected_resources
def test_tinydb_observer_started_event_saves_given_sources(tinydb_obs, sample_run):
    """``started_event`` must persist the sources listed in ``ex_info``."""
    filename = "setup.py"
    md5 = get_digest(filename)
    sample_run["ex_info"]["sources"] = [[filename, md5]]

    _id = tinydb_obs.started_event(**sample_run)

    assert _id is not None
    assert len(tinydb_obs.runs) == 1
    db_run = tinydb_obs.runs.get(eid=1)

    # Compare everything except the experiment section, which is checked
    # separately below.
    without_experiment = db_run.copy()
    del without_experiment["experiment"]
    expected = {
        "_id": _id,
        "format": tinydb_obs.VERSION,
        "command": sample_run["command"],
        "host": sample_run["host_info"],
        "start_time": sample_run["start_time"],
        "heartbeat": None,
        "info": {},
        "captured_out": "",
        "artifacts": [],
        "config": sample_run["config"],
        "meta": sample_run["meta_info"],
        "status": "RUNNING",
        "resources": [],
    }
    assert without_experiment == expected

    sources = db_run["experiment"]["sources"]
    assert len(sources) == 1
    assert len(sources[0]) == 3
    assert sources[0][:2] == [filename, md5]
    assert isinstance(sources[0][2], io.BufferedReader)

    # Check that duplicate source files are still listed in ex_info
    tinydb_obs.db_run_id = None
    tinydb_obs.started_event(**sample_run)
    assert len(tinydb_obs.runs) == 2
    db_run2 = tinydb_obs.runs.get(eid=2)

    first_head = db_run["experiment"]["sources"][0][:2]
    second_head = db_run2["experiment"]["sources"][0][:2]
    assert first_head == second_head
def find_or_save(self, filename, store_dir: Path):
    """Return a usable path for ``filename``, copying it when required.

    When ``filename`` resolves to a location under ``self.basedir`` and
    ``self.copy_artifacts`` is falsy, ``filename`` itself is returned and
    nothing is copied. Otherwise the file is copied into ``store_dir`` as
    ``<stem>_<md5><ext>`` (unless that target already exists) and the target
    path is returned.
    """
    try:
        # relative_to raises ValueError when filename is outside basedir.
        Path(filename).resolve().relative_to(Path(self.basedir).resolve())
    except ValueError:
        inside_basedir = False
    else:
        inside_basedir = True

    if inside_basedir and not self.copy_artifacts:
        return filename

    store_dir.mkdir(parents=True, exist_ok=True)
    base_name = os.path.basename(filename)
    source_name, ext = os.path.splitext(base_name)
    md5sum = get_digest(filename)

    store_path = store_dir / (source_name + "_" + md5sum + ext)
    if not store_path.exists():
        copyfile(filename, str(store_path))
    return store_path
def test_mongo_observer_resource_event(mongo_obs, sample_run):
    """A resource event followed by a heartbeat must persist the resource."""
    mongo_obs.started_event(**sample_run)

    filename = "setup.py"
    expected_entry = [filename, get_digest(filename)]

    mongo_obs.resource_event(filename)

    # Add extra heartbeat to make sure that run is updated.
    mongo_obs.heartbeat_event(
        info={"my_info": [1, 2, 3], "nr": 7},
        captured_out="some output",
        beat_time=T2,
        result=1337,
    )
    mongo_obs.join()

    db_run = mongo_obs.runs.find_one()
    assert db_run["resources"][0] == expected_entry
def test_tinydb_observer_started_event_saves_given_sources(tinydb_obs, sample_run):
    """``started_event`` must persist the sources listed in ``ex_info``."""
    filename = 'setup.py'
    md5 = get_digest(filename)
    sample_run['ex_info']['sources'] = [[filename, md5]]

    _id = tinydb_obs.started_event(**sample_run)

    assert _id is not None
    assert len(tinydb_obs.runs) == 1
    db_run = tinydb_obs.runs.get(eid=1)

    # Compare everything except the experiment section, which is checked
    # separately below.
    without_experiment = db_run.copy()
    del without_experiment['experiment']
    expected = {
        '_id': _id,
        'format': tinydb_obs.VERSION,
        'command': sample_run['command'],
        'host': sample_run['host_info'],
        'start_time': sample_run['start_time'],
        'heartbeat': None,
        'info': {},
        'captured_out': '',
        'artifacts': [],
        'config': sample_run['config'],
        'meta': sample_run['meta_info'],
        'status': 'RUNNING',
        'resources': [],
    }
    assert without_experiment == expected

    sources = db_run['experiment']['sources']
    assert len(sources) == 1
    assert len(sources[0]) == 3
    assert sources[0][:2] == [filename, md5]
    assert isinstance(sources[0][2], io.BufferedReader)

    # Check that duplicate source files are still listed in ex_info
    tinydb_obs.db_run_id = None
    tinydb_obs.started_event(**sample_run)
    assert len(tinydb_obs.runs) == 2
    db_run2 = tinydb_obs.runs.get(eid=2)

    first_head = db_run['experiment']['sources'][0][:2]
    second_head = db_run2['experiment']['sources'][0][:2]
    assert first_head == second_head
def test_mongo_observer_resource_event(mongo_obs, sample_run):
    """A resource event followed by a heartbeat must persist the resource."""
    mongo_obs.started_event(**sample_run)

    filename = "setup.py"
    expected_resources = [(filename, get_digest(filename))]

    mongo_obs.resource_event(filename)

    # Add extra heartbeat to make sure that run is updated.
    mongo_obs.heartbeat_event(
        info={'my_info': [1, 2, 3], 'nr': 7},
        captured_out='some output',
        beat_time=T2,
        result=1337,
    )
    mongo_obs.join()

    fs_exists = mongo_obs.fs.exists
    assert fs_exists.called
    fs_exists.assert_any_call(filename=filename)

    db_run = mongo_obs.runs.find_one()
    # For some reason py27 returns the entries as tuples and py36 as lists,
    # so normalise to tuples before comparing.
    stored_resources = [tuple(r) for r in db_run['resources']]
    assert stored_resources == expected_resources
def test_source_get_digest():
    """``get_digest`` of the example source must equal the known digest."""
    digest = get_digest(EXAMPLE_SOURCE)
    assert digest == EXAMPLE_DIGEST
def store_raw_source(zealot, filename):
    """Add a ``dependencies.Source`` for ``filename`` to ``zealot.sources``.

    The source is built from the filename and the digest returned by
    ``dependencies.get_digest``.
    """
    sources = zealot.sources
    digest = dependencies.get_digest(filename)
    sources.add(dependencies.Source(filename, digest))
def test_fetch_metadata_function_with_indices(tmpdir):
    """Check ``TinyDbReader.fetch_metadata`` when runs are selected by index."""
    sample_run_ = sample_run()

    # Populate the database under ``root`` with three experiments.
    root = tmpdir.strpath
    experiments = (
        ("experiment 1 alpha", "1234"),
        ("experiment 2 beta", "5678"),
        ("experiment 3 alpha", "9990"),
    )
    for exp_name, exp_id in experiments:
        tinydb_obs = run_test_experiment(exp_name=exp_name,
                                         exp_id=exp_id,
                                         root_dir=root)

    tinydb_reader = TinyDbReader(root)

    # A scalar index and a one-element list must select the same run.
    by_scalar = tinydb_reader.fetch_metadata(indices=-1)
    by_list = tinydb_reader.fetch_metadata(indices=[-1])
    assert strip_file_handles(by_scalar) == strip_file_handles(by_list)

    first_and_last = tinydb_reader.fetch_metadata(indices=[0, -1])
    assert len(first_and_last) == 2

    exp1_res = tinydb_reader.fetch_metadata(indices=0)
    assert len(exp1_res) == 1
    first_entry = exp1_res[0]
    assert first_entry["experiment"]["name"] == "experiment 1 alpha"
    assert first_entry["_id"] == "1234"

    # Index 4 must be rejected with a ValueError.
    with pytest.raises(ValueError):
        tinydb_reader.fetch_metadata(indices=4)

    # Compare the complete entry of the first experiment.
    exp1 = strip_file_handles(exp1_res)[0]

    ex_info = sample_run_["ex_info"]
    ex_info["name"] = "experiment 1 alpha"
    ex_info["sources"] = [["setup.py", get_digest("setup.py")]]

    expected = {
        "_id": "1234",
        "experiment": sample_run_["ex_info"],
        "format": tinydb_obs.VERSION,
        "command": sample_run_["command"],
        "host": sample_run_["host_info"],
        "start_time": sample_run_["start_time"],
        "heartbeat": datetime.datetime(1999, 5, 5, 5, 5, 5, 5),
        "info": {"my_info": [1, 2, 3], "nr": 7},
        "captured_out": "some output",
        "artifacts": [
            ["about", "sacred/__about__.py",
             get_digest("sacred/__about__.py")],
        ],
        "config": sample_run_["config"],
        "meta": sample_run_["meta_info"],
        "status": "COMPLETED",
        "resources": [
            ["sacred/__init__.py", get_digest("sacred/__init__.py")],
        ],
        "result": 42,
        "stop_time": datetime.datetime(1999, 5, 5, 6, 6, 6, 6),
    }
    assert exp1 == expected
def test_fetch_metadata_function_with_indices(tmpdir, sample_run):
    """Check ``TinyDbReader.fetch_metadata`` when runs are selected by index."""
    # Populate the database under ``root`` with three experiments.
    root = tmpdir.strpath
    experiments = (
        ('experiment 1 alpha', '1234'),
        ('experiment 2 beta', '5678'),
        ('experiment 3 alpha', '9990'),
    )
    for exp_name, exp_id in experiments:
        tinydb_obs = run_test_experiment(exp_name=exp_name,
                                         exp_id=exp_id,
                                         root_dir=root)

    tinydb_reader = TinyDbReader(root)

    # A scalar index and a one-element list must select the same run.
    by_scalar = tinydb_reader.fetch_metadata(indices=-1)
    by_list = tinydb_reader.fetch_metadata(indices=[-1])
    assert strip_file_handles(by_scalar) == strip_file_handles(by_list)

    first_and_last = tinydb_reader.fetch_metadata(indices=[0, -1])
    assert len(first_and_last) == 2

    exp1_res = tinydb_reader.fetch_metadata(indices=0)
    assert len(exp1_res) == 1
    first_entry = exp1_res[0]
    assert first_entry['experiment']['name'] == 'experiment 1 alpha'
    assert first_entry['_id'] == '1234'

    # Index 4 must be rejected with a ValueError.
    with pytest.raises(ValueError):
        tinydb_reader.fetch_metadata(indices=4)

    # Compare the complete entry of the first experiment.
    exp1 = strip_file_handles(exp1_res)[0]

    ex_info = sample_run['ex_info']
    ex_info['name'] = 'experiment 1 alpha'
    ex_info['sources'] = [['setup.py', get_digest('setup.py')]]

    expected = {
        '_id': '1234',
        'experiment': sample_run['ex_info'],
        'format': tinydb_obs.VERSION,
        'command': sample_run['command'],
        'host': sample_run['host_info'],
        'start_time': sample_run['start_time'],
        'heartbeat': datetime.datetime(1999, 5, 5, 5, 5, 5, 5),
        'info': {'my_info': [1, 2, 3], 'nr': 7},
        'captured_out': 'some output',
        'artifacts': [
            ['about', 'sacred/__about__.py',
             get_digest('sacred/__about__.py')],
        ],
        'config': sample_run['config'],
        'meta': sample_run['meta_info'],
        'status': 'COMPLETED',
        'resources': [
            ['sacred/__init__.py', get_digest('sacred/__init__.py')],
        ],
        'result': 42,
        'stop_time': datetime.datetime(1999, 5, 5, 6, 6, 6, 6),
    }
    assert exp1 == expected