def test_old_to_new(some_paramspecs):
    """
    old_to_new should map a v0 InterDependencies object onto the new
    InterDependencies_ representation, preserving dependencies,
    inferences and standalone parameters.
    """
    group = some_paramspecs[1]
    ps1, ps2, ps3, ps4, ps5, ps6 = (group[f'ps{n}'] for n in range(1, 7))
    (ps1_base, ps2_base, ps3_base,
     ps4_base, ps5_base, ps6_base) = (ps.base_version()
                                      for ps in (ps1, ps2, ps3,
                                                 ps4, ps5, ps6))

    # Scenario 1: one inference chain plus a standalone parameter
    converted = old_to_new(InterDependencies(ps1, ps2, ps3))

    assert converted.dependencies == {}
    assert converted.inferences == {ps3_base: (ps1_base,)}
    assert converted.standalones == {ps2_base}
    expected = (ps1_base, ps2_base, ps3_base)
    assert converted._id_to_paramspec == {ps.name: ps for ps in expected}

    # Scenario 2: the full graph; duplicated inputs are tolerated
    converted = old_to_new(InterDependencies(ps2, ps4, ps1, ps2,
                                             ps3, ps5, ps6))

    assert converted.dependencies == {ps5_base: (ps3_base, ps4_base),
                                      ps6_base: (ps3_base, ps4_base)}
    assert converted.inferences == {ps3_base: (ps1_base,),
                                    ps4_base: (ps2_base,)}
    assert converted.standalones == set()
    expected = (ps1_base, ps2_base, ps3_base,
                ps4_base, ps5_base, ps6_base)
    assert converted._id_to_paramspec == {ps.name: ps for ps in expected}

    # Scenario 3: only standalone parameters
    converted = old_to_new(InterDependencies(ps1, ps2))

    assert converted.dependencies == {}
    assert converted.inferences == {}
    assert converted.standalones == {ps1_base, ps2_base}
    expected = (ps1_base, ps2_base)
    assert converted._id_to_paramspec == {ps.name: ps for ps in expected}
def test_fix_wrong_run_descriptions():
    """
    fix_wrong_run_descriptions should reconstruct broken run descriptions
    from the layouts/dependencies tables of a version-3 database fixture.
    """
    v3fixpath = os.path.join(fixturepath, 'db_files', 'version3')
    dbname_old = os.path.join(v3fixpath,
                              'some_runs_without_run_description.db')

    if not os.path.exists(dbname_old):
        pytest.skip("No db-file fixtures found. You can generate test db-files"
                    " using the scripts in the legacy_DB_generation folder")

    def make_ps(n):
        # one numeric ParamSpec per index, named p0..p5
        return ParamSpec(f'p{n}', label=f'Parameter {n}',
                         unit=f'unit {n}', paramtype='numeric')

    paramspecs = [make_ps(n) for n in range(6)]
    paramspecs[2]._inferred_from = ['p0']
    paramspecs[3]._inferred_from = ['p1', 'p0']
    paramspecs[4]._depends_on = ['p2', 'p3']
    paramspecs[5]._inferred_from = ['p0']

    with temporarily_copied_DB(dbname_old, debug=False, version=3) as conn:

        assert get_user_version(conn) == 3

        expected_description = RunDescriber(
            old_to_new(v0.InterDependencies(*paramspecs)))
        empty_description = RunDescriber(
            old_to_new(v0.InterDependencies()))

        fix_wrong_run_descriptions(conn, [1, 2, 3, 4])

        # runs 1-3 should now carry the full description
        for run_id in (1, 2, 3):
            desc = serial.from_json_to_current(
                get_run_description(conn, run_id))
            assert desc == expected_description

        # run 4 had no parameters, so its description is empty
        desc = serial.from_json_to_current(
            get_run_description(conn, run_id=4))
        assert desc == empty_description
def _assert_dicts_are_related_as_expected(v0, v1, v2):
    """
    Verify the invariants tying together the v0, v1 and v2 forms of a
    serialized RunDescriber dict.
    """
    # v1 replaces the old interdependencies dict with the converted one
    converted = old_to_new(
        InterDependencies._from_dict(v0['interdependencies']))
    assert v1['interdependencies'] == converted._to_dict()
    assert v1['version'] == 1
    assert len(v1) == 2

    # conversion does not preserve order in the dict so use deepdiff to compare
    assert DeepDiff(v2['interdependencies'], v0['interdependencies'],
                    ignore_order=True) == {}
    # v2 carries both representations side by side
    assert v2['interdependencies_'] == v1['interdependencies']
    assert v2['version'] == 2
    assert len(v2) == 3
def test_construct_currect_rundesciber_from_v0(some_paramspecs):
    """
    A v0 RunDescriber dict should upgrade into the expected v2 dict both
    via RunDescriber._from_dict and via from_dict_to_current.
    """
    group = some_paramspecs[1]
    interdeps = InterDependencies(group['ps1'], group['ps2'],
                                  group['ps3'], group['ps4'],
                                  group['ps6'])
    v0 = RunDescriberV0Dict(interdependencies=interdeps._to_dict(),
                            version=0)

    upgraded = (RunDescriber._from_dict(v0), from_dict_to_current(v0))

    expected_v2_dict = RunDescriberV2Dict(
        interdependencies=interdeps._to_dict(),
        interdependencies_=old_to_new(interdeps)._to_dict(),
        version=2)

    # dict ordering is not preserved by the conversion, hence DeepDiff
    for rds in upgraded:
        assert DeepDiff(rds._to_dict(), expected_v2_dict,
                        ignore_order=True) == {}
def fix_wrong_run_descriptions(conn: ConnectionPlus,
                               run_ids: Sequence[int]) -> None:
    """
    NB: This is a FIX function. Do not use it unless your database has been
    diagnosed with the problem that this function fixes.

    Overwrite faulty run_descriptions by using information from the layouts
    and dependencies tables. If a correct description is found for a run,
    that run is left untouched.

    Args:
        conn: The connection to the database
        run_ids: The runs to (potentially) fix

    Raises:
        RuntimeError: if the database is not at user version 3
    """
    user_version = get_user_version(conn)
    if user_version != 3:
        # BUG FIX: the message was missing the f-prefix, so it printed the
        # literal text "{user_version}" instead of the actual version number
        raise RuntimeError('Database of wrong version. Will not apply fix. '
                           f'Expected version 3, found version {user_version}')

    log.info('[*] Fixing run descriptions...')
    for run_id in run_ids:
        # rebuild the trusted description from the layouts/dependencies tables
        trusted_paramspecs = _get_parameters(conn, run_id)
        interdeps = v0.InterDependencies(*trusted_paramspecs)
        interdeps_ = old_to_new(interdeps)
        trusted_desc = RunDescriber(interdeps_)

        actual_desc_str = select_one_where(conn, "runs",
                                           "run_description",
                                           "run_id", run_id)

        trusted_json = serial.to_json_as_version(trusted_desc, 0)

        if actual_desc_str == trusted_json:
            log.info(f'[+] Run id: {run_id} had an OK description')
        else:
            log.info(f'[-] Run id: {run_id} had a broken description. '
                     f'Description found: {actual_desc_str}')
            update_run_description(conn, run_id, trusted_json)
            log.info(f'    Run id: {run_id} has been updated.')
def test_old_to_new_and_back(some_paramspecs):
    """Round-tripping through old_to_new and new_to_old is the identity."""
    original = InterDependencies(*some_paramspecs[1].values())
    round_tripped = new_to_old(old_to_new(original))
    assert round_tripped == original
def __init__(self, path_to_db: Optional[str] = None,
             run_id: Optional[int] = None,
             conn: Optional[ConnectionPlus] = None,
             exp_id: Optional[int] = None,
             name: Optional[str] = None,
             specs: Optional[SpecsOrInterDeps] = None,
             values=None,
             metadata=None) -> None:
    """
    Create a new DataSet object. The object can either hold a new run or
    an already existing run. If a run_id is provided, then an old run is
    looked up, else a new run is created.

    Args:
        path_to_db: path to the sqlite file on disk. If not provided, the
          path will be read from the config.
        run_id: provide this when loading an existing run, leave it
          as None when creating a new run
        conn: connection to the DB; if provided and `path_to_db` is
          provided as well, then a ValueError is raised (this is to
          prevent the possibility of providing a connection to a DB
          file that is different from `path_to_db`)
        exp_id: the id of the experiment in which to create a new run.
          Ignored if run_id is provided.
        name: the name of the dataset. Ignored if run_id is provided.
        specs: paramspecs belonging to the dataset. Ignored if run_id is
          provided.
        values: values to insert into the dataset. Ignored if run_id is
          provided.
        metadata: metadata to insert into the dataset. Ignored if run_id
          is provided.
    """
    # raises ValueError if both conn and path_to_db are given
    self.conn = conn_from_dbpath_or_conn(conn, path_to_db)

    self._run_id = run_id
    self._debug = False
    self.subscribers: Dict[str, _Subscriber] = {}
    self._interdeps: InterDependencies_

    if run_id is not None:
        # load an existing run from the database
        if not run_exists(self.conn, run_id):
            raise ValueError(f"Run with run_id {run_id} does not exist in "
                             f"the database")
        self._completed = completed(self.conn, self.run_id)
        run_desc = self._get_run_description_from_db()
        self._interdeps = run_desc.interdeps
        self._metadata = get_metadata_from_run_id(self.conn, run_id)
        # a run counts as started once it has a run timestamp
        self._started = self.run_timestamp_raw is not None
    else:
        # Actually perform all the side effects needed for the creation
        # of a new dataset. Note that a dataset is created (in the DB)
        # with no parameters; they are written to disk when the dataset
        # is marked as started
        if exp_id is None:
            # default to the most recent experiment, if any exists
            if len(get_experiments(self.conn)) > 0:
                exp_id = get_last_experiment(self.conn)
            else:
                raise ValueError("No experiments found."
                                 "You can start a new one with:"
                                 " new_experiment(name, sample_name)")
        name = name or "dataset"
        _, run_id, __ = create_run(self.conn, exp_id, name,
                                   generate_guid(),
                                   parameters=None,
                                   values=values,
                                   metadata=metadata)
        # this is really the UUID (an ever increasing count in the db)
        self._run_id = run_id
        self._completed = False
        self._started = False
        # specs may come in either the old or the new representation;
        # normalise to InterDependencies_
        if isinstance(specs, InterDependencies_):
            self._interdeps = specs
        elif specs is not None:
            self._interdeps = old_to_new(InterDependencies(*specs))
        else:
            self._interdeps = InterDependencies_()
        self._metadata = get_metadata_from_run_id(self.conn, self.run_id)
def _insert_run(
        conn: ConnectionPlus,
        exp_id: int,
        name: str,
        guid: str,
        parameters: Optional[List[ParamSpec]] = None,
):
    """
    Insert a new row into the runs table and, if parameters are given,
    register them in the layouts and dependencies tables.

    Args:
        conn: database connection
        exp_id: id of the experiment the run belongs to
        name: (user-supplied) name of the run
        guid: globally unique id for the run
        parameters: paramspecs of the run; may be None/empty for a run
          created without parameters

    Returns:
        Tuple of (run_counter, formatted_name, run_id) for the new row.
    """
    # get run counter and formatter from experiments
    run_counter, format_string = select_many_where(conn,
                                                   "experiments",
                                                   "run_counter",
                                                   "format_string",
                                                   where_column="exp_id",
                                                   where_value=exp_id)
    run_counter += 1
    formatted_name = format_table_name(format_string, name, exp_id,
                                       run_counter)
    table = "runs"

    parameters = parameters or []
    # the run description is stored as JSON alongside the run row
    run_desc = RunDescriber(old_to_new(InterDependencies(*parameters)))
    desc_str = serial.to_json_for_storage(run_desc)

    with atomic(conn) as conn:

        if parameters:
            query = f"""
            INSERT INTO {table}
                (name,
                 exp_id,
                 guid,
                 result_table_name,
                 result_counter,
                 run_timestamp,
                 parameters,
                 is_completed,
                 run_description)
            VALUES
                (?,?,?,?,?,?,?,?,?)
            """
            curr = transaction(conn, query,
                               name,
                               exp_id,
                               guid,
                               formatted_name,
                               run_counter,
                               None,
                               ",".join([p.name for p in parameters]),
                               False,
                               desc_str)

            _add_parameters_to_layout_and_deps(conn, formatted_name,
                                               *parameters)

        else:
            # no parameters: omit the parameters column entirely
            query = f"""
            INSERT INTO {table}
                (name,
                 exp_id,
                 guid,
                 result_table_name,
                 result_counter,
                 run_timestamp,
                 is_completed,
                 run_description)
            VALUES
                (?,?,?,?,?,?,?,?)
            """
            curr = transaction(conn, query,
                               name,
                               exp_id,
                               guid,
                               formatted_name,
                               run_counter,
                               None,
                               False,
                               desc_str)

    run_id = curr.lastrowid

    return run_counter, formatted_name, run_id