def _add_metadata_to_xarray(
    dataset: DataSetProtocol, xrdataset: Union[xr.Dataset, xr.DataArray]
) -> None:
    xrdataset.attrs.update(
        {
            "ds_name": dataset.name,
            "sample_name": dataset.sample_name,
            "exp_name": dataset.exp_name,
            "snapshot": dataset._snapshot_raw or "null",
            "guid": dataset.guid,
            "run_timestamp": dataset.run_timestamp() or "",
            "completed_timestamp": dataset.completed_timestamp() or "",
            "captured_run_id": dataset.captured_run_id,
            "captured_counter": dataset.captured_counter,
            "run_id": dataset.run_id,
            "run_description": serial.to_json_for_storage(dataset.description),
            "parent_dataset_links": links_to_str(dataset.parent_dataset_links),
        }
    )
    if dataset.run_timestamp_raw is not None:
        xrdataset.attrs["run_timestamp_raw"] = dataset.run_timestamp_raw
    if dataset.completed_timestamp_raw is not None:
        xrdataset.attrs[
            "completed_timestamp_raw"
        ] = dataset.completed_timestamp_raw
    if len(dataset.metadata) > 0:
        for metadata_tag, metadata in dataset.metadata.items():
            xrdataset.attrs[metadata_tag] = metadata
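
# --- Illustration (not part of the function above) ---
# A minimal, self-contained sketch of the attrs-update pattern used in
# _add_metadata_to_xarray, runnable without QCoDeS. The ``fake_run`` dict is
# a hypothetical stand-in for a real DataSetProtocol instance; the point is
# that scalar metadata is attached to the xarray object's ``attrs`` dict and
# that None-valued optional fields are skipped rather than stored.
import xarray as xr

fake_run = {"ds_name": "demo", "guid": "aaaa-0000", "run_timestamp_raw": None}
xrds = xr.Dataset({"signal": ("x", [0.0, 1.0, 4.0])})
xrds.attrs.update({k: v for k, v in fake_run.items() if v is not None})
assert xrds.attrs["ds_name"] == "demo"
assert "run_timestamp_raw" not in xrds.attrs  # None values are not stored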
def _add_run_to_runs_table(
    dataset: DataSetProtocol,
    target_conn: ConnectionPlus,
    target_exp_id: int,
    create_run_table: bool = True,
) -> Optional[str]:
    metadata = dataset.metadata
    snapshot_raw = dataset._snapshot_raw
    captured_run_id = dataset.captured_run_id
    captured_counter = dataset.captured_counter
    parent_dataset_links = links_to_str(dataset.parent_dataset_links)

    _, target_run_id, target_table_name = create_run(
        target_conn,
        target_exp_id,
        name=dataset.name,
        guid=dataset.guid,
        metadata=metadata,
        captured_run_id=captured_run_id,
        captured_counter=captured_counter,
        parent_dataset_links=parent_dataset_links,
        create_run_table=create_run_table,
        snapshot_raw=snapshot_raw,
        description=dataset.description,
    )
    mark_run_complete(target_conn, target_run_id)
    _rewrite_timestamps(
        target_conn,
        target_run_id,
        dataset.run_timestamp_raw,
        dataset.completed_timestamp_raw,
    )
    return target_table_name
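
# --- Illustration (not part of the function above) ---
# A hypothetical reduction of the insert-then-finalize sequence performed by
# _add_run_to_runs_table, using plain sqlite3. The table and column names
# below are invented for the sketch and are not the QCoDeS schema; the point
# is the ordering: create the row first, then mark it complete and rewrite
# its timestamps to match the source run.
import sqlite3

conn = sqlite3.connect(":memory:")
conn.execute(
    "CREATE TABLE runs (run_id INTEGER PRIMARY KEY, name TEXT, "
    "completed INTEGER, run_ts REAL, completed_ts REAL)"
)
cur = conn.execute("INSERT INTO runs (name, completed) VALUES (?, 0)", ("copy",))
new_run_id = cur.lastrowid
# analogue of mark_run_complete + _rewrite_timestamps on the new row
conn.execute(
    "UPDATE runs SET completed = 1, run_ts = ?, completed_ts = ? WHERE run_id = ?",
    (1700000000.0, 1700000123.0, new_run_id),
)
conn.commit()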
def _assert_xarray_metadata_is_as_expected(xarray_ds, qc_dataset):
    assert xarray_ds.ds_name == qc_dataset.name
    assert xarray_ds.sample_name == qc_dataset.sample_name
    assert xarray_ds.exp_name == qc_dataset.exp_name
    assert xarray_ds.snapshot == (
        qc_dataset.snapshot_raw if qc_dataset.snapshot_raw is not None else "null"
    )
    assert xarray_ds.guid == qc_dataset.guid
    assert xarray_ds.run_timestamp == qc_dataset.run_timestamp()
    assert xarray_ds.completed_timestamp == qc_dataset.completed_timestamp()
    assert xarray_ds.captured_run_id == qc_dataset.captured_run_id
    assert xarray_ds.captured_counter == qc_dataset.captured_counter
    assert xarray_ds.run_id == qc_dataset.run_id
    assert xarray_ds.run_description == serial.to_json_for_storage(
        qc_dataset.description
    )
    assert xarray_ds.parent_dataset_links == links_to_str(
        qc_dataset.parent_dataset_links
    )
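
# --- Illustration (not part of the function above) ---
# Why the snapshot assertion needs parentheses: a conditional expression
# binds looser than ``==``, so ``assert a == b if cond else "null"`` parses
# as ``assert ((a == b) if cond else "null")``, and when the condition is
# false the else-branch merely asserts the truthy string "null" without
# comparing anything.
cond = False
assert (False if cond else "null")       # passes vacuously: "null" is truthy
assert "x" == ("y" if cond else "x")     # the parenthesized form compares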
def _perform_start_actions(self) -> None:
    """
    Perform the actions that must take place once the run has been started
    """
    paramspecs = new_to_old(self._interdeps).paramspecs

    for spec in paramspecs:
        add_parameter(self.conn, self.table_name, spec)

    desc_str = serial.to_json_for_storage(self.description)
    update_run_description(self.conn, self.run_id, desc_str)

    set_run_timestamp(self.conn, self.run_id)

    pdl_str = links_to_str(self._parent_dataset_links)
    update_parent_datasets(self.conn, self.run_id, pdl_str)
def _perform_start_actions(self) -> None:
    """
    Perform the actions that must take place once the run has been started
    """
    with contextlib.closing(
        conn_from_dbpath_or_conn(conn=None, path_to_db=self._path_to_db)
    ) as conn:
        paramspecs = new_to_old(self.description.interdeps).paramspecs

        for spec in paramspecs:
            add_parameter(
                spec,
                conn=conn,
                run_id=self.run_id,
                insert_into_results_table=False,
            )

        desc_str = serial.to_json_for_storage(self.description)
        update_run_description(conn, self.run_id, desc_str)

        self._run_timestamp_raw = time.time()
        set_run_timestamp(conn, self.run_id, self._run_timestamp_raw)

        pdl_str = links_to_str(self._parent_dataset_links)
        update_parent_datasets(conn, self.run_id, pdl_str)
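
# --- Illustration (not part of the function above) ---
# Why the second variant wraps its connection in contextlib.closing: a
# sqlite3 connection used as a context manager (``with conn:``) only manages
# the transaction, it does not close the connection. closing() guarantees
# conn.close() runs even if the body raises. Minimal sketch, standard
# library only:
import contextlib
import sqlite3

with contextlib.closing(sqlite3.connect(":memory:")) as conn:
    conn.execute("CREATE TABLE t (x INTEGER)")
    conn.execute("INSERT INTO t VALUES (1)")
    conn.commit()
# conn.close() has been called here, whether or not the body raised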
def _extract_single_dataset_into_db(dataset: DataSet,
                                    target_conn: ConnectionPlus,
                                    target_exp_id: int) -> None:
    """
    NB: This function should only be called from within
    :meth:`extract_runs_into_db`

    Insert the given dataset into the specified database file as the latest
    run. Trying to insert a run that is already in the DB is a NOOP.

    Args:
        dataset: A dataset representing the run to be copied
        target_conn: connection to the DB. Must be atomically guarded
        target_exp_id: The ``exp_id`` of the (target DB) experiment in which
            to insert the run
    """

    if not dataset.completed:
        raise ValueError('Dataset not completed. An incomplete dataset '
                         'cannot be copied. The incomplete dataset has '
                         f'GUID: {dataset.guid} and run_id: {dataset.run_id}')

    source_conn = dataset.conn

    run_id = get_runid_from_guid(target_conn, dataset.guid)
    if run_id != -1:
        return

    if dataset.parameters is not None:
        param_names = dataset.parameters.split(',')
    else:
        param_names = []
    parspecs_dict = {
        p.name: p for p in new_to_old(dataset._interdeps).paramspecs
    }
    parspecs = [parspecs_dict[p] for p in param_names]

    metadata = dataset.metadata
    snapshot_raw = dataset.snapshot_raw
    captured_run_id = dataset.captured_run_id
    captured_counter = dataset.captured_counter
    parent_dataset_links = links_to_str(dataset.parent_dataset_links)

    _, target_run_id, target_table_name = create_run(
        target_conn,
        target_exp_id,
        name=dataset.name,
        guid=dataset.guid,
        parameters=parspecs,
        metadata=metadata,
        captured_run_id=captured_run_id,
        captured_counter=captured_counter,
        parent_dataset_links=parent_dataset_links,
    )

    _populate_results_table(source_conn,
                            target_conn,
                            dataset.table_name,
                            target_table_name)
    mark_run_complete(target_conn, target_run_id)
    _rewrite_timestamps(target_conn,
                        target_run_id,
                        dataset.run_timestamp_raw,
                        dataset.completed_timestamp_raw)

    if snapshot_raw is not None:
        add_meta_data(target_conn, target_run_id, {'snapshot': snapshot_raw})
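
# --- Illustration (not part of the function above) ---
# A hypothetical, self-contained sketch of the GUID-based NOOP guard used
# above: before copying a run into the target DB, look the guid up; if a
# matching row already exists, return without inserting. The table and
# column names are invented for the sketch.
import sqlite3

def copy_run_if_absent(conn: sqlite3.Connection, guid: str, name: str) -> None:
    row = conn.execute("SELECT run_id FROM runs WHERE guid = ?", (guid,)).fetchone()
    if row is not None:
        return  # run already present in the target DB: copying is a NOOP
    conn.execute("INSERT INTO runs (guid, name) VALUES (?, ?)", (guid, name))

db = sqlite3.connect(":memory:")
db.execute("CREATE TABLE runs (run_id INTEGER PRIMARY KEY, guid TEXT, name TEXT)")
copy_run_if_absent(db, "aaaa-0000", "demo")
copy_run_if_absent(db, "aaaa-0000", "demo")  # second call inserts nothing
assert db.execute("SELECT COUNT(*) FROM runs").fetchone()[0] == 1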
def test_links_to_str_and_back(N):
    links = generate_some_links(N)
    new_links = str_to_links(links_to_str(links))
    assert new_links == links
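
# --- Illustration (not part of the test above) ---
# The round-trip property the test checks, shown with a self-contained
# serializer pair so it runs without the QCoDeS link helpers. The *_demo
# functions are hypothetical stand-ins for links_to_str / str_to_links:
# decoding the encoded value must reproduce the original exactly.
import json

def links_to_str_demo(links):
    return json.dumps(links)

def str_to_links_demo(s):
    return json.loads(s)

links = [{"head": "guid-a", "tail": "guid-b", "edge_type": "fit"}]
assert str_to_links_demo(links_to_str_demo(links)) == links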