Пример #1
0
def test_atomicity(two_empty_temp_db_connections, some_interdeps):
    """
    Test the atomicity of the transaction by extracting and inserting two
    runs where the second one is not completed. The not completed error must
    roll back any changes to the target
    """
    source_conn, target_conn = two_empty_temp_db_connections

    source_path = path_to_dbfile(source_conn)
    target_path = path_to_dbfile(target_conn)

    # The target file must exist for us to be able to see whether it has
    # changed
    Path(target_path).touch()

    source_exp = Experiment(conn=source_conn)
    source_ds_1 = DataSet(conn=source_conn, exp_id=source_exp.exp_id)
    source_ds_2 = DataSet(conn=source_conn, exp_id=source_exp.exp_id)

    for ds in (source_ds_1, source_ds_2):
        ds.set_interdependencies(some_interdeps[1])
        ds.mark_started()
        ds.add_result({name: 2.1 for name in some_interdeps[1].names})

    # importantly, source_ds_2 is NOT marked as completed
    source_ds_1.mark_completed()

    # now check that the target file is untouched
    with raise_if_file_changed(target_path):
        # although the not completed error is a ValueError, we get the
        # RuntimeError from SQLite
        with pytest.raises(RuntimeError):
            extract_runs_into_db(source_path, target_path, 1, 2)
Пример #2
0
def test_runs_from_different_experiments_raises(two_empty_temp_db_connections,
                                                some_paramspecs):
    """
    Test that inserting runs from multiple experiments raises
    """
    source_conn, target_conn = two_empty_temp_db_connections

    source_path = path_to_dbfile(source_conn)
    target_path = path_to_dbfile(target_conn)

    source_exp_1 = Experiment(conn=source_conn)
    source_exp_2 = Experiment(conn=source_conn)

    # make 5 runs in first experiment

    exp_1_run_ids = []
    for _ in range(5):

        source_dataset = DataSet(conn=source_conn, exp_id=source_exp_1.exp_id)
        exp_1_run_ids.append(source_dataset.run_id)

        for ps in some_paramspecs[2].values():
            source_dataset.add_parameter(ps)

        source_dataset.mark_started()

        for val in range(10):
            source_dataset.add_result(
                {ps.name: val
                 for ps in some_paramspecs[2].values()})
        source_dataset.mark_completed()

    # make 5 runs in second experiment

    exp_2_run_ids = []
    for _ in range(5):

        source_dataset = DataSet(conn=source_conn, exp_id=source_exp_2.exp_id)
        exp_2_run_ids.append(source_dataset.run_id)

        for ps in some_paramspecs[2].values():
            source_dataset.add_parameter(ps)

        source_dataset.mark_started()

        for val in range(10):
            source_dataset.add_result(
                {ps.name: val
                 for ps in some_paramspecs[2].values()})
        source_dataset.mark_completed()

    run_ids = exp_1_run_ids + exp_2_run_ids
    source_exp_ids = np.unique([1, 2])
    matchstring = ('Did not receive runs from a single experiment\\. '
                   f'Got runs from experiments {source_exp_ids}')
    # make the matchstring safe to use as a regexp
    matchstring = matchstring.replace('[', '\\[').replace(']', '\\]')
    with pytest.raises(ValueError, match=matchstring):
        extract_runs_into_db(source_path, target_path, *run_ids)
Пример #3
0
def test_mark_completed_twice(start_bg_writer):
    """
    Ensure that its not an error to call mark_completed
    on an already completed dataset
    """
    ds = DataSet()
    ds.mark_started(start_bg_writer=start_bg_writer)
    ds.mark_completed()
    ds.mark_completed()
def test_old_versions_not_touched(two_empty_temp_db_connections,
                                  some_interdeps):

    source_conn, target_conn = two_empty_temp_db_connections

    target_path = path_to_dbfile(target_conn)
    source_path = path_to_dbfile(source_conn)

    _, new_v = get_db_version_and_newest_available_version(source_path)

    fixturepath = os.sep.join(qcodes.tests.dataset.__file__.split(os.sep)[:-1])
    fixturepath = os.path.join(fixturepath,
                               'fixtures', 'db_files', 'version2',
                               'some_runs.db')
    if not os.path.exists(fixturepath):
        pytest.skip("No db-file fixtures found. You can generate test db-files"
                    " using the scripts in the legacy_DB_generation folder")

    # First test that we cannot use an old version as source

    with raise_if_file_changed(fixturepath):
        with pytest.warns(UserWarning) as warning:
            extract_runs_into_db(fixturepath, target_path, 1)
            expected_mssg = ('Source DB version is 2, but this '
                             f'function needs it to be in version {new_v}. '
                             'Run this function again with '
                             'upgrade_source_db=True to auto-upgrade '
                             'the source DB file.')
            assert warning[0].message.args[0] == expected_mssg

    # Then test that we cannot use an old version as target

    # first create a run in the new version source
    source_exp = Experiment(conn=source_conn)
    source_ds = DataSet(conn=source_conn, exp_id=source_exp.exp_id)

    source_ds.set_interdependencies(some_interdeps[1])

    source_ds.mark_started()
    source_ds.add_results([{name: 0.0
                            for name in some_interdeps[1].names}])
    source_ds.mark_completed()

    with raise_if_file_changed(fixturepath):
        with pytest.warns(UserWarning) as warning:
            extract_runs_into_db(source_path, fixturepath, 1)
            expected_mssg = ('Target DB version is 2, but this '
                             f'function needs it to be in version {new_v}. '
                             'Run this function again with '
                             'upgrade_target_db=True to auto-upgrade '
                             'the target DB file.')
            assert warning[0].message.args[0] == expected_mssg
Пример #5
0
def test_experiments_with_NULL_sample_name(two_empty_temp_db_connections,
                                           some_paramspecs):
    """
    In older API versions (corresponding to DB version 3),
    users could get away with setting the sample name to None

    This test checks that such an experiment gets correctly recognised and
    is thus not ever re-inserted into the target DB
    """
    source_conn, target_conn = two_empty_temp_db_connections
    source_exp_1 = Experiment(conn=source_conn, name='null_sample_name')

    source_path = path_to_dbfile(source_conn)
    target_path = path_to_dbfile(target_conn)

    # make 5 runs in experiment

    exp_1_run_ids = []
    for _ in range(5):

        source_dataset = DataSet(conn=source_conn, exp_id=source_exp_1.exp_id)
        exp_1_run_ids.append(source_dataset.run_id)

        for ps in some_paramspecs[2].values():
            source_dataset.add_parameter(ps)
        source_dataset.mark_started()

        for val in range(10):
            source_dataset.add_result(
                {ps.name: val
                 for ps in some_paramspecs[2].values()})
        source_dataset.mark_completed()

    sql = """
          UPDATE experiments
          SET sample_name = NULL
          WHERE exp_id = 1
          """
    source_conn.execute(sql)
    source_conn.commit()

    assert source_exp_1.sample_name is None

    extract_runs_into_db(source_path, target_path, 1, 2, 3, 4, 5)

    assert len(get_experiments(target_conn)) == 1

    extract_runs_into_db(source_path, target_path, 1, 2, 3, 4, 5)

    assert len(get_experiments(target_conn)) == 1

    assert len(Experiment(exp_id=1, conn=target_conn)) == 5
Пример #6
0
def test_integer_timestamps_in_database_are_supported():
    ds = DataSet()

    ds.mark_started()
    ds.mark_completed()

    with atomic(ds.conn) as conn:
        _rewrite_timestamps(conn, ds.run_id, 42, 69)

    assert isinstance(ds.run_timestamp_raw, float)
    assert isinstance(ds.completed_timestamp_raw, float)
    assert isinstance(ds.run_timestamp(), str)
    assert isinstance(ds.completed_timestamp(), str)
Пример #7
0
def test_dataset_length():

    path_to_db = get_DB_location()
    ds = DataSet(path_to_db, run_id=None)

    assert len(ds) == 0

    parameter = ParamSpecBase(name='single', paramtype='numeric',
                              label='', unit='N/A')
    idps = InterDependencies_(standalones=(parameter,))
    ds.set_interdependencies(idps)

    ds.mark_started()
    ds.add_results([{parameter.name: 1}])
    ds.mark_completed()

    assert len(ds) == 1
def test_column_mismatch(two_empty_temp_db_connections, some_interdeps, inst):
    """
    Test insertion of runs with no metadata and no snapshot into a DB already
    containing a run that has both
    """

    source_conn, target_conn = two_empty_temp_db_connections
    source_path = path_to_dbfile(source_conn)
    target_path = path_to_dbfile(target_conn)

    target_exp = Experiment(conn=target_conn)

    # Set up measurement scenario
    station = Station(inst)

    meas = Measurement(exp=target_exp, station=station)
    meas.register_parameter(inst.back)
    meas.register_parameter(inst.plunger)
    meas.register_parameter(inst.cutter, setpoints=(inst.back, inst.plunger))

    with meas.run() as datasaver:
        for back_v in [1, 2, 3]:
            for plung_v in [-3, -2.5, 0]:
                datasaver.add_result((inst.back, back_v),
                                     (inst.plunger, plung_v),
                                     (inst.cutter, back_v+plung_v))
    datasaver.dataset.add_metadata('meta_tag', 'meta_value')

    Experiment(conn=source_conn)
    source_ds = DataSet(conn=source_conn)
    source_ds.set_interdependencies(some_interdeps[1])

    source_ds.mark_started()
    source_ds.add_results([{name: 2.1
                            for name in some_interdeps[1].names}])
    source_ds.mark_completed()

    extract_runs_into_db(source_path, target_path, 1)

    # compare
    target_copied_ds = DataSet(conn=target_conn, run_id=2)

    assert target_copied_ds.the_same_dataset_as(source_ds)
Пример #9
0
def test_extracting_dataless_run(two_empty_temp_db_connections):
    """
    Although contrived, it could happen that a run with no data is extracted
    """
    source_conn, target_conn = two_empty_temp_db_connections

    source_path = path_to_dbfile(source_conn)
    target_path = path_to_dbfile(target_conn)

    Experiment(conn=source_conn)

    source_ds = DataSet(conn=source_conn)
    source_ds.mark_started()
    source_ds.mark_completed()

    extract_runs_into_db(source_path, target_path, source_ds.run_id)

    loaded_ds = DataSet(conn=target_conn, run_id=1)

    assert loaded_ds.the_same_dataset_as(source_ds)
Пример #10
0
def test_atomicity(two_empty_temp_db_connections, some_paramspecs):
    """
    Test the atomicity of the transaction by extracting and inserting two
    runs where the second one is not completed. The not completed error must
    roll back any changes to the target
    """
    source_conn, target_conn = two_empty_temp_db_connections

    source_path = path_to_dbfile(source_conn)
    target_path = path_to_dbfile(target_conn)

    # The target file must exist for us to be able to see whether it has
    # changed
    Path(target_path).touch()

    source_exp = Experiment(conn=source_conn)
    source_ds_1 = DataSet(conn=source_conn, exp_id=source_exp.exp_id)
    for ps in some_paramspecs[2].values():
        source_ds_1.add_parameter(ps)
    source_ds_1.mark_started()
    source_ds_1.add_result(
        {ps.name: 2.1
         for ps in some_paramspecs[2].values()})
    source_ds_1.mark_completed()

    source_ds_2 = DataSet(conn=source_conn, exp_id=source_exp.exp_id)
    for ps in some_paramspecs[2].values():
        source_ds_2.add_parameter(ps)
    source_ds_2.mark_started()
    source_ds_2.add_result(
        {ps.name: 2.1
         for ps in some_paramspecs[2].values()})
    # This dataset is NOT marked as completed

    # now check that the target file is untouched
    with raise_if_file_changed(target_path):
        # although the not completed error is a ValueError, we get the
        # RuntimeError from SQLite
        with pytest.raises(RuntimeError):
            extract_runs_into_db(source_path, target_path, 1, 2)
Пример #11
0
def test_missing_runs_raises(two_empty_temp_db_connections, some_paramspecs):
    """
    Test that an error is raised if we attempt to extract a run not present in
    the source DB
    """
    source_conn, target_conn = two_empty_temp_db_connections

    source_exp_1 = Experiment(conn=source_conn)

    # make 5 runs in first experiment

    exp_1_run_ids = []
    for _ in range(5):

        source_dataset = DataSet(conn=source_conn, exp_id=source_exp_1.exp_id)
        exp_1_run_ids.append(source_dataset.run_id)

        for ps in some_paramspecs[2].values():
            source_dataset.add_parameter(ps)

        source_dataset.mark_started()

        for val in range(10):
            source_dataset.add_result(
                {ps.name: val
                 for ps in some_paramspecs[2].values()})
        source_dataset.mark_completed()

    source_path = path_to_dbfile(source_conn)
    target_path = path_to_dbfile(target_conn)

    run_ids = [1, 8, 5, 3, 2, 4, 4, 4, 7, 8]
    wrong_ids = [8, 7, 8]

    expected_err = ("Error: not all run_ids exist in the source database. "
                    "The following run(s) is/are not present: "
                    f"{wrong_ids}")

    with pytest.raises(ValueError, match=re.escape(expected_err)):
        extract_runs_into_db(source_path, target_path, *run_ids)
Пример #12
0
def test_foreground_after_background_non_concurrent(empty_temp_db_connection):
    new_experiment("test", "test1", conn=empty_temp_db_connection)
    ds1 = DataSet(conn=empty_temp_db_connection)
    ds1.mark_started(start_bg_writer=True)
    ds1.mark_completed()

    ds2 = DataSet(conn=empty_temp_db_connection)
    ds2.mark_started(start_bg_writer=False)
    ds2.mark_completed()

    ds3 = DataSet(conn=empty_temp_db_connection)
    ds3.mark_started(start_bg_writer=True)
    ds3.mark_completed()
Пример #13
0
def test_result_table_naming_and_run_id(two_empty_temp_db_connections,
                                        some_paramspecs):
    """
    Check that a correct result table name is given and that a correct run_id
    is assigned
    """
    source_conn, target_conn = two_empty_temp_db_connections

    source_path = path_to_dbfile(source_conn)
    target_path = path_to_dbfile(target_conn)

    source_exp1 = Experiment(conn=source_conn)
    source_ds_1_1 = DataSet(conn=source_conn, exp_id=source_exp1.exp_id)
    for ps in some_paramspecs[2].values():
        source_ds_1_1.add_parameter(ps)
    source_ds_1_1.mark_started()
    source_ds_1_1.add_result(
        {ps.name: 0.0
         for ps in some_paramspecs[2].values()})
    source_ds_1_1.mark_completed()

    source_exp2 = Experiment(conn=source_conn)
    source_ds_2_1 = DataSet(conn=source_conn, exp_id=source_exp2.exp_id)
    for ps in some_paramspecs[2].values():
        source_ds_2_1.add_parameter(ps)
    source_ds_2_1.mark_started()
    source_ds_2_1.add_result(
        {ps.name: 0.0
         for ps in some_paramspecs[2].values()})
    source_ds_2_1.mark_completed()
    source_ds_2_2 = DataSet(conn=source_conn,
                            exp_id=source_exp2.exp_id,
                            name="customname")
    for ps in some_paramspecs[2].values():
        source_ds_2_2.add_parameter(ps)
    source_ds_2_2.mark_started()
    source_ds_2_2.add_result(
        {ps.name: 0.0
         for ps in some_paramspecs[2].values()})
    source_ds_2_2.mark_completed()

    extract_runs_into_db(source_path, target_path, source_ds_2_2.run_id)

    # The target ds ought to have a runs table "customname-1-1"
    # and ought to be the same dataset as its "ancestor"
    target_ds = DataSet(conn=target_conn, run_id=1)

    assert target_ds.table_name == "customname-1-1"
    assert target_ds.the_same_dataset_as(source_ds_2_2)
Пример #14
0
def test_load_by_X_functions(two_empty_temp_db_connections, some_paramspecs):
    """
    Test some different loading functions
    """
    source_conn, target_conn = two_empty_temp_db_connections

    source_path = path_to_dbfile(source_conn)
    target_path = path_to_dbfile(target_conn)

    source_exp1 = Experiment(conn=source_conn)
    source_ds_1_1 = DataSet(conn=source_conn, exp_id=source_exp1.exp_id)
    for ps in some_paramspecs[2].values():
        source_ds_1_1.add_parameter(ps)
    source_ds_1_1.mark_started()
    source_ds_1_1.add_result(
        {ps.name: 0.0
         for ps in some_paramspecs[2].values()})
    source_ds_1_1.mark_completed()

    source_exp2 = Experiment(conn=source_conn)
    source_ds_2_1 = DataSet(conn=source_conn, exp_id=source_exp2.exp_id)
    for ps in some_paramspecs[2].values():
        source_ds_2_1.add_parameter(ps)
    source_ds_2_1.mark_started()
    source_ds_2_1.add_result(
        {ps.name: 0.0
         for ps in some_paramspecs[2].values()})
    source_ds_2_1.mark_completed()
    source_ds_2_2 = DataSet(conn=source_conn,
                            exp_id=source_exp2.exp_id,
                            name="customname")
    for ps in some_paramspecs[2].values():
        source_ds_2_2.add_parameter(ps)
    source_ds_2_2.mark_started()
    source_ds_2_2.add_result(
        {ps.name: 0.0
         for ps in some_paramspecs[2].values()})
    source_ds_2_2.mark_completed()

    extract_runs_into_db(source_path, target_path, source_ds_2_2.run_id)

    test_ds = load_by_guid(source_ds_2_2.guid, target_conn)
    assert source_ds_2_2.the_same_dataset_as(test_ds)

    test_ds = load_by_id(1, target_conn)
    assert source_ds_2_2.the_same_dataset_as(test_ds)

    test_ds = load_by_counter(1, 1, target_conn)
    assert source_ds_2_2.the_same_dataset_as(test_ds)
Пример #15
0
def test_dataset_states():
    """
    Test the interplay between pristine, started, running, and completed
    """

    ds = DataSet()

    assert ds.pristine is True
    assert ds.running is False
    assert ds.started is False
    assert ds.completed is False

    with pytest.raises(RuntimeError, match='Can not mark DataSet as complete '
                                           'before it has '
                                           'been marked as started.'):
        ds.mark_completed()

    match = ('This DataSet has not been marked as started. '
             'Please mark the DataSet as started before '
             'adding results to it.')
    with pytest.raises(RuntimeError, match=match):
        ds.add_results([{'x': 1}])

    parameter = ParamSpecBase(name='single', paramtype='numeric',
                              label='', unit='N/A')
    idps = InterDependencies_(standalones=(parameter,))
    ds.set_interdependencies(idps)

    ds.mark_started()

    assert ds.pristine is False
    assert ds.running is True
    assert ds.started is True
    assert ds.completed is False

    match = ('Can not set interdependencies on a DataSet that has '
             'been started.')

    with pytest.raises(RuntimeError, match=match):
        ds.set_interdependencies(idps)

    ds.add_results([{parameter.name: 1}])

    ds.mark_completed()

    assert ds.pristine is False
    assert ds.running is False
    assert ds.started is True
    assert ds.completed is True

    match = ('Can not set interdependencies on a DataSet that has '
             'been started.')

    with pytest.raises(RuntimeError, match=match):
        ds.set_interdependencies(idps)

    match = ('This DataSet is complete, no further '
             'results can be added to it.')

    with pytest.raises(CompletedError, match=match):
        ds.add_results([{parameter.name: 1}])
Пример #16
0
class Runner:
    """
    Context manager for the measurement.

    Lives inside a :class:`Measurement` and should never be instantiated
    outside a Measurement.

    This context manager handles all the dirty business of writing data
    to the database. Additionally, it may perform experiment bootstrapping
    and clean-up after a measurement.
    """
    def __init__(
        self,
        enteractions: Sequence[ActionType],
        exitactions: Sequence[ActionType],
        experiment: Optional[Experiment] = None,
        station: Optional[Station] = None,
        write_period: Optional[float] = None,
        interdeps: InterDependencies_ = InterDependencies_(),
        name: str = "",
        subscribers: Optional[Sequence[SubscriberType]] = None,
        parent_datasets: Sequence[Mapping[Any, Any]] = (),
        extra_log_info: str = "",
        write_in_background: bool = False,
        shapes: Optional[Shapes] = None,
        in_memory_cache: bool = True,
        dataset_class: DataSetType = DataSetType.DataSet,
    ) -> None:

        self._dataset_class = dataset_class
        self.write_period = self._calculate_write_period(
            write_in_background, write_period)

        self.enteractions = enteractions
        self.exitactions = exitactions
        self.subscribers: Sequence[SubscriberType]
        if subscribers is None:
            self.subscribers = []
        else:
            self.subscribers = subscribers
        self.experiment = experiment
        self.station = station
        self._interdependencies = interdeps
        self._shapes: Shapes = shapes
        self.name = name if name else 'results'
        self._parent_datasets = parent_datasets
        self._extra_log_info = extra_log_info
        self._write_in_background = write_in_background
        self._in_memory_cache = in_memory_cache
        self.ds: DataSetProtocol

    @staticmethod
    def _calculate_write_period(write_in_background: bool,
                                write_period: Optional[float]) -> float:
        write_period_changed_from_default = (
            write_period is not None
            and write_period != qc.config.defaults.dataset.write_period)
        if write_in_background and write_period_changed_from_default:
            warnings.warn(f"The specified write period of {write_period} s "
                          "will be ignored, since write_in_background==True")
        if write_in_background:
            return 0.0
        if write_period is None:
            write_period = qc.config.dataset.write_period
        return float(write_period)

    def __enter__(self) -> DataSaver:
        # TODO: should user actions really precede the dataset?
        # first do whatever bootstrapping the user specified

        for func, args in self.enteractions:
            func(*args)

        dataset_class: Type[DataSetProtocol]

        # next set up the "datasaver"
        if self.experiment is not None:
            exp_id: Optional[int] = self.experiment.exp_id
            path_to_db: Optional[str] = self.experiment.path_to_db
            conn: Optional["ConnectionPlus"] = self.experiment.conn
        else:
            exp_id = None
            path_to_db = None
            conn = None

        if self._dataset_class is DataSetType.DataSet:
            self.ds = DataSet(
                name=self.name,
                exp_id=exp_id,
                conn=conn,
                in_memory_cache=self._in_memory_cache,
            )
        elif self._dataset_class is DataSetType.DataSetInMem:
            if self._in_memory_cache is False:
                raise RuntimeError("Cannot disable the in memory cache for a "
                                   "dataset that is only in memory.")
            self.ds = DataSetInMem._create_new_run(
                name=self.name,
                exp_id=exp_id,
                path_to_db=path_to_db,
            )
        else:
            raise RuntimeError("Does not support any other dataset classes")

        # .. and give the dataset a snapshot as metadata
        if self.station is None:
            station = Station.default
        else:
            station = self.station

        if station is not None:
            snapshot = station.snapshot()
        else:
            snapshot = {}

        self.ds.prepare(
            snapshot=snapshot,
            interdeps=self._interdependencies,
            write_in_background=self._write_in_background,
            shapes=self._shapes,
            parent_datasets=self._parent_datasets,
        )

        # register all subscribers
        if isinstance(self.ds, DataSet):
            for (callble, state) in self.subscribers:
                # We register with minimal waiting time.
                # That should make all subscribers be called when data is flushed
                # to the database
                log.debug(f"Subscribing callable {callble} with state {state}")
                self.ds.subscribe(callble,
                                  min_wait=0,
                                  min_count=1,
                                  state=state)

        print(f"Starting experimental run with id: {self.ds.captured_run_id}."
              f" {self._extra_log_info}")
        log.info(f"Starting measurement with guid: {self.ds.guid}, "
                 f'sample_name: "{self.ds.sample_name}", '
                 f'exp_name: "{self.ds.exp_name}", '
                 f'ds_name: "{self.ds.name}". '
                 f"{self._extra_log_info}")
        log.info(f"Using background writing: {self._write_in_background}")

        self.datasaver = DataSaver(dataset=self.ds,
                                   write_period=self.write_period,
                                   interdeps=self._interdependencies)

        return self.datasaver

    def __exit__(self, exception_type: Optional[Type[BaseException]],
                 exception_value: Optional[BaseException],
                 traceback: Optional[TracebackType]) -> None:
        with DelayedKeyboardInterrupt():
            self.datasaver.flush_data_to_database(block=True)

            # perform the "teardown" events
            for func, args in self.exitactions:
                func(*args)

            if exception_type:
                # if an exception happened during the measurement,
                # log the exception
                stream = io.StringIO()
                tb_module.print_exception(exception_type,
                                          exception_value,
                                          traceback,
                                          file=stream)
                exception_string = stream.getvalue()
                log.warning('An exception occured in measurement with guid: '
                            f'{self.ds.guid};\nTraceback:\n{exception_string}')
                self.ds.add_metadata("measurement_exception", exception_string)

            # and finally mark the dataset as closed, thus
            # finishing the measurement
            # Note that the completion of a dataset entails waiting for the
            # write thread to terminate (iff the write thread has been started)
            self.ds.mark_completed()
            if get_data_export_automatic():
                self.datasaver.export_data()
            log.info(f'Finished measurement with guid: {self.ds.guid}. '
                     f'{self._extra_log_info}')
            if isinstance(self.ds, DataSet):
                self.ds.unsubscribe_all()
Пример #17
0
def test_basic_extraction(two_empty_temp_db_connections, some_interdeps):
    source_conn, target_conn = two_empty_temp_db_connections

    source_path = path_to_dbfile(source_conn)
    target_path = path_to_dbfile(target_conn)

    type_casters = {
        'numeric':
        float,
        'array': (lambda x: np.array(x)
                  if hasattr(x, '__iter__') else np.array([x])),
        'text':
        str
    }

    source_exp = Experiment(conn=source_conn)
    source_dataset = DataSet(conn=source_conn, name="basic_copy_paste_name")

    with pytest.raises(RuntimeError) as excinfo:
        extract_runs_into_db(source_path, target_path, source_dataset.run_id)

    assert error_caused_by(excinfo, ('Dataset not completed. An incomplete '
                                     'dataset can not be copied. The '
                                     'incomplete dataset has GUID: '
                                     f'{source_dataset.guid} and run_id: '
                                     f'{source_dataset.run_id}'))

    source_dataset.set_interdependencies(some_interdeps[0])

    source_dataset.mark_started()

    for value in range(10):
        result = {
            ps.name: type_casters[ps.type](value)
            for ps in some_interdeps[0].paramspecs
        }
        source_dataset.add_result(result)

    source_dataset.add_metadata('goodness', 'fair')
    source_dataset.add_metadata('test', True)

    source_dataset.mark_completed()

    extract_runs_into_db(source_path, target_path, source_dataset.run_id)

    target_exp = Experiment(conn=target_conn, exp_id=1)

    length1 = len(target_exp)
    assert length1 == 1

    # trying to insert the same run again should be a NOOP
    with raise_if_file_changed(target_path):
        extract_runs_into_db(source_path, target_path, source_dataset.run_id)

    assert len(target_exp) == length1

    target_dataset = DataSet(conn=target_conn, run_id=1)

    # Now make the interesting comparisons: are the target objects the same as
    # the source objects?

    assert source_dataset.the_same_dataset_as(target_dataset)

    source_data = source_dataset.get_data(
        *source_dataset.parameters.split(','))
    target_data = target_dataset.get_data(
        *target_dataset.parameters.split(','))

    assert source_data == target_data

    exp_attrs = [
        'name', 'sample_name', 'format_string', 'started_at', 'finished_at'
    ]

    for exp_attr in exp_attrs:
        assert getattr(source_exp, exp_attr) == getattr(target_exp, exp_attr)

    # trying to insert the same run again should be a NOOP
    with raise_if_file_changed(target_path):
        extract_runs_into_db(source_path, target_path, source_dataset.run_id)
Пример #18
0
def test_correct_experiment_routing(two_empty_temp_db_connections,
                                    some_interdeps):
    """
    Test that existing experiments are correctly identified AND that multiple
    insertions of the same runs don't matter (run insertion is idempotent)
    """
    source_conn, target_conn = two_empty_temp_db_connections

    source_exp_1 = Experiment(conn=source_conn)

    # make 5 runs in first experiment

    exp_1_run_ids = []
    for _ in range(5):

        source_dataset = DataSet(conn=source_conn, exp_id=source_exp_1.exp_id)
        exp_1_run_ids.append(source_dataset.run_id)

        source_dataset.set_interdependencies(some_interdeps[1])

        source_dataset.mark_started()

        for val in range(10):
            source_dataset.add_result(
                {name: val
                 for name in some_interdeps[1].names})
        source_dataset.mark_completed()

    # make a new experiment with 1 run

    source_exp_2 = Experiment(conn=source_conn)
    ds = DataSet(conn=source_conn, exp_id=source_exp_2.exp_id, name="lala")
    exp_2_run_ids = [ds.run_id]

    ds.set_interdependencies(some_interdeps[1])

    ds.mark_started()

    for val in range(10):
        ds.add_result({name: val for name in some_interdeps[1].names})

    ds.mark_completed()

    source_path = path_to_dbfile(source_conn)
    target_path = path_to_dbfile(target_conn)

    # now copy 2 runs
    extract_runs_into_db(source_path, target_path, *exp_1_run_ids[:2])

    target_exp1 = Experiment(conn=target_conn, exp_id=1)

    assert len(target_exp1) == 2

    # copy two other runs, one of them already in
    extract_runs_into_db(source_path, target_path, *exp_1_run_ids[1:3])

    assert len(target_exp1) == 3

    # insert run from different experiment
    extract_runs_into_db(source_path, target_path, ds.run_id)

    assert len(target_exp1) == 3

    target_exp2 = Experiment(conn=target_conn, exp_id=2)

    assert len(target_exp2) == 1

    # finally insert every single run from experiment 1

    extract_runs_into_db(source_path, target_path, *exp_1_run_ids)

    # check for idempotency once more by inserting all the runs but in another
    # order
    with raise_if_file_changed(target_path):
        extract_runs_into_db(source_path, target_path, *exp_1_run_ids[::-1])

    target_exps = get_experiments(target_conn)

    assert len(target_exps) == 2
    assert len(target_exp1) == 5
    assert len(target_exp2) == 1

    # check that all the datasets match up
    for run_id in exp_1_run_ids + exp_2_run_ids:
        source_ds = DataSet(conn=source_conn, run_id=run_id)
        target_ds = load_by_guid(guid=source_ds.guid, conn=target_conn)

        assert source_ds.the_same_dataset_as(target_ds)

        source_data = source_ds.get_data(*source_ds.parameters.split(','))
        target_data = target_ds.get_data(*target_ds.parameters.split(','))

        assert source_data == target_data