def test_combine_runs(two_empty_temp_db_connections,
                      empty_temp_db_connection,
                      some_interdeps):
    """
    Test that datasets that are exported in random order from 2 databases
    can be reloaded by the original captured_run_id and the experiment name.
    """
    source_conn_1, source_conn_2 = two_empty_temp_db_connections
    target_conn = empty_temp_db_connection

    source_1_exp = Experiment(conn=source_conn_1,
                              name='exp1',
                              sample_name='no_sample')
    source_1_datasets = [DataSet(conn=source_conn_1,
                                 exp_id=source_1_exp.exp_id)
                         for _ in range(10)]

    source_2_exp = Experiment(conn=source_conn_2,
                              name='exp2',
                              sample_name='no_sample')
    source_2_datasets = [DataSet(conn=source_conn_2,
                                 exp_id=source_2_exp.exp_id)
                         for _ in range(10)]

    source_all_datasets = source_1_datasets + source_2_datasets

    shuffled_datasets = source_all_datasets.copy()
    random.shuffle(shuffled_datasets)

    for ds in source_all_datasets:
        ds.set_interdependencies(some_interdeps[1])
        ds.mark_started()
        ds.add_results([{name: 0.0 for name in some_interdeps[1].names}])
        ds.mark_completed()

    # now let's insert all datasets in random order
    for ds in shuffled_datasets:
        extract_runs_into_db(ds.conn.path_to_dbfile,
                             target_conn.path_to_dbfile,
                             ds.run_id)

    for ds in source_all_datasets:
        loaded_ds = load_by_run_spec(captured_run_id=ds.captured_run_id,
                                     experiment_name=ds.exp_name,
                                     conn=target_conn)
        assert ds.the_same_dataset_as(loaded_ds)

    for ds in source_all_datasets:
        loaded_ds = load_by_run_spec(captured_counter=ds.captured_counter,
                                     experiment_name=ds.exp_name,
                                     conn=target_conn)
        assert ds.the_same_dataset_as(loaded_ds)

    # Now test that we generate the correct table for the guids above.
    # This could be split out into its own test, but the test above has
    # the useful side effect of setting up datasets for this test.
    guids = [ds.guid for ds in source_all_datasets]

    table = generate_dataset_table(guids, conn=target_conn)
    lines = table.split('\n')
    headers = re.split(r'\s+', lines[0].strip())

    cfg = qc.config
    guid_comp = cfg['GUID_components']

    # borrowed fallback logic from generate_guid: a sample component of 0
    # means "unset" and is replaced by the 0xaaaaaaaa placeholder
    sampleint = guid_comp['sample']
    if sampleint == 0:
        sampleint = int('a' * 8, base=16)

    # lines[0] is the header row and lines[1] a separator, so the data rows
    # start at index 2
    for i in range(2, len(lines)):
        split_line = re.split(r'\s+', lines[i].strip())
        mydict = {headers[j]: split_line[j] for j in range(len(split_line))}

        ds = load_by_guid(guids[i - 2], conn=target_conn)
        assert ds.captured_run_id == int(mydict['captured_run_id'])
        assert ds.captured_counter == int(mydict['captured_counter'])
        assert ds.exp_name == mydict['experiment_name']
        assert ds.sample_name == mydict['sample_name']
        assert int(mydict['sample_id']) == sampleint
        assert guid_comp['location'] == int(mydict['location'])
        assert guid_comp['work_station'] == int(mydict['work_station'])
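
# The sample-id fallback in test_combine_runs above mirrors what
# generate_guid does when the 'sample' GUID component is 0 (i.e. unset).
# A minimal standalone sketch of that fallback; _effective_sample_id is a
# hypothetical helper for illustration, not part of the qcodes API:
def _effective_sample_id(sample_component: int) -> int:
    # a zero sample component is treated as "unset" and replaced by the
    # 0xaaaaaaaa placeholder, so that is what the dataset table should show
    if sample_component == 0:
        return int('a' * 8, base=16)
    return sample_component
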
def test_correct_experiment_routing(two_empty_temp_db_connections,
                                    some_interdeps):
    """
    Test that existing experiments are correctly identified AND that multiple
    insertions of the same runs don't matter (run insertion is idempotent)
    """
    source_conn, target_conn = two_empty_temp_db_connections

    source_exp_1 = Experiment(conn=source_conn)

    # make 5 runs in the first experiment
    exp_1_run_ids = []
    for _ in range(5):
        source_dataset = DataSet(conn=source_conn, exp_id=source_exp_1.exp_id)
        exp_1_run_ids.append(source_dataset.run_id)

        source_dataset.set_interdependencies(some_interdeps[1])
        source_dataset.mark_started()

        for val in range(10):
            source_dataset.add_results(
                [{name: val for name in some_interdeps[1].names}])
        source_dataset.mark_completed()

    # make a new experiment with 1 run
    source_exp_2 = Experiment(conn=source_conn)
    ds = DataSet(conn=source_conn, exp_id=source_exp_2.exp_id, name="lala")
    exp_2_run_ids = [ds.run_id]

    ds.set_interdependencies(some_interdeps[1])
    ds.mark_started()

    for val in range(10):
        ds.add_results([{name: val for name in some_interdeps[1].names}])

    ds.mark_completed()

    source_path = path_to_dbfile(source_conn)
    target_path = path_to_dbfile(target_conn)

    # now copy 2 runs
    extract_runs_into_db(source_path, target_path, *exp_1_run_ids[:2])

    target_exp1 = Experiment(conn=target_conn, exp_id=1)

    assert len(target_exp1) == 2

    # copy two other runs, one of them already in the target
    extract_runs_into_db(source_path, target_path, *exp_1_run_ids[1:3])

    assert len(target_exp1) == 3

    # insert run from different experiment
    extract_runs_into_db(source_path, target_path, ds.run_id)

    assert len(target_exp1) == 3
    target_exp2 = Experiment(conn=target_conn, exp_id=2)
    assert len(target_exp2) == 1

    # finally insert every single run from experiment 1
    extract_runs_into_db(source_path, target_path, *exp_1_run_ids)

    # check for idempotency once more by inserting all the runs
    # but in another order
    with raise_if_file_changed(target_path):
        extract_runs_into_db(source_path, target_path, *exp_1_run_ids[::-1])

    target_exps = get_experiments(target_conn)

    assert len(target_exps) == 2
    assert len(target_exp1) == 5
    assert len(target_exp2) == 1

    # check that all the datasets match up
    for run_id in exp_1_run_ids + exp_2_run_ids:
        source_ds = DataSet(conn=source_conn, run_id=run_id)
        target_ds = load_by_guid(guid=source_ds.guid, conn=target_conn)

        assert source_ds.the_same_dataset_as(target_ds)

        source_data = source_ds.get_data(*source_ds.parameters.split(','))
        target_data = target_ds.get_data(*target_ds.parameters.split(','))

        assert source_data == target_data
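
# raise_if_file_changed (used above) is defined elsewhere in this test
# module. A minimal sketch of such a guard, assuming the file's modification
# time is an adequate proxy for "the file was written to"; the real helper
# may be implemented differently:
from contextlib import contextmanager
from os.path import getmtime


@contextmanager
def _raise_if_file_changed_sketch(path_to_file: str):
    # record the mtime before the guarded block runs ...
    pre_operation_time = getmtime(path_to_file)
    yield
    # ... and fail if anything wrote to the file in the meantime
    if getmtime(path_to_file) != pre_operation_time:
        raise RuntimeError(f'File {path_to_file} was modified.')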