def select_events(data_path: Path) -> xr.DataArray:
    """
    Select breakdown events from the context data and assemble them into a
    labeled data array.
    :param data_path: path to the directory containing context.hdf
    :return: data array with features, label and meta data of selected events
    """
    # keep only events flagged with a breakdown in the next 20 ms
    selection = dataset_utils.select_events_from_list(
        context_data_file_path=data_path / "context.hdf",
        selection_list=["is_bd_in_20ms"])

    # pull the feature channels for the selected events
    features = ["PEI Amplitude", "PSI Amplitude", "PSR Amplitude",
                "PKI Amplitude", "DC Up", "DC Down"]
    data_array = dataset_utils.read_features_from_selection(data_path, features, selection)

    # fetch the label together with its meta data
    is_bd_in_20ms, timestamp, run_no = dataset_utils.read_label_and_meta_data_from_selection(
        data_path, "is_bd_in_20ms", selection)

    # attach label and meta data as per-event coordinates in a single call
    return data_array.assign_coords(
        is_bd_in_20ms=("event", is_bd_in_20ms),
        run_no=("event", run_no),
        timestamp=("event", timestamp))
def test__select_events_from_list(tmpdir):
    """
    Test select_events_from_list() function
    """
    # ARRANGE
    file_path = tmpdir.join("dummy.hdf")
    event_timestamps = np.array([
        np.datetime64('2021-08-18T17:59:00'),
        np.datetime64('2021-08-18T17:59:04'),
        np.datetime64('2021-08-18T17:59:02'),
        np.datetime64('2021-08-18T17:59:06'),
        np.datetime64('2021-08-18T17:59:07'),
        np.datetime64('2021-08-18T17:59:08')])
    trend_timestamps = np.array([
        np.datetime64('2021-08-18T17:59:00'),
        np.datetime64('2021-08-18T17:59:01'),
        np.datetime64('2021-08-18T17:59:02'),
        np.datetime64('2021-08-18T17:59:03'),
        np.datetime64('2021-08-18T17:59:08'),
        np.datetime64('2021-08-18T17:59:09')])
    all_true = np.ones((6, ), dtype=bool)
    # build a minimal context file with the datasets the function reads
    with h5py.File(file_path, 'w') as f:
        f.create_dataset(
            "Timestamp",
            data=event_timestamps.astype(h5py.opaque_dtype(event_timestamps.dtype)))
        f.create_dataset(
            "PrevTrendData/Timestamp",
            data=trend_timestamps.astype(h5py.opaque_dtype(trend_timestamps.dtype)))
        for dataset_name in ("clic_label/is_healthy", "run_no", "test1", "test2",
                             "PSI Amplitude/pulse_amplitude"):
            f.create_dataset(dataset_name, data=all_true)
    selection_expected = np.array([False, True, False, True, False, False])

    # ACT
    np.random.seed(42)  # fix the RNG so the healthy-event subsample is reproducible
    selection_out = dataset_utils.select_events_from_list(file_path, ["test1", "test2"])

    # ASSERT
    assert (selection_out == selection_expected).all()
def select_events(data_path: Path) -> xr.DataArray:
    """
    Selection of events in data.

    Selects events with a breakdown in 20 ms (plus some healthy events),
    reads the trend-data features at each selected timestamp and at the two
    preceding trend samples, and assembles everything into a labeled array.

    :param data_path: path to the directory containing context.hdf and
        TrendDataFull.hdf
    :return: data array of shape (event, sample, feature) with the
        is_bd_in_20ms label and meta data attached as event coordinates
    """
    # select only events with breakdown in 20 ms + some healthy events
    bd_selection_list = ["is_bd_in_20ms"]
    selection = dataset_utils.select_events_from_list(
        context_data_file_path=data_path / "context.hdf",
        selection_list=bd_selection_list)

    # trend-data features to read for each selected event
    feature_list = [
        "Loadside win", "Tubeside win", "Collector", "Gun", "IP before PC",
        "PC IP", "WG IP", "IP Load", "IP before structure", "US Beam Axis IP",
        "Klystron Flange Temp", "Load Temp", "PC Left Cavity Temp",
        "PC Right Cavity Temp", "Bunker WG Temp", "Structure Input Temp",
        "Chiller 1", "Chiller 2", "Chiller 3", "PKI FT avg", "PSI FT avg",
        "PSR FT avg", "PSI max", "PSR max", "PEI max", "DC Down min",
        "DC Up min", "PSI Pulse Width"
    ]
    label_name = "is_bd_in_20ms"
    with h5py.File(data_path / "context.hdf") as file:
        # Get real timestamp of the trend data preceding each selected event
        timestamp_trend_selection = dataset_utils.read_hdf_dataset(
            file, "PrevTrendData/Timestamp")[selection]
        # remove duplicate timestamps; np.unique also returns the indices of
        # the first occurrences, used to deduplicate label/meta data below
        timestamp_trend_selection, unique_selection = np.unique(
            timestamp_trend_selection, return_index=True)

        # Get label and meta data, deduplicated consistently with the timestamps
        is_bd_in_20ms = dataset_utils.read_hdf_dataset(
            file, label_name)[selection]
        is_bd_in_20ms = is_bd_in_20ms[unique_selection]
        timestamp = dataset_utils.read_hdf_dataset(file, "Timestamp")[selection]
        timestamp = timestamp[unique_selection]
        run_no = dataset_utils.read_hdf_dataset(file, "run_no")[selection]
        run_no = run_no[unique_selection]

    # Get selected features
    with h5py.File(data_path / "TrendDataFull.hdf") as file:
        # Read trend data timestamps and compare to selected
        trend_timestamp = file["Timestamp"][:]
        trend_selection = np.in1d(trend_timestamp, timestamp_trend_selection)

        # Create filter for selecting two previous trend data
        trend_selection_one_before = dataset_utils.shift_values(
            np.array(trend_selection), -1, fill_value=False)
        trend_selection_two_before = dataset_utils.shift_values(
            np.array(trend_selection), -2, fill_value=False)

        # Read selected features: read each dataset from file once and slice
        # it three times instead of re-reading it per sample position
        data_read = np.empty(shape=(np.sum(trend_selection), 3, len(feature_list)))
        for feature_ind, feature in enumerate(feature_list):
            feature_data = dataset_utils.read_hdf_dataset(file, feature)
            data_read[:, 0, feature_ind] = feature_data[trend_selection_two_before]
            data_read[:, 1, feature_ind] = feature_data[trend_selection_one_before]
            data_read[:, 2, feature_ind] = feature_data[trend_selection]

    # Create xarray DataArray ("/" and " " are not usable in coordinate names)
    dim_names = ["event", "sample", "feature"]
    feature_names = [
        feature.replace("/", "__").replace(" ", "_") for feature in feature_list
    ]
    data_array = xr.DataArray(data=data_read,
                              dims=dim_names,
                              coords={"feature": feature_names})
    # add label to data_array
    data_array = data_array.assign_coords(is_bd_in_20ms=("event", is_bd_in_20ms))
    # add meta data
    data_array = data_array.assign_coords(run_no=("event", run_no))
    data_array = data_array.assign_coords(timestamp=("event", timestamp))
    return data_array