Example #1
0
    def select_events(data_path: Path) -> xr.DataArray:
        """
        Select breakdown events from the context data and load their features.

        :param data_path: path to the directory holding the context data file
        :return: data array with the selected events' feature data, with the
            label and metadata attached as per-event coordinates
        """
        # keep only events flagged as breakdown within 20 ms
        selection = dataset_utils.select_events_from_list(
            context_data_file_path=data_path / "context.hdf",
            selection_list=["is_bd_in_20ms"])

        # load the signal features for the selected events
        features = [
            "PEI Amplitude",
            "PSI Amplitude",
            "PSR Amplitude",
            "PKI Amplitude",
            "DC Up",
            "DC Down",
        ]
        data_array = dataset_utils.read_features_from_selection(
            data_path, features, selection)

        # fetch label and metadata for the same selection
        is_bd_in_20ms, timestamp, run_no = \
            dataset_utils.read_label_and_meta_data_from_selection(
                data_path, "is_bd_in_20ms", selection)

        # attach label and metadata as per-event coordinates
        return data_array.assign_coords(
            is_bd_in_20ms=("event", is_bd_in_20ms),
            run_no=("event", run_no),
            timestamp=("event", timestamp),
        )
Example #2
0
def test__select_events_from_list(tmpdir):
    """
    Test select_events_from_list() function
    """
    # ARRANGE: build a dummy context HDF file with known timestamps and flags
    hdf_path = tmpdir.join("dummy.hdf")
    flags = np.ones((6, ), dtype=bool)
    event_stamps = np.array([np.datetime64(ts) for ts in (
        '2021-08-18T17:59:00',
        '2021-08-18T17:59:04',
        '2021-08-18T17:59:02',
        '2021-08-18T17:59:06',
        '2021-08-18T17:59:07',
        '2021-08-18T17:59:08',
    )])
    trend_stamps = np.array([np.datetime64(ts) for ts in (
        '2021-08-18T17:59:00',
        '2021-08-18T17:59:01',
        '2021-08-18T17:59:02',
        '2021-08-18T17:59:03',
        '2021-08-18T17:59:08',
        '2021-08-18T17:59:09',
    )])
    with h5py.File(hdf_path, 'w') as f:
        # datetime64 values must be stored via h5py's opaque dtype
        f.create_dataset(
            "Timestamp",
            data=event_stamps.astype(h5py.opaque_dtype(event_stamps.dtype)))
        f.create_dataset(
            "PrevTrendData/Timestamp",
            data=trend_stamps.astype(h5py.opaque_dtype(trend_stamps.dtype)))
        for key in ("clic_label/is_healthy", "run_no", "test1", "test2",
                    "PSI Amplitude/pulse_amplitude"):
            f.create_dataset(key, data=flags)

    expected = np.array([False, True, False, True, False, False])

    # ACT: fixed seed so any random sub-sampling inside the helper is stable
    np.random.seed(42)
    result = dataset_utils.select_events_from_list(hdf_path,
                                                   ["test1", "test2"])

    # ASSERT
    assert (result == expected).all()
Example #3
0
    def select_events(data_path: Path) -> xr.DataArray:
        """
        selection of events in data
        :param data_path: path to context data file
        :return: data array of trend-data features for the selected breakdown
            events (dims: event, sample, feature), with label and metadata
            attached as per-event coordinates
        """
        # select only events with breakdown in 20 ms + some healthy events
        bd_selection_list = ["is_bd_in_20ms"]
        selection = dataset_utils.select_events_from_list(
            context_data_file_path=data_path / "context.hdf",
            selection_list=bd_selection_list)

        # read features into data_array
        feature_list = [
            "Loadside win", "Tubeside win", "Collector", "Gun", "IP before PC",
            "PC IP", "WG IP", "IP Load", "IP before structure",
            "US Beam Axis IP", "Klystron Flange Temp", "Load Temp",
            "PC Left Cavity Temp", "PC Right Cavity Temp", "Bunker WG Temp",
            "Structure Input Temp", "Chiller 1", "Chiller 2", "Chiller 3",
            "PKI FT avg", "PSI FT avg", "PSR FT avg", "PSI max", "PSR max",
            "PEI max", "DC Down min", "DC Up min", "PSI Pulse Width"
        ]
        label_name = "is_bd_in_20ms"

        with h5py.File(data_path / "context.hdf") as file:
            # Get real timestamp: trend-data timestamp preceding each
            # selected event
            timestamp_trend_selection = dataset_utils.read_hdf_dataset(
                file, "PrevTrendData/Timestamp")[selection]
            # remove duplicate timestamps.
            # NOTE: np.unique also SORTS, so unique_selection re-orders the
            # events into trend-timestamp order; the same index array is
            # applied to every per-event array below to keep them aligned.
            timestamp_trend_selection, unique_selection = np.unique(
                timestamp_trend_selection, return_index=True)
            # Get label and meta data, filtered and re-ordered consistently
            # with the deduplicated trend timestamps
            is_bd_in_20ms = dataset_utils.read_hdf_dataset(
                file, label_name)[selection]
            is_bd_in_20ms = is_bd_in_20ms[unique_selection]
            timestamp = dataset_utils.read_hdf_dataset(file,
                                                       "Timestamp")[selection]
            timestamp = timestamp[unique_selection]
            run_no = dataset_utils.read_hdf_dataset(file, "run_no")[selection]
            run_no = run_no[unique_selection]

        # Get selected features
        with h5py.File(data_path / "TrendDataFull.hdf") as file:
            # Read trend data timestamps and compare to selected; boolean
            # mask over the full trend-data file
            trend_timestamp = file["Timestamp"][:]
            trend_selection = np.in1d(trend_timestamp,
                                      timestamp_trend_selection)

            # Create filter for selecting two previous trend data rows.
            # NOTE(review): presumably shift -1/-2 marks the rows one/two
            # positions BEFORE each selected row — confirm against the
            # shift_values() sign convention.
            trend_selection_one_before = dataset_utils.shift_values(
                np.array(trend_selection), -1, fill_value=False)
            trend_selection_two_before = dataset_utils.shift_values(
                np.array(trend_selection), -2, fill_value=False)

            # Read selected features: per event, 3 samples
            # (two-before, one-before, current trend row) x features
            data_read = np.empty(shape=(np.sum(trend_selection), 3,
                                        len(feature_list)))
            for feature_ind, feature in enumerate(feature_list):
                data_read[:, 0, feature_ind] = dataset_utils.read_hdf_dataset(
                    file, feature)[trend_selection_two_before]
                data_read[:, 1, feature_ind] = dataset_utils.read_hdf_dataset(
                    file, feature)[trend_selection_one_before]
                data_read[:, 2, feature_ind] = dataset_utils.read_hdf_dataset(
                    file, feature)[trend_selection]

        # Create xarray DataArray; sanitize feature names so they are valid
        # coordinate labels (no "/" or spaces)
        dim_names = ["event", "sample", "feature"]
        feature_names = [
            feature.replace("/", "__").replace(" ", "_")
            for feature in feature_list
        ]
        data_array = xr.DataArray(data=data_read,
                                  dims=dim_names,
                                  coords={"feature": feature_names})
        # add label to data_array
        data_array = data_array.assign_coords(is_bd_in_20ms=("event",
                                                             is_bd_in_20ms))
        # add meta data
        data_array = data_array.assign_coords(run_no=("event", run_no))
        data_array = data_array.assign_coords(timestamp=("event", timestamp))
        return data_array