def add_sensor_component_feature(self, sliding_window: SlidingWindow,
                                 sensor_component: SensorComponent,
                                 feature: Feature, feature_df: DataFrame):
    # Cache the feature DataFrame computed for one sensor component under the
    # given sliding window. The split for this sliding window must already be
    # cached, and the (sensor component, feature) pair must not be cached yet.
    self.is_valid_data_set_manager()
    training_data_set = self.workspace_repository.get_training_data_set(
        self.workspace_id)
    assert str(sliding_window) in training_data_set.feature_extraction_cache
    file_IDs_dict = training_data_set.feature_extraction_cache[str(
        sliding_window)].sensor_component_feature_df_file_IDs
    assert (sensor_component not in file_IDs_dict
            or feature not in file_IDs_dict[sensor_component])
    # Persist the serialized DataFrame and keep only its file ID in the cache.
    file_ID = self.file_repository.put_file(
        FeatureExtractionData.serialize_sensor_component_feature_df(feature_df))
    if sensor_component in file_IDs_dict:
        file_IDs_dict[sensor_component][feature] = file_ID
    else:
        file_IDs_dict[sensor_component] = {feature: file_ID}
    self.workspace_repository.set_training_data_set(self.workspace_id,
                                                    training_data_set)
def get_cached_sensor_component_feature(self, sliding_window: SlidingWindow,
                                        sensor_component: SensorComponent,
                                        feature: Feature) -> DataFrame:
    # Load a previously cached feature DataFrame for one sensor component from
    # the file repository.
    self.is_valid_data_set_manager()
    training_data_set = self.workspace_repository.get_training_data_set(
        self.workspace_id)
    cache = training_data_set.feature_extraction_cache
    if (str(sliding_window) not in cache
            or sensor_component not in cache[str(
                sliding_window)].sensor_component_feature_df_file_IDs
            or feature not in cache[str(
                sliding_window)].sensor_component_feature_df_file_IDs[
                    sensor_component]):
        raise RuntimeError(
            "There is no cached feature extraction data with the given "
            "sliding window, sensor component and feature")
    file_ID = cache[str(sliding_window)].sensor_component_feature_df_file_IDs[
        sensor_component][feature]
    return FeatureExtractionData.deserialize_sensor_component_feature_df(
        self.file_repository.get_file(file_ID))
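# Hedged usage sketch (not from the repository): shows how the two methods
# above pair up. The DataSetManager and SlidingWindow instances are taken as
# parameters because their constructors are not shown here, and the column
# name of the feature DataFrame is an assumption made only for illustration.
# It also assumes the split for this sliding window was already cached via
# add_split_to_windows.
def _example_cache_and_fetch_feature(data_set_manager, sliding_window):
    # One feature value per data window is assumed here.
    feature_df = DataFrame({"x_Accelerometer__minimum": [0.1, 0.3, 0.2]})
    # Cache the per-component feature DataFrame once ...
    data_set_manager.add_sensor_component_feature(sliding_window,
                                                  "x_Accelerometer",
                                                  Feature.MINIMUM, feature_df)
    # ... and read it back later without recomputing the feature.
    return data_set_manager.get_cached_sensor_component_feature(
        sliding_window, "x_Accelerometer", Feature.MINIMUM)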
def get_feature_extraction_data_stub_5_1():
    return FeatureExtractionData(
        data_windows_df_file_ID=ObjectId("60707242b377f2b04ebf6737"),
        labels_of_data_windows_file_ID=ObjectId("6070724cdd135f6e692c0959"),
        sensor_component_feature_df_file_IDs={
            "x_Accelerometer": {
                Feature.MINIMUM: ObjectId("6070727ed1a8cbba14ea120f"),
                Feature.MAXIMUM: ObjectId("6070728ce5eed4a716c39858")
            },
            "y_Accelerometer": {
                Feature.MINIMUM: ObjectId("607072b8c5d9f62dc8af39c5"),
                Feature.MAXIMUM: ObjectId("607072bd0cfbe6257fd2ccaa")
            },
            "z_Accelerometer": {
                Feature.MINIMUM: ObjectId("607072c3c456e5966ed54877"),
                Feature.MAXIMUM: ObjectId("607072c761d523b4852bc4fb")
            },
            "x_Gyroscope": {
                Feature.MINIMUM: ObjectId("607072ccc9439e3d45c48a31"),
                Feature.MAXIMUM: ObjectId("607072d1d7184b370db7a7f0")
            },
            "y_Gyroscope": {
                Feature.MINIMUM: ObjectId("607072d5638d1b75b59e8990"),
                Feature.MAXIMUM: ObjectId("607072dae852a74abbef32ca")
            },
            "z_Gyroscope": {
                Feature.MINIMUM: ObjectId("607072dfcbc9bd19451f0be2"),
                Feature.MAXIMUM: ObjectId("607072e37e4784db534231bf")
            },
        })
def get_feature_extraction_data_stub_4_2():
    return FeatureExtractionData(
        data_windows_df_file_ID=ObjectId("6070730112e067c7e6bf65df"),
        labels_of_data_windows_file_ID=ObjectId("6070730644135e99d11e07e3"),
        sensor_component_feature_df_file_IDs={
            "x_Accelerometer": {
                Feature.MEAN: ObjectId("6070730a3cb3407c3cee5088"),
                Feature.MEDIAN: ObjectId("6070730f960d158336e60381")
            },
            "y_Accelerometer": {
                Feature.MEAN: ObjectId("60707313d126b4fc5b5aad21"),
                Feature.MEDIAN: ObjectId("6070731781da40c314d7fa59")
            },
            "z_Accelerometer": {
                Feature.MEAN: ObjectId("6070731c86cd07184cb380ec"),
                Feature.MEDIAN: ObjectId("607073213b96af39b7103af7")
            },
            "x_Gyroscope": {
                Feature.MEAN: ObjectId("60707326750a110c0518ff57"),
                Feature.MEDIAN: ObjectId("6070732a0464a34f7737b1c4")
            },
            "y_Gyroscope": {
                Feature.MEAN: ObjectId("6070732e6c7d7545d2c3a6a5"),
                Feature.MEDIAN: ObjectId("6070733273f8f9217b07af9d")
            },
            "z_Gyroscope": {
                Feature.MEAN: ObjectId("6070733ab4862c8a4ea5591f"),
                Feature.MEDIAN: ObjectId("6070733e37039f4f3f0a2bf1")
            },
        })
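# Hedged test sketch (assumption, not from the repository): a stub such as
# get_feature_extraction_data_stub_5_1 can pre-populate a training data set's
# feature_extraction_cache so the cache-reading paths can be exercised without
# touching the file repository. Keying the cache by str(sliding_window) mirrors
# the manager methods above; the concrete string produced depends on
# SlidingWindow.__str__ and is not shown here.
def _example_seed_cache_with_stub(training_data_set, sliding_window):
    training_data_set.feature_extraction_cache[str(
        sliding_window)] = get_feature_extraction_data_stub_5_1()
    return training_data_set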
def add_split_to_windows(self, sliding_window: SlidingWindow,
                         data_windows: DataFrame,
                         labels_of_data_windows: List[str]):
    # Cache the result of splitting the data set into windows for the given
    # sliding window; each sliding window may only be cached once.
    self.is_valid_data_set_manager()
    training_data_set = self.workspace_repository.get_training_data_set(
        self.workspace_id)
    assert str(sliding_window) not in training_data_set.feature_extraction_cache
    # Persist the windows and their labels; the cache only stores file IDs.
    data_windows_df_file_ID = self.file_repository.put_file(
        FeatureExtractionData.serialize_data_windows_df(data_windows))
    labels_of_data_windows_file_ID = self.file_repository.put_file(
        FeatureExtractionData.serialize_labels_of_data_windows(
            labels_of_data_windows))
    res = FeatureExtractionData(
        data_windows_df_file_ID=data_windows_df_file_ID,
        labels_of_data_windows_file_ID=labels_of_data_windows_file_ID)
    training_data_set.feature_extraction_cache[str(sliding_window)] = res
    self.workspace_repository.set_training_data_set(self.workspace_id,
                                                    training_data_set)
def get_cached_split_to_windows(self,
                                sliding_window: SlidingWindow) -> DataFrame:
    # Load a previously cached window split (the data windows DataFrame) from
    # the file repository.
    self.is_valid_data_set_manager()
    training_data_set = self.workspace_repository.get_training_data_set(
        self.workspace_id)
    if str(sliding_window) not in training_data_set.feature_extraction_cache:
        raise RuntimeError(
            "There is no cached split to windows with the given sliding window")
    file_ID = training_data_set.feature_extraction_cache[str(
        sliding_window)].data_windows_df_file_ID
    return FeatureExtractionData.deserialize_data_windows_df(
        self.file_repository.get_file(file_ID))
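# Hedged usage sketch (not from the repository): caching a window split and
# reading it back. The shape of the windows DataFrame and the label values are
# assumptions made only for illustration; the DataSetManager and SlidingWindow
# instances are taken as parameters because their constructors are not shown.
def _example_cache_and_fetch_split(data_set_manager, sliding_window):
    data_windows = DataFrame({"x_Accelerometer": [0.1, 0.2, 0.3, 0.4]})
    labels_of_data_windows = ["Running", "Walking"]
    # Cache the split once (each sliding window may only be added once) ...
    data_set_manager.add_split_to_windows(sliding_window, data_windows,
                                          labels_of_data_windows)
    # ... and retrieve the windows DataFrame later without re-splitting.
    return data_set_manager.get_cached_split_to_windows(sliding_window)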