def get_parameters_from_segments(self, dataframe: pd.DataFrame, labeled: List[dict], deleted: List[dict], model: ModelType) -> LearningInfo:
    """Collect per-segment learning parameters from the labeled segments.

    Builds a LearningInfo with confidences, segment centers, pattern
    timestamps and min-aligned pattern windows extracted from *labeled*.
    *deleted* is currently unused but kept for interface compatibility.

    Fix: the original return annotation said ``-> dict`` although a
    LearningInfo instance is returned; the annotation is corrected.
    """
    logging.debug('Start parsing segments')
    learning_info = LearningInfo()
    data = dataframe['value']
    for segment in labeled:
        confidence = utils.find_confidence(segment.data)[0]
        learning_info.confidence.append(confidence)
        segment_center = segment.center_index
        learning_info.segment_center_list.append(segment_center)
        learning_info.pattern_timestamp.append(segment.pattern_timestamp)
        aligned_segment = utils.get_interval(data, segment_center, self.state.window_size)
        aligned_segment = utils.subtract_min_without_nan(aligned_segment)
        # Skip segments whose window falls outside the data range.
        if len(aligned_segment) == 0:
            logging.warning('cant add segment to learning because segment is empty where segments center is: {}, window_size: {}, and len_data: {}'.format(
                segment_center, self.state.window_size, len(data)))
            continue
        learning_info.patterns_list.append(aligned_segment)
        # TODO: use Triangle/Stair types
        if model == ModelType.PEAK or model == ModelType.TROUGH:
            learning_info.pattern_height.append(utils.find_confidence(aligned_segment)[1])
            learning_info.patterns_value.append(aligned_segment.values.max())
        if model == ModelType.JUMP or model == ModelType.DROP:
            pattern_height, pattern_length = utils.find_parameters(segment.data, segment.from_index, model.value)
            learning_info.pattern_height.append(pattern_height)
            learning_info.pattern_width.append(pattern_length)
            # Value at the window center (the jump/drop transition point).
            learning_info.patterns_value.append(aligned_segment.values[self.state.window_size])
    logging.debug('Parsing segments ended correctly with learning_info: {}'.format(learning_info))
    return learning_info
def do_fit(
    self,
    dataframe: pd.DataFrame,
    labeled_segments: List[AnalyticSegment],
    deleted_segments: List[AnalyticSegment],
    learning_info: LearningInfo
) -> None:
    """Fit model state from labeled and deleted segments.

    Merges new segment centers into ``self.state.pattern_center``,
    rebuilds the averaged pattern model, and forwards convolution
    statistics to ``self._update_fiting_result``.

    Fix: removed the dead local ``delete_pattern_width`` which was
    initialized but never populated or read.
    """
    data = utils.cut_dataframe(dataframe)
    data = data['value']
    self.state.pattern_center = list(set(self.state.pattern_center + learning_info.segment_center_list))
    self.state.pattern_model = utils.get_av_model(learning_info.patterns_list)
    convolve_list = utils.get_convolve(self.state.pattern_center, self.state.pattern_model, data, self.state.window_size)
    # NOTE(review): correlation_list is computed but never used here —
    # kept for parity with sibling models; confirm whether it can be dropped.
    correlation_list = utils.get_correlation(self.state.pattern_center, self.state.pattern_model, data, self.state.window_size)
    height_list = learning_info.patterns_value
    del_conv_list = []
    # NOTE(review): delete_pattern_height / delete_pattern_timestamp are
    # collected but not passed to _update_fiting_result — confirm intent.
    delete_pattern_height = []
    delete_pattern_timestamp = []
    for segment in deleted_segments:
        delete_pattern_timestamp.append(segment.pattern_timestamp)
        deleted = utils.get_interval(data, segment.center_index, self.state.window_size)
        deleted = utils.subtract_min_without_nan(deleted)
        del_conv = scipy.signal.fftconvolve(deleted, self.state.pattern_model)
        if len(del_conv):
            del_conv_list.append(max(del_conv))
        delete_pattern_height.append(utils.find_confidence(deleted)[1])
    self._update_fiting_result(self.state, learning_info.confidence, convolve_list, del_conv_list, height_list)
def do_fit(self, dataframe: pd.DataFrame, labeled_segments: list, deleted_segments: list, learning_info: dict) -> None:
    """Refit the dict-based model state from labeled and deleted segments.

    Merges new segment centers, rebuilds the averaged pattern model and
    hands convolution statistics to ``self._update_fiting_result``.
    """
    series = utils.cut_dataframe(dataframe)['value']
    window_size = self.state['WINDOW_SIZE']
    known_centers = self.state.get('pattern_center', [])
    self.state['pattern_center'] = list(set(known_centers + learning_info['segment_center_list']))
    self.state['pattern_model'] = utils.get_av_model(learning_info['patterns_list'])
    convolve_list = utils.get_convolve(self.state['pattern_center'], self.state['pattern_model'], series, window_size)
    correlation_list = utils.get_correlation(self.state['pattern_center'], self.state['pattern_model'], series, window_size)
    height_list = learning_info['patterns_value']
    del_conv_list = []
    delete_pattern_width = []
    delete_pattern_height = []
    delete_pattern_timestamp = []
    for removed in deleted_segments:
        delete_pattern_timestamp.append(removed.pattern_timestamp)
        window = utils.get_interval(series, removed.center_index, window_size)
        window = utils.subtract_min_without_nan(window)
        removed_conv = scipy.signal.fftconvolve(window, self.state['pattern_model'])
        if len(removed_conv):
            del_conv_list.append(max(removed_conv))
        delete_pattern_height.append(utils.find_confidence(window)[1])
        delete_pattern_width.append(utils.find_width(window, False))
    self._update_fiting_result(self.state, learning_info['confidence'], convolve_list, del_conv_list, height_list)
def do_fit(self, dataframe: pd.DataFrame, labeled_segments: List[AnalyticSegment], deleted_segments: List[AnalyticSegment], learning_info: LearningInfo) -> None:
    """Refit model state and recompute convolution min/max bounds.

    Merges new segment centers (deduplicated and sorted), rebuilds the
    averaged pattern model, and stores convolution bounds for both
    labeled and deleted segments on ``self.state``.
    """
    series = utils.cut_dataframe(dataframe)['value']
    window = self.state.window_size
    self.state.pattern_center = utils.remove_duplicates_and_sort(self.state.pattern_center + learning_info.segment_center_list)
    self.state.pattern_model = utils.get_av_model(learning_info.patterns_list)
    convolve_list = utils.get_convolve(self.state.pattern_center, self.state.pattern_model, series, window)
    correlation_list = utils.get_correlation(self.state.pattern_center, self.state.pattern_model, series, window)
    del_conv_list = []
    delete_pattern_timestamp = []
    for removed in deleted_segments:
        delete_pattern_timestamp.append(removed.pattern_timestamp)
        removed_window = utils.get_interval(series, removed.center_index, window)
        removed_window = utils.subtract_min_without_nan(removed_window)
        removed_conv = scipy.signal.fftconvolve(removed_window, self.state.pattern_model)
        if len(removed_conv):
            del_conv_list.append(max(removed_conv))
    self.state.convolve_min, self.state.convolve_max = utils.get_min_max(convolve_list, window / 3)
    self.state.conv_del_min, self.state.conv_del_max = utils.get_min_max(del_conv_list, window)
def get_correlation(segments: list, av_model: list, data: pd.Series, window_size: int) -> list:
    """Return the Pearson correlation of each segment window with the averaged model.

    Each segment center in *segments* is expanded into a window of
    ``window_size`` around it, min-aligned and NaN-checked, then
    correlated against *av_model*.

    Fix: removed the dead ``p_value_list`` (collected but never read)
    and the useless pre-initialization of ``labeled_segment``.
    """
    correlation_list = []
    for segment in segments:
        labeled_segment = utils.get_interval(data, segment, window_size)
        labeled_segment = utils.subtract_min_without_nan(labeled_segment)
        labeled_segment = utils.check_nan_values(labeled_segment)
        # pearsonr returns (correlation, p-value); only the coefficient is used.
        correlation = pearsonr(labeled_segment, av_model)
        correlation_list.append(correlation[0])
    return correlation_list
def get_convolve(segments: list, av_model: list, data: pd.Series, window_size: int) -> list:
    """Collect convolution maxima for every labeled segment.

    For each segment center, two values are appended to the result: the
    maximum of the window's auto-convolution and the maximum of its
    convolution with the averaged model *av_model*.
    """
    convolve_list = []
    for center in segments:
        window = utils.get_interval(data, center, window_size)
        window = utils.subtract_min_without_nan(window)
        window = utils.check_nan_values(window)
        auto_conv = scipy.signal.fftconvolve(window, window)
        model_conv = scipy.signal.fftconvolve(window, av_model)
        if len(auto_conv) > 0:
            convolve_list.append(max(auto_conv))
        if len(model_conv) > 0:
            convolve_list.append(max(model_conv))
    return convolve_list
def do_detect(self, dataframe: pd.DataFrame) -> set:
    """Slide a 2x-window over the data, convolve with the pattern model,
    and return the set of detected indexes (shifted back to data space).

    Raises:
        ValueError: if the stored pattern model is all zeros.

    Fixes: the original annotation said ``-> list`` although a set is
    returned; the loop-invariant ``WINDOW_SIZE * 2`` is hoisted out of
    the loop.
    """
    data = utils.cut_dataframe(dataframe)
    data = data['value']
    pat_data = self.state['pattern_model']
    # An all-zero model means no usable labeled pattern was learned.
    if pat_data.count(0) == len(pat_data):
        raise ValueError('Labeled patterns must not be empty')
    double_window = self.state['WINDOW_SIZE'] * 2  # hoisted loop invariant
    self.all_conv = []
    for i in range(double_window, len(data)):
        watch_data = data[i - double_window: i]
        watch_data = utils.subtract_min_without_nan(watch_data)
        conv = scipy.signal.fftconvolve(watch_data, pat_data)
        self.all_conv.append(max(conv))
    all_conv_peaks = utils.peak_finder(self.all_conv, double_window)
    filtered = self.__filter_detection(all_conv_peaks, data)
    # Shift each detection by one window to map back onto the source index.
    return set(item + self.state['WINDOW_SIZE'] for item in filtered)
def __filter_detection(self, segments: list, data: list) -> set:
    """Filter candidate detections by height and convolution thresholds.

    Candidates closer than one window are merged (keeping the best 'min'
    pattern), then each survivor is rejected if its window has too many
    NaNs, its height is outside the learned bounds, or its convolution
    with the pattern model falls outside the learned ranges (or inside
    the deleted-segment range).

    Returns the surviving detections as a set.
    """
    delete_list = []
    variance_error = self.state['WINDOW_SIZE']
    close_patterns = utils.close_filtering(segments, variance_error)
    segments = utils.best_pattern(close_patterns, data, 'min')
    # No candidates or no trained centers: nothing can pass the filter.
    if len(segments) == 0 or len(self.state.get('pattern_center', [])) == 0:
        segments = []
        return segments
    pattern_data = self.state['pattern_model']
    # Tolerance bands derived from the fitted min/max statistics.
    up_height = self.state['height_max'] * (1 + self.HEIGHT_ERROR)
    low_height = self.state['height_min'] * (1 - self.HEIGHT_ERROR)
    up_conv = self.state['convolve_max'] * (1 + 1.5 * self.CONV_ERROR)
    low_conv = self.state['convolve_min'] * (1 - self.CONV_ERROR)
    up_del_conv = self.state['conv_del_max'] * (1 + self.DEL_CONV_ERROR)
    low_del_conv = self.state['conv_del_min'] * (1 - self.DEL_CONV_ERROR)
    for segment in segments:
        if segment > self.state['WINDOW_SIZE']:
            convol_data = utils.get_interval(data, segment, self.state['WINDOW_SIZE'])
            convol_data = utils.subtract_min_without_nan(convol_data)
            percent_of_nans = convol_data.isnull().sum() / len(convol_data)
            if percent_of_nans > 0.5:
                # Window is mostly NaN — unusable.
                delete_list.append(segment)
                continue
            elif 0 < percent_of_nans <= 0.5:
                nan_list = utils.find_nan_indexes(convol_data)
                convol_data = utils.nan_to_zero(convol_data, nan_list)
                # NOTE(review): pattern_data is reassigned here, so the
                # zeroed pattern carries over into later loop iterations
                # (and possibly later calls, if nan_to_zero mutates in
                # place) — looks unintended; confirm nan_to_zero semantics.
                pattern_data = utils.nan_to_zero(pattern_data, nan_list)
            conv = scipy.signal.fftconvolve(convol_data, pattern_data)
            pattern_height = convol_data.values.max()
            if pattern_height > up_height or pattern_height < low_height:
                delete_list.append(segment)
                continue
            if max(conv) > up_conv or max(conv) < low_conv:
                delete_list.append(segment)
                continue
            # Convolution inside the deleted-segment range: the user
            # explicitly rejected patterns like this one.
            if max(conv) < up_del_conv and max(conv) > low_del_conv:
                delete_list.append(segment)
        else:
            # Too close to the data start for a full window.
            delete_list.append(segment)
    for item in delete_list:
        segments.remove(item)
    return set(segments)
def __filter_detection(self, segments: Generator[int, None, None], data: pd.Series) -> Generator[int, None, None]:
    """Yield candidate indexes that pass the convolution and correlation thresholds.

    Each candidate is a ``(index, correlation)`` pair; a candidate is
    dropped when its window convolution with the pattern model is too
    weak, its correlation is below PEARSON_FACTOR, or its convolution
    falls inside the deleted-segment range.
    """
    if not self.state.pattern_center:
        return []
    half_window = self.state.window_size
    model = self.state.pattern_model
    for index, correlation in segments:
        chunk = data[index - half_window: index + half_window + 1]
        chunk = utils.subtract_min_without_nan(chunk)
        chunk_conv = scipy.signal.fftconvolve(chunk, model)
        if not len(chunk_conv):
            continue
        best_conv = max(chunk_conv)
        if best_conv < self.state.convolve_min * 0.8 or correlation < PEARSON_FACTOR:
            continue
        if self.state.conv_del_min * 0.98 < best_conv < self.state.conv_del_max * 1.02:
            continue
        yield index
def __filter_detection(self, segments: List[int], data: pd.Series) -> set:
    """Filter candidate detections by height and convolution thresholds.

    Candidates closer than one window are merged via get_best_pattern,
    then each survivor is rejected if its window has too many NaNs, its
    height is outside the learned bounds, or its convolution with the
    pattern model falls outside the learned ranges (or inside the
    deleted-segment range).

    Returns the surviving detections as a set.
    """
    delete_list = []
    variance_error = self.state.window_size
    close_patterns = utils.close_filtering(segments, variance_error)
    segments = self.get_best_pattern(close_patterns, data)
    # No candidates or no trained model: nothing can pass the filter.
    if len(segments) == 0 or len(self.state.pattern_model) == 0:
        return []
    pattern_data = self.state.pattern_model
    # Tolerance bands derived from the fitted min/max statistics.
    up_height = self.state.height_max * (1 + self.HEIGHT_ERROR)
    low_height = self.state.height_min * (1 - self.HEIGHT_ERROR)
    up_conv = self.state.convolve_max * (1 + 1.5 * self.CONV_ERROR)
    low_conv = self.state.convolve_min * (1 - self.CONV_ERROR)
    up_del_conv = self.state.conv_del_max * (1 + self.DEL_CONV_ERROR)
    low_del_conv = self.state.conv_del_min * (1 - self.DEL_CONV_ERROR)
    for segment in segments:
        if segment > self.state.window_size:
            convol_data = utils.get_interval(data, segment, self.state.window_size)
            convol_data = utils.subtract_min_without_nan(convol_data)
            percent_of_nans = convol_data.isnull().sum() / len(convol_data)
            if percent_of_nans > 0.5:
                # Window is mostly NaN — unusable.
                delete_list.append(segment)
                continue
            elif 0 < percent_of_nans <= 0.5:
                nan_list = utils.find_nan_indexes(convol_data)
                convol_data = utils.nan_to_zero(convol_data, nan_list)
                # NOTE(review): pattern_data is reassigned here, so the
                # zeroed pattern carries over into later loop iterations
                # (and possibly later calls, if nan_to_zero mutates in
                # place) — looks unintended; confirm nan_to_zero semantics.
                pattern_data = utils.nan_to_zero(pattern_data, nan_list)
            conv = scipy.signal.fftconvolve(convol_data, pattern_data)
            pattern_height = convol_data.values.max()
            if pattern_height > up_height or pattern_height < low_height:
                delete_list.append(segment)
                continue
            if max(conv) > up_conv or max(conv) < low_conv:
                delete_list.append(segment)
                continue
            # Convolution inside the deleted-segment range: the user
            # explicitly rejected patterns like this one.
            if max(conv) < up_del_conv and max(conv) > low_del_conv:
                delete_list.append(segment)
        else:
            # Too close to the data start for a full window.
            delete_list.append(segment)
    for item in delete_list:
        segments.remove(item)
    return set(segments)
def do_fit(self, dataframe: pd.DataFrame, labeled_segments: list, deleted_segments: list, learning_info: dict) -> None:
    """Refit the dict-based state: merge centers, rebuild the averaged
    model and store convolution min/max bounds for labeled and deleted
    segments.
    """
    series = utils.cut_dataframe(dataframe)['value']
    window_size = self.state['WINDOW_SIZE']
    known_centers = self.state.get('pattern_center', [])
    self.state['pattern_center'] = list(set(known_centers + learning_info['segment_center_list']))
    self.state['pattern_model'] = utils.get_av_model(learning_info['patterns_list'])
    convolve_list = utils.get_convolve(self.state['pattern_center'], self.state['pattern_model'], series, window_size)
    correlation_list = utils.get_correlation(self.state['pattern_center'], self.state['pattern_model'], series, window_size)
    del_conv_list = []
    delete_pattern_timestamp = []
    for removed in deleted_segments:
        delete_pattern_timestamp.append(removed.pattern_timestamp)
        removed_window = utils.get_interval(series, removed.center_index, window_size)
        removed_window = utils.subtract_min_without_nan(removed_window)
        removed_conv = scipy.signal.fftconvolve(removed_window, self.state['pattern_model'])
        if len(removed_conv):
            del_conv_list.append(max(removed_conv))
    self.state['convolve_min'], self.state['convolve_max'] = utils.get_min_max(convolve_list, window_size / 3)
    self.state['conv_del_min'], self.state['conv_del_max'] = utils.get_min_max(del_conv_list, window_size)
def do_fit(self, dataframe: pd.DataFrame, labeled_segments: List[AnalyticSegment], deleted_segments: List[AnalyticSegment], learning_info: LearningInfo) -> None:
    """Refit the stair model state and derive stair height/length bounds.

    Merges new segment centers (deduplicated and sorted), rebuilds the
    averaged model, forwards convolution statistics to
    ``self._update_fitting_result`` and stores integer stair height and
    length bounds (defaulting to 1 when no parameters were learned).
    """
    series = utils.cut_dataframe(dataframe)['value']
    window_size = self.state.window_size
    self.state.pattern_center = utils.remove_duplicates_and_sort(self.state.pattern_center + learning_info.segment_center_list)
    self.state.pattern_model = utils.get_av_model(learning_info.patterns_list)
    convolve_list = utils.get_convolve(self.state.pattern_center, self.state.pattern_model, series, window_size)
    correlation_list = utils.get_correlation(self.state.pattern_center, self.state.pattern_model, series, window_size)
    height_list = learning_info.patterns_value
    del_conv_list = []
    delete_pattern_timestamp = []
    for removed in deleted_segments:
        delete_pattern_timestamp.append(removed.pattern_timestamp)
        stair_window = utils.get_interval(series, removed.center_index, window_size)
        stair_window = utils.subtract_min_without_nan(stair_window)
        stair_conv = scipy.signal.fftconvolve(stair_window, self.state.pattern_model)
        if len(stair_conv) > 0:
            del_conv_list.append(max(stair_conv))
    self._update_fitting_result(self.state, learning_info.confidence, convolve_list, del_conv_list)
    self.state.stair_height = int(min(learning_info.pattern_height, default=1))
    self.state.stair_length = int(max(learning_info.pattern_width, default=1))
def test_subtract_min_with_nan(self):
    """With a leading NaN the remaining values come back unmodified."""
    source = pd.Series([np.nan, 2, 4, 1, 2, 4])
    expected = [2, 4, 1, 2, 4]
    actual = list(utils.subtract_min_without_nan(source)[1:])
    self.assertEqual(actual, expected)
def test_subtract_min_without_nan(self):
    """Without NaNs the series minimum is subtracted from every value."""
    source = pd.Series([1, 2, 4, 1, 2, 4])
    expected = [0, 1, 3, 0, 1, 3]
    actual = list(utils.subtract_min_without_nan(source))
    self.assertEqual(actual, expected)