def find_interval(dataframe: pd.DataFrame) -> int:
    if len(dataframe) < 2:
        raise ValueError("Can't find interval: length of data must be at least 2")
    delta = utils.convert_pd_timestamp_to_ms(dataframe.timestamp[1]) - \
        utils.convert_pd_timestamp_to_ms(dataframe.timestamp[0])
    return delta
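# Hypothetical usage sketch for find_interval (not from the source): it assumes
# utils.convert_pd_timestamp_to_ms returns epoch milliseconds, which we
# approximate inline here so the snippet runs standalone.
import pandas as pd

def _to_ms(timestamp: pd.Timestamp) -> int:
    # pd.Timestamp.value is nanoseconds since the epoch
    return int(timestamp.value) // 1_000_000

frame = pd.DataFrame({'timestamp': pd.to_datetime(['2020-01-01 00:00:00',
                                                   '2020-01-01 00:00:30'])})
delta = _to_ms(frame.timestamp[1]) - _to_ms(frame.timestamp[0])
assert delta == 30_000  # a 30-second step, expressed in milliseconds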
def detect(self, dataframe: pd.DataFrame, id: AnalyticUnitId) -> dict:
    logging.debug('Start method detect for analytic unit {}'.format(id))
    result = self.do_detect(dataframe)
    segments = [(
        utils.convert_pd_timestamp_to_ms(dataframe['timestamp'][x[0]]),
        utils.convert_pd_timestamp_to_ms(dataframe['timestamp'][x[1]]),
    ) for x in result]
    if not self.state:
        logging.warning('Returning empty self.state after detect')
    logging.debug('Method detect completed successfully for analytic unit {}'.format(id))
    return {
        'segments': segments,
        'cache': self.state,
    }
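# Illustrative mini-example of the index-pair to timestamp-pair conversion
# performed above (standalone; _to_ms and the index pairs are hypothetical,
# approximating utils.convert_pd_timestamp_to_ms).
import pandas as pd

def _to_ms(timestamp: pd.Timestamp) -> int:
    return int(timestamp.value) // 1_000_000

frame = pd.DataFrame({'timestamp': pd.to_datetime(
    ['2020-01-01 00:00', '2020-01-01 00:01', '2020-01-01 00:02'])})
index_pairs = [(0, 2)]  # what a do_detect-style call might return
segments = [(_to_ms(frame['timestamp'][a]), _to_ms(frame['timestamp'][b]))
            for a, b in index_pairs]
assert segments == [(1577836800000, 1577836920000)]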
def detect(self, dataframe: pd.DataFrame, cache: Optional[ModelCache]) -> DetectionResult:
    logger.debug('Unit {} got {} data points for detection'.format(
        self.analytic_unit_id, len(dataframe)))
    # TODO: split and sleep (https://github.com/hastic/hastic-server/pull/124#discussion_r214085643)
    if cache is None:
        msg = f'{self.analytic_unit_id} detection got invalid cache, skipping detection'
        logger.error(msg)
        raise ValueError(msg)

    self.model.state = self.model.get_state(cache)
    window_size = self.model.state.window_size

    if window_size is None:
        message = '{} got cache without window_size for detection'.format(self.analytic_unit_id)
        logger.error(message)
        raise ValueError(message)

    if len(dataframe) < window_size * 2:
        message = f'{self.analytic_unit_id} skip detection: dataset length {len(dataframe)} points is less than the minimal length of {window_size * 2} points'
        logger.error(message)
        raise ValueError(message)

    detected = self.model.detect(dataframe, self.analytic_unit_id)

    segments = [Segment(segment[0], segment[1]) for segment in detected['segments']]
    new_cache = detected['cache'].to_json()
    last_dataframe_time = dataframe.iloc[-1]['timestamp']
    last_detection_time = convert_pd_timestamp_to_ms(last_dataframe_time)
    return DetectionResult(new_cache, segments, last_detection_time)
def detect(self, dataframe: pd.DataFrame, cache: Optional[ModelCache]) -> DetectionResult:
    if cache is None:
        raise ValueError(f'Analytic unit {self.analytic_unit_id} got empty cache')

    data = dataframe['value']

    cache = AnomalyCache.from_json(cache)
    segments = cache.segments
    enabled_bounds = cache.get_enabled_bounds()

    smoothed_data = utils.exponential_smoothing(data, cache.alpha)

    lower_bound = smoothed_data - cache.confidence
    upper_bound = smoothed_data + cache.confidence

    if len(segments) > 0:
        data_start_time = utils.convert_pd_timestamp_to_ms(dataframe['timestamp'][0])

        for segment in segments:
            seasonality_index = cache.seasonality // cache.time_step
            seasonality_offset = self.get_seasonality_offset(
                segment.from_timestamp, cache.seasonality, data_start_time, cache.time_step)
            segment_data = pd.Series(segment.data)

            lower_bound = self.add_season_to_data(
                lower_bound, segment_data, seasonality_offset, seasonality_index, Bound.LOWER)
            upper_bound = self.add_season_to_data(
                upper_bound, segment_data, seasonality_offset, seasonality_index, Bound.UPPER)

    detected_segments = list(self.detections_generator(
        dataframe, upper_bound, lower_bound, enabled_bounds))

    last_dataframe_time = dataframe.iloc[-1]['timestamp']
    last_detection_time = utils.convert_pd_timestamp_to_ms(last_dataframe_time)

    return DetectionResult(cache.to_json(), detected_segments, last_detection_time)
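# A standalone sketch of the smoothing-plus-confidence-band idea used above
# (hypothetical names; utils.exponential_smoothing is approximated with a
# plain EWMA, which may differ from the project's implementation).
import pandas as pd

def _exponential_smoothing(series: pd.Series, alpha: float) -> pd.Series:
    # Recursive EWMA: s[0] = x[0], s[t] = alpha * x[t] + (1 - alpha) * s[t-1]
    return series.ewm(alpha=alpha, adjust=False).mean()

values = pd.Series([1.0, 1.1, 0.9, 5.0, 1.0])
confidence = 1.5
smoothed = _exponential_smoothing(values, alpha=0.5)
lower, upper = smoothed - confidence, smoothed + confidence
# Only the spike at index 3 escapes the band, so only it would be reported.
out_of_band = (values > upper) | (values < lower)
assert list(out_of_band) == [False, False, False, True, False]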
def detections_generator(
    self,
    dataframe: pd.DataFrame,
    upper_bound: pd.DataFrame,
    lower_bound: pd.DataFrame,
    enabled_bounds: Bound
) -> Generator[Segment, None, Segment]:
    in_segment = False
    segment_start = 0
    bound: Bound = None
    for idx, val in enumerate(dataframe['value'].values):
        if val > upper_bound.values[idx]:
            if enabled_bounds == Bound.UPPER or enabled_bounds == Bound.ALL:
                if not in_segment:
                    in_segment = True
                    segment_start = dataframe['timestamp'][idx]
                    bound = Bound.UPPER
                continue
        if val < lower_bound.values[idx]:
            if enabled_bounds == Bound.LOWER or enabled_bounds == Bound.ALL:
                if not in_segment:
                    in_segment = True
                    segment_start = dataframe['timestamp'][idx]
                    bound = Bound.LOWER
                continue
        if in_segment:
            segment_end = dataframe['timestamp'][idx - 1]
            yield Segment(
                utils.convert_pd_timestamp_to_ms(segment_start),
                utils.convert_pd_timestamp_to_ms(segment_end),
                message=f'{val} out of {bound.value} bound'
            )
            in_segment = False
    else:
        if in_segment:
            segment_end = dataframe['timestamp'][idx]
            return Segment(
                utils.convert_pd_timestamp_to_ms(segment_start),
                utils.convert_pd_timestamp_to_ms(segment_end),
                message=f'{val} out of {bound.value} bound'
            )
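# Hypothetical standalone demo of the bound-crossing generator pattern above:
# scan values, open a segment on the first out-of-bounds point, close it on the
# first point back inside, and flush any segment still open at the end.
# Names here are illustrative, not from the source.
from typing import Generator, List, Tuple

def _crossings(values: List[float], lower: float, upper: float
               ) -> Generator[Tuple[int, int], None, None]:
    in_segment, start = False, 0
    for idx, val in enumerate(values):
        if val > upper or val < lower:
            if not in_segment:
                in_segment, start = True, idx
            continue
        if in_segment:
            yield (start, idx - 1)
            in_segment = False
    if in_segment:  # segment still open at the end of the data
        yield (start, len(values) - 1)

assert list(_crossings([0, 3, 4, 0, -3, 0], lower=-2, upper=2)) == [(1, 2), (4, 4)]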
def process_data(self, dataframe: pd.DataFrame, cache: ModelCache) -> ProcessingResult:
    cache = AnomalyCache.from_json(cache)
    segments = cache.segments
    enabled_bounds = cache.get_enabled_bounds()

    # TODO: exponential_smoothing should return dataframe with related timestamps
    smoothed_data = utils.exponential_smoothing(dataframe['value'], cache.alpha)

    lower_bound = smoothed_data - cache.confidence
    upper_bound = smoothed_data + cache.confidence

    if len(segments) > 0:
        data_start_time = utils.convert_pd_timestamp_to_ms(dataframe['timestamp'][0])

        for segment in segments:
            seasonality_index = cache.seasonality // cache.time_step
            # TODO: move it to utils and add tests
            seasonality_offset = self.get_seasonality_offset(
                segment.from_timestamp, cache.seasonality, data_start_time, cache.time_step)
            segment_data = pd.Series(segment.data)

            lower_bound = self.add_season_to_data(
                lower_bound, segment_data, seasonality_offset, seasonality_index, Bound.LOWER)
            upper_bound = self.add_season_to_data(
                upper_bound, segment_data, seasonality_offset, seasonality_index, Bound.UPPER)

    # TODO: support multiple segments
    timestamps = utils.convert_series_to_timestamp_list(dataframe.timestamp)
    lower_bound_timeseries = list(zip(timestamps, lower_bound.values.tolist()))
    upper_bound_timeseries = list(zip(timestamps, upper_bound.values.tolist()))

    if enabled_bounds == Bound.ALL:
        return ProcessingResult(lower_bound_timeseries, upper_bound_timeseries)
    elif enabled_bounds == Bound.UPPER:
        return ProcessingResult(upper_bound=upper_bound_timeseries)
    elif enabled_bounds == Bound.LOWER:
        return ProcessingResult(lower_bound=lower_bound_timeseries)
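# Hedged sketch of the seasonality-offset arithmetic referenced in the TODO
# above (illustrative only; the project's get_seasonality_offset may differ).
# The idea: given a labeled segment's start, the seasonality period, and the
# start of the current data, find how many time steps the learned seasonal
# pattern must be shifted so it aligns with the new data.
def _seasonality_offset(segment_start_ms: int, seasonality_ms: int,
                        data_start_ms: int, time_step_ms: int) -> int:
    # Phase difference between the segment's position in its cycle and the
    # data start's position in the same cycle, in whole time steps.
    phase = (data_start_ms - segment_start_ms) % seasonality_ms
    return (seasonality_ms - phase) % seasonality_ms // time_step_ms

# A daily pattern learned at midnight, data starting at 06:00 with a
# 1-minute step: the pattern must be shifted by 18 hours' worth of steps.
DAY, HOUR, MINUTE = 86_400_000, 3_600_000, 60_000
assert _seasonality_offset(0, DAY, 6 * HOUR, MINUTE) == 18 * 60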
def detect(self, dataframe: pd.DataFrame, cache: ModelCache) -> DetectionResult:
    if cache is None or cache == {}:
        raise ValueError('Threshold detector error: cannot detect before learning')
    if len(dataframe) == 0:
        return None

    value = cache['value']
    condition = cache['condition']

    segments = []
    for index, row in dataframe.iterrows():
        current_value = row['value']
        current_timestamp = utils.convert_pd_timestamp_to_ms(row['timestamp'])
        segment = Segment(current_timestamp, current_timestamp)
        # TODO: merge segments
        if pd.isnull(current_value):
            if condition == 'NO_DATA':
                segment.message = 'NO_DATA detected'
                segments.append(segment)
            continue

        comparators = {
            '>': operator.gt,
            '<': operator.lt,
            '=': operator.eq,
            '>=': operator.ge,
            '<=': operator.le
        }

        assert condition in comparators.keys(), f'condition {condition} not allowed'

        if comparators[condition](current_value, value):
            segment.message = f"{current_value} {condition} threshold's value {value}"
            segments.append(segment)

    last_entry = dataframe.iloc[-1]
    last_detection_time = utils.convert_pd_timestamp_to_ms(last_entry['timestamp'])
    return DetectionResult(cache, segments, last_detection_time)
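# Standalone illustration of the operator-map dispatch used above: mapping the
# stored condition string onto functions from the stdlib operator module
# avoids a chain of if/elif branches (the assertions are illustrative).
import operator

comparators = {
    '>': operator.gt,
    '<': operator.lt,
    '=': operator.eq,
    '>=': operator.ge,
    '<=': operator.le,
}

assert comparators['>='](5, 5)         # 5 >= 5 is True
assert not comparators['<'](3.0, 2.0)  # 3.0 < 2.0 is False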
def detect(self, dataframe: pd.DataFrame, cache: Optional[models.ModelCache]) -> dict:
    logger.debug('Unit {} got {} data points for detection'.format(
        self.analytic_unit_id, len(dataframe)))
    # TODO: split and sleep (https://github.com/hastic/hastic-server/pull/124#discussion_r214085643)
    detected = self.model.detect(dataframe, cache)
    segments = [{'from': segment[0], 'to': segment[1]} for segment in detected['segments']]
    new_cache = detected['cache']
    last_dataframe_time = dataframe.iloc[-1]['timestamp']
    last_detection_time = convert_pd_timestamp_to_ms(last_dataframe_time)
    return {
        'cache': new_cache,
        'segments': segments,
        'lastDetectionTime': last_detection_time
    }
def detect(self, dataframe: pd.DataFrame, cache: ModelCache) -> dict:
    if cache is None:
        raise ValueError('Threshold detector error: cannot detect before learning')

    value = cache['value']
    condition = cache['condition']

    now = convert_sec_to_ms(time())
    segments = []

    dataframe_without_nans = dataframe.dropna()
    if len(dataframe_without_nans) == 0:
        if condition == 'NO_DATA':
            segments.append({'from': now, 'to': now})
        else:
            return None
    else:
        last_entry = dataframe_without_nans.iloc[-1]
        last_time = convert_pd_timestamp_to_ms(last_entry['timestamp'])
        last_value = last_entry['value']
        segment = {'from': last_time, 'to': last_time}

        if condition == '>':
            if last_value > value:
                segments.append(segment)
        elif condition == '>=':
            if last_value >= value:
                segments.append(segment)
        elif condition == '=':
            if last_value == value:
                segments.append(segment)
        elif condition == '<=':
            if last_value <= value:
                segments.append(segment)
        elif condition == '<':
            if last_value < value:
                segments.append(segment)

    return {'cache': cache, 'segments': segments, 'lastDetectionTime': now}
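# Hypothetical sketch of the NO_DATA branch above: dropping NaN rows first
# turns "the whole window is empty" into a simple length check.
import numpy as np
import pandas as pd

frame = pd.DataFrame({
    'timestamp': pd.to_datetime(['2020-01-01 00:00', '2020-01-01 00:01']),
    'value': [np.nan, np.nan],
})
without_nans = frame.dropna()
# Every value in the window is NaN, so a NO_DATA condition would fire.
assert len(without_nans) == 0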