Example No. 1
    def test_trough_antisegments(self):
        data_val = [
            9.0, 9.0, 9.0, 9.0, 7.0, 4.0, 7.0, 9.0, 9.0, 9.0, 5.0, 1.0, 5.0,
            9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0
        ]
        dataframe = create_dataframe(data_val)
        segments = [{
            '_id': 'Esl7uetLhx4lCqHa',
            'analyticUnitId': 'opnICRJwOmwBELK8',
            'from': 1523889000010,
            'to': 1523889000012,
            'labeled': True,
            'deleted': False
        }, {
            '_id': 'Esl7uetLhx4lCqHa',
            'analyticUnitId': 'opnICRJwOmwBELK8',
            'from': 1523889000003,
            'to': 1523889000005,
            'labeled': False,
            'deleted': True
        }]
        segments = [Segment.from_json(segment) for segment in segments]

        model = models.TroughModel()
        model_name = model.__class__.__name__
        model.state = model.get_state(None)
        try:
            model.fit(dataframe, segments, 'test')
        except ValueError:
            self.fail('Model {} raised unexpectedly'.format(model_name))
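
These tests rely on a create_dataframe helper that the listing never shows. A minimal sketch of one, consistent with the dataframe Example No. 6 builds by hand (the real helper may differ, e.g. it could also convert the column with pd.to_datetime):

    import pandas as pd

    def create_dataframe(data_val: list) -> pd.DataFrame:
        # timestamps line up with the 'from'/'to' segment bounds used throughout
        data_ind = [1523889000000 + i for i in range(len(data_val))]
        return pd.DataFrame({'timestamp': data_ind, 'value': data_val})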
Example No. 2
    def test_jump_model_for_cache(self):
        cache = {
            'patternCenter': [2, 6],
            'patternModel': [5, 0.5, 4],
            'confidence': 2,
            'convolveMax': 8,
            'convolveMin': 7,
            'window_size': 1,
            'convDelMin': 0,
            'convDelMax': 0,
        }
        data_val = [
            1.0, 1.0, 1.0, 4.0, 4.0, 0.0, 0.0, 5.0, 5.0, 0.0, 0.0, 4.0, 4.0,
            4.0, 4.0
        ]
        dataframe = create_dataframe(data_val)
        segments = [{
            '_id': 'Esl7uetLhx4lCqHa',
            'analyticUnitId': 'opnICRJwOmwBELK8',
            'from': 1523889000009,
            'to': 1523889000013,
            'labeled': True,
            'deleted': False
        }]
        segments = [Segment.from_json(segment) for segment in segments]

        model = models.JumpModel()
        model.state = model.get_state(cache)
        result = model.fit(dataframe, segments, 'test')
        self.assertEqual(len(result.pattern_center), 3)
Example No. 3
    def test_models_for_pattern_model_cache(self):
        cache = {
            'patternCenter': [4, 12],
            'patternModel': [],
            'confidence': 2,
            'convolveMax': 8,
            'convolveMin': 7,
            'window_size': 2,
            'convDelMin': 0,
            'convDelMax': 0,
        }
        data_val = [
            5.0, 5.0, 5.0, 5.0, 1.0, 1.0, 1.0, 1.0, 9.0, 9.0, 9.0, 9.0, 0, 0,
            0, 0, 0, 0, 6.0, 6.0, 6.0, 1.0, 1.0, 1.0, 1.0, 1.0
        ]
        dataframe = create_dataframe(data_val)
        segments = [{
            '_id': 'Esl7uetLhx4lCqHa',
            'analyticUnitId': 'opnICRJwOmwBELK8',
            'from': 1523889000019,
            'to': 1523889000024,
            'labeled': True,
            'deleted': False
        }]
        segments = [Segment.from_json(segment) for segment in segments]

        model = models.DropModel()
        model_name = model.__class__.__name__
        model.state = model.get_state(cache)
        try:
            model.fit(dataframe, segments, 'test')
        except ValueError:
            self.fail('Model {} raised unexpectedly'.format(model_name))
Example No. 4
    def test_three_value_segment(self):
        data_val = [
            1.0, 1.0, 1.0, 1.0, 1.0, 5.0, 2.0, 5.0, 5.0, 1.0, 1.0, 1.0, 1.0,
            9.0, 9.0, 9.0, 9.0, 2.0, 3.0, 4.0, 5.0, 4.0, 2.0, 1.0, 3.0, 4.0
        ]
        dataframe = create_dataframe(data_val)
        segments = [{
            '_id': 'Esl7uetLhx4lCqHa',
            'analyticUnitId': 'opnICRJwOmwBELK8',
            'from': 1523889000004,
            'to': 1523889000006,
            'labeled': True,
            'deleted': False
        }]
        segments = [Segment.from_json(segment) for segment in segments]

        model_instances = [
            models.GeneralModel(),
            models.PeakModel(),
        ]
        try:
            for model in model_instances:
                model_name = model.__class__.__name__
                model.state = model.get_state(None)
                model.fit(dataframe, segments, 'test')
        except ValueError:
            self.fail('Model {} raised unexpectedly'.format(model_name))
Example No. 5
    def test_positive_and_negative_segments(self):
        data_val = [
            1.0, 1.0, 1.0, 2.0, 3.0, 2.0, 1.0, 1.0, 1.0, 1.0, 5.0, 7.0, 5.0,
            1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0
        ]
        dataframe = create_dataframe(data_val)
        segments = [{
            '_id': 'Esl7uetLhx4lCqHa',
            'analyticUnitId': 'opnICRJwOmwBELK8',
            'from': 1523889000004,
            'to': 1523889000006,
            'labeled': True,
            'deleted': False
        }, {
            '_id': 'Esl7uetLhx4lCqHa',
            'analyticUnitId': 'opnICRJwOmwBELK8',
            'from': 1523889000001,
            'to': 1523889000003,
            'labeled': False,
            'deleted': False
        }]
        segments = [Segment.from_json(segment) for segment in segments]
        cache = {}
        detector = pattern_detector.PatternDetector('PEAK', 'test_id')
        try:
            detector.train(dataframe, segments, cache)
        except Exception as e:
            self.fail('detector.train failed with error {}'.format(e))
Example No. 6
    def test_only_negative_segments(self):
        data_val = [0, 1, 2, 1, 2, 10, 1, 2, 1]
        data_ind = [1523889000000 + i for i in range(len(data_val))]
        data = {'timestamp': data_ind, 'value': data_val}
        dataframe = pd.DataFrame(data=data)
        segments = [{
            '_id': 'Esl7uetLhx4lCqHa',
            'analyticUnitId': 'opnICRJwOmwBELK8',
            'from': 1523889000019,
            'to': 1523889000025,
            'labeled': False,
            'deleted': False
        }, {
            '_id': 'Esl7uetLhx4lCqHa',
            'analyticUnitId': 'opnICRJwOmwBELK8',
            'from': 1523889000002,
            'to': 1523889000008,
            'labeled': False,
            'deleted': False
        }]
        segments = [Segment.from_json(segment) for segment in segments]
        cache = {}
        detector = pattern_detector.PatternDetector('PEAK', 'test_id')
        expected_error_message = 'test_id has no positive labeled segments. Pattern detector needs at least 1 positive labeled segment'

        with self.assertRaises(ValueError) as context:
            detector.train(dataframe, segments, cache)
        self.assertEqual(str(context.exception), expected_error_message)
Example No. 7
    def detect(self, dataframe: pd.DataFrame,
               cache: Optional[ModelCache]) -> DetectionResult:
        logger.debug('Unit {} got {} data points for detection'.format(
            self.analytic_unit_id, len(dataframe)))
        # TODO: split and sleep (https://github.com/hastic/hastic-server/pull/124#discussion_r214085643)

        if cache is None:
            msg = f'{self.analytic_unit_id} detection got invalid cache, skip detection'
            logger.error(msg)
            raise ValueError(msg)

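        # rebuild the model's state object from the cache produced by training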
        self.model.state = self.model.get_state(cache)
        window_size = self.model.state.window_size

        if window_size is None:
            message = '{} got cache without window_size for detection'.format(
                self.analytic_unit_id)
            logger.error(message)
            raise ValueError(message)

        if len(dataframe) < window_size * 2:
            message = f'{self.analytic_unit_id} skip detection: dataset length {len(dataframe)} points less than minimal length {window_size * 2} points'
            logger.error(message)
            raise ValueError(message)

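        # run the model over the full series; it returns raw segments plus an updated cache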
        detected = self.model.detect(dataframe, self.analytic_unit_id)

        segments = [
            Segment(segment[0], segment[1]) for segment in detected['segments']
        ]
        new_cache = detected['cache'].to_json()
        last_dataframe_time = dataframe.iloc[-1]['timestamp']
        last_detection_time = convert_pd_timestamp_to_ms(last_dataframe_time)
        return DetectionResult(new_cache, segments, last_detection_time)
Example No. 8
    def detections_generator(
        self,
        dataframe: pd.DataFrame,
        upper_bound: pd.DataFrame,
        lower_bound: pd.DataFrame,
        enabled_bounds: Bound
    ) -> Generator[Segment, None, Segment]:
        in_segment = False
        segment_start = 0
        bound: Optional[Bound] = None
        for idx, val in enumerate(dataframe['value'].values):
            if val > upper_bound.values[idx]:
                if enabled_bounds == Bound.UPPER or enabled_bounds == Bound.ALL:
                    if not in_segment:
                        in_segment = True
                        segment_start = dataframe['timestamp'][idx]
                        bound = Bound.UPPER
                    continue

            if val < lower_bound.values[idx]:
                if enabled_bounds == Bound.LOWER or enabled_bounds == Bound.ALL:
                    if not in_segment:
                        in_segment = True
                        segment_start = dataframe['timestamp'][idx]
                        bound = Bound.LOWER
                    continue

            if in_segment:
                segment_end = dataframe['timestamp'][idx - 1]
                yield Segment(
                    utils.convert_pd_timestamp_to_ms(segment_start),
                    utils.convert_pd_timestamp_to_ms(segment_end),
                    message=f'{val} out of {str(bound.value)} bound'
                )
                in_segment = False
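        # for/else: the else below belongs to the for loop and runs when the data
        # is exhausted, closing a segment that is still open at the last point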
        else:
            if in_segment:
                segment_end = dataframe['timestamp'][idx]
                return Segment(
                    utils.convert_pd_timestamp_to_ms(segment_start),
                    utils.convert_pd_timestamp_to_ms(segment_end),
                    message=f'{val} out of {str(bound.value)} bound'
                )
Example No. 9
    def detect(self, dataframe: pd.DataFrame, cache: ModelCache) -> DetectionResult:
        if cache is None or cache == {}:
            raise ValueError('Threshold detector error: cannot detect before learning')
        if len(dataframe) == 0:
            return None

        value = cache['value']
        condition = cache['condition']

        comparators = {
            '>': operator.gt,
            '<': operator.lt,
            '=': operator.eq,
            '>=': operator.ge,
            '<=': operator.le
        }

        segments = []
        for _, row in dataframe.iterrows():
            current_value = row['value']
            current_timestamp = utils.convert_pd_timestamp_to_ms(row['timestamp'])
            segment = Segment(current_timestamp, current_timestamp)
            # TODO: merge segments
            if pd.isnull(current_value):
                if condition == 'NO_DATA':
                    segment.message = 'NO_DATA detected'
                    segments.append(segment)
                continue

            assert condition in comparators, f'condition {condition} not allowed'

            if comparators[condition](current_value, value):
                segment.message = f"{current_value} {condition} threshold's value {value}"
                segments.append(segment)

        last_entry = dataframe.iloc[-1]
        last_detection_time = utils.convert_pd_timestamp_to_ms(last_entry['timestamp'])
        return DetectionResult(cache, segments, last_detection_time)
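
A minimal sketch of driving this detect method, assuming it lives on a detector class (ThresholdDetector is a guessed name here) and that training left only the threshold value and comparison condition in the cache:

    import pandas as pd

    detector = ThresholdDetector()  # hypothetical class exposing detect() above
    cache = {'value': 10.0, 'condition': '>'}
    dataframe = pd.DataFrame({
        'timestamp': pd.to_datetime([1523889000000, 1523889000001], unit='ms'),
        'value': [5.0, 12.0]
    })
    result = detector.detect(dataframe, cache)  # one segment: 12.0 > 10.0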
Example No. 10
    async def __handle_analytic_task(self, task: object) -> dict:
        """
            returns payload or None
        """
        analytic_unit_id: AnalyticUnitId = task['analyticUnitId']
        log.debug('Analytics get task with type: {} for unit: {}'.format(
            task['type'], analytic_unit_id))
        if task['type'] == 'CANCEL':
            if analytic_unit_id in self.analytic_workers:
                self.analytic_workers[analytic_unit_id].cancel()
            return

        payload = task['payload']
        worker = self.__ensure_worker(analytic_unit_id, payload['detector'],
                                      payload['analyticUnitType'])
        data = payload.get('data')
        if task['type'] == 'PUSH':
            # TODO: do it a better way
            res = await worker.consume_data(data, payload['cache'])
            if res:
                res.update({'analyticUnitId': analytic_unit_id})
            return res
        elif task['type'] == 'LEARN':
            if 'segments' in payload:
                segments = payload['segments']
                segments = [Segment.from_json(segment) for segment in segments]
                return await worker.do_train(segments, data, payload['cache'])
            elif 'threshold' in payload:
                return await worker.do_train(payload['threshold'], data,
                                             payload['cache'])
            elif 'anomaly' in payload:
                return await worker.do_train(payload['anomaly'], data,
                                             payload['cache'])
            else:
                raise ValueError('No segments, threshold or anomaly in LEARN payload')
        elif task['type'] == 'DETECT':
            return await worker.do_detect(data, payload['cache'])
        elif task['type'] == 'PROCESS':
            return await worker.process_data(data, payload['cache'])

        raise ValueError('Unknown task type "%s"' % task['type'])
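
For reference, a LEARN task of the shape this handler expects could look like the following; every value is illustrative, inferred only from the keys the code reads:

    task = {
        'analyticUnitId': 'opnICRJwOmwBELK8',
        'type': 'LEARN',
        'payload': {
            'detector': 'pattern',        # routed through __ensure_worker
            'analyticUnitType': 'PEAK',
            'cache': None,
            'data': [[1523889000000, 1.0], [1523889000001, 5.0]],  # assumed shape
            'segments': [{'from': 1523889000000, 'to': 1523889000001,
                          'labeled': True, 'deleted': False}]
        }
    }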
Example No. 11
    def test_general_for_two_labeling(self):
        data_val = [
            1.0, 2.0, 5.0, 2.0, 1.0, 1.0, 3.0, 6.0, 4.0, 2.0, 1.0, 0, 0
        ]
        dataframe = create_dataframe(data_val)
        segments = [{
            '_id': 'Esl7uetLhx4lCqHa',
            'analyticUnitId': 'opnICRJwOmwBELK8',
            'from': 1523889000001,
            'to': 1523889000003,
            'labeled': True,
            'deleted': False
        }]
        segments = [Segment.from_json(segment) for segment in segments]

        model = models.GeneralModel()
        model.state = model.get_state(None)
        model.fit(dataframe, segments, 'test')
        # detected pattern indices must stay within the bounds of the dataset
        result = len(data_val) + 1
        for _ in range(2):
            # run detection twice: repeated calls must still yield in-bound indices
            max_pattern_index = max(model.do_detect(dataframe))
            self.assertLessEqual(max_pattern_index[0], result)
Example No. 12
    def test_value_error_dataset_input_should_have_multiple_elements(self):
        data_val = [
            1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 5.0, 5.0, 4.0, 5.0, 5.0,
            6.0, 5.0, 1.0, 2.0, 3.0, 4.0, 5.0, 3.0, 3.0, 2.0, 7.0, 8.0, 9.0,
            8.0, 7.0, 6.0
        ]
        dataframe = create_dataframe(data_val)
        segments = [{
            '_id': 'Esl7uetLhx4lCqHa',
            'analyticUnitId': 'opnICRJwOmwBELK8',
            'from': 1523889000007,
            'to': 1523889000011,
            'labeled': True,
            'deleted': False
        }]
        segments = [Segment.from_json(segment) for segment in segments]

        model = models.JumpModel()
        model_name = model.__class__.__name__
        model.state = model.get_state(None)
        try:
            model.fit(dataframe, segments, 'test')
        except ValueError:
            self.fail('Model {} raised unexpectedly'.format(model_name))
Example No. 13
    def test_serialize(self):
        segment_list = [Segment(100, 200)]
        serialize_list = utils.meta.SerializableList(segment_list)
        meta_result = utils.meta.serialize(serialize_list)
        expected_result = [{'from': 100, 'to': 200}]
        self.assertEqual(meta_result, expected_result)
Example No. 14
    def test_merge_intersecting_segments(self):
        test_cases = [
            {
                'index': [Segment(10, 20), Segment(30, 40)],
                'result': [[10, 20], [30, 40]],
                'step': 0,
            },
            {
                'index': [
                    Segment(10, 20),
                    Segment(13, 23),
                    Segment(15, 17),
                    Segment(20, 40)
                ],
                'result': [[10, 40]],
                'step': 0,
            },
            {
                'index': [],
                'result': [],
                'step': 0,
            },
            {
                'index': [Segment(10, 20)],
                'result': [[10, 20]],
                'step': 0,
            },
            {
                'index': [
                    Segment(10, 20),
                    Segment(13, 23),
                    Segment(25, 30),
                    Segment(35, 40)
                ],
                'result': [[10, 23], [25, 30], [35, 40]],
                'step': 0,
            },
            {
                'index': [
                    Segment(10, 50),
                    Segment(5, 40),
                    Segment(15, 25),
                    Segment(6, 50)
                ],
                'result': [[5, 50]],
                'step': 0,
            },
            {
                'index': [Segment(5, 10),
                          Segment(10, 20),
                          Segment(25, 50)],
                'result': [[5, 20], [25, 50]],
                'step': 0,
            },
            {
                'index': [Segment(20, 40),
                          Segment(10, 15),
                          Segment(50, 60)],
                'result': [[10, 15], [20, 40], [50, 60]],
                'step': 0,
            },
            {
                'index': [Segment(20, 40),
                          Segment(10, 20),
                          Segment(50, 60)],
                'result': [[10, 40], [50, 60]],
                'step': 0,
            },
            {
                'index': [Segment(10, 10),
                          Segment(20, 20),
                          Segment(30, 30)],
                'result': [[10, 30]],
                'step': 10,
            },
        ]

        for case in test_cases:
            utils_result = utils.merge_intersecting_segments(
                case['index'], case['step'])
            # compare lengths first: zip alone would hide missing segments
            self.assertEqual(len(utils_result), len(case['result']))
            for got, expected in zip(utils_result, case['result']):
                self.assertEqual(got.from_timestamp, expected[0])
                self.assertEqual(got.to_timestamp, expected[1])