def test_class_no_cluster(self):
     """Check classification metrics for a random forest without clustering.

     A job is built on ``repair_example()`` with prefix length 5, padding
     and the elapsed-time feature, then evaluated through ``calculate``.
     The ``elapsed_time`` entry is dropped before the comparison.
     """
     # unittest only honours ``maxDiff``; the previous ``max_diff`` attribute
     # was silently ignored, so long dict diffs were still truncated.
     self.maxDiff = None
     job = create_test_job(
         clustering=create_test_clustering(
             clustering_type=ClusteringMethods.NO_CLUSTER.value),
         split=repair_example(),
         encoding=create_test_encoding(
             prefix_length=5, padding=True, add_elapsed_time=True),
         predictive_model=create_test_predictive_model(
             predictive_model=PredictiveModels.CLASSIFICATION.value,
             prediction_method=ClassificationMethods.RANDOM_FOREST.value))
     result, _ = calculate(job)
     del result['elapsed_time']
     expected = {
         'f1score': 1.0,
         'acc': 1.0,
         'true_positive': '--',
         'true_negative': '--',
         'false_negative': '--',
         'false_positive': '--',
         'precision': 1.0,
         'recall': 1.0,
         'auc': 0.0,
     }
     self.assertDictEqual(result, expected)
示例#2
0
 def test_hyperopt(self):
     """Run a hyperopt-driven prediction task and check the stored config.

     After ``prediction_task`` has run, the random-forest configuration
     saved on the job's predictive model must differ from the defaults
     returned by ``classification_random_forest()``.
     """
     job = Job.objects.create(
         split=create_test_split(
             split_type=SplitTypes.SPLIT_SINGLE.value,
             original_log=create_test_log(
                 log_name=general_example_filename,
                 log_path=general_example_filepath)),
         encoding=create_test_encoding(
             value_encoding=ValueEncodings.SIMPLE_INDEX.value,
             prefix_length=3,
             padding=False),
         labelling=create_test_labelling(
             label_type=LabelTypes.REMAINING_TIME.value),
         clustering=create_test_clustering(
             clustering_type=ClusteringMethods.KMEANS.value),
         predictive_model=create_test_predictive_model(
             predictive_model=PredictiveModels.CLASSIFICATION.value,
             prediction_method=ClassificationMethods.RANDOM_FOREST.value),
         hyperparameter_optimizer=create_test_hyperparameter_optimizer(
             hyperoptim_type=HyperparameterOptimizationMethods.HYPEROPT.value,
             performance_metric=HyperOptLosses.ACC.value,
             max_evals=2))
     prediction_task(job.pk)

     # Reload the job that was just created. The former hard-coded ``pk=1``
     # only worked while this job happened to be the first row in the table.
     job = Job.objects.get(pk=job.pk)

     default_config = classification_random_forest()
     stored_config = getattr(
         job.predictive_model.classification,
         ClassificationMethods.RANDOM_FOREST.value.lower()).to_dict()
     self.assertNotEqual(default_config, stored_config)
示例#3
0
    def test_next_activity_zero_padding_elapsed_time(self):
        """Simple-index encoding, prefix length 10 with padding, next activity.

        Verifies the shape of the first frame returned by
        ``encode_label_logs``, that it has an ``elapsed_time`` column, and
        the encoded rows of traces '5' and '4'.
        """
        label_cfg = create_test_labelling(
            label_type=LabelTypes.NEXT_ACTIVITY.value)
        encoding_cfg = create_test_encoding(
            value_encoding=ValueEncodings.SIMPLE_INDEX.value,
            add_elapsed_time=True,
            task_generation_type=TaskGenerationTypes.ONLY_THIS.value,
            prefix_length=10,
            padding=True)
        job = create_test_job(
            encoding=encoding_cfg,
            labelling=label_cfg,
            predictive_model=create_test_predictive_model(
                predictive_model=PredictiveModels.CLASSIFICATION.value))

        encoded, _ = encode_label_logs(self.test_log, self.test_log, job)

        self.assertEqual(encoded.shape, (2, 13))
        self.assertTrue('elapsed_time' in encoded.columns.values.tolist())

        expected_rows = (
            ('5', ['5', 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]),
            ('4', ['4', 1, 1, 2, 1, 2, 0, 0, 0, 0, 0, 2, 0]),
        )
        for trace_id, expected in expected_rows:
            row = encoded[encoded.trace_id == trace_id].iloc[0].values.tolist()
            self.assertListEqual(row, expected)
    def test_attribute_number(self):
        """Boolean encoding, prefix length 2, labelled by the AMOUNT attribute.

        Verifies the shape of the second frame returned by
        ``encode_label_logs`` and the encoded rows of traces '5' and '4'.
        """
        encoding_cfg = create_test_encoding(
            value_encoding=ValueEncodings.BOOLEAN.value,
            prefix_length=2,
            task_generation_type=TaskGenerationTypes.ONLY_THIS.value)
        label_cfg = create_test_labelling(
            label_type=LabelTypes.ATTRIBUTE_NUMBER.value,
            attribute_name='AMOUNT')
        job = create_test_job(
            encoding=encoding_cfg,
            labelling=label_cfg,
            predictive_model=create_test_predictive_model(
                predictive_model=PredictiveModels.CLASSIFICATION.value))

        _, encoded = encode_label_logs(self.test_log, self.test_log, job)

        self.assertEqual(encoded.shape, (2, 9))

        expected_rows = (
            ('5', ['5', True, True, False, False, False, False, False,
                   False]),
            ('4', ['4', True, False, True, False, False, False, False,
                   True]),
        )
        for trace_id, expected in expected_rows:
            row = encoded[encoded.trace_id == trace_id].iloc[0].values.tolist()
            self.assertListEqual(row, expected)
    def test_next_activity_zero_padding_elapsed_time(self):
        """Boolean encoding, prefix length 3, elapsed time, next activity.

        Verifies the shape of the second frame returned by
        ``encode_label_logs``, that it has an ``elapsed_time`` column, and
        the encoded rows of traces '5' and '4'.
        """
        label_cfg = create_test_labelling(
            label_type=LabelTypes.NEXT_ACTIVITY.value)
        encoding_cfg = create_test_encoding(
            value_encoding=ValueEncodings.BOOLEAN.value,
            add_elapsed_time=True,
            task_generation_type=TaskGenerationTypes.ONLY_THIS.value,
            prefix_length=3)
        job = create_test_job(
            encoding=encoding_cfg,
            labelling=label_cfg,
            predictive_model=create_test_predictive_model(
                predictive_model=PredictiveModels.CLASSIFICATION.value))

        _, encoded = encode_label_logs(self.test_log, self.test_log, job)

        self.assertEqual(encoded.shape, (2, 10))
        self.assertTrue('elapsed_time' in encoded.columns.values.tolist())

        expected_rows = (
            ('5', ['5', True, True, True, False, False, False, False,
                   181200.0, 'decide']),
            ('4', ['4', True, False, True, False, False, False, True,
                   171660.0, 'decide']),
        )
        for trace_id, expected in expected_rows:
            row = encoded[encoded.trace_id == trace_id].iloc[0].values.tolist()
            self.assertListEqual(row, expected)
 def test_next_activity_kmeans(self):
     """Check next-activity metrics for a random forest with k-means.

     A job is built on ``repair_example()`` with prefix length 8 and
     padding, then evaluated through ``calculate``. The ``elapsed_time``
     entry is dropped before the comparison.
     """
     # unittest only honours ``maxDiff``; the previous ``max_diff`` attribute
     # was silently ignored, so long dict diffs were still truncated.
     self.maxDiff = None
     job = create_test_job(
         clustering=create_test_clustering(
             clustering_type=ClusteringMethods.KMEANS.value),
         split=repair_example(),
         encoding=create_test_encoding(prefix_length=8, padding=True),
         labelling=create_test_labelling(
             label_type=LabelTypes.NEXT_ACTIVITY.value),
         predictive_model=create_test_predictive_model(
             predictive_model=PredictiveModels.CLASSIFICATION.value,
             prediction_method=ClassificationMethods.RANDOM_FOREST.value))
     result, _ = calculate(job)
     del result['elapsed_time']
     expected = {
         'f1score': 0.54239884582595577,
         'acc': 0.80995475113122173,
         'true_positive': '--',
         'true_negative': '--',
         'false_negative': '--',
         'false_positive': '--',
         'precision': 0.62344720496894401,
         'recall': 0.5224945442336747,
         'auc': 0.4730604801339352,
     }
     self.assertDictEqual(result, expected)
    def test_label_remaining_time_with_elapsed_time_custom_threshold(self):
        """Boolean encoding with a custom remaining-time threshold of 40000.

        Verifies the shape of the second frame returned by
        ``encode_label_logs`` and the encoded rows of traces '5' and '4'.
        """
        label_cfg = create_test_labelling(
            label_type=LabelTypes.REMAINING_TIME.value,
            threshold_type=ThresholdTypes.THRESHOLD_CUSTOM.value,
            threshold=40000)
        encoding_cfg = create_test_encoding(
            value_encoding=ValueEncodings.BOOLEAN.value,
            prefix_length=3,
            add_elapsed_time=True,
            add_remaining_time=True,
            task_generation_type=TaskGenerationTypes.ONLY_THIS.value)
        job = create_test_job(
            encoding=encoding_cfg,
            labelling=label_cfg,
            predictive_model=create_test_predictive_model(
                predictive_model=PredictiveModels.CLASSIFICATION.value))

        _, encoded = encode_label_logs(self.test_log, self.test_log, job)

        self.assertEqual(encoded.shape, (2, 10))

        expected_rows = (
            ('5', ['5', True, True, True, False, False, False, False,
                   181200.0, False]),
            ('4', ['4', True, False, True, False, False, False, True,
                   171660.0, False]),
        )
        for trace_id, expected in expected_rows:
            row = encoded[encoded.trace_id == trace_id].iloc[0].values.tolist()
            self.assertListEqual(row, expected)
示例#8
0
    def test_label_remaining_time_with_elapsed_time_custom_threshold(self):
        """Simple-index encoding with a custom remaining-time threshold.

        Verifies shape, column names and the rows of traces '5' and '4' in
        the first frame returned by ``encode_label_logs``.
        """
        encoding_cfg = create_test_encoding(
            value_encoding=ValueEncodings.SIMPLE_INDEX.value,
            add_elapsed_time=True,
            add_remaining_time=True,
            task_generation_type=TaskGenerationTypes.ONLY_THIS.value,
            prefix_length=2)
        label_cfg = create_test_labelling(
            label_type=LabelTypes.REMAINING_TIME.value,
            threshold_type=ThresholdTypes.THRESHOLD_CUSTOM.value,
            threshold=40000)
        job = create_test_job(
            encoding=encoding_cfg,
            labelling=label_cfg,
            predictive_model=create_test_predictive_model(
                predictive_model=PredictiveModels.CLASSIFICATION.value))

        encoded, _ = encode_label_logs(self.test_log, self.test_log, job)

        self.assertEqual(encoded.shape, (2, 5))
        expected_columns = [
            'trace_id', 'prefix_1', 'prefix_2', 'elapsed_time', 'label'
        ]
        self.assertListEqual(encoded.columns.values.tolist(),
                             expected_columns)

        expected_rows = (
            ('5', ['5', 1, 2, 2, 1]),
            ('4', ['4', 1, 1, 1, 1]),
        )
        for trace_id, expected in expected_rows:
            row = encoded[encoded.trace_id == trace_id].iloc[0].values.tolist()
            self.assertListEqual(row, expected)
    def test_no_exceptions(self):
        """Run ``calculate`` for every encoding/padding/method/label combo.

        The test has no assertions; it passes as long as no combination
        raises.
        """
        label_types = [member.value for member in LabelTypes]
        methods = [member.value for member in ClassificationMethods]
        encodings = [member.value for member in ValueEncodings]
        paddings = [True, False]

        combinations = itertools.product(
            encodings, paddings, methods, label_types)
        for encoding, padding, method, label in combinations:
            print(encoding, padding, method, label)

            # NOTE(review): this branch does nothing, so the 'nn'
            # combinations it matches are still executed below. Confirm
            # whether ``continue`` was intended here.
            if method == 'nn' and (
                    not padding
                    or label == LabelTypes.ATTRIBUTE_STRING.value):
                pass

            job = create_test_job(
                predictive_model=create_test_predictive_model(
                    prediction_method=method),
                encoding=create_test_encoding(
                    value_encoding=encoding, padding=padding),
                labelling=create_test_labelling(label_type=label))
            calculate(job)
 def test_update_nb(self):
     """Train a Hoeffding tree, then retrain it incrementally.

     The second job reuses the first job's encoding and clustering and
     passes the first job as ``incremental_train``. Both runs are expected
     to yield the same metrics once ``elapsed_time`` is removed.
     """
     base_job = create_test_job(
         predictive_model=create_test_predictive_model(
             prediction_method=ClassificationMethods.HOEFFDING_TREE.value),
         labelling=create_test_labelling(
             label_type=LabelTypes.ATTRIBUTE_STRING.value,
             attribute_name='concept:name'),
         clustering=create_test_clustering(
             clustering_type=ClusteringMethods.NO_CLUSTER.value),
         create_models=True)
     first_result, _ = calculate(base_job)

     incremental_job = create_test_job(
         predictive_model=create_test_predictive_model(
             prediction_method=ClassificationMethods.HOEFFDING_TREE.value),
         encoding=base_job.encoding,
         labelling=create_test_labelling(
             label_type=LabelTypes.ATTRIBUTE_STRING.value,
             attribute_name='concept:name'),
         clustering=base_job.clustering,
         incremental_train=base_job)
     second_result, _ = calculate(incremental_job)

     del first_result['elapsed_time']
     del second_result['elapsed_time']

     expected = {
         'f1score': 0.0,
         'acc': 0.0,
         'precision': 0.0,
         'recall': 0.0,
         'true_positive': 0,
         'true_negative': 0,
         'false_negative': 2,
         'false_positive': 0,
         'auc': 0.0,
     }
     self.assertDictEqual(first_result, expected)
     self.assertDictEqual(second_result, expected)
示例#11
0
    def test_explain(self):
        """Train a decision tree and request SHAP explanations for it.

        Runs ``prediction_task`` on the explainability train/test logs,
        creates (or fetches) a SHAP ``Explanation`` and checks that both
        ``explain`` and ``shap_temporal_stability`` return plain dicts.
        """
        train_log = create_test_log(
            log_name='train_explainability.xes',
            log_path='cache/log_cache/test_logs/train_explainability.xes')
        test_log = create_test_log(
            log_name='test_explainability.xes',
            log_path='cache/log_cache/test_logs/test_explainability.xes')
        split = create_test_split(
            split_type=SplitTypes.SPLIT_DOUBLE.value,
            split_ordering_method=SplitOrderingMethods.SPLIT_SEQUENTIAL.value,
            test_size=0.2,
            original_log=None,
            train_log=train_log,
            test_log=test_log)

        predictive_model = create_test_predictive_model(
            predictive_model=PredictiveModels.CLASSIFICATION.value,
            prediction_method=ClassificationMethods.DECISION_TREE.value)

        job = create_test_job(
            split=split,
            encoding=create_test_encoding(
                prefix_length=4,
                padding=True,
                value_encoding=ValueEncodings.SIMPLE_INDEX.value),
            labelling=create_test_labelling(
                label_type=LabelTypes.ATTRIBUTE_STRING.value,
                attribute_name='label'),
            clustering=None,
            create_models=True,
            predictive_model=predictive_model,
            job_type=JobTypes.PREDICTION.value,
            hyperparameter_optimizer=None,
            incremental_train=None)

        prediction_task(job.id, do_publish_result=False)
        job.refresh_from_db()

        exp, _ = Explanation.objects.get_or_create(
            type=ExplanationTypes.SHAP.value,
            split=split,
            predictive_model=predictive_model,
            job=job,
            results={})

        explanation_target = '2_101'
        prefix_target = 'prefix_1'

        train_df, test_df = get_encoded_logs(job)
        explanation = explain(
            exp, train_df, test_df, explanation_target, prefix_target)

        # The encoded logs are fetched again before the second call, as in
        # the original flow.
        train_df, test_df = get_encoded_logs(job)
        explanation_temp = shap_temporal_stability(
            exp, train_df, test_df, explanation_target)

        self.assertTrue(type(explanation) is dict)
        self.assertTrue(type(explanation_temp) is dict)
 def test_shape_training(self):
     """Encoded training and test frames must be (4, 4) and (2, 4)."""
     job = create_test_job(
         encoding=self.encoding,
         labelling=self.labelling,
         predictive_model=create_test_predictive_model(
             predictive_model=PredictiveModels.CLASSIFICATION.value))
     training_df, test_df = encode_label_logs(
         self.training_log, self.test_log, job)

     self.assert_shape(training_df, (4, 4))
     self.assert_shape(test_df, (2, 4))
    def test_str(self):
        """The job's string form must be as long as the reference rendering.

        Only lengths are compared; the reference text itself is not matched
        character by character.
        """
        reference = "{created_date: 2019-10-01 09:38:35.245361+00:00, modified_date: 2019-10-01 09:38:35.245655+00:00, error: , status: created, type: prediction, create_models: False, split: {'id': 1, 'type': 'single', 'test_size': 0.2, 'splitting_method': 'sequential', 'original_log_path': 'cache/log_cache/test_logs/general_example.xes'}, encoding: {'data_encoding': 'label_encoder', 'value_encoding': 'simpleIndex', 'add_elapsed_time': False, 'add_remaining_time': False, 'add_executed_events': False, 'add_resources_used': False, 'add_new_traces': False, 'features': {}, 'prefix_length': 1, 'padding': False, 'task_generation_type': 'only'}, labelling: {'type': 'next_activity', 'attribute_name': None, 'threshold_type': 'threshold_mean', 'threshold': 0.0, 'results': {}}, clustering: {'clustering_method': 'noCluster'}, predictive_model: {'n_estimators': 10, 'max_depth': None, 'max_features': 'auto'}, evaluation: [None], hyperparameter_optimizer: [None], incremental_train: [None]}"
        model = create_test_predictive_model(
            predictive_model=PredictiveModels.CLASSIFICATION.value,
            prediction_method=ClassificationMethods.RANDOM_FOREST.value)
        job = create_test_job(predictive_model=model)

        self.assertEqual(len(str(job)), len(reference))
示例#14
0
 def test_next_activity_DecisionTree(self):
     """Decision-tree next-activity run must match ``self.results3()``."""
     model = create_test_predictive_model(
         prediction_method=ClassificationMethods.DECISION_TREE.value)
     labelling = create_test_labelling(
         label_type=LabelTypes.NEXT_ACTIVITY.value)
     clustering = create_test_clustering(
         clustering_type=ClusteringMethods.NO_CLUSTER.value)
     job = create_test_job(predictive_model=model,
                           labelling=labelling,
                           clustering=clustering)

     outcome, _ = calculate(job)
     self.assertDictEqual(outcome, self.results3())
示例#15
0
    def get_classification_job(predictive_model: str, prediction_method: str, metric: HyperOptLosses = HyperOptLosses.ACC.value):
        """Build a test job for the given model and prediction method.

        The job uses an encoding with prefix length 8 and padding, and a
        hyperparameter optimizer whose performance metric is ``metric``.

        :param predictive_model: forwarded to ``create_test_predictive_model``
        :param prediction_method: forwarded to ``create_test_predictive_model``
        :param metric: performance metric passed to the optimizer
        :return: the job returned by ``create_test_job``
        """
        encoding_cfg = create_test_encoding(prefix_length=8, padding=True)
        model = create_test_predictive_model(
            predictive_model=predictive_model,
            prediction_method=prediction_method)
        optimizer = create_test_hyperparameter_optimizer(
            performance_metric=metric)

        return create_test_job(predictive_model=model,
                               encoding=encoding_cfg,
                               hyperparameter_optimizer=optimizer)
示例#16
0
 def test_class_randomForest(self):
     """Random-forest run on the 'label' attribute must match ``results2()``."""
     model = create_test_predictive_model(
         prediction_method=ClassificationMethods.RANDOM_FOREST.value)
     labelling = create_test_labelling(
         label_type=LabelTypes.ATTRIBUTE_STRING.value,
         attribute_name='label')
     clustering = create_test_clustering(
         clustering_type=ClusteringMethods.NO_CLUSTER.value)
     job = create_test_job(predictive_model=model,
                           labelling=labelling,
                           clustering=clustering)

     outcome, _ = calculate(job)
     self.assertDictEqual(outcome, self.results2())
    def test_no_label(self):
        """With NO_LABEL labelling the encoded test frame must be (2, 9)."""
        label_cfg = create_test_labelling(
            label_type=LabelTypes.NO_LABEL.value)
        job = create_test_job(
            encoding=self.encoding,
            labelling=label_cfg,
            predictive_model=create_test_predictive_model(
                predictive_model=PredictiveModels.CLASSIFICATION.value))

        _, encoded_test = encode_label_logs(
            self.train_log, self.test_log, job)
        self.assertEqual(encoded_test.shape, (2, 9))
    def test_remaining_time(self):
        """With REMAINING_TIME labelling the encoded test frame is (2, 11)."""
        label_cfg = create_test_labelling(
            label_type=LabelTypes.REMAINING_TIME.value)
        job = create_test_job(
            encoding=self.encoding,
            labelling=label_cfg,
            predictive_model=create_test_predictive_model(
                predictive_model=PredictiveModels.CLASSIFICATION.value))

        _, encoded_test = encode_label_logs(
            self.train_log, self.test_log, job)
        self.assertEqual(encoded_test.shape, (2, 11))
    def test_row_test(self):
        """Check prefix, elapsed time and label of trace '4' in the test frame."""
        job = create_test_job(
            encoding=self.encoding,
            labelling=self.labelling,
            predictive_model=create_test_predictive_model(
                predictive_model=PredictiveModels.CLASSIFICATION.value))
        _, encoded_test = encode_label_logs(
            self.training_log, self.test_log, job)

        trace_rows = encoded_test[(encoded_test.trace_id == '4')]
        first_row = trace_rows.iloc[0]

        self.assertEqual(1, first_row.prefix_1)
        self.assertEqual(0, first_row.elapsed_time)
        self.assertEqual(0, first_row.label)
 def test_tsp_gru(self):
     """Time-series prediction with a GRU-configured RNN.

     After dropping ``elapsed_time`` the result must be exactly
     ``{'nlevenshtein': 0.6}``.
     """
     model = create_test_predictive_model(
         predictive_model=PredictiveModels.TIME_SERIES_PREDICTION.value,
         prediction_method=TimeSeriesPredictionMethods.RNN.value,
         configuration={'rnn_type': 'gru'})
     labelling = create_test_labelling()
     encoding = create_test_encoding(prefix_length=2, padding=True)
     clustering = create_test_clustering(
         clustering_type=ClusteringMethods.NO_CLUSTER.value)
     job = create_test_job(predictive_model=model,
                           labelling=labelling,
                           encoding=encoding,
                           clustering=clustering)

     outcome, _ = calculate(job)
     del outcome['elapsed_time']
     self.assertDictEqual(outcome, {'nlevenshtein': 0.6})
    def test_next_activity(self):
        """Next-activity labelling with additional columns gives (2, 14).

        The additional columns come from ``get_additional_columns`` applied
        to the training log.
        """
        label_cfg = create_test_labelling(
            label_type=LabelTypes.NEXT_ACTIVITY.value)
        job = create_test_job(
            encoding=self.encoding,
            labelling=label_cfg,
            predictive_model=create_test_predictive_model(
                predictive_model=PredictiveModels.CLASSIFICATION.value))
        extra_columns = get_additional_columns(self.train_log)

        _, encoded_test = encode_label_logs(
            self.train_log, self.test_log, job, extra_columns)
        self.assertEqual(encoded_test.shape, (2, 14))
示例#22
0
    def get_regression_job(predictive_model: str, prediction_method: str,
                           metric: HyperOptLosses = HyperOptLosses.ACC.value):
        """Build a remaining-time test job for the given model and method.

        The job uses an encoding with prefix length 8 and padding, a
        REMAINING_TIME labelling with threshold type NONE, and a
        hyperparameter optimizer whose performance metric is ``metric``.

        :param predictive_model: forwarded to ``create_test_predictive_model``
        :param prediction_method: forwarded to ``create_test_predictive_model``
        :param metric: performance metric passed to the optimizer
        :return: the job returned by ``create_test_job``
        """
        encoding_cfg = create_test_encoding(prefix_length=8, padding=True)
        model = create_test_predictive_model(
            predictive_model=predictive_model,
            prediction_method=prediction_method)
        optimizer = create_test_hyperparameter_optimizer(
            performance_metric=metric)
        label_cfg = create_test_labelling(
            label_type=LabelTypes.REMAINING_TIME.value,
            threshold_type=ThresholdTypes.NONE.value)

        return create_test_job(predictive_model=model,
                               encoding=encoding_cfg,
                               labelling=label_cfg,
                               hyperparameter_optimizer=optimizer)
 def test_prefix0(self):
     """``encode_label_logs`` must raise ValueError for prefix length 0."""
     zero_prefix_encoding = create_test_encoding(
         value_encoding=ValueEncodings.FREQUENCY.value,
         add_elapsed_time=True,
         task_generation_type=TaskGenerationTypes.ONLY_THIS.value,
         prefix_length=0)
     # The job is built outside the ``assertRaises`` block so that only the
     # encoding call itself is checked for the ValueError.
     job = create_test_job(
         encoding=zero_prefix_encoding,
         labelling=self.labelling,
         predictive_model=create_test_predictive_model(
             predictive_model=PredictiveModels.CLASSIFICATION.value))

     with self.assertRaises(ValueError):
         encode_label_logs(self.training_log, self.test_log, job)
 def test_regression_nn(self):
     """Neural-network regression on remaining time: check MAE and MAPE."""
     model = create_test_predictive_model(
         predictive_model=PredictiveModels.REGRESSION.value,
         prediction_method=RegressionMethods.NN.value)
     labelling = create_test_labelling(
         label_type=LabelTypes.REMAINING_TIME.value)
     clustering = create_test_clustering(
         clustering_type=ClusteringMethods.NO_CLUSTER.value)
     job = create_test_job(predictive_model=model,
                           labelling=labelling,
                           clustering=clustering)

     metrics, _ = calculate(job)
     del metrics['elapsed_time']
     print(metrics)

     self.assertAlmostEqual(metrics['mae'], 0.0001388888888888889)
     self.assertAlmostEqual(metrics['mape'], -1)
    def test_attribute_number(self):
        """AMOUNT labelling with additional columns gives a (2, 15) frame.

        The test log is used both as training and as test input; the
        additional columns come from ``get_additional_columns``.
        """
        label_cfg = create_test_labelling(
            label_type=LabelTypes.ATTRIBUTE_NUMBER.value,
            attribute_name='AMOUNT')
        job = create_test_job(
            encoding=self.encoding,
            labelling=label_cfg,
            predictive_model=create_test_predictive_model(
                predictive_model=PredictiveModels.CLASSIFICATION.value))
        extra_columns = get_additional_columns(self.test_log)

        _, encoded_test = encode_label_logs(
            self.test_log, self.test_log, job, extra_columns)
        self.assertEqual(encoded_test.shape, (2, 15))
示例#26
0
 def test_regression_no_cluster(self):
     """Random-forest regression on the repair log without clustering.

     Compares ``rmse``, ``mae``, ``rscore`` and ``mape`` from ``calculate``
     with the expected values (to ``assertAlmostEqual`` precision).
     """
     # unittest only honours ``maxDiff``; the previous ``max_diff`` attribute
     # had no effect.
     self.maxDiff = None
     job = create_test_job(
         clustering=create_test_clustering(
             clustering_type=ClusteringMethods.NO_CLUSTER.value),
         split=repair_example(),
         encoding=create_test_encoding(prefix_length=5, padding=True),
         predictive_model=create_test_predictive_model(
             predictive_model=PredictiveModels.REGRESSION.value,
             prediction_method=RegressionMethods.RANDOM_FOREST.value))
     result, _ = calculate(job)

     expected_metrics = (
         ('rmse', 0.03263757),
         ('mae', 0.00011685),
         ('rscore', 0.13776124),
         ('mape', float('inf')),
     )
     for metric, expected in expected_metrics:
         self.assertAlmostEqual(result[metric], expected)
 def test_regression_kmeans(self):
     """Random-forest regression on duration labels with k-means clustering.

     Compares ``rmse``, ``mae``, ``rscore`` and ``mape`` from ``calculate``
     with the expected values (to ``assertAlmostEqual`` precision).
     """
     # unittest only honours ``maxDiff``; the previous ``max_diff`` attribute
     # had no effect.
     self.maxDiff = None
     job = create_test_job(
         clustering=create_test_clustering(
             clustering_type=ClusteringMethods.KMEANS.value),
         split=repair_example(),
         encoding=create_test_encoding(prefix_length=5, padding=True),
         labelling=create_test_labelling(
             label_type=LabelTypes.DURATION.value),
         predictive_model=create_test_predictive_model(
             predictive_model=PredictiveModels.REGRESSION.value,
             prediction_method=RegressionMethods.RANDOM_FOREST.value))
     result, _ = calculate(job)

     expected_metrics = (
         ('rmse', 0.48841552839653984),
         ('mae', 0.44282462605873457),
         ('rscore', 0.015130407121517586),
         ('mape', -1),
     )
     for metric, expected in expected_metrics:
         self.assertAlmostEqual(result[metric], expected)
 def test_regression_no_cluster(self):
     """Random-forest regression on duration labels without clustering.

     Compares ``rmse``, ``mae``, ``rscore`` and ``mape`` from ``calculate``
     with the expected values (to ``assertAlmostEqual`` precision).
     """
     # unittest only honours ``maxDiff``; the previous ``max_diff`` attribute
     # had no effect.
     self.maxDiff = None
     job = create_test_job(
         clustering=create_test_clustering(
             clustering_type=ClusteringMethods.NO_CLUSTER.value),
         split=repair_example(),
         encoding=create_test_encoding(prefix_length=5, padding=True),
         labelling=create_test_labelling(
             label_type=LabelTypes.DURATION.value),
         predictive_model=create_test_predictive_model(
             predictive_model=PredictiveModels.REGRESSION.value,
             prediction_method=RegressionMethods.RANDOM_FOREST.value))
     result, _ = calculate(job)

     expected_metrics = (
         ('rmse', 0.4868515876868242),
         ('mae', 0.44340838774645464),
         ('rscore', 0.02142755175443678),
         ('mape', -1),
     )
     for metric, expected in expected_metrics:
         self.assertAlmostEqual(result[metric], expected)
 def setUp(self):
     """Encode the general-example train/test logs for the tests.

     Stores the two frames returned by ``encode_label_logs`` (boolean value
     encoding with elapsed time) as ``self.training_df`` and
     ``self.test_df``.
     """
     test_log = get_log(
         create_test_log(log_name=general_example_test_filename,
                         log_path=general_example_test_filepath))
     training_log = get_log(
         create_test_log(log_name=general_example_train_filename,
                         log_path=general_example_train_filepath))

     boolean_encoding = create_test_encoding(
         value_encoding=ValueEncodings.BOOLEAN.value,
         add_elapsed_time=True)
     model = create_test_predictive_model(
         predictive_model=PredictiveModels.CLASSIFICATION.value)
     job = create_test_job(encoding=boolean_encoding,
                           predictive_model=model)

     frames = encode_label_logs(training_log, test_log, job)
     self.training_df, self.test_df = frames
示例#30
0
 def test_regression_kmeans(self):
     """Random-forest regression on the repair log with k-means clustering.

     Compares ``rmse``, ``mae``, ``rscore`` and ``mape`` from ``calculate``
     with the expected values (to ``assertAlmostEqual`` precision).
     """
     # unittest only honours ``maxDiff``; the previous ``max_diff`` attribute
     # had no effect.
     self.maxDiff = None
     job = create_test_job(
         clustering=create_test_clustering(
             clustering_type=ClusteringMethods.KMEANS.value),
         split=repair_example(),
         encoding=create_test_encoding(prefix_length=5, padding=True),
         predictive_model=create_test_predictive_model(
             predictive_model=PredictiveModels.REGRESSION.value,
             prediction_method=RegressionMethods.RANDOM_FOREST.value))
     result, _ = calculate(job)

     expected_metrics = (
         ('rmse', 0.0325738),
         ('mae', 0.00014269),
         ('rscore', -0.11336870),
         ('mape', float('inf')),
     )
     for metric, expected in expected_metrics:
         self.assertAlmostEqual(result[metric], expected)