def test_no_exceptions(self):
        """Smoke-test ``calculate`` over every option combination.

        The cartesian product of all value encodings, both padding settings,
        all classification methods and all label types is iterated; a job is
        built for each combination and passed to ``calculate``. Nothing is
        asserted about the results, so the test only fails when a call raises.
        """
        label_values = [member.value for member in LabelTypes]
        method_values = [member.value for member in ClassificationMethods]
        encoding_values = [member.value for member in ValueEncodings]
        padding_values = [True, False]

        option_axes = (encoding_values, padding_values,
                       method_values, label_values)

        for encoding, padding, method, label in itertools.product(*option_axes):
            print(encoding, padding, method, label)

            attribute_string = label == LabelTypes.ATTRIBUTE_STRING.value \
                if method == 'nn' and padding else None
            if method == 'nn' and (not padding or attribute_string):
                # NOTE(review): this branch is a no-op, so the combination is
                # still executed below. Possibly ``continue`` was intended to
                # skip unsupported 'nn' setups -- confirm before changing.
                pass

            model = create_test_predictive_model(prediction_method=method)
            job_encoding = create_test_encoding(
                value_encoding=encoding,
                padding=padding,
            )
            labelling = create_test_labelling(label_type=label)
            job = create_test_job(
                predictive_model=model,
                encoding=job_encoding,
                labelling=labelling,
            )
            # with HidePrints():
            calculate(job)
Exemplo n.º 2
0
 def test_duration(self):
     """Duration labelling must give equal results before and after re-encoding.

     The job from ``get_job`` is calculated with DURATION labelling, then
     calculated again after its encoding is replaced by one with
     ``prefix_length=22`` and ``padding=True``; both results must be equal.
     """
     job = self.get_job()
     job.labelling = create_test_labelling(
         label_type=LabelTypes.DURATION.value,
     )
     job.save()
     baseline, _ = calculate(job)

     job.encoding = create_test_encoding(prefix_length=22, padding=True)
     job.save()
     re_encoded, _ = calculate(job)

     self.assertEqual(baseline, re_encoded)
Exemplo n.º 3
0
def prediction_task(job_id):
    """Run the prediction job identified by ``job_id`` and publish its state.

    The job is executed only when its status is CREATED and, for jobs of
    type UPDATE, the job referenced by ``incremental_train`` has status
    COMPLETED. A runnable job is marked RUNNING, computed either through
    ``hyperopt_task`` (when a hyperparameter optimizer with a method other
    than NONE is attached) or through ``calculate``, optionally has its
    models saved, and is finally marked COMPLETED with its result stored.

    Whatever happens, the job is saved and published in the ``finally``
    clause.

    Args:
        job_id: id used to look the ``Job`` up.

    Raises:
        Exception: any exception raised while running the job is logged,
            recorded on the job (status ERROR, ``error`` set to the
            exception's repr) and re-raised unchanged.
    """
    logger.info("Start prediction task ID %s", job_id)
    job = Job.objects.get(id=job_id)

    try:
        # Equivalent to the two-clause original: CREATED jobs run unless they
        # are UPDATE jobs whose base training job has not completed yet.
        is_runnable = job.status == JobStatuses.CREATED.value and (
            job.type != JobTypes.UPDATE.value
            or job.incremental_train.status == JobStatuses.COMPLETED.value
        )
        if is_runnable:
            job.status = JobStatuses.RUNNING.value
            job.save()
            start_time = time.time()

            optimizer = job.hyperparameter_optimizer
            use_hyperopt = (
                optimizer is not None
                and optimizer.optimization_method
                != HyperparameterOptimizationMethods.NONE.value
            )
            if use_hyperopt:
                result, model_split = hyperopt_task(job)
            else:
                result, model_split = calculate(job)

            elapsed_time = time.time() - start_time
            logger.info(
                '\tJob took: %s in HH:MM:ss',
                time.strftime("%H:%M:%S", time.gmtime(elapsed_time)),
            )
            if job.create_models:
                save_models(model_split, job)
            job.result = result
            job.status = JobStatuses.COMPLETED.value
    except Exception as e:
        logger.error(e)
        job.status = JobStatuses.ERROR.value
        job.error = repr(e)
        # Bare ``raise`` re-raises the active exception with its traceback.
        raise
    finally:
        # Persist and broadcast the final state on success and failure alike.
        job.save()
        publish(job)
Exemplo n.º 4
0
def prediction_task(job_id):
    """Execute the job with id ``job_id`` if it is still in CREATED state.

    A CREATED job is switched to RUNNING, computed with ``hyperopt_task``
    when a hyperparameter optimizer is attached (``calculate`` otherwise),
    has its models saved when ``create_models`` is set, and ends up
    COMPLETED with the result stored. On any exception the job is marked
    ERROR, the exception's repr is stored in ``job.error`` and the exception
    is re-raised. The job is always saved and published at the end.
    """
    print("Start prediction task ID {}".format(job_id))
    job = Job.objects.get(id=job_id)

    try:
        if job.status != JobStatuses.CREATED.value:
            # Nothing to run; the ``finally`` clause still saves and publishes.
            return

        job.status = JobStatuses.RUNNING.value
        job.save()

        began = time.time()
        if job.hyperparameter_optimizer is None:
            result, model_split = calculate(job)
        else:
            result, model_split = hyperopt_task(job)
        took = time.time() - began

        formatted = time.strftime("%H:%M:%S", time.gmtime(took))
        print('\tJob took: {} in HH:MM:ss'.format(formatted))

        if job.create_models:
            save_models(model_split, job)
        job.result = result
        job.status = JobStatuses.COMPLETED.value
    except Exception as e:
        job.status = JobStatuses.ERROR.value
        job.error = str(repr(e))
        raise e
    finally:
        job.save()
        publish(job)
Exemplo n.º 5
0
 def test_atr_string(self):
     """ATTRIBUTE_STRING labelling on 'description' must return the expected dict."""
     job = self.get_job()
     labelling = create_test_labelling(
         label_type=LabelTypes.ATTRIBUTE_STRING.value,
         attribute_name='description',
     )
     job.labelling = labelling
     job.save()

     outcome, _ = calculate(job)

     expected = {'Simulated process instance': 883}
     self.assertEqual(outcome, expected)
 def test_remaining_time(self):
     """REMAINING_TIME labelling must return the expected 'true'/'false' dict."""
     job = self.get_job()
     labelling = create_test_labelling(
         label_type=LabelTypes.REMAINING_TIME.value,
     )
     job.labelling = labelling
     job.save()

     outcome, _ = calculate(job)

     expected = {'true': 529, 'false': 354}
     self.assertEqual(outcome, expected)
 def test_next_activity_kmeans(self):
     """Random-forest next-activity classification with k-means clustering.

     Builds a job on ``repair_example()`` with prefix length 8 and padding,
     runs ``calculate`` and compares every returned metric except
     ``elapsed_time`` against the expected values.
     """
     # unittest reads ``maxDiff`` (camelCase); the previous ``max_diff`` was a
     # plain unused attribute, so long dict diffs could still be truncated.
     self.maxDiff = None
     job = create_test_job(
         clustering=create_test_clustering(
             clustering_type=ClusteringMethods.KMEANS.value),
         split=repair_example(),
         encoding=create_test_encoding(prefix_length=8, padding=True),
         labelling=create_test_labelling(
             label_type=LabelTypes.NEXT_ACTIVITY.value),
         predictive_model=create_test_predictive_model(
             predictive_model=PredictiveModels.CLASSIFICATION.value,
             prediction_method=ClassificationMethods.RANDOM_FOREST.value))
     result, _ = calculate(job)
     # ``elapsed_time`` is dropped before the exact dict comparison.
     del result['elapsed_time']
     self.assertDictEqual(
         result, {
             'f1score': 0.54239884582595577,
             'acc': 0.80995475113122173,
             'true_positive': '--',
             'true_negative': '--',
             'false_negative': '--',
             'false_positive': '--',
             'precision': 0.62344720496894401,
             'recall': 0.5224945442336747,
             'auc': 0.4730604801339352
         })
 def test_class_no_cluster(self):
     """Random-forest classification without clustering on the repair example.

     Uses an encoding with prefix length 5, padding and ``add_elapsed_time``;
     no labelling is passed to ``create_test_job``. Every returned metric
     except ``elapsed_time`` is compared against the expected values.
     """
     # unittest reads ``maxDiff`` (camelCase); the previous ``max_diff`` was a
     # plain unused attribute, so long dict diffs could still be truncated.
     self.maxDiff = None
     job = create_test_job(
         clustering=create_test_clustering(
             clustering_type=ClusteringMethods.NO_CLUSTER.value),
         split=repair_example(),
         encoding=create_test_encoding(prefix_length=5,
                                       padding=True,
                                       add_elapsed_time=True),
         predictive_model=create_test_predictive_model(
             predictive_model=PredictiveModels.CLASSIFICATION.value,
             prediction_method=ClassificationMethods.RANDOM_FOREST.value))
     result, _ = calculate(job)
     # ``elapsed_time`` is dropped before the exact dict comparison.
     del result['elapsed_time']
     self.assertDictEqual(
         result, {
             'f1score': 1.0,
             'acc': 1.0,
             'true_positive': '--',
             'true_negative': '--',
             'false_negative': '--',
             'false_positive': '--',
             'precision': 1.0,
             'recall': 1.0,
             'auc': 0.0
         })
Exemplo n.º 9
0
 def test_next_activity(self):
     """NEXT_ACTIVITY labelling must return the expected per-activity dict."""
     job = self.get_job()
     labelling = create_test_labelling(
         label_type=LabelTypes.NEXT_ACTIVITY.value,
     )
     job.labelling = labelling
     job.save()

     outcome, _ = calculate(job)

     expected = {
         '0': 2,
         'Repair (Complex)': 306,
         'Test Repair': 432,
         'Inform User': 5,
         'Repair (Simple)': 138,
     }
     self.assertEqual(outcome, expected)
 def test_update_nb(self):
     """Train a Hoeffding-tree job, then a second job that updates from it.

     The first job is created with ``create_models=True``; the second reuses
     its encoding and clustering and receives it as ``incremental_train``.
     Both results, minus ``elapsed_time``, must equal the same metric dict.
     NOTE(review): the name says "nb" but the method used is HOEFFDING_TREE
     -- confirm which was intended.
     """
     base_job = create_test_job(
         predictive_model=create_test_predictive_model(
             prediction_method=ClassificationMethods.HOEFFDING_TREE.value),
         labelling=create_test_labelling(
             label_type=LabelTypes.ATTRIBUTE_STRING.value,
             attribute_name='concept:name'),
         clustering=create_test_clustering(
             clustering_type=ClusteringMethods.NO_CLUSTER.value),
         create_models=True)
     first_result, _ = calculate(base_job)

     update_job = create_test_job(
         predictive_model=create_test_predictive_model(
             prediction_method=ClassificationMethods.HOEFFDING_TREE.value),
         encoding=base_job.encoding,
         labelling=create_test_labelling(
             label_type=LabelTypes.ATTRIBUTE_STRING.value,
             attribute_name='concept:name'),
         clustering=base_job.clustering,
         incremental_train=base_job)
     second_result, _ = calculate(update_job)

     del first_result['elapsed_time']
     del second_result['elapsed_time']

     expected = {
         'f1score': 0.0,
         'acc': 0.0,
         'precision': 0.0,
         'recall': 0.0,
         'true_positive': 0,
         'true_negative': 0,
         'false_negative': 2,
         'false_positive': 0,
         'auc': 0.0
     }
     self.assertDictEqual(first_result, expected)
     self.assertDictEqual(second_result, expected)
Exemplo n.º 11
0
 def test_remaining_custom_threshold(self):
     """REMAINING_TIME labelling with a custom threshold of 1600 must match."""
     job = self.get_job()
     labelling = create_test_labelling(
         label_type=LabelTypes.REMAINING_TIME.value,
         threshold_type=ThresholdTypes.THRESHOLD_CUSTOM.value,
         threshold=1600,
     )
     job.labelling = labelling
     job.save()

     outcome, _ = calculate(job)

     expected = {'true': 444, 'false': 439}
     self.assertEqual(outcome, expected)
Exemplo n.º 12
0
 def test_next_activity_DecisionTree(self):
     """Decision-tree next-activity job without clustering must equal ``results3()``."""
     model = create_test_predictive_model(
         prediction_method=ClassificationMethods.DECISION_TREE.value,
     )
     labelling = create_test_labelling(
         label_type=LabelTypes.NEXT_ACTIVITY.value,
     )
     clustering = create_test_clustering(
         clustering_type=ClusteringMethods.NO_CLUSTER.value,
     )
     job = create_test_job(
         predictive_model=model,
         labelling=labelling,
         clustering=clustering,
     )

     outcome, _ = calculate(job)

     self.assertDictEqual(outcome, self.results3())
Exemplo n.º 13
0
 def test_class_randomForest(self):
     """Random-forest job labelled by attribute 'label' must equal ``results2()``."""
     model = create_test_predictive_model(
         prediction_method=ClassificationMethods.RANDOM_FOREST.value,
     )
     labelling = create_test_labelling(
         label_type=LabelTypes.ATTRIBUTE_STRING.value,
         attribute_name='label',
     )
     clustering = create_test_clustering(
         clustering_type=ClusteringMethods.NO_CLUSTER.value,
     )
     job = create_test_job(
         predictive_model=model,
         labelling=labelling,
         clustering=clustering,
     )

     outcome, _ = calculate(job)

     self.assertDictEqual(outcome, self.results2())
 def test_tsp_gru(self):
     """Time-series prediction with an RNN configured as 'gru'.

     Uses default labelling, prefix length 2 with padding and no clustering;
     after dropping ``elapsed_time`` the result must be
     ``{'nlevenshtein': 0.6}``.
     """
     model = create_test_predictive_model(
         predictive_model=PredictiveModels.TIME_SERIES_PREDICTION.value,
         prediction_method=TimeSeriesPredictionMethods.RNN.value,
         configuration={'rnn_type': 'gru'},
     )
     labelling = create_test_labelling()
     encoding = create_test_encoding(prefix_length=2, padding=True)
     clustering = create_test_clustering(
         clustering_type=ClusteringMethods.NO_CLUSTER.value,
     )
     job = create_test_job(
         predictive_model=model,
         labelling=labelling,
         encoding=encoding,
         clustering=clustering,
     )

     metrics, _ = calculate(job)
     del metrics['elapsed_time']

     self.assertDictEqual(metrics, {'nlevenshtein': 0.6})
 def test_regression_nn(self):
     """NN regression on REMAINING_TIME without clustering.

     Prints the metrics (minus ``elapsed_time``) and checks ``mae`` and
     ``mape`` with ``assertAlmostEqual``.
     """
     model = create_test_predictive_model(
         predictive_model=PredictiveModels.REGRESSION.value,
         prediction_method=RegressionMethods.NN.value,
     )
     labelling = create_test_labelling(
         label_type=LabelTypes.REMAINING_TIME.value,
     )
     clustering = create_test_clustering(
         clustering_type=ClusteringMethods.NO_CLUSTER.value,
     )
     job = create_test_job(
         predictive_model=model,
         labelling=labelling,
         clustering=clustering,
     )

     metrics, _ = calculate(job)
     del metrics['elapsed_time']
     print(metrics)

     self.assertAlmostEqual(metrics['mae'], 0.0001388888888888889)
     self.assertAlmostEqual(metrics['mape'], -1)
Exemplo n.º 16
0
 def test_regression_no_cluster(self):
     """Random-forest regression on DURATION labels without clustering.

     Builds the job on ``repair_example()`` with prefix length 5 and padding
     and checks ``rmse``, ``mae``, ``rscore`` and ``mape`` with
     ``assertAlmostEqual``.
     """
     # unittest's attribute is ``maxDiff``; ``max_diff`` was silently ignored.
     self.maxDiff = None
     job = create_test_job(
         clustering=create_test_clustering(clustering_type=ClusteringMethods.NO_CLUSTER.value),
         split=repair_example(),
         encoding=create_test_encoding(prefix_length=5, padding=True),
         labelling=create_test_labelling(label_type=LabelTypes.DURATION.value),
         predictive_model=create_test_predictive_model(predictive_model=PredictiveModels.REGRESSION.value,
                                                       prediction_method=RegressionMethods.RANDOM_FOREST.value)
     )
     result, _ = calculate(job)
     self.assertAlmostEqual(result['rmse'], 0.4868515876868242)
     self.assertAlmostEqual(result['mae'], 0.44340838774645464)
     self.assertAlmostEqual(result['rscore'], 0.02142755175443678)
     self.assertAlmostEqual(result['mape'], -1)
Exemplo n.º 17
0
 def test_regression_kmeans(self):
     """Random-forest regression on DURATION labels with k-means clustering.

     Builds the job on ``repair_example()`` with prefix length 5 and padding
     and checks ``rmse``, ``mae``, ``rscore`` and ``mape`` with
     ``assertAlmostEqual``.
     """
     # unittest's attribute is ``maxDiff``; ``max_diff`` was silently ignored.
     self.maxDiff = None
     job = create_test_job(
         clustering=create_test_clustering(clustering_type=ClusteringMethods.KMEANS.value),
         split=repair_example(),
         encoding=create_test_encoding(prefix_length=5, padding=True),
         labelling=create_test_labelling(label_type=LabelTypes.DURATION.value),
         predictive_model=create_test_predictive_model(predictive_model=PredictiveModels.REGRESSION.value,
                                                       prediction_method=RegressionMethods.RANDOM_FOREST.value)
     )
     result, _ = calculate(job)
     self.assertAlmostEqual(result['rmse'], 0.48841552839653984)
     self.assertAlmostEqual(result['mae'], 0.44282462605873457)
     self.assertAlmostEqual(result['rscore'], 0.015130407121517586)
     self.assertAlmostEqual(result['mape'], -1)
Exemplo n.º 18
0
 def test_regression_kmeans(self):
     """Random-forest regression with k-means clustering on the repair example.

     No labelling is passed to ``create_test_job``. ``rmse``, ``mae`` and
     ``rscore`` are checked with ``assertAlmostEqual``; ``mape`` is expected
     to be infinite.
     """
     # unittest's attribute is ``maxDiff``; ``max_diff`` was silently ignored.
     self.maxDiff = None
     job = create_test_job(
         clustering=create_test_clustering(
             clustering_type=ClusteringMethods.KMEANS.value),
         split=repair_example(),
         encoding=create_test_encoding(prefix_length=5, padding=True),
         predictive_model=create_test_predictive_model(
             predictive_model=PredictiveModels.REGRESSION.value,
             prediction_method=RegressionMethods.RANDOM_FOREST.value))
     result, _ = calculate(job)
     self.assertAlmostEqual(result['rmse'], 0.0325738)
     self.assertAlmostEqual(result['mae'], 0.00014269)
     self.assertAlmostEqual(result['rscore'], -0.11336870)
     # Passes only through assertAlmostEqual's equality shortcut (inf == inf).
     self.assertAlmostEqual(result['mape'], float('inf'))
Exemplo n.º 19
0
 def test_regression_no_cluster(self):
     """Random-forest regression without clustering on the repair example.

     No labelling is passed to ``create_test_job``. ``rmse``, ``mae`` and
     ``rscore`` are checked with ``assertAlmostEqual``; ``mape`` is expected
     to be infinite.
     """
     # unittest's attribute is ``maxDiff``; ``max_diff`` was silently ignored.
     self.maxDiff = None
     job = create_test_job(
         clustering=create_test_clustering(
             clustering_type=ClusteringMethods.NO_CLUSTER.value),
         split=repair_example(),
         encoding=create_test_encoding(prefix_length=5, padding=True),
         predictive_model=create_test_predictive_model(
             predictive_model=PredictiveModels.REGRESSION.value,
             prediction_method=RegressionMethods.RANDOM_FOREST.value))
     result, _ = calculate(job)
     self.assertAlmostEqual(result['rmse'], 0.03263757)
     self.assertAlmostEqual(result['mae'], 0.00011685)
     self.assertAlmostEqual(result['rscore'], 0.13776124)
     # Passes only through assertAlmostEqual's equality shortcut (inf == inf).
     self.assertAlmostEqual(result['mape'], float('inf'))
Exemplo n.º 20
0
 def test_regression_kmeans(self):
     """Random-forest regression with k-means clustering on the repair example.

     No labelling is passed to ``create_test_job``. ``rmse``, ``mae``,
     ``rscore`` and ``mape`` are checked with ``assertAlmostEqual``.
     """
     # unittest's attribute is ``maxDiff``; ``max_diff`` was silently ignored.
     self.maxDiff = None
     job = create_test_job(
         clustering=create_test_clustering(
             clustering_type=ClusteringMethods.KMEANS.value),
         split=repair_example(),
         encoding=create_test_encoding(prefix_length=5, padding=True),
         predictive_model=create_test_predictive_model(
             predictive_model=PredictiveModels.REGRESSION.value,
             prediction_method=RegressionMethods.RANDOM_FOREST.value))
     result, _ = calculate(job)
     self.assertAlmostEqual(result['rmse'], 0.036930128)
     self.assertAlmostEqual(result['mae'], 0.023046561975)
     self.assertAlmostEqual(result['rscore'], 0.99830687)
     self.assertAlmostEqual(result['mape'], 0.5761640)
Exemplo n.º 21
0
    def test_next_activity_no_cluster(self):
        """Random-forest next-activity classification without clustering.

        Builds the job on ``repair_example()`` with prefix length 8 and
        padding and checks ``f1score``, ``acc``, ``precision``, ``recall``
        and ``auc`` with ``assertAlmostEqual``.
        """
        # unittest's attribute is ``maxDiff``; ``max_diff`` was silently ignored.
        self.maxDiff = None
        job = create_test_job(
            clustering=create_test_clustering(clustering_type=ClusteringMethods.NO_CLUSTER.value),
            split=repair_example(),
            encoding=create_test_encoding(prefix_length=8, padding=True),
            labelling=create_test_labelling(label_type=LabelTypes.NEXT_ACTIVITY.value),
            predictive_model=create_test_predictive_model(predictive_model=PredictiveModels.CLASSIFICATION.value,
                                                          prediction_method=ClassificationMethods.RANDOM_FOREST.value)
        )
        result, _ = calculate(job)

        self.assertAlmostEqual(result['f1score'], 0.542398845)
        self.assertAlmostEqual(result['acc'], 0.809954751)
        self.assertAlmostEqual(result['precision'], 0.623447204)
        self.assertAlmostEqual(result['recall'], 0.52249454423)
        self.assertAlmostEqual(result['auc'], 0)
 def test_regression_xgboost(self):
     """XGBoost regression on REMAINING_TIME without clustering.

     Prints the metrics (minus ``elapsed_time``) and compares them exactly
     against the expected dict.
     """
     model = create_test_predictive_model(
         predictive_model=PredictiveModels.REGRESSION.value,
         prediction_method=RegressionMethods.XGBOOST.value,
     )
     labelling = create_test_labelling(
         label_type=LabelTypes.REMAINING_TIME.value,
     )
     clustering = create_test_clustering(
         clustering_type=ClusteringMethods.NO_CLUSTER.value,
     )
     job = create_test_job(
         predictive_model=model,
         labelling=labelling,
         clustering=clustering,
     )

     metrics, _ = calculate(job)
     del metrics['elapsed_time']
     print(metrics)

     expected = {
         'mae': 0.00011968612670898438,
         'mape': -1,
         'rmse': 0.00011968612670898438,
         'rscore': 0.0
     }
     self.assertDictEqual(metrics, expected)
 def test_regression_lasso(self):
     """Lasso regression on REMAINING_TIME without clustering.

     Prints the metrics (minus ``elapsed_time``) and compares them exactly
     against the expected dict.
     """
     model = create_test_predictive_model(
         predictive_model=PredictiveModels.REGRESSION.value,
         prediction_method=RegressionMethods.LASSO.value,
     )
     labelling = create_test_labelling(
         label_type=LabelTypes.REMAINING_TIME.value,
     )
     clustering = create_test_clustering(
         clustering_type=ClusteringMethods.NO_CLUSTER.value,
     )
     job = create_test_job(
         predictive_model=model,
         labelling=labelling,
         clustering=clustering,
     )

     metrics, _ = calculate(job)
     del metrics['elapsed_time']
     print(metrics)

     expected = {
         'mae': 0.0001388888888888889,
         'mape': -1,
         'rmse': 0.0001552824984374854,
         'rscore': -0.25
     }
     self.assertDictEqual(metrics, expected)
 def test_class_nn(self):
     """NN classification labelled by 'concept:name' must report all metric keys.

     Only the presence of each key is asserted, not its value.
     """
     model = create_test_predictive_model(
         prediction_method=ClassificationMethods.NN.value,
     )
     labelling = create_test_labelling(
         label_type=LabelTypes.ATTRIBUTE_STRING.value,
         attribute_name='concept:name',
     )
     clustering = create_test_clustering(
         clustering_type=ClusteringMethods.NO_CLUSTER.value,
     )
     job = create_test_job(
         predictive_model=model,
         labelling=labelling,
         clustering=clustering,
     )

     metrics, _ = calculate(job)
     del metrics['elapsed_time']

     required_keys = (
         'f1score',
         'acc',
         'precision',
         'recall',
         'true_positive',
         'true_negative',
         'false_negative',
         'false_positive',
         'auc',
     )
     for key in required_keys:
         self.assertIn(key, metrics)
 def test_class_nn_binary(self):
     """NN classification on REMAINING_TIME labels with a mean threshold.

     After dropping ``elapsed_time`` the metrics must equal the expected
     dict exactly.
     """
     model = create_test_predictive_model(
         prediction_method=ClassificationMethods.NN.value,
     )
     labelling = create_test_labelling(
         label_type=LabelTypes.REMAINING_TIME.value,
         threshold_type=ThresholdTypes.THRESHOLD_MEAN.value,
     )
     clustering = create_test_clustering(
         clustering_type=ClusteringMethods.NO_CLUSTER.value,
     )
     job = create_test_job(
         predictive_model=model,
         labelling=labelling,
         clustering=clustering,
     )

     metrics, _ = calculate(job)
     del metrics['elapsed_time']

     expected = {
         'f1score': 0.0,
         'acc': 0.0,
         'precision': 0.0,
         'recall': 0.0,
         'true_positive': 0,
         'true_negative': 0,
         'false_negative': 2,
         'false_positive': 0,
         'auc': 0.0
     }
     self.assertDictEqual(metrics, expected)
 def test_class_perc(self):
     """Perceptron classification labelled by 'concept:name', no clustering.

     After dropping ``elapsed_time`` the metrics must equal the expected
     dict exactly.
     """
     model = create_test_predictive_model(
         prediction_method=ClassificationMethods.PERCEPTRON.value,
     )
     labelling = create_test_labelling(
         label_type=LabelTypes.ATTRIBUTE_STRING.value,
         attribute_name='concept:name',
     )
     clustering = create_test_clustering(
         clustering_type=ClusteringMethods.NO_CLUSTER.value,
     )
     job = create_test_job(
         predictive_model=model,
         labelling=labelling,
         clustering=clustering,
     )

     metrics, _ = calculate(job)
     del metrics['elapsed_time']

     expected = {
         'f1score': 0.0,
         'acc': 0.0,
         'precision': 0.0,
         'recall': 0.0,
         'true_positive': 0,
         'true_negative': 0,
         'false_negative': 2,
         'false_positive': 0,
         'auc': 0.0
     }
     self.assertDictEqual(metrics, expected)
 def calculate_helper(job):
     """Run ``calculate`` on ``job`` and print how long the call took.

     The return value of ``calculate`` is discarded; ``job['method']`` is
     read afterwards for the report line.
     """
     began = time.time()
     calculate(job)
     method = job['method']
     elapsed = time.time() - began
     print('Total for %s %s seconds' % (method, elapsed))