Exemplo n.º 1
0
def std_experiments(dataset, prefix_length, models, splits,
                    classification_method, encoding_method):
    """Submit the two standard (no-drift) classification jobs for *dataset*.

    One job is submitted per training split ('0-40_80-100' and
    '0-80_80-100'); the returned job id is stored in
    ``models[dataset][split_key]``.

    :param dataset: key identifying the dataset in ``models`` and ``splits``
    :param prefix_length: trace prefix length used for encoding
    :param models: nested dict mutated in place with the submitted job ids
    :param splits: nested dict mapping dataset -> split key -> split id
    :param classification_method: name of the classifier to train
    :param encoding_method: value-encoding method for the payload
    """
    # Both submissions were previously duplicated verbatim; the only
    # difference is the split key, so loop over the two keys instead.
    for split_key in ('0-40_80-100', '0-80_80-100'):
        models[dataset][split_key] = send_job_request(
            payload=create_classification_payload(
                split=splits[dataset][split_key],
                encodings=[encoding_method],
                encoding={
                    "padding": "zero_padding",
                    "generation_type": TaskGenerationTypes.ALL_IN_ONE.value,
                    "prefix_length": prefix_length,
                    "features": []
                },
                labeling={
                    "type": LabelTypes.ATTRIBUTE_STRING.value,
                    "attribute_name": "label",
                    "add_remaining_time": False,
                    "add_elapsed_time": False,
                    "add_executed_events": False,
                    "add_resources_used": False,
                    "add_new_traces": False
                },
                hyperparameter_optimization={
                    "type": HyperparameterOptimizationMethods.HYPEROPT.value,
                    "max_evaluations": 1000,
                    "performance_metric": HyperOptLosses.AUC.value,
                    "algorithm_type": HyperOptAlgorithms.TPE.value
                },
                classification=[classification_method]),
            server_port='50401',
            server_name='ashkin')[0]['id']
Exemplo n.º 2
0
def drift_size_experimentation(dataset, prefix_length, models, splits,
                               classification_method, encoding_method):
    """Submit the drift-size experimentation jobs for *dataset*.

    For every classifier except ``randomForest``, the model previously
    trained on the '0-40_80-100' split is looked up and trained
    incrementally on the '40-55_80-100' drift window.  A full retraining
    job on '0-55_80-100' is always submitted.  Job ids are stored in
    ``models[dataset][split_key]``.

    :param dataset: key identifying the dataset in ``models`` and ``splits``
    :param prefix_length: trace prefix length used for encoding
    :param models: nested dict mutated in place with the submitted job ids
    :param splits: nested dict mapping dataset -> split key -> split id
    :param classification_method: name of the classifier to train
    :param encoding_method: value-encoding method for the payload
    """
    # Shared payload fragments; the same settings were previously
    # duplicated inline for every submission.  Fresh copies are passed to
    # each call so no submission can mutate another's configuration.
    encoding = {
        "padding": "zero_padding",
        "generation_type": TaskGenerationTypes.ALL_IN_ONE.value,
        "prefix_length": prefix_length,
        "features": []
    }
    labeling = {
        "type": LabelTypes.ATTRIBUTE_STRING.value,
        "attribute_name": "label",
        "add_remaining_time": False,
        "add_elapsed_time": False,
        "add_executed_events": False,
        "add_resources_used": False,
        "add_new_traces": False
    }

    if classification_method != "randomForest":
        # Locate the job that trained on the 0-40 window so its model can
        # be used as the starting point for incremental training.
        pretrained_model_id = get_pretrained_model_id(config=retrieve_job(
            config={
                'type': JobTypes.PREDICTION.value,
                # 'status': JobStatuses.COMPLETED.value, # TODO sometimes some jobs hang in running while they are actually finished
                'create_models': True,
                'split': splits[dataset]['0-40_80-100'],
                'encoding': {
                    "value_encoding": encoding_method,
                    "padding": True,
                    "task_generation_type":
                    TaskGenerationTypes.ALL_IN_ONE.value,
                    "prefix_length": prefix_length
                },
                'labelling': dict(labeling),
                'hyperparameter_optimization': {
                    "optimization_method":
                    HyperparameterOptimizationMethods.HYPEROPT.value
                },
                # "max_evaluations": 1000, #TODO not yet supported
                # "performance_metric": HyperOptLosses.AUC.value,
                # "algorithm_type": HyperOptAlgorithms.TPE.value},
                'predictive_model': {
                    'predictive_model': 'classification',
                    'prediction_method': classification_method
                },
                'clustering': {
                    'clustering_method': ClusteringMethods.NO_CLUSTER.value
                }
            },
            server_name='ashkin',
            server_port='50401'))

        # Incremental training on the drift window: no hyperparameter
        # optimization, continue from the pretrained model.
        models[dataset]['40-55_80-100'] = send_job_request(
            payload=create_classification_payload(
                split=splits[dataset]['40-55_80-100'],
                encodings=[encoding_method],
                encoding=dict(encoding),
                labeling=dict(labeling),
                classification=[classification_method],
                hyperparameter_optimization={
                    "type": HyperparameterOptimizationMethods.NONE.value
                },
                incremental_train=[pretrained_model_id]),
            server_port='50401',
            server_name='ashkin')[0]['id']

    # Full retraining on the merged 0-55 window, with hyperopt enabled.
    models[dataset]['0-55_80-100'] = send_job_request(
        payload=create_classification_payload(
            split=splits[dataset]['0-55_80-100'],
            encodings=[encoding_method],
            encoding=dict(encoding),
            labeling=dict(labeling),
            classification=[classification_method],
            hyperparameter_optimization={
                "type": HyperparameterOptimizationMethods.HYPEROPT.value,
                "max_evaluations": 1000,
                "performance_metric": HyperOptLosses.AUC.value,
                "algorithm_type": HyperOptAlgorithms.TPE.value
            },
        ),
        server_port='50401',
        server_name='ashkin')[0]['id']