Exemplo n.º 1
0
def set_model_name(job: Job) -> None:
    """Assign on-disk cache file paths to the models attached to *job* and
    persist the touched rows. Does nothing unless ``job.create_models`` is set.
    """
    if not job.create_models:
        return

    if job.predictive_model.model_path != '':
        # Model already points at a file: clone the row so the new path does
        # not clobber a model row shared with other jobs.
        # job.predictive_model = duplicate_orm_row(PredictiveModel.objects.filter(pk=job.predictive_model.pk)[0])  #todo: replace with simple CREATE
        cloned = PredictiveModel.init(
            job.predictive_model.get_full_dict()  # todo: doublecheck me, are you sure get_full_dict is returning everything needed?
        )  # todo: futurebug if object changes
        job.predictive_model = cloned
        job.predictive_model.save()
        job.save()

    if job.clustering.clustering_method != ClusteringMethods.NO_CLUSTER.value:
        job.clustering.model_path = 'cache/model_cache/job_{}-split_{}-clusterer-{}-v0.sav'.format(
            job.id, job.split.id, job.type)
        job.clustering.save()

    if job.type == JobTypes.UPDATE.value:
        # NOTE(review): the job is re-labelled as a prediction job *before* the
        # file name is built, so the name embeds the new type — the original
        # carried the same open question (#TODO: Y am I doing this?).
        job.type = JobTypes.PREDICTION.value
        model_file = 'cache/model_cache/job_{}-split_{}-predictive_model-{}-v{}.sav'.format(
            job.id, job.split.id, job.type, str(time.time()))
    else:
        model_file = 'cache/model_cache/job_{}-split_{}-predictive_model-{}-v0.sav'.format(
            job.id, job.split.id, job.type)
    job.predictive_model.model_path = model_file
    job.predictive_model.save()
    job.save()
Exemplo n.º 2
0
def create_test_predictive_model(
        predictive_model: str = PredictiveModels.CLASSIFICATION.value,
        prediction_method: str = ClassificationMethods.RANDOM_FOREST.value,
        configuration: dict = None) -> PredictiveModel:
    """Build a PredictiveModel row for use in tests.

    :param predictive_model: model family (defaults to classification)
    :param prediction_method: algorithm (defaults to random forest)
    :param configuration: extra method configuration; ``None`` (the default)
        behaves like an empty dict.  A ``None`` sentinel replaces the original
        mutable default ``{}``, which was shared between calls and could leak
        state if mutated downstream.
    :return: the initialised PredictiveModel
    """
    if configuration is None:
        configuration = {}
    pred_model = PredictiveModel.init(
        get_prediction_method_config(predictive_model, prediction_method,
                                     configuration))
    return pred_model
Exemplo n.º 3
0
def _calculate_and_evaluate(args) -> dict:
    """Hyperopt objective: build a predictive model from the sampled
    hyperparameters in *args*, train/evaluate it, and report the loss.

    Reads the module-level ``global_job``, ``training_df`` and ``test_df``
    and counts invocations in the module-level ``trial_number``.

    :param args: one hyperparameter sample drawn by the optimiser
    :return: a hyperopt result dict (loss, status, results, model ids)
    """
    global trial_number
    if trial_number % 20 == 0:
        logger.info("Trial {}".format(trial_number))
    trial_number += 1
    local_job = global_job

    predictive_model = local_job.predictive_model.predictive_model
    prediction_method = local_job.predictive_model.prediction_method

    # fixed model identity + the sampled hyperparameters
    model_config = {
        'predictive_model': predictive_model,
        'prediction_method': prediction_method,
        **args
    }

    new_predictive_model = PredictiveModel.init(model_config)
    local_job.predictive_model = duplicate_orm_row(new_predictive_model)
    local_job.predictive_model.save()
    local_job.save()
    # local_job = duplicate_orm_row(local_job) #TODO not sure it is ok to have this here.

    # getattr() instead of __getattribute__() — same attribute lookup,
    # idiomatic form.  The optimiser row exposes one sub-config per
    # optimisation method, named after the method.
    performance_metric = getattr(
        local_job.hyperparameter_optimizer,
        local_job.hyperparameter_optimizer.optimization_method.lower()
    ).performance_metric
    multiplier = _get_metric_multiplier(performance_metric)

    try:
        results, model_split = run_by_type(training_df.copy(), test_df.copy(),
                                           local_job)
        return {
            'loss': -results[performance_metric] * multiplier,
            'status': STATUS_OK,
            'results': results,
            'predictive_model_id': local_job.predictive_model.pk,
            'model_split': model_split,
            'config': model_config
        }
    except Exception:
        # Was a bare `except:`, which also swallowed SystemExit and
        # KeyboardInterrupt and hid the failure reason entirely; narrow the
        # catch and log the traceback so failed trials can be diagnosed.
        logger.exception("Trial {} failed".format(trial_number))
        return {
            'loss': 100,
            'status': STATUS_FAIL,
            'results': {},
            'predictive_model_id': {},
            'model_split': {},
            'config': {}
        }
Exemplo n.º 4
0
def update(split, payload, generation_type=PredictiveModels.CLASSIFICATION.value):  # TODO adapt to allow selecting the predictive_model to update
    """Create UPDATE-type jobs for every combination of prediction method,
    clustering, incremental base model and value encoding in *payload*.

    :param split: Split row the new jobs will train/test on
    :param payload: request payload; ``payload['config']`` drives creation
    :param generation_type: family of predictive model to build
    :return: list of created (or fetched, via get_or_create) Job rows
    """
    jobs = []

    config = payload['config']
    # labelling is optional in the payload; fall back to an empty config
    labelling_config = config['labelling'] if 'labelling' in config else {}

    # one job per (method, clustering, base model, encoding) combination
    for method in payload['config']['methods']:
        for clustering in payload['config']['clusterings']:
            for incremental_base_model in payload['config']['incremental_train']:
                for encMethod in payload['config']['encodings']:
                    encoding = payload['config']['encoding']
                    if encoding['generation_type'] == UP_TO:
                        # UP_TO: one job per prefix length from 1 to prefix_length
                        for i in range(1, encoding['prefix_length'] + 1):
                            job, _ = Job.objects.get_or_create(
                                status=JobStatuses.CREATED.value,
                                type=JobTypes.UPDATE.value,
                                split=split,
                                encoding=Encoding.objects.get_or_create(  # TODO fixme
                                    data_encoding=DataEncodings.LABEL_ENCODER.value,
                                    value_encoding=encMethod,
                                    # NOTE(review): the add_* flags are read from
                                    # labelling_config, not from the encoding
                                    # config — confirm this is intended
                                    add_elapsed_time=labelling_config.get('add_elapsed_time', False),
                                    add_remaining_time=labelling_config.get('add_remaining_time', False),
                                    add_executed_events=labelling_config.get('add_executed_events', False),
                                    add_resources_used=labelling_config.get('add_resources_used', False),
                                    add_new_traces=labelling_config.get('add_new_traces', False),
                                    prefix_length=i,
                                    # TODO static check?
                                    padding=True if config['encoding']['padding'] == 'zero_padding' else False,
                                    task_generation_type=config['encoding'].get('generation_type', 'only_this'),
                                    features=config['encoding'].get('features', [])
                                )[0],
                                labelling=Labelling.objects.get_or_create(
                                    type=labelling_config.get('type', None),
                                    # TODO static check?
                                    attribute_name=labelling_config.get('attribute_name', None),
                                    threshold_type=labelling_config.get('threshold_type', None),
                                    threshold=labelling_config.get('threshold', None)
                                )[0] if labelling_config != {} else None,
                                clustering=Clustering.init(clustering, configuration=config.get(clustering, {})),
                                predictive_model=PredictiveModel.init(
                                    get_prediction_method_config(generation_type, method, payload)
                                ),
                                hyperparameter_optimizer=HyperparameterOptimization.init(
                                    config.get('hyperparameter_optimizer', None)),
                                create_models=config.get('create_models', False),
                                incremental_train=Job.objects.filter(
                                    pk=incremental_base_model
                                )[0]
                            )

                            check_predictive_model_not_overwrite(job)
                            set_model_name(job)

                            jobs.append(job)
                    else:
                        # single fixed prefix length taken from the payload
                        job, _ = Job.objects.get_or_create(
                            status=JobStatuses.CREATED.value,
                            type=JobTypes.UPDATE.value,

                            split=split,
                            encoding=Encoding.objects.get_or_create(  # TODO fixme
                                data_encoding=DataEncodings.LABEL_ENCODER.value,
                                value_encoding=encMethod,
                                add_elapsed_time=labelling_config.get('add_elapsed_time', False),
                                add_remaining_time=labelling_config.get('add_remaining_time', False),
                                add_executed_events=labelling_config.get('add_executed_events', False),
                                add_resources_used=labelling_config.get('add_resources_used', False),
                                add_new_traces=labelling_config.get('add_new_traces', False),
                                prefix_length=config['encoding']['prefix_length'],
                                # TODO static check?
                                padding=True if config['encoding']['padding'] == 'zero_padding' else False,
                                task_generation_type=config['encoding'].get('generation_type', 'only_this'),
                                features=config['encoding'].get('features', [])
                            )[0],
                            labelling=Labelling.objects.get_or_create(
                                type=labelling_config.get('type', None),
                                # TODO static check?
                                attribute_name=labelling_config.get('attribute_name', None),
                                threshold_type=labelling_config.get('threshold_type', None),
                                threshold=labelling_config.get('threshold', None)
                            )[0] if labelling_config != {} else None,
                            clustering=Clustering.init(clustering, configuration=config.get(clustering, {})),
                            predictive_model=PredictiveModel.init(
                                get_prediction_method_config(generation_type, method, payload)
                            ),
                            hyperparameter_optimizer=HyperparameterOptimization.init(
                                config.get('hyperparameter_optimizer', None)),
                            create_models=config.get('create_models', False),
                            incremental_train=Job.objects.filter(
                                pk=incremental_base_model
                            )[0]
                        )

                        check_predictive_model_not_overwrite(job)
                        set_model_name(job)

                        jobs.append(job)
    return jobs
Exemplo n.º 5
0
def generate(split, payload):
    """Create PREDICTION-type jobs for every combination of prediction
    method, clustering and value encoding in *payload*.

    :param split: Split row the new jobs will train/test on
    :param payload: request payload; ``payload['config']`` drives creation
        and ``payload['type']`` selects the prediction type
    :return: list of created (or fetched, via get_or_create) Job rows
    """
    jobs = []

    config = payload['config']
    # labelling is optional in the payload; fall back to an empty config
    labelling_config = config['labelling'] if 'labelling' in config else {}
    job_type = JobTypes.PREDICTION.value
    prediction_type = payload['type']

    for method in config['methods']:
        for clustering in config['clusterings']:
            for encMethod in config['encodings']:
                encoding = config['encoding']
                if encoding['generation_type'] == UP_TO:
                    # UP_TO: one job per prefix length from 1 to prefix_length
                    for i in range(1, encoding['prefix_length'] + 1):
                        encoding = Encoding.objects.get_or_create(
                            data_encoding=DataEncodings.LABEL_ENCODER.value,
                            value_encoding=encMethod,
                            # NOTE(review): the add_* flags come from
                            # labelling_config, not the encoding config —
                            # confirm this is intended
                            add_elapsed_time=labelling_config.get('add_elapsed_time', False),
                            add_remaining_time=labelling_config.get('add_remaining_time', False),
                            add_executed_events=labelling_config.get('add_executed_events', False),
                            add_resources_used=labelling_config.get('add_resources_used', False),
                            add_new_traces=labelling_config.get('add_new_traces', False),
                            prefix_length=i,
                            # TODO static check?
                            padding=True if config['encoding']['padding'] == 'zero_padding' else False,
                            task_generation_type=config['encoding'].get('generation_type', 'only_this'),
                            features=config['encoding'].get('features', [])
                        )[0]

                        predictive_model = PredictiveModel.init(
                            get_prediction_method_config(prediction_type, method, config))

                        job = Job.objects.get_or_create(
                            status=JobStatuses.CREATED.value,
                            type=job_type,
                            split=split,
                            encoding=encoding,
                            labelling=Labelling.objects.get_or_create(
                                type=labelling_config.get('type', None),
                                # TODO static check?
                                attribute_name=labelling_config.get('attribute_name', None),
                                threshold_type=labelling_config.get('threshold_type', None),
                                threshold=labelling_config.get('threshold', None)
                            )[0] if labelling_config != {} else None,
                            # time-series prediction forcibly gets NO_CLUSTER
                            clustering=Clustering.init(clustering, configuration=config.get(clustering, {}))
                            if predictive_model.predictive_model != PredictiveModels.TIME_SERIES_PREDICTION.value
                            else Clustering.init(ClusteringMethods.NO_CLUSTER.value, configuration={}),
                            # TODO TEMPORARY workaround,
                            # NOTE(review): this branch defaults the optimiser
                            # config to {'type': None} while the non-UP_TO
                            # branch below uses {'type': 'none'} — one of the
                            # two is probably wrong; confirm which
                            hyperparameter_optimizer=HyperparameterOptimization.init(
                                config.get('hyperparameter_optimizer', {
                                    'type': None}) if predictive_model.predictive_model != PredictiveModels.TIME_SERIES_PREDICTION.value else {
                                    'type': None}),
                            # TODO TEMPORARY workaround
                            predictive_model=predictive_model,
                            create_models=config.get('create_models', False)
                        )[0]

                        check_predictive_model_not_overwrite(job)
                        set_model_name(job)

                        jobs.append(job)
                else:
                    # single fixed prefix length taken from the payload
                    predictive_model = PredictiveModel.init(
                        get_prediction_method_config(prediction_type, method, config))

                    job = Job.objects.get_or_create(
                        status=JobStatuses.CREATED.value,
                        type=job_type,
                        split=split,
                        encoding=Encoding.objects.get_or_create(
                            data_encoding=DataEncodings.LABEL_ENCODER.value,
                            value_encoding=encMethod,
                            add_elapsed_time=labelling_config.get('add_elapsed_time', False),
                            add_remaining_time=labelling_config.get('add_remaining_time', False),
                            add_executed_events=labelling_config.get('add_executed_events', False),
                            add_resources_used=labelling_config.get('add_resources_used', False),
                            add_new_traces=labelling_config.get('add_new_traces', False),
                            prefix_length=config['encoding']['prefix_length'],
                            # TODO static check?
                            padding=True if config['encoding']['padding'] == 'zero_padding' else False,
                            task_generation_type=config['encoding'].get('generation_type', 'only_this'),
                            features=config['encoding'].get('features', [])
                        )[0],
                        labelling=Labelling.objects.get_or_create(
                            type=labelling_config.get('type', None),
                            # TODO static check?
                            attribute_name=labelling_config.get('attribute_name', None),
                            threshold_type=labelling_config.get('threshold_type', None),
                            threshold=labelling_config.get('threshold', None)
                        )[0] if labelling_config != {} else None,
                        # time-series prediction forcibly gets NO_CLUSTER
                        clustering=Clustering.init(clustering, configuration=config.get(clustering, {}))
                        if predictive_model.predictive_model != PredictiveModels.TIME_SERIES_PREDICTION.value
                        else Clustering.init(ClusteringMethods.NO_CLUSTER.value, configuration={}),
                        hyperparameter_optimizer=HyperparameterOptimization.init(
                            config.get('hyperparameter_optimizer', {
                                'type': 'none'}) if predictive_model.predictive_model != PredictiveModels.TIME_SERIES_PREDICTION.value else {
                                'type': 'none'}),
                        # TODO TEMPORARY workaround
                        predictive_model=predictive_model,
                        create_models=config.get('create_models', False)
                    )[0]

                    check_predictive_model_not_overwrite(job)
                    set_model_name(job)

                    jobs.append(job)

    return jobs
Exemplo n.º 6
0
def generate(split, payload):
    """Create PREDICTION-type jobs for every combination of prediction
    method, clustering and value encoding in *payload*.

    Leaner variant: unlike the fuller version of this function it passes no
    ``features`` to the encoding, sets no hyperparameter optimiser, and does
    no model-name bookkeeping after job creation.

    :param split: Split row the new jobs will train/test on
    :param payload: request payload; ``payload['config']`` drives creation
        and ``payload['type']`` selects the prediction type
    :return: list of created (or fetched, via get_or_create) Job rows
    """
    jobs = []

    config = payload['config']
    # labelling is optional in the payload; fall back to an empty config
    labelling_config = config['labelling'] if 'labelling' in config else {}
    job_type = JobTypes.PREDICTION.value
    prediction_type = payload['type']

    for method in config['methods']:
        for clustering in config['clusterings']:
            for encMethod in config['encodings']:
                encoding = config['encoding']
                if encoding['generation_type'] == UP_TO:
                    # UP_TO: one job per prefix length from 1 to prefix_length
                    for i in range(1, encoding['prefix_length'] + 1):
                        encoding = Encoding.objects.get_or_create(
                            data_encoding='label_encoder',
                            value_encoding=encMethod,
                            # NOTE(review): add_* flags come from
                            # labelling_config, not the encoding config —
                            # confirm this is intended
                            add_elapsed_time=labelling_config.get(
                                'add_elapsed_time', False),
                            add_remaining_time=labelling_config.get(
                                'add_remaining_time', False),
                            add_executed_events=labelling_config.get(
                                'add_executed_events', False),
                            add_resources_used=labelling_config.get(
                                'add_resources_used', False),
                            add_new_traces=labelling_config.get(
                                'add_new_traces', False),
                            prefix_length=i,
                            # TODO static check?
                            padding=True if config['encoding']['padding']
                            == 'zero_padding' else False,
                            task_generation_type=config['encoding'].get(
                                'generation_type', 'only_this'))[0]

                        predictive_model = PredictiveModel.init(
                            get_prediction_method_config(
                                prediction_type, method, config))

                        job = Job.objects.get_or_create(
                            status=JobStatuses.CREATED.value,
                            type=job_type,
                            split=split,
                            encoding=encoding,
                            labelling=Labelling.objects.get_or_create(
                                type=labelling_config.get('type', None),
                                # TODO static check?
                                attribute_name=labelling_config.get(
                                    'attribute_name', None),
                                threshold_type=labelling_config.get(
                                    'threshold_type', None),
                                threshold=labelling_config.get(
                                    'threshold', None))[0]
                            if labelling_config != {} else None,
                            clustering=Clustering.init(
                                clustering,
                                configuration=config.get(clustering, {})),
                            predictive_model=predictive_model)[0]

                        jobs.append(job)
                else:
                    # single fixed prefix length taken from the payload
                    predictive_model = PredictiveModel.init(
                        get_prediction_method_config(prediction_type, method,
                                                     config))

                    job = Job.objects.get_or_create(
                        status=JobStatuses.CREATED.value,
                        type=job_type,
                        split=split,
                        encoding=Encoding.objects.get_or_create(
                            data_encoding='label_encoder',
                            value_encoding=encMethod,
                            add_elapsed_time=labelling_config.get(
                                'add_elapsed_time', False),
                            add_remaining_time=labelling_config.get(
                                'add_remaining_time', False),
                            add_executed_events=labelling_config.get(
                                'add_executed_events', False),
                            add_resources_used=labelling_config.get(
                                'add_resources_used', False),
                            add_new_traces=labelling_config.get(
                                'add_new_traces', False),
                            prefix_length=config['encoding']['prefix_length'],
                            # TODO static check?
                            padding=True if config['encoding']['padding']
                            == 'zero_padding' else False,
                            task_generation_type=config['encoding'].get(
                                'generation_type', 'only_this'))[0],
                        labelling=Labelling.objects.get_or_create(
                            type=labelling_config.get('type', None),
                            # TODO static check?
                            attribute_name=labelling_config.get(
                                'attribute_name', None),
                            threshold_type=labelling_config.get(
                                'threshold_type', None),
                            threshold=labelling_config.get('threshold',
                                                           None))[0]
                        if labelling_config != {} else None,
                        clustering=Clustering.init(clustering,
                                                   configuration=config.get(
                                                       clustering, {})),
                        predictive_model=predictive_model)[0]
                    jobs.append(job)

    return jobs
def progetto_padova():
    """End-to-end demo pipeline: build a prediction job on a train/validation
    double split, train and evaluate a decision tree, run prediction, and
    produce a LIME explanation.

    NOTE(review): this function may continue beyond the visible end of this
    chunk; the documentation below covers only what is shown here.
    """
    JOB = Job.objects.get_or_create(
        status=JobStatuses.CREATED.value,
        type=JobTypes.PREDICTION.value,
        # split=Split.objects.get_or_create(  # this creates the split of the log
        #             type=SplitTypes.SPLIT_SINGLE.value,
        #             original_log=create_log(  # this imports the log
        #                 import_log(BASE_DIR + RELATIVE_TRAIN_PATH),
        #                 RELATIVE_TRAIN_PATH,
        #                 BASE_DIR,
        #                 import_in_cache=False
        #             ),
        #             splitting_method=SplitOrderingMethods.SPLIT_TEMPORAL.value,
        #             test_size=0.2
        #         )[0],
        split=Split.objects.get_or_create(  # this creates the split of the log
            type=SplitTypes.SPLIT_DOUBLE.value,
            train_log=create_log(  # this imports the log
                import_log(BASE_DIR + RELATIVE_TRAIN_PATH),
                RELATIVE_TRAIN_PATH,
                BASE_DIR,
                import_in_cache=False
            ),
            test_log=create_log(  # this imports the log
                import_log(BASE_DIR + RELATIVE_VALIDATION_PATH),
                RELATIVE_VALIDATION_PATH,
                BASE_DIR,
                import_in_cache=False
            )
        )[0],
        encoding=Encoding.objects.get_or_create(  # this defines the encoding method
            data_encoding=DataEncodings.LABEL_ENCODER.value,
            value_encoding=ValueEncodings.SIMPLE_INDEX.value,
            add_elapsed_time=False,
            add_remaining_time=False,
            add_executed_events=False,
            add_resources_used=False,
            add_new_traces=False,
            prefix_length=5,
            padding=True,
            task_generation_type=TaskGenerationTypes.ALL_IN_ONE.value,
            features=[]
        )[0],
        labelling=Labelling.objects.get_or_create(  # this defines the label
            type=LabelTypes.ATTRIBUTE_STRING.value,
            attribute_name='label',
            threshold_type=None,
            threshold=None
        )[0],
        clustering=Clustering.init(ClusteringMethods.NO_CLUSTER.value, configuration={}),
        predictive_model=PredictiveModel.init(  # this defines the predictive model
            get_prediction_method_config(
                PredictiveModels.CLASSIFICATION.value,
                ClassificationMethods.DECISION_TREE.value,
                payload={
                    'max_depth': 2,
                    'min_samples_split': 2,
                    'min_samples_leaf': 2
                }
            )
        ),
        hyperparameter_optimizer=HyperparameterOptimization.init({  # this defines the hyperparameter optimisation procedure
            'type': HyperparameterOptimizationMethods.HYPEROPT.value,
            'max_evaluations': 10,
            # NOTE(review): performance_metric is given a HyperOptAlgorithms
            # value and algorithm_type a HyperOptLosses value — the two look
            # swapped; confirm against HyperparameterOptimization.init
            'performance_metric': HyperOptAlgorithms.TPE.value,
            'algorithm_type': HyperOptLosses.AUC.value
        }),
        create_models=True
    )[0]

    # load log
    train_log, test_log, additional_columns = get_train_test_log(JOB.split)

    # encode
    train_df, test_df = encode_label_logs(train_log, test_log, JOB)

    # train + evaluate
    results, model_split = MODEL[JOB.predictive_model.predictive_model][ModelActions.BUILD_MODEL_AND_TEST.value](
        train_df,
        test_df,
        _init_clusterer(JOB.clustering, train_df),
        JOB
    )

    if JOB.create_models:
        check_predictive_model_not_overwrite(JOB)
        set_model_name(JOB)
        save_models(model_split, JOB)

    # predict
    data_df = pd.concat([train_df, test_df])
    results = MODEL[JOB.predictive_model.predictive_model][ModelActions.PREDICT.value](JOB, data_df)

    results_with_probs = MODEL[JOB.predictive_model.predictive_model][ModelActions.PREDICT_PROBA.value](JOB, data_df)

    # lime
    exp = Explanation.objects.get_or_create(
        type=ExplanationTypes.LIME.value,
        split=JOB.split,  # this defines the analysed log, you can use a different one from the training one
        predictive_model=JOB.predictive_model,
        job=JOB
    )[0]
    error, result = explanation(exp.id, int(EXPLANATION_TARGET))