Example #1
def get_roc_auc_value(pipeline: Pipeline, train_data: InputData, test_data: InputData) -> Tuple[float, float]:
    train_pred = pipeline.predict(input_data=train_data)
    test_pred = pipeline.predict(input_data=test_data)
    roc_auc_value_test = roc_auc(y_true=test_data.target, y_score=test_pred.predict)
    roc_auc_value_train = roc_auc(y_true=train_data.target, y_score=train_pred.predict)

    return roc_auc_value_train, roc_auc_value_test
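
A minimal usage sketch for the helper above; the CSV paths and the single-node 'logit' pipeline are assumptions for illustration, not part of the original snippet:

# hypothetical usage; paths and pipeline structure are placeholders
train_data = InputData.from_csv('train.csv')
test_data = InputData.from_csv('test.csv')
pipeline = Pipeline(PrimaryNode('logit'))
pipeline.fit(train_data)
roc_train, roc_test = get_roc_auc_value(pipeline, train_data, test_data)
print(f'ROC AUC train: {roc_train:.3f}, test: {roc_test:.3f}')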
Example #2
def run_tpot_vs_fedot_example(train_file_path: str, test_file_path: str):
    train_data = InputData.from_csv(train_file_path)
    test_data = InputData.from_csv(test_file_path)

    training_features = train_data.features
    testing_features = test_data.features
    training_target = train_data.target
    testing_target = test_data.target

    # Average CV score on the training set was: 0.93755
    exported_pipeline = make_pipeline(
        StackingEstimator(estimator=BernoulliNB()), RandomForestClassifier())
    # Fix random state for all the steps in exported pipeline
    set_param_recursive(exported_pipeline.steps, 'random_state', 1)

    exported_pipeline.fit(training_features, training_target)
    results = exported_pipeline.predict_proba(testing_features)[:, 1]

    roc_auc_value = roc_auc(y_true=testing_target, y_score=results)

    print(f'ROC AUC for TPOT: {roc_auc_value}')

    node_scaling = PrimaryNode('scaling')
    node_bernb = SecondaryNode('bernb', nodes_from=[node_scaling])
    node_rf = SecondaryNode('rf', nodes_from=[node_bernb, node_scaling])
    pipeline = Pipeline(node_rf)

    pipeline.fit(train_data)
    results = pipeline.predict(test_data)

    roc_auc_value = roc_auc(y_true=testing_target, y_score=results.predict)
    print(f'ROC AUC for FEDOT: {roc_auc_value}')

    return roc_auc_value
Example #3
def test_knn_classification_tune_correct(data_fixture, request):
    data = request.getfixturevalue(data_fixture)
    data.features = ScalingWithImputation().fit(data.features).apply(
        data.features)
    train_data, test_data = train_test_data_setup(data=data)

    knn = Model(model_type='knn')
    model, _ = knn.fit(data=train_data)
    test_predicted = knn.predict(fitted_model=model, data=test_data)

    roc_on_test = roc_auc(y_true=test_data.target, y_score=test_predicted)

    roc_on_test_tuned_list = []
    for _ in range(3):
        knn_for_tune = Model(model_type='knn')
        model, _ = knn_for_tune.fine_tune(data=train_data,
                                          iterations=10,
                                          max_lead_time=timedelta(minutes=1))

        test_predicted_tuned = knn_for_tune.predict(fitted_model=model,
                                                    data=test_data)

        roc_on_test_tuned = roc_auc(y_true=test_data.target,
                                    y_score=test_predicted_tuned)

        roc_on_test_tuned_list.append(roc_on_test_tuned)

    roc_threshold = 0.6
    # the best tuned score should beat the untuned baseline
    assert np.max(roc_on_test_tuned_list) >= roc_on_test > roc_threshold
Example #4
def test_scoring_logreg_tune_correct(data_fixture, request):
    train_data, test_data = request.getfixturevalue(data_fixture)

    train_data.features = Scaling().fit(train_data.features).apply(
        train_data.features)
    test_data.features = Scaling().fit(test_data.features).apply(
        test_data.features)

    logreg = Model(model_type='logit')

    model, _ = logreg.fit(train_data)
    test_predicted = logreg.predict(fitted_model=model, data=test_data)

    test_roc_auc = roc_auc(y_true=test_data.target, y_score=test_predicted)

    logreg_for_tune = Model(model_type='logit')

    model_tuned, _ = logreg_for_tune.fine_tune(
        train_data, iterations=50, max_lead_time=timedelta(minutes=0.1))
    test_predicted_tuned = logreg_for_tune.predict(fitted_model=model_tuned,
                                                   data=test_data)

    test_roc_auc_tuned = roc_auc(y_true=test_data.target,
                                 y_score=test_predicted_tuned)

    roc_threshold = 0.6

    assert round(test_roc_auc_tuned, 2) >= round(test_roc_auc, 2) > roc_threshold
Example #5
def test_gp_composer_quality(data_fixture, request):
    random.seed(1)
    data = request.getfixturevalue(data_fixture)
    dataset_to_compose = data
    dataset_to_validate = data
    models_repo = ModelTypesRepository()
    available_model_types, _ = models_repo.search_models(
        desired_metainfo=ModelMetaInfoTemplate(
            input_type=NumericalDataTypesEnum.table,
            output_type=CategoricalDataTypesEnum.vector,
            task_type=MachineLearningTasksEnum.classification,
            can_be_initial=True,
            can_be_secondary=True))
    metric_function = MetricsRepository().metric_by_id(
        ClassificationMetricsEnum.ROCAUC)

    baseline = baseline_chain()
    baseline.fit_from_scratch(input_data=dataset_to_compose)

    predict_baseline = baseline.predict(dataset_to_validate).predict
    dataset_to_compose.target = np.array(
        [int(round(i)) for i in predict_baseline])

    composer_requirements = GPComposerRequirements(
        primary=available_model_types,
        secondary=available_model_types,
        max_arity=2,
        max_depth=3,
        pop_size=5,
        num_of_generations=5,
        crossover_prob=0.8,
        mutation_prob=0.8)

    # Create GP-based composer
    composer = GPComposer()
    composed_chain = composer.compose_chain(
        data=dataset_to_compose,
        initial_chain=None,
        composer_requirements=composer_requirements,
        metrics=metric_function)
    composed_chain.fit_from_scratch(input_data=dataset_to_compose)

    predict_composed = composed_chain.predict(dataset_to_validate).predict

    roc_auc_chain_created_by_hand = roc_auc(y_true=dataset_to_validate.target,
                                            y_score=predict_baseline)
    roc_auc_chain_evo_alg = roc_auc(y_true=dataset_to_validate.target,
                                    y_score=predict_composed)
    print("model created by hand prediction:", roc_auc_chain_created_by_hand)
    print("gp composed model prediction:", roc_auc_chain_evo_alg)

    assert composed_chain == baseline or abs(
        roc_auc_chain_created_by_hand - roc_auc_chain_evo_alg) < 0.01
Example #6
def test_gp_composer_build_pipeline_correct(data_fixture, request):
    random.seed(1)
    np.random.seed(1)
    data = request.getfixturevalue(data_fixture)
    dataset_to_compose = data
    dataset_to_validate = data
    task = Task(TaskTypesEnum.classification)
    available_model_types, _ = OperationTypesRepository().suitable_operation(
        task_type=task.task_type)

    metric_function = ClassificationMetricsEnum.ROCAUC

    req = GPComposerRequirements(primary=available_model_types,
                                 secondary=available_model_types,
                                 max_arity=2,
                                 max_depth=2,
                                 pop_size=2,
                                 num_of_generations=1,
                                 crossover_prob=0.4,
                                 mutation_prob=0.5)

    builder = GPComposerBuilder(task).with_requirements(req).with_metrics(
        metric_function)
    gp_composer = builder.build()
    pipeline_gp_composed = gp_composer.compose_pipeline(
        data=dataset_to_compose)

    pipeline_gp_composed.fit_from_scratch(input_data=dataset_to_compose)
    predicted_gp_composed = pipeline_gp_composed.predict(dataset_to_validate)

    roc_on_valid_gp_composed = roc_auc(y_true=dataset_to_validate.target,
                                       y_score=predicted_gp_composed.predict)

    assert roc_on_valid_gp_composed > 0.6
Example #7
def test_gp_composer_build_chain_correct(data_fixture, request):
    random.seed(1)
    np.random.seed(1)
    data = request.getfixturevalue(data_fixture)
    dataset_to_compose = data
    dataset_to_validate = data

    available_model_types, _ = ModelTypesRepository().suitable_model(
        task_type=TaskTypesEnum.classification)

    metric_function = MetricsRepository().metric_by_id(
        ClassificationMetricsEnum.ROCAUC)

    gp_composer = GPComposer()
    req = GPComposerRequirements(primary=available_model_types,
                                 secondary=available_model_types,
                                 max_arity=2,
                                 max_depth=2,
                                 pop_size=2,
                                 num_of_generations=1,
                                 crossover_prob=0.4,
                                 mutation_prob=0.5)
    chain_gp_composed = gp_composer.compose_chain(data=dataset_to_compose,
                                                  initial_chain=None,
                                                  composer_requirements=req,
                                                  metrics=metric_function)

    chain_gp_composed.fit_from_scratch(input_data=dataset_to_compose)
    predicted_gp_composed = chain_gp_composed.predict(dataset_to_validate)

    roc_on_valid_gp_composed = roc_auc(y_true=dataset_to_validate.target,
                                       y_score=predicted_gp_composed.predict)

    assert roc_on_valid_gp_composed > 0.6
Example #8
def calculate_validation_metric(predicted: OutputData,
                                dataset_to_validate: InputData) -> float:
    # the quality assessment for the simulation results
    roc_auc_value = roc_auc(y_true=dataset_to_validate.target,
                            y_score=predicted.predict,
                            multi_class="ovo")
    return roc_auc_value
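
For reference, the call above matches scikit-learn's multiclass scoring interface; a minimal sketch, assuming roc_auc is an alias for sklearn.metrics.roc_auc_score and y_score is a per-class probability matrix:

# sketch: multiclass ROC AUC with one-vs-one averaging (scikit-learn)
import numpy as np
from sklearn.metrics import roc_auc_score

y_true = np.array([0, 1, 2, 1])
y_proba = np.array([[0.8, 0.1, 0.1],
                    [0.2, 0.6, 0.2],
                    [0.1, 0.2, 0.7],
                    [0.3, 0.5, 0.2]])  # each row sums to 1
print(roc_auc_score(y_true, y_proba, multi_class='ovo'))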
Example #9
def run_chain_from_automl(train_file_path: str, test_file_path: str,
                          max_run_time: timedelta = timedelta(minutes=10)):
    train_data = InputData.from_csv(train_file_path)
    test_data = InputData.from_csv(test_file_path)

    testing_target = test_data.target

    chain = Chain()
    node_tpot = PrimaryNode('tpot')

    node_tpot.model.params = {'max_run_time_sec': max_run_time.seconds}

    node_lda = PrimaryNode('lda')
    node_rf = SecondaryNode('rf')

    node_rf.nodes_from = [node_tpot, node_lda]

    chain.add_node(node_rf)

    chain.fit(train_data)
    results = chain.predict(test_data)

    roc_auc_value = roc_auc(y_true=testing_target,
                            y_score=results.predict)
    print(roc_auc_value)

    return roc_auc_value
Example #10
def chain_tuning(nodes_to_tune: str,
                 chain: Chain,
                 train_data: InputData,
                 test_data: InputData,
                 local_iter: int,
                 tuner_iter_num: int = 50) -> Tuple[float, list]:
    several_iter_scores_test = []

    if nodes_to_tune == 'primary':
        print('primary_node_tuning')
        chain_tune_strategy = chain.fine_tune_primary_nodes
    elif nodes_to_tune == 'root':
        print('root_node_tuning')
        chain_tune_strategy = chain.fine_tune_all_nodes
    else:
        raise ValueError(
            'Invalid type of nodes. Nodes must be primary or root')

    for iteration in range(local_iter):
        print(f'current local iteration {iteration}')

        # Chain tuning
        chain_tune_strategy(train_data, iterations=tuner_iter_num)

        # After tuning prediction
        chain.fit(train_data)
        after_tuning_predicted = chain.predict(test_data)

        # Metrics
        aft_tun_roc_auc = roc_auc(y_true=test_data.target,
                                  y_score=after_tuning_predicted.predict)
        several_iter_scores_test.append(aft_tun_roc_auc)

    return float(np.mean(several_iter_scores_test)), several_iter_scores_test
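
A hedged usage sketch for chain_tuning; the data paths and the chain topology below are placeholders following the Chain API used in the other examples:

# hypothetical usage; paths and chain structure are assumptions
train_data = InputData.from_csv('train.csv')
test_data = InputData.from_csv('test.csv')

chain = Chain()
node_first = PrimaryNode('logit')
chain.add_node(SecondaryNode('rf', nodes_from=[node_first]))

mean_roc_auc, scores = chain_tuning(nodes_to_tune='primary',
                                    chain=chain,
                                    train_data=train_data,
                                    test_data=test_data,
                                    local_iter=3)
print(f'mean tuned ROC AUC: {mean_roc_auc:.3f}')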
Example #11
def calculate_validation_metric(
        chain: Chain,
        dataset_to_validate: InputData) -> Tuple[float, float, float]:
    # the execution of the obtained composite models
    predicted = chain.predict(dataset_to_validate)
    # the quality assessment for the simulation results
    roc_auc_value = roc_auc(y_true=dataset_to_validate.target,
                            y_score=predicted.predict,
                            multi_class="ovo",
                            average="macro")
    # build one-hot predicted labels via argmax over class probabilities
    # (the snippet assumes a three-class problem)
    y_values_pred = [[0, 0, 0] for _ in range(predicted.idx.size)]
    for i, predict in enumerate(predicted.predict):
        y_class_pred = np.argmax(predict)
        y_values_pred[i][y_class_pred] = 1

    y_pred = np.array(predicted.predict)
    y_values_pred = np.array(y_values_pred)
    log_loss_value = log_loss(y_true=dataset_to_validate.target, y_pred=y_pred)
    accuracy_score_value = accuracy_score(dataset_to_validate.target,
                                          y_values_pred)

    return roc_auc_value, log_loss_value, accuracy_score_value
Example #12
def calculate_validation_metric_multiclass(
        chain: Chain,
        dataset_to_validate: InputData) -> Tuple[float, float, float]:
    # the execution of the obtained composite models
    predicted = chain.predict(dataset_to_validate)
    # the quality assessment for the simulation results
    y_pred = []
    roc_auc_values = []
    for predict, true in zip(predicted.predict, dataset_to_validate.target):
        roc_auc_score = roc_auc(y_true=true, y_score=predict)
        roc_auc_values.append(roc_auc_score)
    roc_auc_value = statistics.mean(roc_auc_values)

    for predict in predicted.predict:
        values = []
        for val in predict:
            values.append(round(val))
        y_pred.append(np.float64(values))
    y_pred = np.array(y_pred)
    log_loss_value = log_loss(y_true=dataset_to_validate.target, y_pred=y_pred)
    accuracy_score_value = accuracy_score(y_true=dataset_to_validate.target,
                                          y_pred=y_pred)

    return roc_auc_value, log_loss_value, accuracy_score_value
Example #13
def run_pipeline_from_automl(train_file_path: str,
                             test_file_path: str,
                             max_run_time: timedelta = timedelta(minutes=10)):
    """ Function run pipeline with Auto ML models in nodes

    :param train_file_path: path to the csv file with data for train
    :param test_file_path: path to the csv file with data for validation
    :param max_run_time: maximum running time for customization of the "tpot" model

    :return roc_auc_value: ROC AUC metric for pipeline
    """
    train_data = InputData.from_csv(train_file_path)
    test_data = InputData.from_csv(test_file_path)

    testing_target = test_data.target

    node_scaling = PrimaryNode('scaling')
    node_tpot = PrimaryNode('tpot')

    node_tpot.operation.params = {'max_run_time_sec': max_run_time.seconds}

    node_lda = SecondaryNode('lda', nodes_from=[node_scaling])
    node_rf = SecondaryNode('rf', nodes_from=[node_tpot, node_lda])
    OperationTypesRepository.assign_repo('model', 'automl_repository.json')
    pipeline = Pipeline(node_rf)

    pipeline.fit(train_data)
    results = pipeline.predict(test_data)

    roc_auc_value = roc_auc(y_true=testing_target, y_score=results.predict)
    print(roc_auc_value)

    return roc_auc_value
Example #14
def on_epoch_end(self, epoch, logs=None):
    tp = 0
    tn = 0
    fp = 0
    fn = 0
    y_true = []
    y_score = []
    for i in range(len(self.valid_data_gen)):
        x, y = next(self.valid_data_gen)
        _y = self.model.predict_classes(x, batch_size=1, verbose=0)
        _y_score = self.model.predict_proba(x, batch_size=1)
        y_score.append(_y_score[0][0])
        y_true.append(y[0])
        if _y[0][0] == 1:
            if _y[0][0] == y[0]:
                tp += 1
            else:
                fp += 1
        else:
            if _y[0][0] == y[0]:
                tn += 1
            else:
                fn += 1
    y_score = np.array(y_score)
    y_true = np.array(y_true)
    fpr, tpr, _ = roc_curve(y_true, y_score)
    auc = roc_auc(fpr, tpr)
    precision = tp / (tp + fp)
    recall = tp / (tp + fn)
    f1 = 2 * ((precision * recall) / (precision + recall))
    print("\n validation: tp = ", tp, " fp = ", fp, " tn = ", tn, " fn = ",
          fn, " accuracy = ", (tp + tn) / len(self.valid_data_gen), " auc = ",
          auc, "F1 Score = ", f1, "\n")
Example #15
def eval_auc(model, data_loader, attack_config, device, adv=True):
    """Compute AUC on a dataset comprised of pos and neg samples.

    Parameters
    ----------
    adv : bool, optional
        If True, perturb negative samples; otherwise use negative
        samples as is. Defaults to True.
    """
    assert not model.training
    logits = []
    labels = []
    for i, (batch_images, batch_labels) in enumerate(data_loader):
        batch_images = batch_images.to(device)
        if adv:
            target = batch_images[batch_labels == 1]
            others = batch_images[batch_labels == 0]
            if others.nelement() > 0:
                others_adv = perturb(model,
                                     others,
                                     random_start=False,
                                     **attack_config)
                batch_images = torch.cat([target, others_adv], 0)
            else:
                batch_images = target
        with torch.no_grad():
            batch_logits = forward(model, batch_images)
        logits.append(batch_logits.cpu().numpy())
        labels.append(batch_labels.numpy())
    logits = np.concatenate(logits)
    labels = np.concatenate(labels)
    fpr_, tpr_, thresholds = roc_curve(labels, logits)
    return roc_auc(fpr_, tpr_)
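
Since eval_auc asserts that the model is not in training mode, callers have to switch to eval mode first; a hedged usage sketch in which model, test_loader, and attack_config are placeholder names:

# hypothetical call; model, test_loader and attack_config are assumptions
import torch

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model.eval()  # required: eval_auc asserts `not model.training`
auc_adv = eval_auc(model, test_loader, attack_config, device, adv=True)
auc_clean = eval_auc(model, test_loader, attack_config, device, adv=False)
print(f'AUC adversarial: {auc_adv:.4f}, clean: {auc_clean:.4f}')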
Example #16
def run_chain_from_automl(train_file_path: str,
                          test_file_path: str,
                          max_run_time: timedelta = timedelta(minutes=10)):
    """ Function run chain with Auto ML models in nodes

    :param train_file_path: path to the csv file with data for train
    :param test_file_path: path to the csv file with data for validation
    :param max_run_time: maximum running time for customization of the "tpot" model

    :return roc_auc_value: ROC AUC metric for chain
    """
    train_data = InputData.from_csv(train_file_path)
    test_data = InputData.from_csv(test_file_path)

    testing_target = test_data.target

    chain = Chain()
    node_scaling = PrimaryNode('scaling')
    node_tpot = PrimaryNode('tpot')

    node_tpot.operation.params = {'max_run_time_sec': max_run_time.seconds}

    node_lda = SecondaryNode('lda', nodes_from=[node_scaling])
    node_rf = SecondaryNode('rf', nodes_from=[node_tpot, node_lda])
    chain.add_node(node_rf)

    chain.fit(train_data)
    results = chain.predict(test_data)

    roc_auc_value = roc_auc(y_true=testing_target, y_score=results.predict)
    print(roc_auc_value)

    return roc_auc_value
Example #17
def calculate_validation_metric(chain: Chain, dataset_to_validate: InputData) -> float:
    # the execution of the obtained composite models
    predicted = chain.predict(dataset_to_validate)
    # the quality assessment for the simulation results
    roc_auc_value = roc_auc(y_true=dataset_to_validate.target,
                            y_score=predicted.predict)
    return roc_auc_value
Example #18
def test_composer_with_cv_optimization_correct():
    task = Task(task_type=TaskTypesEnum.classification)
    dataset_to_compose, dataset_to_validate = get_data(task)

    models_repo = OperationTypesRepository()
    available_model_types, _ = models_repo.suitable_operation(
        task_type=task.task_type, tags=['simple'])

    metric_function = [
        ClassificationMetricsEnum.ROCAUC_penalty,
        ClassificationMetricsEnum.accuracy, ClassificationMetricsEnum.logloss
    ]

    composer_requirements = GPComposerRequirements(
        primary=available_model_types,
        secondary=available_model_types,
        timeout=timedelta(minutes=1),
        num_of_generations=2,
        cv_folds=3)

    builder = GPComposerBuilder(task).with_requirements(
        composer_requirements).with_metrics(metric_function)
    composer = builder.build()

    pipeline_evo_composed = composer.compose_pipeline(data=dataset_to_compose,
                                                      is_visualise=False)[0]

    assert isinstance(pipeline_evo_composed, Pipeline)

    pipeline_evo_composed.fit(input_data=dataset_to_compose)
    predicted = pipeline_evo_composed.predict(dataset_to_validate)
    roc_on_valid_evo_composed = roc_auc(y_score=predicted.predict,
                                        y_true=dataset_to_validate.target)

    assert roc_on_valid_evo_composed > 0
Example #19
def log_metrics(logger, phase, epoch_num, y_hat, y):
    th = 0.5
    accuracy = metrics.accuracy(y_hat, y, th, True)
    f1_score = metrics.f1score(y_hat, y, th, True)
    specificity = metrics.specificity(y_hat, y, th, True)
    sensitivity = metrics.sensitivity(y_hat, y, th, True)
    roc_auc = metrics.roc_auc(y_hat, y)

    classes = [
        'epidural', 'intraparenchymal', 'intraventricular', 'subarachnoid',
        'subdural', 'any'
    ]
    for acc, f1, spec, sens, roc, class_name in zip(accuracy, f1_score,
                                                    specificity, sensitivity,
                                                    roc_auc, classes):
        logger.add_scalar(f'{phase}_acc_{class_name}', acc, epoch_num)
        logger.add_scalar(f'{phase}_f1_{class_name}', f1, epoch_num)
        logger.add_scalar(f'{phase}_spec_{class_name}', spec, epoch_num)
        logger.add_scalar(f'{phase}_sens_{class_name}', sens, epoch_num)
        logger.add_scalar(f'{phase}_roc_{class_name}', roc, epoch_num)

    for i, class_name in enumerate(classes):
        logger.add_scalar(f'{phase}_bce_{class_name}',
                          sklearn.metrics.log_loss(y[:, i], y_hat[:, i]),
                          epoch_num)
Example #20
def test_random_composer(data_fixture, request):
    random.seed(1)
    np.random.seed(1)
    data = request.getfixturevalue(data_fixture)
    dataset_to_compose = data
    dataset_to_validate = data

    available_model_types, _ = ModelTypesRepository().suitable_model(
        task_type=TaskTypesEnum.classification)

    metric_function = MetricsRepository().metric_by_id(
        ClassificationMetricsEnum.ROCAUC)

    random_composer = RandomSearchComposer(iter_num=1)
    req = ComposerRequirements(primary=available_model_types,
                               secondary=available_model_types)
    chain_random_composed = random_composer.compose_chain(
        data=dataset_to_compose,
        initial_chain=None,
        composer_requirements=req,
        metrics=metric_function)
    chain_random_composed.fit_from_scratch(input_data=dataset_to_compose)

    predicted_random_composed = chain_random_composed.predict(
        dataset_to_validate)

    roc_on_valid_random_composed = roc_auc(
        y_true=dataset_to_validate.target,
        y_score=predicted_random_composed.predict)

    assert roc_on_valid_random_composed > 0.6
Example #21
def test_parameter_free_composer_build_chain_correct(data_fixture, request):
    random.seed(1)
    np.random.seed(1)
    data = request.getfixturevalue(data_fixture)
    dataset_to_compose = data
    dataset_to_validate = data
    available_model_types, _ = ModelTypesRepository().suitable_model(
        task_type=TaskTypesEnum.classification)

    metric_function = MetricsRepository().metric_by_id(
        ClassificationMetricsEnum.ROCAUC)

    req = GPComposerRequirements(primary=available_model_types,
                                 secondary=available_model_types,
                                 max_arity=2,
                                 max_depth=2,
                                 pop_size=2,
                                 num_of_generations=1,
                                 crossover_prob=0.4,
                                 mutation_prob=0.5)
    opt_params = GPChainOptimiserParameters(
        genetic_scheme_type=GeneticSchemeTypesEnum.parameter_free)
    builder = GPComposerBuilder(task=Task(
        TaskTypesEnum.classification)).with_requirements(req).with_metrics(
            metric_function).with_optimiser_parameters(opt_params)
    gp_composer = builder.build()
    chain_gp_composed = gp_composer.compose_chain(data=dataset_to_compose)

    chain_gp_composed.fit_from_scratch(input_data=dataset_to_compose)
    predicted_gp_composed = chain_gp_composed.predict(dataset_to_validate)

    roc_on_valid_gp_composed = roc_auc(y_true=dataset_to_validate.target,
                                       y_score=predicted_gp_composed.predict)

    assert roc_on_valid_gp_composed > 0.6
Example #22
def get_metrics(prediction, y_test):
    '''
    Computes accuracy, precision, recall, ROC-AUC and F1 metrics for
    predictions produced by an ML model against the actual values of the
    dependent variable.
    Inputs:
        - prediction: an array with predictions.
        - y_test: an array with actual values.
    Returns a dictionary with metrics of the ML model.
    '''
    Accuracy = accuracy(prediction, y_test)
    Precision = precision(prediction, y_test)
    Recall = recall(prediction, y_test)
    try:
        AUC = roc_auc(prediction, y_test)
    except ValueError:
        AUC = 0
    F1 = f1(prediction, y_test)

    metrics_dict = {
        'Accuracy': Accuracy,
        'Precision': Precision,
        'Recall': Recall,
        'AUC': AUC,
        'F1': F1
    }
    return metrics_dict
Example #23
def calculate_validation_metric(pred: OutputData, valid: InputData) -> float:
    predicted = np.ravel(pred.predict)
    real = np.ravel(valid.target)

    err = roc_auc(y_true=real,
                  y_score=predicted)

    return round(err, 2)
Example #24
def validate_model_quality(model: Pipeline, data_path: str):
    dataset_to_validate = InputData.from_csv(data_path)
    predicted_labels = model.predict(dataset_to_validate).predict

    roc_auc_valid = round(roc_auc(y_true=dataset_to_validate.target,
                                  y_score=predicted_labels,
                                  multi_class='ovo',
                                  average='macro'), 3)
    return roc_auc_valid
Example #25
def run_tpot_vs_fedot_example(train_file_path: str, test_file_path: str):
    train_data = InputData.from_csv(train_file_path)
    test_data = InputData.from_csv(test_file_path)

    training_features = train_data.features
    testing_features = test_data.features
    training_target = train_data.target
    testing_target = test_data.target

    # Average CV score on the training set was: 0.93755
    exported_pipeline = make_pipeline(
        StackingEstimator(estimator=BernoulliNB()),
        RandomForestClassifier()
    )
    # Fix random state for all the steps in exported pipeline
    set_param_recursive(exported_pipeline.steps, 'random_state', 1)

    exported_pipeline.fit(training_features, training_target)
    results = exported_pipeline.predict_proba(testing_features)[:, 1]

    roc_auc_value = roc_auc(y_true=testing_target,
                            y_score=results)

    print(roc_auc_value)

    chain = Chain()
    node_first = PrimaryNode('direct_data_model')
    node_second = PrimaryNode('bernb')
    node_third = SecondaryNode('rf')

    node_third.nodes_from.append(node_first)
    node_third.nodes_from.append(node_second)

    chain.add_node(node_third)

    chain.fit(train_data)
    results = chain.predict(test_data)

    roc_auc_value = roc_auc(y_true=testing_target,
                            y_score=results.predict)
    print(roc_auc_value)

    return roc_auc_value
Example #26
def test_qda_fit_correct(data_fixture, request):
    data = request.getfixturevalue(data_fixture)
    data.features = Scaling().fit(data.features).apply(data.features)
    train_data, test_data = train_test_data_setup(data=data)

    qda = Model(model_type=ModelTypesIdsEnum.qda)

    _, train_predicted = qda.fit(data=train_data)
    roc_on_train = roc_auc(y_true=train_data.target, y_score=train_predicted)
    roc_threshold = 0.95
    assert roc_on_train >= roc_threshold
Example #27
def test_log_regression_fit_correct(classification_dataset):
    data = classification_dataset
    data.features = Scaling().fit(data.features).apply(data.features)
    train_data, test_data = train_test_data_setup(data=data)

    log_reg = Model(model_type=ModelTypesIdsEnum.logit)

    _, train_predicted = log_reg.fit(data=train_data)
    roc_on_train = roc_auc(y_true=train_data.target, y_score=train_predicted)
    roc_threshold = 0.95
    assert roc_on_train >= roc_threshold
Example #28
def execute_pipeline_for_text_problem(train_data, test_data):
    node_text_clean = PrimaryNode('text_clean')
    node_tfidf = SecondaryNode('tfidf', nodes_from=[node_text_clean])
    model_node = SecondaryNode('multinb', nodes_from=[node_tfidf])
    pipeline = Pipeline(model_node)
    pipeline.fit(train_data)

    predicted = pipeline.predict(test_data)

    roc_auc_metric = roc_auc(y_true=test_data.target,
                             y_score=predicted.predict)

    return roc_auc_metric
Example #29
def test_multiclassification_pipeline_fit_correct():
    data = get_iris_data()
    pipeline = pipeline_simple()
    train_data, test_data = train_test_data_setup(data, shuffle_flag=True)

    pipeline.fit(input_data=train_data)
    results = pipeline.predict(input_data=test_data)

    roc_auc_on_test = roc_auc(y_true=test_data.target,
                              y_score=results.predict,
                              multi_class='ovo',
                              average='macro')

    assert roc_auc_on_test > 0.95
Example #30
def test_rf_class_tune_correct(data_fixture, request):
    data = request.getfixturevalue(data_fixture)
    data.features = Scaling().fit(data.features).apply(data.features)
    train_data, test_data = train_test_data_setup(data=data)

    rf = Model(model_type='rf')

    model, _ = rf.fit(train_data)
    test_predicted = rf.predict(fitted_model=model, data=test_data)

    test_roc_auc = roc_auc(y_true=test_data.target, y_score=test_predicted)

    model_tuned, _ = rf.fine_tune(data=train_data,
                                  iterations=12,
                                  max_lead_time=timedelta(minutes=0.1))
    test_predicted_tuned = rf.predict(fitted_model=model_tuned, data=test_data)

    test_roc_auc_tuned = roc_auc(y_true=test_data.target,
                                 y_score=test_predicted_tuned)
    roc_threshold = 0.7

    assert test_roc_auc_tuned != test_roc_auc
    assert test_roc_auc_tuned > roc_threshold