Пример #1
0
def run_tpot_vs_fedot_example(train_file_path: str, test_file_path: str):
    """Score an sklearn pipeline and a FEDOT chain on the same train/test CSVs.

    Both models are fitted on the data read from ``train_file_path`` and
    evaluated with ROC AUC on the data read from ``test_file_path``. Each
    score is printed.

    :param train_file_path: path to the CSV file with the training sample
    :param test_file_path: path to the CSV file with the test sample
    :return: ROC AUC of the FEDOT chain (the pipeline score is only printed)
    """
    train_data = InputData.from_csv(train_file_path)
    test_data = InputData.from_csv(test_file_path)

    x_train, x_test = train_data.features, test_data.features
    y_train, y_test = train_data.target, test_data.target

    # Average CV score on the training set was: 0.93755
    tpot_pipeline = make_pipeline(
        StackingEstimator(estimator=BernoulliNB()),
        RandomForestClassifier(),
    )
    # Pin random_state to 1 on every step of the pipeline
    set_param_recursive(tpot_pipeline.steps, 'random_state', 1)

    tpot_pipeline.fit(x_train, y_train)
    # Only the second column of predict_proba is scored
    tpot_scores = tpot_pipeline.predict_proba(x_test)[:, 1]
    tpot_roc_auc = roc_auc(y_true=y_test, y_score=tpot_scores)
    print(f'ROC AUC for TPOT: {tpot_roc_auc}')

    # FEDOT chain: scaling -> bernb -> rf, with scaling also feeding rf directly
    scaling = PrimaryNode('scaling')
    bernb = SecondaryNode('bernb', nodes_from=[scaling])
    rf = SecondaryNode('rf', nodes_from=[bernb, scaling])
    fedot_chain = Chain(rf)

    fedot_chain.fit(train_data)
    fedot_output = fedot_chain.predict(test_data)
    fedot_roc_auc = roc_auc(y_true=y_test, y_score=fedot_output.predict)
    print(f'ROC AUC for FEDOT: {fedot_roc_auc}')

    return fedot_roc_auc
Пример #2
0
def test_forecast_with_exog():
    """Check that a lagged + exogenous chain reproduces the expected series."""
    (train_source_ts, predict_source_ts,
     train_exog_ts, predict_exog_ts, ts_test) = synthetic_with_exogenous_ts()

    # The lagged node receives the source series directly through node_data
    lagged_inputs = {'fit': train_source_ts, 'predict': predict_source_ts}
    node_lagged = PrimaryNode('lagged', node_data=lagged_inputs)
    # window_size is not defined in this function; it has to be provided by
    # the enclosing module
    node_lagged.custom_params = {'window_size': window_size}

    # The exog node receives the exogenous series the same way
    exog_inputs = {'fit': train_exog_ts, 'predict': predict_exog_ts}
    node_exog = PrimaryNode('exog', node_data=exog_inputs)

    root = SecondaryNode('linear', nodes_from=[node_lagged, node_exog])
    chain = Chain(root)

    # No data is passed here: the primary nodes already carry their own
    chain.fit()
    forecast = chain.predict()

    flat_forecast = np.ravel(np.array(forecast.predict))

    assert tuple(flat_forecast) == tuple(ts_test)
Пример #3
0
def test_chain_hierarchy_fit_correct(data_setup):
    """Fit a diamond-shaped chain of four 'logit' nodes and check its layout."""
    train, _ = train_test_data_setup(data_setup)

    bottom = PrimaryNode(model_type='logit')
    left = SecondaryNode(model_type='logit', nodes_from=[bottom])
    right = SecondaryNode(model_type='logit', nodes_from=[bottom])
    top = SecondaryNode(model_type='logit', nodes_from=[left, right])

    chain = Chain()
    for node in (bottom, left, right, top):
        chain.add_node(node)

    train_predicted = chain.fit(input_data=train, use_cache=False)

    expected_root_id = ('((/n_logit_default_params;)/'
                        'n_logit_default_params;;(/'
                        'n_logit_default_params;)/'
                        'n_logit_default_params;)/'
                        'n_logit_default_params')
    assert chain.root_node.descriptive_id == expected_root_id

    assert chain.length == 4
    assert chain.depth == 3
    # One prediction row per training target row
    assert train_predicted.predict.shape[0] == train.target.shape[0]
    assert top.cache.actual_cached_state is not None
Пример #4
0
def create_chain_with_several_nested_atomized_model() -> Chain:
    """Build a chain mixing atomized-model nodes with two 'knn' nodes.

    Only the final atomized node is passed to ``add_node``; all other nodes
    are reachable from it through ``nodes_from``.
    """
    chain = Chain()

    atomized_primary = PrimaryNode(
        model_type=create_atomized_model_with_several_atomized_models())

    atomized_secondary = SecondaryNode(model_type=create_atomized_model())
    atomized_secondary.nodes_from = [atomized_primary]

    knn_nine = SecondaryNode('knn')
    knn_nine.custom_params = {'n_neighbors': 9}
    knn_nine.nodes_from = [atomized_primary]

    # Second knn consumes all three nodes created above
    knn_five = SecondaryNode('knn')
    knn_five.custom_params = {'n_neighbors': 5}
    knn_five.nodes_from = [atomized_primary, atomized_secondary, knn_nine]

    atomized_root = SecondaryNode(
        model_type=create_atomized_model_with_several_atomized_models())
    atomized_root.nodes_from = [knn_five]

    chain.add_node(atomized_root)

    return chain
Пример #5
0
def chain_tuning(nodes_to_tune: str, chain: Chain, train_data: InputData,
                 test_data: InputData, local_iter: int,
                 tuner_iter_num: int = 50) -> (float, list):
    """Repeatedly tune a chain and collect its ROC AUC on the test data.

    :param nodes_to_tune: 'primary' selects ``chain.fine_tune_primary_nodes``,
        'root' selects ``chain.fine_tune_all_nodes``
    :param chain: chain to tune; it is refitted on every iteration
    :param train_data: data used for tuning and fitting
    :param test_data: data used for scoring
    :param local_iter: number of tune/fit/score rounds
    :param tuner_iter_num: value passed to the tuner as ``iterations``
    :return: (mean of the per-round test scores, list of per-round test scores)
    :raises ValueError: if ``nodes_to_tune`` is neither 'primary' nor 'root'
    """
    # Reject an unknown mode up front, before touching the chain
    if nodes_to_tune not in ('primary', 'root'):
        raise ValueError('Invalid type of nodes. Nodes must be primary or root')

    if nodes_to_tune == 'primary':
        print('primary_node_tuning')
        tune = chain.fine_tune_primary_nodes
    else:
        print('root_node_tuning')
        tune = chain.fine_tune_all_nodes

    test_scores = []
    for round_idx in range(local_iter):
        print(f'current local iteration {round_idx}')

        tune(train_data, iterations=tuner_iter_num)

        # Refit after tuning, then score on the test sample
        chain.fit(train_data)
        predicted = chain.predict(test_data)
        test_scores.append(roc_auc(y_true=test_data.target,
                                   y_score=predicted.predict))

    return float(np.mean(test_scores)), test_scores
Пример #6
0
def create_chain():
    """Return a chain with a 'knn' root over 'logit' and 'lda', fitness set to 1."""
    parents = [
        PrimaryNode(operation_type='logit'),
        PrimaryNode(operation_type='lda'),
    ]
    root = SecondaryNode(operation_type='knn', nodes_from=parents)

    built = Chain(root)
    built.fitness = 1
    return built
Пример #7
0
def get_rmse_value(chain: Chain, train_data: InputData,
                   test_data: InputData) -> tuple:
    """Predict on train and test data and score both predictions with ``ts_mse``.

    The previous return annotation declared two floats, but four values are
    returned; the annotation now reflects that a 4-tuple comes back.

    :param chain: chain used for prediction (it is not fitted here)
    :param train_data: data for the train-side prediction and score
    :param test_data: data for the test-side prediction and score
    :return: 4-tuple ``(train score, test score, train prediction,
        test prediction)``; the scores are whatever ``ts_mse`` returns
    """
    # NOTE(review): the names say RMSE while the helper is called ts_mse -
    # confirm which metric ts_mse actually computes.
    train_pred = chain.predict(input_data=train_data)
    test_pred = chain.predict(input_data=test_data)

    rmse_value_test = ts_mse(obs=test_data.target, pred=test_pred.predict)
    rmse_value_train = ts_mse(obs=train_data.target, pred=train_pred.predict)

    return rmse_value_train, rmse_value_test, train_pred, test_pred
Пример #8
0
def chain_with_multiple_roots():
    """Return a chain in which two 'logit' nodes both sit on top of one primary node."""
    base = PrimaryNode(operation_type='logit')
    roots = [SecondaryNode(operation_type='logit', nodes_from=[base])
             for _ in range(2)]

    chain = Chain()
    for node in (base, *roots):
        chain.add_node(node)

    return chain
Пример #9
0
def chain_with_cycle():
    """Return a chain whose second and third nodes list each other as parents."""
    head = PrimaryNode(operation_type='logit')
    middle = SecondaryNode(operation_type='logit', nodes_from=[head])
    tail = SecondaryNode(operation_type='logit', nodes_from=[middle, head])
    # Close the loop: middle -> tail already exists, now tail -> middle too
    middle.nodes_from.append(tail)

    chain = Chain()
    for node in (head, middle, tail):
        chain.add_node(node)

    return chain
Пример #10
0
def get_roc_auc_value(chain: Chain, train_data: InputData,
                      test_data: InputData) -> (float, float):
    """Predict on train and test data and score both predictions with ROC AUC.

    :param chain: chain used for prediction (it is not fitted here)
    :param train_data: data for the train-side prediction and score
    :param test_data: data for the test-side prediction and score
    :return: (train ROC AUC, test ROC AUC)
    """
    # Predictions are requested for train first, then for test
    train_pred, test_pred = [chain.predict(input_data=dataset)
                             for dataset in (train_data, test_data)]

    test_score = roc_auc(y_true=test_data.target, y_score=test_pred.predict)
    train_score = roc_auc(y_true=train_data.target, y_score=train_pred.predict)

    return train_score, test_score
Пример #11
0
def two_level_chain():
    """Return a chain with an 'xgboost' node fed by 'logit' and 'knn' primary nodes."""
    logit = PrimaryNode(model_type='logit')
    knn = PrimaryNode(model_type='knn')
    xgboost = SecondaryNode(model_type='xgboost', nodes_from=[logit, knn])

    chain = Chain()
    for node in (logit, knn, xgboost):
        chain.add_node(node)

    return chain
Пример #12
0
def test_import_json_to_fitted_chain_correctly():
    """Load a fitted chain from JSON, save it again and compare with the source file."""
    source_path = create_correct_path('test_fitted_chain_convert_to_json')

    restored = Chain()
    restored.load(source_path)
    saved_json = restored.save('test_import_json_to_fitted_chain_correctly')

    with open(source_path) as source_file:
        source_content = json.load(source_file)

    # The saved result is compared against the re-serialized source content
    assert saved_json == json.dumps(source_content)
Пример #13
0
def chain_with_isolated_components():
    """Return a chain made of two node pairs that are not linked to each other."""
    # Component one: primary -> secondary
    comp_a_base = PrimaryNode(operation_type='logit')
    comp_a_top = SecondaryNode(operation_type='logit', nodes_from=[comp_a_base])
    # Component two: a secondary node without parents -> secondary
    comp_b_base = SecondaryNode(operation_type='logit', nodes_from=[])
    comp_b_top = SecondaryNode(operation_type='logit', nodes_from=[comp_b_base])

    chain = Chain()
    for node in (comp_a_base, comp_a_top, comp_b_base, comp_b_top):
        chain.add_node(node)

    return chain
Пример #14
0
def valid_chain():
    """Return a linear chain of four 'logit' nodes, each fed by the previous one."""
    nodes = [PrimaryNode(operation_type='logit')]
    for _ in range(3):
        nodes.append(
            SecondaryNode(operation_type='logit', nodes_from=[nodes[-1]]))

    chain = Chain()
    for node in nodes:
        chain.add_node(node)

    return chain
Пример #15
0
def chain_with_isolated_nodes():
    """Return a three-node linear chain plus one extra node with no parents."""
    linked = [PrimaryNode(model_type='logit')]
    for _ in range(2):
        linked.append(
            SecondaryNode(model_type='logit', nodes_from=[linked[-1]]))
    # Has no parents, and no other node lists it as a parent
    lonely = SecondaryNode(model_type='logit', nodes_from=[])

    chain = Chain()
    for node in (*linked, lonely):
        chain.add_node(node)

    return chain
Пример #16
0
def get_composite_multiscale_chain():
    """Return a chain joining a trend branch and a residual branch with a 'linear' root.

    Each branch is a data-model primary node followed by a 'ridge' node.
    """
    chain = Chain()

    trend_source = PrimaryNode('trend_data_model')
    trend_ridge = SecondaryNode('ridge', nodes_from=[trend_source])

    residual_source = PrimaryNode('residual_data_model')
    residual_ridge = SecondaryNode('ridge', nodes_from=[residual_source])

    # The residual branch is listed before the trend branch
    root = SecondaryNode('linear', nodes_from=[residual_ridge, trend_ridge])
    chain.add_node(root)
    return chain
Пример #17
0
def chain_third():
    """Return a chain with a 'qda' node fed by two 'rf' primary nodes."""
    #    QDA
    #  |     \
    # RF     RF
    chain = Chain()
    new_node = SecondaryNode('qda')
    for model_type in ('rf', 'rf'):
        new_node.nodes_from.append(PrimaryNode(model_type))
    chain.add_node(new_node)
    # Plain loop instead of a list comprehension: add_node is called only for
    # its side effect, so there is no reason to build a throwaway list
    for node_from in new_node.nodes_from:
        chain.add_node(node_from)
    return chain
Пример #18
0
def test_import_json_to_chain_correctly():
    """Compare a chain loaded from JSON and re-saved with a freshly built, saved chain."""
    source_path = create_correct_path('test_chain_convert_to_json')

    loaded_chain = Chain()
    loaded_chain.load_chain(source_path)
    actual = loaded_chain.save_chain('test_import_json_to_chain_correctly_1')

    reference_chain = create_chain()
    expected = reference_chain.save_chain('test_import_json_to_chain_correctly_2')

    actual_dump, expected_dump = json.dumps(actual), json.dumps(expected)
    assert actual_dump == expected_dump
Пример #19
0
def test_ts_forecasting_lagged_data_operation():
    """Check that a lagged -> ridge chain predicts as many values as ``y_test`` holds."""
    train_input, predict_input, y_test = get_time_series()

    lagged = PrimaryNode('lagged')
    chain = Chain(SecondaryNode('ridge', nodes_from=[lagged]))

    chain.fit_from_scratch(train_input)
    output = chain.predict(predict_input)

    # Both sides are flattened so only the number of values is compared
    flat_prediction = np.ravel(output.predict)
    flat_expected = np.ravel(y_test)

    assert len(flat_prediction) == len(flat_expected)
Пример #20
0
 def metric_for_nodes(self, metric_function, train_data: InputData,
                      test_data: InputData, is_chain_shared: bool,
                      chain: Chain) -> float:
     """Validate and fit a chain, then score it with ``metric_function``.

     :param metric_function: called as ``metric_function(chain, test_data)``
     :param train_data: data the chain is fitted on
     :param test_data: data handed to ``metric_function``
     :param is_chain_shared: when true, the chain is wrapped into a
         ``SharedChain`` that uses ``self.shared_cache``
     :param chain: chain to assess
     :return: the metric value, or ``max_int_value`` if any step raised
     """
     try:
         validate(chain)
         assessed = (SharedChain(base_chain=chain, shared_cache=self.shared_cache)
                     if is_chain_shared else chain)
         assessed.fit(input_data=train_data)
         score = metric_function(assessed, test_data)
     except Exception as ex:
         # Best effort: a failing chain is logged and given the fallback value
         self.log.info(f'Error in chain assessment during composition: {ex}. Continue.')
         score = max_int_value
     return score
Пример #21
0
def test_arima_chain_fit_correct():
    """Fit a single-node 'arima' chain and check the train error against a threshold."""
    train_data, test_data = get_synthetic_ts_data_linear(forecast_length=12)

    arima_chain = Chain(PrimaryNode('arima'))
    arima_chain.fit(input_data=train_data)

    # Only the train-side error is asserted; the other three values are ignored
    rmse_on_train, _, _, _ = get_rmse_value(arima_chain, train_data, test_data)

    threshold = _max_rmse_threshold_by_std(test_data.target)

    assert rmse_on_train < threshold
Пример #22
0
def get_rmse_value(chain: Chain, train_data: InputData,
                   test_data: InputData) -> (float, float):
    """Predict on train and test data and score both with ``mse(..., squared=False)``.

    :param chain: chain used for prediction (it is not fitted here)
    :param train_data: data for the train-side prediction and score
    :param test_data: data for the test-side prediction and score
    :return: (train score, test score)
    """
    # Predictions are requested for train first, then for test
    train_pred, test_pred = [chain.predict(input_data=dataset)
                             for dataset in (train_data, test_data)]

    test_score = mse(y_true=test_data.target, y_pred=test_pred.predict,
                     squared=False)
    train_score = mse(y_true=train_data.target, y_pred=train_pred.predict,
                      squared=False)

    return train_score, test_score
Пример #23
0
def test_chain_repr():
    """Check ``repr`` of a chain with an 'xgboost' root over three primary nodes."""
    primaries = [PrimaryNode(operation_type=name)
                 for name in ('logit', 'lda', 'knn')]
    root = SecondaryNode(operation_type='xgboost', nodes_from=primaries)

    chain = Chain()
    chain.add_node(root)

    expected = "{'depth': 2, 'length': 4, 'nodes': [xgboost, logit, lda, knn]}"

    assert repr(chain) == expected
Пример #24
0
def create_json_models_files():
    """
    Save three chains for later use by the tests: a plain chain,
    a fitted chain and an empty chain.
    """
    # (factory, name passed to save) pairs, processed in this order
    chains_to_save = (
        (create_chain, 'test_chain_convert_to_json'),
        (create_fitted_chain, 'test_fitted_chain_convert_to_json'),
        (Chain, 'test_empty_chain_convert_to_json'),
    )
    for build_chain, save_name in chains_to_save:
        build_chain().save(save_name)
Пример #25
0
def test_log_clustering_fit_correct(data_fixture, request):
    """Check that 'kmeans' fitted on normalized data predicts exactly labels 0 and 1.

    :param data_fixture: name of the fixture that provides the data
    :param request: pytest ``request`` object used to resolve that fixture
    """
    data = request.getfixturevalue(data_fixture)
    train_data, test_data = train_test_data_setup(data=data)

    # Scaling chain. Fit predict it
    scaling_chain = Chain(PrimaryNode('normalization'))
    scaling_chain.fit(train_data)
    scaled_data = scaling_chain.predict(train_data)

    kmeans = Model(operation_type='kmeans')
    _, train_predicted = kmeans.fit(data=scaled_data)

    # array_equal compares shape and values, so a label set of the wrong size
    # fails the assertion cleanly. The former `all(unique == [0, 1])` could
    # raise on a shape mismatch instead of reporting a failed assert.
    assert np.array_equal(np.unique(train_predicted.predict), [0, 1])
Пример #26
0
def execute_chain_for_text_problem(train_data, test_data):
    """Fit a text_clean -> tfidf -> multinb chain and return its ROC AUC on test data.

    :param train_data: data the chain is fitted on
    :param test_data: data used for prediction and scoring
    :return: ROC AUC of the chain prediction against ``test_data.target``
    """
    text_clean = PrimaryNode('text_clean')
    tfidf = SecondaryNode('tfidf', nodes_from=[text_clean])
    multinb = SecondaryNode('multinb', nodes_from=[tfidf])

    text_chain = Chain(multinb)
    text_chain.fit(train_data)
    output = text_chain.predict(test_data)

    return roc_auc(y_true=test_data.target, y_score=output.predict)
Пример #27
0
def test_regression_chain_period_exog_forecast_multistep_correct():
    """Fit a single-node 'linear' chain and check train and test errors stay below 1.5."""
    train_data, test_data = get_synthetic_ts_data_period(forecast_length=2,
                                                         max_window_size=3)

    linear_chain = Chain(PrimaryNode('linear'))
    linear_chain.fit(input_data=train_data)

    # The two returned predictions are not needed here
    train_error, test_error, _, _ = get_rmse_value(linear_chain, train_data,
                                                   test_data)

    max_allowed_error = 1.5
    assert train_error < max_allowed_error
    assert test_error < max_allowed_error
Пример #28
0
def test_regression_chain_forecast_multistep_correct():
    """Fit a single-node 'ridge' chain and check the test error against a threshold."""
    train_data, test_data = get_synthetic_ts_data_period(forecast_length=20,
                                                         max_window_size=30)

    ridge_chain = Chain(PrimaryNode('ridge'))
    ridge_chain.fit(input_data=train_data)

    # Only the test-side error is used
    _, test_error, _, _ = get_rmse_value(ridge_chain, train_data, test_data)

    threshold = _max_rmse_threshold_by_std(test_data.target, is_strict=False)

    assert test_error < threshold
Пример #29
0
def test_chain_with_wrong_data():
    """Check that fitting raises ValueError for a 10-point series with forecast_length 10."""
    chain = Chain(PrimaryNode('linear'))

    # The same ten-element range is reused as idx, features and target
    series = np.arange(0, 10)
    forecasting_task = Task(TaskTypesEnum.ts_forecasting,
                            TsForecastingParams(forecast_length=10))
    wrong_data = InputData(idx=series,
                           features=series,
                           target=series,
                           data_type=DataTypesEnum.ts,
                           task=forecasting_task)

    with pytest.raises(ValueError):
        chain.fit(wrong_data)
Пример #30
0
def test_ts_forecasting_smoothing_data_operation():
    """Check the prediction length for chains that start with a smoothing operation.

    Runs once with 'smoothing' and once with 'gaussian_filter' as the first node.
    """
    train_input, predict_input, y_test = get_time_series()

    for operation_name in ('smoothing', 'gaussian_filter'):
        # <operation> -> lagged -> ridge
        smoothing = PrimaryNode(operation_name)
        lagged = SecondaryNode('lagged', nodes_from=[smoothing])
        chain = Chain(SecondaryNode('ridge', nodes_from=[lagged]))

        chain.fit_from_scratch(train_input)
        output = chain.predict(predict_input)

        flat_prediction = np.ravel(output.predict)
        flat_expected = np.ravel(y_test)

        assert len(flat_prediction) == len(flat_expected)