Пример #1
0
def test_multivariate_ts():
    """Smoke test: fit the API on a two-variable history and request a forecast."""
    horizon = 1

    train_csv = os.path.join(str(fedot_project_root()),
                             'cases/data/metocean/metocean_data_train.csv')
    # a dataset for a final validation of the composed model
    test_csv = os.path.join(str(fedot_project_root()),
                            'cases/data/metocean/metocean_data_test.csv')

    target_history, add_history, _ = prepare_input_data(train_csv, test_csv)

    # 'ws' is the additional variable, 'ssh' is the target variable
    historical_data = dict(ws=add_history, ssh=target_history)

    ts_params = TsForecastingParams(forecast_length=horizon)
    fedot = Fedot(problem='ts_forecasting',
                  composer_params=composer_params,
                  task_params=ts_params)
    fedot.fit(features=historical_data, target=target_history)

    forecast = fedot.forecast(historical_data, forecast_length=horizon)
    assert forecast is not None
Пример #2
0
def test_pandas_input_for_api():
    """Check that the API accepts pandas containers for fit, predict and metrics."""
    train_data, test_data, _ = get_dataset('classification')

    # wrap features and targets into pandas containers
    x_train, y_train = pd.DataFrame(train_data.features), pd.Series(train_data.target)
    x_test, y_test = pd.DataFrame(test_data.features), pd.Series(test_data.target)

    # task selection, initialisation of the framework
    baseline_model = Fedot(problem='classification')

    # no optimisation here - the predefined 'xgboost' model is fitted as is
    baseline_model.fit(features=x_train, target=y_train, predefined_model='xgboost')

    # the prediction must contain one value per test target
    prediction = baseline_model.predict(features=x_test)
    assert len(prediction) == len(y_test)

    # quality metric for the test sample
    f1_value = baseline_model.get_metrics(metric_names='f1', target=y_test)['f1']
    assert f1_value > 0
Пример #3
0
def run_multi_output_case(path, vis=False):
    """ Launch the case of river level prediction on the Lena river as a
    multi-output regression task and print the MAE on the test part

    :param path: path to the file with table
    :param vis: whether the predictions should be plotted
    """
    # one target column per forecast day: '1_day' ... '7_day'
    target_columns = [f'{day}_day' for day in range(1, 8)]

    data = InputData.from_csv(path, target_columns=target_columns, columns_to_drop=['date'])
    train, test = train_test_data_setup(data)

    automl_model = Fedot(problem='regression')
    automl_model.fit(features=train)
    predicted_array = automl_model.predict(features=test)

    # both arrays are flattened so that the error is computed over all outputs at once
    flat_forecast = np.ravel(predicted_array)
    flat_actual = np.ravel(test.target)
    mae_value = mean_absolute_error(flat_actual, flat_forecast)
    print(f'MAE - {mae_value:.2f}')

    if not vis:
        return
    plot_predictions(predicted_array, test)
Пример #4
0
def run_one_model_with_specific_evaluation_mod(train_data,
                                               test_data,
                                               mode: str = None):
    """
    Fit a single 'svc' node with custom parameters through the API, then print
    the elapsed fitting time and the metrics obtained on the test data.

    :param train_data: data used to fit the pipeline
    :param test_data: data used for prediction and metric calculation
    :param mode: pass 'gpu' to create the model with the 'gpu' preset
    """
    problem = 'classification'
    baseline_model = Fedot(problem=problem, preset='gpu') if mode == 'gpu' else Fedot(problem=problem)

    svc_node = PrimaryNode('svc')
    # the custom params are needed to make probability evaluation available,
    # otherwise an error occurs
    svc_node.custom_params = {
        'kernel': 'rbf',
        'C': 10,
        'gamma': 1,
        'cache_size': 2000,
        'probability': True,
    }
    preset_pipeline = Pipeline(svc_node)

    started_at = datetime.now()
    baseline_model.fit(features=train_data, target='target', predefined_model=preset_pipeline)
    print(f'Completed with custom params in: {datetime.now() - started_at}')

    baseline_model.predict(features=test_data)
    print(baseline_model.get_metrics())
Пример #5
0
def test_classification_quality_improvement():
    """Check that the composed model beats the 'xgboost' baseline by ROC AUC and
    that the baseline itself is not worse than the expected level."""
    # input data initialization
    train_data_path = f'{project_root()}/cases/data/scoring/scoring_train.csv'
    test_data_path = f'{project_root()}/cases/data/scoring/scoring_test.csv'
    problem = 'classification'
    expected_baseline_quality = 0.823

    # baseline: predefined model without composing
    baseline_model = Fedot(problem=problem)
    baseline_model.fit(features=train_data_path, target='target', predefined_model='xgboost')
    baseline_model.predict_proba(features=test_data_path)
    baseline_metrics = baseline_model.get_metrics()

    # parameters for composing
    composer_params = dict(max_depth=3,
                           max_arity=3,
                           pop_size=20,
                           num_of_generations=20,
                           learning_time=10,
                           with_tuning=True)

    auto_model = Fedot(problem=problem, composer_params=composer_params, seed=42, verbose_level=4)
    auto_model.fit(features=train_data_path, target='target')
    auto_model.predict_proba(features=test_data_path)
    auto_metrics = auto_model.get_metrics()

    print(auto_metrics['roc_auc'])
    assert auto_metrics['roc_auc'] > baseline_metrics['roc_auc'] >= expected_baseline_quality
Пример #6
0
def test_multiobj_for_api():
    """Check multi-objective composing (metrics 'f1' and 'node_num') through the API."""
    train_data, test_data, _ = get_dataset('classification')

    # Work on a private copy: writing the metric list into the shared
    # ``composer_params`` dict would leak the multi-objective setting into
    # every other test that reads the same dict afterwards.
    multiobj_params = {**composer_params, 'composer_metric': ['f1', 'node_num']}

    model = Fedot(problem='classification', composer_params=multiobj_params)
    model.fit(features=train_data)
    prediction = model.predict(features=test_data)
    metric = model.get_metrics()

    assert len(prediction) == len(test_data.target)
    assert metric['f1'] > 0
    assert model.best_models is not None
Пример #7
0
def test_multi_target_regression_composing_correct(multi_target_data_setup):
    """Check that composing for multi-target regression yields a prediction."""
    # simple multi-target dataset provided by the fixture
    train, test = multi_target_data_setup

    automl_model = Fedot(problem='regression',
                         composer_params=get_simple_composer_params())
    automl_model.fit(features=train)

    assert automl_model.predict(features=test) is not None
Пример #8
0
def test_api_forecast_correct(task_type: str = 'ts_forecasting'):
    """Check the forecast length and the RMSE value obtained through the API."""
    # The forecast length must be equal to 12
    forecast_length = 12
    train_data, test_data, _ = get_dataset(task_type)

    ts_params = TsForecastingParams(forecast_length=forecast_length)
    model = Fedot(problem='ts_forecasting',
                  composer_params=composer_params,
                  task_params=ts_params)
    model.fit(features=train_data)

    ts_forecast = model.predict(features=train_data)
    rmse_value = model.get_metrics(target=test_data.target, metric_names='rmse')['rmse']

    assert len(ts_forecast) == forecast_length
    assert rmse_value >= 0
Пример #9
0
def run_ts_forecasting_example(with_plot=True,
                               with_pipeline_vis=True,
                               timeout=None):
    """Compose a forecasting model for the salaries series and return its forecast.

    :param with_plot: whether to plot the forecasting result
    :param with_pipeline_vis: whether to show the obtained pipeline
    :param timeout: value passed to the model as ``timeout``
    :return: forecast obtained from the model
    """
    train_data_path = f'{fedot_project_root()}/examples/data/salaries.csv'
    target = pd.read_csv(train_data_path)['target']

    # the task is parametrised by the forecast length only
    task_parameters = TsForecastingParams(forecast_length=30)

    # init model for the time series forecasting
    model = Fedot(problem='ts_forecasting', task_params=task_parameters, timeout=timeout)

    # run AutoML model design
    pipeline = model.fit(features=train_data_path, target='target')
    if with_pipeline_vis:
        pipeline.show()

    # use model to obtain forecast
    forecast = model.predict(features=train_data_path)

    metrics = model.get_metrics(metric_names=['rmse', 'mae', 'mape'], target=target)
    print(metrics)

    # plot forecasting result
    if with_plot:
        model.plot_prediction()

    return forecast
Пример #10
0
def run_metocean_forecasting_problem(train_file_path,
                                     test_file_path,
                                     forecast_length=1,
                                     is_visualise=False,
                                     timeout=5):
    """Compose a forecasting model for 'ssh' using 'ws' as an additional variable.

    :param train_file_path: path to the file with train data
    :param test_file_path: path to the file with test data
    :param forecast_length: length of the forecast
    :param is_visualise: whether to show the pipeline and plot the prediction
    :param timeout: value passed to the model as ``timeout``
    :return: metrics calculated against the observed values
    """
    # Prepare data for train and test
    ssh_history, ws_history, ssh_obs = prepare_input_data(train_file_path, test_file_path)

    # 'ws' is the additional variable, 'ssh' is the target variable
    historical_data = dict(ws=ws_history, ssh=ssh_history)

    ts_params = TsForecastingParams(forecast_length=forecast_length)
    fedot = Fedot(problem='ts_forecasting',
                  task_params=ts_params,
                  timeout=timeout,
                  verbose_level=4)

    pipeline = fedot.fit(features=historical_data, target=ssh_history)
    fedot.forecast(historical_data, forecast_length=forecast_length)
    metric = fedot.get_metrics(target=ssh_obs)

    if is_visualise:
        pipeline.show()
        fedot.plot_prediction()

    return metric
Пример #11
0
def test_api_forecast_numpy_input_with_static_model_correct(task_type: str = 'ts_forecasting'):
    """Check forecasting through the API with a predefined 'lagged' -> 'linear' chain
    fitted on features and target passed separately."""
    forecast_length = 10
    train_data, test_data, _ = get_dataset(task_type)

    ts_params = TsForecastingParams(forecast_length=forecast_length)
    model = Fedot(problem='ts_forecasting', task_params=ts_params)

    # Define chain for prediction
    lagged_node = PrimaryNode('lagged')
    linear_node = SecondaryNode('linear', nodes_from=[lagged_node])
    chain = Chain(linear_node)

    model.fit(features=train_data.features, target=train_data.target, predefined_model=chain)
    ts_forecast = model.predict(features=train_data)
    rmse_value = model.get_metrics(target=test_data.target, metric_names='rmse')['rmse']

    assert len(ts_forecast) == forecast_length
    assert rmse_value >= 0
Пример #12
0
def test_baseline_with_api():
    """Check the baseline scenario: a predefined 'xgboost' model fitted without optimisation."""
    train_data, test_data, _ = get_dataset('classification')

    # task selection, initialisation of the framework
    baseline_model = Fedot(problem='classification')

    # no optimisation here - the predefined 'xgboost' model is fitted as is
    baseline_model.fit(features=train_data, target='target', predefined_model='xgboost')

    # one probability prediction is expected per test target
    prediction = baseline_model.predict_proba(features=test_data)
    assert len(prediction) == len(test_data.target)

    # quality metric for the test sample
    f1_value = baseline_model.get_metrics(metric_names='f1')['f1']
    assert f1_value > 0
Пример #13
0
def test_api_predict_correct(task_type: str = 'classification'):
    """Check that fit returns a Pipeline and that predict and metrics work."""
    train_data, test_data, _ = get_dataset(task_type)

    model = Fedot(problem=task_type, composer_params=composer_params)
    fitted = model.fit(features=train_data)
    prediction = model.predict(features=test_data)
    f1_value = model.get_metrics()['f1']

    assert isinstance(fitted, Pipeline)
    assert len(prediction) == len(test_data.target)
    assert f1_value > 0
Пример #14
0
def run_credit_scoring_problem(train_file_path, test_file_path,
                               timeout: float = 5.0,
                               is_visualise=False,
                               with_tuning=False,
                               cache_path=None):
    """Compose a classification model for the given files and return its ROC AUC.

    :param train_file_path: path to the file with train data
    :param test_file_path: path to the file with test data
    :param timeout: value passed to the model as ``timeout``
    :param is_visualise: whether to show the obtained pipeline
    :param with_tuning: selects the 'light_tun' preset instead of 'light'
    :param cache_path: not used by this function
    :return: value of the 'roc_auc' metric
    """
    if with_tuning:
        preset = 'light_tun'
    else:
        preset = 'light'

    automl = Fedot(problem='classification',
                   timeout=timeout,
                   verbose_level=4,
                   preset=preset)
    automl.fit(train_file_path)
    # predict is called before the metrics are requested; its result is not used further
    automl.predict(test_file_path)
    metrics = automl.get_metrics()

    if is_visualise:
        automl.current_pipeline.show()

    print(f'Composed ROC AUC is {round(metrics["roc_auc"], 3)}')

    return metrics["roc_auc"]
Пример #15
0
def run_classification_example(timeout=None):
    """Fit the 'xgboost' baseline and a composed model on the scoring data.

    :param timeout: value passed to both models as ``timeout``
    :return: probabilities predicted by the composed model
    """
    data_dir = f'{fedot_project_root()}/cases/data/scoring'
    train_data_path = f'{data_dir}/scoring_train.csv'
    test_data_path = f'{data_dir}/scoring_test.csv'
    problem = 'classification'

    # baseline: predefined model without composing
    baseline_model = Fedot(problem=problem, timeout=timeout)
    baseline_model.fit(features=train_data_path, target='target', predefined_model='xgboost')
    baseline_model.predict(features=test_data_path)
    print(baseline_model.get_metrics())

    # composed model
    auto_model = Fedot(problem=problem, seed=42, timeout=timeout)
    auto_model.fit(features=train_data_path, target='target')
    prediction = auto_model.predict_proba(features=test_data_path)
    print(auto_model.get_metrics())

    return prediction
Пример #16
0
def run_regression_example():
    """Fit the 'xgbreg' baseline and a composed model on the cholesterol data.

    :return: prediction of the composed model for the test part
    """
    problem = 'regression'
    data_path = f'{fedot_project_root()}/cases/data/cholesterol/cholesterol.csv'

    train, test = train_test_data_setup(InputData.from_csv(data_path))

    # baseline: predefined model without composing
    baseline_model = Fedot(problem=problem)
    baseline_model.fit(features=train, predefined_model='xgbreg')
    baseline_model.predict(features=test)
    print(baseline_model.get_metrics())

    # composed model
    auto_model = Fedot(problem=problem, seed=42)
    auto_model.fit(features=train, target='target')
    prediction = auto_model.predict(features=test)
    print(auto_model.get_metrics())

    return prediction
Пример #17
0
def run_pipeline_with_specific_evaluation_mode(train_data: InputData,
                                               test_data: InputData,
                                               mode: str = None):
    """
    Fit a 3-node pipeline ('svc' and 'logit' feeding 'rf') through the API, then
    print the elapsed fitting time and the metrics obtained on the test data.

    :param train_data: data used to fit the pipeline
    :param test_data: data used for prediction and metric calculation
    :param mode: pass 'gpu' to create the model with the 'gpu' preset
    """
    problem = 'classification'
    baseline_model = Fedot(problem=problem, preset='gpu') if mode == 'gpu' else Fedot(problem=problem)

    # first primary node: 'svc' with custom parameters
    svc_node = PrimaryNode('svc')
    svc_node.custom_params = {
        'kernel': 'rbf',
        'C': 10,
        'gamma': 1,
        'cache_size': 2000,
        'probability': True,
    }
    # second primary node and the final node joining both
    logit_node = PrimaryNode('logit')
    rf_node = SecondaryNode('rf', nodes_from=[svc_node, logit_node])
    preset_pipeline = Pipeline(rf_node)

    started_at = datetime.now()
    baseline_model.fit(features=train_data, target='target', predefined_model=preset_pipeline)
    print(f'Completed with custom params in: {datetime.now() - started_at}')

    baseline_model.predict(features=test_data)
    print(baseline_model.get_metrics())
Пример #18
0
def run_classification_multiobj_example(with_plot=True):
    """Compose a classifier with two objectives ('f1' and 'node_num') on Hill Valley data.

    :param with_plot: whether to show the best models found
    :return: probabilities predicted for the test data
    """
    train_data = pd.read_csv(
        f'{project_root()}/examples/data/Hill_Valley_with_noise_Training.data')
    test_data = pd.read_csv(
        f'{project_root()}/examples/data/Hill_Valley_with_noise_Testing.data')
    # the label column is separated from the test features
    target = test_data.pop('class')

    objectives = {'metric': ['f1', 'node_num']}
    auto_model = Fedot(problem='classification',
                       learning_time=2,
                       preset='light',
                       composer_params=objectives,
                       seed=42)
    auto_model.fit(features=train_data, target='class')
    prediction = auto_model.predict_proba(features=test_data)
    print(auto_model.get_metrics(target))

    if with_plot:
        auto_model.best_models.show()

    return prediction
Пример #19
0
def make_forecast(df, len_forecast: int, time_series_label: str):
    """
    Function for making time series forecasting with the FEDOT framework

    :param df: dataframe to process
    :param len_forecast: forecast length
    :param time_series_label: name of time series to process

    :return predicted_values: forecast
    :return model_name: name of the model (always 'FEDOT')
    """

    # Define parameters
    task = Task(TaskTypesEnum.ts_forecasting,
                TsForecastingParams(forecast_length=len_forecast))

    # Init model for the time series forecasting
    model = Fedot(problem='ts_forecasting',
                  task_params=task.task_params,
                  composer_params={
                      'timeout': 1,
                      'preset': 'ultra_light_tun'
                  },
                  preset='ultra_light_tun')

    # The whole series serves both as features and as target for fitting
    input_data = InputData(idx=np.arange(0, len(df)),
                           features=np.array(df[time_series_label]),
                           target=np.array(df[time_series_label]),
                           task=task,
                           data_type=DataTypesEnum.ts)

    # Indices of the forecast continue right after the end of the known series
    start_forecast = len(df)
    end_forecast = start_forecast + len_forecast
    predict_input = InputData(idx=np.arange(start_forecast, end_forecast),
                              features=np.array(df[time_series_label]),
                              target=np.array(df[time_series_label]),
                              task=task,
                              data_type=DataTypesEnum.ts)

    # Run AutoML model design; the returned pipeline is not needed here
    model.fit(features=input_data)
    predicted_values = model.predict(predict_input)

    model_name = 'FEDOT'
    return predicted_values, model_name
Пример #20
0
def test_cv_api_correct():
    """Check composing with cross validation ('cv_folds': 10) for classification via the API."""
    composer_params = dict(max_depth=1,
                           max_arity=2,
                           timeout=0.0001,
                           preset='ultra_light',
                           cv_folds=10)

    task = Task(task_type=TaskTypesEnum.classification)
    dataset_to_compose, dataset_to_validate = get_data(task)

    model = Fedot(problem='classification', composer_params=composer_params, verbose_level=2)
    fitted = model.fit(features=dataset_to_compose)
    prediction = model.predict(features=dataset_to_validate)
    f1_value = model.get_metrics()['f1']

    assert isinstance(fitted, Pipeline)
    assert len(prediction) == len(dataset_to_validate.target)
    assert f1_value > 0
Пример #21
0
def test_api_cv_correct():
    """ Checks if the composer works correctly when using cross validation for
    time series through api """
    folds = 2
    _, forecast_len, validation_blocks, time_series = configure_experiment()
    composer_params = {
        'max_depth': 1,
        'max_arity': 2,
        'timeout': 0.05,
        'preset': 'ultra_light',
        'cv_folds': folds,
        'validation_blocks': validation_blocks
    }
    task_parameters = TsForecastingParams(forecast_length=forecast_len)

    model = Fedot(problem='ts_forecasting',
                  composer_params=composer_params,
                  task_params=task_parameters,
                  verbose_level=2)
    fedot_model = model.fit(features=time_series)

    # Reaching this line means that fitting raised no exception; the result
    # is checked as well so that the assertion is not vacuous.
    assert fedot_model is not None
Пример #22
0
def _fit_or_load_pipeline(save_dir, features, target, task_parameters, timeout):
    """Return the pipeline stored under ``save_dir``; compose and save it first if absent.

    :param save_dir: name used for saving the pipeline and for the lookup of
        the previously saved ``<save_dir>/<save_dir>.json`` file
    :param features: data passed to the model as features for fitting
    :param target: data passed to the model as target for fitting
    :param task_parameters: forecasting task parameters of the model
    :param timeout: value forwarded to the composer as 'timeout'
    :return: loaded or freshly fitted pipeline
    """
    json_path = f'{save_dir}/{save_dir}.json'
    if os.path.exists(json_path):
        pipeline = Pipeline()
        pipeline.load(json_path)
        return pipeline

    model = Fedot(problem='ts_forecasting',
                  task_params=task_parameters,
                  composer_params={'timeout': timeout},
                  preset='ultra_light',
                  verbose_level=4)

    # run AutoML model design and keep the result for the next launches
    pipeline = model.fit(features=features, target=target)
    pipeline.save(save_dir)
    return pipeline


def _print_forecast_errors(title, actual, predicted):
    """Print ``title`` followed by RMSE and MAE of ``predicted`` against ``actual``."""
    print(title)
    # The root is taken explicitly instead of passing ``squared=False``,
    # which is deprecated in recent scikit-learn releases.
    rmse = np.sqrt(mean_squared_error(actual, predicted))
    mae = mean_absolute_error(actual, predicted)
    print(f'RMSE - {rmse:.4f}')
    print(f'MAE - {mae:.4f}\n')


def run_oil_forecasting(path_to_file, path_to_file_crm, len_forecast,
                        len_forecast_full, ax, well_id, timeout):
    """Compare the CRM values, an AutoML forecast and an AutoML forecast built on CRM data.

    Two pipelines are composed (or loaded if they were saved by a previous
    launch): one on the data from ``path_to_file`` and one on the data from
    ``path_to_file_crm``. RMSE and MAE are printed for the raw
    ``crm_<well_id>`` column and for both forecasts; the series are
    optionally plotted.

    :param path_to_file: path to the csv file with the source data
    :param path_to_file_crm: path to the csv file that also contains the
        ``crm_<well_id>`` column
    :param len_forecast: forecast length of the forecasting task
    :param len_forecast_full: horizon of the in-sample forecast, also used as
        the length for the data split
    :param ax: object used for plotting; a falsy value disables plotting
    :param well_id: identifier of the well, used for data preparation, for
        the names of the saved pipelines and as the plot title
    :param timeout: composer timeout; 1 is used when None is passed
    """
    if timeout is None:
        timeout = 1

    # The separator is a regular expression, so the python engine is requested
    # explicitly instead of relying on the implicit fallback and its warning.
    df = pd.read_csv(path_to_file, sep=' *, *', engine='python')
    df_crm = pd.read_csv(path_to_file_crm, sep=' *, *', engine='python')

    # The test part comes from the source data, while the train part and the
    # full series used for plotting come from the CRM data.
    (_, target_train, data_fit, _, _, input_data_predict, test_data,
     _, _) = prepare_dataset(df, len_forecast, len_forecast_full, well_id)
    (_, target_train_crm, data_fit_crm, _, _, input_data_predict_crm, _,
     train_data, time_series) = prepare_dataset(df_crm, len_forecast, len_forecast_full, well_id)

    task_parameters = TsForecastingParams(forecast_length=len_forecast)

    pipeline = _fit_or_load_pipeline(f'pipeline_{well_id}', data_fit,
                                     target_train, task_parameters, timeout)
    pipeline_crm = _fit_or_load_pipeline(f'pipeline_crm_{well_id}', data_fit_crm,
                                         target_train_crm, task_parameters, timeout)

    input_data_predict_mm = MultiModalData({
        f'data_source_ts/{data_part_key}': data_part
        for data_part_key, data_part in input_data_predict.items()
    })
    input_data_predict_mm_crm = MultiModalData({
        f'data_source_ts/{data_part_key}': data_part
        for data_part_key, data_part in input_data_predict_crm.items()
    })

    forecast = in_sample_ts_forecast(pipeline,
                                     input_data_predict_mm,
                                     horizon=len_forecast_full)
    forecast_crm = in_sample_ts_forecast(pipeline_crm,
                                         input_data_predict_mm_crm,
                                         horizon=len_forecast_full)

    predicted = np.ravel(np.array(forecast))
    predicted_crm = np.ravel(np.array(forecast_crm))
    predicted_only_crm = np.asarray(
        df_crm[f'crm_{well_id}'][-len_forecast_full:])
    # missing CRM values are treated as zeros
    predicted_only_crm[np.isnan(predicted_only_crm)] = 0

    test_data = np.ravel(test_data)

    _print_forecast_errors('CRM', test_data, predicted_only_crm)
    _print_forecast_errors('ML', test_data, predicted)
    _print_forecast_errors('AutoML+CRM', test_data, predicted_crm)

    if ax:
        x_for = range(len(train_data), len(time_series))
        ax.plot(x_for,
                time_series[-len_forecast_full:],
                label='Actual time series',
                linewidth=0.5)
        ax.plot(x_for, predicted_crm, label='AutoML+CRM', linewidth=0.5)
        ax.plot(x_for, predicted_only_crm, label='CRM', linewidth=0.5)

        # bands around both CRM-based series
        ci_crm = t_conf_interval(np.std(predicted_crm), 0.975,
                                 len(predicted_crm)) * 1.96
        ax.fill_between(x_for, (predicted_crm - ci_crm),
                        (predicted_crm + ci_crm),
                        color='orange',
                        alpha=.5)

        ci_crmonly = t_conf_interval(np.std(predicted_only_crm), 0.975,
                                     len(predicted_only_crm)) * 1.96
        ax.fill_between(x_for, (predicted_only_crm - ci_crmonly),
                        (predicted_only_crm + ci_crmonly),
                        color='green',
                        alpha=.5)

        ax.set(xlabel='Days from 2013.06.01', ylabel='Oil volume, m3')
        # the legend is drawn for a single well only
        if well_id == '5351':
            ax.legend()
        ax.set_title(well_id)
        ax.plot()
Пример #23
0
def run_additional_learning_example():
    """Compose a model on a small sample, then continue composing on a larger one.

    The pipeline obtained at the first stage is used as the initial pipeline
    of two further runs: once wrapped into an atomized model placed after a
    'scaling' node and once as a plain copy. Metrics of every run are printed.
    """
    data_dir = f'{fedot_project_root()}/cases/data/scoring'
    train_data = pd.read_csv(f'{data_dir}/scoring_train.csv')
    test_data = pd.read_csv(f'{data_dir}/scoring_test.csv')
    # the label column is separated from the test features
    test_data_target = test_data.pop('target')

    problem = 'classification'

    # first stage: start from 'scaling' -> 'logit' and use the first 1000 rows only
    start_pipeline = Pipeline(
        SecondaryNode('logit', nodes_from=[PrimaryNode('scaling')]))
    auto_model = Fedot(problem=problem,
                       seed=42,
                       preset='light',
                       timeout=5,
                       composer_params={'initial_pipeline': start_pipeline})

    auto_model.fit(features=deepcopy(train_data.head(1000)), target='target')
    auto_model.predict_proba(features=deepcopy(test_data))
    print('auto_model',
          auto_model.get_metrics(target=deepcopy(test_data_target)))

    prev_model = auto_model.current_pipeline
    prev_model.show()

    # the obtained pipeline is unfitted before being reused in both variants
    prev_model.unfit()
    atomized_model = Pipeline(
        SecondaryNode(operation_type=AtomizedModel(prev_model),
                      nodes_from=[PrimaryNode('scaling')]))
    non_atomized_model = deepcopy(prev_model)

    # second stage: the first 5000 rows and a shorter timeout
    train_data = train_data.head(5000)
    timeout = 1

    second_stage_runs = (
        ('auto_model_from_atomized', atomized_model),
        ('auto_model_from_pipeline', non_atomized_model),
    )
    for run_label, initial_pipeline in second_stage_runs:
        next_model = Fedot(problem=problem,
                           seed=42,
                           preset='light',
                           timeout=timeout,
                           composer_params={'initial_pipeline': initial_pipeline},
                           verbose_level=2)
        next_model.fit(features=deepcopy(train_data), target='target')
        next_model.predict_proba(features=deepcopy(test_data))
        next_model.current_pipeline.show()
        print(run_label, next_model.get_metrics(deepcopy(test_data_target)))