示例#1
0
def create_pipeline() -> Pipeline:
    """Assemble a multi-level classification pipeline rooted at a 'knn' node.

    Structure (parents are listed in the order they are passed on):
        primary nodes:   logit, lda (n_components=1), xgboost, knn (n_neighbors=9)
        knn (n=5)     <- lda, knn
        logit         <- xgboost, lda
        lda (n_c=1)   <- logit(secondary), knn(secondary), logit(primary)
        xgboost       <- logit(primary), logit(secondary), knn(primary)
        root knn (n=8) <- lda(secondary), xgboost(secondary)

    :return: Pipeline built from the root 'knn' node
    """
    # Primary (input) nodes
    primary_logit = PrimaryNode('logit')
    primary_xgb = PrimaryNode('xgboost')

    primary_lda = PrimaryNode('lda')
    primary_lda.custom_params = {'n_components': 1}

    primary_knn = PrimaryNode('knn')
    primary_knn.custom_params = {'n_neighbors': 9}

    # Nodes fed only by primary nodes
    mid_knn = SecondaryNode('knn', nodes_from=[primary_lda, primary_knn])
    mid_knn.custom_params = {'n_neighbors': 5}

    mid_logit = SecondaryNode('logit', nodes_from=[primary_xgb, primary_lda])

    # Nodes mixing primary and intermediate outputs
    upper_lda = SecondaryNode('lda',
                              nodes_from=[mid_logit, mid_knn, primary_logit])
    upper_lda.custom_params = {'n_components': 1}

    upper_xgb = SecondaryNode('xgboost',
                              nodes_from=[primary_logit, mid_logit, primary_knn])

    # Root node
    root_knn = SecondaryNode('knn', nodes_from=[upper_lda, upper_xgb])
    root_knn.custom_params = {'n_neighbors': 8}

    return Pipeline(root_knn)
示例#2
0
def get_composite_pipeline(composite_flag: bool = True) -> Pipeline:
    """Build an image pipeline in which 'cnn' node(s) feed an 'rf' root.

    :param composite_flag: when truthy, the 'rf' root receives both 'cnn'
        nodes; otherwise only the first 'cnn' node is connected to the root
    :return: Pipeline rooted at the 'rf' node
    """
    node_first = PrimaryNode('cnn')
    node_first.custom_params = {
        'image_shape': (28, 28, 1),
        'architecture': 'deep',
        'num_classes': 10,
        'epochs': 15,
        'batch_size': 128
    }

    node_second = PrimaryNode('cnn')
    # NOTE(review): this node uses the key 'architecture_type' while the
    # first node uses 'architecture' -- confirm which key the 'cnn'
    # operation actually reads; the keys are kept exactly as they were.
    node_second.custom_params = {
        'image_shape': (28, 28, 1),
        'architecture_type': 'simplified',
        'num_classes': 10,
        'epochs': 10,
        'batch_size': 128
    }

    # Build the root exactly once instead of creating a two-parent root and
    # discarding it when the composite variant is not requested.
    parents = [node_first, node_second] if composite_flag else [node_first]
    node_final = SecondaryNode('rf', nodes_from=parents)

    return Pipeline(node_final)
示例#3
0
def get_pipeline():
    """Build a two-branch time-series pipeline merged by a 'ridge' root.

    Branch one: lagged (window_size=120) -> ridge.
    Branch two: lagged (window_size=10)  -> dtreg.

    :return: Pipeline rooted at the merging 'ridge' node
    """
    # First branch
    wide_lag = PrimaryNode('lagged')
    wide_lag.custom_params = {'window_size': 120}
    ridge_branch = SecondaryNode('ridge', nodes_from=[wide_lag])

    # Second branch
    narrow_lag = PrimaryNode('lagged')
    narrow_lag.custom_params = {'window_size': 10}
    tree_branch = SecondaryNode('dtreg', nodes_from=[narrow_lag])

    root = SecondaryNode('ridge', nodes_from=[ridge_branch, tree_branch])
    return Pipeline(root)
示例#4
0
def test_multi_modal_pipeline():
    """Fit and predict with a pipeline that joins image, table and text branches.

    Each branch starts from its own data-source node; the branches are merged
    by a 'logit' root. The test passes when predict returns a non-None result.
    """
    task = Task(TaskTypesEnum.classification)
    images_size = (128, 128)

    files_path = os.path.join('test', 'data', 'multi_modal')
    path = os.path.join(str(fedot_project_root()), files_path)

    # Only the train parts are used; the other returned values are ignored.
    train_num, _, train_img, _, train_text, _ = \
        prepare_multi_modal_data(path, task, images_size, with_split=False)

    # image
    # (A stand-alone PrimaryNode('cnn') used to be built here and was
    # immediately overwritten by the node below; that dead code is removed.)
    ds_image = PrimaryNode('data_source_img')
    image_node = SecondaryNode('cnn', nodes_from=[ds_image])
    image_node.custom_params = {'image_shape': (images_size[0], images_size[1], 1),
                                'architecture': 'simplified',
                                'num_classes': 2,
                                'epochs': 15,
                                'batch_size': 128}

    # table
    ds_table = PrimaryNode('data_source_table')
    scaling_node = SecondaryNode('scaling', nodes_from=[ds_table])
    numeric_node = SecondaryNode('rf', nodes_from=[scaling_node])

    # text
    ds_text = PrimaryNode('data_source_text')
    node_text_clean = SecondaryNode('text_clean', nodes_from=[ds_text])
    text_node = SecondaryNode('tfidf', nodes_from=[node_text_clean])

    pipeline = Pipeline(SecondaryNode('logit', nodes_from=[numeric_node, image_node, text_node]))

    # Keys match the data-source node names used above.
    fit_data = MultiModalData({
        'data_source_img': train_img,
        'data_source_table': train_num,
        'data_source_text': train_text
    })

    pipeline.fit(fit_data)
    prediction = pipeline.predict(fit_data)

    assert prediction is not None
示例#5
0
def get_simple_pipeline():
    """Return a two-node pipeline: lagged (window_size=150) -> ridge."""
    lagged = PrimaryNode('lagged')
    lagged.custom_params = {'window_size': 150}
    return Pipeline(SecondaryNode('ridge', nodes_from=[lagged]))
示例#6
0
def test_forecast_with_exog():
    """Check a lagged + exogenous 'linear' pipeline against the expected series.

    The pipeline is fitted and applied on multi-modal input keyed by the
    names of its two primary nodes; the flattened forecast must equal
    ``ts_test`` element by element.
    """
    (train_source_ts, predict_source_ts,
     train_exog_ts, predict_exog_ts, ts_test) = synthetic_with_exogenous_ts()

    # Lagged transformation of the source series.
    # NOTE(review): `window_size` is not defined in this function --
    # presumably a module-level name; confirm.
    lagged = PrimaryNode('lagged')
    lagged.custom_params = {'window_size': window_size}

    # Data source for the exogenous variable
    exog = PrimaryNode('exog_ts_data_source')

    root = SecondaryNode('linear', nodes_from=[lagged, exog])
    pipeline = Pipeline(root)

    fit_input = MultiModalData({
        'exog_ts_data_source': train_exog_ts,
        'lagged': train_source_ts
    })
    pipeline.fit(input_data=fit_input)

    predict_input = MultiModalData({
        'exog_ts_data_source': predict_exog_ts,
        'lagged': predict_source_ts
    })
    forecast = pipeline.predict(input_data=predict_input)

    flat_prediction = np.ravel(np.array(forecast.predict))

    assert tuple(flat_prediction) == tuple(ts_test)
示例#7
0
def run_one_model_with_specific_evaluation_mod(train_data,
                                               test_data,
                                               mode: str = None):
    """
    Fit a single-node 'svc' pipeline through the Fedot API and print metrics.

    :param train_data: data the predefined pipeline is fitted on
    :param test_data: data the fitted model predicts on
    :param mode: pass 'gpu' to create Fedot with the 'gpu' preset; any other
        value creates it without a preset argument
    """
    fedot_kwargs = {'problem': 'classification'}
    if mode == 'gpu':
        fedot_kwargs['preset'] = 'gpu'
    baseline_model = Fedot(**fedot_kwargs)

    # Per the original author's note: these custom params (probability=True)
    # are needed to make probability evaluation available, otherwise an
    # error occurs.
    svc_node = PrimaryNode('svc')
    svc_node.custom_params = {
        'kernel': 'rbf',
        'C': 10,
        'gamma': 1,
        'cache_size': 2000,
        'probability': True,
    }
    preset_pipeline = Pipeline(svc_node)

    # Time only the fit call.
    started_at = datetime.now()
    baseline_model.fit(features=train_data,
                       target='target',
                       predefined_model=preset_pipeline)
    elapsed = datetime.now() - started_at
    print(f'Completed with custom params in: {elapsed}')

    baseline_model.predict(features=test_data)
    print(baseline_model.get_metrics())
示例#8
0
def get_ts_pipeline(window_size):
    """Return a lagged -> ridge pipeline.

    :param window_size: value stored under 'window_size' in the lagged
        node's custom params
    :return: Pipeline rooted at the 'ridge' node
    """
    lagged = PrimaryNode('lagged')
    lagged.custom_params = {'window_size': window_size}

    return Pipeline(SecondaryNode('ridge', nodes_from=[lagged]))
示例#9
0
def get_simple_ts_pipeline(model_root: str = 'ridge', window_size: int = 20):
    """Return a two-node pipeline: lagged -> ``model_root``.

    :param model_root: operation name used for the root node
    :param window_size: value stored under 'window_size' in the lagged
        node's custom params
    :return: Pipeline rooted at the ``model_root`` node
    """
    lagged = PrimaryNode('lagged')
    lagged.custom_params = {'window_size': window_size}

    root = SecondaryNode(model_root, nodes_from=[lagged])
    return Pipeline(root)
示例#10
0
def get_composite_pipeline():
    """
    Return a prepared pipeline of five nodes.

    Two branches, lagged (window_size=150) -> linear and
    lagged (window_size=100) -> linear, are merged by a 'ridge' root.

    :return: Pipeline object
    """
    # First branch
    lagged_150 = PrimaryNode('lagged')
    lagged_150.custom_params = {'window_size': 150}
    linear_150 = SecondaryNode('linear', nodes_from=[lagged_150])

    # Second branch
    lagged_100 = PrimaryNode('lagged')
    lagged_100.custom_params = {'window_size': 100}
    linear_100 = SecondaryNode('linear', nodes_from=[lagged_100])

    root = SecondaryNode('ridge', nodes_from=[linear_150, linear_100])
    return Pipeline(root)
示例#11
0
def get_stlarima_pipeline():
    """Return a single-node pipeline consisting of one 'stl_arima' node.

    The node's custom params are period=80, p=2, d=1, q=0.
    """
    stl_arima = PrimaryNode('stl_arima')
    stl_arima.custom_params = {'period': 80, 'p': 2, 'd': 1, 'q': 0}
    return Pipeline(stl_arima)
示例#12
0
def get_simple_short_lagged_pipeline():
    """Return a simple forecasting pipeline: lagged (window_size=4) -> linear."""
    lagged = PrimaryNode('lagged')
    # Four elements of the time series serve as predictors
    lagged.custom_params = {'window_size': 4}

    root = SecondaryNode('linear', nodes_from=[lagged])
    return Pipeline(root)
示例#13
0
def get_multiscale_pipeline():
    """Return a two-branch pipeline merged by a 'linear' root.

    Branch one: lagged (window_size=20) -> ridge.
    Branch two: gaussian_filter (sigma=3) -> lagged (window_size=100) -> ridge.
    """
    # Branch working on the series as is
    raw_lagged = PrimaryNode('lagged')
    raw_lagged.custom_params = {'window_size': 20}
    raw_ridge = SecondaryNode('ridge', nodes_from=[raw_lagged])

    # Branch working on the smoothed series
    smoother = PrimaryNode('gaussian_filter')
    smoother.custom_params = {'sigma': 3}
    smooth_lagged = SecondaryNode('lagged', nodes_from=[smoother])
    smooth_lagged.custom_params = {'window_size': 100}
    smooth_ridge = SecondaryNode('ridge', nodes_from=[smooth_lagged])

    root = SecondaryNode('linear', nodes_from=[raw_ridge, smooth_ridge])
    return Pipeline(root)
示例#14
0
def get_non_refinement_pipeline(lagged):
    """Create a pipeline without the decompose operation.

    Structure: one lagged node feeds both 'lasso' and 'dtreg' (max_depth=3),
    whose outputs are merged by a 'ridge' root.

    :param lagged: value stored under 'window_size' in the lagged node's
        custom params
    :return: Pipeline rooted at the 'ridge' node
    """
    source = PrimaryNode('lagged')
    source.custom_params = {'window_size': lagged}

    lasso_branch = SecondaryNode('lasso', nodes_from=[source])

    tree_branch = SecondaryNode('dtreg', nodes_from=[source])
    tree_branch.custom_params = {'max_depth': 3}

    root = SecondaryNode('ridge', nodes_from=[lasso_branch, tree_branch])
    return Pipeline(root)
示例#15
0
def get_stlarima_nemo_pipeline():
    """Return a pipeline where 'stl_arima' and an exogenous source feed 'linear'.

    Structure:
        stl_arima           -> linear
        exog_ts_data_source -> linear
    """
    arima = PrimaryNode('stl_arima')
    arima.custom_params = {'period': 80, 'p': 2, 'd': 1, 'q': 0}

    exog_source = PrimaryNode('exog_ts_data_source')

    root = SecondaryNode('linear', nodes_from=[arima, exog_source])
    return Pipeline(root)
示例#16
0
def create_pipeline() -> Pipeline:
    """Build a pipeline where 'logit' and 'lda' primary nodes feed 'xgboost'.

    The root is attached to an initially empty Pipeline via ``add_node``.

    :return: Pipeline containing the 'xgboost' node and its parents
    """
    pipeline = Pipeline()

    logit = PrimaryNode('logit')

    lda = PrimaryNode('lda')
    lda.custom_params = {'n_components': 1}

    root = SecondaryNode('xgboost')
    # NOTE(review): 'n_components' is the same param given to the 'lda'
    # node -- confirm it is intended for 'xgboost' as well.
    root.custom_params = {'n_components': 1}
    root.nodes_from = [logit, lda]

    pipeline.add_node(root)
    return pipeline
示例#17
0
def test_forecast_with_sparse_lagged():
    """Check that a sparse_lagged -> linear pipeline fits and produces a forecast.

    The pipeline is fitted and applied on multi-modal input keyed by the
    'sparse_lagged' node name. The test fails if fit/predict raise or if
    predict returns None.
    """
    # Only the source series are needed; the exogenous series and the
    # reference values returned by the generator are not used in this test.
    train_source_ts, predict_source_ts, _, _, _ = synthetic_with_exogenous_ts()

    # Source data for lagged node
    node_lagged = PrimaryNode('sparse_lagged')
    # Set window size for lagged transformation.
    # NOTE(review): `window_size` is not defined in this function --
    # presumably a module-level name; confirm.
    node_lagged.custom_params = {'window_size': window_size}

    node_final = SecondaryNode('linear', nodes_from=[node_lagged])
    pipeline = Pipeline(node_final)

    pipeline.fit(input_data=MultiModalData({'sparse_lagged': train_source_ts}))

    forecast = pipeline.predict(
        input_data=MultiModalData({'sparse_lagged': predict_source_ts}))

    # Assert on the actual result instead of a constant flag that could
    # never be False.
    assert forecast is not None
示例#18
0
def get_refinement_pipeline(lagged):
    """Create pipelines that use the decompose operation.

    Node chain: lagged -> lasso; (lagged, lasso) -> decompose -> dtreg
    (max_depth=3); (lasso, dtreg) -> ridge.

    :param lagged: value stored under 'window_size' in the lagged node's
        custom params
    :return: tuple of three pipelines, rooted at 'lasso', at 'dtreg' and at
        the merging 'ridge' node respectively
    """
    source = PrimaryNode('lagged')
    source.custom_params = {'window_size': lagged}

    lasso = SecondaryNode('lasso', nodes_from=[source])
    decompose = SecondaryNode('decompose', nodes_from=[source, lasso])
    tree = SecondaryNode('dtreg', nodes_from=[decompose])
    tree.custom_params = {'max_depth': 3}

    # Pipelines ending on each individual branch
    decompose_branch_pipeline = Pipeline(tree)
    main_branch_pipeline = Pipeline(lasso)

    # Merge the branches with different targets (T and T_decomposed)
    merged_pipeline = Pipeline(SecondaryNode('ridge', nodes_from=[lasso, tree]))

    return main_branch_pipeline, decompose_branch_pipeline, merged_pipeline
示例#19
0
def run_pipeline_with_specific_evaluation_mode(train_data: InputData,
                                               test_data: InputData,
                                               mode: str = None):
    """
    Fit a 3-node pipeline (svc + logit -> rf) through the Fedot API and print metrics.

    :param train_data: data the predefined pipeline is fitted on
    :param test_data: data the fitted model predicts on
    :param mode: pass 'gpu' to create Fedot with the 'gpu' preset; any other
        value creates it without a preset argument
    """
    fedot_kwargs = {'problem': 'classification'}
    if mode == 'gpu':
        fedot_kwargs['preset'] = 'gpu'
    baseline_model = Fedot(**fedot_kwargs)

    svc_node = PrimaryNode('svc')
    svc_node.custom_params = {
        'kernel': 'rbf',
        'C': 10,
        'gamma': 1,
        'cache_size': 2000,
        'probability': True,
    }
    logit_node = PrimaryNode('logit')

    root = SecondaryNode('rf', nodes_from=[svc_node, logit_node])
    preset_pipeline = Pipeline(root)

    # Time only the fit call.
    started_at = datetime.now()
    baseline_model.fit(features=train_data,
                       target='target',
                       predefined_model=preset_pipeline)
    elapsed = datetime.now() - started_at
    print(f'Completed with custom params in: {elapsed}')

    baseline_model.predict(features=test_data)
    print(baseline_model.get_metrics())
示例#20
0
def run_gapfilling_example():
    """
    Run an example of filling in gaps in synthetic data.

    :return arrays_dict: dictionary with 4 keys ('ridge', 'local_poly',
    'batch_poly', 'linear') that can be used to get arrays without gaps
    :return gap_data: an array with gaps
    :return real_data: an array with actual values in gaps
    """
    # Value passed to both gap fillers as `gap_value`
    gap_marker = -100.0

    # Synthetic time series
    gap_data, real_data = get_array_with_gaps()

    # Model-based filling: lagged (window_size=100) -> ridge pipeline
    lagged = PrimaryNode('lagged')
    lagged.custom_params = {'window_size': 100}
    ridge_pipeline = Pipeline(SecondaryNode('ridge', nodes_from=[lagged]))
    model_filler = ModelGapFiller(gap_value=gap_marker,
                                  pipeline=ridge_pipeline)
    filled_by_ridge = model_filler.forward_inverse_filling(gap_data)

    # Simple fillers: polynomial approximations and linear interpolation
    simple_filler = SimpleGapFiller(gap_value=gap_marker)
    filled_by_local_poly = simple_filler.local_poly_approximation(gap_data, 4, 150)
    filled_by_batch_poly = simple_filler.batch_poly_approximation(gap_data, 4, 150)
    filled_by_linear = simple_filler.linear_interpolation(gap_data)

    arrays_dict = {
        'ridge': filled_by_ridge,
        'local_poly': filled_by_local_poly,
        'batch_poly': filled_by_batch_poly,
        'linear': filled_by_linear,
    }
    return arrays_dict, gap_data, real_data
示例#21
0
def get_statsmodels_pipeline():
    """Return a single-node pipeline with an 'ar' node (lag_1=20, lag_2=100)."""
    autoregression = PrimaryNode('ar')
    autoregression.custom_params = {'lag_1': 20, 'lag_2': 100}
    return Pipeline(autoregression)