示例#1
0
def sample_pipeline():
    """Return a pipeline where 'xgboost' and 'scaling' nodes feed a 'logit' root."""
    xgboost_node = PrimaryNode(operation_type='xgboost')
    scaling_node = PrimaryNode(operation_type='scaling')
    logit_root = SecondaryNode(operation_type='logit',
                               nodes_from=[xgboost_node, scaling_node])
    return Pipeline(logit_root)
示例#2
0
def get_simple_pipeline():
    """ Return a pipeline where a 'lagged' node (window_size=150) feeds a 'ridge' root """
    lagged_node = PrimaryNode('lagged')
    lagged_node.custom_params = dict(window_size=150)
    ridge_root = SecondaryNode('ridge', nodes_from=[lagged_node])
    return Pipeline(ridge_root)
示例#3
0
def ts_pipeline_with_incorrect_data_flow():
    r"""
    Build a pipeline whose connections are intentionally incorrect.

    The connection lagged -> lagged is incorrect,
    and the connection ridge -> ar is incorrect as well:

       lagged - lagged - ridge \
                                ar -> final forecast
                lagged - ridge /
    """
    # Leaf of the upper branch
    upper_source = PrimaryNode('lagged')

    # 'lagged' stacked on 'lagged' (upper branch) and the leaf of the lower branch
    upper_lagged = SecondaryNode('lagged', nodes_from=[upper_source])
    lower_lagged = PrimaryNode('lagged')

    # One 'ridge' model on top of each branch
    upper_ridge = SecondaryNode('ridge', nodes_from=[upper_lagged])
    lower_ridge = SecondaryNode('ridge', nodes_from=[lower_lagged])

    # Root: 'ar' receives the outputs of both ridge nodes
    ar_root = SecondaryNode('ar', nodes_from=[upper_ridge, lower_ridge])
    return Pipeline(ar_root)
示例#4
0
def run_pipeline_from_automl(train_file_path: str,
                             test_file_path: str,
                             max_run_time: timedelta = timedelta(minutes=10)):
    """ Function run pipeline with Auto ML models in nodes

    The pipeline has an 'rf' root fed by a 'tpot' node and by an 'lda' node
    which itself is fed by a 'scaling' node. It is fitted on the train file,
    used to predict on the test file, and the ROC AUC is printed and returned.

    :param train_file_path: path to the csv file with data for train
    :param test_file_path: path to the csv file with data for validation
    :param max_run_time: maximum running time for customization of the "tpot"
        model; the full duration (including whole days) is converted to seconds

    :return roc_auc_value: ROC AUC metric for pipeline
    """
    train_data = InputData.from_csv(train_file_path)
    test_data = InputData.from_csv(test_file_path)

    testing_target = test_data.target

    node_scaling = PrimaryNode('scaling')
    node_tpot = PrimaryNode('tpot')

    # timedelta.seconds is only the seconds component (0..86399) and drops
    # whole days, so a budget of e.g. 1 day would become 0 seconds.
    # total_seconds() covers the entire duration.
    max_run_time_sec = int(max_run_time.total_seconds())
    node_tpot.operation.params = {'max_run_time_sec': max_run_time_sec}

    node_lda = SecondaryNode('lda', nodes_from=[node_scaling])
    node_rf = SecondaryNode('rf', nodes_from=[node_tpot, node_lda])
    # The AutoML repository is assigned for 'model' before the pipeline is built
    OperationTypesRepository.assign_repo('model', 'automl_repository.json')
    pipeline = Pipeline(node_rf)

    pipeline.fit(train_data)
    results = pipeline.predict(test_data)

    roc_auc_value = roc_auc(y_true=testing_target, y_score=results.predict)
    print(roc_auc_value)

    return roc_auc_value
示例#5
0
def run_one_model_with_specific_evaluation_mod(train_data,
                                               test_data,
                                               mode: str = None):
    """
    Fit and evaluate one predefined 'svc' model through the Fedot API.

    The fit duration and the resulting metrics are printed.

    :param train_data: data the model is fitted on
    :param test_data: data passed to predict after fitting
    :param mode: pass 'gpu' to create Fedot with the 'gpu' preset;
        any other value creates it without a preset
    """
    api_kwargs = {'problem': 'classification'}
    if mode == 'gpu':
        api_kwargs['preset'] = 'gpu'
    baseline_model = Fedot(**api_kwargs)

    # These custom params are needed to make probability evaluation
    # available; without them an error occurs
    svc_params = {
        'kernel': 'rbf',
        'C': 10,
        'gamma': 1,
        'cache_size': 2000,
        'probability': True,
    }
    svc_node = PrimaryNode('svc')
    svc_node.custom_params = svc_params
    preset_pipeline = Pipeline(svc_node)

    start = datetime.now()
    baseline_model.fit(features=train_data,
                       target='target',
                       predefined_model=preset_pipeline)
    print(f'Completed with custom params in: {datetime.now() - start}')

    baseline_model.predict(features=test_data)
    metrics = baseline_model.get_metrics()
    print(metrics)
示例#6
0
def return_working_pipeline():
    """Return a pipeline where 'lagged/1' and 'exog_ts_data_source' nodes feed a 'ridge' root."""
    lagged_source = PrimaryNode('lagged/1')
    exog_source = PrimaryNode('exog_ts_data_source')

    ridge_root = SecondaryNode('ridge', nodes_from=[lagged_source, exog_source])
    return Pipeline(ridge_root)
示例#7
0
def test_forecast_with_exog():
    """Check that a lagged + exogenous -> 'linear' pipeline reproduces the expected series exactly."""
    (train_source_ts, predict_source_ts,
     train_exog_ts, predict_exog_ts,
     ts_test) = synthetic_with_exogenous_ts()

    # Node that applies the lagged transformation to the source series
    # NOTE(review): window_size is not defined in this function - presumably
    # a module-level name; confirm
    lagged_source = PrimaryNode('lagged')
    lagged_source.custom_params = dict(window_size=window_size)
    # Node that supplies the exogenous variable
    exog_source = PrimaryNode('exog_ts_data_source')

    linear_root = SecondaryNode('linear', nodes_from=[lagged_source, exog_source])
    pipeline = Pipeline(linear_root)

    # Each data source is keyed by the name of the primary node it feeds
    train_input = MultiModalData({
        'exog_ts_data_source': train_exog_ts,
        'lagged': train_source_ts
    })
    pipeline.fit(input_data=train_input)

    predict_input = MultiModalData({
        'exog_ts_data_source': predict_exog_ts,
        'lagged': predict_source_ts
    })
    forecast = pipeline.predict(input_data=predict_input)

    forecast_values = np.array(forecast.predict)
    prediction = np.ravel(forecast_values)

    assert tuple(prediction) == tuple(ts_test)
示例#8
0
def get_complex_pipeline():
    r"""
    Build a two-branch pipeline with the following structure:

    smoothing - lagged - ridge \
                                \
                                 ridge -> final forecast
                                /
                lagged - ridge /
    """
    # Leaf of the upper branch
    smoothing = PrimaryNode('smoothing')

    # Lagged transformations: one after smoothing, one directly on the source
    lagged_after_smoothing = SecondaryNode('lagged', nodes_from=[smoothing])
    lagged_plain = PrimaryNode('lagged')

    # One 'ridge' model on top of each branch
    upper_ridge = SecondaryNode('ridge', nodes_from=[lagged_after_smoothing])
    lower_ridge = SecondaryNode('ridge', nodes_from=[lagged_plain])

    # Root 'ridge' merges both branches
    ridge_root = SecondaryNode('ridge', nodes_from=[upper_ridge, lower_ridge])
    return Pipeline(ridge_root)
示例#9
0
def get_nodes():
    """Return the nodes of a (knn, knn) -> lda -> logit graph as [root, lda, first knn, second knn]."""
    knn_first = PrimaryNode('knn')
    knn_second = PrimaryNode('knn')
    lda_node = SecondaryNode('lda', nodes_from=[knn_first, knn_second])
    logit_root = SecondaryNode('logit', nodes_from=[lda_node])

    nodes = [logit_root, lda_node, knn_first, knn_second]
    return nodes
示例#10
0
def pipeline_with_incorrect_task_type():
    """Return a (pipeline, task) pair: two 'linear' nodes feeding a 'kmeans' root, plus a classification task."""
    linear_parents = [PrimaryNode(operation_type='linear') for _ in range(2)]
    kmeans_root = SecondaryNode(operation_type='kmeans', nodes_from=linear_parents)

    return Pipeline(kmeans_root), Task(TaskTypesEnum.classification)
示例#11
0
def pipeline_with_incorrect_root_operation():
    """Return a pipeline whose root is a 'scaling' operation fed by two 'logit' nodes."""
    logit_left = PrimaryNode(operation_type='logit')
    logit_right = PrimaryNode(operation_type='logit')
    scaling_root = SecondaryNode(operation_type='scaling',
                                 nodes_from=[logit_left, logit_right])
    return Pipeline(scaling_root)
示例#12
0
def get_complex_class_pipeline():
    """Return a pipeline where 'xgboost' and 'pca' nodes feed a 'logit' root."""
    xgboost_node = PrimaryNode(operation_type='xgboost')
    pca_node = PrimaryNode(operation_type='pca')
    logit_root = SecondaryNode(operation_type='logit',
                               nodes_from=[xgboost_node, pca_node])
    return Pipeline(logit_root)
示例#13
0
def get_ts_pipeline(window_size):
    """ Return a lagged -> ridge pipeline

    :param window_size: value stored as 'window_size' in the lagged node's custom params
    """
    lagged = PrimaryNode('lagged')
    lagged.custom_params = dict(window_size=window_size)

    ridge_root = SecondaryNode('ridge', nodes_from=[lagged])
    return Pipeline(ridge_root)
示例#14
0
def get_simple_ts_pipeline(model_root: str = 'ridge', window_size: int = 20):
    """ Return a pipeline where a 'lagged' node feeds a configurable root operation

    :param model_root: operation type of the root node
    :param window_size: value stored as 'window_size' in the lagged node's custom params
    """
    lagged = PrimaryNode('lagged')
    lagged.custom_params = dict(window_size=window_size)

    root = SecondaryNode(model_root, nodes_from=[lagged])
    return Pipeline(root)
示例#15
0
def test_distance_to_primary_level():
    """The root of a (knn, knn) -> lda -> logit graph reports a distance of 2 to the primary level."""
    knn_parents = [PrimaryNode('knn'), PrimaryNode('knn')]
    lda_node = SecondaryNode('lda', nodes_from=knn_parents)
    logit_root = SecondaryNode('logit', nodes_from=[lda_node])

    assert logit_root.distance_to_primary_level == 2
示例#16
0
def pipeline_with_incorrect_data_flow():
    """ Return a pipeline where two 'scaling' nodes feed a 'ridge' root.

    When the features are combined in this pipeline, a table with 5 columns
    turns into a table with 10 columns """
    scaling_left = PrimaryNode(operation_type='scaling')
    scaling_right = PrimaryNode(operation_type='scaling')

    ridge_root = SecondaryNode(operation_type='ridge',
                               nodes_from=[scaling_left, scaling_right])
    return Pipeline(ridge_root)
示例#17
0
def get_simple_short_lagged_pipeline():
    """ Return a simple forecasting pipeline: lagged (window_size=4) -> linear """
    lagged = PrimaryNode('lagged')
    # Only 4 elements of the time series are used as predictors
    lagged.custom_params = dict(window_size=4)

    linear_root = SecondaryNode('linear', nodes_from=[lagged])
    return Pipeline(linear_root)
示例#18
0
    def roll_pipeline_structure(
            self,
            operation_object: ['OperationTemplate', 'AtomizedModelTemplate'],
            visited_nodes: dict,
            path: str = None):
        """
        Recursively turn an operation template and its parent templates into
        connected pipeline nodes.

        :param operation_object: template to convert; it becomes a
            SecondaryNode if it has nodes_from, a PrimaryNode otherwise
        :param visited_nodes: mapping operation_id -> already built node,
            so each template is converted only once
        :param path: directory the fitted operations are loaded from;
            nothing is loaded when it is None
        :return: node built for operation_object
        :raises FileNotFoundError: if the fitted operation file is missing
        """
        # NOTE(review): the operation_object annotation is a list literal;
        # presumably a Union of the two template types was intended
        template_id = operation_object.operation_id
        if template_id in visited_nodes:
            return visited_nodes[template_id]

        is_atomized = operation_object.operation_type == atomized_model_type()
        if is_atomized:
            # Atomized models use the nested pipeline template as the operation
            operation = operation_object.next_pipeline_template
        else:
            operation = operation_object.operation_type

        node_class = SecondaryNode if operation_object.nodes_from else PrimaryNode
        node = node_class(operation_type=operation)

        if not is_atomized:
            # Params and rating are restored only for ordinary operations
            node.operation.params = operation_object.params
            node.rating = operation_object.rating

        relative_fitted_path = getattr(operation_object,
                                       'fitted_operation_path', None)
        if relative_fitted_path and path is not None:
            path_to_operation = os.path.join(path, relative_fitted_path)
            if not os.path.isfile(path_to_operation):
                message = f"Fitted operation on the path: {path_to_operation} does not exist."
                self.log.error(message)
                raise FileNotFoundError(message)

            # NOTE(review): joblib.load unpickles the file, so the path
            # should only point to trusted data
            fitted_operation = joblib.load(path_to_operation)
            operation_object.fitted_operation = fitted_operation
            node.fitted_operation = fitted_operation

        # Pick the parent templates first, then convert each one recursively
        parent_templates = [
            template for template in self.operation_templates
            if template.operation_id in operation_object.nodes_from
        ]
        parent_nodes = [
            self.roll_pipeline_structure(parent, visited_nodes, path)
            for parent in parent_templates
        ]
        node.nodes_from = parent_nodes

        visited_nodes[operation_object.operation_id] = node
        return node
示例#19
0
def test_ordered_subnodes_hierarchy():
    """ordered_subnodes_hierarchy() on the root of a (knn, knn) -> lda -> logit graph yields 4 nodes."""
    knn_parents = [PrimaryNode('knn'), PrimaryNode('knn')]
    lda_node = SecondaryNode('lda', nodes_from=knn_parents)
    logit_root = SecondaryNode('logit', nodes_from=[lda_node])

    hierarchy = logit_root.ordered_subnodes_hierarchy()

    assert len(hierarchy) == 4
示例#20
0
def get_stlarima_pipeline():
    """ Return a pipeline that consists of a single 'stl_arima' node
    with custom params period=80, p=2, d=1, q=0
    """
    stl_arima_node = PrimaryNode('stl_arima')
    stl_arima_node.custom_params = dict(period=80, p=2, d=1, q=0)
    return Pipeline(stl_arima_node)
示例#21
0
def get_pipeline():
    """Return a pipeline where 'knn', 'qda' and 'xgboost' nodes feed an 'xgboost' root."""
    knn_node = PrimaryNode('knn')
    qda_node = PrimaryNode('qda')
    xgboost_node = PrimaryNode('xgboost')

    xgboost_root = SecondaryNode('xgboost',
                                 nodes_from=[knn_node, qda_node, xgboost_node])
    return Pipeline(xgboost_root)
示例#22
0
def get_simple_pipeline(log):
    """ Return an (xgboost, knn) -> logit pipeline whose nodes and pipeline share one log

    :param log: log object passed to every node and to the pipeline
    """
    xgboost_node = PrimaryNode(operation_type='xgboost', log=log)
    knn_node = PrimaryNode(operation_type='knn', log=log)
    logit_root = SecondaryNode(operation_type='logit',
                               nodes_from=[xgboost_node, knn_node],
                               log=log)

    # Without an explicit log object, Pipeline creates a default log.log file placed in core
    return Pipeline(logit_root, log=log)
示例#23
0
def get_non_refinement_pipeline(lagged):
    """ Create a pipeline without decompose operation

    One 'lagged' node feeds both 'lasso' and 'dtreg' (max_depth=3),
    and those two nodes feed the 'ridge' root.

    :param lagged: value stored as 'window_size' in the lagged node's custom params
    """
    lagged_node = PrimaryNode('lagged')
    lagged_node.custom_params = dict(window_size=lagged)

    # Both models are built on top of the same lagged node
    lasso_node = SecondaryNode('lasso', nodes_from=[lagged_node])
    dtreg_node = SecondaryNode('dtreg', nodes_from=[lagged_node])
    dtreg_node.custom_params = dict(max_depth=3)

    ridge_root = SecondaryNode('ridge', nodes_from=[lasso_node, dtreg_node])
    return Pipeline(ridge_root)
示例#24
0
def test_pipeline_repr():
    """repr() of a (logit, lda, knn) -> xgboost pipeline shows its depth, length and nodes."""
    logit_node = PrimaryNode(operation_type='logit')
    lda_node = PrimaryNode(operation_type='lda')
    knn_node = PrimaryNode(operation_type='knn')
    xgboost_root = SecondaryNode(operation_type='xgboost',
                                 nodes_from=[logit_node, lda_node, knn_node])

    # The pipeline starts empty and receives the whole graph through its root
    pipeline = Pipeline()
    pipeline.add_node(xgboost_root)

    actual_description = repr(pipeline)

    assert actual_description == "{'depth': 2, 'length': 4, 'nodes': [xgboost, logit, lda, knn]}"
示例#25
0
def get_arima_nemo_pipeline():
    r""" Return a pipeline with the following structure
        arima \
               ridge
        nemo  |

    The "nemo" branch is an 'exog_ts_data_source' node.
    """
    arima_node = PrimaryNode('arima')
    nemo_node = PrimaryNode('exog_ts_data_source')

    ridge_root = SecondaryNode('ridge', nodes_from=[arima_node, nemo_node])
    return Pipeline(ridge_root)
示例#26
0
def create_classification_pipeline_with_preprocessing():
    """Return a pipeline: scaling -> xgboost and rfe_lin_class -> logit, both feeding a 'knn' root."""
    # Leaf nodes
    scaling = PrimaryNode('scaling')
    rfe = PrimaryNode('rfe_lin_class')

    # One model on top of each leaf
    xgboost_branch = SecondaryNode('xgboost', nodes_from=[scaling])
    logit_branch = SecondaryNode('logit', nodes_from=[rfe])

    knn_root = SecondaryNode('knn', nodes_from=[xgboost_branch, logit_branch])
    return Pipeline(knn_root)
示例#27
0
def get_pipeline():
    """Return a pipeline: (lda -> qda, qda) -> knn -> logit."""
    lda_leaf = PrimaryNode('lda')

    qda_over_lda = SecondaryNode('qda', nodes_from=[lda_leaf])
    qda_leaf = PrimaryNode('qda')

    knn_merge = SecondaryNode('knn', nodes_from=[qda_over_lda, qda_leaf])

    logit_root = SecondaryNode(operation_type='logit', nodes_from=[knn_merge])
    return Pipeline(logit_root)
示例#28
0
def get_simple_pipeline():
    r""" Return a simple pipeline with the following structure:
    xgboost \
             -> logit
      knn   |
    """
    xgboost_node = PrimaryNode(operation_type='xgboost')
    knn_node = PrimaryNode(operation_type='knn')
    logit_root = SecondaryNode(operation_type='logit',
                               nodes_from=[xgboost_node, knn_node])
    return Pipeline(logit_root)
示例#29
0
def test_delete_node_with_redirection():
    """Deleting the middle 'knn' node leaves 3 nodes and puts the first primary node among the root's parents."""
    logit_node = PrimaryNode(operation_type='logit')
    lda_node = PrimaryNode(operation_type='lda')
    knn_middle = SecondaryNode(operation_type='knn',
                               nodes_from=[logit_node, lda_node])
    xgboost_root = SecondaryNode(operation_type='xgboost',
                                 nodes_from=[knn_middle])
    pipeline = Pipeline()
    pipeline.add_node(xgboost_root)

    pipeline.delete_node(knn_middle)

    remaining_count = len(pipeline.nodes)
    assert remaining_count == 3
    assert logit_node in pipeline.root_node.nodes_from
示例#30
0
def get_three_depth_manual_regr_pipeline():
    """Return a pipeline: xgbreg -> dtreg and knnreg -> rfr, both feeding a 'knnreg' root."""
    # Leaf nodes
    xgbreg_leaf = PrimaryNode('xgbreg')
    knnreg_leaf = PrimaryNode('knnreg')

    # One model on top of each leaf
    dtreg_branch = SecondaryNode('dtreg', nodes_from=[xgbreg_leaf])
    rfr_branch = SecondaryNode('rfr', nodes_from=[knnreg_leaf])

    knnreg_root = SecondaryNode('knnreg', nodes_from=[dtreg_branch, rfr_branch])
    return Pipeline(knnreg_root)