Пример #1
0
def test_invalid_init():
    """Check that ComponentGraph rejects malformed component dictionaries.

    Exercises four inputs: an entry that is not a list, two graphs whose
    entries hold already-instantiated components, and a graph that is
    expected to raise MissingComponentError.
    """
    list_msg = 'All component information should be passed in as a list'
    type_msg = 'may only contain str or ComponentBase subclasses'

    # 'OHE' maps to a bare class rather than to a list.
    non_list_entry = {'Imputer': [Imputer], 'OHE': OneHotEncoder}
    with pytest.raises(ValueError, match=list_msg):
        ComponentGraph(non_list_entry)

    # An instantiated Imputer next to a component class.
    with pytest.raises(ValueError, match=type_msg):
        ComponentGraph({
            'Imputer': [Imputer(numeric_impute_strategy="most_frequent")],
            'OneHot': [OneHotEncoder],
        })

    # A graph consisting of a single instantiated component.
    instantiated_only = {
        'Imputer': [
            Imputer(numeric_impute_strategy='constant', numeric_fill_value=0)
        ],
    }
    with pytest.raises(ValueError, match=type_msg):
        ComponentGraph(instantiated_only)

    # String-named components, including the name 'Fake Component'.
    unresolvable = {
        'Imputer': ['Imputer', 'Fake'],
        'Fake': ['Fake Component', 'Estimator'],
        'Estimator': [ElasticNetClassifier],
    }
    with pytest.raises(MissingComponentError):
        ComponentGraph(unresolvable)
Пример #2
0
def test_component_graph_sampler():
    """Check get_parents() for a graph that contains an Undersampler.

    The two estimators fed by the sampler list both 'Undersampler.x' and
    'Undersampler.y' as parents.
    """
    sampler_outputs = ['Undersampler.x', 'Undersampler.y']
    graph = {
        'Imputer': [Imputer],
        'OneHot': [OneHotEncoder, 'Imputer.x'],
        'Undersampler': [Undersampler, 'OneHot.x'],
        'Random Forest': [RandomForestClassifier] + sampler_outputs,
        'Elastic Net': [ElasticNetClassifier] + sampler_outputs,
        'Logistic Regression': [
            LogisticRegressionClassifier, 'Random Forest', 'Elastic Net'
        ],
    }

    component_graph = ComponentGraph(graph)
    component_graph.instantiate({})

    # Component name -> parents expected from get_parents().
    expected_parents = {
        'Imputer': [],
        'OneHot': ['Imputer.x'],
        'Undersampler': ['OneHot.x'],
        'Random Forest': ['Undersampler.x', 'Undersampler.y'],
        'Elastic Net': ['Undersampler.x', 'Undersampler.y'],
        'Logistic Regression': ['Random Forest', 'Elastic Net'],
    }
    for component_name, parents in expected_parents.items():
        assert component_graph.get_parents(component_name) == parents
Пример #3
0
def test_init_bad_graphs():
    """Check that ComponentGraph rejects structurally invalid graphs.

    Three graphs are tried; each must raise ValueError with a message about
    a cycle, an incompletely connected graph, or more than one final
    component, respectively.
    """
    # 'OHE' and 'Estimator' reference each other.
    cyclic = {
        'Imputer': [Imputer],
        'OHE': [OneHotEncoder, 'Imputer.x', 'Estimator'],
        'Estimator': [RandomForestClassifier, 'OHE.x'],
    }
    with pytest.raises(ValueError, match='given graph contains a cycle'):
        ComponentGraph(cyclic)

    # Neither 'Random Forest' nor 'Elastic Net' lists any parent.
    disconnected = {
        'Imputer': [Imputer],
        'OneHot_RandomForest': [OneHotEncoder, 'Imputer.x'],
        'OneHot_ElasticNet': [OneHotEncoder, 'Imputer.x'],
        'Random Forest': [RandomForestClassifier],
        'Elastic Net': [ElasticNetClassifier],
        'Logistic Regression': [
            LogisticRegressionClassifier, 'Random Forest', 'Elastic Net'
        ],
    }
    with pytest.raises(ValueError, match='graph is not completely connected'):
        ComponentGraph(disconnected)

    # Same layout with string component names; 'Random Forest' now has a
    # parent while 'Elastic Net' still has none.
    multiple_finals = {
        'Imputer': ['Imputer'],
        'OneHot_RandomForest': ['One Hot Encoder', 'Imputer.x'],
        'OneHot_ElasticNet': ['One Hot Encoder', 'Imputer.x'],
        'Random Forest': ['Random Forest Classifier', 'OneHot_RandomForest.x'],
        'Elastic Net': ['Elastic Net Classifier'],
        'Logistic Regression': [
            'Logistic Regression Classifier', 'Random Forest', 'Elastic Net'
        ],
    }
    with pytest.raises(ValueError, match='graph has more than one final'):
        ComponentGraph(multiple_finals)
Пример #4
0
    def __init__(self, parameters, random_state=0):
        """Machine learning pipeline made out of transformers and an estimator.

        Required Class Variables:
            component_graph (list or dict): Components of the pipeline. A list is converted with
                ComponentGraph.from_list; any other value is handed to ComponentGraph as component_dict.

        Arguments:
            parameters (dict): Dictionary with component names as keys and dictionary of that component's parameters as values.
                 An empty dictionary {} implies using all default values for component parameters.
            random_state (int, np.random.RandomState): The random seed/state. Defaults to 0.
        """
        self.random_state = get_random_state(random_state)

        if not isinstance(self.component_graph, list):
            self._component_graph = ComponentGraph(
                component_dict=self.component_graph,
                random_state=self.random_state,
            )
        else:
            # Backwards compatibility: component graphs given as a plain list.
            self._component_graph = ComponentGraph().from_list(
                self.component_graph,
                random_state=self.random_state,
            )
        self._component_graph.instantiate(parameters)

        self.input_feature_names = {}
        self.input_target_name = None

        # Only a final component that is an Estimator is exposed as the
        # pipeline's estimator.
        last_component = self._component_graph.get_last_component()
        if isinstance(last_component, Estimator):
            self.estimator = last_component
        else:
            self.estimator = None

        if self.estimator is None:
            self._estimator_name = None
        else:
            self._estimator_name = self._component_graph.compute_order[-1]

        self._validate_estimator_problem_type()
        self._is_fitted = False
        self._pipeline_params = parameters.get("pipeline", {})
Пример #5
0
def test_predict_repeat_estimator(mock_predict, mock_fit, X_y_binary):
    """Check fit/predict on a graph that uses LogisticRegressionClassifier twice.

    'Logistic Regression' and 'Final Estimator' are built from the same
    class; their underlying _component_obj values must not compare equal.
    """
    X, y = X_y_binary
    mock_predict.return_value = ww.DataColumn(pd.Series(y))

    graph = {
        'Imputer': [Imputer],
        'OneHot_RandomForest': [OneHotEncoder, 'Imputer.x'],
        'OneHot_Logistic': [OneHotEncoder, 'Imputer.x'],
        'Random Forest': [RandomForestClassifier, 'OneHot_RandomForest.x'],
        'Logistic Regression': [
            LogisticRegressionClassifier, 'OneHot_Logistic.x'
        ],
        'Final Estimator': [
            LogisticRegressionClassifier, 'Random Forest', 'Logistic Regression'
        ],
    }
    component_graph = ComponentGraph(graph)
    component_graph.instantiate({})
    component_graph.fit(X, y)

    inner_estimator = component_graph.get_component('Logistic Regression')
    final_estimator = component_graph.get_component('Final Estimator')
    assert not inner_estimator._component_obj == final_estimator._component_obj

    component_graph.predict(X)
    assert mock_predict.call_count == 5
    assert mock_fit.call_count == 3
Пример #6
0
def test_no_instantiate_before_fit(X_y_binary):
    """Check that fit() on a graph that was never instantiated raises ValueError."""
    X, y = X_y_binary
    graph = {
        'Imputer': [Imputer],
        'OHE': [OneHotEncoder, 'Imputer.x'],
        'Estimator': [RandomForestClassifier, 'OHE.x'],
    }
    component_graph = ComponentGraph(graph)

    expected_message = 'All components must be instantiated before fitting or predicting'
    with pytest.raises(ValueError, match=expected_message):
        component_graph.fit(X, y)
Пример #7
0
def test_component_graph_order(example_graph):
    """Check compute_order for the example graph and for a one-component graph."""
    full_graph = ComponentGraph(example_graph)
    full_order = [
        'Imputer',
        'OneHot_ElasticNet',
        'Elastic Net',
        'OneHot_RandomForest',
        'Random Forest',
        'Logistic Regression',
    ]
    assert full_order == full_graph.compute_order

    single_graph = ComponentGraph({'Imputer': [Imputer]})
    single_order = ['Imputer']
    assert single_order == single_graph.compute_order
Пример #8
0
def test_compute_final_component_features_single_component(mock_transform, X_y_binary):
    """Check compute_final_component_features() on a graph with one transformer.

    The mocked transform returns X wrapped in a DataTable, and the features
    returned by the graph must equal X.
    """
    X, y = X_y_binary
    X = pd.DataFrame(X)
    mock_transform.return_value = ww.DataTable(X)

    graph = {'Dummy Component': [DummyTransformer]}
    component_graph = ComponentGraph(graph).instantiate({})
    component_graph.fit(X, y)

    final_features = component_graph.compute_final_component_features(X)
    assert_frame_equal(X, final_features.to_dataframe())
Пример #9
0
def test_fit_y_parent(mock_fit_transform, X_y_binary):
    """Check fitting a graph in which 'OHE' takes both 'Imputer.x' and 'Imputer.y'.

    The mocked fit_transform returns an (X, y) pair and must be called
    exactly once during fit().
    """
    X, y = X_y_binary
    graph = {
        'Imputer': [Imputer],
        'OHE': [OneHotEncoder, 'Imputer.x', 'Imputer.y'],
        'Random Forest': [RandomForestClassifier, 'OHE.x'],
    }
    component_graph = ComponentGraph(graph).instantiate({})
    mock_fit_transform.return_value = (pd.DataFrame(X), pd.Series(y))

    component_graph.fit(X, y)
    mock_fit_transform.assert_called_once()
Пример #10
0
def test_fit(mock_predict, mock_fit, mock_fit_transform, example_graph, X_y_binary):
    """Check how often the mocked methods are called while fitting the example graph."""
    X, y = X_y_binary
    mock_fit_transform.return_value = ww.DataTable(X)
    mock_predict.return_value = ww.DataColumn(y)

    component_graph = ComponentGraph(example_graph).instantiate({})
    component_graph.fit(X, y)

    observed_calls = (
        mock_fit_transform.call_count,
        mock_fit.call_count,
        mock_predict.call_count,
    )
    assert observed_calls[0] == 3
    assert observed_calls[1] == 3
    assert observed_calls[2] == 2
Пример #11
0
def test_init(example_graph):
    """Check ComponentGraph construction with no arguments and with the example graph."""
    empty_graph = ComponentGraph()
    assert len(empty_graph.component_dict) == 0

    comp_graph = ComponentGraph(example_graph)
    assert len(comp_graph.component_dict) == 6

    expected_order = [
        'Imputer',
        'OneHot_ElasticNet',
        'Elastic Net',
        'OneHot_RandomForest',
        'Random Forest',
        'Logistic Regression',
    ]
    assert comp_graph.compute_order == expected_order
Пример #12
0
def test_predict_transformer_end(mock_fit_transform, mock_transform, X_y_binary):
    """Check predict() on a graph whose last component is 'OHE'.

    Both mocks return an (X, y) pair; the predict() output must equal X as
    a DataFrame.
    """
    X, y = X_y_binary
    graph = {
        'Imputer': [Imputer],
        'OHE': [OneHotEncoder, 'Imputer.x'],
    }
    component_graph = ComponentGraph(graph).instantiate({})
    mock_fit_transform.return_value = (pd.DataFrame(X), pd.Series(y))
    mock_transform.return_value = (pd.DataFrame(X), pd.Series(y))

    component_graph.fit(X, y)
    predictions = component_graph.predict(X)
    assert_frame_equal(pd.DataFrame(X), predictions.to_dataframe())
Пример #13
0
def test_iteration(example_graph):
    """Check what iterating over a ComponentGraph yields.

    Before instantiate() the iteration is compared against component
    classes; afterwards against component instances.
    """
    component_graph = ComponentGraph(example_graph)

    expected_classes = [
        Imputer,
        OneHotEncoder,
        ElasticNetClassifier,
        OneHotEncoder,
        RandomForestClassifier,
        LogisticRegressionClassifier,
    ]
    yielded = [item for item in component_graph]
    assert yielded == expected_classes

    component_graph.instantiate({'OneHot_RandomForest': {'top_n': 32}})
    expected_instances = [
        Imputer(),
        OneHotEncoder(),
        ElasticNetClassifier(),
        OneHotEncoder(top_n=32),
        RandomForestClassifier(),
        LogisticRegressionClassifier(),
    ]
    yielded = [item for item in component_graph]
    assert yielded == expected_instances
Пример #14
0
    def __init__(self,
                 component_graph,
                 parameters=None,
                 custom_name=None,
                 custom_hyperparameters=None,
                 random_seed=0):
        """Machine learning pipeline made out of transformers and an estimator.

        Arguments:
            component_graph (list or dict): List of components in order. Accepts strings or ComponentBase subclasses in the list.
                Note that when duplicate components are specified in a list, the duplicate component names will be modified with the
                component's index in the list. For example, the component graph
                [Imputer, One Hot Encoder, Imputer, Logistic Regression Classifier] will have names
                ["Imputer", "One Hot Encoder", "Imputer_2", "Logistic Regression Classifier"]
            parameters (dict): Dictionary with component names as keys and dictionary of that component's parameters as values.
                 An empty dictionary or None implies using all default values for component parameters. Defaults to None.
            custom_name (str): Custom name for the pipeline. Defaults to None.
            custom_hyperparameters (dict): Custom hyperparameter range for the pipeline. Defaults to None.
            random_seed (int): Seed for the random number generator. Defaults to 0.
        """
        self._custom_hyperparameters = custom_hyperparameters
        self.random_seed = random_seed

        self.component_graph = component_graph
        if not isinstance(component_graph, list):
            self._component_graph = ComponentGraph(
                component_dict=component_graph,
                random_seed=self.random_seed,
            )
        else:
            # Backwards compatibility: component graphs given as a plain list.
            self._component_graph = ComponentGraph().from_list(
                component_graph,
                random_seed=self.random_seed,
            )
        self._component_graph.instantiate(parameters)

        self.input_feature_names = {}
        self.input_target_name = None

        # The last component is only looked up when the compute order is
        # non-empty, and is only kept when it is an Estimator.
        self.estimator = None
        if len(self._component_graph.compute_order) > 0:
            last_component = self._component_graph.get_last_component()
            if isinstance(last_component, Estimator):
                self.estimator = last_component
            else:
                self.estimator = None

        if self.estimator is None:
            self._estimator_name = None
        else:
            self._estimator_name = self._component_graph.compute_order[-1]

        self._validate_estimator_problem_type()
        self._is_fitted = False

        # Pipeline-level parameters live under the "pipeline" key, if any
        # parameters were supplied.
        self._pipeline_params = None
        if parameters is not None:
            self._pipeline_params = parameters.get("pipeline", {})

        self._custom_name = custom_name
Пример #15
0
def test_fit_correct_inputs(mock_ohe_fit_transform, mock_imputer_fit_transform, X_y_binary):
    """Check that the mocked Imputer output reaches the OHE fit_transform call.

    The Imputer mock returns an (X, y) pair; the positional arguments
    recorded on the OHE mock are compared against that pair.
    """
    X, y = X_y_binary
    X = pd.DataFrame(X)
    y = pd.Series(y)
    graph = {
        'Imputer': [Imputer],
        'OHE': [OneHotEncoder, 'Imputer.x', 'Imputer.y'],
    }

    filled_frame = pd.DataFrame(index=X.index, columns=X.index).fillna(1)
    expected_x = ww.DataTable(filled_frame)
    filled_series = pd.Series(index=y.index).fillna(0)
    expected_y = ww.DataColumn(filled_series)

    mock_imputer_fit_transform.return_value = (expected_x, expected_y)
    mock_ohe_fit_transform.return_value = expected_x

    component_graph = ComponentGraph(graph).instantiate({})
    component_graph.fit(X, y)

    expected_x_df = expected_x.to_dataframe().astype("Int64")
    ohe_positional_args = mock_ohe_fit_transform.call_args[0]
    assert_frame_equal(expected_x_df, ohe_positional_args[0].to_dataframe())
    assert_series_equal(expected_y.to_series(), ohe_positional_args[1].to_series())
Пример #16
0
def test_compute_final_component_features_nonlinear(mock_en_predict, mock_rf_predict, mock_ohe, mock_imputer, example_graph, X_y_binary):
    """Check compute_final_component_features() on the example graph.

    The mocked Random Forest predict returns zeros and the mocked Elastic
    Net predict returns ones; the final features must hold those two
    columns.
    """
    X, y = X_y_binary
    row_count = X.shape[0]

    mock_imputer.return_value = ww.DataTable(pd.DataFrame(X))
    mock_ohe.return_value = ww.DataTable(pd.DataFrame(X))
    mock_en_predict.return_value = ww.DataColumn(pd.Series(np.ones(row_count)))
    mock_rf_predict.return_value = ww.DataColumn(pd.Series(np.zeros(row_count)))

    X_expected = pd.DataFrame({
        'Random Forest': np.zeros(row_count),
        'Elastic Net': np.ones(row_count),
    })
    component_graph = ComponentGraph(example_graph).instantiate({})
    component_graph.fit(X, y)

    final_features = component_graph.compute_final_component_features(X)
    assert_frame_equal(X_expected, final_features.to_dataframe())
    assert mock_imputer.call_count == 2
    assert mock_ohe.call_count == 4
Пример #17
0
def test_component_graph_sampler_list():
    """Check ComponentGraph.from_list() for a list that contains an Undersampler.

    Verifies the resolved component classes, the compute order, the
    generated component_dict, and each component's parents.
    """
    component_list = [
        'Imputer',
        'One Hot Encoder',
        'Undersampler',
        'Random Forest Classifier',
    ]
    component_graph = ComponentGraph.from_list(component_list)

    assert len(component_graph.component_dict) == 4

    # Component name -> class expected from get_component().
    expected_classes = {
        'Imputer': Imputer,
        'One Hot Encoder': OneHotEncoder,
        'Undersampler': Undersampler,
        'Random Forest Classifier': RandomForestClassifier,
    }
    for component_name, component_class in expected_classes.items():
        assert component_graph.get_component(component_name) == component_class

    assert component_graph.compute_order == component_list
    assert component_graph.component_dict == {
        'Imputer': [Imputer],
        'One Hot Encoder': [OneHotEncoder, 'Imputer.x'],
        'Undersampler': [Undersampler, 'One Hot Encoder.x'],
        'Random Forest Classifier': [
            RandomForestClassifier, 'Undersampler.x', 'Undersampler.y'
        ],
    }

    # Component name -> parents expected from get_parents().
    expected_parents = {
        'Imputer': [],
        'One Hot Encoder': ['Imputer.x'],
        'Undersampler': ['One Hot Encoder.x'],
        'Random Forest Classifier': ['Undersampler.x', 'Undersampler.y'],
    }
    for component_name, parents in expected_parents.items():
        assert component_graph.get_parents(component_name) == parents
Пример #18
0
def test_order_x_and_y():
    """Check compute_order when 'OHE' takes both 'Imputer.x' and 'Imputer.y'."""
    ohe_entry = [OneHotEncoder, 'Imputer.x', 'Imputer.y']
    graph = {
        'Imputer': [Imputer],
        'OHE': ohe_entry,
        'Random Forest': [RandomForestClassifier, 'OHE.x'],
    }
    component_graph = ComponentGraph(graph).instantiate({})

    expected_order = ['Imputer', 'OHE', 'Random Forest']
    assert component_graph.compute_order == expected_order
Пример #19
0
def test_instantiate_without_parameters(example_graph):
    """Check instantiate({}) on the example graph.

    Both one-hot encoders must report top_n == 10 and be distinct objects,
    and the compute order must match the expected list.
    """
    component_graph = ComponentGraph(example_graph)
    component_graph.instantiate({})

    assert component_graph.get_component('OneHot_RandomForest').parameters['top_n'] == 10
    assert component_graph.get_component('OneHot_ElasticNet').parameters['top_n'] == 10

    rf_encoder = component_graph.get_component('OneHot_RandomForest')
    en_encoder = component_graph.get_component('OneHot_ElasticNet')
    assert rf_encoder is not en_encoder

    expected_order = [
        'Imputer',
        'OneHot_ElasticNet',
        'Elastic Net',
        'OneHot_RandomForest',
        'Random Forest',
        'Logistic Regression',
    ]
    assert component_graph.compute_order == expected_order
Пример #20
0
def test_bad_instantiate_can_reinstantiate(example_graph):
    """Check that instantiate() can be called again after a failed attempt."""
    component_graph = ComponentGraph(example_graph)

    # First attempt passes 'fake_param' and must raise ValueError.
    bad_parameters = {'Elastic Net': {'max_iter': 100, 'fake_param': None}}
    with pytest.raises(ValueError, match='Error received when instantiating component'):
        component_graph.instantiate(parameters=bad_parameters)

    # Second attempt must take effect on the same graph.
    component_graph.instantiate({'Elastic Net': {'max_iter': 22}})
    elastic_net = component_graph.get_component('Elastic Net')
    assert elastic_net.parameters['max_iter'] == 22
Пример #21
0
 def model_family(cls):
     """Returns model family of this pipeline template"""
     graph = copy.copy(cls.component_graph)
     if not isinstance(graph, list):
         # Dict graph: the final component is the last name in the
         # generated order, and its class is the entry's first element.
         final_name = ComponentGraph.generate_order(graph)[-1]
         final_component_class = graph[final_name][0]
     else:
         # List graph: the final component is simply the last element.
         final_component_class = graph[-1]
     return handle_component_class(final_component_class).model_family
Пример #22
0
def test_component_graph():
    """Build and return a ComponentGraph from string-named components."""
    graph = {
        'Imputer': ['Imputer'],
        'OneHot_RandomForest': ['One Hot Encoder', 'Imputer.x'],
        'OneHot_ElasticNet': ['One Hot Encoder', 'Imputer.x'],
        'Random Forest': ['Random Forest Classifier', 'OneHot_RandomForest.x'],
        'Elastic Net': ['Elastic Net Classifier', 'OneHot_ElasticNet.x'],
        'Logistic Regression': [
            'Logistic Regression Classifier', 'Random Forest', 'Elastic Net'
        ],
    }
    return ComponentGraph(graph)
Пример #23
0
def test_predict_empty_graph(X_y_binary):
    """Check that predict() on an empty ComponentGraph returns data equal to X."""
    X, y = X_y_binary
    X = pd.DataFrame(X)

    empty_graph = ComponentGraph()
    empty_graph.instantiate({})
    empty_graph.fit(X, y)

    predictions = empty_graph.predict(X)
    assert_frame_equal(X, predictions.to_dataframe())
Пример #24
0
def test_instantiate_from_list():
    """Check instantiate() on a graph built with from_list().

    Parameters are supplied only for 'One Hot Encoder'; the other two
    components must still be instantiated.
    """
    component_names = ['Imputer', 'One Hot Encoder', 'Random Forest Classifier']
    component_graph = ComponentGraph().from_list(component_names)

    component_graph.instantiate({'One Hot Encoder': {'top_n': 7}})

    imputer = component_graph.get_component('Imputer')
    assert isinstance(imputer, Imputer)
    random_forest = component_graph.get_component('Random Forest Classifier')
    assert isinstance(random_forest, RandomForestClassifier)
    encoder = component_graph.get_component('One Hot Encoder')
    assert encoder.parameters['top_n'] == 7
Пример #25
0
def test_from_list_repeat_component():
    """Check from_list() when the same component appears twice in the list.

    The second 'One Hot Encoder' is expected under the name
    'One Hot Encoder_2' and must accept its own parameters.
    """
    component_list = [
        'Imputer',
        'One Hot Encoder',
        'One Hot Encoder',
        RandomForestClassifier,
    ]
    component_graph = ComponentGraph.from_list(component_list)

    expected_order = [
        'Imputer',
        'One Hot Encoder',
        'One Hot Encoder_2',
        'Random Forest Classifier',
    ]
    assert component_graph.compute_order == expected_order

    per_encoder_parameters = {
        'One Hot Encoder': {'top_n': 2},
        'One Hot Encoder_2': {'top_n': 11},
    }
    component_graph.instantiate(per_encoder_parameters)
    assert component_graph.get_component('One Hot Encoder').parameters['top_n'] == 2
    assert component_graph.get_component('One Hot Encoder_2').parameters['top_n'] == 11
Пример #26
0
def test_init_str_components():
    """Check ComponentGraph construction when components are given by string name."""
    graph = {
        'Imputer': ['Imputer'],
        'OneHot_RandomForest': ['One Hot Encoder', 'Imputer.x'],
        'OneHot_ElasticNet': ['One Hot Encoder', 'Imputer.x'],
        'Random Forest': ['Random Forest Classifier', 'OneHot_RandomForest.x'],
        'Elastic Net': ['Elastic Net Classifier', 'OneHot_ElasticNet.x'],
        'Logistic Regression': [
            'Logistic Regression Classifier', 'Random Forest', 'Elastic Net'
        ],
    }
    comp_graph = ComponentGraph(graph)
    assert len(comp_graph.component_dict) == 6

    expected_order = [
        'Imputer',
        'OneHot_ElasticNet',
        'Elastic Net',
        'OneHot_RandomForest',
        'Random Forest',
        'Logistic Regression',
    ]
    assert comp_graph.compute_order == expected_order
Пример #27
0
def test_from_list():
    """Check ComponentGraph.from_list() with mixed string and class entries.

    Verifies the resolved classes, the compute order and the generated
    component_dict, then checks that an unknown component name raises
    MissingComponentError.
    """
    component_list = ['Imputer', 'One Hot Encoder', RandomForestClassifier]
    component_graph = ComponentGraph.from_list(component_list)

    assert len(component_graph.component_dict) == 3

    # Component name -> class expected from get_component().
    expected_classes = {
        'Imputer': Imputer,
        'One Hot Encoder': OneHotEncoder,
        'Random Forest Classifier': RandomForestClassifier,
    }
    for component_name, component_class in expected_classes.items():
        assert component_graph.get_component(component_name) == component_class

    expected_order = ['Imputer', 'One Hot Encoder', 'Random Forest Classifier']
    assert component_graph.compute_order == expected_order
    assert component_graph.component_dict == {
        'Imputer': [Imputer],
        'One Hot Encoder': [OneHotEncoder, 'Imputer.x'],
        'Random Forest Classifier': [RandomForestClassifier, 'One Hot Encoder.x'],
    }

    unknown_components = ['Imputer', 'Fake Estimator']
    with pytest.raises(MissingComponentError, match='was not found'):
        ComponentGraph.from_list(unknown_components)
Пример #28
0
def test_input_feature_names(example_graph):
    """Check input_feature_names before and after fitting the example graph.

    The two one-hot encoders get different top_n values, and the two
    estimators are expected to record different encoded columns.
    """
    X = pd.DataFrame({
        'column_1': ['a', 'b', 'c', 'd', 'a', 'a', 'b', 'c', 'b'],
        'column_2': [1, 2, 3, 4, 5, 6, 5, 4, 3],
    })
    y = pd.Series([1, 0, 1, 0, 1, 1, 0, 0, 0])

    component_graph = ComponentGraph(example_graph)
    component_graph.instantiate({
        'OneHot_RandomForest': {'top_n': 2},
        'OneHot_ElasticNet': {'top_n': 3},
    })
    assert component_graph.input_feature_names == {}
    component_graph.fit(X, y)

    # Component name -> feature names expected after fit().
    expected_inputs = {
        'Imputer': ['column_1', 'column_2'],
        'OneHot_RandomForest': ['column_1', 'column_2'],
        'OneHot_ElasticNet': ['column_1', 'column_2'],
        'Random Forest': ['column_2', 'column_1_a', 'column_1_b'],
        'Elastic Net': ['column_2', 'column_1_a', 'column_1_b', 'column_1_c'],
        'Logistic Regression': ['Random Forest', 'Elastic Net'],
    }
    input_feature_names = component_graph.input_feature_names
    for component_name, feature_names in expected_inputs.items():
        assert input_feature_names[component_name] == feature_names
Пример #29
0
def test_component_graph_dataset_with_different_types():
    """Check input_feature_names for a dataset with mixed column types."""
    # Checks that types are converted correctly by Woodwork. Specifically, the
    # standard scaler should convert column_3 to float, so our code to try to
    # convert back to the original boolean type will catch the TypeError
    # thrown and not convert the column.
    graph = {
        'Imputer': [Imputer],
        'OneHot': [OneHotEncoder, 'Imputer.x'],
        'DateTime': [DateTimeFeaturizer, 'OneHot.x'],
        'Scaler': [StandardScaler, 'DateTime.x'],
        'Random Forest': [RandomForestClassifier, 'Scaler.x'],
        'Elastic Net': [ElasticNetClassifier, 'Scaler.x'],
        'Logistic Regression': [
            LogisticRegressionClassifier, 'Random Forest', 'Elastic Net'
        ],
    }

    X = pd.DataFrame({
        'column_1': ['a', 'b', 'c', 'd', 'a', 'a', 'b', 'c', 'b'],
        'column_2': [1, 2, 3, 4, 5, 6, 5, 4, 3],
        'column_3': [True, False, True, False, True, False, True, False, False],
    })
    y = pd.Series([1, 0, 1, 0, 1, 1, 0, 0, 0])
    X = infer_feature_types(X, {"column_2": "categorical"})

    component_graph = ComponentGraph(graph)
    component_graph.instantiate({})
    assert component_graph.input_feature_names == {}
    component_graph.fit(X, y)

    # Columns expected by the components upstream of the one-hot encoder
    # output, and by those downstream of it.
    raw_columns = ['column_1', 'column_2', 'column_3']
    encoded_columns = [
        'column_3',
        'column_1_a', 'column_1_b', 'column_1_c', 'column_1_d',
        'column_2_1', 'column_2_2', 'column_2_3',
        'column_2_4', 'column_2_5', 'column_2_6',
    ]

    input_feature_names = component_graph.input_feature_names
    for component_name in ('Imputer', 'OneHot'):
        assert input_feature_names[component_name] == raw_columns
    for component_name in ('DateTime', 'Scaler', 'Random Forest', 'Elastic Net'):
        assert input_feature_names[component_name] == encoded_columns
    assert input_feature_names['Logistic Regression'] == ['Random Forest', 'Elastic Net']
Пример #30
0
def test_predict(mock_predict, mock_fit, example_graph, X_y_binary):
    """Check mocked predict/fit call counts after fitting and predicting on the example graph."""
    X, y = X_y_binary
    mock_predict.return_value = ww.DataColumn(pd.Series(y))

    component_graph = ComponentGraph(example_graph).instantiate({})
    component_graph.fit(X, y)
    component_graph.predict(X)

    # Called twice when fitting pipeline, thrice when predicting
    assert mock_predict.call_count == 5
    # Only called during fit, not predict
    assert mock_fit.call_count == 3